CLI vs Jupyter 비교 - 모니터링 알람 데이터 분석을 통한 체계적 학습
목표: Jupyter 환경과 데이터를 준비합니다
# Jupyter 설치 확인
jupyter --version
# 필요한 라이브러리 설치
pip install pandas matplotlib numpy
your_project/
├── data/
│   ├── Alarm_list_*.csv
│   └── Failure_list_*.csv
└── notebooks/
    └── monitoring_analysis.ipynb
# 프로젝트 폴더에서 실행
cd your_project
jupyter lab
# 새 노트북 생성: Python 3 선택
실제 Jupyter Lab 환경에서 모니터링 데이터 분석 중인 화면
핵심: 분석 환경을 준비하고 데이터를 로드합니다
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
from datetime import datetime
# Hangul-capable font for plot text (macOS font name).
plt.rcParams['font.family'] = 'AppleGothic'
# Use the ASCII hyphen for negative tick labels instead of the Unicode minus.
plt.rcParams['axes.unicode_minus'] = False

# NOTE(review): the message text was reconstructed from a mis-encoded copy;
# the leading emoji could not be recovered exactly.
print("📚 라이브러리 로드 완료!")
# Directory that holds the exported CSV files; adjust to your environment.
# NOTE(review): Korean message text in this cell was reconstructed from a
# mis-encoded copy; confirm the wording against the original notebook.
data_path = './data'

# Find the CSV files. Sorted so that the file picked with [0] below is
# deterministic (os.listdir returns entries in arbitrary order).
csv_files = sorted(f for f in os.listdir(data_path) if f.endswith('.csv'))
print(f"📁 발견된 파일: {csv_files}")

alarm_files = [f for f in csv_files if f.startswith('Alarm_list')]
failure_files = [f for f in csv_files if f.startswith('Failure_list')]

# Fail with an explicit message instead of a bare IndexError on `[0]`.
if not alarm_files:
    raise FileNotFoundError(f"No 'Alarm_list*.csv' file found in {data_path!r}")
if not failure_files:
    raise FileNotFoundError(f"No 'Failure_list*.csv' file found in {data_path!r}")

# Load the alarm data (first matching file).
df_alarms = pd.read_csv(os.path.join(data_path, alarm_files[0]))
# Load the failure data (first matching file).
df_failures = pd.read_csv(os.path.join(data_path, failure_files[0]))

print(f"✅ 알람: {df_alarms.shape[0]:,}행")
print(f"✅ 장애: {df_failures.shape[0]}행")
# Alarm data structure: column names and the first three rows.
# NOTE(review): message text was reconstructed from a mis-encoded copy; the
# leading emoji could not be recovered exactly.
print("📊 알람 데이터:")
print(f"컬럼: {list(df_alarms.columns)}")
print(df_alarms.head(3))

# Same overview for the failure data.
print("\n📊 장애 데이터:")
print(f"컬럼: {list(df_failures.columns)}")
print(df_failures.head(3))
핵심: 시간 기반 분석을 위한 전처리와 ERP 시스템 집중 분석
# Date conversion: parse the date columns and derive a month column that the
# later monthly aggregation groups on.
# NOTE(review): the column names ('일자', '발생 일자', '월') were reconstructed
# from mis-encoded text; confirm them against the actual CSV headers.
df_alarms['일자'] = pd.to_datetime(df_alarms['일자'])
df_alarms['월'] = df_alarms['일자'].dt.month

df_failures['발생 일자'] = pd.to_datetime(df_failures['발생 일자'])
df_failures['월'] = df_failures['발생 일자'].dt.month

print("✅ 날짜 전처리 완료!")
print(f"기간: {df_alarms['일자'].min()} ~ {df_alarms['일자'].max()}")
# Show how many alarm rows each account group has.
print("🏢 계정 그룹별 현황:")
print(df_alarms['AccountGroup'].value_counts())

# Extract the ERP rows; .copy() gives independent frames so later column
# assignments do not act on a slice of the source data.
erp_alarms = df_alarms[df_alarms['AccountGroup'] == 'ERP'].copy()
erp_failures = df_failures[df_failures['AccountGroup'] == 'ERP'].copy()

print(f"\n🎯 ERP 알람: {len(erp_alarms):,}건")
print(f"🚨 ERP 장애: {len(erp_failures)}건")
# Guard the ratio: with no ERP alarms the division raises ZeroDivisionError.
if erp_alarms.empty:
    print("📈 전환율: N/A")
else:
    print(f"📈 전환율: {(len(erp_failures)/len(erp_alarms)*100):.2f}%")
# Monthly aggregation of ERP alarms: summed count plus the number of rows
# whose valid-alarm flag equals 'O'.
# NOTE(review): column names ('월', '개수', '유효알람', '장애수', '장애율%') were
# reconstructed from mis-encoded text; confirm against the CSV headers.
monthly_alarms = erp_alarms.groupby('월').agg({
    '개수': 'sum',
    '유효알람': lambda flags: (flags == 'O').sum(),
})
monthly_failures = erp_failures.groupby('월').size().to_frame('장애수')

# Combined view: the outer join keeps months present in only one table and
# fillna(0) turns the resulting gaps into zero counts.
monthly_combined = monthly_alarms.join(monthly_failures, how='outer').fillna(0)

# Mask zero denominators so a month without valid alarms yields NaN instead
# of inf in the failure-rate column.
valid_alarm_counts = monthly_combined['유효알람'].replace(0, np.nan)
monthly_combined['장애율%'] = (monthly_combined['장애수'] / valid_alarm_counts * 100).round(2)

print("📅 월별 현황:")
print(monthly_combined)
핵심: 시간대별 패턴, 알람 유형, AWS 리소스 매핑 및 시각화
# Aggregate ERP alarms per value of the hour column: summed count plus the
# number of rows whose valid-alarm flag equals 'O'.
# NOTE(review): column names ('시간', '개수', '유효알람') were reconstructed from
# mis-encoded text; confirm against the CSV headers.
hourly = erp_alarms.groupby('시간').agg({
    '개수': 'sum',
    '유효알람': lambda flags: (flags == 'O').sum(),
})
# Time-of-day classification helper.
def get_period(hour):
    """Return the Korean time-of-day label for ``hour``.

    Args:
        hour: Hour value that supports numeric comparison.

    Returns:
        '오전' (morning) for 6 <= hour < 12, '오후' (afternoon) for
        12 <= hour < 18, '저녁' (evening) for 18 <= hour < 24, and
        '새벽' (dawn) for every other value, which includes hours 0-5.
    """
    if 6 <= hour < 12:
        return '오전'
    if 12 <= hour < 18:
        return '오후'
    if 18 <= hour < 24:
        return '저녁'
    return '새벽'
# Label every hour with its time-of-day period, then total the valid alarms
# per period.
# NOTE(review): column names and message text were reconstructed from
# mis-encoded text; confirm against the original notebook.
hourly['시간대'] = hourly.index.map(get_period)
time_summary = hourly.groupby('시간대')['유효알람'].sum()

print("⏰ 시간대별 알람:")
print(time_summary.sort_values(ascending=False))

print("\n🔥 가장 바쁜 시간 TOP 5:")
top_hours = hourly.sort_values('유효알람', ascending=False).head()
for hour, valid_count in top_hours['유효알람'].items():
    # int() keeps the 'd' format valid when the hour index is float-typed.
    print(f" {int(hour):2d}시: {int(valid_count)}건")
# Per-category analysis: summed count and number of valid alarms, with the
# categories that have the most valid alarms first.
# NOTE(review): column names ('개수', '유효알람', '발생 원인') were reconstructed
# from mis-encoded text; confirm against the CSV headers.
category_analysis = erp_alarms.groupby('Category').agg({
    '개수': 'sum',
    '유효알람': lambda flags: (flags == 'O').sum(),
}).sort_values('유효알람', ascending=False)

print("📊 알람 유형별 현황:")
print(category_analysis)

# Main causes: the five most frequent values of the cause column.
print("\n🔍 주요 발생 원인 TOP 5:")
causes = erp_alarms['발생 원인'].value_counts().head()
for i, (cause, count) in enumerate(causes.items(), 1):
    cause_text = str(cause)
    # Append the ellipsis only when the text was actually cut at 50 characters.
    suffix = '...' if len(cause_text) > 50 else ''
    print(f"{i}. ({count}건) {cause_text[:50]}{suffix}")
# Regex patterns used to recognise an AWS service in a monitoring message.
aws_patterns = {
    'ALB': r'\[ALB\]',
    'EC2': r'\[EC2\]|i-[0-9a-f]+',
    'FSx': r'\[FSx\]|FSx ONTAP',
    'RDS': r'\[RDS\]|database',
    'S3': r'\[S3\]|bucket',
}

# Per service, count the ERP alarms whose message matches the pattern
# (case-insensitive; missing messages count as no match).
# NOTE(review): column/key names ('모니터링 메시지', '알람수', '유효알람') were
# reconstructed from mis-encoded text; confirm against the CSV headers.
service_alarms = {}
for service, pattern in aws_patterns.items():
    matches = erp_alarms[erp_alarms['모니터링 메시지'].str.contains(pattern, case=False, na=False)]
    if not matches.empty:
        service_alarms[service] = {
            '알람수': len(matches),
            '유효알람': (matches['유효알람'] == 'O').sum(),
        }

if service_alarms:
    # One row per service after the transpose.
    service_df = pd.DataFrame(service_alarms).T
    print("🔗 AWS 서비스별 알람:")
    print(service_df.sort_values('유효알람', ascending=False))
else:
    print("AWS 서비스 관련 알람이 발견되지 않았습니다.")
# Summary visualization: a 2x3 grid holding up to five panels. Each panel is
# drawn on an explicit Axes rather than the implicit "current axes".
# NOTE(review): column names ('유효알람', '장애수') were reconstructed from
# mis-encoded text; confirm against the frames built earlier.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# 1. Monthly trend
ax = axes[0, 0]
monthly_combined[['유효알람', '장애수']].plot(kind='bar', ax=ax)
ax.set_title('Monthly Trend')
ax.tick_params(axis='x', rotation=0)
ax.legend()

# 2. Distribution by time-of-day period
ax = axes[0, 1]
time_summary.plot(kind='bar', color='orange', ax=ax)
ax.set_title('Alarms by Time Period')
ax.tick_params(axis='x', rotation=45)

# 3. Distribution by category (top five)
ax = axes[0, 2]
category_analysis['유효알람'].head(5).plot(kind='pie', autopct='%1.1f%%', ax=ax)
ax.set_title('Top 5 Categories')
ax.set_ylabel('')

# 4. Distribution by hour
ax = axes[1, 0]
hourly['유효알람'].plot(kind='bar', color='lightblue', ax=ax)
ax.set_title('Hourly Distribution')
ax.set_xlabel('Hour')

# 5. Per AWS service (only when at least one service matched)
if service_alarms:
    ax = axes[1, 1]
    service_df['유효알람'].plot(kind='bar', color='lightcoral', ax=ax)
    ax.set_title('AWS Services')
    ax.tick_params(axis='x', rotation=45)
else:
    axes[1, 1].set_visible(False)

# The sixth grid slot is never used; hide its empty frame.
axes[1, 2].set_visible(False)

plt.tight_layout()
plt.show()
print("🎉 시각화 완료!")