-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_processor.py
More file actions
58 lines (48 loc) · 2.04 KB
/
data_processor.py
File metadata and controls
58 lines (48 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pandas as pd
import streamlit as st
# Translation table from Indonesian column headers to the English column
# names that the rest of this module validates against and works with.
# Headers not listed here are left as they are by the lookup in
# process_upload (``COLUMN_MAPPING.get(x, x)``).
COLUMN_MAPPING = dict(
    tanggal='date',
    judul='title',
    sentimen='sentiment',
    sumber='source',
    isi='content',
    kategori='category',
)
def process_upload(uploaded_file):
    """Load an uploaded CSV or Excel file into a normalized DataFrame.

    The parser is chosen from the extension of ``uploaded_file.name``
    (compared case-insensitively). Column headers are stripped, lower-cased
    and renamed through ``COLUMN_MAPPING``. The ``date`` column is parsed
    with ``errors='coerce'``, so unparseable values become ``NaT``. Rows
    whose sentiment is not one of positif/negatif/netral (case-insensitive)
    are kept in the result, but a Streamlit warning reports how many there
    are.

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute and
            readable by ``pd.read_csv`` / ``pd.read_excel``.

    Returns:
        A ``(df, error)`` tuple: ``(DataFrame, None)`` on success, or
        ``(None, message)`` when the format is unsupported, a required
        column is missing, or any exception is raised while processing.
    """
    try:
        # Detect the file format. Lower-case the name first so that
        # "DATA.CSV" or "report.XLSX" are not rejected as unsupported.
        filename = uploaded_file.name.lower()
        if filename.endswith('.csv'):
            df = pd.read_csv(uploaded_file)
        elif filename.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(uploaded_file)
        else:
            return None, "Format file tidak didukung. Gunakan CSV atau Excel."

        # Normalize and rename columns. Headers are not guaranteed to be
        # strings (e.g. numeric headers), so cast before strip().
        df.columns = [str(col).strip().lower() for col in df.columns]
        df.rename(columns=lambda x: COLUMN_MAPPING.get(x, x), inplace=True)

        # Validate required columns.
        required_columns = ['date', 'title', 'sentiment', 'source', 'content']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            return None, f"Kolom wajib tidak ditemukan: {', '.join(missing_columns)}"

        # Convert dates; values that cannot be parsed become NaT.
        df['date'] = pd.to_datetime(df['date'], errors='coerce')

        # Validate sentiment labels. Cast to str so a numeric or all-empty
        # column is reported as invalid instead of raising on the .str
        # accessor; missing values become 'nan' and still count as invalid.
        valid_sentiments = ['positif', 'negatif', 'netral']
        normalized_sentiments = df['sentiment'].astype(str).str.lower()
        invalid_sentiments = df[~normalized_sentiments.isin(valid_sentiments)]
        if not invalid_sentiments.empty:
            st.warning(f"Terdapat {len(invalid_sentiments)} sentimen tidak valid. Hanya gunakan: Positif, Negatif, Netral")

        return df, None
    except Exception as e:
        # Boundary with the UI: report any failure as a message rather than
        # letting it propagate to the caller.
        return None, f"Error memproses file: {str(e)}"
def generate_template():
    """Return a two-row sample DataFrame that serves as an upload template.

    The frame has the columns date, title, sentiment, source and content,
    all filled with placeholder string values.
    """
    headers = ('date', 'title', 'sentiment', 'source', 'content')
    sample_rows = (
        ('2024-01-01', 'Contoh Judul Berita 1', 'Positif', 'Media Satu', 'Isi berita pertama...'),
        ('2024-01-02', 'Contoh Judul Berita 2', 'Netral', 'Media Dua', 'Isi berita kedua...'),
    )
    # Transpose the rows into per-column lists, keyed by header name.
    columns = {
        header: list(values)
        for header, values in zip(headers, zip(*sample_rows))
    }
    return pd.DataFrame(columns)