Skip to content

Commit 8c9a139

Browse files
committed
generic feature column
1 parent 573447d commit 8c9a139

16 files changed

+51907
-14
lines changed

backend/dendogram_controller.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,7 @@ def generate_dendogram_from_csv():
125125
file_content = file.stream.read().decode('utf-8')
126126
csv_reader = csv.DictReader(file_content.splitlines())
127127
for row in csv_reader:
128-
extracted_features = row.get("extracted_features_TransFeatEx", "")
128+
extracted_features = row.get("extracted_features", "")
129129
if extracted_features:
130130
features.extend(extracted_features.split(';'))
131131
logger.info(f"Extracted {len(features)} features from CSV file")

cli-client/scripts/Stage 1 - Feature extraction/review_postprocessing.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,10 @@ def parse_and_add_column(csv_folder, json_folder, output_folder):
4343
# Concatenate features separated by ';'
4444
review_features_map[review_id] = ";".join(filter(None, features))
4545

46-
# Add the 'extracted_features_TransFeatEx' column
47-
df['extracted_features_TransFeatEx'] = df['reviewId'].map(review_features_map)
48-
# Filter out rows where extracted_features_TransFeatEx is empty
49-
df = df[df['extracted_features_TransFeatEx'].notna() & (df['extracted_features_TransFeatEx'] != '')]
46+
# Add the 'extracted_features' column
47+
df['extracted_features'] = df['reviewId'].map(review_features_map)
48+
# Filter out rows where extracted_features is empty
49+
df = df[df['extracted_features'].notna() & (df['extracted_features'] != '')]
5050
# Save the updated DataFrame to the output folder
5151
output_path = os.path.join(output_folder, csv_file)
5252
df.to_csv(output_path, sep=',', index=False)

cli-client/scripts/Stage 1 - Feature extraction/statistics.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ def process_files(input_folder, output_folder, top_n):
3535

3636
# Extract and flatten features
3737
all_features = []
38-
for cell in df['extracted_features_TransFeatEx'].dropna():
38+
for cell in df['extracted_features'].dropna():
3939
features = cell.split(';')
4040
all_features.extend(features)
4141

@@ -112,7 +112,7 @@ def process_files(input_folder, output_folder, top_n):
112112
category_features[category] = []
113113

114114
# Extract and flatten features for this category
115-
for cell in group['extracted_features_TransFeatEx'].dropna():
115+
for cell in group['extracted_features'].dropna():
116116
features = cell.split(';')
117117
category_features[category].extend(features)
118118

data/Stage 2 - Hierarchical Clustering/input/TransFeatEx/com.discord.csv

Lines changed: 2338 additions & 0 deletions
Large diffs are not rendered by default.

data/Stage 2 - Hierarchical Clustering/input/TransFeatEx/com.google.android.apps.bard.csv

Lines changed: 4195 additions & 0 deletions
Large diffs are not rendered by default.

data/Stage 2 - Hierarchical Clustering/input/TransFeatEx/com.microsoft.copilot.csv

Lines changed: 1365 additions & 0 deletions
Large diffs are not rendered by default.

data/Stage 2 - Hierarchical Clustering/input/TransFeatEx/com.openai.chatgpt.csv

Lines changed: 11473 additions & 0 deletions
Large diffs are not rendered by default.

data/Stage 2 - Hierarchical Clustering/input/TransFeatEx/com.whatsapp.csv

Lines changed: 27789 additions & 0 deletions
Large diffs are not rendered by default.

data/Stage 2 - Hierarchical Clustering/input/TransFeatEx/org.telegram.messenger.csv

Lines changed: 4733 additions & 0 deletions
Large diffs are not rendered by default.

data/Stage 2 - Hierarchical Clustering/input/ab.mini_discord.csv

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
app_name,app_package,app_categoryId,reviewId,review,reply,userName,score,at,extracted_features_TransFeatEx
1+
app_name,app_package,app_categoryId,reviewId,review,reply,userName,score,at,extracted_features
22
"Discord - Talk, Play, Hang Out",com.discord,COMMUNICATION,00280421-44e5-4026-8374-72b714bfe6ec,"Buggy (eg. notifications just don't work for me), there's no way to quit when you are done, and it uses a lot of the battery ... but great when it works. Edit: They say to contact support, but I did and they haven't gotten back for weeks now.","Make sure you aren't logged in on another client and try setting server notifications to @mentions. If you're still having trouble, change your password and verify your notification settings with this guide: https://support.discord.com/hc/en-us/articles/218892547",Jeremy Walker,3,"Nov 06, 2024",Buggy;UseLot;Edit
33
"Discord - Talk, Play, Hang Out",com.discord,COMMUNICATION,2966add5-6b3f-40dc-8057-41fc61c03dba,it's gotten so many bugs overtime and discord doesn't do anything to fix it. I can't access the shop or change pfps without being kicked out of app.,"Try reinstalling the app and restarting your device. If issues persist, see this article: https://support.discord.com/hc/en-us/articles/13148417007767",Broken Lexi,3,"Nov 06, 2024",GetSoManyBug;AccessShop
44
"Discord - Talk, Play, Hang Out",com.discord,COMMUNICATION,cc5fe35b-2338-4183-99bc-4b925c68da43,"One Star. LITERALLY, one freaking star. Because you know why? Well when u try to change my email, I can't. It will always be the old email, can u like fix this?","If you're having trouble updating your email, please submit a support ticket here so we can provide assistance: dis.gd/contact",Princess Javelina,1,"Nov 06, 2024",OneStar;LiterallyOneFreakingStar;ChangeEmail

0 commit comments

Comments
 (0)