|
6 | 6 |
|
7 | 7 | from tools.auth import authenticate_user |
8 | 8 | from tools.aws_functions import ( |
9 | | - download_file_from_s3, |
| 9 | + download_cost_codes_with_error_handling, |
10 | 10 | export_outputs_to_s3, |
11 | 11 | upload_file_to_s3, |
12 | 12 | ) |
|
83 | 83 | LOG_FILE_NAME, |
84 | 84 | MAX_FILE_SIZE, |
85 | 85 | MAX_QUEUE_SIZE, |
| 86 | + MAXIMUM_ALLOWED_TOPICS, |
86 | 87 | MPLCONFIGDIR, |
87 | 88 | OUTPUT_COST_CODES_PATH, |
88 | 89 | OUTPUT_DEBUG_FILES, |
|
119 | 120 | from tools.custom_csvlogger import CSVLogger_custom |
120 | 121 | from tools.dedup_summaries import ( |
121 | 122 | deduplicate_topics, |
122 | | - deduplicate_topics_llm, |
123 | 123 | overall_summary, |
124 | 124 | wrapper_summarise_output_topics_per_group, |
125 | 125 | ) |
|
135 | 135 | empty_output_vars_extract_topics, |
136 | 136 | empty_output_vars_summarise, |
137 | 137 | enforce_cost_codes, |
138 | | - ensure_model_in_map, |
139 | 138 | get_connection_params, |
140 | 139 | join_cols_onto_reference_df, |
141 | 140 | load_in_data_file, |
|
151 | 150 | ) |
152 | 151 | from tools.llm_api_call import ( |
153 | 152 | all_in_one_pipeline, |
| 153 | + deduplicate_topics_llm_wrapper, |
154 | 154 | modify_existing_output_tables, |
155 | 155 | validate_topics_wrapper, |
156 | 156 | wrapper_extract_topics_per_column_value, |
@@ -923,6 +923,7 @@ def show_info_box_on_click( |
923 | 923 |
|
924 | 924 | with gr.Accordion("Response sentiment analysis", open=False): |
925 | 925 | sentiment_checkbox = gr.Radio( |
| 926 | + label="Should the model assess the sentiment of responses?", |
926 | 927 | value="Negative or Positive", |
927 | 928 | choices=[ |
928 | 929 | "Negative or Positive", |
@@ -1268,7 +1269,15 @@ def show_info_box_on_click( |
1268 | 1269 | precision=1, |
1269 | 1270 | step=0.1, |
1270 | 1271 | ) |
| 1272 | + with gr.Row(equal_height=True): |
1271 | 1273 | batch_size_number.render() |
| 1274 | + max_topics_number = gr.Number( |
| 1275 | + value=MAXIMUM_ALLOWED_TOPICS, |
| 1276 | + label="Maximum number of topics allowed. If exceeded, the LLM will make efforts to deduplicate topics after every batch until the total number of topics is below this number (not foolproof).", |
| 1277 | + precision=0, |
| 1278 | + minimum=1, |
| 1279 | + maximum=1000, |
| 1280 | + ) |
1272 | 1281 | random_seed = gr.Number( |
1273 | 1282 | value=LLM_SEED, label="Random seed for LLM generation", visible="hidden" |
1274 | 1283 | ) |
@@ -1533,6 +1542,7 @@ def show_info_box_on_click( |
1533 | 1542 | additional_validation_issues_textbox, |
1534 | 1543 | show_previous_table_radio, |
1535 | 1544 | api_url_textbox, |
| 1545 | + max_topics_number, |
1536 | 1546 | ], |
1537 | 1547 | outputs=[ |
1538 | 1548 | display_topic_table_markdown, |
@@ -1676,6 +1686,7 @@ def show_info_box_on_click( |
1676 | 1686 | aws_secret_key_textbox, |
1677 | 1687 | aws_region_textbox, |
1678 | 1688 | api_url_textbox, |
| 1689 | + max_topics_number, |
1679 | 1690 | ], |
1680 | 1691 | outputs=[ |
1681 | 1692 | display_topic_table_markdown, |
@@ -1835,63 +1846,6 @@ def show_info_box_on_click( |
1835 | 1846 | api_name="deduplicate_topics", |
1836 | 1847 | ) |
1837 | 1848 |
|
1838 | | - # When LLM deduplication button pressed, deduplicate data using LLM |
def deduplicate_topics_llm_wrapper(
    reference_df,
    topic_summary_df,
    reference_table_file_name,
    unique_topics_table_file_name,
    model_choice,
    in_api_key,
    temperature,
    in_excel_sheets,
    merge_sentiment,
    merge_general_topics,
    in_data_files,
    chosen_cols,
    output_folder,
    candidate_topics=None,
    azure_endpoint="",
    api_url=None,
    aws_access_key_textbox="",
    aws_secret_key_textbox="",
    aws_region_textbox="",
    azure_api_key_textbox="",
    sentiment_checkbox="Negative or Positive",
):
    """Look up the model source for ``model_choice`` and call ``deduplicate_topics_llm``.

    The wrapper registers ``model_choice`` via ``ensure_model_in_map``, reads
    ``model_name_map[model_choice]["source"]``, and forwards its own arguments
    to ``deduplicate_topics_llm`` positionally, with ``sentiment_checkbox``
    passed by keyword.

    NOTE(review): the meaning of each forwarded argument is defined by
    ``deduplicate_topics_llm``, whose signature is not visible here. The
    positional order below must stay in sync with it.

    Returns:
        Whatever ``deduplicate_topics_llm`` returns.
    """
    # A custom model_choice has to be registered before it can be looked up.
    ensure_model_in_map(model_choice)
    source_for_model = model_name_map[model_choice]["source"]

    leading_args = (
        reference_df,
        topic_summary_df,
        reference_table_file_name,
        unique_topics_table_file_name,
        model_choice,
        in_api_key,
        temperature,
        source_for_model,
    )
    # Four positional slots that this wrapper always leaves as None.
    # NOTE(review): what these slots represent is decided by the callee - confirm.
    unset_args = (None, None, None, None)
    trailing_args = (
        in_excel_sheets,
        merge_sentiment,
        merge_general_topics,
        in_data_files,
        chosen_cols,
        output_folder,
        candidate_topics,
        azure_endpoint,
        OUTPUT_DEBUG_FILES,
        api_url,
        aws_access_key_textbox,
        aws_secret_key_textbox,
        aws_region_textbox,
        azure_api_key_textbox,
    )

    return deduplicate_topics_llm(
        *leading_args,
        *unset_args,
        *trailing_args,
        sentiment_checkbox=sentiment_checkbox,
    )
1894 | | - |
1895 | 1849 | deduplicate_llm_previous_data_btn.click( |
1896 | 1850 | load_in_previous_data_files, |
1897 | 1851 | inputs=[deduplication_input_files], |
@@ -2511,16 +2465,6 @@ def deduplicate_topics_llm_wrapper( |
2511 | 2465 | f"Attempting to download from bucket: {S3_LOG_BUCKET}, key: {S3_COST_CODES_PATH}" |
2512 | 2466 | ) |
2513 | 2467 |
|
2514 | | - # Create a wrapper function with error handling |
def download_cost_codes_with_error_handling(bucket, key, local_path):
    """Call ``download_file_from_s3`` and report failure instead of raising.

    Args:
        bucket: Bucket name, forwarded to ``download_file_from_s3`` and used
            in the failure message.
        key: Object key, forwarded to ``download_file_from_s3`` and used in
            the failure message.
        local_path: Forwarded unchanged to ``download_file_from_s3``.

    Returns:
        True when ``download_file_from_s3`` completes without raising,
        False when it raises any ``Exception``.
    """
    succeeded = False
    try:
        download_file_from_s3(bucket, key, local_path)
    except Exception as download_error:
        # Best-effort: print the problem and signal failure through the
        # return value rather than propagating the exception to the caller.
        print(f"Error downloading cost codes from S3: {download_error}")
        print(f"Failed to download s3://{bucket}/{key}")
    else:
        succeeded = True
    return succeeded
2523 | | - |
2524 | 2468 | app.load( |
2525 | 2469 | download_cost_codes_with_error_handling, |
2526 | 2470 | inputs=[ |
|
0 commit comments