Bump version. Gradio upgraded. Minor formatting improvements

seanpedrick-case · seanpedrick-case · commit b025b67bc448 · 2026-01-07T15:30:03.000Z
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@ emoji: 📚
 colorFrom: purple
 colorTo: yellow
 sdk: gradio
-sdk_version: 6.0.2
+sdk_version: 6.2.0
 app_file: app.py
 pinned: true
 license: agpl-3.0
@@ -13,9 +13,9 @@ short_description: Create thematic summaries for open text data with LLMs
 
 # Large language model topic modelling
 
-Version: 0.7.0
+Version: 0.8.0
 
-Extract topics and summarise outputs using Large Language Models (LLMs, Gemma 3 4b/GPT-OSS 20b if local (see tools/config.py to modify), Gemini, Azure, or AWS Bedrock models (e.g. Claude, Nova models). The app will query the LLM with batches of responses to produce summary tables, which are then compared iteratively to output a table with the general topics, subtopics, topic sentiment, and a topic summary. Instructions on use can be found in the README.md file. You can try out examples by clicking on one of the example datasets on the main app page, which will show you example outputs from a local model run. API keys for AWS, Azure, and Gemini services can be entered on the settings page (note that Gemini has a free public API).
+Extract topics and summarise outputs using Large Language Models (LLMs): local, Gemini, Azure, or AWS Bedrock models (e.g. Claude, Nova models). The app will query the LLM with batches of responses to produce summary tables, which are then compared iteratively to output a table with the general topics, subtopics, topic sentiment, and a topic summary. Instructions on use can be found in the README.md file. You can try out examples by clicking on one of the example datasets on the main app page, which will show you example outputs from a local model run. API keys for AWS, Azure, and Gemini services can be entered on the settings page (note that Gemini has a free public API).
 
 NOTE: Large language models are not 100% accurate and may produce biased or harmful outputs. All outputs from this app **absolutely need to be checked by a human** to check for harmful outputs, hallucinations, and accuracy.
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llm_topic_modelling"
-version = "0.7.0"
+version = "0.8.0"
 description = "Generate thematic summaries from open text in tabular data files with a large language model."
 requires-python = ">=3.10"
 readme = "README.md"
@@ -51,7 +51,7 @@ classifiers = [
 ]
 
 dependencies = [    
-    "gradio==6.0.2",
+    "gradio==6.2.0",
     "transformers==4.57.2",
     "spaces==0.42.1",
     "boto3==1.42.1",
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 # Note that this requirements file is optimised for Hugging Face spaces / Python 3.10. Please use requirements_no_local.txt for installation without local model inference (simplest approach to get going). Please use requirements_cpu.txt for CPU instances and requirements_gpu.txt for GPU instances using Python 3.11
-gradio==6.0.2
+gradio==6.2.0
 transformers==4.57.2
 spaces==0.42.1
 boto3>=1.42.1
diff --git a/requirements_cpu.txt b/requirements_cpu.txt
@@ -1,4 +1,4 @@
-gradio==6.0.2
+gradio==6.2.0
 transformers==4.57.2
 spaces==0.42.1
 pandas>=2.3.3
diff --git a/requirements_gpu.txt b/requirements_gpu.txt
@@ -1,5 +1,4 @@
-
-gradio==6.0.2
+gradio==6.2.0
 transformers==4.57.2
 spaces==0.42.1
 boto3>=1.42.1
diff --git a/requirements_lightweight.txt b/requirements_lightweight.txt
@@ -1,5 +1,5 @@
 # This requirements file is optimised for AWS ECS using Python 3.11 alongside the Dockerfile, without local torch and llama-cpp-python. For AWS ECS, torch and llama-cpp-python are optionally installed in the main Dockerfile
-gradio==6.0.2
+gradio==6.2.0
 transformers==4.57.2
 spaces==0.42.1
 boto3>=1.42.1
diff --git a/tools/dedup_summaries.py b/tools/dedup_summaries.py
@@ -3086,6 +3086,14 @@ def overall_summary(
                 summarised_output = ""
                 summarised_output_for_df = ""
 
+            # Remove multiple consecutive line breaks (2 or more) and replace with single line break
+            if summarised_output_for_df:
+                summarised_output_for_df = re.sub(
+                    r"\n{2,}", "\n", summarised_output_for_df
+                )
+            if summarised_output:
+                summarised_output = re.sub(r"\n{2,}", "\n", summarised_output)
+
             summarised_outputs_for_df.append(summarised_output_for_df)
             summarised_outputs.append(summarised_output)
             txt_summarised_outputs.append(
@@ -3155,6 +3163,7 @@ def overall_summary(
             summarised_outputs_df_for_display["Summary"]
             .apply(lambda x: markdown.markdown(x) if isinstance(x, str) else x)
             .str.replace(r"\n", "<br>", regex=False)
+            .str.replace(r"(<br>\s*){2,}", "<br>", regex=True)
         )
         html_output_table = summarised_outputs_df_for_display.to_html(
             index=False, escape=False
diff --git a/tools/llm_api_call.py b/tools/llm_api_call.py
@@ -5396,9 +5396,7 @@ def all_in_one_pipeline(
         total_number_of_calls += dedup_number_of_calls
         total_time_taken += dedup_estimated_time_taken
         out_message.append(
-            f"LLM deduplication completed: {dedup_input_tokens} input tokens, "
-            f"{dedup_output_tokens} output tokens, {dedup_number_of_calls} calls, "
-            f"{dedup_estimated_time_taken:.2f}s"
+            f"LLM deduplication completed. Total time: {dedup_estimated_time_taken:.2f}s"
         )
 
     # 3) Summarisation

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-gradio==6.0.2`
	`1`	`+gradio==6.2.0`
`2`	`2`	`transformers==4.57.2`
`3`	`3`	`spaces==0.42.1`
`4`	`4`	`pandas>=2.3.3`