Commit 5a3b1b1

Merge pull request #57 from seanpedrick-case/dev
Upgraded packages and added compatibility with Gradio 6
2 parents c1117f4 + 5d29a37 commit 5a3b1b1

13 files changed: +339 −283 lines

.dockerignore

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ feedback/*
 test_code/*
 unsloth_compiled_cache/*
 .vscode/*
+llm_topic_modelling.egg-info/*
 input/
 output/
 logs/

.gitignore

Lines changed: 2 additions & 1 deletion
@@ -17,4 +17,5 @@ test_code/*
 config/*
 tmp/*
 unsloth_compiled_cache/*
-.vscode/*
+.vscode/*
+llm_topic_modelling.egg-info/*

README.md

Lines changed: 4 additions & 2 deletions
@@ -1,17 +1,19 @@
 ---
 title: Large language model topic modelling
-emoji: 📝
+emoji: 📚
 colorFrom: purple
 colorTo: yellow
 sdk: gradio
+sdk_version: 6.0.2
 app_file: app.py
 pinned: true
 license: agpl-3.0
+short_description: Create thematic summaries for open text data
 ---
 
 # Large language model topic modelling
 
-Version: 0.5.1
+Version: 0.5.2
 
 Extract topics and summarise outputs using Large Language Models (LLMs: Gemma 3 4b/GPT-OSS 20b if local (see tools/config.py to modify), or Gemini, Azure, or AWS Bedrock models such as Claude and Nova). The app will query the LLM with batches of responses to produce summary tables, which are then compared iteratively to output a table with the general topics, subtopics, topic sentiment, and a topic summary. Instructions on use can be found in the README.md file. You can try out examples by clicking on one of the example datasets on the main app page, which will show you example outputs from a local model run. API keys for AWS, Azure, and Gemini services can be entered on the settings page (note that Gemini has a free public API).
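
The "batches of responses" behaviour described in the README excerpt above can be pictured with a short sketch. This is an illustration only, not the app's actual code: the query_llm callable stands in for whichever Gemini, Azure, AWS Bedrock or local backend is configured, and the batch size is an arbitrary assumption.

from typing import Callable
import pandas as pd

def summarise_in_batches(responses: list[str],
                         query_llm: Callable[[str], str],
                         batch_size: int = 20) -> pd.DataFrame:
    """Send fixed-size batches of open-text responses to an LLM and keep
    one summary per batch, ready to be compared and merged iteratively."""
    starts = list(range(0, len(responses), batch_size))
    summaries = []
    for start in starts:
        batch = responses[start:start + batch_size]
        prompt = ("Identify the general topics, subtopics and sentiment in "
                  "the following responses:\n" + "\n".join(batch))
        summaries.append(query_llm(prompt))
    return pd.DataFrame({"batch_start": starts, "summary": summaries})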

app.py

Lines changed: 118 additions & 115 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 32 additions & 28 deletions
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llm_topic_modelling"
-version = "0.5.1"
+version = "0.5.2"
 description = "Generate thematic summaries from open text in tabular data files with a large language model."
 requires-python = ">=3.10"
 readme = "README.md"
@@ -50,55 +50,59 @@ classifiers = [
 "Programming Language :: Python :: 3.13",
 ]
 
-dependencies = [
-"pandas==2.3.3",
-"gradio==5.49.1",
-"transformers==4.57.1",
+dependencies = [
+"gradio==6.0.2",
+"transformers==4.57.2",
 "spaces==0.42.1",
-"boto3==1.40.72",
-"pyarrow==21.0.0",
-"openpyxl==3.1.5",
-"markdown==3.7",
-"tabulate==0.9.0",
-"lxml==5.3.0",
-"google-genai==1.50.0",
-"openai==2.2.0",
-"html5lib==1.1",
-"beautifulsoup4==4.12.3",
-"rapidfuzz==3.13.0",
-"python-dotenv==1.1.0"
+"boto3==1.42.1",
+"pandas<=2.3.3",
+"pyarrow>=21.0.0",
+"openpyxl>=3.1.5",
+"markdown>=3.7",
+"tabulate>=0.9.0",
+"lxml>=5.3.0",
+"google-genai<=1.52.0",
+"openai<=2.8.1",
+"html5lib>=1.1",
+"beautifulsoup4>=4.12.3",
+"rapidfuzz>=3.13.0",
+"python-dotenv>=1.1.0"
 ]
 
 [project.optional-dependencies]
 dev = ["pytest"]
 test = ["pytest", "pytest-cov"]
 
 # Extra dependencies for VLM models
-# For torch you should use --index-url https://download.pytorch.org/whl/cu124. Additionally installs the unsloth package
+# For torch you should use --index-url https://download.pytorch.org/whl/cu128. Additionally installs the unsloth package
 torch = [
-"torch==2.6.0",
-"accelerate==1.11.0",
-"bitsandbytes==0.48.2",
-"unsloth==2025.9.4",
-"unsloth_zoo==2025.9.5",
-"timm==1.0.19"
+"torch<=2.9.1",
+"torchvision",
+"accelerate",
+"bitsandbytes",
+"unsloth==2025.11.6",
+"unsloth_zoo==2025.11.6",
+"timm",
+"xformers"
 ]
 
-# If you want to install llama-cpp-python in GPU mode, use cmake.args="-DGGML_CUDA=on" . If that doesn't work, try specific wheels for your system, e.g. for Linux: See files in https://github.com/abetlen/llama-cpp-python/releases/tag/v0.3.16-cu124 . More details on installation here: https://llama-cpp-python.readthedocs.io/en/latest
+# If you want to install llama-cpp-python in GPU mode, use cmake.args="-DGGML_CUDA=on" . If that doesn't work, try specific wheels for your system, e.g. for Linux see files in https://github.com/JamePeng/llama-cpp-python/releases. More details on installation here: https://llama-cpp-python.readthedocs.io/en/latest
 llamacpp = [
-"llama-cpp-python==0.3.16",
+"llama-cpp-python>=0.3.16",
 ]
 
 # Run Gradio as an mcp server
 mcp = [
-"gradio[mcp]==5.49.1"
+"gradio[mcp]==6.0.2"
 ]
 
 [project.urls]
 Homepage = "https://github.com/seanpedrick-case/llm_topic_modelling"
 repository = "https://github.com/seanpedrick-case/llm_topic_modelling"
 
-
+[tool.setuptools]
+packages = ["tools"]
+py-modules = ["app"]
 
 # Configuration for Ruff linter:
 [tool.ruff]
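
Because the torch and llamacpp extras above depend on matching CUDA wheels, it can be worth verifying the environment after installation. The snippet below is an optional sanity check, not part of the project:

import importlib.util

try:
    import torch
    print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
except ImportError:
    print("torch not installed (expected if the [torch] extra was skipped)")

if importlib.util.find_spec("llama_cpp") is not None:
    import llama_cpp
    print("llama-cpp-python", llama_cpp.__version__)
else:
    print("llama-cpp-python not installed (expected if the [llamacpp] extra was skipped)")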

requirements.txt

Lines changed: 22 additions & 21 deletions
@@ -1,28 +1,29 @@
 # Note that this requirements file is optimised for Hugging Face spaces / Python 3.10. Please use requirements_no_local.txt for installation without local model inference (simplest approach to get going). Please use requirements_cpu.txt for CPU instances and requirements_gpu.txt for GPU instances using Python 3.11
-pandas==2.3.3
-gradio==5.49.1
-transformers==4.57.1
+gradio==6.0.2
+transformers==4.57.2
 spaces==0.42.1
-boto3==1.40.72
-pyarrow==21.0.0
-openpyxl==3.1.5
-markdown==3.7
-tabulate==0.9.0
-lxml==5.3.0
-google-genai==1.50.0
-openai==2.2.0
-html5lib==1.1
-beautifulsoup4==4.12.3
-rapidfuzz==3.13.0
-python-dotenv==1.1.0
+boto3>=1.42.1
+pandas>=2.3.3
+pyarrow>=21.0.0
+openpyxl>=3.1.5
+markdown>=3.7
+tabulate>=0.9.0
+lxml>=5.3.0
+google-genai>=1.52.0
+openai>=2.8.1
+html5lib>=1.1
+beautifulsoup4>=4.12.3
+rapidfuzz>=3.13.0
+python-dotenv>=1.1.0
 # GPU (for huggingface instance)
 # Torch/Unsloth and llama-cpp-python
 # Latest compatible with CUDA 12.4
-torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu124
-unsloth[cu124-torch260]==2025.9.4
-unsloth_zoo==2025.9.5
-timm==1.0.19
-# llama-cpp-python direct wheel link for GPU compatible version 3.16 for use with Python 3.10 and Hugging Face
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.16-cu124/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl
+torch<=2.9.1 --extra-index-url https://download.pytorch.org/whl/cu128
+unsloth[cu128-torch280]<=2025.11.6
+unsloth_zoo<=2025.11.6
+timm
+# llama-cpp-python direct wheel link for GPU compatible version 3.17 for use with Python 3.10 and Hugging Face
+https://github.com/JamePeng/llama-cpp-python/releases/download/v0.3.17-cu128-Basic-linux-20251202/llama_cpp_python-0.3.17-cp310-cp310-linux_x86_64.whl
+#https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.16-cu124/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl

requirements_cpu.txt

Lines changed: 16 additions & 16 deletions
@@ -1,20 +1,20 @@
-pandas==2.3.3
-gradio==5.49.1
-transformers==4.56.0
+gradio==6.0.2
+transformers==4.57.2
 spaces==0.42.1
-boto3==1.40.48
-pyarrow==21.0.0
-openpyxl==3.1.5
-markdown==3.7
-tabulate==0.9.0
-lxml==5.3.0
-google-genai==1.50.0
-openai==2.2.0
-html5lib==1.1
-beautifulsoup4==4.12.3
-rapidfuzz==3.13.0
-python-dotenv==1.1.0
-torch==2.8.0 --extra-index-url https://download.pytorch.org/whl/cpu
+pandas>=2.3.3
+boto3>=1.42.1
+pyarrow>=21.0.0
+openpyxl>=3.1.5
+markdown>=3.7
+tabulate>=0.9.0
+lxml>=5.3.0
+google-genai>=1.52.0
+openai>=2.8.1
+html5lib>=1.1
+beautifulsoup4>=4.12.3
+rapidfuzz>=3.13.0
+python-dotenv>=1.1.0
+torch<=2.9.1 --extra-index-url https://download.pytorch.org/whl/cpu
 llama-cpp-python==0.3.16 -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 # Direct wheel links if above doesn't work
 # I have created CPU Linux, Python 3.11 compatible wheels:

requirements_gpu.txt

Lines changed: 22 additions & 27 deletions
@@ -1,33 +1,28 @@
-pandas==2.3.3
-gradio==5.49.1
-transformers==4.57.1
+
+gradio==6.0.2
+transformers==4.57.2
 spaces==0.42.1
-boto3==1.40.72
-pyarrow==21.0.0
-openpyxl==3.1.5
-markdown==3.7
-tabulate==0.9.0
-lxml==5.3.0
-google-genai==1.50.0
-openai==2.2.0
-html5lib==1.1
-beautifulsoup4==4.12.3
-rapidfuzz==3.13.0
-python-dotenv==1.1.0
+boto3>=1.42.1
+pandas>=2.3.3
+pyarrow>=21.0.0
+openpyxl>=3.1.5
+markdown>=3.7
+tabulate>=0.9.0
+lxml>=5.3.0
+google-genai>=1.52.0
+openai>=2.8.1
+html5lib>=1.1
+beautifulsoup4>=4.12.3
+rapidfuzz>=3.13.0
+python-dotenv>=1.1.0
 # Torch/Unsloth
 # Latest compatible with CUDA 12.4
-torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu124
-unsloth[cu124-torch260]==2025.9.4 # Refer here for more details on installation: https://pypi.org/project/unsloth
-unsloth_zoo==2025.9.5
+torch<=2.9.1 --extra-index-url https://download.pytorch.org/whl/cu128
+unsloth[cu128-torch280]<=2025.11.6 # Refer here for more details on installation: https://pypi.org/project/unsloth
+unsloth_zoo<=2025.11.6
 # Additional for Windows and CUDA 12.4 older GPUS (RTX 3x or similar):
 #triton-windows<3.3
-timm==1.0.19
+timm
 # Llama CPP Python
-llama-cpp-python==0.3.16 -C cmake.args="-DGGML_CUDA=on"
-# If below doesn't work, try specific wheels for your system:
-# For Linux:
-# See files in https://github.com/abetlen/llama-cpp-python/releases/tag/v0.3.16-cu124 for different python versions
-# Python 3.11 compatible wheel:
-# https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.16-cu124/llama_cpp_python-0.3.16-cp311-cp311-linux_x86_64.whl
-# For Windows, not available at above link. I have made a GPU Windows wheel for Python 3.11:
-# https://github.com/seanpedrick-case/llama-cpp-python-whl-builder/releases/download/v0.1.0/llama_cpp_python-0.3.16-cp311-cp311-win_amd64.whl
+llama-cpp-python>=0.3.16 -C cmake.args="-DGGML_CUDA=on"
+# If above doesn't work, try specific wheels for your system, see files in https://github.com/JamePeng/llama-cpp-python/releases for different python versions
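
Once a CUDA-enabled llama-cpp-python build from one of the wheels above is installed, GPU offload is set per model load. A minimal sketch, assuming a GGUF file has already been downloaded; the model path and prompt are placeholders, not values taken from this commit:

from llama_cpp import Llama

llm = Llama(
    model_path="model/gemma3_4b/gemma-3-4b-it-qat-UD-Q4_K_XL.gguf",  # placeholder path
    n_gpu_layers=-1,  # offload all layers to the GPU; requires a CUDA build
    n_ctx=24576,      # matches the LLM_CONTEXT_LENGTH default in tools/config.py
)
result = llm("Summarise the main topics in: 'The park was clean but the cafe was closed.'",
             max_tokens=128)
print(result["choices"][0]["text"])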

requirements_lightweight.txt

Lines changed: 15 additions & 15 deletions
@@ -1,17 +1,17 @@
 # This requirements file is optimised for AWS ECS using Python 3.11 alongside the Dockerfile, without local torch and llama-cpp-python. For AWS ECS, torch and llama-cpp-python are optionally installed in the main Dockerfile
-pandas==2.3.3
-gradio==5.49.1
-transformers==4.57.1
+gradio==6.0.2
+transformers==4.57.2
 spaces==0.42.1
-boto3==1.40.72
-pyarrow==21.0.0
-openpyxl==3.1.5
-markdown==3.7
-tabulate==0.9.0
-lxml==5.3.0
-google-genai==1.50.0
-openai==2.2.0
-html5lib==1.1
-beautifulsoup4==4.12.3
-rapidfuzz==3.13.0
-python-dotenv==1.1.0
+boto3>=1.42.1
+pandas>=2.3.3
+pyarrow>=21.0.0
+openpyxl>=3.1.5
+markdown>=3.7
+tabulate>=0.9.0
+lxml>=5.3.0
+google-genai>=1.52.0
+openai>=2.8.1
+html5lib>=1.1
+beautifulsoup4>=4.12.3
+rapidfuzz>=3.13.0
+python-dotenv>=1.1.0

tools/config.py

Lines changed: 19 additions & 2 deletions
@@ -261,6 +261,7 @@ def convert_string_to_boolean(value: str) -> bool:
 model_source = list()
 
 CHOSEN_LOCAL_MODEL_TYPE = get_or_create_env_var("CHOSEN_LOCAL_MODEL_TYPE", "Qwen 3 4B") # Gemma 3 1B # "Gemma 2b" # "Gemma 3 4B"
+print("CHOSEN_LOCAL_MODEL_TYPE:", CHOSEN_LOCAL_MODEL_TYPE)
 
 if RUN_LOCAL_MODEL == "1" and CHOSEN_LOCAL_MODEL_TYPE:
     model_full_names.append(CHOSEN_LOCAL_MODEL_TYPE)
@@ -329,6 +330,8 @@ def update_model_choice_config(default_model_source, model_name_map):
 # If you are using a system with low VRAM, you can set this to True to reduce the memory requirements
 LOW_VRAM_SYSTEM = get_or_create_env_var('LOW_VRAM_SYSTEM', 'False')
 
+MULTIMODAL_PROMPT_FORMAT = get_or_create_env_var('MULTIMODAL_PROMPT_FORMAT', 'False')
+
 if LOW_VRAM_SYSTEM == 'True':
     print("Using settings for low VRAM system")
     USE_LLAMA_CPP = get_or_create_env_var('USE_LLAMA_CPP', 'True')
@@ -340,14 +343,18 @@ def update_model_choice_config(default_model_source, model_name_map):
 
 USE_LLAMA_CPP = get_or_create_env_var('USE_LLAMA_CPP', 'True') # Llama.cpp or transformers with unsloth
 
+LOCAL_REPO_ID = get_or_create_env_var("LOCAL_REPO_ID", "")
+LOCAL_MODEL_FILE = get_or_create_env_var("LOCAL_MODEL_FILE", "")
+LOCAL_MODEL_FOLDER = get_or_create_env_var("LOCAL_MODEL_FOLDER", "")
+
 GEMMA2_REPO_ID = get_or_create_env_var("GEMMA2_2B_REPO_ID", "unsloth/gemma-2-it-GGUF")
 GEMMA2_REPO_TRANSFORMERS_ID = get_or_create_env_var("GEMMA2_2B_REPO_TRANSFORMERS_ID", "unsloth/gemma-2-2b-it-bnb-4bit")
 if USE_LLAMA_CPP == "False": GEMMA2_REPO_ID = GEMMA2_REPO_TRANSFORMERS_ID
 GEMMA2_MODEL_FILE = get_or_create_env_var("GEMMA2_2B_MODEL_FILE", "gemma-2-2b-it.q8_0.gguf")
 GEMMA2_MODEL_FOLDER = get_or_create_env_var("GEMMA2_2B_MODEL_FOLDER", "model/gemma")
 
 GEMMA3_4B_REPO_ID = get_or_create_env_var("GEMMA3_4B_REPO_ID", "unsloth/gemma-3-4b-it-qat-GGUF")
-GEMMA3_4B_REPO_TRANSFORMERS_ID = get_or_create_env_var("GEMMA3_4B_REPO_TRANSFORMERS_ID", "https://huggingface.co/unsloth/gemma-3-4b-it-bnb-4bit" )
+GEMMA3_4B_REPO_TRANSFORMERS_ID = get_or_create_env_var("GEMMA3_4B_REPO_TRANSFORMERS_ID", "unsloth/gemma-3-4b-it-bnb-4bit" )
 if USE_LLAMA_CPP == "False": GEMMA3_4B_REPO_ID = GEMMA3_4B_REPO_TRANSFORMERS_ID
 GEMMA3_4B_MODEL_FILE = get_or_create_env_var("GEMMA3_4B_MODEL_FILE", "gemma-3-4b-it-qat-UD-Q4_K_XL.gguf")
 GEMMA3_4B_MODEL_FOLDER = get_or_create_env_var("GEMMA3_4B_MODEL_FOLDER", "model/gemma3_4b")
@@ -392,11 +399,13 @@ def update_model_choice_config(default_model_source, model_name_map):
     LOCAL_REPO_ID = GEMMA3_4B_REPO_ID
     LOCAL_MODEL_FILE = GEMMA3_4B_MODEL_FILE
     LOCAL_MODEL_FOLDER = GEMMA3_4B_MODEL_FOLDER
+    MULTIMODAL_PROMPT_FORMAT = "True"
 
 elif CHOSEN_LOCAL_MODEL_TYPE == "Gemma 3 12B":
     LOCAL_REPO_ID = GEMMA3_12B_REPO_ID
     LOCAL_MODEL_FILE = GEMMA3_12B_MODEL_FILE
     LOCAL_MODEL_FOLDER = GEMMA3_12B_MODEL_FOLDER
+    MULTIMODAL_PROMPT_FORMAT = "True"
 
 elif CHOSEN_LOCAL_MODEL_TYPE == "Qwen 3 4B":
     LOCAL_REPO_ID = QWEN3_4B_REPO_ID
@@ -419,9 +428,17 @@ def update_model_choice_config(default_model_source, model_name_map):
     LOCAL_MODEL_FOLDER = GRANITE_4_3B_MODEL_FOLDER
 
 elif not CHOSEN_LOCAL_MODEL_TYPE:
+    print("No local model type chosen")
     LOCAL_REPO_ID = ""
    LOCAL_MODEL_FILE = ""
     LOCAL_MODEL_FOLDER = ""
+else:
+    print("CHOSEN_LOCAL_MODEL_TYPE not found")
+    LOCAL_REPO_ID = ""
+    LOCAL_MODEL_FILE = ""
+    LOCAL_MODEL_FOLDER = ""
+
+print("LOCAL_REPO_ID:", LOCAL_REPO_ID)
 
 
 USE_SPECULATIVE_DECODING = get_or_create_env_var("USE_SPECULATIVE_DECODING", "False")
@@ -456,7 +473,7 @@ def update_model_choice_config(default_model_source, model_name_map):
 LLM_CONTEXT_LENGTH = int(get_or_create_env_var('LLM_CONTEXT_LENGTH', '24576'))
 LLM_SAMPLE = get_or_create_env_var('LLM_SAMPLE', 'True')
 LLM_STOP_STRINGS = get_or_create_env_var('LLM_STOP_STRINGS', r"['\n\n\n\n\n\n']")
-MULTIMODAL_PROMPT_FORMAT = get_or_create_env_var('MULTIMODAL_PROMPT_FORMAT', 'False')
+
 SPECULATIVE_DECODING = get_or_create_env_var('SPECULATIVE_DECODING', 'False')
 NUM_PRED_TOKENS = int(get_or_create_env_var('NUM_PRED_TOKENS', '2'))
 K_QUANT_LEVEL = get_or_create_env_var('K_QUANT_LEVEL', '') # 2 = q4_0, 8 = q8_0, 4 = fp16
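
The settings above all go through the repository's get_or_create_env_var helper, so every value can be overridden from the environment or a .env file rather than by editing tools/config.py. The helper's implementation is not shown in this diff; the sketch below is an assumption about the pattern it follows, based purely on how it is used here:

import os

def get_or_create_env_var(var_name: str, default_value: str) -> str:
    """Return the environment variable if set; otherwise store and return
    the default so later lookups see a consistent value (assumed behaviour)."""
    value = os.environ.get(var_name)
    if value is None:
        os.environ[var_name] = default_value
        value = default_value
    return value

# Example override: pick the local backend without touching tools/config.py,
# e.g. via a .env file loaded by python-dotenv or the shell environment.
os.environ["CHOSEN_LOCAL_MODEL_TYPE"] = "Gemma 3 4B"
os.environ["USE_LLAMA_CPP"] = "False"  # use transformers/unsloth instead of llama.cpp
print(get_or_create_env_var("CHOSEN_LOCAL_MODEL_TYPE", "Qwen 3 4B"))  # -> Gemma 3 4B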
