Skip to content

Commit f6fcef1

Browse files
jexpa and aashipandya authored
LLM Model Config fixes (#508)
* LLM Model Config fixes
* Added ENTITY_EMBEDDING to docker compose yml
* qa and embedding config and local network for ollama
* updated readme and frontend constants for llms

---------

Co-authored-by: aashipandya <[email protected]>
1 parent 2a22653 commit f6fcef1

File tree

13 files changed

+56
-31
lines changed

13 files changed

+56
-31
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
.DS_Store
12
# Byte-compiled / optimized / DLL files
23
__pycache__/
34
*.py[cod]
@@ -167,4 +168,4 @@ google-cloud-cli-469.0.0-linux-x86_64.tar.gz
167168
/backend/src/chunks
168169
/backend/merged_files
169170
/backend/chunks
170-
google-cloud-cli-479.0.0-linux-x86_64.tar.gz
171+
google-cloud-cli-479.0.0-linux-x86_64.tar.gz

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,14 @@ Allow unauthenticated request : Yes
140140
| CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload |
141141
| GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication |
142142
| GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. If set to False, will save the files locally |
143+
| ENTITY_EMBEDDING | Optional | False | If set to True, It will add embeddings for each entity in database |
144+
| LLM_MODEL_CONFIG_azure_ai_<azure_deployment_name> | Optional | | Set azure config as - azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version|
145+
| LLM_MODEL_CONFIG_groq_<model_name> | Optional | | Set groq config as - model_name,base_url,groq_api_key |
146+
| LLM_MODEL_CONFIG_anthropic_<model_name> | Optional | | Set anthropic config as - model_name,anthropic_api_key |
147+
| LLM_MODEL_CONFIG_fireworks_<model_name> | Optional | | Set fireworks config as - model_name,fireworks_api_key |
148+
| LLM_MODEL_CONFIG_bedrock_<model_name> | Optional | | Set bedrock config as - model_name,aws_access_key_id,aws_secret_access_key,region_name |
149+
| LLM_MODEL_CONFIG_ollama_<model_name> | Optional | | Set ollama config as - model_name,model_local_url |
150+
143151

144152

145153

backend/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ RUN apt-get update && \
1616
ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
1717
# Copy requirements file and install Python dependencies
1818
COPY requirements.txt /code/
19-
RUN pip install --no-cache-dir --upgrade -r requirements.txt
19+
# --no-cache-dir --upgrade
20+
RUN pip install -r requirements.txt
2021
# Copy application code
2122
COPY . /code
2223
# Set command

backend/example.env

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ ENABLE_USER_AGENT = ""
2626
LLM_MODEL_CONFIG_model_version=""
2727
ENTITY_EMBEDDING="" True or False
2828
#examples
29-
LLM_MODEL_CONFIG_azure-ai-gpt-35="azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version"
30-
LLM_MODEL_CONFIG_azure-ai-gpt-4o="gpt-4o,https://YOUR-ENDPOINT.openai.azure.com/,azure_api_key,api_version"
31-
LLM_MODEL_CONFIG_groq-llama3-70b="model_name,base_url,groq_api_key"
32-
LLM_MODEL_CONFIG_anthropic-claude-3-5-sonnet="model_name,anthropic_api_key"
33-
LLM_MODEL_CONFIG_fireworks-llama-v3-70b="model_name,fireworks_api_key"
34-
LLM_MODEL_CONFIG_bedrock-claude-3-5-sonnet="model_name,aws_access_key_id,aws_secret__access_key,region_name"
29+
LLM_MODEL_CONFIG_azure_ai_gpt_35="azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version"
30+
LLM_MODEL_CONFIG_azure_ai_gpt_4o="gpt-4o,https://YOUR-ENDPOINT.openai.azure.com/,azure_api_key,api_version"
31+
LLM_MODEL_CONFIG_groq_llama3_70b="model_name,base_url,groq_api_key"
32+
LLM_MODEL_CONFIG_anthropic_claude_3_5_sonnet="model_name,anthropic_api_key"
33+
LLM_MODEL_CONFIG_fireworks_llama_v3_70b="model_name,fireworks_api_key"
34+
LLM_MODEL_CONFIG_bedrock_claude_3_5_sonnet="model_name,aws_access_key_id,aws_secret__access_key,region_name"
3535
LLM_MODEL_CONFIG_ollama_llama3="model_name,model_local_url"
3636

backend/score.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ async def post_processing(uri=Form(None), userName=Form(None), password=Form(Non
262262
josn_obj = {'api_name': 'post_processing/create_fulltext_index', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
263263
logger.log_struct(josn_obj)
264264
logging.info(f'Full Text index created')
265-
if os.environ.get('ENTITY_EMBEDDING').upper()=="TRUE" and "create_entity_embedding" in tasks:
265+
if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "create_entity_embedding" in tasks:
266266
await asyncio.to_thread(create_entity_embedding, graph)
267267
josn_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
268268
logger.log_struct(josn_obj)

backend/src/QA_integration_new.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def get_total_tokens(model, ai_response):
177177
total_tokens = ai_response.response_metadata['usage_metadata']['prompt_token_count']
178178
elif "bedrock" in model:
179179
total_tokens = ai_response.response_metadata['usage']['total_tokens']
180-
elif "anthropic-claude" in model:
180+
elif "anthropic" in model:
181181
input_tokens = int(ai_response.response_metadata['usage']['input_tokens'])
182182
output_tokens = int(ai_response.response_metadata['usage']['output_tokens'])
183183
total_tokens = input_tokens + output_tokens

backend/src/llm.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020

2121
def get_llm(model_version: str):
2222
"""Retrieve the specified language model based on the model name."""
23+
env_key = "LLM_MODEL_CONFIG_" + model_version
24+
env_value = os.environ.get(env_key)
25+
logging.info("Model: {}".format(env_key))
2326
if "gemini" in model_version:
2427
credentials, project_id = google.auth.default()
2528
model_name = MODEL_VERSIONS[model_version]
@@ -46,7 +49,7 @@ def get_llm(model_version: str):
4649
)
4750

4851
elif "azure" in model_version:
49-
model_name, api_endpoint, api_key, api_version = os.environ.get(model_version).split(",")
52+
model_name, api_endpoint, api_key, api_version = env_value.split(",")
5053
llm = AzureChatOpenAI(
5154
api_key=api_key,
5255
azure_endpoint=api_endpoint,
@@ -58,23 +61,21 @@ def get_llm(model_version: str):
5861
)
5962

6063
elif "anthropic" in model_version:
61-
model_name, api_key = os.environ.get(model_version).split(",")
64+
model_name, api_key = env_value.split(",")
6265
llm = ChatAnthropic(
6366
api_key=api_key, model=model_name, temperature=0, timeout=None
6467
)
6568

6669
elif "fireworks" in model_version:
67-
model_name, api_key = os.environ.get(model_version).split(",")
70+
model_name, api_key = env_value.split(",")
6871
llm = ChatFireworks(api_key=api_key, model=model_name)
6972

7073
elif "groq" in model_version:
71-
model_name, base_url, api_key = os.environ.get(model_version).split(",")
74+
model_name, base_url, api_key = env_value.split(",")
7275
llm = ChatGroq(api_key=api_key, model_name=model_name, temperature=0)
7376

7477
elif "bedrock" in model_version:
75-
model_name, aws_access_key, aws_secret_key, region_name = os.environ.get(
76-
model_version
77-
).split(",")
78+
model_name, aws_access_key, aws_secret_key, region_name = env_value.split(",")
7879
bedrock_client = boto3.client(
7980
service_name="bedrock-runtime",
8081
region_name=region_name,
@@ -87,7 +88,7 @@ def get_llm(model_version: str):
8788
)
8889

8990
elif "ollama" in model_version:
90-
model_name, base_url = os.environ.get(model_version).split(",")
91+
model_name, base_url = env_value.split(",")
9192
llm = ChatOllama(base_url=base_url, model=model_name)
9293

9394
else:

backend/src/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ def processing_source(graph, model, file_name, pages, allowedNodes, allowedRelat
336336
obj_source_node.processing_time = processed_time
337337

338338
graphDb_data_Access.update_source_node(obj_source_node)
339-
logging.info('Updated the nodeCount and relCount properties in Docuemnt node')
339+
logging.info('Updated the nodeCount and relCount properties in Document node')
340340
logging.info(f'file:{file_name} extraction has been completed')
341341

342342

docker-compose.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,19 @@ services:
2424
- GCP_LOG_METRICS_ENABLED=${GCP_LOG_METRICS_ENABLED-False}
2525
- UPDATE_GRAPH_CHUNKS_PROCESSED=${UPDATE_GRAPH_CHUNKS_PROCESSED-20}
2626
- NUMBER_OF_CHUNKS_TO_COMBINE=${NUMBER_OF_CHUNKS_TO_COMBINE-6}
27+
- ENTITY_EMBEDDING=${ENTITY_EMBEDDING-False}
2728
- GCS_FILE_CACHE=${GCS_FILE_CACHE-False}
29+
# - LLM_MODEL_CONFIG_anthropic_claude_35_sonnet=${LLM_MODEL_CONFIG_anthropic_claude_35_sonnet-}
30+
# - LLM_MODEL_CONFIG_fireworks_llama_v3_70b=${LLM_MODEL_CONFIG_fireworks_llama_v3_70b-}
31+
# - LLM_MODEL_CONFIG_azure_ai_gpt_4o=${LLM_MODEL_CONFIG_azure_ai_gpt_4o-}
32+
# - LLM_MODEL_CONFIG_azure_ai_gpt_35=${LLM_MODEL_CONFIG_azure_ai_gpt_35-}
33+
# - LLM_MODEL_CONFIG_groq_llama3_70b=${LLM_MODEL_CONFIG_groq_llama3_70b-}
34+
# - LLM_MODEL_CONFIG_bedrock_claude_3_5_sonnet=${LLM_MODEL_CONFIG_bedrock_claude_3_5_sonnet-}
35+
# - LLM_MODEL_CONFIG_fireworks_qwen_72b=${LLM_MODEL_CONFIG_fireworks_qwen_72b-}
36+
- LLM_MODEL_CONFIG_ollama_llama3=${LLM_MODEL_CONFIG_ollama_llama3-}
2837
container_name: backend
38+
extra_hosts:
39+
- host.docker.internal:host-gateway
2940
ports:
3041
- "8000:8000"
3142
networks:

example.env

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ IS_EMBEDDING = "true"
88
KNN_MIN_SCORE = "0.94"
99
# Enable Gemini (default is False) | Can be False or True
1010
GEMINI_ENABLED = False
11+
# LLM_MODEL_CONFIG_ollama_llama3="llama3,http://host.docker.internal:11434"
12+
1113
# Enable Google Cloud logs (default is False) | Can be False or True
1214
GCP_LOG_METRICS_ENABLED = False
1315
NUMBER_OF_CHUNKS_TO_COMBINE = 6
@@ -20,12 +22,13 @@ LANGCHAIN_PROJECT = ""
2022
LANGCHAIN_TRACING_V2 = "true"
2123
LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
2224
GCS_FILE_CACHE = False
25+
ENTITY_EMBEDDING=True
2326

2427
# Optional Frontend
2528
BACKEND_API_URL="http://localhost:8000"
2629
BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true"
2730
REACT_APP_SOURCES="local,youtube,wiki,s3,web"
28-
LLM_MODELS="diffbot,gpt-3.5,gpt-4o"
31+
LLM_MODELS="diffbot,gpt-3.5,gpt-4o" # ",ollama_llama3"
2932
ENV="DEV"
3033
TIME_PER_CHUNK=4
3134
TIME_PER_PAGE=50

0 commit comments

Comments
 (0)