Final translator microservice - DeepSeek integration

skkuwari-alt · skkuwari-alt · commit 41cf65a19cf0 · 2025-11-17T21:11:34.000+03:00
diff --git a/app.py b/app.py
@@ -1,22 +1,20 @@
 from flask import Flask, request, jsonify
 import os
-
-from flask import Flask
-from flask import request, jsonify
-from src.translator import translate
+from src.translator import translate_content  # fixed import
 
 app = Flask(__name__)
 
 @app.route("/", methods=["GET"])
 def translator():
-    content = request.args.get("content", default = "", type = str)
-    is_english, translated_content = translate(content)
+    content = request.args.get("content", default="", type=str)
+
+    result = translate_content(content)
+
     return jsonify({
-        "is_english": is_english,
-        "translated_content": translated_content,
+        "isEnglish": result["isEnglish"],
+        "translatedContent": result["translatedContent"]
     })
 
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 8080))
     app.run(debug=True, host="0.0.0.0", port=port)
-
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,7 +1,5 @@
-version: '3.8'
-
 services:
-    mytranslator:
-       build: .
-       network_mode: "host"
-       restart: always
+  translator:
+    build: .
+    restart: unless-stopped
+    network_mode: host
diff --git a/package-lock.json b/package-lock.json
diff --git a/src/translator.py b/src/translator.py
@@ -3,79 +3,59 @@
 import re
 
 def translate(content: str) -> tuple[bool, str]:
-    """
-    Translates content if it's not in English.
-    Returns (is_english: bool, translated_content: str)
-    """
+    prompt = f"""
+Analyze the following text and determine if it is in English or another language.
 
-    prompt = f
-    """Analyze the following text and determine if it is in English or another language.
-    If the text is in English, respond with exactly:
-    {{"is_english": true, "translated_content": ""}}
+If the text is in English, respond with exactly:
+{{"is_english": true, "translated_content": ""}}
 
-    If the text is NOT in English, translate it to English and respond with exactly:
-    {{"is_english": false, "translated_content": "YOUR TRANSLATION HERE"}}
+If the text is NOT in English, translate it to English and respond with exactly:
+{{"is_english": false, "translated_content": "YOUR TRANSLATION HERE"}}
 
-    You must respond ONLY with valid JSON in the exact format above. Do not include any other text.
+Respond ONLY with valid JSON. 
+No explanations. No thinking. No markdown. No extra text.
+If you cannot comply, output exactly: {{"is_english": true, "translated_content": ""}}.
 
-    Text to analyze:
-    {content}
-    """
+Text to analyze:
+{content}
+"""
 
     try:
-        # Call the LLM using Ollama
         response = ollama.chat(
-            model='llama3.2',
-            messages=[
-                {
-                    'role': 'user',
-                    'content': prompt
-                }
-            ]
+            model="deepseek-r1:8b",
+            messages=[{"role": "user", "content": prompt}]
         )
 
-        response_text = response['message']['content'].strip()
+        response_text = response["message"]["content"].strip()
 
-        # Try to parse the JSON directly
+        # Try normal JSON parsing
         try:
             result = json.loads(response_text)
-            is_english = result.get("is_english", True)
-            translated_content = result.get("translated_content", "")
-            return (is_english, translated_content)
+            return result.get("is_english", True), result.get("translated_content", "")
+        except:
+            pass
+
+        # Try fallback JSON extraction
+        json_match = re.search(
+            r'\{[\s\S]*?"is_english"[\s\S]*?"translated_content"[\s\S]*?\}',
+            response_text
+        )
 
-        except json.JSONDecodeError:
-            # Fallback: try to extract a JSON-like substring if LLM added extra text
-            json_match = re.search(
-                r'\{[^}]"is_english"[^}]"translated_content"[^}]*\}',
-                response_text,
-                re.DOTALL
-            )
-            if json_match:
-                try:
-                    result = json.loads(json_match.group(0))
-                    is_english = result.get("is_english", True)
-                    translated_content = result.get("translated_content", "")
-                    return (is_english, translated_content)
-                except json.JSONDecodeError:
-                    pass
+        if json_match:
+            try:
+                result = json.loads(json_match.group(0))
+                return result.get("is_english", True), result.get("translated_content", "")
+            except:
+                pass
 
-            # If parsing completely fails, assume input is English
-            print(f"Warning: Could not parse LLM response: {response_text}")
-            return (True, "")
+        print("Warning: Could not parse:", response_text)
+        return True, ""
 
     except Exception as e:
-        print(f"Error calling LLM: {e}")
-        # On error, assume English so posts don't break
-        return (True, "")
+        print("Error calling LLM:", e)
+        return True, ""
 
 
 def translate_content(content: str) -> dict:
-    """
-    Wrapper for Flask app to return JSON-friendly output.
-    Returns keys in camelCase to match NodeBB expectations.
-    """
-    is_english, translated_content = translate(content)
-    return {
-        "isEnglish": is_english,
-        "translatedContent": translated_content
-    }
+    is_eng, translated = translate(content)
+    return {"isEnglish": is_eng, "translatedContent": translated}