DefangLabs
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
Lines changed: 13 additions & 0 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 10 additions & 0 deletions
diff --git a/Makefile b/Makefile
Lines changed: 21 additions & 0 deletions
diff --git a/app/app.py b/app/app.py
Lines changed: 131 additions & 66 deletions
@@ -13,8 +13,21 @@ jobs:
     steps:
       - name: Install Docker Buildx
         uses: docker/setup-buildx-action@v2
+
       - name: Checkout Repo
         uses: actions/checkout@v4
+
+      # Ruff linting steps
+      - name: Run Ruff linter
+        uses: astral-sh/ruff-action@v1
+        with:
+          args: check ./app
+
+      - name: Run Ruff formatter check
+        uses: astral-sh/ruff-action@v1
+        with:
+          args: "format --check"
+
       - name: Build Docker image
         working-directory: ./app
         run: |
 
@@ -0,0 +1,10 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.9.10
+    hooks:
+      # Run the linter.
+      - id: ruff
+        types_or: [python, pyi]
+      # Run the formatter.
+      - id: ruff-format
@@ -0,0 +1,21 @@
+.PHONY: tests
+tests:
+	PYTHONPATH=src pytest
+
+.PHONY: lint
+lint: # Run pre-commit hooks on currently staged files
+	pre-commit run
+
+.PHONY: check
+check: # Run all pre-commit hooks on all files (useful for CI or full check)
+	pre-commit run --all-files
+
+.PHONY: format
+format: # Manually run ruff formatter on all files
+	ruff format .
+
+.PHONY: pre-commit-install
+pre-commit-install: # Install ruff and pre-commit, then set up the git hooks
+	pip install ruff
+	pip install pre-commit
+	pre-commit install
@@ -1,4 +1,13 @@
-from flask import Flask, request, jsonify, render_template, Response, stream_with_context, session, send_from_directory
+from flask import (
+    Flask,
+    request,
+    jsonify,
+    render_template,
+    Response,
+    stream_with_context,
+    session,
+    send_from_directory,
+)
 from flask_wtf.csrf import CSRFProtect
 from rag_system import RAGSystem
 import hashlib
@@ -7,136 +16,168 @@
 import segment.analytics as analytics
 import uuid
 
-from werkzeug.test import EnvironBuilder
-from werkzeug.wrappers import Request
-
 import logging
 import redis
-from intercom import parse_html_to_text, set_conversation_human_replied, is_conversation_human_replied, answer_intercom_conversation, check_intercom_ip
+from intercom import (
+    parse_html_to_text,
+    set_conversation_human_replied,
+    is_conversation_human_replied,
+    answer_intercom_conversation,
+    check_intercom_ip,
+)
 from utils import generate
 
 # Configure logging
 logging.basicConfig(
-    level=logging.DEBUG if os.getenv('DEBUG') else logging.INFO,
+    level=logging.DEBUG if os.getenv("DEBUG") else logging.INFO,
     format="%(asctime)s %(levelname)s %(message)s",
-    datefmt="%Y-%m-%d %H:%M:%S"
+    datefmt="%Y-%m-%d %H:%M:%S",
 )
 logger = logging.getLogger(__name__)
 
-analytics.write_key = os.getenv('SEGMENT_WRITE_KEY')
+analytics.write_key = os.getenv("SEGMENT_WRITE_KEY")
 
-app = Flask(__name__, static_folder='templates/static')
-app.config['SECRET_KEY'] = os.getenv('SECRET_KEY')
-app.config['SESSION_COOKIE_HTTPONLY'] = True
-app.config['SESSION_COOKIE_SECURE'] = bool(os.getenv('SESSION_COOKIE_SECURE'))
+app = Flask(__name__, static_folder="templates/static")
+app.config["SECRET_KEY"] = os.getenv("SECRET_KEY")
+app.config["SESSION_COOKIE_HTTPONLY"] = True
+app.config["SESSION_COOKIE_SECURE"] = bool(os.getenv("SESSION_COOKIE_SECURE"))
 
 app.rag_system = RAGSystem()
 
 csrf = CSRFProtect(app)
 
 # Initialize Redis connection
-r = redis.from_url(os.getenv('REDIS_URL'), decode_responses=True)
+r = redis.from_url(os.getenv("REDIS_URL"), decode_responses=True)
+
 
 def validate_pow(nonce, data, difficulty):
     # Calculate the sha256 of the concatenated string of 32-bit X-Nonce header and raw body.
     # This calculation has to match the code on the client side, in index.html.
-    nonce_bytes = int(nonce).to_bytes(4, byteorder='little')  # 32-bit = 4 bytes
+    nonce_bytes = int(nonce).to_bytes(4, byteorder="little")  # 32-bit = 4 bytes
     calculated_hash = hashlib.sha256(nonce_bytes + data).digest()
-    first_uint32 = int.from_bytes(calculated_hash[:4], byteorder='big')
+    first_uint32 = int.from_bytes(calculated_hash[:4], byteorder="big")
     return first_uint32 <= difficulty
 
 
-
 def handle_ask_request(request, session):
     data = request.get_json()
-    query = data.get('query')
+    query = data.get("query")
 
     if not query:
         return jsonify({"error": "No query provided"}), 400
 
     # For analytics tracking, generates an anonymous id and uses it for the session
-    if 'anonymous_id' not in session:
-        session['anonymous_id'] = str(uuid.uuid4())
-    anonymous_id = session['anonymous_id']
+    if "anonymous_id" not in session:
+        session["anonymous_id"] = str(uuid.uuid4())
+    anonymous_id = session["anonymous_id"]
 
     # Determine the source based on the user agent
-    user_agent = request.headers.get('User-Agent', '')
-    source = 'Ask Defang Discord Bot' if 'Discord Bot' in user_agent else 'Ask Defang Website'
+    user_agent = request.headers.get("User-Agent", "")
+    source = (
+        "Ask Defang Discord Bot"
+        if "Discord Bot" in user_agent
+        else "Ask Defang Website"
+    )
 
     # Use the shared generate function directly
-    return Response(stream_with_context(generate(app.rag_system, query, source, anonymous_id)), content_type='text/markdown')
+    return Response(
+        stream_with_context(generate(app.rag_system, query, source, anonymous_id)),
+        content_type="text/markdown",
+    )
+
 
-@app.route('/', methods=['GET', 'POST'])
+@app.route("/", methods=["GET", "POST"])
 def index():
-    return render_template('index.html')
+    return render_template("index.html")
 
-@app.route('/ask', methods=['POST'])
+
+@app.route("/ask", methods=["POST"])
 def ask():
-    if not validate_pow(request.headers.get('X-Nonce'), request.get_data(), 0x50000):
+    if not validate_pow(request.headers.get("X-Nonce"), request.get_data(), 0x50000):
         return jsonify({"error": "Invalid Proof of Work"}), 400
 
     response = handle_ask_request(request, session)
     return response
 
+
 # /v1/ask allows bypassing of CSRF and PoW for clients with a valid Ask Token
-@app.route('/v1/ask', methods=['POST'])
+@app.route("/v1/ask", methods=["POST"])
 @csrf.exempt
 def v1_ask():
-    auth_header = request.headers.get('Authorization')
-    ask_token = auth_header.split("Bearer ")[1] if auth_header and auth_header.startswith("Bearer ") else None
-    if ask_token and ask_token == os.getenv('ASK_TOKEN'):
+    auth_header = request.headers.get("Authorization")
+    ask_token = (
+        auth_header.split("Bearer ")[1]
+        if auth_header and auth_header.startswith("Bearer ")
+        else None
+    )
+    if ask_token and ask_token == os.getenv("ASK_TOKEN"):
         response = handle_ask_request(request, session)
         return response
     else:
         return jsonify({"error": "Invalid or missing Ask Token"}), 401
 
-@app.route('/trigger-rebuild', methods=['POST'])
+
+@app.route("/trigger-rebuild", methods=["POST"])
 @csrf.exempt
 def trigger_rebuild():
-    token = request.args.get('token')
-    if token != os.getenv('REBUILD_TOKEN'):
+    token = request.args.get("token")
+    if token != os.getenv("REBUILD_TOKEN"):
         return jsonify({"error": "Unauthorized"}), 401
     try:
         print("Running get_knowledge_base.py script...")
-        result = subprocess.run(["python3", "get_knowledge_base.py"], capture_output=True, text=True)
+        result = subprocess.run(
+            ["python3", "get_knowledge_base.py"], capture_output=True, text=True
+        )
         if result.returncode != 0:
             print(f"Error running get_knowledge_base.py script: {result.stderr}")
-            return jsonify({"error": "Error running get_knowledge_base.py script", "details": result.stderr}), 500
+            return jsonify(
+                {
+                    "error": "Error running get_knowledge_base.py script",
+                    "details": result.stderr,
+                }
+            ), 500
 
         print("Finished running get_knowledge_base.py script.")
 
         # get Dockerfiles and compose files from samples repo
         print("Running get_samples_examples.py script...")
-        result = subprocess.run(["python3", "get_samples_examples.py"], capture_output=True, text=True)
+        result = subprocess.run(
+            ["python3", "get_samples_examples.py"], capture_output=True, text=True
+        )
         if result.returncode != 0:
             print(f"Error running get_samples_examples.py script: {result.stderr}")
-            return jsonify({"error": "Error running get_samples_examples.py script", "details": result.stderr}), 500
+            return jsonify(
+                {
+                    "error": "Error running get_samples_examples.py script",
+                    "details": result.stderr,
+                }
+            ), 500
 
         print("Finished running get_samples_examples.py script.")
 
         print("Rebuilding embeddings...")
         try:
             app.rag_system.rebuild()
         except Exception as e:
-            print(f"Error rebuilding embeddings: {str(e)}")
-            return jsonify({"error": "Error rebuilding embeddings", "details": str(e)}), 500
+            logging.error(f"Error rebuilding embeddings: {str(e)}")
+            return jsonify({"error": "Error rebuilding embeddings"}), 500
 
-        print("Finished rebuilding embeddings.")
+        logging.info("Finished rebuilding embeddings.")
         return jsonify({"status": "Rebuild triggered successfully"}), 200
 
     except Exception as e:
         print(f"Error in /trigger-rebuild endpoint: {e}")
         return jsonify({"error": "Internal Server Error"}), 500
 
+
 @app.route("/data/<path:name>")
 @csrf.exempt
 def download_file(name):
-    return send_from_directory(
-        "data", name, as_attachment=True
-    )
+    return send_from_directory("data", name, as_attachment=True)
+
 
 # Handle incoming webhooks from Intercom
-@app.route('/intercom-webhook', methods=['POST'])
+@app.route("/intercom-webhook", methods=["POST"])
 @csrf.exempt
 def handle_webhook():
     if not check_intercom_ip(request):
@@ -145,53 +186,77 @@ def handle_webhook():
     data = request.json
 
     logger.info(f"Received Intercom webhook: {data}")
-    conversation_id = data.get('data', {}).get('item', {}).get('id')
+    conversation_id = data.get("data", {}).get("item", {}).get("id")
 
     # Check for the type of the webhook event
-    topic = data.get('topic')
+    topic = data.get("topic")
     logger.info(f"Webhook topic: {topic}")
-    if topic == 'conversation.admin.replied':
+    if topic == "conversation.admin.replied":
         # In this case, the webhook event is an admin reply
         # Check if the admin is a bot or human based on presence of a message marker (e.g., "🤖") in the last message
-        last_message = data.get('data', {}).get('item', {}).get('conversation_parts', {}).get('conversation_parts', [])[-1].get('body', '')
+        last_message = (
+            data.get("data", {})
+            .get("item", {})
+            .get("conversation_parts", {})
+            .get("conversation_parts", [])[-1]
+            .get("body", "")
+        )
         last_message_text = parse_html_to_text(last_message)
 
         logger.info(f"Parsed last message text: {last_message_text}")
         if last_message_text and last_message_text.endswith("🤖"):
             # If the last message ends with the marker, it indicates a bot reply
-            logger.info(f"Last message in conversation {conversation_id} ends with the marker 🤖")
-            logger.info(f"Detected bot admin reply in conversation {conversation_id}; no action taken.")
+            logger.info(
+                f"Last message in conversation {conversation_id} ends with the marker 🤖"
+            )
+            logger.info(
+                f"Detected bot admin reply in conversation {conversation_id}; no action taken."
+            )
         else:
             # If the last message does not end with the marker, it indicates a human reply
-            logger.info(f"Detected human admin reply in conversation {conversation_id}; marking as human admin-replied...")
+            logger.info(
+                f"Detected human admin reply in conversation {conversation_id}; marking as human admin-replied..."
+            )
             # Mark the conversation as replied by a human admin to skip LLM responses in the future
             set_conversation_human_replied(conversation_id, r)
-            logger.info(f"Successfully marked conversation {conversation_id} as human admin-replied.")
+            logger.info(
+                f"Successfully marked conversation {conversation_id} as human admin-replied."
+            )
 
-    elif topic == 'conversation.user.replied' or topic == 'conversation.user.created':
+    elif topic == "conversation.user.replied" or topic == "conversation.user.created":
         # In this case, the webhook event is a user reply or a new user conversation
         # Check if the conversation is of type email, and skip processing if so
-        conversation_type = data.get('data', {}).get('item', {}).get('source', {}).get('type')
-        if conversation_type == 'email':
-            logger.info(f"Conversation {conversation_id} is of type email; no action taken.")
-            return 'OK'
+        conversation_type = (
+            data.get("data", {}).get("item", {}).get("source", {}).get("type")
+        )
+        if conversation_type == "email":
+            logger.info(
+                f"Conversation {conversation_id} is of type email; no action taken."
+            )
+            return "OK"
 
         # Check if it is a user reply and do the admin-replied checks if so
         # For new user conversations, we will skip admin-replied check to avoid false positives from Intercom auto-replies
-        if topic == 'conversation.user.replied':
+        if topic == "conversation.user.replied":
             # Check if the conversation was replied previously by a human admin and skip processing if so
             if is_conversation_human_replied(conversation_id, r):
-                logger.info(f"Conversation {conversation_id} already marked as human admin-replied; no action taken.")
-                return 'OK'
+                logger.info(
+                    f"Conversation {conversation_id} already marked as human admin-replied; no action taken."
+                )
+                return "OK"
 
         # Fetch the conversation and generate an LLM answer for the user
-        logger.info(f"Detected a user reply in conversation {conversation_id}; fetching an answer from LLM...")
+        logger.info(
+            f"Detected a user reply in conversation {conversation_id}; fetching an answer from LLM..."
+        )
         answer_intercom_conversation(app.rag_system, conversation_id, topic)
 
     else:
-        logger.info(f"Received webhook for unsupported topic: {topic}; no action taken.")
-    return 'OK'
+        logger.info(
+            f"Received webhook for unsupported topic: {topic}; no action taken."
+        )
+    return "OK"
 
 
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=5050)
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=5050)