Commit b7f7d94

Rebuild with run_llm working

1 parent: 6c890fa

3 files changed, +76 -36 lines

shuffle-ai/1.0.0/Dockerfile

Lines changed: 32 additions & 27 deletions
@@ -1,41 +1,40 @@
 FROM python:3.10-slim
 
+### Install Tesseract
+ENV SHELL=/bin/bash
+ENV CC /usr/bin/clang
+ENV CXX /usr/bin/clang++
+ENV LANG=C.UTF-8
+ENV TESSDATA_PREFIX=/usr/local/share/tessdata
+
 # Install all build tools needed for our pip installs
 RUN apt update
 RUN apt install -y clang g++ make automake autoconf libtool cmake
 
+## Install the same packages with apt as with apk, but ensure they exist in apt
+RUN apt install -y jq git curl
+RUN apt install -y file openssl bash tini libpng-dev aspell-en
+RUN apt install -y git clang g++ make automake autoconf libtool cmake
+RUN apt install -y autoconf-archive wget
+RUN mkdir -p /models
+RUN wget https://huggingface.co/QuantFactory/Llama-3.2-3B-GGUF/resolve/main/Llama-3.2-3B.Q8_0.gguf?download=true -O /models/Llama-3.2-3B.Q8_0.gguf
+
 # Install all of our pip packages in a single directory that we can copy to our base image later
 RUN mkdir /install
 WORKDIR /install
 
 # Switch back to our base image and copy in all of our built packages and source code
-#COPY --from=builder /install /usr/local
-COPY src /app
 COPY requirements.txt /requirements.txt
 RUN python3 -m pip install -r /requirements.txt
 
 # Install any binary dependencies needed in our final image
-# RUN apk --no-cache add --update my_binary_dependency
-#RUN apk --no-cache add jq git curl
-RUN apt install -y jq git curl
 
-ENV SHELL=/bin/bash
-
-### Install Tesseract
-ENV CC /usr/bin/clang
-ENV CXX /usr/bin/clang++
-ENV LANG=C.UTF-8
-ENV TESSDATA_PREFIX=/usr/local/share/tessdata
 
 # Dev tools
 WORKDIR /tmp
 #RUN apk update
 #RUN apk upgrade
 
-## Install the same packages with apt as with apk, but ensure they exist in apt
-RUN apt install -y file openssl bash tini libpng-dev aspell-en
-RUN apt install -y git clang g++ make automake autoconf libtool cmake
-RUN apt install -y autoconf-archive wget
 
 RUN ln -s /usr/include/locale.h /usr/include/xlocale.h
 
@@ -56,20 +55,26 @@ RUN git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git
 
 #RUN curl -fsSL https://ollama.com/install.sh | sh
 # Install to /usr/local
-RUN wget https://ollama.com/install.sh -O /usr/local/bin/ollama-install
-RUN chmod +x /usr/local/bin/ollama-install
-RUN ls /usr/local/bin
-RUN sh /usr/local/bin/ollama-install
+#RUN wget https://ollama.com/install.sh -O /usr/local/bin/ollama-install
+#RUN chmod +x /usr/local/bin/ollama-install
+#RUN sh /usr/local/bin/ollama-install
+#
+#RUN ls -alh /usr/bin
+#RUN ollama serve & sleep 2 && ollama pull nezahatkorkmaz/deepseek-v3
+#CMD ["sh", "-c", "ollama serve & sleep 2 && python app.py --log-level DEBUG"]
 
-RUN ls -alh /usr/bin
-RUN which ollama
+#RUN wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
+RUN python3 -m pip install ctransformers --no-binary ctransformers
 
-#RUN /usr/local/bin/ollama pull llama3.2
-RUN ollama serve & sleep 2 && ollama pull nezahatkorkmaz/deepseek-v3
+# Finally, lets run our app!
+ENV GIN_MODE=release
+ENV SHUFFLE_APP_SDK_TIMEOUT=300
+#ENV LD_LIBRARY_PATH=/usr/local/lib/python3.10/site-packages/ctransformers/lib/basic/libctransformers.so
+#RUN chmod 755 /usr/local/lib/python3.10/site-packages/ctransformers/lib/basic/libctransformers.so
 
-#RUN rm /usr/local/bin/ollama
-#RUN cd tesseract && ./autogen.sh && ./configure --build=x86_64-alpine-linux-musl --host=x86_64-alpine-linux-musl && make && make install && cd /tmp/src
+#RUN apt install -y libffi-dev
 
-# Finally, lets run our app!
+
+COPY src /app
 WORKDIR /app
 CMD ["python", "app.py", "--log-level", "DEBUG"]

shuffle-ai/1.0.0/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -3,5 +3,5 @@ pytesseract
 pdf2image
 pypdf2
 requests
-ollama
 
+llama-cpp-python

shuffle-ai/1.0.0/src/app.py

Lines changed: 43 additions & 8 deletions
@@ -1,3 +1,4 @@
+import os
 import json
 import tempfile
 import requests
@@ -18,9 +19,9 @@
     print("Skipping pdf2image import: %s" % e)
 
 try:
-    import ollama
+    import llama_cpp
 except Exception as e:
-    print("Skipping ollama import: %s" % e)
+    print("Skipping llama_cpp import: %s" % e)
 
 from shuffle_sdk import AppBase
 
@@ -32,14 +33,48 @@ def __init__(self, redis, logger, console_logger=None):
         super().__init__(redis, logger, console_logger)
 
     #def run_llm(self, question, model="llama3.2"):
-    def run_llm(self, question, model="deepseek-v3"):
-        response = ollama.chat(model=model, messages=[
-            {
-                "role": "user", "content": question,
+    #def run_llm(self, question, model="deepseek-v3"):
+    def run_llm(self, question, model="/models/Llama-3.2-3B.Q8_0.gguf"):
+        self.logger.info("[DEBUG] Running LLM with model '%s'" % model)
+
+        if not os.path.exists(model):
+            return {
+                "success": False,
+                "reason": "Model not found at path %s" % model,
+                "details": "Ensure the model path is correct"
             }
-        ])
 
-        return response["message"]["content"]
+        llm = llama_cpp.Llama(model_path=model)
+
+        # https://github.com/abetlen/llama-cpp-python
+        output = llm.create_chat_completion(
+            messages = [
+                {"role": "system", "content": "You are an assistant who outputs in JSON format.."},
+                {
+                    "role": "user",
+                    "content": question,
+                }
+            ]
+        )
+
+        return output
+
+
+        #model = ctransformers.AutoModelForCausalLM.from_pretrained(
+        #    model_path_or_repo_id=model,
+        #    #model_type="deepseek-v3"
+        #)
+
+        #resp = model(full_question)
+        #return resp
+
+        #response = ollama.chat(model=model, messages=[
+        #    {
+        #        "role": "user", "content": question,
+        #    }
+        #])
+
+        #return response["message"]["content"]
 
     def security_assistant(self):
         # Currently testing outside the Shuffle environment
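
Note that run_llm now returns the raw dict from create_chat_completion instead of the plain message string the old ollama version returned. A caller that wants just the text could unwrap it as sketched below; this is an illustration only, and `app` stands in for whatever instance of the AppBase subclass Shuffle constructs.

    # Hypothetical caller; `app` is an instance of the app class defined in app.py
    result = app.run_llm("Summarize this alert in one sentence")

    if isinstance(result, dict) and "choices" in result:
        # llama-cpp-python mirrors the OpenAI chat completion schema
        text = result["choices"][0]["message"]["content"]
    else:
        # e.g. the {"success": False, ...} dict returned when the model path is missing
        text = result

    print(text)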

0 commit comments

Comments
 (0)