Shuffle AI rebuild attempt

frikky · frikky · commit 0215a54614ac · 2025-01-29T12:00:50.000+01:00
diff --git a/shuffle-ai/1.0.0/Dockerfile b/shuffle-ai/1.0.0/Dockerfile
@@ -1,26 +1,23 @@
-# Base our app image off of the WALKOFF App SDK image
-FROM frikky/shuffle:app_sdk as base
-
-# We're going to stage away all of the bloat from the build tools so lets create a builder stage
-FROM base as builder
+FROM python:3.10-slim
 
 # Install all alpine build tools needed for our pip installs
-RUN apk --no-cache add --update alpine-sdk libffi libffi-dev musl-dev openssl-dev git poppler-utils
+RUN apt-get update && \
+    apt-get install -y clang g++ make automake autoconf libtool cmake 
 
 # Install all of our pip packages in a single directory that we can copy to our base image later
 RUN mkdir /install
 WORKDIR /install
 
 # Switch back to our base image and copy in all of our built packages and source code
-FROM base
-COPY --from=builder /install /usr/local
+#COPY --from=builder /install /usr/local
 COPY src /app
 COPY requirements.txt /requirements.txt
 RUN python3 -m pip install -r /requirements.txt
 
 # Install any binary dependencies needed in our final image
 # RUN apk --no-cache add --update my_binary_dependency
-RUN apk --no-cache add jq git curl
+#RUN apk --no-cache add jq git curl
+RUN apt install -y jq git curl
 
 ENV SHELL=/bin/bash
 
@@ -32,23 +29,45 @@ ENV TESSDATA_PREFIX=/usr/local/share/tessdata
 
 # Dev tools
 WORKDIR /tmp
-RUN apk update 
-RUN apk upgrade 
-RUN apk add file openssl openssl-dev bash tini leptonica-dev  openjpeg-dev tiff-dev libpng-dev zlib-dev libgcc mupdf-dev jbig2dec-dev 
-RUN apk add freetype-dev openblas-dev ffmpeg-dev linux-headers aspell-dev aspell-en  # enchant-dev jasper-dev 
-RUN apk add --virtual .dev-deps git clang clang-dev g++ make automake autoconf libtool pkgconfig cmake ninja 
-RUN apk add --virtual .dev-testing-deps -X http://dl-3.alpinelinux.org/alpine/edge/testing autoconf-archive 
+#RUN apk update 
+#RUN apk upgrade 
+
+## Install the apt equivalents of the former apk packages; only packages that exist in apt are kept
+RUN apt-get install -y \
+    aspell-en autoconf autoconf-archive automake bash clang cmake file \
+    g++ git libpng-dev libtool make openssl tini wget
+
 RUN ln -s /usr/include/locale.h /usr/include/xlocale.h
 
-RUN apk add tesseract-ocr  
-RUN apk add poppler-utils 
+#RUN apk add tesseract-ocr  
+RUN apt install -y tesseract-ocr
+#RUN apk add poppler-utils 
+RUN apt install -y poppler-utils
+RUN apt clean && rm -rf /var/lib/apt/lists/*
 
 # Install from main 
 RUN mkdir /usr/local/share/tessdata 
+RUN wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P /usr/local/share/tessdata 
+
 RUN mkdir src 
 RUN cd src 
-RUN wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P /usr/local/share/tessdata 
+
 RUN git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git 
+
+#RUN curl -fsSL https://ollama.com/install.sh | sh
+# Install to /usr/local
+RUN wget https://ollama.com/install.sh -O /usr/local/bin/ollama-install
+RUN chmod +x /usr/local/bin/ollama-install
+RUN ls /usr/local/bin
+RUN sh /usr/local/bin/ollama-install
+
+RUN ls -alh /usr/bin
+RUN which ollama
+
+# Run the server in the background during the build so the model run_llm defaults to (llama3.2) is baked into the image
+RUN ollama serve & sleep 2 && ollama pull llama3.2
+
+#RUN rm /usr/local/bin/ollama
 #RUN cd tesseract && ./autogen.sh && ./configure --build=x86_64-alpine-linux-musl --host=x86_64-alpine-linux-musl && make && make install && cd /tmp/src
 
 # Finally, lets run our app!
diff --git a/shuffle-ai/1.0.0/api.yaml b/shuffle-ai/1.0.0/api.yaml
@@ -11,7 +11,18 @@ contact_info:
   url: https://shuffler.io
   email: support@shuffler.io
 actions:
-  - name: autoformat_text 
+  - name: run_llm 
+    description: "Runs a local LLM based on ollama with any of their models from https://github.com/ollama/ollama?tab=readme-ov-file#model-library"
+    parameters:
+      - name: question 
+        description: "The input question to the model"
+        required: true
+        multiline: true
+        example: ""
+        schema:
+          type: string
+
+  - name: shuffle_cloud_inference
     description: Input ANY kind of data in the format you want, and the format you want it in. Default is a business-y email. Uses ShuffleGPT, which is based on OpenAI and our own model.
     parameters:
       - name: apikey 
diff --git a/shuffle-ai/1.0.0/requirements.txt b/shuffle-ai/1.0.0/requirements.txt
@@ -1,4 +1,6 @@
+shuffle_sdk
 pytesseract
 pdf2image
 pypdf2
 requests
+ollama
diff --git a/shuffle-ai/1.0.0/src/app.py b/shuffle-ai/1.0.0/src/app.py
@@ -1,11 +1,28 @@
 import json
-import PyPDF2
 import tempfile
 import requests
-import pytesseract
-from pdf2image import convert_from_path
 
-from walkoff_app_sdk.app_base import AppBase
+try:
+    import pytesseract
+except Exception as e:
+    print("Skipping pytesseract import: %s" % e)
+
+try:
+    import PyPDF2
+except Exception as e:
+    print("Skipping PyPDF2 import: %s" % e)
+
+try:
+    from pdf2image import convert_from_path
+except Exception as e:
+    print("Skipping pdf2image import: %s" % e)
+
+try:
+    import ollama
+except Exception as e:
+    print("Skipping ollama import: %s" % e)
+
+from shuffle_sdk import AppBase
 
 class Tools(AppBase):
     __version__ = "1.0.0"
@@ -14,6 +31,53 @@ class Tools(AppBase):
     def __init__(self, redis, logger, console_logger=None):
         super().__init__(redis, logger, console_logger)
 
+    def run_llm(self, question, model="llama3.2"):
+        """Send `question` as a single user message to ollama.chat using `model` and return the reply's message content."""
+        response = ollama.chat(model=model, messages=[
+            {
+                "role": "user", "content": question,
+            }
+        ])
+
+        return response["message"]["content"]
+
+    def security_assistant(self):
+        """Placeholder action: takes no input and always returns the string "Not implemented"."""
+        # Assistants and local LLMs are currently being tested outside the Shuffle environment
+
+        return "Not implemented"
+
+    def shuffle_cloud_inference(self, apikey, text, formatting="auto"):
+        """POST `text` to the shuffler.io conversation API and return the raw response body.
+
+        `formatting` is the instruction sent along with the text; a falsy value or
+        "auto" selects the built-in customer-email instruction. On any non-200
+        status the response body is printed and a failure dict is returned instead.
+        """
+        default_prompt = "Format the following data to be a good email that can be sent to customers. Don't make it too business sounding."
+        use_default = (not formatting) or formatting == "auto"
+        payload = {
+            "query": text,
+            "formatting": default_prompt if use_default else formatting,
+            "output_format": "formatting",
+        }
+
+        ret = requests.post(
+            "https://shuffler.io/api/v1/conversation",
+            json=payload,
+            headers={"Authorization": "Bearer %s" % apikey},
+        )
+
+        # Only an exact 200 counts as success.
+        if ret.status_code == 200:
+            return ret.text
+
+        print(ret.text)
+        return {
+            "success": False,
+            "reason": "Status code for auto-formatter is not 200"
+        }
+
     def autoformat_text(self, apikey, text, formatting="auto"):
         headers = {
             "Authorization": "Bearer %s" % apikey,