import torch
from request import ModelRequest
from InstructorEmbedding import INSTRUCTOR

class Model:
    # Singleton: load the INSTRUCTOR checkpoint once and reuse it across
    # requests instead of re-initialising the model on every call.
    def __new__(cls, context):
        cls.context = context
        if not hasattr(cls, 'instance'):
            cls.instance = super().__new__(cls)
            model_name = "hkunlp/instructor-large"
            cls.model = INSTRUCTOR(model_name)
        return cls.instance

    async def inference(self, request: ModelRequest):
        # Modify this function according to model requirements, keeping the
        # inputs and outputs the same.
        corpus_instruction = "Represent the Wikipedia document for retrieval:"
        query_instruction = "Represent the Wikipedia question for retrieving supporting documents: "
        query = request.query

        if query is not None:
            # Encode the query as an [instruction, text] pair, the input
            # format INSTRUCTOR models expect.
            query_embeddings = self.model.encode(
                [[query_instruction, query]],
                show_progress_bar=False,
                batch_size=32,
                device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
            )
            return query_embeddings.tolist()

        if not request.df.empty:
            data = request.df

            # Encode every document in the 'content' column with the corpus
            # instruction, then attach the embeddings as a new column.
            text_corpus = data.loc[:, 'content'].to_list()
            corpus_embeddings = self.model.encode(
                [[corpus_instruction, text] for text in text_corpus],
                show_progress_bar=False,
                batch_size=32,
                device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
            )
            data['embeddings'] = corpus_embeddings.tolist()
            # to_csv(index=False) already returns a string; returning inside
            # this branch avoids a NameError when the DataFrame is empty.
            return data.to_csv(index=False)
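
# A minimal usage sketch, assuming the surrounding service constructs a
# ModelRequest carrying either a `query` string or a `df` DataFrame with a
# 'content' column (the exact ModelRequest constructor arguments are an
# assumption based on how the fields are used above):
#
#   import asyncio
#
#   model = Model(context)  # singleton: the checkpoint loads only once
#   req = ModelRequest(query="Who drafted the Indian constitution?")
#   query_embedding = asyncio.run(model.inference(req))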