Merge branch 'Samagra-Development:restructure' into restructure

Gautam-Rajeev · web-flow · commit 0623ddb24758 · 2023-09-13T14:27:39.000+05:30
diff --git a/config.json b/config.json
@@ -1,13 +1,22 @@
 {
   "models": [
         {
+      "serviceName": "ner",
+      "modelBasePath": "src/ner/agri_ner_akai/local/.",
+      "apiBasePath": "ner/agri_ner_akai/local/",
+      "containerPort": 8000,
+      "environment": {},
+      "nginx": [],
+      "build": true
+    },    
+        {
       "serviceName": "word_score",
       "modelBasePath": "src/search/word_score/local/.",
-      "apiBasePath": "/search/word_score/local",
+      "apiBasePath": "/search/word_score/local/",
       "containerPort": 8000,
       "environment": {},
       "nginx": [],
-      "build": false
+      "build": true
     },
     {
       "serviceName": "spell_check",
diff --git a/src/ner/README.md b/src/ner/README.md
diff --git a/src/ner/agri_ner_akai/README.md b/src/ner/agri_ner_akai/README.md
diff --git a/src/ner/agri_ner_akai/local/Dockerfile b/src/ner/agri_ner_akai/local/Dockerfile
@@ -0,0 +1,15 @@
+# Use an official Python runtime as a parent image
+FROM python:3.9-slim
+
+WORKDIR /app
+
+# Install requirements before copying the source so this layer stays cached
+# when only application code changes. --no-cache-dir keeps pip's download
+# cache out of the image layer.
+COPY requirements.txt requirements.txt
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the application code to the working directory
+COPY . /app/
+
+# Port the API listens on inside the container
+EXPOSE 8000
+
+# Serve the `app` object defined in api.py with hypercorn
+CMD ["hypercorn", "--bind", "0.0.0.0:8000", "api:app"]
diff --git a/src/ner/agri_ner_akai/local/README.md b/src/ner/agri_ner_akai/local/README.md
@@ -0,0 +1,21 @@
+## NER:
+
+
+### Purpose :
+Model to detect
+- crops
+- pests
+- seed type 
+
+
+### Testing the model deployment :  
+To test just the Hugging Face deployment for named entity recognition, follow these steps:
+
+- Git clone the repo
+- Go to this folder from the repository root, i.e. ``` cd src/ner/agri_ner_akai/local ```
+- Create docker image file and test the api:  
+```
+docker build -t testmodel .
+docker run -p 8000:8000 testmodel
+curl -X POST -H "Content-Type: application/json" -d '{"text": "What are tomatoes and potatoes that are being attacked by aphids? "}' http://localhost:8000/
+```
diff --git a/src/ner/agri_ner_akai/local/__init__.py b/src/ner/agri_ner_akai/local/__init__.py
@@ -0,0 +1,2 @@
+# Re-export the package's public classes.
+from .request import ModelRequest
+# Model is defined in model.py; request.py only defines ModelRequest.
+from .model import Model
diff --git a/src/ner/agri_ner_akai/local/api.py b/src/ner/agri_ner_akai/local/api.py
@@ -0,0 +1,25 @@
+from model import Model
+from request import ModelRequest
+from quart import Quart, request, jsonify
+import aiohttp
+
+# Quart application object; the Dockerfile's hypercorn command serves it as "api:app".
+app = Quart(__name__)
+
+# Shared Model instance; stays None until startup() runs before serving.
+model = None
+
+@app.before_serving
+async def startup():
+    """Create the shared HTTP session and load the NER model before serving."""
+    global model
+    app.client = aiohttp.ClientSession()
+    model = Model(app)
+
+
+@app.after_serving
+async def shutdown():
+    """Close the aiohttp session opened in startup() so its connections are released."""
+    await app.client.close()
+
+@app.route('/', methods=['POST'])
+async def embed():
+    """Run NER on the posted text and return the aggregated entities as JSON.
+
+    Expects a JSON object body with a string "text" field, e.g. {"text": "..."}.
+    Responds with HTTP 400 and an "error" message when the body does not have
+    that shape, instead of failing with an unhandled exception.
+    """
+    # silent=True yields None for an unparsable body so it is rejected below.
+    data = await request.get_json(silent=True)
+    if not isinstance(data, dict) or not isinstance(data.get("text"), str):
+        return jsonify({"error": 'Request body must be a JSON object with a string "text" field'}), 400
+    req = ModelRequest(text=data["text"])
+    entities = await model.inference(req)
+    return jsonify(entities)  # Convert the list of entities to JSON format
+
+# Start Quart's built-in server when this file is executed directly.
+if __name__ == "__main__":
+    app.run()
diff --git a/src/ner/agri_ner_akai/local/model.py b/src/ner/agri_ner_akai/local/model.py
@@ -0,0 +1,68 @@
+from transformers import pipeline
+from request import ModelRequest
+
+class Model():
+    """Singleton wrapper around the Hugging Face "GautamR/akai_ner" NER pipeline.
+
+    Every call to ``Model(context)`` returns the same instance. The pipeline is
+    built only when that instance is first created, so later constructions do
+    not reload the model.
+    """
+
+    def __new__(cls, context):
+        """Return the shared instance, building the NER pipeline on first use.
+
+        Args:
+            context: Application object; stored on the class as ``context``.
+        """
+        cls.context = context
+        if not hasattr(cls, 'instance'):
+            cls.instance = super(Model, cls).__new__(cls)
+            # Loading the pipeline is expensive; do it once together with the
+            # singleton instead of on every construction.
+            cls.nlp_ner = pipeline("ner", model="GautamR/akai_ner", tokenizer="GautamR/akai_ner")
+        return cls.instance
+
+    async def inference(self, request: "ModelRequest"):
+        """Run NER on ``request.text`` and return the aggregated entities."""
+        entities = self.nlp_ner(request.text)
+        return self.aggregate_entities(request.text, entities)
+
+    @staticmethod
+    def aggregate_entities(sentence, entity_outputs):
+        """Merge token-level NER predictions into word-level entities.
+
+        Args:
+            sentence: The text the predictions were produced from.
+            entity_outputs: Iterable of dicts with the keys "entity" (a label
+                such as "B-PEST" or "I-PEST"), "score", "word", "start" and
+                "end". Words starting with "##" are treated as subwords.
+
+        Returns:
+            A list of dicts with the keys "entity_group", "score", "word",
+            "start" and "end"; "score", "start" and "end" are floats.
+        """
+        aggregated_entities = []
+        current_entity = None
+
+        for entity in entity_outputs:
+            entity_type = entity["entity"].split("-")[-1]
+            is_subword = entity["word"].startswith("##")
+
+            if is_subword and "I-" in entity["entity"]:
+                # Flush whatever was being built, then emit the whole
+                # whitespace-delimited word that contains this subword.
+                # NOTE(review): when the flushed entity is the start of this
+                # same word, both the partial and the full word are emitted -
+                # confirm that this is intended.
+                if current_entity:
+                    aggregated_entities.append(current_entity)
+
+                word_start = sentence.rfind(" ", 0, entity["start"]) + 1
+                word_end = sentence.find(" ", entity["end"])
+                if word_end == -1:
+                    word_end = len(sentence)
+
+                aggregated_entities.append({
+                    "entity_group": entity_type,
+                    "score": float(entity["score"]),
+                    "word": sentence[word_start:word_end].replace('.','').replace('?',''),
+                    "start": float(word_start),
+                    "end": float(word_end)
+                })
+                current_entity = None
+
+            elif is_subword and current_entity:
+                # Subword without an I- label: glue it onto the entity in progress.
+                current_entity["word"] += entity["word"][2:]
+                # float() keeps "end" the same type as in every other branch.
+                current_entity["end"] = float(entity["end"])
+                current_entity["score"] = float((current_entity["score"] + entity["score"]) / 2)  # averaging scores
+
+            else:
+                # Either a full word, or a subword with no entity to attach to
+                # (first prediction, or right after an I- subword was flushed).
+                # The orphan subword starts its own entity instead of
+                # dereferencing None.
+                if current_entity:
+                    aggregated_entities.append(current_entity)
+
+                current_entity = {
+                    "entity_group": entity_type,
+                    "score": float(entity["score"]),
+                    "word": entity["word"][2:] if is_subword else entity["word"],
+                    "start": float(entity["start"]),
+                    "end": float(entity["end"])
+                }
+
+        if current_entity:
+            aggregated_entities.append(current_entity)
+
+        return aggregated_entities
diff --git a/src/ner/agri_ner_akai/local/request.py b/src/ner/agri_ner_akai/local/request.py
@@ -0,0 +1,11 @@
+import requests
+import json
+
+
+class ModelRequest():
+    """Request payload for the NER model: the raw text to analyse."""
+
+    def __init__(self, text):
+        self.text = text
+
+    def to_json(self):
+        """Serialise this request to an indented JSON string with sorted keys."""
+        def _attrs(obj):
+            # json calls this for any object it cannot encode natively.
+            return obj.__dict__
+
+        return json.dumps(self, sort_keys=True, indent=4, default=_attrs)
diff --git a/src/ner/agri_ner_akai/local/requirements.txt b/src/ner/agri_ner_akai/local/requirements.txt
@@ -0,0 +1,4 @@
+# Index options belong on their own line in a requirements file; pip only
+# honours per-requirement options (such as --hash) on a requirement line.
+# --extra-index-url adds the PyTorch CPU wheel index while keeping PyPI
+# available for the remaining packages.
+--extra-index-url https://download.pytorch.org/whl/cpu
+torch==2.0.1
+transformers
+quart
+aiohttp

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+from .request import ModelRequest`
	`2`	`+from .request import Model`
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +torch==2.0.1 --index-url https://download.pytorch.org/whl/cpu
 +transformers
 +quart
 +aiohttp