Skip to content

Commit 5a011be

Browse files
Merge pull request Samagra-Development#254 from ksgr5566/search
Added word_score
2 parents 4e75142 + 3233682 commit 5a011be

File tree

13 files changed

+173
-0
lines changed

13 files changed

+173
-0
lines changed

config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,14 @@
7878
"environment": {},
7979
"nginx": []
8080
},
81+
{
82+
"serviceName": "word_score",
83+
"modelBasePath": "src/search/word_score/local/.",
84+
"apiBasePath": "/search/word_score/local",
85+
"containerPort": 8000,
86+
"environment": {},
87+
"nginx": []
88+
},
8189
{
8290
"serviceName": "text_translation_bhashini",
8391
"modelBasePath": "src/text_translation/bhashini/remote/.",

repository_data.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,16 @@
120120
"request_class": "ModelRequest"
121121
}
122122
}
123+
},
124+
"search": {
125+
"word_score": {
126+
"local": {
127+
"__is_async": true,
128+
"__is_base": true,
129+
"model_class": "Model",
130+
"request_class": "ModelRequest"
131+
}
132+
}
123133
}
124134
}
125135
}

src/search/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Purpose
2+
3+
Common folder for scoring methods required for augmenting search and retrieval of documents.

src/search/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# word_score is a subpackage of this package (src/search/word_score), so it
# must be imported relatively; a bare `word_score` is not a top-level module.
from .word_score import *

src/search/word_score/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Word Score
2+
3+
This folder consists of an API that scores documents based on an approach that combines IDF and Fuzzy word matching.
4+
5+
For a given query, it calculates a fuzzy matching score for each word in the query (the maximum score for that word across the entire row), weights these scores with IDF, averages the scores of all words in the query to give a score for the entire query, sorts the rows by that score, and returns the top n matches.

src/search/word_score/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .local import *
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Use an official Python runtime as a parent image
2+
FROM python:3.9-slim
3+
4+
WORKDIR /app
5+
6+
#install requirements
7+
COPY requirements.txt requirements.txt
8+
RUN pip3 install -r requirements.txt
9+
10+
# Copy the rest of the application code to the working directory
11+
COPY . /app/
12+
EXPOSE 8000
13+
# Set the entrypoint for the container
14+
CMD ["hypercorn", "--bind", "0.0.0.0:8000", "api:app"]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Word Score
2+
3+
## Test Deployment
4+
5+
- Git clone the repo and cd to the project location.
6+
- cd to `local`, i.e., `cd ./src/search/word_score/local`.
7+
- Replace the file in `./content` with a CSV file of your choice; the data column must be named `tags`.
8+
- Start your docker engine and `docker build -t word_score .`.
9+
- Do `docker run -p 8000:8000 word_score`.
10+
- `curl -X POST -H "Content-Type: application/json" -d '{"query": QUERY, "n": N}' http://0.0.0.0:8000`. <br> Replace `QUERY` with a query and `N` with the number of rows you want to retrieve.
11+
- The response to the above request would be: <br>
12+
`
13+
{
14+
"docs": ["row1", "row2", ... , "rowN"]
15+
}
16+
`
17+
The list of strings contains the top N rows.
18+
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .request import *
2+
from .model import *

src/search/word_score/local/api.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from model import Model
2+
from request import ModelRequest
3+
from quart import Quart, request
4+
import aiohttp
5+
6+
#from fastapi import FastAPI, Body
7+
app = Quart(__name__)
8+
#app.client = aiohttp.ClientSession()
9+
#app = FastAPI()
10+
11+
@app.before_serving
async def startup():
    """Create the aiohttp client session before the app starts serving.

    The session is stored on ``app.client``.
    """
    app.client = aiohttp.ClientSession()


@app.after_serving
async def shutdown():
    """Close the aiohttp client session when the app stops serving.

    Without this hook the session opened in ``startup`` is never released.
    """
    client = getattr(app, "client", None)
    # Guard against startup having failed or the session already being closed.
    if client is not None and not client.closed:
        await client.close()
14+
15+
@app.route('/', methods=['POST'])
async def translate():
    """Handle a POST to ``/``: build a ModelRequest from the JSON body and run inference.

    Returns:
        Whatever ``Model.inference`` returns for the request, or a 400 JSON
        error response when the body is not a JSON object.
    """
    data = await request.get_json()
    # get_json() gives None when the request is not JSON, and a valid JSON
    # body may still be a list or scalar; ``**data`` requires a mapping.
    if not isinstance(data, dict):
        return {"error": "Request body must be a JSON object"}, 400
    req = ModelRequest(**data)
    model = Model(app)
    return await model.inference(req)
21+
22+
@app.route('/', methods=['GET'])
async def hi():
    """Answer GET requests on ``/`` with the fixed string ``"hi"``."""
    greeting = "hi"
    return greeting
25+
26+

0 commit comments

Comments
 (0)