Skip to content

Commit b118560

Browse files
authored
feat: migrate from flask to fastapi (#56)
1 parent fbefdec commit b118560

File tree

23 files changed

+1298
-580
lines changed

23 files changed

+1298
-580
lines changed

.gitlab-ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@ build:
3333
stage: build
3434
script:
3535
- export REPO_NAME=$CI_REGISTRY_IMAGE/conjugador-elastic
36-
- export IMAGE_NAME=$REPO_NAME:$CI_COMMIT_REF_NAME
36+
- export IMAGE_NAME=$REPO_NAME:$CI_COMMIT_REF_SLUG
3737
- export DOCKER_PATH=docker/elasticsearch/Dockerfile
3838
- docker build -f $DOCKER_PATH --tag $IMAGE_NAME .
3939
- docker push $IMAGE_NAME
4040

4141
- export REPO_NAME=$CI_REGISTRY_IMAGE/conjugador-webserver
42-
- export IMAGE_NAME=$REPO_NAME:$CI_COMMIT_REF_NAME
42+
- export IMAGE_NAME=$REPO_NAME:$CI_COMMIT_REF_SLUG
4343
- export DOCKER_PATH=docker/webserver/Dockerfile
4444
- docker build -f $DOCKER_PATH --tag $IMAGE_NAME .
4545
- docker push $IMAGE_NAME

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ content from the spelling dictionary.
77

88
1. extract.py reads the dictionary file and extracts the verbs in JSON format (into data/jsons)
99
2. index_creation.py reads the jsons and creates multiple indices in Elasticsearch
10-
3. Flask application at web/ serves the content
10+
3. FastAPI application at web/ serves the content
1111

1212
# Git clone
1313

docker/elasticsearch/Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ RUN uv python install $(cat .python-version) && \
1919

2020
ENV discovery.type=single-node
2121
ENV xpack.security.enabled=false
22+
ENV logger.level=error
2223
ENV ES_JAVA_OPTS="-Xms512m -Xmx512m"
2324

2425
# Start Elasticsearch, wait for health, run indexing, then stop
@@ -74,5 +75,6 @@ COPY --from=build /usr/share/elasticsearch/data /usr/share/elasticsearch/data
7475
RUN chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/data
7576
ENV discovery.type=single-node
7677
ENV xpack.security.enabled=false
78+
ENV logger.level=error
7779
ENV ES_JAVA_OPTS="-Xms512m -Xmx512m"
7880
USER elasticsearch

docker/webserver/entrypoint.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
#!/bin/sh
22
mkdir -p /var/log/conjugador/
3-
uv run gunicorn web.web_search:app -b 0.0.0.0:8000 --error-logfile /var/log/conjugador/gnuicorn.log --workers=2
3+
uv run uvicorn web.main:app --host 0.0.0.0 --port 8000 --log-level error 2>>/var/log/conjugador/uvicorn_error.log

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@ description = "Conjugador is a web application that allows to search and display
55
readme = "README.md"
66
requires-python = ">=3.12"
77
dependencies = [
8-
"elasticsearch==8.15.1",
8+
"aiofiles>=25.1.0",
9+
"elasticsearch[async]==8.15.1",
10+
"fastapi>=0.128.0",
911
"flask==3.0.2",
1012
"gunicorn==22.0.0",
1113
"psutil>=7.1.3",
1214
"pyuca==1.2",
15+
"uvicorn>=0.40.0",
1316
]
1417

1518
[dependency-groups]

uv.lock

Lines changed: 701 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

web/conjugador/autocomplete.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#!/usr/bin/env python3
2+
# -*- encoding: utf-8 -*-
3+
#
4+
# Copyright (c) 2019-2020 Jordi Mas i Hernandez <jmas@softcatala.org>
5+
#
6+
# This program is free software; you can redistribute it and/or
7+
# modify it under the terms of the GNU Lesser General Public
8+
# License as published by the Free Software Foundation; either
9+
# version 2.1 of the License, or (at your option) any later version.
10+
#
11+
# This program is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
# Lesser General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU Lesser General Public
17+
# License along with this program; if not, write to the
18+
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19+
# Boston, MA 02111-1307, USA.
20+
21+
import json
22+
import logging
23+
24+
from elasticsearch import AsyncElasticsearch
25+
26+
from indexer.firstletter import FirstLetter
27+
28+
29+
class Autocomplete:
    """
    Autocomplete a word based on the information on the Elasticsearch indices.

    Each lookup targets the index named ``autocomplete-<letter>``, where the
    letter is whatever ``FirstLetter.from_word`` returns for the word.

    Args:
        es_client (AsyncElasticsearch): The client to use for the connection.
    """

    # Upper bound on the number of hits requested per lookup.
    _MAX_RESULTS = 1000
    # Fields requested from Elasticsearch and echoed back by ``get_json``.
    _SOURCE_FIELDS = ("verb_form", "infinitive", "url")

    def __init__(self, es_client: AsyncElasticsearch) -> None:
        """
        Initializes the Autocomplete class with a preconfigured ES client.

        Args:
            es_client (AsyncElasticsearch): The client to use for the connection.
        """
        self.es_client = es_client
        self.letter = FirstLetter()

    async def get_results(self, word: str) -> list[dict]:
        """
        Gets the results from the prepared query, based on the word to autocomplete.

        The word is lower-cased and used as a prefix on ``verb_form.keyword``;
        hits are sorted ascending by ``autocomplete_sorting.keyword``.

        Args:
            word (str): The word from which to autocomplete.

        Returns:
            list[dict]: The ``_source`` of every hit. Empty when the word is
            empty, the target index does not exist, or the search fails.
        """
        # An empty word has no first letter to pick an index from, and an
        # empty prefix would not narrow the search at all.
        if not word:
            return []

        index_name = f"autocomplete-{self.letter.from_word(word)}"

        if not await self.es_client.indices.exists(index=index_name):
            return []

        query = {
            "query": {
                "prefix": {"verb_form.keyword": {"value": word.lower()}}
            },
            "sort": [
                {
                    "autocomplete_sorting.keyword": {
                        "order": "asc",
                    },
                },
            ],
            "size": self._MAX_RESULTS,
            "_source": list(self._SOURCE_FIELDS),
        }

        try:
            resp = await self.es_client.search(index=index_name, body=query)
            return [hit["_source"] for hit in resp["hits"]["hits"]]
        except Exception as e:
            # Best effort: a failed search degrades to "no suggestions".
            # logging.exception keeps the traceback alongside the message.
            logging.exception("Error searching index '%s': %s", index_name, e)
            return []

    async def get_json(self, word: str) -> tuple[str, int]:
        """
        Gets a stringified JSON for all the results found for the autocomplete word.

        Args:
            word (str): The word from which to autocomplete.

        Returns:
            tuple[str, int]: A tuple containing the stringified JSON and the
            status code. The status code is always 200.
        """
        results = await self.get_results(word)

        # Keep only the known fields, in a fixed order.
        all_results = [
            {field: result[field] for field in self._SOURCE_FIELDS}
            for result in results
        ]

        return json.dumps(all_results, indent=4, separators=(",", ": ")), 200
Lines changed: 27 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import json
2222
import logging
2323

24-
from elasticsearch import Elasticsearch
24+
from elasticsearch import AsyncElasticsearch
2525
from pyuca import Collator
2626

2727

@@ -30,85 +30,68 @@ class IndexLetter:
3030
Search a letter in the Elasticsearch index.
3131
3232
Args:
33-
letter (str): The letter to search for.
34-
es_url (str | None): The url to connect to an Elasticsearch instance.
33+
es_client (AsyncElasticsearch): The client to use for the ES connection.
3534
"""
3635

37-
DEFAULT_ES_HOST = "http://localhost:9200"
38-
39-
def __init__(self, letter: str, es_url: str | None = None) -> None:
36+
def __init__(self, es_client: AsyncElasticsearch) -> None:
4037
"""
41-
Initializes the IndexLetter class with a letter to look for.
38+
Initializes the IndexLetter class with a preconfigured ES client.
4239
4340
Args:
44-
letter (str): The letter to search for.
45-
es_url (str | None): The url to connect to an Elasticsearch instance.
41+
es_client (AsyncElasticsearch): The client to use for the ES connection.
4642
"""
47-
if not es_url:
48-
es_url = self.DEFAULT_ES_HOST
49-
50-
self.letter = letter
51-
self.es_client = Elasticsearch(es_url)
43+
self.es_client = es_client
5244
self.index_name = "letter-index"
5345
self.collator = Collator()
54-
self.num_results = 0
55-
self.results = []
56-
57-
def get_num_results(self) -> int:
58-
"""
59-
Retrieves the number of results found.
60-
61-
Returns:
62-
int: Num of results.
63-
"""
64-
return self.num_results
6546

66-
def get_results(self) -> list[dict]:
47+
async def get_results(self, letter: str) -> list[dict]:
6748
"""
6849
Gets the results from the prepared query, based on the letter.
6950
51+
Args:
52+
letter (str): The letter of the index to check.
53+
7054
Returns:
7155
list[dict]: A list of dicts containing the results.
7256
"""
73-
if not self.es_client.indices.exists(index=self.index_name):
74-
self.results = []
75-
self.num_results = 0
76-
return self.results
57+
if not await self.es_client.indices.exists(index=self.index_name):
58+
return []
7759

7860
query = {
79-
"query": {"term": {"index_letter.keyword": self.letter}},
61+
"query": {"term": {"index_letter.keyword": letter}},
8062
"collapse": {"field": "verb_form.keyword"},
8163
"size": 10000,
8264
"_source": ["verb_form", "infinitive"],
8365
}
8466

8567
try:
86-
response = self.es_client.search(index=self.index_name, body=query)
87-
hits = response["hits"]["hits"]
88-
self.results = [hit["_source"] for hit in hits]
89-
self.results.sort(
90-
key=lambda x: self.collator.sort_key(x["verb_form"])
68+
response = await self.es_client.search(
69+
index=self.index_name, body=query
9170
)
92-
self.num_results = len(self.results)
71+
hits = response["hits"]["hits"]
72+
results = [hit["_source"] for hit in hits]
73+
results.sort(key=lambda x: self.collator.sort_key(x["verb_form"]))
9374

9475
except Exception as e:
9576
logging.error(f"Error searching index {self.index_name}: {e}")
96-
self.results = []
97-
self.num_results = 0
77+
results = []
9878

99-
return self.results
79+
return results
10080

101-
def get_json(self) -> tuple[str, int]:
81+
async def get_json(self, letter: str) -> tuple[str, int]:
10282
"""
103-
Gets a stringified JSON for all the results found for the initialized
104-
letter.
83+
Gets a stringified JSON for all the results found for the given letter.
84+
85+
Args:
86+
letter (str): The letter of the index to check.
10587
10688
Returns:
10789
tuple[str, int]: A tuple containing the stringified JSON and the status code.
10890
"""
10991
OK = 200
11092
status = OK
111-
results = self.get_results()
93+
results = await self.get_results(letter)
94+
11295
all_results = []
11396
for result in results:
11497
verb = {"verb_form": result["verb_form"]}

0 commit comments

Comments
 (0)