Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,24 @@ Usage
=====

Check `usage page <https://averell.readthedocs.io/en/latest/usage.html>`_


Docker
======
A Docker image for the frontend is available at https://hub.docker.com/repository/docker/linhdpostdata/averell-ui/

This is a frontend to the POSTDATA group tool Averell. In order to install and run it, follow these steps:

* Download and install Docker Desktop from: https://www.docker.com/products/docker-desktop/

* Open the Docker Desktop app.

* From the Docker Dashboard you can use Quick Search, which is located in the Dashboard header, to search for:
* Any container or Compose app on your local system. You can see an overview of associated environment variables or perform quick actions, such as start, stop, or delete.
* Public Docker Hub images, local images, and images from remote repositories. Depending on the type of image you select, you can either pull the image by tag, view documentation, go to Docker Hub for more details, or run a new container using the image.
* Extensions. From here, you can learn more about the extension and install it with a single click. Or, if you already have an extension installed, you can open it straight from the search results.
* Any volume. From here you can view the associated container.

* Search for ``linhdpostdata/averell-ui`` and pull (download) the image.

* Run the container and open http://127.0.0.1:5741 in your browser to access the UI.
226 changes: 226 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
import copy
import json

from pathlib import Path
from zipfile import ZipFile

import gradio as gr
import pycountry

from averell.utils import get_ids, generate_tei, CORPORA_SOURCES
from averell.core import export_corpora

# Mutable state shared by the UI callbacks in this file: the selected
# granularity, the selected output format and the slugs of the corpora
# currently ticked for export.
PARAMS = dict(
    granularity="poem",
    output_format="JSON",
    corpora_list=[
        "bibit",
        "stichopt",
        "disco2_1",
        "disco3",
        "adso",
        "adso100",
        "plc",
        "gongo",
        "ecpa",
        "4b4v",
        "czverse",
        "mel",
    ],
)

def get_available_languages(sources):
    """Return the distinct corpus languages in first-seen order.

    :param sources: Iterable of corpus descriptions, each exposing
        ``["properties"]["language"]``
    :return: List of language values without duplicates
    """
    languages = (entry["properties"]["language"] for entry in sources)
    # dict keys keep insertion order, which gives an ordered de-duplication
    return list(dict.fromkeys(languages))

def filter_corpus_language(sources, lg):
    """Return independent copies of the corpora written in language ``lg``.

    :param sources: List of corpus descriptions, each exposing
        ``["properties"]["language"]``
    :param lg: Language value to keep
    :return: List of deep-copied corpus descriptions, so callers can mutate
        them without touching ``sources``
    """
    cloned_sources = copy.deepcopy(sources)
    matching = []
    for entry in cloned_sources:
        if entry["properties"]["language"] == lg:
            matching.append(entry)
    return matching

# Distinct language values found in CORPORA_SOURCES, in first-seen order;
# the UI below iterates over this list to build one column per language.
available_languages = get_available_languages(CORPORA_SOURCES)

with gr.Blocks() as app_averell:
def export(output_path, output_format, output_granularity):
    """Export the selected corpora and point the ``out_file`` widget at it.

    :param output_path: Value of the folder label component; its ``"label"``
        entry is used as the base output folder
    :param output_format: ``"TEI"`` for a TEI export, anything else is
        handled as JSON
    :param output_granularity: ``"poem"`` zips the per-corpus JSON files;
        any other value dumps a single combined JSON file
    :return: Dict mapping ``out_file`` to a ``gr.File`` update
    """
    selected = PARAMS["corpora_list"]
    if not selected:
        return {out_file: gr.File.update(label="ERROR: No corpus selected")}

    base_folder = output_path["label"]
    archive = f"tmp/{output_format}.zip"

    # TEI export
    if output_format == "TEI":
        generate_tei(selected, base_folder, True)
        return {out_file: gr.File.update(value=archive,
                                         label=output_format)}

    # JSON export at a finer granularity: one combined JSON file
    if output_granularity != "poem":
        features, export_name = export_corpora(get_ids(selected),
                                               output_granularity,
                                               "tmp/JSON",
                                               None,
                                               no_download=False,
                                               ui_mode=True)
        json_path = f"tmp/{export_name}.json"
        with open(json_path, 'w', encoding='utf-8') as handle:
            json.dump(features, handle, ensure_ascii=False, indent=4)
        return {out_file: gr.File.update(value=json_path,
                                         label=export_name)}

    # JSON export at poem level: zip every JSON file of each selected corpus
    export_corpora(get_ids(selected),
                   None,
                   "tmp/JSON",
                   None,
                   no_download=False,
                   ui_mode=True)
    with ZipFile(archive, "w") as bundle:
        for slug in selected:
            corpus_dir = Path(f'{base_folder}/{output_format}/{slug}')
            for json_file in corpus_dir.glob("**/*.json"):
                bundle.write(json_file)
    return {out_file: gr.File.update(value=archive,
                                     label=output_format)}


def block_granularity(r_format):
    """Store the chosen output format and toggle the granularity selector.

    :param r_format: Selected output format
    :return: Dict mapping ``rad_granularity`` to an update that resets it to
        ``"poem"`` and hides it when the format is ``"TEI"``, shows it
        otherwise
    """
    PARAMS["output_format"] = r_format
    show_granularity = r_format != "TEI"
    return {rad_granularity: gr.update(value="poem",
                                       visible=show_granularity)}
def update_granularity(value):
    """Store the granularity and build the visibility updates for the UI.

    The returned list always has 24 entries: 12 checkbox updates, then
    6 checkbox updates, then 6 box updates, in that order.

    :param value: Selected granularity
    :return: List of component updates
    """
    PARAMS["granularity"] = value
    shown_chk = [gr.Checkbox.update(value=True, visible=True)]
    hidden_chk = [gr.Checkbox.update(value=False, visible=False)]
    shown_box = [gr.Box.update(visible=True)]
    hidden_box = [gr.Box.update(visible=False)]

    # Every granularity other than "word"/"syllable" shows everything
    if value not in ("word", "syllable"):
        return shown_chk*18 + shown_box*6

    if value == "word":
        PARAMS["corpora_list"] = ["plc", "gongo", "ecpa", "bibit", "czverse"]
        # pattern 000011100110 + 110110 + 110110
        corpora_updates = (hidden_chk*4 + shown_chk*3 + hidden_chk*2
                           + shown_chk*2 + hidden_chk)
        language_updates = shown_chk*2 + hidden_chk + shown_chk*2 + hidden_chk
        box_updates = shown_box*2 + hidden_box + shown_box*2 + hidden_box
    else:
        PARAMS["corpora_list"] = ["plc", "gongo"]
        # pattern 000011000000 + 100000 + 100000
        corpora_updates = hidden_chk*4 + shown_chk*2 + hidden_chk*6
        language_updates = shown_chk + hidden_chk*5
        box_updates = shown_box + hidden_box*5
    return corpora_updates + language_updates + box_updates


def change_selection(value, *labels):
    """Apply one select-all/none state to several corpora.

    :param value: Whether the corpora should be selected
    :param labels: Slugs of the corpora to add to or remove from the
        selection
    :return: ``value`` unchanged, so it can be forwarded to an output
        component
    """
    for slug in labels:
        update_corpora_list(value, slug)
    return value
def change_global_selection(value, *labels):
    """Apply the global select-all/none state to every corpus.

    :param value: Whether the corpora should be selected
    :param labels: Slugs of the corpora to add to or remove from the
        selection
    :return: One checkbox update per per-language "select all" checkbox
    """
    for corpus_name in labels:
        update_corpora_list(value, corpus_name)
    # The handler's outputs are the per-language checkboxes, so size the
    # result from that list instead of hard-coding the number of languages.
    return [gr.Checkbox.update(value=value)] * len(lang_checkboxes)
def update_corpora_list(added, corpus_name):
    """Add or remove one corpus slug in ``PARAMS["corpora_list"]`` in place.

    Nothing happens when the list already reflects the requested state.

    :param added: Truthy to select the corpus, falsy to deselect it
    :param corpus_name: Slug of the corpus
    """
    selected = PARAMS["corpora_list"]
    already_selected = corpus_name in selected
    if added and not already_selected:
        selected.append(corpus_name)
    elif already_selected and not added:
        selected.remove(corpus_name)

# --- UI layout and event wiring -------------------------------------------
# NOTE(review): leading indentation is not visible in this view, so the exact
# nesting of the `with`/`for` blocks below is inferred from statement order;
# confirm against the real file before restructuring anything.
app_title = gr.HTML("<h1>Averell</h1>")
with gr.Row() as row:
with gr.Column(scale=3) as c1:
# Output format selector; its change handler is registered further down.
rad_format = gr.Radio(["TEI", "JSON"],
label="Output",
info="Choose output format",
value="TEI",
interactive=True)
# Granularity selector; created hidden, block_granularity toggles it.
rad_granularity = gr.Radio(
["poem", "stanza", "line", "word", "syllable"],
label="Granularity",
info="Choose output granularity",
value="poem",
interactive=True,
visible=False,
)
# Components collected while building the selector; used as handler outputs.
corpus_checkboxes = []
lang_checkboxes = []
with gr.Box() as b1:
gr.HTML(value="<h3>Corpora list<h3>")
gr.HTML(value="<br>")
# Global "select all/none" checkbox for every corpus.
all_corp_chk = gr.Checkbox(True, label="Select all/none",
interactive=True)
with gr.Row() as rowa:
# Hidden slug textboxes of every corpus, across all languages.
all_label_list = []
# Hidden placeholder component; presumably the first child of rowa, which
# would explain why `rowa.children[1:]` is used below - TODO confirm.
gr.Checkbox(label="dummy",visible=False)
for lang in available_languages:
with gr.Column() as b2:
# Human-readable language name looked up from the two-letter code.
language = pycountry.languages.get(
alpha_2=lang).name
gr.HTML(language)
with gr.Blocks() as corpora:
with gr.Row() as rowb:
# Per-language "select all/none" checkbox.
lang_chk = gr.Checkbox(True,
label="Select all/none",
interactive=True)
# Deep-copied corpus descriptions for this language only.
filtered_corpus = filter_corpus_language(
CORPORA_SOURCES, lang)
with gr.Accordion("Expand list",
open=False) as acc:
# Hidden slug textboxes of the corpora of this language.
label_list = []
for corpus in filtered_corpus:
# CSS classes: the corpus granularities plus "poem" and the language code.
# The list being extended belongs to the deep copy made by
# filter_corpus_language.
classes = corpus["properties"][
"granularity"]
classes.append("poem")
classes.append(lang)
chk = gr.Checkbox(True,
label=corpus[
"name"],
info=f'License: {corpus["properties"]["license"]} | Number of poems: {corpus["properties"]["doc_quantity"]}',
interactive=True,
elem_classes=classes,
elem_id=corpus[
"properties"][
"slug"],
)
# Hidden textbox holding the slug so it can be passed as a handler input.
label = gr.Textbox(
value=corpus["properties"][
"slug"], visible=False)
# Corpus checkboxes change
chk.change(update_corpora_list,
[chk, label],
show_progress=False)
# "Select all" language checkboxes change
# NOTE(review): `label` is appended to label_list only after this call, so
# if this registration sits inside the per-corpus loop the inputs do not
# include the current corpus' slug - confirm this is intended.
lang_chk.change(change_selection,
[lang_chk,
*label_list],
chk,
show_progress=False)
corpus_checkboxes.append(chk)
label_list.append(label)
all_label_list.append(label)
lang_checkboxes.append(lang_chk)
# "Select All/None" checkbox change
all_corp_chk.change(change_global_selection,
[all_corp_chk,
*all_label_list],
[*lang_checkboxes])
with gr.Column(scale=1) as c2:
# Every child of rowa except the first one; these receive the box
# visibility updates returned by update_granularity.
accordions_boxes = rowa.children[1:]
rad_granularity.change(update_granularity,
rad_granularity,
[*corpus_checkboxes,
*lang_checkboxes,
*accordions_boxes],
show_progress=False)
rad_format.change(block_granularity,
rad_format,
rad_granularity,
api_name="output_format",
show_progress=False)
exp_btn = gr.Button("Export")
# Hidden label carrying the output folder; passed to export as output_path.
folder_path = gr.Label(value="tmp/", visible=False)
# File component that export points at the generated file.
out_file = gr.File()
exp_btn.click(export,
[folder_path,
rad_format,
rad_granularity],
out_file,
api_name="export")

# Start the app on port 5741 with sharing disabled (share=False).
app_averell.launch(share=False, server_port=5741)
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,8 @@ pyyaml>=5
requests>=2.21.0
tabulate>=0.8.7
tqdm>=4.43.0

averell~=1.2.2
setuptools~=67.7.2
gradio~=3.27.0
pycountry~=22.3.5
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def read(*names, **kwargs):
keywords=[
# eg: 'keyword1', 'keyword2', 'keyword3',
],
python_requires='>3.6.*',
python_requires='>=3.6',
install_requires=read("requirements.txt").split("\n"),
extras_require={
# eg:
Expand Down
15 changes: 13 additions & 2 deletions src/averell/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ def get_corpora(corpus_indices=None, output_folder=DEFAULT_OUTPUT_FOLDER):


def export_corpora(
corpus_ids, granularity, corpora_folder, filename, no_download=False
corpus_ids, granularity, corpora_folder, filename, no_download=False,
ui_mode=False
):
"""
Generates a single JSON file with the chosen granularity for all of the
Expand All @@ -60,6 +61,8 @@ def export_corpora(
:param corpora_folder: Local folder where the corpora is located
:param filename: Name of the output file
:param no_download: Whether to download or not a corpora when missing
:param ui_mode: Whether the function is called from the gradio UI

:return: Python dict with the chosen granularity for all of the selected
corpora
"""
Expand All @@ -68,13 +71,16 @@ def export_corpora(
export_filename = filename
if Path(corpora_folder).exists() or not no_download:
if not corpus_ids:
print("ID not in corpora list")

logging.error("No CORPUS ID selected")
else:
if granularity is not None:
for corpus_id in corpus_ids:
try:
corpus = CORPORA_SOURCES[corpus_id]
except IndexError:
print("ID not in corpora list")
logging.error("ID not in corpora list")
else:
corpus_folder = corpus["properties"]["slug"]
Expand All @@ -90,6 +96,8 @@ def export_corpora(
continue
granularities_list = corpus["properties"]["granularity"]
if granularity not in granularities_list:
print("ID not in corpora list")

logging.error(
f"'{granularity}' granularity not found on "
f"'{corpus_name}' properties")
Expand All @@ -101,14 +109,17 @@ def export_corpora(
granularity)
corpora_features.extend(filtered_features)
else:
print("ID not in corpora list")

logging.error("No GRANULARITY selected")

if not export_filename:
export_filename = "_".join(slugs)
export_filename = f"{export_filename}_{granularity}s"

if corpora_features:
if corpora_features and not ui_mode:
write_json(corpora_features, export_filename)
else:
print("Corpora folder not found")
logging.error("Corpora folder not found")
return corpora_features, export_filename
Loading