diff --git a/README.md b/README.md
index 948a82b..022eed8 100644
--- a/README.md
+++ b/README.md
@@ -41,8 +41,9 @@ is based on [Profiles][profiles]. Unless otherwise configured, the profile used
| | Name | Description |
| -- | ----------------- | ----------- |
| 1. | [NLP on documents](examples/nlp_on_documents/nlp_on_documents.ipynb) | A few quick examples on how to apply NLP models on documents (e.g. extracting key terms) |
-| 2. | [Reference Parsing](examples/nlp_for_references/nlp_for_references.ipynb) | Examples on how to parse references from Documents |
-| 3. | [Material Extraction](examples/nlp_for_materials/nlp_for_materials.ipynb) | Examples on how to extract materials from Documents |
+| 2. | [Metadata Extraction](examples/nlp_for_metadata/nlp_for_metadata.ipynb) | Examples on how to extract the metadata of a Document |
+| 3. | [Reference Parsing](examples/nlp_for_references/nlp_for_references.ipynb) | Examples on how to parse references from Documents |
+| 4. | [Material Extraction](examples/nlp_for_materials/nlp_for_materials.ipynb) | Examples on how to extract materials from Documents |
### Data queries
diff --git a/examples/data_query_quick_start/quick_start.ipynb b/examples/data_query_quick_start/quick_start.ipynb
index 6f3e334..bfecac8 100644
--- a/examples/data_query_quick_start/quick_start.ipynb
+++ b/examples/data_query_quick_start/quick_start.ipynb
@@ -132,9 +132,7 @@
"cell_type": "code",
"execution_count": 5,
"id": "f915761b",
- "metadata": {
- "scrolled": false
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -370,9 +368,7 @@
"cell_type": "code",
"execution_count": 6,
"id": "81df4c38",
- "metadata": {
- "scrolled": false
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -786,7 +782,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.4"
+ "version": "3.9.18"
}
},
"nbformat": 4,
diff --git a/examples/nlp_for_metadata/README.md b/examples/nlp_for_metadata/README.md
new file mode 100644
index 0000000..1504c62
--- /dev/null
+++ b/examples/nlp_for_metadata/README.md
@@ -0,0 +1,5 @@
+# NLP on Documents - Extracting metadata
+
+:point_right: Run the [nlp_for_metadata.ipynb](./nlp_for_metadata.ipynb)
+notebook to extract metadata (title, abstract, authors) from (scientific) reports.
+
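+A minimal sketch of the core steps used in the notebook (the input file name
+below is just the sample converted there; point it to any converted Deep Search
+JSON export):
+
+```python
+import json
+
+from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models
+from deepsearch_glm.nlp_utils import init_nlp_model
+
+# download (or reuse) the pretrained NLP models
+load_pretrained_nlp_models(force=False, verbose=True)
+
+# load a converted document (JSON export from Deep Search)
+with open("converted_docs/2206.00785.json") as fr:
+    doc = json.load(fr)
+
+# apply the language, reference and metadata models
+model = init_nlp_model("language;reference;metadata")
+res = model.apply_on_doc(doc)
+
+# the title and abstract are added to the enriched document description
+print(res["description"].get("title"))
+print(res["description"].get("abstract"))
+```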
diff --git a/examples/nlp_for_metadata/nlp_for_metadata.ipynb b/examples/nlp_for_metadata/nlp_for_metadata.ipynb
new file mode 100644
index 0000000..52e9903
--- /dev/null
+++ b/examples/nlp_for_metadata/nlp_for_metadata.ipynb
@@ -0,0 +1,4298 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "37d96e78",
+ "metadata": {},
+ "source": [
+ "# Document MetaData Extraction"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4edb626f",
+ "metadata": {},
+ "source": [
+ "## Getting started\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a8f9c441",
+ "metadata": {},
+ "source": [
+ "### Set notebook parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "b01a4fd1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ "Project key: 1234567890abcdefghijklmnopqrstvwyz123456\n"
+ ]
+ }
+ ],
+ "source": [
+ "from dsnotebooks.settings import ProjectNotebookSettings\n",
+ "\n",
+ "# notebook settings auto-loaded from .env / env vars\n",
+ "notebook_settings = ProjectNotebookSettings()\n",
+ "\n",
+ "PROFILE_NAME = notebook_settings.profile # the profile to use\n",
+ "PROJ_KEY = notebook_settings.proj_key # the project to use\n",
+ "\n",
+ "# default project_key = 1234567890abcdefghijklmnopqrstvwyz123456"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "239dc0f1",
+ "metadata": {},
+ "source": [
+ "### Import example dependencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "502cdef8",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-08-02T12:14:25.377422Z",
+ "start_time": "2022-08-02T12:14:25.152485Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " -> already downloaded part-of-speech\n",
+ " -> already downloaded reference\n",
+ " -> already downloaded material\n",
+ " -> already downloaded language\n",
+ " -> already downloaded name\n",
+ " -> already downloaded semantic\n",
+ " -> already downloaded metadata\n",
+ " -> already downloaded geoloc\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import json\n",
+ "\n",
+ "import textwrap\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "import deepsearch as ds\n",
+ "\n",
+ "from pathlib import Path\n",
+ "from zipfile import ZipFile\n",
+ "\n",
+ "from deepsearch.documents.core.export import export_to_markdown\n",
+ "from IPython.display import display, Markdown, HTML, display_html\n",
+ "\n",
+ "from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models\n",
+ "\n",
+ "from deepsearch_glm.nlp_utils import (\n",
+ " extract_references_from_doc,\n",
+ " init_nlp_model,\n",
+ " list_nlp_model_configs,\n",
+ ")\n",
+ "\n",
+ "from tabulate import tabulate\n",
+ "\n",
+ "models = load_pretrained_nlp_models(force=False, verbose=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e6e4dcda",
+ "metadata": {},
+ "source": [
+ "### Connect to Deep Search"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "f44fbf08",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "api = ds.CpsApi.from_env(profile_name=PROFILE_NAME)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6f1200c5-1138-4491-bc33-3b2d5aabe949",
+ "metadata": {},
+ "source": [
+ "## Convert Document"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "ec83eb0b",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-08-02T12:14:49.216045Z",
+ "start_time": "2022-08-02T12:14:25.380757Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Processing input: : 100%|\u001b[38;2;15;98;254m██████████████████████████████\u001b[0m| 1/1 [00:00<00:00, 124.39it/s]\u001b[38;2;15;98;254m \u001b[0m\n",
+ "Submitting input: : 100%|\u001b[38;2;15;98;254m██████████████████████████████\u001b[0m| 1/1 [00:06<00:00, 6.66s/it]\u001b[38;2;15;98;254m \u001b[0m\n",
+ "Converting input: : 100%|\u001b[38;2;15;98;254m██████████████████████████████\u001b[0m| 1/1 [00:26<00:00, 26.56s/it]\u001b[38;2;15;98;254m \u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'Total documents': 1, 'Successfully converted documents': 1}\n"
+ ]
+ }
+ ],
+ "source": [
+ "output_dir = Path(\"./converted_docs\")\n",
+ "\n",
+ "fname = \"2206.00785.pdf\"\n",
+ "\n",
+ "documents = ds.convert_documents(\n",
+ " api=api,\n",
+ " proj_key=PROJ_KEY,\n",
+ " source_path=f\"../../data/samples/{fname}\",\n",
+ " progress_bar=True\n",
+ ") \n",
+ "documents.download_all(result_dir=output_dir)\n",
+ "info = documents.generate_report(result_dir=output_dir)\n",
+ "print(info) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "382c4869-cca9-43fc-8052-c0ab7e9c175d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "writing converted_docs/2206.00785.json\n",
+ "writing converted_docs/2206.00785.md\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Iterare output files and visualize the output\n",
+ "for output_file in output_dir.rglob(\"json*.zip\"):\n",
+ " with ZipFile(output_file) as archive:\n",
+ " all_files = archive.namelist()\n",
+ " for name in all_files:\n",
+ " if not name.endswith(\".json\"):\n",
+ " continue\n",
+ " \n",
+ " #basename = name.rstrip('.json')\n",
+ " doc_json = json.loads(archive.read(name))\n",
+ " \n",
+ " ofile = output_dir / name\n",
+ " print(f\"writing {ofile}\")\n",
+ " with ofile.open(\"w\") as fw:\n",
+ " fw.write(json.dumps(doc_json, indent=2))\n",
+ " \n",
+ " doc_md = export_to_markdown(doc_json)\n",
+ "\n",
+ " ofile = output_dir / name.replace(\".json\", \".md\")\n",
+ " print(f\"writing {ofile}\")\n",
+ " with ofile.open(\"w\") as fw:\n",
+ " fw.write(doc_md)\n",
+ "\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "b19f7678-b650-484b-a994-150d0c4ec3a2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# display last document\n",
+ "# display(Markdown(doc_md))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6784c8a9-4b96-4385-a04e-40ddbf6c613f",
+ "metadata": {},
+ "source": [
+ "## Extract references from converted Document"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "710cc200-e2ba-46f3-9ca0-efd2baab7ee1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def resolve(path, doc):\n",
+ "\n",
+ " if len(path)>1 and path[0]==\"#\":\n",
+ " return resolve(path[1:], doc)\n",
+ " \n",
+ " if len(path)==1 and isinstance(doc, dict):\n",
+ " return doc[path[0]]\n",
+ "\n",
+ " elif len(path)==1 and isinstance(doc, list):\n",
+ " ind = int(path[0])\n",
+ " return doc[ind]\n",
+ " \n",
+ " elif len(path)>1 and isinstance(doc, dict):\n",
+ " return resolve(path[1:], doc[path[0]])\n",
+ "\n",
+ " elif len(path)>1 and isinstance(doc, list):\n",
+ " ind = int(path[0])\n",
+ " return resolve(path[1:], doc[ind])\n",
+ "\n",
+ " else:\n",
+ " return None\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "ed3612b4-bbd2-42d0-ba2d-f8f994565380",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ifile = \"converted_docs/2206.00785.json\"\n",
+ "\n",
+ "with open(ifile) as fr:\n",
+ " doc = json.load(fr)\n",
+ "\n",
+ "model = init_nlp_model(\"language;reference;metadata\")\n",
+ "res = model.apply_on_doc(doc)\n",
+ "\n",
+ "props = pd.DataFrame(res[\"properties\"][\"data\"], columns=res[\"properties\"][\"headers\"])\n",
+ "insts = pd.DataFrame(res[\"instances\"][\"data\"], columns=res[\"instances\"][\"headers\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "6d98745c-e0f3-41d2-8261-b7953d835dec",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TITLE\n",
+ "Delivering Document Conversion as a Cloud Service with High Throughput and Responsiveness\n",
+ "ABSTRACT\n",
+ "['Abstract-Document understanding is a key business process in the data-driven economy since documents are central to knowledge discovery and business insights. Converting documents into a machine-processable format is a particular challenge here due to their huge variability in formats and complex structure. Accordingly, many algorithms and machine-learning methods emerged to solve particular tasks such as Optical Character Recognition (OCR), layout analysis, table-structure recovery, figure understanding, etc. We observe the adoption of such methods in document understanding solutions offered by all major cloud providers. Yet, publications outlining how such services are designed and optimized to scale in the cloud are scarce. In this paper, we focus on the case of document conversion to illustrate the particular challenges of scaling a complex data processing pipeline with a strong reliance on machine-learning methods on cloud infrastructure. Our key objective is to achieve high scalability and responsiveness for different workload profiles in a well-defined resource budget. We outline the requirements, design, and implementation choices of our document conversion service and reflect on the challenges we faced. Evidence for the scaling behavior and resource efficiency is provided for two alternative workload distribution strategies and deployment configurations. Our best-performing method achieves sustained throughput of over one million PDF pages per hour on 3072 CPU cores across 192 nodes.', 'Index Terms-cloud applications, document understanding, distributed computing, artificial intelligence']\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "5 semantic 8967552455475999131 TEXT #/texts/0 header \n",
+ "7 semantic 384749972256050104 TEXT #/texts/1 meta-data \n",
+ "9 semantic 15891517341344374830 TEXT #/texts/2 meta-data \n",
+ "11 semantic 10276496618786154295 TEXT #/texts/3 meta-data \n",
+ "13 semantic 5624406992563222356 TEXT #/texts/4 meta-data \n",
+ "15 semantic 15035726207261556942 TEXT #/texts/5 text \n",
+ "17 semantic 4662798960261328447 TEXT #/texts/6 text \n",
+ "19 semantic 15072469540570473164 TEXT #/texts/7 header \n",
+ "21 semantic 8600142426167835349 TEXT #/texts/8 text \n",
+ "23 semantic 3072624984713661043 TEXT #/texts/9 text \n",
+ "25 semantic 14339411138813898476 TEXT #/texts/10 text \n",
+ "27 semantic 17407436599861342415 TEXT #/texts/11 meta-data \n",
+ "29 semantic 4004878754391976765 TEXT #/texts/12 text \n",
+ "31 semantic 15578236054977031520 TEXT #/texts/13 text \n",
+ "33 semantic 1317828445053500670 TEXT #/texts/14 text \n",
+ "35 semantic 3501395332085509922 TEXT #/texts/15 text \n",
+ "37 semantic 14716706603701707953 TEXT #/texts/16 text \n",
+ "39 semantic 2277014394919988861 TEXT #/texts/17 text \n",
+ "41 semantic 18364912209191405749 TEXT #/texts/18 header \n",
+ "43 semantic 487083125877341825 TEXT #/texts/19 text \n",
+ "\n",
+ " confidence \n",
+ "5 0.73 \n",
+ "7 1.00 \n",
+ "9 0.99 \n",
+ "11 0.99 \n",
+ "13 0.99 \n",
+ "15 0.99 \n",
+ "17 0.93 \n",
+ "19 1.00 \n",
+ "21 0.99 \n",
+ "23 0.99 \n",
+ "25 1.00 \n",
+ "27 0.99 \n",
+ "29 1.00 \n",
+ "31 1.00 \n",
+ "33 0.97 \n",
+ "35 0.95 \n",
+ "37 1.00 \n",
+ "39 0.95 \n",
+ "41 0.97 \n",
+ "43 1.00 \n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "1 metadata 8967552455475999131 DOCUMENT #/texts/0 title 1.0\n",
+ "2 metadata 15035726207261556942 DOCUMENT #/texts/5 abstract 1.0\n",
+ "3 metadata 4662798960261328447 DOCUMENT #/texts/6 abstract 1.0\n",
+ " subtype subj_path name\n",
+ "0 author #/texts/1 Christoph Auer\n",
+ "1 author #/texts/3 Michele Dolfi\n",
+ "2 author #/texts/4 J Staar\n"
+ ]
+ }
+ ],
+ "source": [
+ "if \"title\" in res[\"description\"]:\n",
+ " print(\"TITLE\")\n",
+ " print(res[\"description\"][\"title\"])\n",
+ "\n",
+ "if \"abstract\" in res[\"description\"]:\n",
+ " print(\"ABSTRACT\")\n",
+ " print(res[\"description\"][\"abstract\"])\n",
+ "\n",
+ "doc_props = props[props[\"type\"]==\"semantic\"]\n",
+ "print(doc_props[0:20])\n",
+ "\n",
+ "doc_props = props[props[\"type\"]==\"metadata\"]\n",
+ "print(doc_props)\n",
+ "\n",
+ "doc_insts = insts[insts[\"subj_name\"]==\"DOCUMENT\"][[\"subtype\", \"subj_path\", \"name\"]]\n",
+ "print(doc_insts)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cfeca54d-bbc1-4022-851d-0b29027de761",
+ "metadata": {},
+ "source": [
+ "## Extract MetaData from public documents"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "8bb459a8-7b26-4dc3-98da-b1b4a1b59fcc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import standard dependenices\n",
+ "from copy import deepcopy\n",
+ "import pandas as pd\n",
+ "from numerize.numerize import numerize\n",
+ "from tqdm.notebook import tqdm\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "\n",
+ "# IPython utilities\n",
+ "from IPython.display import display, HTML\n",
+ "\n",
+ "# Import the deepsearch-toolkit\n",
+ "import deepsearch as ds\n",
+ "from deepsearch.cps.client.components.elastic import ElasticDataCollectionSource\n",
+ "from deepsearch.cps.queries import DataQuery"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "d064166b-7578-437c-b3a6-b16eb3d95c1f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fetch list of all data collections\n",
+ "collections = api.elastic.list()\n",
+ "collections.sort(key=lambda c: c.name.lower())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "db9da464-23db-4562-a1ce-259a717f404a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Type | \n",
+ " Num entries | \n",
+ " Date | \n",
+ " Coords | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AAAI | \n",
+ " Document | \n",
+ " 16.02K | \n",
+ " 2023-08-29 | \n",
+ " default/aaai | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " ACL Anthology | \n",
+ " Document | \n",
+ " 55.28K | \n",
+ " 2023-08-22 | \n",
+ " default/acl | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Annual Reports | \n",
+ " Document | \n",
+ " 107.38K | \n",
+ " 2024-04-15 | \n",
+ " default/annual-report | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " arXiv abstracts | \n",
+ " Document | \n",
+ " 2.37M | \n",
+ " 2023-12-07 | \n",
+ " default/arxiv-abstract | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " arXiv category taxonomy | \n",
+ " Record | \n",
+ " 155 | \n",
+ " 2023-12-05 | \n",
+ " default/arxiv-category | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 56 | \n",
+ " UMLS | \n",
+ " Record | \n",
+ " 2.69M | \n",
+ " 2023-01-03 | \n",
+ " default/umls | \n",
+ "
\n",
+ " \n",
+ " | 57 | \n",
+ " UniProt | \n",
+ " Record | \n",
+ " 567.48K | \n",
+ " 2023-01-03 | \n",
+ " default/uniprot | \n",
+ "
\n",
+ " \n",
+ " | 58 | \n",
+ " USPTO patents for NER | \n",
+ " Document | \n",
+ " 2.64K | \n",
+ " 2023-03-20 | \n",
+ " default/uspto-for-ner | \n",
+ "
\n",
+ " \n",
+ " | 59 | \n",
+ " VHDL articles | \n",
+ " Document | \n",
+ " 215 | \n",
+ " 2024-04-23 | \n",
+ " default/vhdl | \n",
+ "
\n",
+ " \n",
+ " | 60 | \n",
+ " Wikipedia | \n",
+ " Document | \n",
+ " 6.45M | \n",
+ " 2024-02-26 | \n",
+ " default/wikipedia | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
61 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Type Num entries Date \\\n",
+ "0 AAAI Document 16.02K 2023-08-29 \n",
+ "1 ACL Anthology Document 55.28K 2023-08-22 \n",
+ "2 Annual Reports Document 107.38K 2024-04-15 \n",
+ "3 arXiv abstracts Document 2.37M 2023-12-07 \n",
+ "4 arXiv category taxonomy Record 155 2023-12-05 \n",
+ ".. ... ... ... ... \n",
+ "56 UMLS Record 2.69M 2023-01-03 \n",
+ "57 UniProt Record 567.48K 2023-01-03 \n",
+ "58 USPTO patents for NER Document 2.64K 2023-03-20 \n",
+ "59 VHDL articles Document 215 2024-04-23 \n",
+ "60 Wikipedia Document 6.45M 2024-02-26 \n",
+ "\n",
+ " Coords \n",
+ "0 default/aaai \n",
+ "1 default/acl \n",
+ "2 default/annual-report \n",
+ "3 default/arxiv-abstract \n",
+ "4 default/arxiv-category \n",
+ ".. ... \n",
+ "56 default/umls \n",
+ "57 default/uniprot \n",
+ "58 default/uspto-for-ner \n",
+ "59 default/vhdl \n",
+ "60 default/wikipedia \n",
+ "\n",
+ "[61 rows x 5 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Visualize summary table\n",
+ "results = [\n",
+ " {\n",
+ " \"Name\": c.name,\n",
+ " \"Type\": c.metadata.type,\n",
+ " \"Num entries\": numerize(c.documents),\n",
+ " \"Date\": c.metadata.created.strftime(\"%Y-%m-%d\"),\n",
+ " \"Coords\": f\"{c.source.elastic_id}/{c.source.index_key}\",\n",
+ " }\n",
+ " for c in collections\n",
+ "]\n",
+ "display(pd.DataFrame(results))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "357340cc-97e3-44bc-aa28-41a1be1e9a20",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "731a7106c87f46fb97ed8d94c8ce883b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/61 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " matches | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " arXiv full documents | \n",
+ " 165 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Semantic Scholar Academic Graph | \n",
+ " 40 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " OpenCVF | \n",
+ " 31 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " arXiv abstracts | \n",
+ " 24 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ACL Anthology | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " name matches\n",
+ "0 arXiv full documents 165\n",
+ "1 Semantic Scholar Academic Graph 40\n",
+ "2 OpenCVF 31\n",
+ "3 arXiv abstracts 24\n",
+ "4 ACL Anthology 16"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Input query: search for papers which mention `DocLayNet` or `PubLayNet` in the main-text\n",
+ "search_query = \"main-text.text:(\\\"DocLayNet\\\" OR \\\"PubLayNet\\\")\"\n",
+ "\n",
+ "# Iterate through the data collections\n",
+ "results = []\n",
+ "for c in (pbar := tqdm(collections)):\n",
+ " pbar.set_description(f\"Querying {c.name}\")\n",
+ "\n",
+ " # Search only on document collections\n",
+ " if c.metadata.type != \"Document\":\n",
+ " continue\n",
+ "\n",
+ " # Execute the query\n",
+ " query = DataQuery(search_query, source=[], limit=0, coordinates=c.source)\n",
+ " query_results = api.queries.run(query)\n",
+ " results.append({\n",
+ " \"name\": c.name,\n",
+ " \"matches\": query_results.outputs[\"data_count\"]\n",
+ " })\n",
+ "\n",
+ "# Sort and display results\n",
+ "results.sort(reverse=True, key=lambda r: r[\"matches\"])\n",
+ "display(pd.DataFrame(results[0:5]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "90f84882-1c85-4b0a-b0eb-ea5bf0b41e32",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e61ea71924bd4746961398551b4955bd",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/33 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2007.12238.pdf\n",
+ "title: MiniConf-A Virtual Conference Framework\n",
+ "abstract: Abstract MiniConf is a framework for hosting virtual academic conferences motivated by the sudden inability for these events to be hosted globally. The framework is designed to be global and asynchronous, interactive, and to promote browsing and discovery. We developed the system to be sustainable and maintainable, in particular ensuring that it is open-source, easy to setup, and scalable on minimal hardware. In this technical report, we discuss design decisions, provide technical detail, and show examples of a case study deployment. Keywords Conference Management-Academic Communication-Software Development $^{1}$CS+Cornell Tech, Cornell University, New York NY, USA $^{2}$MIT-IBM Watson AI Lab, IBM Research, Cambridge MA, USA Correspondence : info@mini-conf.org\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 2265028778467379955 DOCUMENT # en 1.00\n",
+ "1 metadata 7284302905140581098 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 4436866271436177692 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 3600736756989613306 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 metadata 4437602680284425626 DOCUMENT #/texts/5 abstract 1.00\n",
+ "5 metadata 4167859403464433494 DOCUMENT #/texts/6 abstract 1.00\n",
+ "6 metadata 16515177709963531263 DOCUMENT #/texts/7 abstract 1.00\n",
+ "7 metadata 12555384452004545401 DOCUMENT #/texts/8 abstract 1.00\n",
+ "8 metadata 6728476374858842747 DOCUMENT #/texts/9 abstract 1.00\n",
+ "9 language 7357703022170425089 TEXT #/texts/0 en 0.52\n",
+ "10 semantic 7357703022170425089 TEXT #/texts/0 reference 0.97\n",
+ "11 language 7284302905140581098 TEXT #/texts/1 en 0.67\n",
+ "2111.06016.pdf\n",
+ "title: SYNTHETIC DOCUMENT GENERATOR FOR ANNOTATION-FREE LAYOUT RECOGNITION\n",
+ "abstract: ABSTRACT Analyzing the layout of a document to identify headers, sections, tables, figures etc. is critical to understanding its content. Deep learning based approaches for detecting the layout structure of document images have been promising. However, these methods require a large number of annotated examples during training, which are both expensive and time consuming to obtain. We describe here a synthetic document generator that automatically produces realistic documents with labels for spatial positions, extents and categories of the layout elements. The proposed generative process treats every physical component of a document as a random variable and models their intrinsic dependencies using a Bayesian Network graph. Our hierarchical formulation using stochastic templates allow parameter sharing between documents for retaining broad themes and yet the distributional characteristics produces visually unique samples, thereby capturing complex and diverse layouts. We empirically illustrate that a deep layout detection model trained purely on the synthetic documents can match the performance of a model that uses real documents. K eywords Synthetic Image Generation · Bayesian Network · Layout Analysis\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 15863822848670978642 DOCUMENT # en 1.00\n",
+ "1 metadata 849366496391973042 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 6562169947754304198 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 13702001854642761223 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 metadata 12685056807055253008 DOCUMENT #/texts/5 abstract 1.00\n",
+ "5 language 10149061364101562486 TEXT #/texts/0 en 0.50\n",
+ "6 semantic 10149061364101562486 TEXT #/texts/0 reference 0.91\n",
+ "7 language 849366496391973042 TEXT #/texts/1 en 0.35\n",
+ "8 semantic 849366496391973042 TEXT #/texts/1 header 0.97\n",
+ "9 language 9139753227411297604 TEXT #/texts/2 en 0.21\n",
+ "10 semantic 9139753227411297604 TEXT #/texts/2 meta-data 0.99\n",
+ "11 language 6562169947754304198 TEXT #/texts/3 en 0.64\n",
+ "2105.14931.pdf\n",
+ "title: Document Domain Randomization for Deep Learning Document Layout Extraction\n",
+ "abstract: Abstract. We present d ocument d omain r andomization (DDR), the first successful transfer of CNNs trained only on graphically rendered pseudo-paper pages to real-world document segmentation. DDR renders pseudo-document pages by modeling randomized textual and non-textual contents of interest, with userdefined layout and font styles to support joint learning of fine-grained classes. We demonstrate competitive results using our DDR approach to extract nine document classes from the benchmark CS-150 and papers published in two domains, namely annual meetings of Association for Computational Linguistics (ACL) and IEEE Visualization (VIS). We compare DDR to conditions of style mismatch, fewer or more noisy samples that are more easily obtained in the real world. We show that high-fidelity semantic information is not necessary to label semantic classes but style mismatch between train and test can lower model accuracy. Using smaller training samples had a slightly detrimental effect. Finally, network models still achieved high test accuracy when correct labels are diluted towards confusing labels; this behavior hold across several classes. Keywords: Document domain randomization · Document layout · Deep neural network · behavior analysis · evaluation.\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 7327671452024217603 DOCUMENT # en \n",
+ "1 metadata 14069245462061859532 DOCUMENT #/texts/1 title \n",
+ "2 metadata 12775108533738256421 DOCUMENT #/texts/12 abstract \n",
+ "3 metadata 16989119287644322975 DOCUMENT #/texts/13 abstract \n",
+ "4 language 1657309244825806266 TEXT #/texts/0 en \n",
+ "5 semantic 1657309244825806266 TEXT #/texts/0 reference \n",
+ "6 language 14069245462061859532 TEXT #/texts/1 en \n",
+ "7 semantic 14069245462061859532 TEXT #/texts/1 header \n",
+ "8 language 8299103160765612598 TEXT #/texts/2 en \n",
+ "9 semantic 8299103160765612598 TEXT #/texts/2 meta-data \n",
+ "10 language 3210927963314597787 TEXT #/texts/3 en \n",
+ "11 semantic 3210927963314597787 TEXT #/texts/3 meta-data \n",
+ "\n",
+ " confidence \n",
+ "0 0.96 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 0.64 \n",
+ "5 0.66 \n",
+ "6 0.51 \n",
+ "7 0.88 \n",
+ "8 0.24 \n",
+ "9 0.99 \n",
+ "10 0.44 \n",
+ "11 0.72 \n",
+ "2102.02971.pdf\n",
+ "title: Metaknowledge Extraction Based on Multi-Modal Documents\n",
+ "abstract: Abstract-The triple-based knowledge in large-scale knowledge bases is most likely lacking in structural logic and problematic of conducting knowledge hierarchy. In this paper, we introduce the concept of metaknowledge to knowledge engineering research for the purpose of structural knowledge construction. Therefore, the Metaknowledge Extraction Framework and Document Structure Tree model are presented to extract and organize metaknowledge elements (titles, authors, abstracts, sections, paragraphs, etc.), so that it is feasible to extract the structural knowledge from multi-modal documents. Experiment results have proved the effectiveness of metaknowledge elements extraction by our framework. Meanwhile, detailed examples are given to demonstrate what exactly metaknowledge is and how to generate it. At the end of this paper, we propose and analyze the task flow of metaknowledge applications and the associations between knowledge and metaknowledge. Index Terms-Metaknowledge, Multi-Modal, Document Layout Analysis, Knowledge Graph.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 9495012996734215687 DOCUMENT # en 0.97\n",
+ "1 metadata 10742098332968591246 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 2370874436100491633 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 11616931947318553305 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 12404813074137060112 TEXT #/texts/0 en 0.49\n",
+ "5 semantic 12404813074137060112 TEXT #/texts/0 text 0.90\n",
+ "6 language 10742098332968591246 TEXT #/texts/1 en 0.62\n",
+ "7 semantic 10742098332968591246 TEXT #/texts/1 header 0.55\n",
+ "8 language 4948844491635507699 TEXT #/texts/2 en 0.56\n",
+ "9 semantic 4948844491635507699 TEXT #/texts/2 meta-data 0.88\n",
+ "10 language 2370874436100491633 TEXT #/texts/3 en 0.89\n",
+ "11 semantic 2370874436100491633 TEXT #/texts/3 text 0.99\n",
+ "2003.13197.pdf\n",
+ "title: Cross-Domain Document Object Detection: Benchmark Suite and Method\n",
+ "abstract: Abstract Decomposing images of document pages into high-level semantic regions (e.g., figures, tables, paragraphs), document object detection (DOD) is fundamental for downstream tasks like intelligent document editing and understanding. DOD remains a challenging problem as document objects vary significantly in layout, size, aspect ratio, texture, etc. An additional challenge arises in practice because large labeled training datasets are only available for domains that differ from the target domain. We investigate cross-domain DOD, where the goal is to learn a detector for the target domain using labeled data from the source domain and only unlabeled data from the target domain. Documents from the two domains may vary significantly in layout, language, and genre. We establish a benchmark suite consisting of different types of PDF document datasets that can be utilized for cross-domain DOD model training and evaluation. For each dataset, we provide the page images, bounding box annotations, PDF files, and the rendering layers extracted from the PDF files. Moreover, we propose a novel cross-domain DOD model which builds upon the standard detection model and addresses domain shifts by incorporating three novel alignment modules: Feature Pyramid Alignment (FPA) module, Region Alignment (RA) module and Rendering Layer alignment (RLA) module. Extensive experiments on the benchmark suite substantiate the efficacy of the three proposed modules and the proposed method significantly outperforms the baseline methods. The project page is at https://github.com/kailigo/cddod.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 2954750465553139966 DOCUMENT # en 1.00\n",
+ "1 metadata 9382351260204097292 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 12550748484969917940 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 16113821817977818841 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 9754266243859282074 TEXT #/texts/0 en 0.23\n",
+ "5 semantic 9754266243859282074 TEXT #/texts/0 reference 0.78\n",
+ "6 language 9382351260204097292 TEXT #/texts/1 en 0.51\n",
+ "7 semantic 9382351260204097292 TEXT #/texts/1 header 0.70\n",
+ "8 language 202484138750054472 TEXT #/texts/2 en 0.25\n",
+ "9 semantic 202484138750054472 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 14449744740596420817 TEXT #/texts/3 en 0.20\n",
+ "11 semantic 14449744740596420817 TEXT #/texts/3 meta-data 1.00\n",
+ "2111.08609.pdf\n",
+ "title: Lei Cui, Yiheng Xu, Tengchao Lv, Furu Wei\n",
+ "abstract: ABSTRACT Document AI, or Document Intelligence, is a relatively new research topic that refers to the techniques for automatically reading, understanding, and analyzing business documents. It is an important research direction for natural language processing and computer vision. In recent years, the popularity of deep learning technology has greatly advanced the development of Document AI, such as document layout analysis, visual information extraction, document visual question answering, document image classification, etc. This paper briefly reviews some of the representative models, tasks, and benchmark datasets. Furthermore, we also introduce early-stage heuristic rule-based document analysis, statistical machine learning algorithms, and deep learning approaches especially pre-training methods. Finally, we look into future directions for Document AI research. 1 DOCUMENT AI Document AI, or Document Intelligence, is a booming research topic with increased industrial demand in recent years. It mainly refers to the process of automated understanding, classifying and extracting information with rich typesetting formats from webpages, digital-born documents or scanned documents through AI technology. Due to the diversity of layouts and formats, low-quality scanned document images, and the complexity of the template structure, Document AI is a very challenging task and has attracted widespread attention in related research areas. With the acceleration of digitization, the structured analysis and content extraction of documents, images and others has become a key part of the success of digital transformation. Meanwhile automatic, accurate, and rapid information processing is crucial to improving productivity. Taking business documents as an example, they not only contain the processing details and knowledge accumulation of a company's internal and external affairs, but also a large number of industry-related entities and digital information. Manually extracting information is time-consuming and labor-intensive with low accuracy and low reusability. Document AI deeply combines artificial intelligence and human intelligence, and has different types of applications in multiple industries such as finance, healthcare, insurance, energy and logistics. For instance, in the finance field, it can conduct financial report analysis and intelligent decision analysis, and provide scientific and systematic data support for the formulation of corporate strategies and investment decisions. In healthcare, it can improve the digitization of medical cases and enhance diagnosis accuracy. By analyzing the correlation between medical literature and cases, people can locate potential treatment options. In the accounting field, it can achieve automatic information extraction of invoices and purchase orders, automatically analyze a large number of unstructured documents, and support different downstream business scenarios, saving a lot of manual processing time.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 18024887739747733016 DOCUMENT # en 1.00\n",
+ "1 metadata 2823524375188962888 DOCUMENT #/texts/2 title 1.00\n",
+ "2 metadata 17737597528573843477 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 4278000990015673025 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 metadata 14014369534978377579 DOCUMENT #/texts/7 abstract 1.00\n",
+ "5 metadata 7722544801970925360 DOCUMENT #/texts/8 abstract 1.00\n",
+ "6 language 326120714262372444 TEXT #/texts/0 en 0.40\n",
+ "7 semantic 326120714262372444 TEXT #/texts/0 reference 0.51\n",
+ "8 language 7667149918016322326 TEXT #/texts/1 en 0.44\n",
+ "9 semantic 7667149918016322326 TEXT #/texts/1 header 0.72\n",
+ "10 language 2823524375188962888 TEXT #/texts/2 en 0.71\n",
+ "11 semantic 2823524375188962888 TEXT #/texts/2 reference 0.90\n",
+ "2209.00852.pdf\n",
+ "title: Geometry Aligned Variational Transformer for Image-conditioned Layout Generation\n",
+ "abstract: ABSTRACT Layout generation is a novel task in computer vision, which combines the challenges in both object localization and aesthetic appraisal, widely used in advertisements, posters, and slides design. An accurate and pleasant layout should consider both the intradomain relationship within layout elements and the inter-domain relationship between layout elements and the image. However, most previous methods simply focus on image-content-agnostic layout generation, without leveraging the complex visual information from the image. To this end, we explore a novel paradigm entitled image-conditioned layout generation, which aims to add text overlays to an image in a semantically coherent manner. Specifically, we propose an Image-Conditioned Variational Transformer (ICVT) that autoregressively generates various layouts in an image. First, self-attention mechanism is adopted to model the contextual relationship within layout elements, while cross-attention mechanism is used to fuse the visual information of conditional images. Subsequently, we take them as building blocks of conditional variational autoencoder (CVAE), which demonstrates appealing diversity. Second, in order to alleviate the gap between layout elements domain and visual domain, we design a Geometry Alignment module, in which the geometric information of the image is aligned with the layout representation. In addition, we construct a large-scale advertisement poster layout designing dataset with delicate layout and saliency map annotations. Experimental results show that our model can adaptively generate layouts in the non-intrusive area of the image, resulting in a harmonious layout design. KEYWORDS image-conditioned layout generation, conditional variational autoencoder, Transformer, cross attention\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 1043133886708999723 DOCUMENT # en 0.99\n",
+ "1 metadata 16310176366628644108 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 7923403198268402625 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 6638340325318454439 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 metadata 7923294752006355278 DOCUMENT #/texts/5 abstract 1.00\n",
+ "5 metadata 4688329289695183139 DOCUMENT #/texts/6 abstract 1.00\n",
+ "6 language 8377923071847443401 TEXT #/texts/0 en 0.56\n",
+ "7 semantic 8377923071847443401 TEXT #/texts/0 reference 0.95\n",
+ "8 language 16310176366628644108 TEXT #/texts/1 en 0.74\n",
+ "9 semantic 16310176366628644108 TEXT #/texts/1 header 0.86\n",
+ "10 language 12909032936984420733 TEXT #/texts/2 en 0.35\n",
+ "11 semantic 12909032936984420733 TEXT #/texts/2 meta-data 0.99\n",
+ "2203.09056.pdf\n",
+ "title: Robust Table Detection and Structure Recognition from Heterogeneous Document Images\n",
+ "abstract: Abstract We introduce a new table detection and structure recognition approach named RobusTabNet to detect the boundaries of tables and reconstruct the cellular structure of the table from heterogeneous document images. For table detection, we propose to use CornerNet as a new region proposal network to generate higher quality table proposals for Faster R-CNN, which has significantly improved the localization accuracy of Faster R-CNN for table detection. Consequently, our table detection approach achieves state-of-the-art performance on three public table detection benchmarks, namely cTDaR TrackA, PubLayNet and IIIT-AR-13K, by only using a lightweight ResNet-18 backbone network. Furthermore, we propose a new split-and-merge based table structure recognition approach, in which a novel spatial CNN based separation line prediction module is proposed to split each detected table into a grid of cells, and a Grid CNN based cell merging module is applied to recover the spanning cells. As the spatial CNN module can e ectively propagate contextual information across the whole table image, our table structure recognizer can robustly recognize tables with large blank spaces and geometrically distorted (even curved) tables. Thanks to these two techniques, our table structure recognition approach achieves state-of-the-art performance on three public benchmarks, including SciTSR, PubTabNet and cTDaR TrackB. Moreover, we have further demonstrated the advantages of our approach in recognizing tables with complex structures, large blank spaces, empty or spanning cells as well as geometrically distorted or even curved tables on a more challenging in-house dataset. Keywords: Table detection, Table structure recognition, Corner detection, Spatial CNN, Grid CNN, Split-and-merge\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 889660866869193011 DOCUMENT # en 0.99\n",
+ "1 metadata 4207068466148295970 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 15138944633239382092 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 5174676295578023103 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 metadata 5104988531686271285 DOCUMENT #/texts/6 abstract 1.00\n",
+ "5 language 12654551183495324521 TEXT #/texts/0 en 0.42\n",
+ "6 semantic 12654551183495324521 TEXT #/texts/0 text 0.89\n",
+ "7 language 4207068466148295970 TEXT #/texts/1 en 0.67\n",
+ "8 semantic 4207068466148295970 TEXT #/texts/1 header 0.66\n",
+ "9 language 8283687367011430155 TEXT #/texts/2 it 0.31\n",
+ "10 semantic 8283687367011430155 TEXT #/texts/2 meta-data 0.99\n",
+ "11 language 2638908518268155278 TEXT #/texts/3 en 0.64\n",
+ "2203.09638.pdf\n",
+ "title: Unified Line and Paragraph Detection by Graph Convolutional Networks ⋆\n",
+ "abstract: Abstract. We formulate the task of detecting lines and paragraphs in a document into a unified two-level clustering problem. Given a set of text detection boxes that roughly correspond to words, a text line is a cluster of boxes and a paragraph is a cluster of lines. These clusters form a two-level tree that represents a major part of the layout of a document. We use a graph convolutional network to predict the relations between text detection boxes and then build both levels of clusters from these predictions. Experimentally, we demonstrate that the unified approach can be highly efficient while still achieving state-of-the-art quality for detecting paragraphs in public benchmarks and real-world images. Keywords: Text detection, document layout, graph convolutional network.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 2831576719369875742 DOCUMENT # en 1.00\n",
+ "1 metadata 10579568759084702273 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 1968444455801408936 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 13330257735040533249 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 9315017878854505401 TEXT #/texts/0 en 0.33\n",
+ "5 semantic 9315017878854505401 TEXT #/texts/0 text 0.89\n",
+ "6 language 10579568759084702273 TEXT #/texts/1 en 0.90\n",
+ "7 semantic 10579568759084702273 TEXT #/texts/1 header 0.63\n",
+ "8 language 618605864483312106 TEXT #/texts/2 en 0.24\n",
+ "9 semantic 618605864483312106 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 18349261345975562107 TEXT #/texts/3 en 0.75\n",
+ "11 semantic 18349261345975562107 TEXT #/texts/3 meta-data 0.97\n",
+ "2305.05836.pdf\n",
+ "title: Extracting Complex Named Entities in Legal Documents via Weakly Supervised Object Detection\n",
+ "abstract: ABSTRACT Accurate Named Entity Recognition (NER) is crucial for various information retrieval tasks in industry. However, despite significant progress in traditional NER methods, the extraction of Complex Named Entities remains a relatively unexplored area. In this paper, we propose a novel system that combines object detection for Document Layout Analysis (DLA) with weakly supervised learning to address the challenge of extracting discontinuous complex named entities in legal documents. Notably, to the best of our knowledge, this is the first work to apply weak supervision to DLA. Our experimental results show that the model trained solely on pseudo labels outperforms the supervised baseline when gold-standard data is limited, highlighting the effectiveness of our proposed approach in reducing the dependency on annotated data. CCS CONCEPTS · Applied computing → Law; · Information systems → Document structure; · Computing methodologies → Information extraction. KEYWORDS complex named entity recognition, weakly supervised object detection, document understanding, law, information extraction ACM Reference Format: Hsiu-Wei Yang and Abhinav Agrawal. 2023. Extracting Complex Named Entities in Legal Documents via Weakly Supervised Object Detection. In Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR '23), July 23-27, 2023, Taipei, Taiwan. ACM, New York, NY, USA, 5 pages. https://doi.org/10.1145/3539618. 3591852\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 11794573301687210414 DOCUMENT # en 1.00\n",
+ "1 metadata 17466521243985813516 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 6886915531134010569 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 11297975182741860390 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 metadata 10877363875417754656 DOCUMENT #/texts/6 abstract 1.00\n",
+ "5 metadata 13673276854222443119 DOCUMENT #/texts/7 abstract 1.00\n",
+ "6 metadata 6886659627785775936 DOCUMENT #/texts/8 abstract 1.00\n",
+ "7 metadata 6196452448967347559 DOCUMENT #/texts/9 abstract 1.00\n",
+ "8 metadata 18033064798229328065 DOCUMENT #/texts/10 abstract 1.00\n",
+ "9 metadata 8842802019638608357 DOCUMENT #/texts/11 abstract 1.00\n",
+ "10 language 11740347048559880097 TEXT #/texts/0 en 0.62\n",
+ "11 semantic 11740347048559880097 TEXT #/texts/0 text 0.83\n",
+ "2305.02567.pdf\n",
+ "title: LayoutDM: Transformer-based Diffusion Model for Layout Generation\n",
+ "abstract: Abstract Automatic layout generation that can synthesize highquality layouts is an important tool for graphic design in many applications. Though existing methods based on generative models such as Generative Adversarial Networks (GANs) and Variational Auto-Encoders (VAEs) have progressed, they still leave much room for improving the quality and diversity of the results. Inspired by the recent success of diffusion models in generating high-quality images, this paper explores their potential for conditional layout generation and proposes Transformer-based Layout Diffusion Model (LayoutDM) by instantiating the conditional denoising diffusion probabilistic model (DDPM) with a purely transformer-based architecture. Instead of using convolutional neural networks, a transformer-based conditional Layout Denoiser is proposed to learn the reverse diffusion process to generate samples from noised layout data. Benefitting from both transformer and DDPM, our LayoutDM is of desired properties such as high-quality generation, strong sample diversity, faithful distribution coverage, and stationary training in comparison to GANs and VAEs. Quantitative and qualitative experimental results show that our method outperforms state-of-the-art generative models in terms of quality and diversity.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10181470485652924664 DOCUMENT # en 0.99\n",
+ "1 metadata 9434206477714495822 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 3785748435451180652 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 3680119538929946904 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 9912909017476921615 TEXT #/texts/0 en 0.54\n",
+ "5 semantic 9912909017476921615 TEXT #/texts/0 reference 0.66\n",
+ "6 language 9434206477714495822 TEXT #/texts/1 en 0.61\n",
+ "7 semantic 9434206477714495822 TEXT #/texts/1 header 0.86\n",
+ "8 language 10066165997310264035 TEXT #/texts/2 en 0.77\n",
+ "9 semantic 10066165997310264035 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 11841021682582949258 TEXT #/texts/3 zh 0.43\n",
+ "11 semantic 11841021682582949258 TEXT #/texts/3 meta-data 0.99\n",
+ "2205.12840.pdf\n",
+ "title: DistillAdapt: Source-Free Active Visual Domain Adaptation\n",
+ "abstract: Abstract. We present a novel method, DistillAdapt, for the challenging problem of Source-free Active Domain Adaptation (SF-ADA). The problem requires adapting a pre-trained 'source' domain network to a 'target' domain, within a provided budget for acquiring labels in the 'target' domain, while assuming that the source data is not available for adaptation, due to privacy concerns or otherwise. DistillAdapt is one of the first approaches for SF-ADA, and holistically addresses the challenges of SF-ADA via a novel Guided Attention Transfer Network (GATN) and an active learning function, H$_{AL}$. The GATN enables selective distillation of features from the pre-trained network to the target network using a small subset of annotated target samples mined by H$_{AL}$. H$_{AL}$ acquires samples at batch-level and balances transfer-ability from the pre-trained network and uncertainty of the target network. DistillAdapt is task-agnostic, and can be applied across visual tasks such as classification, segmentation and detection. Moreover, DistillAdapt can handle shifts in output label space. We conduct experiments and extensive ablation studies across 3 visual tasks, viz. digits classification (MNIST, SVHN), synthetic (GTA5) to real (CityScapes) image segmentation, and document layout detection (PubLayNet to DSSE). We show that our source-free approach, DistillAdapt, results in an improvement of 0. 5%-31. 3% (across datasets and tasks) over prior adaptation methods that assume access to large amounts of annotated source data for adaptation.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 15881966247098943967 DOCUMENT # en 1.00\n",
+ "1 metadata 14946664510960877857 DOCUMENT #/texts/0 title 1.00\n",
+ "2 metadata 1143708917984711296 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 language 14946664510960877857 TEXT #/texts/0 en 0.32\n",
+ "4 semantic 14946664510960877857 TEXT #/texts/0 header 0.76\n",
+ "5 language 4787167108441673167 TEXT #/texts/1 en 0.49\n",
+ "6 semantic 4787167108441673167 TEXT #/texts/1 meta-data 0.96\n",
+ "7 language 5395310715898161319 TEXT #/texts/2 en 0.74\n",
+ "8 semantic 5395310715898161319 TEXT #/texts/2 meta-data 0.90\n",
+ "9 language 1143708917984711296 TEXT #/texts/3 en 0.85\n",
+ "10 semantic 1143708917984711296 TEXT #/texts/3 text 0.98\n",
+ "11 language 8083078873212012775 TEXT #/texts/4 en 0.12\n",
+ "2105.06400.pdf\n",
+ "title: TabLeX: A Benchmark Dataset for Structure and Content Information Extraction from Scientific Tables\n",
+ "abstract: Abstract. Information Extraction (IE) from the tables present in scientific articles is challenging due to complicated tabular representations and complex embedded text. This paper presents$_{TabLeX}$, a large-scale benchmark dataset comprising table images generated from scientific articles. TabLeX consists of two subsets, one for table structure extraction and the other for table content extraction. Each table image is accompanied by its corresponding L A T E X source code. To facilitate the development of robust table IE tools, TabLeX contains images in different aspect ratios and in a variety of fonts. Our analysis sheds light on the shortcomings of current state-of-the-art table extraction models and shows that they fail on even simple table images. Towards the end, we experiment with a transformer-based existing baseline to report performance scores. In contrast to the static benchmarks, we plan to augment this dataset with more complex and diverse tables at regular intervals. Keywords: Information Extraction · L A T E X · Scientific Articles.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 13137329850442248461 DOCUMENT # en 0.99\n",
+ "1 metadata 11472458349144611068 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 4588761031433719882 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 8958995113943448416 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 12599017931824777467 TEXT #/texts/0 en 0.63\n",
+ "5 semantic 12599017931824777467 TEXT #/texts/0 text 0.83\n",
+ "6 language 11472458349144611068 TEXT #/texts/1 en 0.59\n",
+ "7 semantic 11472458349144611068 TEXT #/texts/1 header 0.86\n",
+ "8 language 11111553338144474380 TEXT #/texts/2 en 0.42\n",
+ "9 semantic 11111553338144474380 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 1415660526087454217 TEXT #/texts/3 en 0.56\n",
+ "11 semantic 1415660526087454217 TEXT #/texts/3 meta-data 0.95\n",
+ "2103.15348.pdf\n",
+ "title: LayoutParser : A Unified Toolkit for Deep Learning Based Document Image Analysis\n",
+ "abstract: Abstract. Recent advances in document image analysis (DIA) have been primarily driven by the application of neural networks. Ideally, research outcomes could be easily deployed in production and extended for further investigation. However, various factors like loosely organized codebases and sophisticated model configurations complicate the easy reuse of important innovations by a wide audience. Though there have been on-going efforts to improve reusability and simplify deep learning (DL) model development in disciplines like natural language processing and computer vision, none of them are optimized for challenges in the domain of DIA. This represents a major gap in the existing toolkit, as DIA is central to academic research across a wide range of disciplines in the social sciences and humanities. This paper introduces LayoutParser, an open-source library for streamlining the usage of DL in DIA research and applications. The core LayoutParser library comes with a set of simple and intuitive interfaces for applying and customizing DL models for layout detection, character recognition, and many other document processing tasks. To promote extensibility, LayoutParser also incorporates a community platform for sharing both pre-trained models and full document digitization pipelines. We demonstrate that LayoutParser is helpful for both lightweight and large-scale digitization pipelines in real-word use cases. The library is publicly available at https://layout-parser.github.io. Keywords: Document Image Analysis · Deep Learning · Layout Analysis · Character Recognition · Open Source library · Toolkit.\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 12548874647383832801 DOCUMENT # en \n",
+ "1 metadata 2802584575013519106 DOCUMENT #/texts/1 title \n",
+ "2 metadata 2179018036072283553 DOCUMENT #/texts/11 abstract \n",
+ "3 metadata 8986516417526936006 DOCUMENT #/texts/12 abstract \n",
+ "4 language 1737321237888584465 TEXT #/texts/0 en \n",
+ "5 semantic 1737321237888584465 TEXT #/texts/0 text \n",
+ "6 language 2802584575013519106 TEXT #/texts/1 en \n",
+ "7 semantic 2802584575013519106 TEXT #/texts/1 header \n",
+ "8 language 7564785066304785170 TEXT #/texts/2 en \n",
+ "9 semantic 7564785066304785170 TEXT #/texts/2 meta-data \n",
+ "10 language 9158019064682925600 TEXT #/texts/3 en \n",
+ "11 semantic 9158019064682925600 TEXT #/texts/3 meta-data \n",
+ "\n",
+ " confidence \n",
+ "0 1.00 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 0.34 \n",
+ "5 0.89 \n",
+ "6 0.52 \n",
+ "7 0.81 \n",
+ "8 0.24 \n",
+ "9 1.00 \n",
+ "10 0.52 \n",
+ "11 0.99 \n",
+ "2110.09915.pdf\n",
+ "title: Entity Relation Extraction as Dependency Parsing in Visually Rich Documents\n",
+ "abstract: Abstract Previous works on key information extraction from visually rich documents (VRDs) mainly focus on labeling the text within each bounding box (i.e., semantic entity), while the relations in-between are largely unexplored. In this paper, we adapt the popular dependency parsing model, the biaffine parser, to this entity relation extraction task. Being different from the original dependency parsing model which recognizes dependency relations between words, we identify relations between groups of words with layout information instead. We have compared different representations of the semantic entity, different VRD encoders, and different relation decoders. For the model training, we explore multi-task learning to combine entity labeling and relation extraction tasks; and for the evaluation, we conduct experiments on different datasets with filtering and augmentation. The results demonstrate that our proposed model achieves 65.96% F1 score on the FUNSD dataset. As for the realworld application, our model has been applied to the in-house customs data, achieving reliable performance in the production setting.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 7256593462777767795 DOCUMENT # en 0.99\n",
+ "1 metadata 11039456751490139420 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 3437509844966890620 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 3351640345825674097 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 4359367855478211315 TEXT #/texts/0 en 0.36\n",
+ "5 semantic 4359367855478211315 TEXT #/texts/0 text 0.70\n",
+ "6 language 11039456751490139420 TEXT #/texts/1 en 0.85\n",
+ "7 semantic 11039456751490139420 TEXT #/texts/1 header 0.86\n",
+ "8 language 2331012439618467779 TEXT #/texts/2 en 0.17\n",
+ "9 semantic 2331012439618467779 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 7382564455872733900 TEXT #/texts/3 en 0.65\n",
+ "11 semantic 7382564455872733900 TEXT #/texts/3 meta-data 1.00\n",
+ "2112.12353.pdf\n",
+ "title: LAME: Layout-Aware Metadata Extraction Approach for Research Articles JONGYUN CHOI$^{1}$, HYESOO KONG$^{2}$, HWAMOOK YOON$^{2}$, HEUNG-SEON OH$^{3}$, and YUCHUL JUNG$^{1*}$\n",
+ "abstract: Abstract: The volume of academic literature, such as academic conference papers and journals, has increased rapidly worldwide, and research on metadata extraction is ongoing. However, high-performing metadata extraction is still challenging due to diverse layout formats according to journal publishers. To accommodate the diversity of the layouts of academic journals, we propose a novel LAyout-aware Metadata Extraction (LAME) framework equipped with the three characteristics (e.g., design of an automatic layout analysis, construction of a large meta-data training set, and construction of Layout-MetaBERT). We designed an automatic layout analysis using PDFMiner. Based on the layout analysis, a large volume of metadata-separated training data, including the title, abstract, author name, author affiliated organization, and keywords, were automatically extracted. Moreover, we constructed Layout-MetaBERT to extract the metadata from academic journals with varying layout formats. The experimental results with Layout-MetaBERT exhibited robust performance (Macro-F1, 93.27%) in metadata extraction for unseen journals with different layout formats. Keywords: Automatic layout analysis, Layout-MetaBERT, Metadata extraction, Research article\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 6012573637307908533 DOCUMENT # en 1.00\n",
+ "1 metadata 464228663354183117 DOCUMENT #/texts/0 title 1.00\n",
+ "2 metadata 15858266536491636911 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 9152689721526878606 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 464228663354183117 TEXT #/texts/0 en 0.39\n",
+ "5 semantic 464228663354183117 TEXT #/texts/0 meta-data 0.65\n",
+ "6 language 14433534267320267039 TEXT #/texts/1 en 0.74\n",
+ "7 semantic 14433534267320267039 TEXT #/texts/1 meta-data 0.99\n",
+ "8 language 4203210264018792995 TEXT #/texts/2 en 0.64\n",
+ "9 semantic 4203210264018792995 TEXT #/texts/2 meta-data 0.97\n",
+ "10 language 15854392013436684226 TEXT #/texts/3 en 0.79\n",
+ "11 semantic 15854392013436684226 TEXT #/texts/3 meta-data 0.94\n",
+ "2201.09745.pdf\n",
+ "title: Table Pre-training: A Survey on Model Architectures, Pre-training Objectives, and Downstream Tasks\n",
+ "abstract: ABSTRACT Since a vast number of tables can be easily collected from web pages, spreadsheets, PDFs, and various other document types, a flurry of table pre-training frameworks have been proposed following the success of text and images, and they have achieved new state-of-thearts on various tasks such as table question answering, table type recognition, column relation classification, table search, formula prediction, etc. To fully use the supervision signals in unlabeled tables, a variety of pre-training objectives have been designed and evaluated, for example, denoising cell values, predicting numerical relationships, and implicitly executing SQLs. And to best leverage the characteristics of (semi-)structured tables, various tabular language models, particularly with specially-designed attention mechanisms, have been explored. Since tables usually appear and interact with free-form text, table pre-training usually takes the form of table-text joint pre-training, which attracts significant research interests from multiple domains. This survey aims to provide a comprehensive review of different model designs, pre-training objectives, and downstream tasks for table pre-training, and we further share our thoughts and vision on existing challenges and future opportunities. CCS CONCEPTS · Information systems → Information retrieval. KEYWORDS Semi-structured table; Representation learning; pre-training ACM Reference Format: Haoyu Dong, Zhoujun Cheng, Xinyi He, Mengyu Zhou, Anda Zhou, Fan Zhou, Ao Liu, Shi Han, and Dongmei Zhang. 2022. Table Pre-training: A Survey on Model Architectures, Pre-training Objectives, and Downstream Tasks. In. ACM, New York, NY, USA, 14 pages.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 14097143220837798334 DOCUMENT # en 1.00\n",
+ "1 metadata 1418740895044524822 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 11144305653262557381 DOCUMENT #/texts/8 abstract 1.00\n",
+ "3 metadata 8767759704357325899 DOCUMENT #/texts/9 abstract 1.00\n",
+ "4 metadata 5892269567500855356 DOCUMENT #/texts/10 abstract 1.00\n",
+ "5 metadata 1349658109544879525 DOCUMENT #/texts/11 abstract 1.00\n",
+ "6 metadata 11144279514693801820 DOCUMENT #/texts/12 abstract 1.00\n",
+ "7 metadata 1144729129087448562 DOCUMENT #/texts/13 abstract 1.00\n",
+ "8 metadata 394174642402172125 DOCUMENT #/texts/14 abstract 1.00\n",
+ "9 metadata 17058277238831647723 DOCUMENT #/texts/15 abstract 1.00\n",
+ "10 language 6717765264948266926 TEXT #/texts/0 en 0.27\n",
+ "11 semantic 6717765264948266926 TEXT #/texts/0 text 0.98\n",
+ "2205.00347.pdf\n",
+ "title: LayoutBERT: Masked Language Layout Model for Object Insertion\n",
+ "abstract: ['Image compositing is one of the most fundamental steps in creative workflows. It involves taking objects/parts of several images to create a new image, called a composite. Currently, this process is done manually by creating accurate masks of objects to be inserted and carefully blending them with the target scene or images, usually with the help of tools such as Photoshop or GIMP. While there have been several works on automatic selection of objects for creating masks, the problem of object placement within an image with the correct position, scale, and harmony remains a difficult problem with limited exploration. Automatic object insertion in images or designs is a difficult problem as it requires understanding of the scene geometry and the color harmony between objects. We propose LayoutBERT for the object insertion task. It uses a novel self-supervised masked language model objective and bidirectional multi-head self-attention. It outperforms previous layout-based likelihood models and shows favorable properties in terms of model capacity. We demonstrate the effectiveness of our approach for object insertion in the image compositing setting and other settings like documents and design templates. We further demonstrate the usefulness of the learned representations for layout-based retrieval tasks. We provide both qualitative and quantitative evaluations on datasets from diverse domains like COCO, PublayNet, and two new datasets which we call Image Layouts and Template Layouts. Image Layouts which consists of 5.8 million images with layout annotations is the largest image layout dataset to our knowledge. We also share ablation study results on the effect of dataset size, model size and class sample size for this task.']\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 16542934806182997069 DOCUMENT # en 0.99\n",
+ "1 language 12311640805300642491 TEXT #/texts/0 en 0.87\n",
+ "2 semantic 12311640805300642491 TEXT #/texts/0 text 1.00\n",
+ "3 language 16817937696810446594 TEXT #/texts/1 en 0.91\n",
+ "4 semantic 16817937696810446594 TEXT #/texts/1 text 1.00\n",
+ "5 language 4922519091516066157 TEXT #/texts/2 en 0.16\n",
+ "6 semantic 4922519091516066157 TEXT #/texts/2 header 0.94\n",
+ "7 language 11724433687639709379 TEXT #/texts/3 en 0.90\n",
+ "8 semantic 11724433687639709379 TEXT #/texts/3 text 1.00\n",
+ "9 language 15566448323773711965 TEXT #/texts/4 en 0.57\n",
+ "10 semantic 15566448323773711965 TEXT #/texts/4 header 1.00\n",
+ "11 language 10768863974302381558 TEXT #/texts/5 en 0.77\n",
+ "1911.10683.pdf\n",
+ "title: Image-based table recognition: data, model, and evaluation\n",
+ "abstract: Abstract-Important information that relates to a specific topic in a document is often organized in tabular format to assist readers with information retrieval and comparison, which may be difficult to provide in natural language. However, tabular data in unstructured digital documents, e.g. Portable Document Format (PDF) and images, are difficult to parse into structured machine-readable format, due to complexity and diversity in their structure and style. To facilitate image-based table recognition with deep learning, we develop and release the largest publicly available table recognition dataset PubTabNet $^{1}$, containing 568k table images with corresponding structured HTML representation. PubTabNet is automatically generated by matching the XML and PDF representations of the scientific articles in PubMed Central TM Open Access Subset (PMCOA). We also propose a novel attention-based encoder-dual-decoder (EDD) architecture that converts images of tables into HTML code. The model has a structure decoder which reconstructs the table structure and helps the cell decoder to recognize cell content. In addition, we propose a new Tree-Edit-Distance-based Similarity (TEDS) metric for table recognition, which more appropriately captures multi-hop cell misalignment and OCR errors than the pre-established metric. The experiments demonstrate that the EDD model can accurately recognize complex tables solely relying on the image representation, outperforming the state-of-the-art by 9.7% absolute TEDS score.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 4080901632344816715 DOCUMENT # en 1.00\n",
+ "1 metadata 10644900836417579648 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 14242290163871738915 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 language 3067541903536160771 TEXT #/texts/0 en 0.41\n",
+ "4 semantic 3067541903536160771 TEXT #/texts/0 reference 0.78\n",
+ "5 language 10644900836417579648 TEXT #/texts/1 en 0.79\n",
+ "6 semantic 10644900836417579648 TEXT #/texts/1 reference 0.62\n",
+ "7 language 15474928253492353043 TEXT #/texts/2 en 0.58\n",
+ "8 semantic 15474928253492353043 TEXT #/texts/2 meta-data 1.00\n",
+ "9 language 9635025618824704809 TEXT #/texts/3 en 0.46\n",
+ "10 semantic 9635025618824704809 TEXT #/texts/3 meta-data 1.00\n",
+ "11 language 17849197603200806688 TEXT #/texts/4 en 0.49\n",
+ "2305.06553.pdf\n",
+ "title: WeLayout: WeChat Layout Analysis System for the ICDAR 2023 Competition on Robust Layout Segmentation in Corporate Documents\n",
+ "abstract: Abstract. In this paper, we introduce WeLayout, a novel system for segmenting the layout of corporate documents, which stands for We Chat Layout Analysis System. Our approach utilizes a sophisticated ensemble of DINO and YOLO models, specifically developed for the ICDAR 2023 Competition on Robust Layout Segmentation. Our method significantly surpasses the baseline, securing a top position 1 on the leaderboard with a mAP of 70.0. To achieve this performance, we concentrated on enhancing various aspects of the task, such as dataset augmentation, model architecture, bounding box refinement, and model ensemble techniques. Additionally, we trained the data separately for each document category to ensure a higher mean submission score. We also developed an algorithm for cell matching to further improve our performance. To identify the optimal weights and IoU thresholds for our model ensemble, we employed a Bayesian optimization algorithm called the Tree-Structured Parzen Estimator. Our approach effectively demonstrates the benefits of combining query-based and anchor-free models for achieving robust layout segmentation in corporate documents.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 14956064378562553280 DOCUMENT # en 0.98\n",
+ "1 metadata 14484922385533454757 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 1897918619985061747 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 language 15158612243538917595 TEXT #/texts/0 en 0.68\n",
+ "4 semantic 15158612243538917595 TEXT #/texts/0 reference 0.66\n",
+ "5 language 14484922385533454757 TEXT #/texts/1 en 0.68\n",
+ "6 semantic 14484922385533454757 TEXT #/texts/1 header 0.59\n",
+ "7 language 13617141398818891398 TEXT #/texts/2 en 0.23\n",
+ "8 semantic 13617141398818891398 TEXT #/texts/2 meta-data 0.99\n",
+ "9 language 17025208375146063947 TEXT #/texts/3 en 0.57\n",
+ "10 semantic 17025208375146063947 TEXT #/texts/3 meta-data 0.89\n",
+ "11 language 7171695325311829417 TEXT #/texts/4 en 0.26\n",
+ "2209.04460.pdf\n",
+ "title: Figure and Figure Caption Extraction for Mixed Raster and Vector PDFs: Digitization of Astronomical Literature with OCR Features\n",
+ "abstract: Abstract. Scientific articles published prior to the 'age of digitization' in the late 1990s contain figures which are 'trapped' within their scanned pages. While progress to extract figures and their captions has been made, there is currently no robust method for this process. We present a YOLO-based method for use on scanned pages, post-Optical Character Recognition (OCR), which uses both grayscale and OCR-features. When applied to the astrophysics literature holdings of the Astrophysics Data System (ADS), we find F1 scores of 90.9% (92.2%) for figures (figure captions) with the intersection-over-union (IOU) cut-off of 0.9 which is a significant improvement over other state-of-the-art methods. Keywords: scholarly document processing · document layout analysis · astronomy.\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 15034260135785917006 DOCUMENT # en \n",
+ "1 metadata 3433926981744971318 DOCUMENT #/texts/1 title \n",
+ "2 metadata 9462471546895446962 DOCUMENT #/texts/9 abstract \n",
+ "3 metadata 10821452764085231010 DOCUMENT #/texts/10 abstract \n",
+ "4 language 10280969898410321041 TEXT #/texts/0 en \n",
+ "5 semantic 10280969898410321041 TEXT #/texts/0 reference \n",
+ "6 language 3433926981744971318 TEXT #/texts/1 en \n",
+ "7 semantic 3433926981744971318 TEXT #/texts/1 header \n",
+ "8 language 2453516942315491092 TEXT #/texts/2 en \n",
+ "9 semantic 2453516942315491092 TEXT #/texts/2 meta-data \n",
+ "10 language 4178901503750185596 TEXT #/texts/3 en \n",
+ "11 semantic 4178901503750185596 TEXT #/texts/3 meta-data \n",
+ "\n",
+ " confidence \n",
+ "0 1.00 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 0.68 \n",
+ "5 0.82 \n",
+ "6 0.82 \n",
+ "7 0.54 \n",
+ "8 0.24 \n",
+ "9 0.93 \n",
+ "10 0.20 \n",
+ "11 1.00 \n",
+ "2207.06695.pdf\n",
+ "title: DavarOCR: A Toolbox for OCR and Multi-Modal Document Understanding\n",
+ "abstract: ABSTRACT This paper presents DavarOCR, an open-source toolbox for OCR and document understanding tasks. DavarOCR currently implements 19 advanced algorithms, covering 9 different task forms. DavarOCR provides detailed usage instructions and the trained models for each algorithm. Compared with the previous opensource OCR toolbox, DavarOCR has relatively more complete support for the sub-tasks of the cutting-edge technology of document understanding. In order to promote the development and application of OCR technology in academia and industry, we pay more attention to the use of modules that different sub-domains of technology can share. DavarOCR is publicly released at https: //github.com/hikopensource/Davar-Lab-OCR. CCS CONCEPTS · Computing methodologies → Computer vision problems. KEYWORDS Open-source, OCR, Document Understanding ACM Reference Format: Liang Qiao, Hui Jiang, Ying Chen, Can Li, Pengfei Li, Zaisheng Li, Baorui Zou, Dashan Guo, Yingda Xu, Yunlu Xu, Zhanzhan Cheng $^{∗}$, and Yi Niu. 2022. DavarOCR: A Toolbox for OCR and Multi-Modal Document Understanding. In Proceedings of the 30th ACM International Conference on Multimedia (MM '22), October 10-14, 2022, Lisboa, Portugal. ACM, New York, NY, USA, 4 pages. https://doi.org/10.1145/3503161.3548547 ACM ISBN 978-1-4503-9203-7/22/10...$15.00\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10578670118810806432 DOCUMENT # en 1.00\n",
+ "1 metadata 14078910730148120710 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 9804854515977808943 DOCUMENT #/texts/14 abstract 1.00\n",
+ "3 metadata 12499147061936475091 DOCUMENT #/texts/15 abstract 1.00\n",
+ "4 metadata 13885398359714424134 DOCUMENT #/texts/16 abstract 1.00\n",
+ "5 metadata 7460045827015547000 DOCUMENT #/texts/17 abstract 1.00\n",
+ "6 metadata 9804754606106263206 DOCUMENT #/texts/18 abstract 1.00\n",
+ "7 metadata 13984942241077387243 DOCUMENT #/texts/19 abstract 1.00\n",
+ "8 metadata 1459573036359563303 DOCUMENT #/texts/20 abstract 1.00\n",
+ "9 metadata 10320331568267550096 DOCUMENT #/texts/21 abstract 1.00\n",
+ "10 metadata 9842410885214878497 DOCUMENT #/texts/22 abstract 1.00\n",
+ "11 language 17753305227894791928 TEXT #/texts/0 en 0.55\n",
+ "2204.12974.pdf\n",
+ "title: CapOnImage: Context-driven Dense-Captioning on Image\n",
+ "abstract: ABSTRACT Existing image captioning systems are dedicated to generating narrative captions for images, which are spatially detached from the image in presentation. However, texts can also be used as decorations on the image to highlight the key points and increase the attractiveness of images. In this work, we introduce a new task called captioning on image (CapOnImage), which aims to generate dense captions at different locations of the image based on contextual information. To fully exploit the surrounding visual context to generate the most suitable caption for each location, we propose a multi-modal pre-training model with multi-level pre-training tasks that progressively learn the correspondence between texts and image locations from easy to difficult. Since the model may generate redundant captions for nearby locations, we further enhance the location embedding with neighbor locations as context. For this new task, we also introduce a large-scale benchmark called CapOn-Image2M, which contains 2.1 million product images, each with an average of 4.8 spatially localized captions. Compared with other image captioning model variants, our model achieves the best results in both captioning accuracy and diversity aspects. We will make code and datasets public to facilitate future research.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 14357711412048741219 DOCUMENT # en 0.99\n",
+ "1 metadata 13423398989423713140 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 222532995322659675 DOCUMENT #/texts/8 abstract 1.00\n",
+ "3 metadata 6373866757109498540 DOCUMENT #/texts/9 abstract 1.00\n",
+ "4 language 3594251336999786412 TEXT #/texts/0 en 0.44\n",
+ "5 semantic 3594251336999786412 TEXT #/texts/0 text 0.69\n",
+ "6 language 13423398989423713140 TEXT #/texts/1 en 0.48\n",
+ "7 semantic 13423398989423713140 TEXT #/texts/1 header 0.75\n",
+ "8 language 6919689940402428871 TEXT #/texts/2 en 0.40\n",
+ "9 semantic 6919689940402428871 TEXT #/texts/2 meta-data 0.98\n",
+ "10 language 7331669175832383025 TEXT #/texts/3 nl 0.24\n",
+ "11 semantic 7331669175832383025 TEXT #/texts/3 meta-data 0.99\n",
+ "2305.10825.pdf\n",
+ "title: B\n",
+ "abstract: ['Diffusion model based language-guided image editing has achieved great success recently. However, existing state-of-the-art diffusion models struggle with rendering correct text and text style during generation. To tackle this problem, we propose a universal self-supervised text editing diffusion model (DiffUTE), which aims to replace or modify words in the source image with another one while maintaining its realistic appearance. Specifically, we build our model on a diffusion model and carefully modify the network structure to enable the model for drawing multilingual characters with the help of glyph and position information. Moreover, we design a self-supervised learning framework to leverage large amounts of web data to improve the representation ability of the model. Experimental results show that our method achieves an impressive performance and enables controllable editing on in-the-wild images with high fidelity. Our code will be avaliable in \\\\url{https://github.com/chenhaoxing/DiffUTE}.']\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 6514666095637483066 DOCUMENT # en 0.99\n",
+ "1 metadata 12998565238298329342 DOCUMENT #/texts/2 title 1.00\n",
+ "2 language 8636953316538300383 TEXT #/texts/0 en 0.71\n",
+ "3 semantic 8636953316538300383 TEXT #/texts/0 reference 0.66\n",
+ "4 language 13544356125384020866 TEXT #/texts/1 en 0.35\n",
+ "5 semantic 13544356125384020866 TEXT #/texts/1 header 0.92\n",
+ "6 language 12998565238298329342 TEXT #/texts/2 de 1.00\n",
+ "7 semantic 12998565238298329342 TEXT #/texts/2 header 1.00\n",
+ "8 language 3876166548510224187 TEXT #/texts/3 en 0.56\n",
+ "9 semantic 3876166548510224187 TEXT #/texts/3 meta-data 0.99\n",
+ "10 language 2124254991731670016 TEXT #/texts/4 en 0.59\n",
+ "11 semantic 2124254991731670016 TEXT #/texts/4 meta-data 0.90\n",
+ "2302.11583.pdf\n",
+ "title: The Digitization of Historical Astrophysical Literature with Highly-Localized Figures and Figure Captions\n",
+ "abstract: Abstract Scientific articles published prior to the 'age of digitization' in the late 1990s contain figures which are 'trapped' within their scanned pages. While progress to extract figures and their captions has been made, there is currently no robust method for this process. We present a YOLO-based method for use on scanned pages, after they have been processed with Optical Character Recognition (OCR), which uses both grayscale and OCR-features. We focus our efforts on translating the intersection-overunion (IOU) metric from the field of object detection to document layout analysis and quantify 'high localization' levels as an IOU of 0.9. When applied to the astrophysics literature holdings of the NASA Astrophysics Data System (ADS), we find F1 scores of 90.9% (92.2%) for figures (figure captions) with the IOU cut-off of 0.9 which is a significant improvement over other state-of-the-art methods. Keywords: scholarly document processing, document layout analysis, astronomy.\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 16161331326838422230 DOCUMENT # en \n",
+ "1 metadata 380514435218640980 DOCUMENT #/texts/1 title \n",
+ "2 metadata 13231021132440135750 DOCUMENT #/texts/9 abstract \n",
+ "3 metadata 2570868380428784219 DOCUMENT #/texts/10 abstract \n",
+ "4 metadata 8293005220653308550 DOCUMENT #/texts/11 abstract \n",
+ "5 language 11001573406615304101 TEXT #/texts/0 en \n",
+ "6 semantic 11001573406615304101 TEXT #/texts/0 text \n",
+ "7 language 380514435218640980 TEXT #/texts/1 en \n",
+ "8 semantic 380514435218640980 TEXT #/texts/1 header \n",
+ "9 language 2659903811506978490 TEXT #/texts/2 en \n",
+ "10 semantic 2659903811506978490 TEXT #/texts/2 meta-data \n",
+ "11 language 14358587209864171010 TEXT #/texts/3 en \n",
+ "\n",
+ " confidence \n",
+ "0 1.00 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 1.00 \n",
+ "5 0.33 \n",
+ "6 0.96 \n",
+ "7 0.84 \n",
+ "8 0.66 \n",
+ "9 0.47 \n",
+ "10 0.98 \n",
+ "11 0.68 \n",
+ "2203.04814.pdf\n",
+ "title: Text-DIAE: Degradation Invariant Autoencoders for Text Recognition and Document Enhancement\n",
+ "abstract: Abstract. In this work, we propose Text-Degradation Invariant Auto Encoder (Text-DIAE) aimed to solve two tasks, text recognition (handwritten or scene-text) and document image enhancement. We define three pretext tasks as learning objectives to be optimized during pretraining without the usage of labelled data. Each of the pre-text objectives is specifically tailored for the final downstream tasks. We conduct several ablation experiments that show the importance of each degradation for a specific domain. Exhaustive experimentation shows that our method does not have limitations of previous state-of-the-art based on contrastive losses while at the same time requiring essentially fewer data samples to converge. Finally, we demonstrate that our method surpasses the state-of-the-art significantly in existing supervised and selfsupervised settings in handwritten and scene text recognition and document image enhancement. Our code and trained models will be made publicly available at http://Upon_Acceptance. Keywords: Self-Supervised Learning, Handwritten Text Recognition, Scene-Text Recognition, Document Image Enhancement.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 14746165682703908176 DOCUMENT # en 0.99\n",
+ "1 metadata 1239120697244612174 DOCUMENT #/texts/0 title 1.00\n",
+ "2 metadata 12077914615450139164 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 4888985162782367430 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 1239120697244612174 TEXT #/texts/0 en 0.36\n",
+ "5 semantic 1239120697244612174 TEXT #/texts/0 header 0.91\n",
+ "6 language 7357589549043404236 TEXT #/texts/1 es 0.28\n",
+ "7 semantic 7357589549043404236 TEXT #/texts/1 meta-data 0.99\n",
+ "8 language 3561303740234063321 TEXT #/texts/2 es 0.22\n",
+ "9 semantic 3561303740234063321 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 13046124633699204353 TEXT #/texts/3 en 0.28\n",
+ "11 semantic 13046124633699204353 TEXT #/texts/3 meta-data 0.95\n",
+ "2110.02069.pdf\n",
+ "title: OPAD: An Optimized Policy-based Active Learning Framework for Document Content Analysis\n",
+ "abstract: ABSTRACT\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 16825692768564201021 DOCUMENT # en 0.94\n",
+ "1 metadata 4532215096900581724 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 16063359772842428263 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 language 13727066778969946399 TEXT #/texts/0 en 0.43\n",
+ "4 semantic 13727066778969946399 TEXT #/texts/0 text 0.69\n",
+ "5 language 4532215096900581724 TEXT #/texts/1 en 0.57\n",
+ "6 semantic 4532215096900581724 TEXT #/texts/1 header 0.84\n",
+ "7 language 14339003219097135650 TEXT #/texts/2 en 0.41\n",
+ "8 semantic 14339003219097135650 TEXT #/texts/2 meta-data 0.98\n",
+ "9 language 5884331416575885239 TEXT #/texts/3 en 0.66\n",
+ "10 semantic 5884331416575885239 TEXT #/texts/3 meta-data 1.00\n",
+ "11 language 14652148498818357478 TEXT #/texts/4 en 0.16\n",
+ "2106.07359.pdf\n",
+ "title: MexPub: Deep Transfer Learning for Metadata Extraction from German Publications\n",
+ "abstract: Abstract-Extracting metadata from scientific papers can be considered as a solved problem in NLP due to the high accuracy of state-of-the-art methods. However, this does not apply to German scientific publications, which have a variety of styles and layouts. In contrast to most of the English scientific publications that follow standard and simple layouts, the order, content, position and size of metadata in German publications vary greatly among publications. This variety makes traditional NLP methods fail to accurately extract metadata from these publications. In this paper, we present a method that extracts metadata from PDF documents with different layouts and styles by viewing the document as an image. We used Mask R-CNN that is trained on COCO dataset and finetuned with PubLayNet dataset that consists of 200K PDF snapshots with five basic classes (e.g. text, figure, etc). We refine-tuned the model on our proposed synthetic dataset consisting of 30K article snapshots to extract nine patterns (i.e. author, title, etc). Our synthetic dataset is generated using contents in both languages German and English and a finite set of challenging templates obtained from German publications. Our method achieved an average accuracy of around 90% which validates its capability to accurately extract metadata from a variety of PDF documents with challenging templates. Index Terms-author name disambiguation, entity linkage, bibliographic data, neural networks, classification\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 17314565271857111086 DOCUMENT # en 1.00\n",
+ "1 metadata 16695866139959375602 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 3770385164590787878 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 11106268346932843180 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 4288696785669740737 TEXT #/texts/0 en 0.38\n",
+ "5 semantic 4288696785669740737 TEXT #/texts/0 reference 0.67\n",
+ "6 language 16695866139959375602 TEXT #/texts/1 en 0.80\n",
+ "7 semantic 16695866139959375602 TEXT #/texts/1 header 0.52\n",
+ "8 language 2881893159174956500 TEXT #/texts/2 en 0.40\n",
+ "9 semantic 2881893159174956500 TEXT #/texts/2 meta-data 0.98\n",
+ "10 language 8155322520631945055 TEXT #/texts/3 en 0.49\n",
+ "11 semantic 8155322520631945055 TEXT #/texts/3 meta-data 0.89\n",
+ "2308.14397.pdf\n",
+ "title: Ensemble of Anchor-Free Models for Robust Bangla Document Layout Segmentation\n",
+ "abstract: Abstract-In this research paper, we introduce a novel approach designed for the purpose of segmenting the layout of Bangla documents. Our methodology involves the utilization of a sophisticated ensemble of YOLOv8 models, which were trained for the DL Sprint 2.0-BUET CSE Fest 2023 Competition focused on Bangla document layout segmentation. Our primary emphasis lies in enhancing various aspects of the task, including techniques such as image augmentation, model architecture, and the incorporation of model ensembles. We deliberately reduce the quality of a subset of document images to enhance the resilience of model training, thereby resulting in an improvement in our cross-validation score. By employing Bayesian optimization, we determine the optimal confidence and Intersection over Union (IoU) thresholds for our model ensemble. Through our approach, we successfully demonstrate the effectiveness of anchor-free models in achieving robust layout segmentation in Bangla documents.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 4186796933444337650 DOCUMENT # en 1.00\n",
+ "1 metadata 15963578782981549677 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 7869003996707617300 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 language 1586940549653234579 TEXT #/texts/0 en 0.27\n",
+ "4 semantic 1586940549653234579 TEXT #/texts/0 reference 0.86\n",
+ "5 language 15963578782981549677 TEXT #/texts/1 en 0.51\n",
+ "6 semantic 15963578782981549677 TEXT #/texts/1 header 0.93\n",
+ "7 language 16607079952244636778 TEXT #/texts/2 en 0.70\n",
+ "8 semantic 16607079952244636778 TEXT #/texts/2 meta-data 1.00\n",
+ "9 language 3645630920813888449 TEXT #/texts/3 en 0.72\n",
+ "10 semantic 3645630920813888449 TEXT #/texts/3 meta-data 1.00\n",
+ "11 language 7869003996707617300 TEXT #/texts/4 en 0.90\n",
+ "2301.06629.pdf\n",
+ "title: Diverse Multimedia Layout Generation with Multi Choice Learning\n",
+ "abstract: ABSTRACT Designing visually appealing layouts for multimedia documents containing text, graphs and images requires a form of creative intelligence. Modelling the generation of layouts has recently gained attention due to its importance in aesthetics and communication style. In contrast to standard prediction tasks, there are a range of acceptable layouts which depend on user preferences. For example, a poster designer may prefer logos on the top-left while another prefers logos on the bottom-right. Both are correct choices yet existing machine learning models treat layouts as a single choice prediction problem. In such situations, these models would simply average over all possible choices given the same input forming a degenerate sample. In the above example, this would form an unacceptable layout with a logo in the centre. In this paper, we present an auto-regressive neural network architecture, called LayoutMCL, that uses multi-choice prediction and winner-takes-all loss to effectively stabilise layout generation. LayoutMCL avoids the averaging problem by using multiple predictors to learn a range of possible options for each layout object. This enables LayoutMCL to generate multiple and diverse layouts from a single input which is in contrast with existing approaches © 2021 Association for Computing Machinery. https://doi.org/10.1145/3474085.3475525 which yield similar layouts with minor variations. Through quantitative benchmarks on real data (magazine, document and mobile app layouts), we demonstrate that LayoutMCL reduces Fréchet Inception Distance (FID) by 83-98% and generates significantly more diversity in comparison to existing approaches. CCS CONCEPTS · Computing methodologies → Neural networks; Mixture modeling; · Applied computing → Multi / mixed media creation. KEYWORDS multimedia applications, neural networks, generative models, creative intelligence, layouts, multi-choice learning, mixture models ACM Reference Format: David D. Nguyen, Surya Nepal, and Salil S. Kanhere. 2021. Diverse Multimedia Layout Generation with Multi Choice Learning. In Proceedings of the 29th ACM International Conference on Multimedia (MM '21), October 20-24, 2021, Virtual Event, China. ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/3474085.3475525\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 13531002653667033373 DOCUMENT # en 0.99\n",
+ "1 metadata 2484448217154416415 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 6494237988358185759 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 8217380235620815139 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 metadata 5607695864583266366 DOCUMENT #/texts/7 abstract 1.00\n",
+ "5 metadata 17573229298095673916 DOCUMENT #/texts/8 abstract 1.00\n",
+ "6 metadata 3607335504213949949 DOCUMENT #/texts/9 abstract 1.00\n",
+ "7 metadata 3232040026906145710 DOCUMENT #/texts/10 abstract 1.00\n",
+ "8 metadata 11729361395974924344 DOCUMENT #/texts/11 abstract 1.00\n",
+ "9 metadata 10132473083317424389 DOCUMENT #/texts/12 abstract 1.00\n",
+ "10 metadata 6494669723362407576 DOCUMENT #/texts/13 abstract 1.00\n",
+ "11 metadata 9741072581454175100 DOCUMENT #/texts/14 abstract 1.00\n",
+ "2205.13724.pdf\n",
+ "title: V-Doc : Visual questions answers with Documents\n",
+ "abstract: Abstract We propose V-Doc, a question-answering tool using document images and PDF, mainly for researchers and general non-deep learning experts looking to generate, process, and understand the document visual question answering tasks. The V-Doc supports generating and using both extractive and abstractive question-answer pairs using documents images. The extractive QA selects a subset of tokens or phrases from the document contents to predict the answers, while the abstractive QA recognises the language in the content and generates the answer based on the trained model. Both aspects are crucial to understanding the documents, especially in an image format. We include a detailed scenario of question generation for the abstractive QA task. V-Doc supports a wide range of datasets and models, and is highly extensible through a declarative, framework-agnostic platform. 1\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 9416824584998558997 DOCUMENT # en 1.00\n",
+ "1 metadata 4138600041242086037 DOCUMENT #/texts/0 title 1.00\n",
+ "2 metadata 13003046497338185974 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 1338064323689945546 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 4138600041242086037 TEXT #/texts/0 en 0.71\n",
+ "5 semantic 4138600041242086037 TEXT #/texts/0 text 0.79\n",
+ "6 language 12653076769892651762 TEXT #/texts/1 en 0.28\n",
+ "7 semantic 12653076769892651762 TEXT #/texts/1 meta-data 1.00\n",
+ "8 language 14589044734228421915 TEXT #/texts/2 en 0.58\n",
+ "9 semantic 14589044734228421915 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 13003046497338185974 TEXT #/texts/3 en 0.32\n",
+ "11 semantic 13003046497338185974 TEXT #/texts/3 header 0.93\n",
+ "2210.05391.pdf\n",
+ "title: Chenxia Li, Ruoyu Guo, Jun Zhou, Mengtao An, Yuning Du, Lingfeng Zhu, Yi Liu, Xiaoguang Hu, Dianhai Yu\n",
+ "abstract: Abstract A large amount of document data exists in unstructured form such as raw images without any text information. Designing a practical document image analysis system is a meaningful but challenging task. In previous work, we proposed an intelligent document analysis system PP-Structure. In order to further upgrade the function and performance of PP-Structure, we propose PP-StructureV2 in this work, which contains two subsystems: Layout Information Extraction and Key Information Extraction. Firstly, we integrate Image Direction Correction module and Layout Restoration module to enhance the functionality of the system. Secondly, 8 practical strategies are utilized in PP-StructureV2 for better performance. For Layout Analysis model, we introduce ultra lightweight detector PP-PicoDet and knowledge distillation algorithm FGD for model lightweighting, which increased the inference speed by 11 times with comparable mAP. For Table Recognition model, we utilize PP-LCNet, CSP-PAN and SLAHead to optimize the backbone module, feature fusion module and decoding module, respectively, which improved the table structure accuracy by 6% with comparable inference speed. For Key Information Extraction model, we introduce VI-LayoutXLM which is a visual-feature independent LayoutXLM architecture, TB-YX sorting algorithm and U-DML knowledge distillation algorithm, which brought 2.8% and 9.1% improvement respectively on the Hmean of Semantic Entity Recognition and Relation Extraction tasks. All the above mentioned models and code are open-sourced in the GitHub repository PaddleOCR $^{1}$.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10369181880053115369 DOCUMENT # en 1.00\n",
+ "1 metadata 12408328708422733217 DOCUMENT #/texts/2 title 1.00\n",
+ "2 metadata 6111303043411909073 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 9351761072530796738 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 8875645199832918788 TEXT #/texts/0 en 0.48\n",
+ "5 semantic 8875645199832918788 TEXT #/texts/0 reference 0.81\n",
+ "6 language 5481805245648485323 TEXT #/texts/1 en 0.31\n",
+ "7 semantic 5481805245648485323 TEXT #/texts/1 header 0.61\n",
+ "8 language 12408328708422733217 TEXT #/texts/2 en 0.67\n",
+ "9 semantic 12408328708422733217 TEXT #/texts/2 reference 0.69\n",
+ "10 language 1681836491796280974 TEXT #/texts/3 en 0.78\n",
+ "11 semantic 1681836491796280974 TEXT #/texts/3 meta-data 0.99\n",
+ "2306.02815.pdf\n",
+ "title: Transformer-Based UNet with Multi-Headed Cross-Attention Skip Connections to Eliminate Artifacts in Scanned Documents\n",
+ "abstract: Abstract The extraction of text in high quality is essential for textbased document analysis tasks like Document Classification or Named Entity Recognition. Unfortunately, this is not always ensured, as poor scan quality and the resulting artifacts lead to errors in the Optical Character Recognition (OCR) process. Current approaches using Convolutional Neural Networks show promising results for background removal tasks but fail correcting artifacts like pixelation or compression errors. For general images, Transformer backbones are getting integrated more frequently in wellknown neural network structures for denoising tasks. In this work, a modified UNet structure using a Swin Transformer backbone is presented to remove typical artifacts in scanned documents. Multi-headed cross-attention skip connections are used to more selectively learn features in respective levels of abstraction. The performance of this approach is examined regarding compression errors, pixelation and random noise. An improvement in text extraction quality with a reduced error rate of up to 53.9% on the synthetic data is archived. The pretrained base-model can be easily adopted to new artifacts. The cross-attention skip connections allow to integrate textual information extracted from the encoder or in form of commands to more selectively control the models outcome. The latter is shown by means of an example application.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 14009397682830537636 DOCUMENT # en 1.00\n",
+ "1 metadata 1856000564203785991 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 3087064519284633291 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 3447213099923161850 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 14896720172907815422 TEXT #/texts/0 en 0.32\n",
+ "5 semantic 14896720172907815422 TEXT #/texts/0 reference 0.95\n",
+ "6 language 1856000564203785991 TEXT #/texts/1 en 0.83\n",
+ "7 semantic 1856000564203785991 TEXT #/texts/1 header 0.82\n",
+ "8 language 8235066263506277257 TEXT #/texts/2 en 0.39\n",
+ "9 semantic 8235066263506277257 TEXT #/texts/2 meta-data 0.97\n",
+ "10 language 4742752365505202138 TEXT #/texts/3 en 0.44\n",
+ "11 semantic 4742752365505202138 TEXT #/texts/3 meta-data 0.95\n",
+ "2211.04934.pdf\n",
+ "title: DoSA : A System to Accelerate Annotations on Business Documents with Human-in-the-Loop\n",
+ "abstract: Abstract Business documents come in a variety of structures, formats and information needs which makes information extraction a challenging task. Due to these variations, having a document generic model which can work well across all types of documents and for all the use cases seems far-fetched. For documentspecific models, we would need customized document-specific labels. We introduce DoSA (Do cument S pecific A utomated Annotations), which helps annotators in generating initial annotations automatically using our novel bootstrap approach by leveraging document generic datasets and models. These initial annotations can further be reviewed by a human for correctness. An initial document-specific model can be trained and its inference can be used as feedback for generating more automated annotations. These automated annotations can be reviewed by human-in-the-loop for the correctness and a new improved model can be trained using the current model as pretrained model before going for the next iteration. In this paper, our scope is limited to Form like documents due to limited availability of generic annotated datasets, but this idea can be extended to a variety of other documents as more datasets are built. An open-source ready-to-use implementation is made available on GitHub. 1\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 7687356650631646950 DOCUMENT # en 1.00\n",
+ "1 metadata 28198017736497714 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 11981348536210967890 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 10011946881393212937 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 9783302183418439591 TEXT #/texts/0 en 0.48\n",
+ "5 semantic 9783302183418439591 TEXT #/texts/0 reference 0.51\n",
+ "6 language 28198017736497714 TEXT #/texts/1 en 0.69\n",
+ "7 semantic 28198017736497714 TEXT #/texts/1 header 0.56\n",
+ "8 language 11165351352510034148 TEXT #/texts/2 en 0.65\n",
+ "9 semantic 11165351352510034148 TEXT #/texts/2 meta-data 0.93\n",
+ "10 language 4442204473943079428 TEXT #/texts/3 en 0.53\n",
+ "11 semantic 4442204473943079428 TEXT #/texts/3 meta-data 0.68\n",
+ "2303.08137.pdf\n",
+ "title: LayoutDM: Discrete Diffusion Model for Controllable Layout Generation\n",
+ "abstract: Abstract Controllable layout generation aims at synthesizing plausible arrangement of element bounding boxes with optional constraints, such as type or position of a specific element. In this work, we try to solve a broad range of layout generation tasks in a single model that is based on discrete state-space diffusion models. Our model, named LayoutDM, naturally handles the structured layout data in the discrete representation and learns to progressively infer a noiseless layout from the initial input, where we model the layout corruption process by modality-wise discrete diffusion. For conditional generation, we propose to inject layout constraints in the form of masking or logit adjustment during inference. We show in the experiments that our LayoutDM successfully generates high-quality layouts and outperforms both task-specific and task-agnostic baselines on several layout tasks. 1\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 5274670548434171366 DOCUMENT # en 0.98\n",
+ "1 metadata 7423050627833765715 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 2463036394959683986 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 12872335772674645887 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 7128723719619351585 TEXT #/texts/0 en 0.26\n",
+ "5 semantic 7128723719619351585 TEXT #/texts/0 text 0.89\n",
+ "6 language 7423050627833765715 TEXT #/texts/1 en 0.56\n",
+ "7 semantic 7423050627833765715 TEXT #/texts/1 header 0.85\n",
+ "8 language 7379740030068026814 TEXT #/texts/2 en 0.29\n",
+ "9 semantic 7379740030068026814 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 4654927673697807378 TEXT #/texts/3 en 0.55\n",
+ "11 semantic 4654927673697807378 TEXT #/texts/3 meta-data 0.98\n",
+ "2203.06947.pdf\n",
+ "title: XYLayoutLM: Towards Layout-Aware Multimodal Networks For Visually-Rich Document Understanding\n",
+ "abstract: Abstract Recently, various multimodal networks for Visually-Rich Document Understanding(VRDU) have been proposed, showing the promotion of transformers by integrating visual and layout information with the text embeddings. However, most existing approaches utilize the position embeddings to incorporate the sequence information, neglecting the noisy improper reading order obtained by OCR tools. In this paper, we propose a robust layout-aware multimodal network named XYLayoutLM to capture and leverage rich layout information from proper reading orders produced by our Augmented XY Cut. Moreover, a Dilated Conditional Position Encoding module is proposed to deal with the input sequence of variable lengths, and it additionally extracts local layout information from both textual and visual modalities while generating position embeddings. Experiment results show that our XYLayoutLM achieves competitive results on document understanding tasks.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 13644024371973421720 DOCUMENT # en 1.00\n",
+ "1 metadata 6080215795318562627 DOCUMENT #/texts/0 title 1.00\n",
+ "2 metadata 8620641516224866452 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 16810561801514819585 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 6080215795318562627 TEXT #/texts/0 en 0.49\n",
+ "5 semantic 6080215795318562627 TEXT #/texts/0 header 0.68\n",
+ "6 language 8343217181628732407 TEXT #/texts/1 en 0.50\n",
+ "7 semantic 8343217181628732407 TEXT #/texts/1 meta-data 0.98\n",
+ "8 language 4372809223159679403 TEXT #/texts/2 en 0.42\n",
+ "9 semantic 4372809223159679403 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 9574469604736173385 TEXT #/texts/3 en 0.14\n",
+ "11 semantic 9574469604736173385 TEXT #/texts/3 meta-data 0.83\n",
+ "2106.00676.pdf\n",
+ "title: Incorporating Visual Layout Structures for Scientific Text Classification\n",
+ "abstract: Abstract Classifying the core textual components of a scientific paper-title, author, body text, etc.is a critical first step in automated scientific document understanding. Previous work has shown how using elementary layout information, i.e., each token's 2D position on the page, leads to more accurate classification. We introduce new methods for incorporating VIsual LAyout (VILA) structures, e.g., the grouping of page texts into text lines or text blocks, into language models to further improve performance. We show that the I-VILA approach, which simply adds special tokens denoting the boundaries of layout structures into model inputs, can lead to 1.9% Macro F1 improvements for token classification. Moreover, we design a hierarchical model, H-VILA, that encodes the text based on layout structures and record an up-to 47% inference time reduction with less than 1.5% Macro F1 loss for the text classification models. Experiments are conducted on a newly curated evaluation suite, S2-VLUE, with a novel metric measuring classification uniformity within visual groups and a new dataset of gold annotations covering papers from 19 scientific disciplines. Pre-trained weights, benchmark datasets, and source code will be available at https://github.com/allenai/VILA.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 8906765692817121331 DOCUMENT # en 1.00\n",
+ "1 metadata 6464735184806538810 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 17766533753665278732 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 5357052966551336360 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 8324885902404712044 TEXT #/texts/0 en 0.41\n",
+ "5 semantic 8324885902404712044 TEXT #/texts/0 reference 0.66\n",
+ "6 language 6464735184806538810 TEXT #/texts/1 en 0.49\n",
+ "7 semantic 6464735184806538810 TEXT #/texts/1 header 0.90\n",
+ "8 language 9600035908089748960 TEXT #/texts/2 en 0.49\n",
+ "9 semantic 9600035908089748960 TEXT #/texts/2 meta-data 0.94\n",
+ "10 language 12045148224929084550 TEXT #/texts/3 en 0.60\n",
+ "11 semantic 12045148224929084550 TEXT #/texts/3 meta-data 0.99\n",
+ "2305.00795.pdf\n",
+ "title: SelfDocSeg: A Self-Supervised vision-based Approach towards Document Segmentation\n",
+ "abstract: ['Document layout analysis is a known problem to the documents research community and has been vastly explored yielding a multitude of solutions ranging from text mining, and recognition to graph-based representation, visual feature extraction, etc. However, most of the existing works have ignored the crucial fact regarding the scarcity of labeled data. With growing internet connectivity to personal life, an enormous amount of documents had been available in the public domain and thus making data annotation a tedious task. We address this challenge using self-supervision and unlike, the few existing self-supervised document segmentation approaches which use text mining and textual labels, we use a complete vision-based approach in pre-training without any ground-truth label or its derivative. Instead, we generate pseudo-layouts from the document images to pre-train an image encoder to learn the document object representation and localization in a self-supervised framework before fine-tuning it with an object detection model. We show that our pipeline sets a new benchmark in this context and performs at par with the existing methods and the supervised counterparts, if not outperforms. The code is made publicly available at: https://github.com/MaitySubhajit/SelfDocSeg']\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 13678012789189708476 DOCUMENT # en 0.99\n",
+ "1 metadata 12907604576135823634 DOCUMENT #/texts/1 title 1.00\n",
+ "2 language 7874936498254575304 TEXT #/texts/0 en 0.72\n",
+ "3 semantic 7874936498254575304 TEXT #/texts/0 reference 0.66\n",
+ "4 language 12907604576135823634 TEXT #/texts/1 en 0.89\n",
+ "5 semantic 12907604576135823634 TEXT #/texts/1 header 0.84\n",
+ "6 language 5455095828293575748 TEXT #/texts/2 en 0.53\n",
+ "7 semantic 5455095828293575748 TEXT #/texts/2 meta-data 0.98\n",
+ "8 language 8702834490821239030 TEXT #/texts/3 en 0.16\n",
+ "9 semantic 8702834490821239030 TEXT #/texts/3 meta-data 1.00\n",
+ "10 language 2784524357674996804 TEXT #/texts/4 en 0.22\n",
+ "11 semantic 2784524357674996804 TEXT #/texts/4 meta-data 0.99\n",
+ "2202.01414.pdf\n",
+ "title: DocBed: A Multi-Stage OCR Solution for Documents with Complex Layouts\n",
+ "abstract: Abstract Digitization of newspapers is of interest for many reasons including preservation of history, accessibility and search ability, etc. While digitization of documents such as scientific articles and magazines is prevalent in literature, one of the main challenges for digitization of newspaper lies in its complex layout (e.g. articles spanning multiple columns, text interrupted by images) analysis, which is necessary to preserve human read-order. This work provides a major breakthrough in the digitization of newspapers on three fronts: first, releasing a dataset of 3000 fully-annotated, real-world newspaper images from 21 different U.S. states representing an extensive variety of complex layouts for document layout analysis; second, proposing layout segmentation as a precursor to existing optical character recognition (OCR) engines, where multiple state-of-the-art image segmentation models and several post-processing methods are explored for document layout segmentation; third, providing a thorough and structured evaluation protocol for isolated layout segmentation and endto-end OCR.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 1254548184009159520 DOCUMENT # en 1.00\n",
+ "1 metadata 8978338783002004372 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 12292879104215867518 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 1120897889773198344 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 14113261502938670554 TEXT #/texts/0 en 0.41\n",
+ "5 semantic 14113261502938670554 TEXT #/texts/0 text 0.90\n",
+ "6 language 8978338783002004372 TEXT #/texts/1 en 0.78\n",
+ "7 semantic 8978338783002004372 TEXT #/texts/1 header 0.79\n",
+ "8 language 10271475077874645195 TEXT #/texts/2 en 0.50\n",
+ "9 semantic 10271475077874645195 TEXT #/texts/2 meta-data 0.95\n",
+ "10 language 7339311064102247317 TEXT #/texts/3 en 0.52\n",
+ "11 semantic 7339311064102247317 TEXT #/texts/3 meta-data 0.92\n",
+ "2101.09465.pdf\n",
+ "title: WebSRC: A Dataset for Web-Based Structural Reading Comprehension\n",
+ "abstract: Abstract Web search is an essential way for human to obtain information, but it's still a great challenge for machines to understand the contents of web pages. In this paper, we introduce the task of web-based structural reading comprehension. Given a web page and a question about it, the task is to find an answer from the web page. This task requires a system not only to understand the semantics of texts but also the structure of the web page. Moreover, we proposed WebSRC, a novel Web-based S tructural R eading C omprehension dataset. WebSRC consists of 0.44M question-answer pairs, which are collected from 6.5K web pages with corresponding HTML source code, screenshots, and metadata. Each question in WebSRC requires a certain structural understanding of a web page to answer, and the answer is either a text span on the web page or yes/no. We evaluate various strong baselines on our dataset to show the difficulty of our task. We also investigate the usefulness of structural information and visual features. Our dataset and task are publicly available at https://speechlab-sjtu. github.io/WebSRC/.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 11501099291707251985 DOCUMENT # en 1.00\n",
+ "1 metadata 2324440648085679014 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 7666771146189822451 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 9356709581928391389 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 11875601693668658903 TEXT #/texts/0 en 0.25\n",
+ "5 semantic 11875601693668658903 TEXT #/texts/0 text 0.98\n",
+ "6 language 2324440648085679014 TEXT #/texts/1 en 0.53\n",
+ "7 semantic 2324440648085679014 TEXT #/texts/1 header 0.77\n",
+ "8 language 15969453667391098982 TEXT #/texts/2 en 0.50\n",
+ "9 semantic 15969453667391098982 TEXT #/texts/2 meta-data 0.80\n",
+ "10 language 14477852969093793837 TEXT #/texts/3 en 0.70\n",
+ "11 semantic 14477852969093793837 TEXT #/texts/3 meta-data 0.88\n",
+ "2009.14457.pdf\n",
+ "title: Towards a Multi-modal, Multi-task Learning based Pre-training Framework for Document Representation Learning\n",
+ "abstract: Abstract In this paper, we propose a multi-task learning-based framework that utilizes a combination of self-supervised and supervised pre-training tasks to learn a generic document representation. We design the network architecture and the pretraining tasks to incorporate the multi-modal document information across text, layout, and image dimensions and allow the network to work with multi-page documents. We showcase the applicability of our pre-training framework on a variety of different real-world document tasks such as document classification, document information extraction, and document retrieval. We conduct exhaustive experiments to compare performance against different ablations of our framework and state-of-the-art baselines. We discuss the current limitations and next steps for our work and make the code available to promote future research in this direction.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 15695631869970857141 DOCUMENT # en 1.00\n",
+ "1 metadata 9405462448796430699 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 1309713858244130926 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 15701970662693362424 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 16793184873001030572 TEXT #/texts/0 en 0.29\n",
+ "5 semantic 16793184873001030572 TEXT #/texts/0 reference 0.97\n",
+ "6 language 9405462448796430699 TEXT #/texts/1 en 0.87\n",
+ "7 semantic 9405462448796430699 TEXT #/texts/1 header 0.89\n",
+ "8 language 13576085455555947094 TEXT #/texts/2 en 0.50\n",
+ "9 semantic 13576085455555947094 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 14968621602377449348 TEXT #/texts/3 en 0.27\n",
+ "11 semantic 14968621602377449348 TEXT #/texts/3 meta-data 1.00\n",
+ "2206.13155.pdf\n",
+ "title: Bi-VLDoc: Bidirectional Vision-Language Modeling for Visually-Rich Document Understanding\n",
+ "abstract: Abstract-Multi-modal document pre-trained models have proven to be very effective in a variety of visually-rich document understanding (VrDU) tasks. Though existing document pretrained models have achieved excellent performance on standard benchmarks for VrDU, the way they model and exploit the interactions between vision and language on documents has hindered them from better generalization ability and higher accuracy. In this work, we investigate the problem of vision-language joint representation learning for VrDU mainly from the perspective of supervisory signals. Specifically, a pre-training paradigm called Bi-VLDoc is proposed, in which a bidirectional visionlanguage supervision strategy and a vision-language hybridattention mechanism are devised to fully explore and utilize the interactions between these two modalities, to learn stronger cross-modal document representations with richer semantics. Benefiting from the learned informative cross-modal document representations, Bi-VLDoc significantly advances the state-of-theart performance on three widely-used document understanding benchmarks, including Form Understanding (from 85.14% to 93.44%), Receipt Information Extraction (from 96.01% to 97.84%), and Document Classification (from 96.08% to 97.12%). On Document Visual QA, Bi-VLDoc achieves the state-of-the-art performance compared to previous single model methods. Index Terms-Visually-rich Document Understanding, Document Pre-trained Models, Cross-modal Document Representations.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 11596065795325662056 DOCUMENT # en 0.99\n",
+ "1 metadata 12526893973429135138 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 15409526057872574964 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 17336839330116895928 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 16857606035249792126 TEXT #/texts/0 en 0.40\n",
+ "5 semantic 16857606035249792126 TEXT #/texts/0 reference 0.95\n",
+ "6 language 12526893973429135138 TEXT #/texts/1 en 0.52\n",
+ "7 semantic 12526893973429135138 TEXT #/texts/1 header 0.67\n",
+ "8 language 16728676851862945303 TEXT #/texts/2 en 0.26\n",
+ "9 semantic 16728676851862945303 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 2650850847520354234 TEXT #/texts/3 en 0.54\n",
+ "11 semantic 2650850847520354234 TEXT #/texts/3 meta-data 0.98\n",
+ "2305.14962.pdf\n",
+ "title: ICDAR 2023 Competition on Robust Layout Segmentation in Corporate Documents\n",
+ "abstract: Abstract. Transforming documents into machine-processable representations is a challenging task due to their complex structures and variability in formats. Recovering the layout structure and content from PDF files or scanned material has remained a key problem for decades. IC-DAR has a long tradition in hosting competitions to benchmark the state-of-the-art and encourage the development of novel solutions to document layout understanding. In this report, we present the results of our ICDAR 2023 Competition on Robust Layout Segmentation in Corporate Documents, which posed the challenge to accurately segment the page layout in a broad range of document styles and domains, including corporate reports, technical literature and patents. To raise the bar over previous competitions, we engineered a hard competition dataset and proposed the recent DocLayNet dataset for training. We recorded 45 team registrations and received official submissions from 21 teams. In the presented solutions, we recognize interesting combinations of recent computer vision models, data augmentation strategies and ensemble methods to achieve remarkable accuracy in the task we posed. A clear trend towards adoption of vision-transformer based methods is evident. The results demonstrate substantial progress towards achieving robust and highly generalizing methods for document layout understanding. Keywords: Document Layout Analysis · Machine Learning · Computer Vision · Object Detection · ICDAR Competition\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 16179804872697398409 DOCUMENT # en 0.99\n",
+ "1 metadata 2549771196302855966 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 3783859718727443955 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 14449809084629610892 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 7764449527618403650 TEXT #/texts/0 en 0.66\n",
+ "5 semantic 7764449527618403650 TEXT #/texts/0 reference 0.66\n",
+ "6 language 2549771196302855966 TEXT #/texts/1 en 0.67\n",
+ "7 semantic 2549771196302855966 TEXT #/texts/1 header 0.51\n",
+ "8 language 17714906341415131143 TEXT #/texts/2 en 0.21\n",
+ "9 semantic 17714906341415131143 TEXT #/texts/2 meta-data 0.98\n",
+ "10 language 7644758239577654701 TEXT #/texts/3 en 0.44\n",
+ "11 semantic 7644758239577654701 TEXT #/texts/3 meta-data 0.99\n",
+ "2201.09407.pdf\n",
+ "title: CROSS-DOMAIN DOCUMENT LAYOUT ANALYSIS VIA UNSUPERVISED DOCUMENT STYLE GUIDE\n",
+ "abstract: ABSTRACT The document layout analysis (DLA) aims to decompose document images into high-level semantic areas (i.e., figures, tables, texts, and background). Creating a DLA framework with strong generalization capabilities is a challenge due to document objects are diversity in layout, size, aspect ratio, texture, etc. Many researchers devoted this challenge by synthesizing data to build large training sets. However, the synthetic training data has different styles and erratic quality. Besides, there is a large gap between the source data and the target data. In this paper, we propose an unsupervised cross-domain DLA framework based on document style guidance. We integrated the document quality assessment and the document cross-domain analysis into a unified framework. Our framework is composed of three components, Document Layout Generator (GLD), Document Elements Decorator(GED), and Document Style Discriminator(DSD). The GLD is used to document layout generates, the GED is used to document layout elements fill, and the DSD is used to document quality assessment and cross-domain guidance. First, we apply GLD to predict the positions of the generated document. Then, we design a novel algorithm based on aesthetic guidance to fill the document positions. Finally, we use contrastive learning to evaluate the quality assessment of the document. Besides, we design a new strategy to change the document quality assessment component into a document cross-domain style guide component. Our framework is an unsupervised document layout analysis framework. We have proved through numerous experiments that our proposed method has achieved remarkable performance. Index Terms-Semantic Segmentation, Docuemnt Layout Analysis, Deep Learning\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 13068987446936029579 DOCUMENT # en 0.98\n",
+ "1 metadata 10264502586869452829 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 9148518800528223737 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 8168549048811471983 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 metadata 14148857376607305658 DOCUMENT #/texts/7 abstract 1.00\n",
+ "5 language 16812019770818315157 TEXT #/texts/0 en 0.13\n",
+ "6 semantic 16812019770818315157 TEXT #/texts/0 text 0.88\n",
+ "7 language 10264502586869452829 TEXT #/texts/1 en 0.36\n",
+ "8 semantic 10264502586869452829 TEXT #/texts/1 header 0.97\n",
+ "9 language 8747017711417260965 TEXT #/texts/2 en 0.20\n",
+ "10 semantic 8747017711417260965 TEXT #/texts/2 meta-data 1.00\n",
+ "11 language 11489218395994698375 TEXT #/texts/3 en 0.71\n",
+ "2106.13802.pdf\n",
+ "title: American Family Insurance, Machine Learning Research Group\n",
+ "abstract: Abstract Document image classification remains a popular research area because it can be commercialized in many enterprise applications across different industries. Recent advancements in large pre-trained computer vision and language models and graph neural networks has lent document image classification many tools. However using large pre-trained models usually requires substantial computing resources which could defeat the costsaving advantages of automatic document image classification. In the paper we propose an efficient document image classification framework that uses graph convolution neural networks and incorporates textual, visual and layout information of the document. Empirical results on both publicly available and real-world data show that our methods achieve near SOTA performance yet require much less computing resources and time for model training and inference. This results in solutions than offer better cost advantages, especially in scalable deployment for enterprise applications.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 16898192775396121132 DOCUMENT # en 1.00\n",
+ "1 metadata 18047577149978264333 DOCUMENT #/texts/3 title 1.00\n",
+ "2 metadata 1342019013639942885 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 5364596895525468982 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 2030268891848045074 TEXT #/texts/0 en 0.36\n",
+ "5 semantic 2030268891848045074 TEXT #/texts/0 reference 0.95\n",
+ "6 language 7730456910860267844 TEXT #/texts/1 en 0.56\n",
+ "7 semantic 7730456910860267844 TEXT #/texts/1 header 0.81\n",
+ "8 language 3576015150755710974 TEXT #/texts/2 en 0.67\n",
+ "9 semantic 3576015150755710974 TEXT #/texts/2 reference 0.72\n",
+ "10 language 18047577149978264333 TEXT #/texts/3 en 0.82\n",
+ "11 semantic 18047577149978264333 TEXT #/texts/3 reference 0.48\n",
+ "2101.10281.pdf\n",
+ "title: PAWLS : PDF Annotation With Labels and Structure\n",
+ "abstract: Abstract Adobe's Portable Document Format (PDF) is a popular way of distributing view-only documents with a rich visual markup. This presents a challenge to NLP practitioners who wish to use the information contained within PDF documents for training models or data analysis, because annotating these documents is difficult. In this paper, we present PDF Annotation with Labels and Structure (PAWLS), a new annotation tool designed specifically for the PDF document format. PAWLS is particularly suited for mixed-mode annotation and scenarios in which annotators require extended context to annotate accurately. PAWLS supports span-based textual annotation, N-ary relations and freeform, non-textual bounding boxes, all of which can be exported in convenient formats for training multi-modal machine learning models. A read-only PAWLS server is available at https://pawls. apps.allenai.org/ 1 and the source code is available at https://github. com/allenai/pawls.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 14568006567697038323 DOCUMENT # en 1.00\n",
+ "1 metadata 4039357198175436344 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 14712112053459110108 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 16316434293491587175 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 560399852479874905 TEXT #/texts/0 en 0.44\n",
+ "5 semantic 560399852479874905 TEXT #/texts/0 text 0.98\n",
+ "6 language 4039357198175436344 TEXT #/texts/1 en 0.37\n",
+ "7 semantic 4039357198175436344 TEXT #/texts/1 header 0.97\n",
+ "8 language 5624206597017791865 TEXT #/texts/2 en 0.31\n",
+ "9 semantic 5624206597017791865 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 15153810220268008204 TEXT #/texts/3 en 0.58\n",
+ "11 semantic 15153810220268008204 TEXT #/texts/3 meta-data 0.99\n",
+ "2006.01038.pdf\n",
+ "title: arXiv:2006.01038v1 [cs.CL] 1 Jun 2020\n",
+ "abstract: Abstract Document layout analysis usually relies on computer vision models to understand documents while ignoring textual information that is vital to capture. Meanwhile, high quality labeled datasets with both visual and textual information are still insufficient. In this paper, we present DocBank, a benchmark dataset with fine-grained token-level annotations for document layout analysis. DocBank is constructed using a simple yet effective way with weak supervision from the L A T E X documents available on the arXiv.com. With DocBank, models from different modalities can be compared fairly and multi-modal approaches will be further investigated and boost the performance of document layout analysis. We build several strong baselines and manually split train/dev/test sets for evaluation. Experiment results show that models trained on DocBank accurately recognize the layout information for a variety of documents. The DocBank dataset will be publicly available at https://github.com/doc-analysis/DocBank.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 2465515363197577403 DOCUMENT # en 0.98\n",
+ "1 metadata 9465834195761580083 DOCUMENT #/texts/0 title 1.00\n",
+ "2 metadata 6884525327918319354 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 12088364625193045527 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 9465834195761580083 TEXT #/texts/0 en 0.29\n",
+ "5 semantic 9465834195761580083 TEXT #/texts/0 reference 0.97\n",
+ "6 language 6395967760918650982 TEXT #/texts/1 en 0.31\n",
+ "7 semantic 6395967760918650982 TEXT #/texts/1 meta-data 0.90\n",
+ "8 language 14211229016076119859 TEXT #/texts/2 en 0.38\n",
+ "9 semantic 14211229016076119859 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 1286919575614411010 TEXT #/texts/3 en 0.73\n",
+ "11 semantic 1286919575614411010 TEXT #/texts/3 meta-data 1.00\n",
+ "2303.05049.pdf\n",
+ "title: Unifying Lay out Generation with a Decoupled Diffusion Model\n",
+ "abstract: Abstract Layout generation aims to synthesize realistic graphic scenes consisting of elements with different attributes including category, size, position, and between-element relation. It is a crucial task for r educing the burden on heavyduty graphic design works for formatted scenes, e.g., publications, documents, and user interfaces (UIs). Diverse application scenarios impose a big c hallenge in unifying various layout generation subtasks, including conditional and unconditional generation. In this paper, we propose a Layout Diffusion Generative Model (LDGM) to achieve such unification with a single decoupled diffusion model. LDGM views a layout of arbitrary missing or coarse element attributes as an intermediate dif fusion status from a completed layout. Since different attrib utes have their individual semantics and characteristics, we propose to decouple the diffusion processes for them to improve the diversity of training samples and learn the rever se process jointly to exploit global-scope contexts for facilitating generation. As a result, our LDGM can generate layouts either from scratch or conditional on arbitrary available attributes. Extensive qualitative and quantitative e xperiments demonstrate our proposed LDGM outperforms existing layout generation models in both functionality and performance.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 11469147180815824661 DOCUMENT # en 0.99\n",
+ "1 metadata 7345684647555911193 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 14633266648332875894 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 12461356387847223939 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 13734010725237739732 TEXT #/texts/0 en 0.24\n",
+ "5 semantic 13734010725237739732 TEXT #/texts/0 text 0.89\n",
+ "6 language 7345684647555911193 TEXT #/texts/1 en 0.67\n",
+ "7 semantic 7345684647555911193 TEXT #/texts/1 header 0.83\n",
+ "8 language 4192388650002618545 TEXT #/texts/2 en 0.48\n",
+ "9 semantic 4192388650002618545 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 14722593531256542654 TEXT #/texts/3 en 0.39\n",
+ "11 semantic 14722593531256542654 TEXT #/texts/3 meta-data 0.99\n",
+ "2105.06220.pdf\n",
+ "title: VSR: A Unified Framework for Document Layout Analysis combining Vision, Semantics and Relations\n",
+ "abstract: Abstract. Document layout analysis is crucial for understanding document structures. On this task, vision and semantics of documents, and relations between layout components contribute to the understanding process. Though many works have been proposed to exploit the above information, they show unsatisfactory results. NLP-based methods model layout analysis as a sequence labeling task and show insufficient capabilities in layout modeling. CV-based methods model layout analysis as a detection or segmentation task, but bear limitations of inefficient modality fusion and lack of relation modeling between layout components. To address the above limitations, we propose a unified framework VSR for document layout analysis, combining vision, semantics and relations. VSR supports both NLP-based and CV-based methods. Specifically, we first introduce vision through document image and semantics through text embedding maps. Then, modality-specific visual and semantic features are extracted using a two-stream network, which are adaptively fused to make full use of complementary information. Finally, given component candidates, a relation module based on graph neural network is incorported to model relations between components and output final results. On three popular benchmarks, VSR outperforms previous models by large margins. Code will be released soon. Keywords: Vision · Semantics · Relations · Document layout analysis.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 11491769011364032964 DOCUMENT # en 0.95\n",
+ "1 metadata 1183044382250210687 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 17385183871704569381 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 17289783423793469794 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 8063734862441376055 TEXT #/texts/0 en 0.55\n",
+ "5 semantic 8063734862441376055 TEXT #/texts/0 reference 0.66\n",
+ "6 language 1183044382250210687 TEXT #/texts/1 en 0.66\n",
+ "7 semantic 1183044382250210687 TEXT #/texts/1 header 0.80\n",
+ "8 language 10571720774666920474 TEXT #/texts/2 en 0.31\n",
+ "9 semantic 10571720774666920474 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 7775955189029927588 TEXT #/texts/3 en 0.56\n",
+ "11 semantic 7775955189029927588 TEXT #/texts/3 meta-data 1.00\n",
+ "2106.11797.pdf\n",
+ "title: Evaluation of a Region Proposal Architecture for Multi-task Document Layout Analysis\n",
+ "abstract: Abstract Automatically recognizing the layout of handwritten documents is an important step towards useful extraction of information from those documents. The most common application is to feed downstream applications such as automatic text recognition and keyword spotting; however, the recognition of the layout also helps to establish relationships between elements in the document which allows to enrich the information that can be extracted. Most of the modern document layout analysis systems are designed to address only one part of the document layout problem, namely: baseline detection or region segmentation. In contrast, we evaluate the effectiveness of the Mask-RCNN architecture to address the problem of baseline detection and region segmentation in an integrated manner. We present experimental results on two handwritten text datasets and one handwritten music dataset. The analyzed architecture yields promising results, outperforming state-of-theart techniques in all three datasets. Keywordsdocument layout analysis, region proposal network, baseline detection, region segmentation.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 3713940944030150691 DOCUMENT # en 0.98\n",
+ "1 metadata 16570263275213596473 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 16485599656984523560 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 9852063695928942848 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 metadata 17889680592944608058 DOCUMENT #/texts/6 abstract 1.00\n",
+ "5 language 11004961425232215947 TEXT #/texts/0 en 0.23\n",
+ "6 semantic 11004961425232215947 TEXT #/texts/0 reference 0.95\n",
+ "7 language 16570263275213596473 TEXT #/texts/1 en 0.61\n",
+ "8 semantic 16570263275213596473 TEXT #/texts/1 header 0.93\n",
+ "9 language 17652260495556794451 TEXT #/texts/2 en 0.35\n",
+ "10 semantic 17652260495556794451 TEXT #/texts/2 meta-data 0.99\n",
+ "11 language 1051774319299939846 TEXT #/texts/3 es 0.35\n",
+ "2304.11810.pdf\n",
+ "title: PARAGRAPH2GRAPH: A GNN-BASED FRAMEWORK FOR LAYOUT PARAGRAPH ANALYSIS\n",
+ "abstract: ABSTRACT Document layout analysis has a wide range of requirements across various domains, languages, and business scenarios. However, most current state-of-the-art algorithms are language-dependent, with architectures that rely on transformer encoders or language-specific text encoders, such as BERT, for feature extraction. These approaches are limited in their ability to handle very long documents due to input sequence length constraints and are closely tied to language-specific tokenizers. Additionally, training a cross-language text encoder can be challenging due to the lack of labeled multilingual document datasets that consider privacy. Furthermore, some layout tasks require a clean separation between different layout components without overlap, which can be difficult for image segmentationbased algorithms to achieve. In this paper, we present Paragraph2Graph, a language-independent graph neural network (GNN)-based model that achieves competitive results on common document layout datasets while being adaptable to business scenarios with strict separation. With only 19.95 million parameters, our model is suitable for industrial applications, particularly in multi-language scenarios. We are releasing all of our code and pretrained models at this repo. K eywords GNN · Language-independent · Document Layout · Layout Paragraph · Generalization\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 3100341686165584999 DOCUMENT # en \n",
+ "1 metadata 15403989920679912416 DOCUMENT #/texts/1 title \n",
+ "2 metadata 227750139215915282 DOCUMENT #/texts/8 abstract \n",
+ "3 metadata 18323991934847197201 DOCUMENT #/texts/9 abstract \n",
+ "4 metadata 882870464380862174 DOCUMENT #/texts/10 abstract \n",
+ "5 language 14975926239625135018 TEXT #/texts/0 en \n",
+ "6 semantic 14975926239625135018 TEXT #/texts/0 text \n",
+ "7 language 15403989920679912416 TEXT #/texts/1 en \n",
+ "8 semantic 15403989920679912416 TEXT #/texts/1 header \n",
+ "9 language 11996006598348841766 TEXT #/texts/2 en \n",
+ "10 semantic 11996006598348841766 TEXT #/texts/2 meta-data \n",
+ "11 language 16098072656615255554 TEXT #/texts/3 en \n",
+ "\n",
+ " confidence \n",
+ "0 0.99 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 1.00 \n",
+ "5 0.28 \n",
+ "6 0.69 \n",
+ "7 0.32 \n",
+ "8 0.96 \n",
+ "9 0.70 \n",
+ "10 0.77 \n",
+ "11 0.20 \n",
+ "2203.16850.pdf\n",
+ "title: Revisiting Document Image Dewarping by Grid Regularization\n",
+ "abstract: Abstract This paper addresses the problem of document image dewarping, which aims at eliminating the geometric distortion in document images for document digitization. Instead of designing a better neural network to approximate the optical flow fields between the inputs and outputs, we pursue the best readability by taking the text lines and the document boundaries into account from a constrained optimization perspective. Specifically, our proposed method first learns the boundary points and the pixels in the text lines and then follows the most simple observation that the boundaries and text lines in both horizontal and vertical directions should be kept after dewarping to introduce a novel grid regularization scheme. To obtain the final forward mapping for dewarping, we solve an optimization problem with our proposed grid regularization. The experiments comprehensively demonstrate that our proposed approach outperforms the prior arts by large margins in terms of readability (with the metrics of Character Errors Rate and the Edit Distance) while maintaining the best image quality on the publiclyavailable DocUNet benchmark.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 978603162285402020 DOCUMENT # en 0.99\n",
+ "1 metadata 12429620403640438226 DOCUMENT #/texts/0 title 1.00\n",
+ "2 metadata 4879793979395431883 DOCUMENT #/texts/2 abstract 1.00\n",
+ "3 metadata 175822038141138086 DOCUMENT #/texts/3 abstract 1.00\n",
+ "4 language 12429620403640438226 TEXT #/texts/0 en 0.75\n",
+ "5 semantic 12429620403640438226 TEXT #/texts/0 header 0.88\n",
+ "6 language 15235721918946238063 TEXT #/texts/1 en 0.55\n",
+ "7 semantic 15235721918946238063 TEXT #/texts/1 meta-data 0.99\n",
+ "8 language 4879793979395431883 TEXT #/texts/2 en 0.32\n",
+ "9 semantic 4879793979395431883 TEXT #/texts/2 header 0.93\n",
+ "10 language 175822038141138086 TEXT #/texts/3 en 0.91\n",
+ "11 semantic 175822038141138086 TEXT #/texts/3 text 0.95\n",
+ "2303.03755.pdf\n",
+ "title: DLT: Conditioned layout generation with Joint Discrete-Continuous Diffusion Layout Transformer\n",
+ "abstract: Abstract Generating visual layouts is an essential ingredient of graphic design. The ability to condition layout generation on a partial subset of component attributes is critical to real-world applications that involve user interaction. Recently, diffusion models have demonstrated high-quality generative performances in various domains. However, it is unclear how to apply diffusion models to the natural representation of layouts which consists of a mix of discrete (class) and continuous (location, size) attributes. To address the conditioning layout generation problem, we introduce DLT, a joint discrete-continuous diffusion model. DLT is a transformer-based model which has a flexible conditioning mechanism that allows for conditioning on any given subset of all the layout component classes, locations, and sizes. Our method outperforms state-of-the-art generative models on various layout generation datasets with respect to different metrics and conditioning settings. Additionally, we validate the effectiveness of our proposed conditioning mechanism and the joint continuous-diffusion process. This joint process can be incorporated into a wide range of mixed discrete-continuous generative tasks.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 11659927563608853753 DOCUMENT # en 0.98\n",
+ "1 metadata 669239715039352587 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 14231489606976721709 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 537144397678098445 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 1378232785427175996 TEXT #/texts/0 en 0.28\n",
+ "5 semantic 1378232785427175996 TEXT #/texts/0 text 0.89\n",
+ "6 language 669239715039352587 TEXT #/texts/1 en 0.72\n",
+ "7 semantic 669239715039352587 TEXT #/texts/1 header 0.84\n",
+ "8 language 10345949929986003176 TEXT #/texts/2 en 0.44\n",
+ "9 semantic 10345949929986003176 TEXT #/texts/2 meta-data 0.97\n",
+ "10 language 14231489606976721709 TEXT #/texts/3 en 0.32\n",
+ "11 semantic 14231489606976721709 TEXT #/texts/3 header 0.93\n",
+ "2110.08164.pdf\n",
+ "title: A PREPRINT\n",
+ "abstract: ['Accurate layout analysis without subsequent text-line segmentation remains an ongoing challenge, especially when facing the Kangyur, a kind of historical Tibetan document featuring considerable touching components and mottled background. Aiming at identifying different regions in document images, layout analysis is indispensable for subsequent procedures such as character recognition. However, there was only a little research being carried out to perform line-level layout analysis which failed to deal with the Kangyur. To obtain the optimal results, a fine-grained sub-line level layout analysis approach is presented. Firstly, we introduced an accelerated method to build the dataset which is dynamic and reliable. Secondly, enhancement had been made to the SOLOv2 according to the characteristics of the Kangyur. Then, we fed the enhanced SOLOv2 with the prepared annotation file during the training phase. Once the network is trained, instances of the text line, sentence, and titles can be segmented and identified during the inference stage. The experimental results show that the proposed method delivers a decent 72.7% average precision on our dataset. In general, this preliminary research provides insights into the fine-grained sub-line level layout analysis and testifies the SOLOv2-based approaches. We also believe that the proposed methods can be adopted on other language documents with various layouts.']\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 17785348499810975599 DOCUMENT # en 1.00\n",
+ "1 metadata 460898704592932350 DOCUMENT #/texts/2 title 1.00\n",
+ "2 language 10001330042032416266 TEXT #/texts/0 en 0.42\n",
+ "3 semantic 10001330042032416266 TEXT #/texts/0 reference 0.91\n",
+ "4 language 10494471504194919372 TEXT #/texts/1 ja 0.34\n",
+ "5 semantic 10494471504194919372 TEXT #/texts/1 header 0.84\n",
+ "6 language 460898704592932350 TEXT #/texts/2 en 0.13\n",
+ "7 semantic 460898704592932350 TEXT #/texts/2 reference 0.91\n",
+ "8 language 16346695938433635803 TEXT #/texts/3 id 0.46\n",
+ "9 semantic 16346695938433635803 TEXT #/texts/3 meta-data 1.00\n",
+ "10 language 6531628207994599756 TEXT #/texts/4 en 0.77\n",
+ "11 semantic 6531628207994599756 TEXT #/texts/4 meta-data 0.92\n",
+ "2209.06584.pdf\n",
+ "title: One-Shot Doc Snippet Detection: Powering Search in Document Beyond Text\n",
+ "abstract: Abstract Active consumption of digital documents has yielded scope for research in various applications, including search. Traditionally, searching within a document has been cast as a text matching problem ignoring the rich layout and visual cues commonly present in structured documents, forms, etc. To that end, we ask a mostly unexplored question: 'Can we search for other similar snippets present in a target document page given a single query instance of a document snippet?'. We propose MONOMER to solve this as a one-shot snippet detection task. MONOMER fuses context from visual, textual, and spatial modalities of snippets and documents to find query snippet in target documents. We conduct extensive ablations and experiments showing MONOMER outperforms several baselines from one-shot object detection (BHRL), template matching, and document understanding (LayoutLMv3). Due to the scarcity of relevant data for the task at hand, we train MONOMER on programmatically generated data having many visually similar query snippets and target document pairs from two datasets-Flamingo Forms and PubLayNet. We also do a human study to validate the generated data.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 11192906298435654347 DOCUMENT # en 0.99\n",
+ "1 metadata 18233337473379706613 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 11756603228534403438 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 8440703618720666577 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 10436243055566423792 TEXT #/texts/0 en 0.44\n",
+ "5 semantic 10436243055566423792 TEXT #/texts/0 reference 0.95\n",
+ "6 language 18233337473379706613 TEXT #/texts/1 en 0.50\n",
+ "7 semantic 18233337473379706613 TEXT #/texts/1 header 0.77\n",
+ "8 language 18446113455465465647 TEXT #/texts/2 en 0.65\n",
+ "9 semantic 18446113455465465647 TEXT #/texts/2 meta-data 0.88\n",
+ "10 language 14998310295267340112 TEXT #/texts/3 en 0.51\n",
+ "11 semantic 14998310295267340112 TEXT #/texts/3 meta-data 0.69\n",
+ "2003.07560.pdf\n",
+ "title: ‖\n",
+ "abstract: Abstract-Tabular data is a crucial form of information expression, which can organize data in a standard structure for easy information retrieval and comparison. However, in financial industry and many other fields tables are often disclosed in unstructured digital files, e.g. Portable Document Format (PDF) and images, which are difficult to be extracted directly. In this paper, to facilitate deep learning based table extraction from unstructured digital files, we publish a standard Chinese dataset named FinTab, which contains more than 1,600 financial tables of diverse kinds and their corresponding structure representation in JSON. In addition, we propose a novel graph-based convolutional neural network model named GFTE as a baseline for future comparison. GFTE integrates image feature, position feature and textual feature together for precise edge prediction and reaches overall good results $^{1}$.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 17365030269464102181 DOCUMENT # en 0.99\n",
+ "1 metadata 2005836100606620539 DOCUMENT #/texts/2 title 1.00\n",
+ "2 metadata 1624559958454587795 DOCUMENT #/texts/9 abstract 1.00\n",
+ "3 language 12864712276516214286 TEXT #/texts/0 en 0.30\n",
+ "4 semantic 12864712276516214286 TEXT #/texts/0 reference 0.78\n",
+ "5 language 14832218684155059358 TEXT #/texts/1 en 0.88\n",
+ "6 semantic 14832218684155059358 TEXT #/texts/1 text 0.47\n",
+ "7 language 2005836100606620539 TEXT #/texts/2 vi 0.52\n",
+ "8 semantic 2005836100606620539 TEXT #/texts/2 text 1.00\n",
+ "9 language 11681392060471928309 TEXT #/texts/3 en 0.34\n",
+ "10 semantic 11681392060471928309 TEXT #/texts/3 meta-data 1.00\n",
+ "11 language 14497389118296410556 TEXT #/texts/4 en 0.78\n",
+ "2203.01017.pdf\n",
+ "title: TableFormer: Table Structure Understanding with Transformers.\n",
+ "abstract: Abstract a. Picture of a table:\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 1450589212631209931 DOCUMENT # en 0.99\n",
+ "1 metadata 16933509326206698184 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 16623877941696432046 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 7722591067936378833 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 12349202100122108811 TEXT #/texts/0 en 0.38\n",
+ "5 semantic 12349202100122108811 TEXT #/texts/0 text 0.89\n",
+ "6 language 16933509326206698184 TEXT #/texts/1 en 0.76\n",
+ "7 semantic 16933509326206698184 TEXT #/texts/1 reference 0.87\n",
+ "8 language 3262318516024354863 TEXT #/texts/2 en 0.41\n",
+ "9 semantic 3262318516024354863 TEXT #/texts/2 meta-data 0.92\n",
+ "10 language 8760086191836158497 TEXT #/texts/3 de 0.11\n",
+ "11 semantic 8760086191836158497 TEXT #/texts/3 text 0.60\n",
+ "2208.08037.pdf\n",
+ "title: UniLayout: Taming Unified Sequence-to-Sequence Transformers for Graphic Layout Generation\n",
+ "abstract: Abstract To satisfy various user needs, different subtasks of graphic layout generation have been explored intensively in recent years. Existing studies usually propose taskspecific methods with diverse input-output formats, dedicated model architectures, and different learning methods. However, those specialized approaches make the adaption to unseen subtasks difficult, hinder the knowledge sharing between different subtasks, and are contrary to the trend of devising general-purpose models. In this work, we propose UniLayout, which handles different subtasks for graphic layout generation in a unified manner. First, we uniformly represent diverse inputs and outputs of subtasks as the sequences of tokens. Then, based on the unified sequence format, we naturally leverage an identical encoder-decoder architecture with Transformers for different subtasks. Moreover, based on the above two kinds of unification, we further develop a single model that supports all subtasks concurrently. Experiments on two public datasets demonstrate that while simple, UniLayout significantly outperforms the previous task-specific methods.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 8182807638037382561 DOCUMENT # en 1.00\n",
+ "1 metadata 16989829520099866629 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 8153202450442895503 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 15212054895134725433 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 3988459388858141865 TEXT #/texts/0 en 0.44\n",
+ "5 semantic 3988459388858141865 TEXT #/texts/0 reference 0.86\n",
+ "6 language 16989829520099866629 TEXT #/texts/1 en 0.63\n",
+ "7 semantic 16989829520099866629 TEXT #/texts/1 header 0.81\n",
+ "8 language 1229170802030660337 TEXT #/texts/2 en 0.28\n",
+ "9 semantic 1229170802030660337 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 9540420743311235070 TEXT #/texts/3 en 0.60\n",
+ "11 semantic 9540420743311235070 TEXT #/texts/3 meta-data 0.91\n",
+ "2207.12955.pdf\n",
+ "title: Contextual Text Block Detection towards Scene Text Understanding\n",
+ "abstract: Abstract. Most existing scene text detectors focus on detecting characters or words that only capture partial text messages due to missing contextual information. For a better understanding of text in scenes, it is more desired to detect contextual text blocks (CTBs) which consist of one or multiple integral text units (e.g., characters, words, or phrases) in natural reading order and transmit certain complete text messages. This paper presents contextual text detection, a new setup that detects CTBs for better understanding of texts in scenes. We formulate the new setup by a dual detection task which first detects integral text units and then groups them into a CTB. To this end, we design a novel scene text clustering technique that treats integral text units as tokens and groups them (belonging to the same CTB) into an ordered token sequence. In addition, we create two datasets SCUT-CTW-Context and ReCTS-Context to facilitate future research, where each CTB is well annotated by an ordered sequence of integral text units. Further, we introduce three metrics that measure contextual text detection in local accuracy, continuity, and global accuracy. Extensive experiments show that our method accurately detects CTBs which effectively facilitates downstream tasks such as text classification and translation. The project is available at https://sg-vilab.github.io/publication/xue2022contextual/. Keywords: Scene Text Detection\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 15828830270858536421 DOCUMENT # en 0.99\n",
+ "1 metadata 9070712247046693948 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 14369142600913373072 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 9054809053710816760 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 3136319967504727252 TEXT #/texts/0 en 0.54\n",
+ "5 semantic 3136319967504727252 TEXT #/texts/0 reference 0.95\n",
+ "6 language 9070712247046693948 TEXT #/texts/1 en 0.63\n",
+ "7 semantic 9070712247046693948 TEXT #/texts/1 header 0.75\n",
+ "8 language 13251585860532160945 TEXT #/texts/2 en 0.36\n",
+ "9 semantic 13251585860532160945 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 10777370744359634488 TEXT #/texts/3 en 0.60\n",
+ "11 semantic 10777370744359634488 TEXT #/texts/3 meta-data 0.88\n",
+ "2305.02769.pdf\n",
+ "title: Towards End-to-End Semi-Supervised Table Detection with Deformable Transformer\n",
+ "abstract: Abstract. Table detection is the task of classifying and localizing table objects within document images. With the recent development in deep learning methods, we observe remarkable success in table detection. However, a significant amount of labeled data is required to train these models effectively. Many semi-supervised approaches are introduced to mitigate the need for a substantial amount of label data. These approaches use CNN-based detectors that rely on anchor proposals and post-processing stages such as NMS. To tackle these limitations, this paper presents a novel end-to-end semi-supervised table detection method that employs the deformable transformer for detecting table objects. We evaluate our semi-supervised method on PubLayNet, DocBank, ICADR-19 and TableBank datasets, and it achieves superior performance compared to previous methods. It outperforms the fully supervised method (Deformable transformer) by +3.4 points on 10% labels of TableBank-both dataset and the previous CNN-based semi-supervised approach (Soft Teacher) by +1.8 points on 10% labels of PubLayNet dataset. We hope this work opens new possibilities towards semi-supervised and unsupervised table detection methods. Keywords: Semi-Supervised Learning · Deformable Transformer · Table Analysis · Table Detection.\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 2339371309351147950 DOCUMENT # en \n",
+ "1 metadata 2441605672578681191 DOCUMENT #/texts/1 title \n",
+ "2 metadata 10072912660304435314 DOCUMENT #/texts/10 abstract \n",
+ "3 metadata 11963684253056995713 DOCUMENT #/texts/11 abstract \n",
+ "4 language 5123431033643498349 TEXT #/texts/0 en \n",
+ "5 semantic 5123431033643498349 TEXT #/texts/0 reference \n",
+ "6 language 2441605672578681191 TEXT #/texts/1 en \n",
+ "7 semantic 2441605672578681191 TEXT #/texts/1 header \n",
+ "8 language 16577848168059002784 TEXT #/texts/2 en \n",
+ "9 semantic 16577848168059002784 TEXT #/texts/2 meta-data \n",
+ "10 language 12065750236253281860 TEXT #/texts/3 en \n",
+ "11 semantic 12065750236253281860 TEXT #/texts/3 meta-data \n",
+ "\n",
+ " confidence \n",
+ "0 1.00 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 0.62 \n",
+ "5 0.66 \n",
+ "6 0.76 \n",
+ "7 0.65 \n",
+ "8 0.40 \n",
+ "9 0.94 \n",
+ "10 0.20 \n",
+ "11 1.00 \n",
+ "2303.05325.pdf\n",
+ "title: BaDLAD: A Large Multi-Domain Bengali Document Layout Analysis Dataset\n",
+ "abstract: Abstract. While strides have been made in deep learning based Bengali Optical Character Recognition (OCR) in the past decade, absence of large Document Layout Analysis (DLA) datasets has hindered the application of OCR in document transcription, e.g., transcribing historical documents and newspapers. Moreover, rule-based DLA systems that are currently being employed in practice are not robust to domain variations and out-of-distribution layouts. To this end, we present the first multidomain large B eng a li D ocument L ayout A nalysis D ataset: BaDLAD. This dataset contains 33, 695 human annotated document samples from six domains-i) books and magazines ii) public domain govt. documents iii) liberation war documents iv) new newspapers v) historical newspapers and vi) property deeds; with 710 K polygon annotations for four unit types: text-box, paragraph, image, and table. Through preliminary experiments benchmarking the performance of existing state-of-the-art deep learning architectures for English DLA, we demonstrate the efficacy of our dataset in training deep learning based Bengali document digitization models. Keywords: Handwritten Document Images · Layout Analysis (Physical and Logical) · Mobile/Camera-Based · Other Domains · Typeset Document Images\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 8865613824093398131 DOCUMENT # en \n",
+ "1 metadata 10095296672555375400 DOCUMENT #/texts/1 title \n",
+ "2 metadata 16411799519606679859 DOCUMENT #/texts/9 abstract \n",
+ "3 metadata 10800146418327418135 DOCUMENT #/texts/10 abstract \n",
+ "4 language 12112214876466467582 TEXT #/texts/0 en \n",
+ "5 semantic 12112214876466467582 TEXT #/texts/0 reference \n",
+ "6 language 10095296672555375400 TEXT #/texts/1 en \n",
+ "7 semantic 10095296672555375400 TEXT #/texts/1 header \n",
+ "8 language 17408826844111608210 TEXT #/texts/2 en \n",
+ "9 semantic 17408826844111608210 TEXT #/texts/2 meta-data \n",
+ "10 language 3359000651769550631 TEXT #/texts/3 en \n",
+ "11 semantic 3359000651769550631 TEXT #/texts/3 meta-data \n",
+ "\n",
+ " confidence \n",
+ "0 0.99 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 0.60 \n",
+ "5 0.66 \n",
+ "6 0.37 \n",
+ "7 0.94 \n",
+ "8 0.19 \n",
+ "9 0.99 \n",
+ "10 0.27 \n",
+ "11 0.98 \n",
+ "2011.13534.pdf\n",
+ "title: A Survey of Deep Learning Approaches for OCR and Document Understanding\n",
+ "abstract: Abstract Documents are a core part of many businesses in many fields such as law, finance, and technology among others. Automatic understanding of documents such as invoices, contracts, and resumes is lucrative, opening up many new avenues of business. The fields of natural language processing and computer vision have seen tremendous progress through the development of deep learning such that these methods have started to become infused in contemporary document understanding systems. In this survey paper, we review different techniques for document understanding for documents written in English and consolidate methodologies present in literature to act as a jumping-off point for researchers exploring this area.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 3322178716407991214 DOCUMENT # en 1.00\n",
+ "1 metadata 3082305603685610193 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 6720404006088754024 DOCUMENT #/texts/8 abstract 1.00\n",
+ "3 metadata 2132452585218772003 DOCUMENT #/texts/9 abstract 1.00\n",
+ "4 language 5865682553062852405 TEXT #/texts/0 en 0.39\n",
+ "5 semantic 5865682553062852405 TEXT #/texts/0 reference 0.95\n",
+ "6 language 3082305603685610193 TEXT #/texts/1 en 0.72\n",
+ "7 semantic 3082305603685610193 TEXT #/texts/1 header 0.88\n",
+ "8 language 5246139715348675309 TEXT #/texts/2 en 0.45\n",
+ "9 semantic 5246139715348675309 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 13584582387870540825 TEXT #/texts/3 en 0.31\n",
+ "11 semantic 13584582387870540825 TEXT #/texts/3 meta-data 0.95\n",
+ "2308.14978.pdf\n",
+ "title: Vision Grid Transformer for Document Layout Analysis\n",
+ "abstract: Abstract Document pre-trained models and grid-based models have proven to be very effective on various tasks in Document AI. However, for the document layout analysis (DLA) task, existing document pre-trained models, even those pretrained in a multi-modal fashion, usually rely on either textual features or visual features. Grid-based models for DLA are multi-modality but largely neglect the effect of pre-training. To fully leverage multi-modal information and exploit pre-training techniques to learn better representation for DLA, in this paper, we present VGT, a twostream Vision Grid Transformer, in which Grid Transformer (GiT) is proposed and pre-trained for 2D token-level and segment-level semantic understanding. Furthermore, a new dataset named D $^{4}$LA, which is so far the most diverse and detailed manually-annotated benchmark for document layout analysis, is curated and released. Experiment results have illustrated that the proposed VGT model achieves new state-of-the-art results on DLA tasks, e.g. PubLayNet (95. 7% → 96. 2%), DocBank (79. 6% → 84. 1%), and D $^{4}$LA (67. 7% → 68. 8%). The code and models as well as the D $^{4}$LA dataset will be made publicly available $^{1}$.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 16592063621056527461 DOCUMENT # en 1.00\n",
+ "1 metadata 4640372460409057753 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 17100450427175069178 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 17593631429546340130 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 9727257513907321483 TEXT #/texts/0 en 0.41\n",
+ "5 semantic 9727257513907321483 TEXT #/texts/0 reference 0.86\n",
+ "6 language 4640372460409057753 TEXT #/texts/1 en 0.53\n",
+ "7 semantic 4640372460409057753 TEXT #/texts/1 header 0.87\n",
+ "8 language 17143522046456530342 TEXT #/texts/2 en 0.74\n",
+ "9 semantic 17143522046456530342 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 13989699358317696966 TEXT #/texts/3 en 0.11\n",
+ "11 semantic 13989699358317696966 TEXT #/texts/3 meta-data 0.50\n",
+ "2207.11871.pdf\n",
+ "title: Towards Complex Document Understanding By Discrete Reasoning\n",
+ "abstract: ABSTRACT Document Visual Question Answering (VQA) aims to answer questions over visually-rich documents. In this work, we introduce a new Document VQA dataset, named TAT-DQA, which consists of 3,067 document pages comprising semi-structured table(s) and unstructured text as well as 16,558 question-answer pairs. The documents are sampled from financial reports and contain lots of numbers, which means discrete reasoning capability is demanded to answer the questions. Based on TAT-DQA, we further develop a novel model named MHST that takes into account the information in multi-modalities to intelligently address different types of questions with corresponding strategies, i.e., extraction or reasoning. The experiments show that MHST model significantly outperforms the baseline methods, demonstrating its effectiveness. However, the performance still lags far behind that of expert humans. We expect that our TAT-DQA dataset would facilitate the research on understanding of visually-rich documents, especially for scenarios that require discrete reasoning. Also, we hope the proposed model would inspire researchers to design more advanced Document VQA models in future. CCS CONCEPTS · Computing methodologies → Natural language processing; · Information systems → Question answering. KEYWORDS Question Answering, Visually-rich Document Understanding, Discrete Reasoning ACM Reference Format: Fengbin Zhu 1, $^{2}$, Wenqiang Lei3 $^{∗}$, Fuli Feng $^{4}$, Chao Wang$^{2}$, Haozhou Zhang$^{3}$, Tat-Seng Chua$^{1}$. 2022. Towards Complex Document Understanding By Discrete Reasoning. In Proceedings of the 30th ACM International Conference on Multimedia (MM '22), October 10-14, 2022, Lisboa, Portugal. ACM, New York, NY, USA, 10 pages. https://doi.org/10.1145/3503161.3548422 $^{∗}$Corresponding author. ACM ISBN 978-1-4503-9203-7/22/10...$15.00 Question : What was the total cost in Wireless including spectrum license fee in 2019? Derivation : 1,320 + 1,731 = 3,051 Scale : Millions Answer : 3,051,000,000 Figure 1: An example of TAT-DQA dataset. Given a question and a visually-rich document that contains both tabular and textual data, the machine is expected to derive the answer.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 5727170198881628579 DOCUMENT # en 1.0\n",
+ "1 metadata 8475542795139585408 DOCUMENT #/texts/1 title 1.0\n",
+ "2 metadata 11279303476920735275 DOCUMENT #/texts/8 abstract 1.0\n",
+ "3 metadata 16821095994808609750 DOCUMENT #/texts/9 abstract 1.0\n",
+ "4 metadata 4923876684724984128 DOCUMENT #/texts/10 abstract 1.0\n",
+ "5 metadata 1220977146596289779 DOCUMENT #/texts/11 abstract 1.0\n",
+ "6 metadata 2661334982718065771 DOCUMENT #/texts/12 abstract 1.0\n",
+ "7 metadata 11280315739843364000 DOCUMENT #/texts/13 abstract 1.0\n",
+ "8 metadata 3816002088806916798 DOCUMENT #/texts/14 abstract 1.0\n",
+ "9 metadata 8132312948183007779 DOCUMENT #/texts/15 abstract 1.0\n",
+ "10 metadata 9622509244035404651 DOCUMENT #/texts/16 abstract 1.0\n",
+ "11 metadata 6521211824532126123 DOCUMENT #/texts/17 abstract 1.0\n",
+ "2308.11788.pdf\n",
+ "title: An extensible point-based method for data chart value detection\n",
+ "abstract: Abstract. We present an extensible method for identifying semantic points to reverse engineer (i.e. extract the values of) data charts, particularly those in scientific articles. Our method uses a point proposal network (akin to region proposal networks for object detection) to directly predict the position of points of interest in a chart, and it is readily extensible to multiple chart types and chart elements. We focus on complex bar charts in the scientific literature, on which our model is able to detect salient points with an accuracy of 0.8705 F1 (@1.5-cell max deviation); it achieves 0.9810 F1 on synthetically-generated charts similar to those used in prior works. We also explore training exclusively on synthetic data with novel augmentations, reaching surprisingly competent performance in this way (0.6621 F1) on real charts with widely varying appearance, and we further demonstrate our unchanged method applied directly to synthetic pie charts (0.8343 F1). Datasets, trained models, and evaluation code are available at https://github.com/BNLNLP/PPN_model. Keywords: document analysis, chart extraction, value detection\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 15730228459936273311 DOCUMENT # en 1.00\n",
+ "1 metadata 4784013526395200985 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 4060338734311582032 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 16636000959355062064 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 5550430566041753073 TEXT #/texts/0 en 0.43\n",
+ "5 semantic 5550430566041753073 TEXT #/texts/0 reference 0.86\n",
+ "6 language 4784013526395200985 TEXT #/texts/1 en 0.70\n",
+ "7 semantic 4784013526395200985 TEXT #/texts/1 header 0.74\n",
+ "8 language 911163436536595014 TEXT #/texts/2 en 0.70\n",
+ "9 semantic 911163436536595014 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 9723793807474243122 TEXT #/texts/3 en 0.18\n",
+ "11 semantic 9723793807474243122 TEXT #/texts/3 meta-data 0.98\n",
+ "1912.13318.pdf\n",
+ "title: LayoutLM: Pre-training of Text and Layout for Document Image Understanding\n",
+ "abstract: ABSTRACT Pre-training techniques have been verified successfully in a variety of NLP tasks in recent years. Despite the widespread use of pre-training models for NLP applications, they almost exclusively focus on text-level manipulation, while neglecting layout and style information that is vital for document image understanding. In this paper, we propose the LayoutLM to jointly model interactions between text and layout information across scanned document images, which is beneficial for a great number of real-world document image understanding tasks such as information extraction from scanned documents. Furthermore, we also leverage image features to incorporate words' visual information into LayoutLM. To the best of our knowledge, this is the first time that text and layout are jointly learned in a single framework for documentlevel pre-training. It achieves new state-of-the-art results in several downstream tasks, including form understanding (from 70.72 to 79.27), receipt understanding (from 94.02 to 95.24) and document image classification (from 93.07 to 94.42). The code and pre-trained LayoutLM models are publicly available at https://aka.ms/layoutlm. CCS CONCEPTS · Informationsystems → Businessintelligence; · Computing methodologies → Informationextraction; Transferlearning; · Applied computing → Document analysis. KEYWORDS LayoutLM; pre-trained models; document image understanding ACM Reference Format: Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, and Ming Zhou. 2020. LayoutLM: Pre-training of Text and Layout for Document Image Understanding. In Proceedings of the 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '20), August 23-27, 2020, Virtual Event, CA, USA. ACM, New York, NY, USA, 9 pages. https://doi.org/ 10.1145/3394486.3403172 $^{∗}$Equal contributions during internship at Microsoft Research Asia. ACM ISBN 978-1-4503-7998-4/20/08...$15.00\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 7610895368039716585 DOCUMENT # en 1.0\n",
+ "1 metadata 17654429621247340035 DOCUMENT #/texts/1 title 1.0\n",
+ "2 metadata 7726871037747607472 DOCUMENT #/texts/7 abstract 1.0\n",
+ "3 metadata 10978540521033865848 DOCUMENT #/texts/8 abstract 1.0\n",
+ "4 metadata 5934707724399873881 DOCUMENT #/texts/9 abstract 1.0\n",
+ "5 metadata 2378094128453498823 DOCUMENT #/texts/10 abstract 1.0\n",
+ "6 metadata 7726968154032694329 DOCUMENT #/texts/11 abstract 1.0\n",
+ "7 metadata 9608013805800255202 DOCUMENT #/texts/12 abstract 1.0\n",
+ "8 metadata 561746052722555832 DOCUMENT #/texts/13 abstract 1.0\n",
+ "9 metadata 4105288624001774438 DOCUMENT #/texts/14 abstract 1.0\n",
+ "10 metadata 1007175453030828339 DOCUMENT #/texts/15 abstract 1.0\n",
+ "11 metadata 6999006171723600633 DOCUMENT #/texts/16 abstract 1.0\n",
+ "2208.11203.pdf\n",
+ "title: Graph Neural Networks and Representation Embedding for Table Extraction in PDF Documents\n",
+ "abstract: Abstract-Tables are widely used in several types of documents since they can bring important information in a structured way. In scientific papers, tables can sum up novel discoveries and summarize experimental results, making the research comparable and easily understandable by scholars. Several methods perform table analysis working on document images, losing useful information during the conversion from the PDF files since OCR tools can be prone to recognition errors, in particular for text inside tables. The main contribution of this work is to tackle the problem of table extraction, exploiting Graph Neural Networks. Node features are enriched with suitably designed representation embeddings. These representations help to better distinguish not only tables from the other parts of the paper, but also table cells from table headers. We experimentally evaluated the proposed approach on a new dataset obtained by merging the information provided in the PubLayNet and PubTables-1M datasets.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 6493239332379965773 DOCUMENT # en 0.99\n",
+ "1 metadata 2882203704718404424 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 3310489291270722526 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 language 8641064745316605644 TEXT #/texts/0 en 0.57\n",
+ "4 semantic 8641064745316605644 TEXT #/texts/0 reference 0.86\n",
+ "5 language 2882203704718404424 TEXT #/texts/1 en 0.75\n",
+ "6 semantic 2882203704718404424 TEXT #/texts/1 header 0.87\n",
+ "7 language 776884144777806374 TEXT #/texts/2 en 0.45\n",
+ "8 semantic 776884144777806374 TEXT #/texts/2 meta-data 1.00\n",
+ "9 language 520288979263302990 TEXT #/texts/3 en 0.32\n",
+ "10 semantic 520288979263302990 TEXT #/texts/3 meta-data 0.99\n",
+ "11 language 3310489291270722526 TEXT #/texts/4 en 0.92\n",
+ "2305.02549.pdf\n",
+ "title: FormNetV2: Multimodal Graph Contrastive Learning for Form Document Information Extraction\n",
+ "abstract: Abstract The recent advent of self-supervised pretraining techniques has led to a surge in the use of multimodal learning in form document understanding. However, existing approaches that extend the mask language modeling to other modalities require careful multitask tuning, complex reconstruction target designs, or additional pre-training data. In Form-NetV2, we introduce a centralized multimodal graph contrastive learning strategy to unify self-supervised pre-training for all modalities in one loss. The graph contrastive objective maximizes the agreement of multimodal representations, providing a natural interplay for all modalities without special customization. In addition, we extract image features within the bounding box that joins a pair of tokens connected by a graph edge, capturing more targeted visual cues without loading a sophisticated and separately pre-trained image embedder. FormNetV2 establishes new state-of-theart performance on FUNSD, CORD, SROIE and Payment benchmarks with a more compact model size.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 6435374954406078525 DOCUMENT # en 0.99\n",
+ "1 metadata 5772810757726285982 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 1818879276920072344 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 16352104498896647424 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 8132802399624216254 TEXT #/texts/0 en 0.31\n",
+ "5 semantic 8132802399624216254 TEXT #/texts/0 reference 0.66\n",
+ "6 language 5772810757726285982 TEXT #/texts/1 en 0.62\n",
+ "7 semantic 5772810757726285982 TEXT #/texts/1 header 0.68\n",
+ "8 language 2314371849884569015 TEXT #/texts/2 en 0.26\n",
+ "9 semantic 2314371849884569015 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 1818879276920072344 TEXT #/texts/3 en 0.32\n",
+ "11 semantic 1818879276920072344 TEXT #/texts/3 header 0.93\n",
+ "2104.02416.pdf\n",
+ "title: Variational Transformer Networks for Layout Generation\n",
+ "abstract: Abstract Generative models able to synthesize layouts of different kinds (e.g. documents, user interfaces or furniture arrangements) are a useful tool to aid design processes and as a first step in the generation of synthetic data, among other tasks. We exploit the properties of self-attention layers to capture high level relationships between elements in a layout, and use these as the building blocks of the well-known Variational Autoencoder (VAE) formulation. Our proposed Variational Transformer Network (VTN) is capable of learning margins, alignments and other global design rules without explicit supervision. Layouts sampled from our model have a high degree of resemblance to the training data, while demonstrating appealing diversity. In an extensive evaluation on publicly available benchmarks for different layout types VTNs achieve state-of-the-art diversity and perceptual quality. Additionally, we show the capabilities of this method as part of a document layout detection pipeline.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 4026452083066056880 DOCUMENT # en 1.00\n",
+ "1 metadata 8211240174221100152 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 18116077653495279146 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 12556626668984946601 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 6185815998567946240 TEXT #/texts/0 en 0.55\n",
+ "5 semantic 6185815998567946240 TEXT #/texts/0 text 0.69\n",
+ "6 language 8211240174221100152 TEXT #/texts/1 en 0.60\n",
+ "7 semantic 8211240174221100152 TEXT #/texts/1 header 0.95\n",
+ "8 language 7735061465405094816 TEXT #/texts/2 en 0.29\n",
+ "9 semantic 7735061465405094816 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 9235665801254691886 TEXT #/texts/3 fr 0.11\n",
+ "11 semantic 9235665801254691886 TEXT #/texts/3 meta-data 0.99\n",
+ "2206.01062.pdf\n",
+ "title: DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis\n",
+ "abstract: ABSTRACT\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10617426972143493900 DOCUMENT # en 1.00\n",
+ "1 metadata 6692027461717503948 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 12275254655398075866 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 language 9792106968482800026 TEXT #/texts/0 en 0.45\n",
+ "4 semantic 9792106968482800026 TEXT #/texts/0 reference 0.95\n",
+ "5 language 6692027461717503948 TEXT #/texts/1 en 0.52\n",
+ "6 semantic 6692027461717503948 TEXT #/texts/1 header 0.90\n",
+ "7 language 16365913279299628941 TEXT #/texts/2 de 0.28\n",
+ "8 semantic 16365913279299628941 TEXT #/texts/2 meta-data 0.93\n",
+ "9 language 1613519574664415958 TEXT #/texts/3 de 0.56\n",
+ "10 semantic 1613519574664415958 TEXT #/texts/3 meta-data 0.97\n",
+ "11 language 17669899987897193800 TEXT #/texts/4 en 0.37\n",
+ "2212.09621.pdf\n",
+ "title: WUKONG-READER: Multi-modal Pre-training for Fine-grained Visual Document Understanding\n",
+ "abstract: Abstract Unsupervised pre-training on millions of digital-born or scanned documents has shown promising advances in visual document understanding (VDU). While various visionlanguage pre-training objectives are studied in existing solutions, the document textline, as an intrinsic granularity in VDU, has seldom been explored so far. A document textline usually contains words that are spatially and semantically correlated, which can be easily obtained from OCR engines. In this paper, we propose WUKONG-READER, trained with new pre-training objectives to leverage the structural knowledge nested in document textlines. We introduce textline-region contrastive learning to achieve fine-grained alignment between the visual regions and texts of document textlines. Furthermore, masked region modeling and textline-grid matching are also designed to enhance the visual and layout representations of textlines. Experiments show that our WUKONG-READER has superior performance on various VDU tasks such as information extraction. The fine-grained alignment over textlines also empowers WUKONG-READER with promising localization ability.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 622493043892949961 DOCUMENT # en 0.99\n",
+ "1 metadata 7134492898741291181 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 10292335078860401705 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 30206745668943456 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 14162084248726860593 TEXT #/texts/0 en 0.49\n",
+ "5 semantic 14162084248726860593 TEXT #/texts/0 text 0.56\n",
+ "6 language 7134492898741291181 TEXT #/texts/1 en 0.68\n",
+ "7 semantic 7134492898741291181 TEXT #/texts/1 header 0.92\n",
+ "8 language 9121673767507891652 TEXT #/texts/2 en 0.30\n",
+ "9 semantic 9121673767507891652 TEXT #/texts/2 meta-data 0.98\n",
+ "10 language 10292335078860401705 TEXT #/texts/3 en 0.32\n",
+ "11 semantic 10292335078860401705 TEXT #/texts/3 header 0.93\n",
+ "2309.09506.pdf\n",
+ "title: LAYOUTNUWA: REVEALING THE HIDDEN LAYOUT EXPERTISE OF LARGE LANGUAGE MODELS\n",
+ "abstract: ABSTRACT Graphic layout generation, a growing research field, plays a significant role in user engagement and information perception. Existing methods primarily treat layout generation as a numerical optimization task, focusing on quantitative aspects while overlooking the semantic information of layout, such as the relationship between each layout element. In this paper, we propose LayoutNUWA, the first model that treats layout generation as a code generation task to enhance semantic information and harnesses the hidden layout expertise of large language models (LLMs). More concretely, we develop a Code Instruct Tuning (CIT) approach comprising three interconnected modules: 1) the Code Initialization (CI) module quantifies the numerical conditions and initializes them as HTML code with strategically placed masks; 2) the Code Completion (CC) module employs the formatting knowledge of LLMs to fill in the masked portions within the HTML code; 3) the Code Rendering (CR) module transforms the completed code into the final layout output, ensuring a highly interpretable and transparent layout generation procedure that directly maps code to a visualized layout. We attain significant state-of-the-art performance (even over 50% improvements) on multiple datasets, showcasing the strong capabilities of LayoutNUWA. Our code is available at https://github.com/ProjectNUWA/LayoutNUWA.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 1102428998215578833 DOCUMENT # en 1.00\n",
+ "1 metadata 2952583967065414989 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 1214126413230308130 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 17592137747910693327 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 7724916533818732031 TEXT #/texts/0 en 0.35\n",
+ "5 semantic 7724916533818732031 TEXT #/texts/0 reference 0.95\n",
+ "6 language 2952583967065414989 TEXT #/texts/1 en 0.44\n",
+ "7 semantic 2952583967065414989 TEXT #/texts/1 header 0.86\n",
+ "8 language 4814086422136468295 TEXT #/texts/2 en 0.21\n",
+ "9 semantic 4814086422136468295 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 1214126413230308130 TEXT #/texts/3 en 0.64\n",
+ "11 semantic 1214126413230308130 TEXT #/texts/3 header 1.00\n",
+ "2305.03393.pdf\n",
+ "title: Optimized Table Tokenization for Table Structure Recognition\n",
+ "abstract: Abstract. Extracting tables from documents is a crucial task in any document conversion pipeline. Recently, transformer-based models have demonstrated that table-structure can be recognized with impressive accuracy using Image-to-Markup-Sequence (Im2Seq) approaches. Taking only the image of a table, such models predict a sequence of tokens (e.g. in HTML, LaTeX) which represent the structure of the table. Since the token representation of the table structure has a significant impact on the accuracy and run-time performance of any Im2Seq model, we investigate in this paper how table-structure representation can be optimised. We propose a new, optimised table-structure language (OTSL) with a minimized vocabulary and specific rules. The benefits of OTSL are that it reduces the number of tokens to 5 (HTML needs 28+) and shortens the sequence length to half of HTML on average. Consequently, model accuracy improves significantly, inference time is halved compared to HTML-based models, and the predicted table structures are always syntactically correct. This in turn eliminates most post-processing needs. Popular table structure data-sets will be published in OTSL format to the community. Keywords: Table Structure Recognition · Data Representation · Transformers · Optimization.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 5374198401597601690 DOCUMENT # en 1.00\n",
+ "1 metadata 5888487835083259627 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 1264022019336188589 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 5349896953270355381 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 15896613119077796986 TEXT #/texts/0 en 0.62\n",
+ "5 semantic 15896613119077796986 TEXT #/texts/0 reference 0.66\n",
+ "6 language 5888487835083259627 TEXT #/texts/1 en 0.74\n",
+ "7 semantic 5888487835083259627 TEXT #/texts/1 text 0.58\n",
+ "8 language 13588719527350604669 TEXT #/texts/2 en 0.31\n",
+ "9 semantic 13588719527350604669 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 51599421743934370 TEXT #/texts/3 en 0.24\n",
+ "11 semantic 51599421743934370 TEXT #/texts/3 meta-data 1.00\n",
+ "2206.00785.pdf\n",
+ "title: Delivering Document Conversion as a Cloud Service with High Throughput and Responsiveness\n",
+ "abstract: Abstract-Document understanding is a key business process in the data-driven economy since documents are central to knowledge discovery and business insights. Converting documents into a machine-processable format is a particular challenge here due to their huge variability in formats and complex structure. Accordingly, many algorithms and machine-learning methods emerged to solve particular tasks such as Optical Character Recognition (OCR), layout analysis, table-structure recovery, figure understanding, etc. We observe the adoption of such methods in document understanding solutions offered by all major cloud providers. Yet, publications outlining how such services are designed and optimized to scale in the cloud are scarce. In this paper, we focus on the case of document conversion to illustrate the particular challenges of scaling a complex data processing pipeline with a strong reliance on machine-learning methods on cloud infrastructure. Our key objective is to achieve high scalability and responsiveness for different workload profiles in a well-defined resource budget. We outline the requirements, design, and implementation choices of our document conversion service and reflect on the challenges we faced. Evidence for the scaling behavior and resource efficiency is provided for two alternative workload distribution strategies and deployment configurations. Our best-performing method achieves sustained throughput of over one million PDF pages per hour on 3072 CPU cores across 192 nodes. Index Terms-cloud applications, document understanding, distributed computing, artificial intelligence\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 13152862009447693765 DOCUMENT # en 1.00\n",
+ "1 metadata 8967552455475999131 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 15035726207261556942 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 4662798960261328447 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 17724327985223046044 TEXT #/texts/0 en 0.37\n",
+ "5 semantic 17724327985223046044 TEXT #/texts/0 reference 0.86\n",
+ "6 language 8967552455475999131 TEXT #/texts/1 en 0.91\n",
+ "7 semantic 8967552455475999131 TEXT #/texts/1 header 0.73\n",
+ "8 language 10556124696351850413 TEXT #/texts/2 en 0.36\n",
+ "9 semantic 10556124696351850413 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 18140503323381183580 TEXT #/texts/3 en 0.31\n",
+ "11 semantic 18140503323381183580 TEXT #/texts/3 meta-data 0.99\n",
+ "2108.01249.pdf\n",
+ "title: Kota Yamaguchi CyberAgent\n",
+ "abstract: Abstract unified manner, 2) how to formulate the learning problem, and 3) how to evaluate the quality of documents. Vector graphic documents present visual elements in a resolution free, compact format and are often seen in creative applications. In this work, we attempt to learn a generative model of vector graphic documents. We define vector graphic documents by a multi-modal set of attributes associated to a canvas and a sequence of visual elements such as shapes, images, or texts, and train variational autoencoders to learn the representation of the documents. We collect a new dataset of design templates from an online service that features complete document structure including occluded elements. In experiments, we show that our model, named CanvasVAE, constitutes a strong baseline for generative modeling of vector graphic documents.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 11544672061719147914 DOCUMENT # en 0.99\n",
+ "1 metadata 1865621952677141032 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 9499241547212596575 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 17505636629490201581 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 metadata 5018213406211262976 DOCUMENT #/texts/5 abstract 1.00\n",
+ "5 language 14555891895603680624 TEXT #/texts/0 en 0.64\n",
+ "6 semantic 14555891895603680624 TEXT #/texts/0 header 0.67\n",
+ "7 language 1865621952677141032 TEXT #/texts/1 fr 0.35\n",
+ "8 semantic 1865621952677141032 TEXT #/texts/1 reference 0.58\n",
+ "9 language 1076108524886774303 TEXT #/texts/2 en 0.11\n",
+ "10 semantic 1076108524886774303 TEXT #/texts/2 meta-data 0.95\n",
+ "11 language 9499241547212596575 TEXT #/texts/3 en 0.32\n",
+ "2106.15117.pdf\n",
+ "title: SDL: NEW DATA GENERATION TOOLS FOR FULL-LEVEL ANNOTATED DOCUMENT LAYOUT\n",
+ "abstract: ABSTRACT We present a novel data generation tool for document processing. The tool focuses on providing maximal level of visual information in a normal type document, ranging from character position to paragraph-level position. It also enables working with a large dataset on low-resource languages as well as providing a mean of processing thorough full-level information of documented text. The data generation tools come with a dataset of 320000 Vietnamese synthetic document images and an instruction to generate a dataset of similar size on other languages. The repository can be found at: https://github.com/tson1997/SDL-Document-Image-Generation\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 12852606380513963941 DOCUMENT # en 1.00\n",
+ "1 metadata 16022826563161339384 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 3623797934402135081 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 11818793719691083935 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 13076215469834646960 TEXT #/texts/0 en 0.31\n",
+ "5 semantic 13076215469834646960 TEXT #/texts/0 reference 0.95\n",
+ "6 language 16022826563161339384 TEXT #/texts/1 en 0.56\n",
+ "7 semantic 16022826563161339384 TEXT #/texts/1 header 0.94\n",
+ "8 language 8551711514533328919 TEXT #/texts/2 en 0.27\n",
+ "9 semantic 8551711514533328919 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 16028837443172790694 TEXT #/texts/3 en 0.64\n",
+ "11 semantic 16028837443172790694 TEXT #/texts/3 meta-data 0.97\n",
+ "2303.13839.pdf\n",
+ "title: HRDoc: Dataset and Baseline Method toward Hierarchical Reconstruction of Document Structures\n",
+ "abstract: Abstract The problem of document structure reconstruction refers to converting digital or scanned documents into corresponding semantic structures. Most existing works mainly focus on splitting the boundary of each element in a single document page, neglecting the reconstruction of semantic structure in multi-page documents. This paper introduces hierarchical reconstruction of document structures as a novel task suitable for NLP and CV fields. To better evaluate the system performance on the new task, we built a large-scale dataset named HRDoc, which consists of 2,500 multi-page documents with nearly 2 million semantic units. Every document in HRDoc has line-level annotations including categories and relations obtained from rule-based extractors and human annotators. Moreover, we proposed an encoder-decoder-based hierarchical document structure parsing system (DSPS) to tackle this problem. By adopting a multi-modal bidirectional encoder and a structure-aware GRU decoder with soft-mask operation, the DSPS model surpass the baseline method by a large margin. All scripts and datasets will be made publicly available at https://github.com/jfma-USTC/HRDoc.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 17037468589192894036 DOCUMENT # en 0.98\n",
+ "1 metadata 1104942195736734785 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 10718753664355590567 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 7728974788577724069 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 14970080583824688175 TEXT #/texts/0 en 0.19\n",
+ "5 semantic 14970080583824688175 TEXT #/texts/0 text 0.99\n",
+ "6 language 1104942195736734785 TEXT #/texts/1 en 0.57\n",
+ "7 semantic 1104942195736734785 TEXT #/texts/1 header 0.81\n",
+ "8 language 4710658602481414826 TEXT #/texts/2 en 0.33\n",
+ "9 semantic 4710658602481414826 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 6306836046963688445 TEXT #/texts/3 en 0.71\n",
+ "11 semantic 6306836046963688445 TEXT #/texts/3 meta-data 0.93\n",
+ "2004.08686.pdf\n",
+ "title: A Large Dataset of Historical Japanese Documents with Complex Layouts\n",
+ "abstract: Abstract Deep learning-based approaches for automatic document layout analysis and content extraction have the potential to unlock rich information trapped in historical documents on a large scale. One major hurdle is the lack of large datasets for training robust models. In particular, little training data exist for Asian languages. To this end, we present HJDataset, a Large Dataset of H istorical J apanese Documents with Complex Layouts. It contains over 250,000 layout element annotations of seven types. In addition to bounding boxes and masks of the content regions, it also includes the hierarchical structures and reading orders for layout elements. The dataset is constructed using a combination of human and machine efforts. A semi-rule based method is developed to extract the layout elements, and the results are checked by human inspectors. The resulting large-scale dataset is used to provide baseline performance analyses for text region detection using state-of-the-art deep learning models. And we demonstrate the usefulness of the dataset on real-world document digitization tasks. The dataset is available at https://dell-research-harvard. github.io/HJDataset/.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 13930001176231161878 DOCUMENT # en 1.00\n",
+ "1 metadata 7793786044407703107 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 8981345721148683510 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 849097712286451589 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 metadata 6803870968619498453 DOCUMENT #/texts/7 abstract 1.00\n",
+ "5 language 13458486629839261648 TEXT #/texts/0 en 0.39\n",
+ "6 semantic 13458486629839261648 TEXT #/texts/0 reference 0.91\n",
+ "7 language 7793786044407703107 TEXT #/texts/1 en 0.84\n",
+ "8 semantic 7793786044407703107 TEXT #/texts/1 header 0.84\n",
+ "9 language 16464535545893033410 TEXT #/texts/2 ca 0.24\n",
+ "10 semantic 16464535545893033410 TEXT #/texts/2 meta-data 1.00\n",
+ "11 language 12942928698614636971 TEXT #/texts/3 en 0.44\n",
+ "2305.10448.pdf\n",
+ "title: Sequence-to-Sequence Pre-training with Unified Modality Masking for Visual Document Understanding\n",
+ "abstract: Abstract This paper presents GenDoc, a general sequence-to-sequence document understanding model pre-trained with unified masking across three modalities: text, image, and layout. The proposed model utilizes an encoderdecoder architecture, which allows for increased adaptability to a wide range of downstream tasks with diverse output formats, in contrast to the encoder-only models commonly employed in document understanding. In addition to the traditional text infilling task used in previous encoder-decoder models, our pre-training extends to include tasks of masked image token prediction and masked layout prediction. We also design modalityspecific instruction and adopt both disentangled attention and the mixture-of-modalityexperts strategy to effectively capture the information leveraged by each modality. Evaluation of the proposed model through extensive experiments on several downstream tasks in document understanding demonstrates its ability to achieve superior or competitive performance compared to state-of-the-art approaches. Our analysis further suggests that GenDoc is more robust than the encoder-only models in scenarios where the OCR quality is imperfect.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 6647708032236096662 DOCUMENT # en 1.00\n",
+ "1 metadata 7053189837528699617 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 17614263210074065046 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 14376828063070828111 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 1081297291117528818 TEXT #/texts/0 en 0.68\n",
+ "5 semantic 1081297291117528818 TEXT #/texts/0 text 0.83\n",
+ "6 language 7053189837528699617 TEXT #/texts/1 en 0.79\n",
+ "7 semantic 7053189837528699617 TEXT #/texts/1 header 0.91\n",
+ "8 language 15653723734524768695 TEXT #/texts/2 en 0.42\n",
+ "9 semantic 15653723734524768695 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 3724175546549476443 TEXT #/texts/3 de 0.47\n",
+ "11 semantic 3724175546549476443 TEXT #/texts/3 meta-data 1.00\n",
+ "2108.09433.pdf\n",
+ "title: )\n",
+ "abstract: Abstract. Precise boundary annotations of image regions can be crucial for downstream applications which rely on region-class semantics. Some document collections contain densely laid out, highly irregular and overlapping multi-class region instances with large range in aspect ratio. Fully automatic boundary estimation approaches tend to be data intensive, cannot handle variable-sized images and produce sub-optimal results for aforementioned images. To address these issues, we propose BoundaryNet, a novel resizing-free approach for high-precision semi-automatic layout annotation. The variable-sized user selected region of interest is first processed by an attention-guided skip network. The network optimization is guided via Fast Marching distance maps to obtain a good quality initial boundary estimate and an associated feature representation. These outputs are processed by a Residual Graph Convolution Network optimized using Hausdorff loss to obtain the final region boundary. Results on a challenging image manuscript dataset demonstrate that BoundaryNet outperforms strong baselines and produces high-quality semantic region boundaries. Qualitatively, our approach generalizes across multiple document image datasets containing different script systems and layouts, all without additional fine-tuning. We integrate BoundaryNet into a document annotation system and show that it provides high annotation throughput compared to manual and fully automatic alternatives. Keywords: document layout analysis · interactive · deep learning\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 7286648885983317681 DOCUMENT # en 0.97\n",
+ "1 metadata 6891813752407437231 DOCUMENT #/texts/4 title 1.00\n",
+ "2 metadata 1285909825609735560 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 1594751657300697312 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 17989209837640758744 TEXT #/texts/0 en 0.45\n",
+ "5 semantic 17989209837640758744 TEXT #/texts/0 reference 0.86\n",
+ "6 language 18061687588185802103 TEXT #/texts/1 en 0.60\n",
+ "7 semantic 18061687588185802103 TEXT #/texts/1 header 0.97\n",
+ "8 language 16389516399927816228 TEXT #/texts/2 en 0.53\n",
+ "9 semantic 16389516399927816228 TEXT #/texts/2 reference 0.98\n",
+ "10 language 6891813752407437230 TEXT #/texts/3 en 0.99\n",
+ "11 semantic 6891813752407437230 TEXT #/texts/3 reference 1.00\n",
+ "2203.15143.pdf\n",
+ "title: Towards End-to-End Unified Scene Text Detection and Layout Analysis\n",
+ "abstract: Abstract Scene text detection and document layout analysis have long been treated as two separate tasks in different image domains. In this paper, we bring them together and introduce the task of unified scene text detection and layout analysis. The first hierarchical scene text dataset is introduced to enable this novel research task. We also propose a novel method that is able to simultaneously detect scene text and form text clusters in a unified way. Comprehensive experiments show that our unified model achieves better performance than multiple well-designed baseline methods. Additionally, this model achieves stateof-the-art results on multiple scene text detection datasets without the need of complex post-processing. Dataset and code: https://github.com/google-research- datasets/hiertext.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 16210526061993160862 DOCUMENT # en 1.00\n",
+ "1 metadata 12075476996982694064 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 14820214027740521356 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 5609833535106633893 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 metadata 18311416795872171667 DOCUMENT #/texts/6 abstract 1.00\n",
+ "5 language 2157359505453830444 TEXT #/texts/0 en 0.30\n",
+ "6 semantic 2157359505453830444 TEXT #/texts/0 text 0.89\n",
+ "7 language 12075476996982694064 TEXT #/texts/1 en 0.72\n",
+ "8 semantic 12075476996982694064 TEXT #/texts/1 header 0.92\n",
+ "9 language 6712749907627824000 TEXT #/texts/2 en 0.29\n",
+ "10 semantic 6712749907627824000 TEXT #/texts/2 meta-data 0.99\n",
+ "11 language 6560726583765693241 TEXT #/texts/3 en 0.09\n",
+ "2212.09877.pdf\n",
+ "title: LayoutDETR: Detection Transformer Is a Good Multimodal Layout Designer\n",
+ "abstract: Abstract Graphic layout designs play an essential role in visual communication. Yet handcrafting layout designs are skilldemanding, time-consuming, and non-scalable to batch production. Although generative models emerge to make design automation no longer utopian, it remains non-trivial to customize designs that comply with designers' multimodal desires, i.e., constrained by background images and driven by foreground contents. In this study, we propose LayoutDETR that inherits the high quality and realism from generative modeling, in the meanwhile reformulating content-aware requirements as a detection problem: we learn to detect in a background image the reasonable locations, scales, and spatial relations for multimodal elements in a layout. Experiments validate that our solution yields new state-of-the-art performance for layout generation on public benchmarks and on our newly-curated ads banner dataset. For practical usage, we build our solution into a graphical system that facilitates user studies. We demonstrate that our designs attract more subjective preference than baselines by significant margins. Our code, models, dataset, graphical system, and demos are available at GitHub.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10084390588055568504 DOCUMENT # en 1.00\n",
+ "1 metadata 11994464921092174700 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 7850596257863206924 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 17827466626641903476 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 10295852222025669617 TEXT #/texts/0 en 0.53\n",
+ "5 semantic 10295852222025669617 TEXT #/texts/0 reference 0.89\n",
+ "6 language 11994464921092174700 TEXT #/texts/1 en 0.54\n",
+ "7 semantic 11994464921092174700 TEXT #/texts/1 header 0.80\n",
+ "8 language 16518229938452596987 TEXT #/texts/2 en 0.51\n",
+ "9 semantic 16518229938452596987 TEXT #/texts/2 meta-data 0.97\n",
+ "10 language 8884254896872574636 TEXT #/texts/3 en 0.21\n",
+ "11 semantic 8884254896872574636 TEXT #/texts/3 meta-data 1.00\n",
+ "2303.10787.pdf\n",
+ "title: Diffusion-based Document Layout Generation\n",
+ "abstract: Abstract. We develop a diffusion-based approach for various document layout sequence generation. Layout sequences specify the contents of a document design in an explicit format. Our novel diffusion-based approach works in the sequence domain rather than the image domain in order to permit more complex and realistic layouts. We also introduce a new metric, Document Earth Mover's Distance (Doc-EMD). By considering similarity between heterogeneous categories document designs, we handle the shortcomings of prior document metrics that only evaluate the same category of layouts. Our empirical analysis shows that our diffusion-based approach is comparable to or outperforming other previous methods for layout generation across various document datasets. Moreover, our metric is capable of differentiating documents better than previous metrics for specific cases. Keywords: Structured document generation · Document layout · Diffusion methods · Generative models.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 9758273175405515723 DOCUMENT # en 0.98\n",
+ "1 metadata 16052403849674172506 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 2448372797569194825 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 9845471528146357593 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 7017913058660137428 TEXT #/texts/0 en 0.24\n",
+ "5 semantic 7017913058660137428 TEXT #/texts/0 text 0.89\n",
+ "6 language 16052403849674172506 TEXT #/texts/1 en 0.37\n",
+ "7 semantic 16052403849674172506 TEXT #/texts/1 header 0.96\n",
+ "8 language 6476430098925505246 TEXT #/texts/2 en 0.49\n",
+ "9 semantic 6476430098925505246 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 12218180187596295929 TEXT #/texts/3 en 0.56\n",
+ "11 semantic 12218180187596295929 TEXT #/texts/3 meta-data 0.97\n",
+ "2201.11438.pdf\n",
+ "title: DocSegTr: An Instance-Level End-to-End Document Image Segmentation Transformer\n",
+ "abstract: Abstract. Understanding documents with rich layouts is an essential step towards information extraction. Business intelligence processes often require the extraction of useful semantic content from documents at a large scale for subsequent decision-making tasks. In this context, instance-level segmentation of different document objects(title, sections, figures, tables and so on) has emerged as an interesting problem for the document layout analysis community. To advance the research in this direction, we present a transformer-based model for end-to-end segmentation of complex layouts in document images. To our knowledge, this is the first work on transformer-based document segmentation. Extensive experimentation on the PubLayNet dataset shows that our model achieved comparable or better segmentation performance than the existing state-of-the-art approaches. We hope our simple and flexible framework could serve as a promising baseline for instance-level recognition tasks in document images. Keywords: Document Layout Analysis · Instance-Level Segmentation · Transformers · Information extraction\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 208221823398909622 DOCUMENT # en 0.97\n",
+ "1 metadata 14220025198384659792 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 820982994215739284 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 15767495618934522204 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 11806661338889905799 TEXT #/texts/0 en 0.17\n",
+ "5 semantic 11806661338889905799 TEXT #/texts/0 text 0.88\n",
+ "6 language 14220025198384659792 TEXT #/texts/1 en 0.51\n",
+ "7 semantic 14220025198384659792 TEXT #/texts/1 header 0.89\n",
+ "8 language 8018994052576249341 TEXT #/texts/2 en 0.36\n",
+ "9 semantic 8018994052576249341 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 3272490091820520194 TEXT #/texts/3 en 0.68\n",
+ "11 semantic 3272490091820520194 TEXT #/texts/3 meta-data 0.99\n",
+ "2211.08863.pdf\n",
+ "title: ChartParser: Automatic Chart Parsing for Print-Impaired\n",
+ "abstract: Abstract Infographics are often an integral component of scientific documents for reporting qualitative or quantitative findings as they make it much simpler to comprehend the underlying complex information. However, their interpretation continues to be a challenge for the blind, low-vision, and other print-impaired (BLV) individuals. In this paper, we propose ChartParser, a fully automated pipeline that leverages deep learning, OCR, and image processing techniques to extract all figures from a research paper, classify them into various chart categories (bar chart, line chart, etc.) and obtain relevant information from them, specifically bar charts (including horizontal, vertical, stacked horizontal and stacked vertical charts) which already have several exciting challenges. Finally, we present the retrieved content in a tabular format that is screen-reader friendly and accessible to the BLV users. We present a thorough evaluation of our approach by applying our pipeline to sample real-world annotated bar charts from research papers.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 9575898009567312608 DOCUMENT # en 1.00\n",
+ "1 metadata 13385598999438028549 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 2004079478680832734 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 7547314663753897690 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 15463886055130069472 TEXT #/texts/0 en 0.37\n",
+ "5 semantic 15463886055130069472 TEXT #/texts/0 reference 0.91\n",
+ "6 language 13385598999438028549 TEXT #/texts/1 en 0.58\n",
+ "7 semantic 13385598999438028549 TEXT #/texts/1 reference 0.51\n",
+ "8 language 17622143137064393226 TEXT #/texts/2 en 0.20\n",
+ "9 semantic 17622143137064393226 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 1447479644763299756 TEXT #/texts/3 en 0.47\n",
+ "11 semantic 1447479644763299756 TEXT #/texts/3 meta-data 0.58\n",
+ "2111.05736.pdf\n",
+ "title: Multimodal Approach for Metadata Extraction from German Scientific Publications\n",
+ "abstract: Abstract Nowadays, metadata information is often given by the authors themselves upon submission. However, a significant part of already existing research papers have missing or incomplete metadata information. German scientific papers come in a large variety of layouts which makes the extraction of metadata a non-trivial task that requires a precise way to classify the metadata extracted from the documents. In this paper, we propose a multimodal deep learning approach for metadata extraction from scientific papers in the German language. We consider multiple types of input data by combining natural language processing and image vision processing. This model aims to increase the overall accuracy of metadata extraction compared to other state-of-the-art approaches. It enables the utilization of both spatial and contextual features in order to achieve a more reliable extraction. Our model for this approach was trained on a dataset consisting of around 8800 documents and is able to obtain an overall F1-score of 0.923. CCS Concepts: · Computing methodologies → Information extraction; Computer vision representations; Natural language processing; Supervised learning by classification; · Applied computing → Document metadata. Keywords: natural language processing, computer vision, metadata extraction, deep learning, biLSTM, classification, multimodality\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 6214813008397517794 DOCUMENT # en 1.00\n",
+ "1 metadata 9336427579926614669 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 8542053257928421913 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 10087057927913462578 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 metadata 13952774756382584407 DOCUMENT #/texts/8 abstract 1.00\n",
+ "5 metadata 3716015961100721764 DOCUMENT #/texts/9 abstract 1.00\n",
+ "6 language 9086007843929612395 TEXT #/texts/0 en 0.54\n",
+ "7 semantic 9086007843929612395 TEXT #/texts/0 reference 0.51\n",
+ "8 language 9336427579926614669 TEXT #/texts/1 en 0.81\n",
+ "9 semantic 9336427579926614669 TEXT #/texts/1 header 0.52\n",
+ "10 language 6158805096068987949 TEXT #/texts/2 fr 0.66\n",
+ "11 semantic 6158805096068987949 TEXT #/texts/2 meta-data 1.00\n",
+ "2206.10253.pdf\n",
+ "title: Document Navigability: A Need for Print-Impaired\n",
+ "abstract: Abstract Printed documents continue to be a challenge for blind, low-vision, and other print-disabled (BLV) individuals. In this paper, we focus on the specific problem of (in-)accessibility of internal references to citations, footnotes, figures, tables and equations. While sighted users can flip to the referenced content and flip back in seconds, linear audio narration that BLV individuals rely on makes following these references extremely hard. We propose a vision based technique to locate the referenced content and extract metadata needed to (in subsequent work) inline a content summary into the audio narration. We apply our technique to citations in scientific documents and find it works well both on born-digital as well as scanned documents.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 8367168009336343541 DOCUMENT # en 1.00\n",
+ "1 metadata 10890231688151233893 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 16071323955613293918 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 16575161736546407260 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 12424868080524350218 TEXT #/texts/0 en 0.41\n",
+ "5 semantic 12424868080524350218 TEXT #/texts/0 reference 0.95\n",
+ "6 language 10890231688151233893 TEXT #/texts/1 en 0.53\n",
+ "7 semantic 10890231688151233893 TEXT #/texts/1 reference 0.49\n",
+ "8 language 15018036349796390670 TEXT #/texts/2 en 0.55\n",
+ "9 semantic 15018036349796390670 TEXT #/texts/2 meta-data 0.98\n",
+ "10 language 2095024526711489732 TEXT #/texts/3 sr 0.07\n",
+ "11 semantic 2095024526711489732 TEXT #/texts/3 meta-data 1.00\n",
+ "2306.01058.pdf\n",
+ "title: Are Layout-Infused Language Models Robust to Layout Distribution Shifts? A Case Study with Scientific Documents\n",
+ "abstract: Abstract Recent work has shown that infusing layout features into language models (LMs) improves processing of visually-rich documents such as scientific papers. Layout-infused LMs are often evaluated on documents with familiar layout features (e.g., papers from the same publisher), but in practice models encounter documents with unfamiliar distributions of layout features, such as new combinations of text sizes and styles, or new spatial configurations of textual elements. In this work, we test whether layoutinfused LMs are robust to layout distribution shifts. As a case study, we use the task of scientific document structure recovery, segmenting a scientific paper into its structural categories (e.g., TITLE, CAPTION, REFERENCE). To emulate distribution shifts that occur in practice, we re-partition the GROTOAP2 dataset. We find that under layout distribution shifts model performance degrades by up to 20 F1. Simple training strategies, such as increasing training diversity, can reduce this degradation by over 35% relative F1; however, models fail to reach in-distribution performance in any tested out-of-distribution conditions. This work highlights the need to consider layout distribution shifts during model evaluation, and presents a methodology for conducting such evaluations. 1\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 12891525512286936503 DOCUMENT # en 1.00\n",
+ "1 metadata 17168748629924448588 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 11408116613210739775 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 720193478378176240 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 4461508289038516592 TEXT #/texts/0 en 0.37\n",
+ "5 semantic 4461508289038516592 TEXT #/texts/0 reference 0.66\n",
+ "6 language 17168748629924448588 TEXT #/texts/1 en 0.56\n",
+ "7 semantic 17168748629924448588 TEXT #/texts/1 header 0.94\n",
+ "8 language 14393954923057164949 TEXT #/texts/2 en 0.43\n",
+ "9 semantic 14393954923057164949 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 13150667996460248282 TEXT #/texts/3 en 0.56\n",
+ "11 semantic 13150667996460248282 TEXT #/texts/3 meta-data 0.98\n",
+ "2112.05112.pdf\n",
+ "title: BLT: Bidirectional Layout Transformer for Controllable Layout Generation\n",
+ "abstract: Abstract Creating visual layouts is an important step in graphic design. Automatic generation of such layouts is important as we seek scale-able and diverse visual designs. Prior works on automatic layout generation focus on unconditional generation, in which the models generate layouts while neglecting user needs for specific problems. To advance conditional layout generation, we introduce BLT, a bidirectional layout transformer. BLT differs from autoregressive decoding as it first generates a draft layout that satisfies the user inputs and then refines the layout iteratively. We verify the proposed model on multiple benchmarks with various fidelity metrics. Our results demonstrate two key advances to the state-of-the-art layout transformer models. First, our model empowers layout transformers to fulfill controllable layout generation. Second, our model slashes the linear inference time in autoregressive decoding into a constant complexity, thereby achieving 4 x-10 x speedups in generating a layout at inference time.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 3487215798834057801 DOCUMENT # en 1.00\n",
+ "1 metadata 9988876189333205714 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 14707720847299285769 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 10029200523167255709 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 11091681416119290226 TEXT #/texts/0 en 0.43\n",
+ "5 semantic 11091681416119290226 TEXT #/texts/0 reference 0.89\n",
+ "6 language 9988876189333205714 TEXT #/texts/1 en 0.72\n",
+ "7 semantic 9988876189333205714 TEXT #/texts/1 header 0.90\n",
+ "8 language 10607460801012111610 TEXT #/texts/2 en 0.21\n",
+ "9 semantic 10607460801012111610 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 14765350902884342761 TEXT #/texts/3 ru 0.12\n",
+ "11 semantic 14765350902884342761 TEXT #/texts/3 meta-data 1.00\n",
+ "2303.00289.pdf\n",
+ "title: STRUCTEXTV2: MASKED VISUAL-TEXTUAL PREDIC-TION FOR DOCUMENT IMAGE PRE-TRAINING\n",
+ "abstract: ABSTRACT In this paper, we present StrucTexTv2, an effective document image pre-training framework, by performing masked visual-textual prediction. It consists of two self-supervised pre-training tasks: masked image modeling and masked language modeling, based on text region-level image masking. The proposed method randomly masks some image regions according to the bounding box coordinates of text words. The objectives of our pre-training tasks are reconstructing the pixels of masked image regions and the corresponding masked tokens simultaneously. Hence the pre-trained encoder can capture more textual semantics in comparison to the masked image modeling that usually predicts the masked image patches. Compared to the masked multi-modal modeling methods for document image understanding that rely on both the image and text modalities, StrucTexTv2 models image-only input and potentially deals with more application scenarios free from OCR pre-processing. Extensive experiments on mainstream benchmarks of document image understanding demonstrate the effectiveness of StrucTexTv2. It achieves competitive or even new state-of-the-art performance in various downstream tasks such as image classification, layout analysis, table structure recognition, document OCR, and information extraction under the end-to-end scenario.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 5501277416369373170 DOCUMENT # en 1.00\n",
+ "1 metadata 2625417652974853981 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 14017668860031900670 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 2394508152047310417 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 17276933865135358730 TEXT #/texts/0 en 0.17\n",
+ "5 semantic 17276933865135358730 TEXT #/texts/0 text 0.89\n",
+ "6 language 2625417652974853981 TEXT #/texts/1 en 0.32\n",
+ "7 semantic 2625417652974853981 TEXT #/texts/1 header 0.91\n",
+ "8 language 16763168585780875790 TEXT #/texts/2 en 0.30\n",
+ "9 semantic 16763168585780875790 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 10481712215930735018 TEXT #/texts/3 en 0.69\n",
+ "11 semantic 10481712215930735018 TEXT #/texts/3 meta-data 0.91\n",
+ "2106.03331.pdf\n",
+ "title: SelfDoc: Self-Supervised Document Representation Learning\n",
+ "abstract: Abstract We propose SelfDoc, a task-agnostic pre-training framework for document image understanding. Because documents are multimodal and are intended for sequential reading, our framework exploits the positional, textual, and visual information of every semantically meaningful component in a document, and it models the contextualization between each block of content. Unlike existing document pre-training models, our model is coarse-grained instead of treating individual words as input, therefore avoiding an overly fine-grained with excessive contextualization. Beyond that, we introduce cross-modal learning in the model pre-training phase to fully leverage multimodal information from unlabeled documents. For downstream usage, we propose a novel modality-adaptive attention mechanism for multimodal feature fusion by adaptively emphasizing language and vision signals. Our framework benefits from self-supervised pre-training on documents without requiring annotations by a feature masking training strategy. It achieves superior performance on multiple downstream tasks with significantly fewer document images used in the pre-training stage compared to previous works.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 5907688169914557708 DOCUMENT # en 1.00\n",
+ "1 metadata 15934776123702835385 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 11722497451823681661 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 10620501522660942751 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 17609727343114496377 TEXT #/texts/0 en 0.40\n",
+ "5 semantic 17609727343114496377 TEXT #/texts/0 reference 0.95\n",
+ "6 language 15934776123702835385 TEXT #/texts/1 en 0.80\n",
+ "7 semantic 15934776123702835385 TEXT #/texts/1 header 0.86\n",
+ "8 language 16172421238534189085 TEXT #/texts/2 en 0.30\n",
+ "9 semantic 16172421238534189085 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 6934219798032987944 TEXT #/texts/3 en 0.13\n",
+ "11 semantic 6934219798032987944 TEXT #/texts/3 meta-data 1.00\n",
+ "2210.17246.pdf\n",
+ "title: Tables to LaTeX: Structure and Content Extraction from Scientific Tables\n",
+ "abstract: Abstract Scientific documents contain tables that list important information in a concise fashion. Structure and content extraction from tables embedded within PDF research documents is a very challenging task due to the existence of visual features like spanning cells and content features like mathematical symbols and equations. Most existing table structure identification methods tend to ignore these academic writing features. In this paper, we adapt the transformer-based language modeling paradigm for scientific table structure and content extraction. Specifically, the proposed model converts a tabular image to its corresponding L A T E X source code. Overall, we outperform the current state-of-the-art baselines and achieve an exact match accuracy of 70.35% and 49.69% on table structure and content extraction, respectively. Further analysis demonstrates that the proposed models efficiently identify the number of rows and columns, the alphanumeric characters, the L A T E X tokens, and symbols. Keywords Scientific documents · Transformer · L A T E X · Tabular information · Information Extraction\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 6910941384560644633 DOCUMENT # en 0.99\n",
+ "1 metadata 16977338439878864218 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 6955543981316047167 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 7723503127079648564 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 4431987692703606554 TEXT #/texts/0 en 0.34\n",
+ "5 semantic 4431987692703606554 TEXT #/texts/0 text 0.69\n",
+ "6 language 16977338439878864218 TEXT #/texts/1 en 0.73\n",
+ "7 semantic 16977338439878864218 TEXT #/texts/1 header 0.89\n",
+ "8 language 3857753522884437254 TEXT #/texts/2 en 0.35\n",
+ "9 semantic 3857753522884437254 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 6955543981316047167 TEXT #/texts/3 en 0.85\n",
+ "11 semantic 6955543981316047167 TEXT #/texts/3 text 0.99\n",
+ "2211.15504.pdf\n",
+ "title: Semantic Table Detection with LayoutLMv3\n",
+ "abstract: Abstract This paper presents an application of the LayoutLMv3 model for semantic table detection on financial documents from the IIIT-AR-13K dataset. The motivation behind this paper's experiment was that LayoutLMv3's official paper had no results for table detection using semantic information. We concluded that our approach did not improve the model's table detection capabilities, for which we can give several possible reasons. Either the model's weights were unsuitable for our purpose, or we needed to invest more time in optimising the model's hyperparameters. It is also possible that semantic information does not improve a model's table detection accuracy.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 3220804023042761428 DOCUMENT # en 0.97\n",
+ "1 metadata 8520412851891641044 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 13057559607760445383 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 15600106556577833633 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 14852017143538016115 TEXT #/texts/0 en 0.44\n",
+ "5 semantic 14852017143538016115 TEXT #/texts/0 reference 0.91\n",
+ "6 language 8520412851891641044 TEXT #/texts/1 en 0.78\n",
+ "7 semantic 8520412851891641044 TEXT #/texts/1 text 0.77\n",
+ "8 language 997834807154655310 TEXT #/texts/2 fi 0.26\n",
+ "9 semantic 997834807154655310 TEXT #/texts/2 meta-data 0.98\n",
+ "10 language 2747418767922277996 TEXT #/texts/3 fr 0.25\n",
+ "11 semantic 2747418767922277996 TEXT #/texts/3 meta-data 0.98\n",
+ "2304.01577.pdf\n",
+ "title: Form-NLU: Dataset for the Form Language Understanding\n",
+ "abstract: ABSTRACT Compared to general document analysis tasks, form document structure understanding and retrieval are challenging. Form documents are typically made by two types of authors; A form designer, who develops the form structure and keys, and a form user, who fills out form values based on the provided keys. Hence, the form values may not be aligned with the form designer's intention (structure and keys) if a form user gets confused. In this paper, we introduce Form-NLU, the first novel dataset for form structure understanding and its key and value information extraction, interpreting the form designer's intent and the alignment of user-written value on it. It consists of 857 form images, 6k form keys and values, and 4k table keys and values. Our dataset also includes three form types: digital, printed, and handwritten, which cover diverse form appearances and layouts. We propose a robust positional and logical relationbased form key-value information extraction framework. Using this dataset, Form-NLU, we first examine strong object detection models for the form layout understanding, then evaluate the key information extraction task on the dataset, providing fine-grained results for different types of forms and keys. Furthermore, we examine it with the off-the-shelf pdf layout extraction tool and prove its feasibility in real-world cases. CCS CONCEPTS · Information systems → Information retrieval. KEYWORDS Datasets, Form understanding, Natural language understanding ACM Reference Format: Yihao Ding, Siqu Long, Jiabin Huang, Kaixuan Ren, Xingxiang Luo, Hyunsuk Chung, and Soyeon Caren Han. 2023. Form-NLU: Dataset for the Form Language Understanding. In Proceedings of The 46th International ACM ACM ISBN 978-1-4503-XXXX-X/18/06...$15.00 Soyeon Caren Han The University of Sydney Sydney, NSW, Australia SIGIR Conference on Research and Development in Information Retrieval (SIGIR '23). ACM, New York, NY, USA, 10 pages. https://doi.org/XXX\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 7248722372843658536 DOCUMENT # en 1.0\n",
+ "1 metadata 11505020220111782961 DOCUMENT #/texts/1 title 1.0\n",
+ "2 metadata 2637372515777053633 DOCUMENT #/texts/8 abstract 1.0\n",
+ "3 metadata 11448376779154117332 DOCUMENT #/texts/9 abstract 1.0\n",
+ "4 metadata 2106780096098535656 DOCUMENT #/texts/10 abstract 1.0\n",
+ "5 metadata 15919134484492398433 DOCUMENT #/texts/11 abstract 1.0\n",
+ "6 metadata 2638737412827573576 DOCUMENT #/texts/12 abstract 1.0\n",
+ "7 metadata 7791884911297709895 DOCUMENT #/texts/13 abstract 1.0\n",
+ "8 metadata 13884229375021504457 DOCUMENT #/texts/14 abstract 1.0\n",
+ "9 metadata 4071022534915414281 DOCUMENT #/texts/15 abstract 1.0\n",
+ "10 metadata 15904124921512099718 DOCUMENT #/texts/16 abstract 1.0\n",
+ "11 metadata 5875658572963855080 DOCUMENT #/texts/17 abstract 1.0\n",
+ "2107.02638.pdf\n",
+ "title: DocSynth: A Layout Guided Approach for Controllable Document Image Synthesis\n",
+ "abstract: Abstract. Despite significant progress on current state-of-the-art image generation models, synthesis of document images containing multiple and complex object layouts is a challenging task. This paper presents a novel approach, called DocSynth, to automatically synthesize document images based on a given layout. In this work, given a spatial layout (bounding boxes with object categories) as a reference by the user, our proposed DocSynth model learns to generate a set of realistic document images consistent with the defined layout. Also, this framework has been adapted to this work as a superior baseline model for creating synthetic document image datasets for augmenting real data during training for document layout analysis tasks. Different sets of learning objectives have been also used to improve the model performance. Quantitatively, we also compare the generated results of our model with real data using standard evaluation metrics. The results highlight that our model can successfully generate realistic and diverse document images with multiple objects. We also present a comprehensive qualitative analysis summary of the different scopes of synthetic image generation tasks. Lastly, to our knowledge this is the first work of its kind. Keywords: Document Synthesis · Generative Adversarial Networks · Layout Generation.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 9684124591752018285 DOCUMENT # en 0.99\n",
+ "1 metadata 16022043740431148641 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 13234250752867375615 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 11540617821912263900 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 1514611656878267735 TEXT #/texts/0 en 0.39\n",
+ "5 semantic 1514611656878267735 TEXT #/texts/0 reference 0.95\n",
+ "6 language 16022043740431148641 TEXT #/texts/1 en 0.83\n",
+ "7 semantic 16022043740431148641 TEXT #/texts/1 header 0.87\n",
+ "8 language 6400617401789590231 TEXT #/texts/2 en 0.32\n",
+ "9 semantic 6400617401789590231 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 15927436114134815536 TEXT #/texts/3 en 0.25\n",
+ "11 semantic 15927436114134815536 TEXT #/texts/3 meta-data 0.48\n",
+ "2307.16369.pdf\n",
+ "title: Workshop on Document Intelligence Understanding\n",
+ "abstract: ABSTRACT Document understanding and information extraction include different tasks to understand a document and extract valuable information automatically. Recently, there has been a rising demand for developing document understanding among different domains, including business, law, and medicine, to boost the efficiency of work that is associated with a large number of documents. This workshop aims to bring together researchers and industry developers in the field of document intelligence and understanding diverse document types to boost automatic document processing and understanding techniques. We also release a data challenge on the recently introduced document-level VQA dataset, PDFVQA$^{1}$. The PDFVQA challenge examines the model's structural and contextual understandings on the natural full document level of multiple consecutive document pages by including questions with a sequence of answers extracted from multi-pages of the full document. This task helps to boost the document understanding step from the single-page level to the full document level understanding. KEYWORDS Document Understanding, Information Extraction, Layout Analyzing, Visual Question Answering ACM Reference Format: Workshop on Document Intelligence Understanding https://doc-iu.github. io/. In Proceedings of Make sure to enter the correct conference title from your rights confirmation emai (Conference acronym 'XX). ACM, New York, NY, USA, 4 pages. https://doi.org/XXXXXXX.XXXXXXX\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 6209530062408105643 DOCUMENT # en 0.99\n",
+ "1 metadata 8284828600829237345 DOCUMENT #/texts/2 abstract 1.00\n",
+ "2 metadata 1732992016307038206 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 2717719593872793157 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 metadata 8284713813064902382 DOCUMENT #/texts/5 abstract 1.00\n",
+ "5 metadata 11386063114585799801 DOCUMENT #/texts/6 abstract 1.00\n",
+ "6 metadata 15467961449563024489 DOCUMENT #/texts/7 abstract 1.00\n",
+ "7 metadata 5588889559182754493 DOCUMENT #/texts/8 abstract 1.00\n",
+ "8 language 8405150555766025679 TEXT #/texts/0 en 0.30\n",
+ "9 semantic 8405150555766025679 TEXT #/texts/0 reference 0.67\n",
+ "10 language 11183264259715455003 TEXT #/texts/1 en 0.51\n",
+ "11 semantic 11183264259715455003 TEXT #/texts/1 reference 0.84\n",
+ "2204.08387.pdf\n",
+ "title: LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking\n",
+ "abstract: Abstract Self-supervised pre-training techniques have achieved remarkable progress in Document AI. Most multimodal pretrained models use a masked language modeling objective to learn bidirectional representations on the text modality, but they differ in pre-training objectives for the image modality. This discrepancy adds difficulty to multimodal representation learning. In this paper, we propose LayoutLMv3 to pre-train multimodal Transformers for Document AI with unified text and image masking. Additionally, LayoutLMv3 is pre-trained with a word-patch alignment objective to learn cross-modal alignment by predicting whether the corresponding image patch of a text word is masked. The simple unified architecture and training objectives make LayoutLMv3 a general-purpose pre-trained model for both text-centric and image-centric Document AI tasks. Experimental results show that LayoutLMv3 achieves state-of-the-art performance not only in text-centric tasks, including form understanding, receipt understanding, and document visual question answering, but also in imagecentric tasks such as document image classification and document layout analysis. The code and models are publicly available at https://aka.ms/layoutlmv3.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 5613557634612087368 DOCUMENT # en 1.00\n",
+ "1 metadata 10055515728685339539 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 11995162111768406472 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 11875175660407515804 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 1853412802335269668 TEXT #/texts/0 en 0.37\n",
+ "5 semantic 1853412802335269668 TEXT #/texts/0 text 0.96\n",
+ "6 language 10055515728685339539 TEXT #/texts/1 en 0.86\n",
+ "7 semantic 10055515728685339539 TEXT #/texts/1 header 0.79\n",
+ "8 language 4700528860860957044 TEXT #/texts/2 en 0.29\n",
+ "9 semantic 4700528860860957044 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 15999884248560655279 TEXT #/texts/3 en 0.39\n",
+ "11 semantic 15999884248560655279 TEXT #/texts/3 meta-data 0.97\n",
+ "2206.11229.pdf\n",
+ "title: Maty' aˇs Skalick' y, ˇ Stˇ ep' an ˇ Simsa, Michal Uˇriˇ c' aˇr, and Milan ˇ Sulc\n",
+ "abstract: Abstract. Information extraction from semi-structured documents is crucial for frictionless business-to-business (B2B) communication. While machine learning problems related to Document Information Extraction (IE) have been studied for decades, many common problem definitions and benchmarks do not reflect domain-specific aspects and practical needs for automating B2B document communication. We review the landscape of Document IE problems, datasets and benchmarks. We highlight the practical aspects missing in the common definitions and define the Key Information Localization and Extraction (KILE) and Line Item Recognition (LIR) problems. There is a lack of relevant datasets and benchmarks for Document IE on semi-structured business documents as their content is typically legally protected or sensitive. We discuss potential sources of available documents including synthetic data. Keywords: Document Understanding · Survey · Benchmarks · Datasets\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 8967976425495197562 DOCUMENT # en 0.96\n",
+ "1 metadata 4509603375389757621 DOCUMENT #/texts/2 title 1.00\n",
+ "2 metadata 3735705173397610820 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 12757473539277897783 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 4282766078654614157 TEXT #/texts/0 en 0.23\n",
+ "5 semantic 4282766078654614157 TEXT #/texts/0 reference 0.67\n",
+ "6 language 777241034591429017 TEXT #/texts/1 en 0.56\n",
+ "7 semantic 777241034591429017 TEXT #/texts/1 header 0.50\n",
+ "8 language 4509603375389757621 TEXT #/texts/2 en 0.15\n",
+ "9 semantic 4509603375389757621 TEXT #/texts/2 reference 0.73\n",
+ "10 language 7449385233145878399 TEXT #/texts/3 es 0.39\n",
+ "11 semantic 7449385233145878399 TEXT #/texts/3 meta-data 0.99\n",
+ "2305.04609.pdf\n",
+ "title: SwinDocSegmenter: An End-to-End Unified Domain Adaptive Transformer for Document Instance Segmentation\n",
+ "abstract: Abstract. Instance-level segmentation of documents consists in assigning a class-aware and instance-aware label to each pixel of the image. It is a key step in document parsing for their understanding. In this paper, we present a unified transformer encoder-decoder architecture for en-toend instance segmentation of complex layouts in document images. The method adapts a contrastive training with a mixed query selection for anchor initialization in the decoder. Later on, it performs a dot product between the obtained query embeddings and the pixel embedding map (coming from the encoder) for semantic reasoning. Extensive experimentation on competitive benchmarks like PubLayNet, PRIMA, Historical Japanese (HJ), and TableBank demonstrate that our model with SwinL backbone achieves better segmentation performance than the existing state-of-the-art approaches with the average precision of 93.72, 54.39, 84.65 and 98.04 respectively under one billion parameters. The code is made publicly available at: github.com/ayanban011/SwinDocSegmenter Keywords: Document Layout Analysis · Instance-Level Segmentation · Swin Transformer · Contrastive Learning.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 1693492343147334485 DOCUMENT # en 0.99\n",
+ "1 metadata 6160941610122377495 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 6639197326931776497 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 13483731373294669948 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 15950250016772815960 TEXT #/texts/0 en 0.58\n",
+ "5 semantic 15950250016772815960 TEXT #/texts/0 reference 0.66\n",
+ "6 language 6160941610122377495 TEXT #/texts/1 en 0.60\n",
+ "7 semantic 6160941610122377495 TEXT #/texts/1 header 0.72\n",
+ "8 language 7027868792384148107 TEXT #/texts/2 en 0.35\n",
+ "9 semantic 7027868792384148107 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 12268483507213020384 TEXT #/texts/3 en 0.19\n",
+ "11 semantic 12268483507213020384 TEXT #/texts/3 meta-data 0.54\n",
+ "2008.02569.pdf\n",
+ "title: IIIT-AR-13K: A New Dataset for Graphical Object Detection in Documents\n",
+ "abstract: Abstract. We introduce a new dataset for graphical object detection in business documents, more specifically annual reports. This dataset, iiit-$_{ar}$-13$_{k}$, is created by manually annotating the bounding boxes of graphical or page objects in publicly available annual reports. This dataset contains a total of 13$_{k}$ annotated page images with objects in five different popular categories-table, figure, natural image, logo, and signature. It is the largest manually annotated dataset for graphical object detection. Annual reports created in multiple languages for several years from various companies bring high diversity into this dataset. We benchmark $_{iiit-ar}$-13$_{k}$ dataset with two state of the art graphical object detection techniques using $_{f}$aster r-cnn [20] and $_{m}$ask r-cnn [11] and establish high baselines for further research. Our dataset is highly effective as training data for developing practical solutions for graphical object detection in both business documents and technical articles. By training with $_{iiit-ar}$-13$_{k}$, we demonstrate the feasibility of a single solution that can report superior performance compared to the equivalent ones trained with a much larger amount of data, for table detection. We hope that our dataset helps in advancing the research for detecting various types of graphical objects in business documents $^{1}$. Keywords: graphical object detection · annual reports · business documents · $_{f}$aster r-cnn · $_{m}$ask $_{r-cnn}$.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 7134740071165908334 DOCUMENT # en 0.95\n",
+ "1 metadata 13822516791368960623 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 13448690121196485778 DOCUMENT #/texts/8 abstract 1.00\n",
+ "3 metadata 6100566510081120729 DOCUMENT #/texts/9 abstract 1.00\n",
+ "4 language 11381059642053378033 TEXT #/texts/0 en 0.29\n",
+ "5 semantic 11381059642053378033 TEXT #/texts/0 reference 0.99\n",
+ "6 language 13822516791368960623 TEXT #/texts/1 en 0.51\n",
+ "7 semantic 13822516791368960623 TEXT #/texts/1 header 0.77\n",
+ "8 language 13657362457735794412 TEXT #/texts/2 en 0.43\n",
+ "9 semantic 13657362457735794412 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 12503024925615714599 TEXT #/texts/3 en 0.57\n",
+ "11 semantic 12503024925615714599 TEXT #/texts/3 meta-data 0.96\n",
+ "2203.02378.pdf\n",
+ "title: DIT: SELF-SUPERVISED PRE-TRAINING FOR DOCUMENT IMAGE TRANSFORMER\n",
+ "abstract: ABSTRACT Image Transformer has recently achieved significant progress for natural image understanding, either using supervised (ViT, DeiT, etc.) or self-supervised (BEiT, MAE, etc.) pre-training techniques. In this paper, we propose DiT, a selfsupervised pre-trained D ocument I mage T ransformer model using large-scale unlabeled text images for Document AI tasks, which is essential since no supervised counterparts ever exist due to the lack of human labeled document images. We leverage DiT as the backbone network in a variety of vision-based Document AI tasks, including document image classification, document layout analysis, table detection as well as text detection for OCR. Experiment results have illustrated that the self-supervised pre-trained DiT model achieves new state-of-the-art results on these downstream tasks, e.g. document image classification (91.11 → 92.69), document layout analysis (91.0 → 94.9), table detection (94.23 → 96.55) and text detection for OCR (93.07 → 94.29). The code and pre-trained models are publicly available at https://aka.ms/msdit.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 16626975744466566992 DOCUMENT # en 0.99\n",
+ "1 metadata 11648296819527699848 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 5421231543418135299 DOCUMENT #/texts/8 abstract 1.00\n",
+ "3 metadata 3697732579581766426 DOCUMENT #/texts/9 abstract 1.00\n",
+ "4 language 13200780365894350615 TEXT #/texts/0 en 0.54\n",
+ "5 semantic 13200780365894350615 TEXT #/texts/0 text 0.69\n",
+ "6 language 11648296819527699848 TEXT #/texts/1 en 0.26\n",
+ "7 semantic 11648296819527699848 TEXT #/texts/1 header 0.86\n",
+ "8 language 10095158915153554708 TEXT #/texts/2 en 0.29\n",
+ "9 semantic 10095158915153554708 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 12542425350952447845 TEXT #/texts/3 en 0.41\n",
+ "11 semantic 12542425350952447845 TEXT #/texts/3 meta-data 1.00\n",
+ "2203.13530.pdf\n",
+ "title: Multimodal Pre-training Based on Graph Attention Network for Document Understanding\n",
+ "abstract: Abstract-Document intelligence as a relatively new research topic supports many business applications. Its main task is to automatically read, understand, and analyze documents. However, due to the diversity of formats (invoices, reports, forms, etc.) and layouts in documents, it is difficult to make machines understand documents. In this paper, we present the GraphDoc, a multimodal graph attention-based model for various document understanding tasks. GraphDoc is pre-trained in a multimodal framework by utilizing text, layout, and image information simultaneously. In a document, a text block relies heavily on its surrounding contexts, so we inject the graph structure into the attention mechanism to form a graph attention layer so that each input node can only attend to its neighborhoods. The input nodes of each graph attention layer are composed of textual, visual, and positional features from semantically meaningful regions in a document image. We do the multimodal feature fusion of each node by the gate fusion layer. The contextualization between each node is modeled by the graph attention layer. GraphDoc learns a generic representation from only 320k unlabeled documents via the Masked Sentence Modeling task. Extensive experimental results on the publicly available datasets show that GraphDoc achieves state-of-the-art performance, which demonstrates the effectiveness of our proposed method. Index Terms-Document understanding, Pre-training, Multimodal, Graph attention layer.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 11814168582946535945 DOCUMENT # en 0.98\n",
+ "1 metadata 18239112013650998523 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 4944397510388150405 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 1396660194561995515 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 1417991811488983492 TEXT #/texts/0 en 0.32\n",
+ "5 semantic 1417991811488983492 TEXT #/texts/0 text 0.89\n",
+ "6 language 18239112013650998523 TEXT #/texts/1 en 0.78\n",
+ "7 semantic 18239112013650998523 TEXT #/texts/1 header 0.85\n",
+ "8 language 9021015283574159893 TEXT #/texts/2 en 0.67\n",
+ "9 semantic 9021015283574159893 TEXT #/texts/2 meta-data 0.97\n",
+ "10 language 4944397510388150405 TEXT #/texts/3 en 0.91\n",
+ "11 semantic 4944397510388150405 TEXT #/texts/3 text 1.00\n",
+ "2302.08575.pdf\n",
+ "title: Foundation Models for Natural Language Processing -- Pre-trained Language Models Integrating Media\n",
+ "abstract: ['This open access book provides a comprehensive overview of the state of the art in research and applications of Foundation Models and is intended for readers familiar with basic Natural Language Processing (NLP) concepts. Over the recent years, a revolutionary new paradigm has been developed for training models for NLP. These models are first pre-trained on large collections of text documents to acquire general syntactic knowledge and semantic information. Then, they are fine-tuned for specific tasks, which they can often solve with superhuman accuracy. When the models are large enough, they can be instructed by prompts to solve new tasks without any fine-tuning. Moreover, they can be applied to a wide range of different media and problem domains, ranging from image and video processing to robot control learning. Because they provide a blueprint for solving many tasks in artificial intelligence, they have been called Foundation Models. After a brief introduction to basic NLP models the main pre-trained language models BERT, GPT and sequence-to-sequence transformer are described, as well as the concepts of self-attention and context-sensitive embedding. Then, different approaches to improving these models are discussed, such as expanding the pre-training criteria, increasing the length of input texts, or including extra knowledge. An overview of the best-performing models for about twenty application areas is then presented, e.g., question answering, translation, story generation, dialog systems, generating images from text, etc. For each application area, the strengths and weaknesses of current models are discussed, and an outlook on further developments is given. In addition, links are provided to freely available program code. A concluding chapter summarizes the economic opportunities, mitigation of risks, and potential developments of AI.']\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 8421627772241711435 DOCUMENT # en 0.97\n",
+ "1 language 15144751674124179519 TEXT #/texts/0 en 0.35\n",
+ "2 semantic 15144751674124179519 TEXT #/texts/0 text 0.99\n",
+ "3 language 13543005973877344845 TEXT #/texts/1 de 0.37\n",
+ "4 semantic 13543005973877344845 TEXT #/texts/1 meta-data 0.99\n",
+ "5 language 13566690406347038172 TEXT #/texts/2 en 0.59\n",
+ "6 semantic 13566690406347038172 TEXT #/texts/2 header 0.47\n",
+ "7 language 8314110134380907026 TEXT #/texts/3 en 0.73\n",
+ "8 semantic 8314110134380907026 TEXT #/texts/3 header 0.81\n",
+ "9 language 4744613495211267368 TEXT #/texts/4 en 0.99\n",
+ "10 semantic 4744613495211267368 TEXT #/texts/4 text 0.91\n",
+ "11 language 2692523668970354209 TEXT #/texts/5 en 0.92\n",
+ "2205.02411.pdf\n",
+ "title: Relational Representation Learning in Visually-Rich Documents\n",
+ "abstract: Abstract Relational understanding is critical for a number of visually-rich documents (VRDs) understanding tasks. Through multi-modal pre-training, recent studies provide comprehensive contextual representations and exploit them as prior knowledge for downstream tasks. In spite of their impressive results, we observe that the widespread relational hints (e.g., relation of key/value fields on receipts) built upon contextual knowledge are not excavated yet. To mitigate this gap, we propose DocReL, a Doc ument Re lational Representation L earning framework. The major challenge of DocReL roots in the variety of relations. From the simplest pairwise relation to the complex global structure, it is infeasible to conduct supervised training due to the definition of relation varies and even conflicts in different tasks. To deal with the unpredictable definition of relations, we propose a novel contrastive learning task named Relational Consistency Modeling (RCM), which harnesses the fact that existing relations should be consistent in differently augmented positive views. RCM provides relational representations which are more compatible to the urgent need of downstream tasks, even without any knowledge about the exact definition of relation. DocReL achieves better performance on a wide variety of VRD relational understanding tasks, including table structure recognition, key information extraction and reading order detection.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 14685796338317814159 DOCUMENT # en 1.00\n",
+ "1 metadata 14560507360067576989 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 9619244236012610321 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 402873043014886069 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 4466021334064041913 TEXT #/texts/0 en 0.68\n",
+ "5 semantic 4466021334064041913 TEXT #/texts/0 text 0.83\n",
+ "6 language 14560507360067576989 TEXT #/texts/1 en 0.72\n",
+ "7 semantic 14560507360067576989 TEXT #/texts/1 header 0.73\n",
+ "8 language 10588574520574666354 TEXT #/texts/2 eo 0.42\n",
+ "9 semantic 10588574520574666354 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 4943200990914226626 TEXT #/texts/3 en 0.47\n",
+ "11 semantic 4943200990914226626 TEXT #/texts/3 meta-data 1.00\n",
+ "2305.08719.pdf\n",
+ "title: M $^{6}$Doc: A Large-Scale Multi-Format, Multi-Type, Multi-Layout, Multi-Language, Multi-Annotation Category Dataset for Modern Document Layout Analysis\n",
+ "abstract: Abstract Document layout analysis is a crucial prerequisite for document under standing, including document retrieval and conversion. Most public datasets currently contain only PDF documents and lack realistic documents. Models trained on these datasets may not generalize well to real-world scenarios. Therefore, this paper introduces a large and diverse document layout analysis dataset called M $^{6}$Doc. The M 6 designation represents six properties: (1) Multi-Format (including scanned, photographed, and PDF documents); (2) Multi-Type (such as scientific articles, textbooks, books, test papers, magazines, newspapers, and notes); (3) Multi-Layout (rectangular, Manhattan, non-Manhattan, and multi-column Manhattan); (4) Multi-Language (Chinese and English); (5) Multi-Annotation Category (74 types of annotation labels with 237,116 annotation instances in 9,080 manually annotated pages); and (6) Modern documents. Additionally, we propose a transformer-based document layout analysis method called TransDLANet, which leverages an adaptive element matching mechanism that enables query embedding to better match ground truth to improve recall, and constructs a segmentation branch for more precise document image instance segmentation. We conduct a comprehensive evaluation of M $^{6}$Doc with various layout analysis methods and demonstrate its ef fectiveness. TransDLANet achieves stateof-the-art performance on M $^{6}$Doc with 64.5% mAP. The M $^{6}$Doc dataset will be available at https://github. com/HCIILAB/ M6Doc.\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 1876173860373953341 DOCUMENT # en \n",
+ "1 metadata 13256736339476695081 DOCUMENT #/texts/1 title \n",
+ "2 metadata 8152406454759716696 DOCUMENT #/texts/8 abstract \n",
+ "3 metadata 9004593750770180351 DOCUMENT #/texts/9 abstract \n",
+ "4 metadata 1940889033656737863 DOCUMENT #/texts/10 abstract \n",
+ "5 language 10861081459604961628 TEXT #/texts/0 en \n",
+ "6 semantic 10861081459604961628 TEXT #/texts/0 reference \n",
+ "7 language 13256736339476695081 TEXT #/texts/1 en \n",
+ "8 semantic 13256736339476695081 TEXT #/texts/1 header \n",
+ "9 language 1638358623752885653 TEXT #/texts/2 en \n",
+ "10 semantic 1638358623752885653 TEXT #/texts/2 meta-data \n",
+ "11 language 11362042950132522047 TEXT #/texts/3 en \n",
+ "\n",
+ " confidence \n",
+ "0 0.98 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 1.00 \n",
+ "5 0.58 \n",
+ "6 0.66 \n",
+ "7 0.52 \n",
+ "8 0.67 \n",
+ "9 0.23 \n",
+ "10 0.95 \n",
+ "11 0.40 \n",
+ "2305.04833.pdf\n",
+ "title: Revisiting Table Detection Datasets for Visually Rich Documents\n",
+ "abstract: Abstract Table Detection has become a fundamental task for visually rich document understanding with the surging number of electronic documents. There have been some open datasets widely used in many studies. However, popular available datasets have some inherent limitations, including the noisy and inconsistent samples, and the limit number of training samples, and the limit number of data-sources. These limitations make these datasets unreliable to evaluate the model performance and cannot reflect the actual capacity of models. Therefore, in this paper, we revisit some open datasets with high quality of annotations, identify and clean the noise, and align the annotation definitions of these datasets to merge a larger dataset, termed with Open-Tables. Moreover, to enrich the data sources, we propose a new dataset, termed with ICT-TD, using the PDF files of Information and communication technologies (ICT) commodities which is a di erent domain containing unique samples that hardly appear in open datasets. To ensure the label quality of the dataset, we annotated the dataset manually following the guidance of a domain expert. The proposed dataset has a larger intra-variance and smaller inter-variance, making it more challenging and can be a sample of actual cases in the business context. We built strong baselines using various state-of-the-art object detection models and also built the baselines in the cross-domain setting. Our experimental results show that the domain di erence among existing open datasets are small, even they have di erent data-sources. Our proposed Open-tables and ICT-TD are more suitable for the cross domain setting, and can provide more reliable evaluation for model because of their high quality and consistent annotations. We conduct experiments to discuss the side e ects of noise in the open source datasets. Our experimental results show that in the cross-domain setting, benchmark models trained with cleaned Open-Tables dataset can achieve 0.6%-2.6% higher weighted average F1 than the corresponding ones trained with the noisy version of Open-Tables, demonstrating the reliability of the proposed datasets. The datasets are public available at http://ieee-dataport. org/documents/table-detection-dataset-visually-rich-documents Keywords: Object Detection, Table Detection Dataset, ICT Supply Chain, Table Detection 1. Introduction Tables or tabular data have been widely used in electronic documents to summarize critical information so that the information can be presented e ciently to human readers. However, electronic documents, such as Portable Document Format (PDF) files, cannot provide enough meta-data to describe the location and the structure of these tables, meaning that these tables are unstructured and cannot be quickly processed and interpreted automatically. With the surging amount of electronic documents, Table Detection (TD) becomes a fundamental task for downstream document understanding tasks, such as Key Information Extraction and Table Structure Recognition [1]. With the development of deep learning, transforming electronic documents into visually rich document images and formulating the problem as an object detection problem became the dominant solutions. There have been some public datasets for the TD problem, such as ICDAR2013 [2], ICDAR2017 [3], ICDAR2019 [4] and TableBank [5]. 
Some of these datasets are manually labeled, which means that the annotations are more reliable and consistent, but the number of training sample in these datasets are usually limited. Besides, the annotation definitions across these datasets are often di erent, which means we cannot simply merge these datasets together to form larger datasets. In contrast, datasets such as TableBank [5] and PubLayNet [6] are annotated by parsing meta-data of electronic documents, making these annotations are noisy and inconsistent, even though these datasets are much larger. Figure 1 shows two samples from the TableBank test set. One typical issue of these meta-data generated datasets is that the bounding box can be larger than an ideal bounding box, as shown in Figure 1 (a), which can make the evaluation unreliable when the Intersection over Union (IoU) threshold is high. Another issue is that some tables are missed or the bounding box is not large enough to cover the whole table, as shown in Figure 1 (b). The quality of a table detection set is critical for the TD problem because a successful TD application should avoid losing information presented in the tables. And the issues of noisy labels in the test set can influence the model evaluation, especially for widely used evaluation metrics threshold by IoU scores. It is worth mentioning that even though manually annotated datasets have higher quality of annotations, there are still many noisy samples in both their training and testing sets. Therefore, in this study, we revisit several well-annotated datasets, including ICDAR2013, ICDAR2017, ICDAR2019, Marmot and TNCR, align the labeling definition of these datasets, clean the noisy samples and merge them together to form a larger dataset, termed with Open-Tables. The new Open-Tables dataset can minimize the side e ects of noisy samples to the model evaluation and provide more reliable results. We include more details regarding Open-Tables dataset in section 3.1.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 7304776487939613810 DOCUMENT # en 1.00\n",
+ "1 metadata 8558559027627596631 DOCUMENT #/texts/0 title 1.00\n",
+ "2 metadata 3716338067735375485 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 8636239885669143883 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 metadata 16225242767116965410 DOCUMENT #/texts/6 abstract 1.00\n",
+ "5 metadata 4785204321927401322 DOCUMENT #/texts/7 abstract 1.00\n",
+ "6 metadata 1493289078835811758 DOCUMENT #/texts/8 abstract 1.00\n",
+ "7 metadata 11541983487153591499 DOCUMENT #/texts/9 abstract 1.00\n",
+ "8 metadata 10011220560812929426 DOCUMENT #/texts/10 abstract 1.00\n",
+ "9 language 8558559027627596631 TEXT #/texts/0 en 0.60\n",
+ "10 semantic 8558559027627596631 TEXT #/texts/0 text 0.48\n",
+ "11 language 13696316175992901111 TEXT #/texts/1 en 0.18\n",
+ "2108.00871.pdf\n",
+ "title: Constrained Graphic Layout Generation via Latent Optimization\n",
+ "abstract: ABSTRACT It is common in graphic design humans visually arrange various elements according to their design intent and semantics. For example, a title text almost always appears on top of other elements in a document. In this work, we generate graphic layouts that can flexibly incorporate such design semantics, either specified implicitly or explicitly by a user. We optimize using the latent space of an off-the-shelf layout generation model, allowing our approach to be complementary to and used with existing layout generation models. Our approach builds on a generative layout model based on a Transformer architecture, and formulates the layout generation as a constrained optimization problem where design constraints are used for element alignment, overlap avoidance, or any other user-specified relationship. We show in the experiments that our approach is capable of generating realistic layouts in both constrained and unconstrained generation tasks with a single model. The code is available at https://github.com/ktrk115/const_layout. CCS CONCEPTS · Human-centered computing → Interaction design process and methods; · Applied computing → Computer-aided design. KEYWORDS layout generation, generative adversarial network, constrained optimization, latent space exploration ACM Reference Format: Kotaro Kikuchi, Edgar Simo-Serra, Mayu Otani, and Kota Yamaguchi. 2021. Constrained Graphic Layout Generation via Latent Optimization. In Proceedings of the 29th ACM International Conference on Multimedia (MM '21), October 20-24, 2021, Virtual Event, China. ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/3474085.3475497\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 12199915035636768252 DOCUMENT # en \n",
+ "1 metadata 8384071914771172394 DOCUMENT #/texts/1 title \n",
+ "2 metadata 2109654227642717686 DOCUMENT #/texts/8 abstract \n",
+ "3 metadata 3211312228955143655 DOCUMENT #/texts/9 abstract \n",
+ "4 metadata 6555000622341922001 DOCUMENT #/texts/10 abstract \n",
+ "5 metadata 16983468812637625951 DOCUMENT #/texts/11 abstract \n",
+ "6 metadata 2109469912817786993 DOCUMENT #/texts/12 abstract \n",
+ "7 metadata 3683188399811590448 DOCUMENT #/texts/13 abstract \n",
+ "8 metadata 9328733848143751678 DOCUMENT #/texts/14 abstract \n",
+ "9 metadata 13989158213906669862 DOCUMENT #/texts/15 abstract \n",
+ "10 language 1239214986904154463 TEXT #/texts/0 en \n",
+ "11 semantic 1239214986904154463 TEXT #/texts/0 reference \n",
+ "\n",
+ " confidence \n",
+ "0 0.98 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 1.00 \n",
+ "5 1.00 \n",
+ "6 1.00 \n",
+ "7 1.00 \n",
+ "8 1.00 \n",
+ "9 1.00 \n",
+ "10 0.41 \n",
+ "11 0.86 \n",
+ "2306.08937.pdf\n",
+ "title: Document Entity Retrieval with Massive and Noisy Pre-training\n",
+ "abstract: Abstract Visually-Rich Document Entity Retrieval (VDER) is a type of machine learning task that aims at recovering text spans in the documents for each of the entities in question. VDER has gained significant attention in recent years thanks to its broad applications in enterprise AI. Unfortunately, as document images often contain personally identifiable information (PII), publicly available data have been scarce, not only because of privacy constraints but also the costs of acquiring annotations. To make things worse, each dataset would often define its own sets of entities, and the non-overlapping entity spaces between datasets make it difficult to transfer knowledge between documents. In this paper, we propose a method to collect massive-scale, noisy, and weakly labeled data from the web to benefit the training of VDER models. Such a method will generate a huge amount of document image data to compensate for the lack of training data in many VDER settings. Moreover, the collected dataset named DocuNet would not need to be dependent on specific document types or entity sets, making it universally applicable to all VDER tasks. Empowered by DocuNet, we present a lightweight multimodal architecture named UniFormer, which can learn a unified representation from text, layout, and image crops without needing extra visual pretraining. We experiment with our methods on popular VDER models in various settings and show the improvements when this massive dataset is incorporated with UniFormer on both classic entity retrieval and few-shot learning settings.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 15142591603334469240 DOCUMENT # en 0.98\n",
+ "1 metadata 10066368879651874202 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 15287023683645999756 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 9371627777753320426 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 metadata 15691518741437309238 DOCUMENT #/texts/5 abstract 1.00\n",
+ "5 language 13913708657465956901 TEXT #/texts/0 en 0.20\n",
+ "6 semantic 13913708657465956901 TEXT #/texts/0 reference 0.66\n",
+ "7 language 10066368879651874202 TEXT #/texts/1 en 0.75\n",
+ "8 semantic 10066368879651874202 TEXT #/texts/1 header 0.98\n",
+ "9 language 8221334556122986640 TEXT #/texts/2 en 0.32\n",
+ "10 semantic 8221334556122986640 TEXT #/texts/2 meta-data 0.99\n",
+ "11 language 15287023683645999756 TEXT #/texts/3 en 0.32\n",
+ "2103.05908.pdf\n",
+ "title: DeepCPCFG: Deep Learning and Context Free Grammars for End-to-End Information Extraction\n",
+ "abstract: Abstract We combine deep learning and Conditional Probabilistic Context Free Grammars (CPCFG) to create an end-to-end system for extracting structured information from complex documents. For each class of documents, we create a CPCFG that describes the structure of the information to be extracted. Conditional probabilities are modeled by deep neural networks. We use this grammar to parse 2-D documents to directly produce structured records containing the extracted information. This system is trained end-to-end with (Document, Record) pairs. We apply this approach to extract information from scanned invoices achieving state-of-the-art results.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 5782007314389811631 DOCUMENT # en 1.00\n",
+ "1 metadata 13526205582851115312 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 17155324988894173481 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 3790138250182757054 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 15035037928597917938 TEXT #/texts/0 en 0.32\n",
+ "5 semantic 15035037928597917938 TEXT #/texts/0 text 0.99\n",
+ "6 language 13526205582851115312 TEXT #/texts/1 en 0.59\n",
+ "7 semantic 13526205582851115312 TEXT #/texts/1 header 0.86\n",
+ "8 language 13070915798935976283 TEXT #/texts/2 en 0.37\n",
+ "9 semantic 13070915798935976283 TEXT #/texts/2 meta-data 0.98\n",
+ "10 language 11881341791620129678 TEXT #/texts/3 en 0.38\n",
+ "11 semantic 11881341791620129678 TEXT #/texts/3 meta-data 1.00\n",
+ "2109.01078.pdf\n",
+ "title: Skim-Attention: Learning to Focus via Document Layout\n",
+ "abstract: Abstract Transformer-based pre-training techniques of text and layout have proven effective in a number of document understanding tasks. Despite this success, multimodal pre-training models suffer from very high computational and memory costs. Motivated by human reading strategies, this paper presents Skim-Attention, a new attention mechanism that takes advantage of the structure of the document and its layout. Skim-Attention only attends to the 2dimensional position of the words in a document. Our experiments show that Skim-Attention obtains a lower perplexity than prior works, while being more computationally efficient. Skim-Attention can be further combined with long-range Transformers to efficiently process long documents. We also show how Skim-Attention can be used off-the-shelf as a mask for any Pre-trained Language Model, allowing to improve their performance while restricting attention. Finally, we show the emergence of a document structure representation in Skim-Attention.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 9883551471125205979 DOCUMENT # en 0.99\n",
+ "1 metadata 10129463260375798652 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 5534460045214421186 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 3636955013674087094 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 4755179979580467845 TEXT #/texts/0 en 0.48\n",
+ "5 semantic 4755179979580467845 TEXT #/texts/0 reference 0.66\n",
+ "6 language 10129463260375798652 TEXT #/texts/1 en 0.71\n",
+ "7 semantic 10129463260375798652 TEXT #/texts/1 header 0.74\n",
+ "8 language 572329452412399190 TEXT #/texts/2 en 0.28\n",
+ "9 semantic 572329452412399190 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 4645504454607610911 TEXT #/texts/3 fr 0.41\n",
+ "11 semantic 4645504454607610911 TEXT #/texts/3 meta-data 0.98\n",
+ "2108.09436.pdf\n",
+ "title: Palmira: A Deep Deformable Network for Instance Segmentation of Dense and Uneven Layouts in Handwritten Manuscripts\n",
+ "abstract: Abstract. Handwritten documents are often characterized by dense and uneven layout. Despite advances, standard deep network based approaches for semantic layout segmentation are not robust to complex deformations seen across semantic regions. This phenomenon is especially pronounced for the low-resource Indic palm-leaf manuscript domain. To address the issue, we first introduce Indiscapes2, a new large-scale diverse dataset of Indic manuscripts with semantic layout annotations. Indiscapes2 contains documents from four different historical collections and is 150% larger than its predecessor, Indiscapes. We also propose a novel deep network Palmira for robust, deformation-aware instance segmentation of regions in handwritten manuscripts. We also report Hausdorff distance and its variants as a boundary-aware performance measure. Our experiments demonstrate that$_{Palmira}$ provides robust layouts, outperforms strong baseline approaches and ablative variants. We also include qualitative results on Arabic, South-East Asian and Hebrew historical manuscripts to showcase the generalization capability of $_{Palmira}$. Keywords: instance segmentation · deformable convolutional network · historical document analysis · document image segmentation · dataset\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 9626559153965122002 DOCUMENT # en 0.99\n",
+ "1 metadata 6342588646724752712 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 9076307976746177469 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 18076662708448135472 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 1894792420845750180 TEXT #/texts/0 en 0.47\n",
+ "5 semantic 1894792420845750180 TEXT #/texts/0 reference 0.86\n",
+ "6 language 6342588646724752712 TEXT #/texts/1 en 0.60\n",
+ "7 semantic 6342588646724752712 TEXT #/texts/1 header 0.73\n",
+ "8 language 2763018529891900900 TEXT #/texts/2 en 0.41\n",
+ "9 semantic 2763018529891900900 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 18273145714788509308 TEXT #/texts/3 en 0.45\n",
+ "11 semantic 18273145714788509308 TEXT #/texts/3 meta-data 1.00\n",
+ "2308.01971.pdf\n",
+ "title: SpaDen : Sparse and Dense Keypoint Estimation for Real-World Chart Understanding\n",
+ "abstract: Abstract. We introduce a novel bottom-up approach for the extraction of chart data. Our model utilizes images of charts as inputs and learns to detect keypoints (KP), which are used to reconstruct the components within the plot area. Our novelty lies in detecting a fusion of continuous and discrete KP as predicted heatmaps. A combination of sparse and dense per-pixel objectives coupled with a uni-modal self-attentionbased feature-fusion layer is applied to learn KP embeddings. Further leveraging deep metric learning for unsupervised clustering, allows us to segment the chart plot area into various objects. By further matching the chart components to the legend, we are able to obtain the data series names. A post-processing threshold is applied to the KP embeddings to refine the object reconstructions and improve accuracy. Our extensive experiments include an evaluation of different modules for KP estimation and the combination of deep layer aggregation and corner pooling approaches. The results of our experiments provide extensive evaluation for the task of real-world chart data extraction. $^{1}$. Keywords: Charts and Document Understanding and Reasoning\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 13367340984838580806 DOCUMENT # en 1.00\n",
+ "1 metadata 1903187599367179918 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 12538701134398444025 DOCUMENT #/texts/8 abstract 1.00\n",
+ "3 metadata 10579837932229061513 DOCUMENT #/texts/9 abstract 1.00\n",
+ "4 language 341948888729253223 TEXT #/texts/0 en 0.36\n",
+ "5 semantic 341948888729253223 TEXT #/texts/0 reference 0.86\n",
+ "6 language 1903187599367179918 TEXT #/texts/1 en 0.45\n",
+ "7 semantic 1903187599367179918 TEXT #/texts/1 header 0.87\n",
+ "8 language 17616124111452301542 TEXT #/texts/2 en 0.16\n",
+ "9 semantic 17616124111452301542 TEXT #/texts/2 meta-data 0.89\n",
+ "10 language 17428461424590529908 TEXT #/texts/3 en 0.22\n",
+ "11 semantic 17428461424590529908 TEXT #/texts/3 meta-data 1.00\n",
+ "2203.08504.pdf\n",
+ "title: A Survey of Historical Document Image Datasets\n",
+ "abstract: Abstract This paper presents a systematic literature review of image datasets for document image analysis, focusing on historical documents, such as handwritten manuscripts and early prints. Finding appropriate datasets for historical document analysis is a crucial prerequisite to facilitate research using different machine learning algorithms. However, because of the very large variety of the actual data (e.g., scripts, tasks, dates, support systems, and amount of deterioration), the different formats for data and label representation, and the different evaluation processes and benchmarks, finding appropriate datasets is a difficult task. This work fills this gap, presenting a meta-study on existing datasets. After a systematic selection process (according to PRISMA guidelines), we select 56 studies that are chosen based on different factors, such as the year of publication, number of methods implemented in the article, reliability of the chosen algorithms, dataset size, and journal outlet. We summarize each study by assigning it to one of three pre-defined tasks: document classification, layout structure, or semantic analysis. We present the statistics, document type, language, tasks, input visual aspects, and ground truth information for every dataset. In addition, we provide the benchmark tasks and results from these papers or recent competitions. We further discuss gaps and challenges in this domain. We advocate for providing conversion tools to common formats (e.g., COCO format for computer vision tasks) and always providing a set of evaluation metrics, instead of just one, to make results comparable across studies. Keywords: Historical Documents, Image Datasets, Document Image Analysis, Machine Learning\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 4007586703670525870 DOCUMENT # en \n",
+ "1 metadata 7274577276839794671 DOCUMENT #/texts/1 title \n",
+ "2 metadata 16472628404673665640 DOCUMENT #/texts/10 abstract \n",
+ "3 metadata 17605835865519781365 DOCUMENT #/texts/11 abstract \n",
+ "4 metadata 15932023376733108084 DOCUMENT #/texts/12 abstract \n",
+ "5 language 9758151282582610634 TEXT #/texts/0 en \n",
+ "6 semantic 9758151282582610634 TEXT #/texts/0 text \n",
+ "7 language 7274577276839794671 TEXT #/texts/1 en \n",
+ "8 semantic 7274577276839794671 TEXT #/texts/1 header \n",
+ "9 language 5009320327279888416 TEXT #/texts/2 en \n",
+ "10 semantic 5009320327279888416 TEXT #/texts/2 meta-data \n",
+ "11 language 14203392011151395431 TEXT #/texts/3 en \n",
+ "\n",
+ " confidence \n",
+ "0 0.99 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 1.00 \n",
+ "5 0.33 \n",
+ "6 0.89 \n",
+ "7 0.58 \n",
+ "8 0.93 \n",
+ "9 0.32 \n",
+ "10 0.99 \n",
+ "11 0.47 \n",
+ "2304.14953.pdf\n",
+ "title: CCpdf: Building a High Quality Corpus for Visually Rich Documents from Web Crawl Data\n",
+ "abstract: Abstract. In recent years, the field of document understanding has progressed a lot. A significant part of this progress has been possible thanks to the use of language models pretrained on large amounts of documents. However, pretraining corpora used in the domain of document understanding are single domain, monolingual, or nonpublic. Our goal in this paper is to propose an efficient pipeline for creating a big-scale, diverse, multilingual corpus of PDF files from all over the Internet using Common Crawl, as PDF files are the most canonical types of documents as considered in document understanding. We analyzed extensively all of the steps of the pipeline and proposed a solution which is a trade-off between data quality and processing time. We also share a CCpdf corpus in a form or an index of PDF files along with a script for downloading them, which produces a collection useful for language model pretraining. The dataset and tools published with this paper offer researchers the opportunity to develop even better multilingual language models. Keywords: Natural Language Processing, language models, dataset construction, document understanding.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10771338546968252461 DOCUMENT # en 0.98\n",
+ "1 metadata 3724028993523053831 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 1609918121696388518 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 8094359134579896324 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 10595408906284717089 TEXT #/texts/0 en 0.32\n",
+ "5 semantic 10595408906284717089 TEXT #/texts/0 reference 0.66\n",
+ "6 language 3724028993523053831 TEXT #/texts/1 en 0.76\n",
+ "7 semantic 3724028993523053831 TEXT #/texts/1 header 0.57\n",
+ "8 language 8104816477335951616 TEXT #/texts/2 pl 0.49\n",
+ "9 semantic 8104816477335951616 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 8883813848999863888 TEXT #/texts/3 en 0.49\n",
+ "11 semantic 8883813848999863888 TEXT #/texts/3 meta-data 0.81\n",
+ "2307.12571.pdf\n",
+ "title: MataDoc: Margin and Text Aware Document Dewarping for Arbitrary Boundary\n",
+ "abstract: Abstract Document dewarping from a distorted camera-captured image is of great value for OCR and document understanding. The document boundary plays an important role which is more evident than the inner region in document dewarping. Current learning-based methods mainly focus on complete boundary cases, leading to poor document correction performance of documents with incomplete boundaries. In contrast to these methods, this paper proposes MataDoc, the first method focusing on arbitrary boundary document dewarping with margin and text aware regularizations. Specifically, we design the margin regularization by explicitly considering background consistency to enhance boundary perception. Moreover, we introduce word position consistency to keep text lines straight in rectified document images. To produce a comprehensive evaluation of MataDoc, we propose a novel benchmark ArbDoc, mainly consisting of document images with arbitrary boundaries in four typical scenarios. Extensive experiments confirm the superiority of MataDoc with consideration for the incomplete boundary on ArbDoc and also demonstrate the effectiveness of the proposed method on DocUNet, DIR300, and Warp-Doc datasets.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 742822728802086802 DOCUMENT # en 0.99\n",
+ "1 metadata 14673947180037347255 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 6340112537169098581 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 9458629370508501719 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 6284796105560134854 TEXT #/texts/0 en 0.46\n",
+ "5 semantic 6284796105560134854 TEXT #/texts/0 reference 0.95\n",
+ "6 language 14673947180037347255 TEXT #/texts/1 en 0.60\n",
+ "7 semantic 14673947180037347255 TEXT #/texts/1 header 0.81\n",
+ "8 language 4014942427634426251 TEXT #/texts/2 en 0.42\n",
+ "9 semantic 4014942427634426251 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 2614023099649271751 TEXT #/texts/3 en 0.44\n",
+ "11 semantic 2614023099649271751 TEXT #/texts/3 meta-data 1.00\n",
+ "2204.10939.pdf\n",
+ "title: Unified Pretraining Framework for Document Understanding\n",
+ "abstract: Abstract Document intelligence automates the extraction of information from documents and supports many business applications. Recent self-supervised learning methods on large-scale unlabeled document datasets have opened up promising directions towards reducing annotation efforts by training models with self-supervised objectives. However, most of the existing document pretraining methods are still language-dominated. We present UDoc, a new unified pretraining framework for document understanding. UDoc is designed to support most document understanding tasks, extending the Transformer to take multimodal embeddings as input. Each input element is composed of words and visual features from a semantic region of the input document image. An important feature of UDoc is that it learns a generic representation by making use of three self-supervised losses, encouraging the representation to model sentences, learn similarities, and align modalities. Extensive empirical analysis demonstrates that the pretraining procedure learns better joint representations and leads to improvements in downstream tasks.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 14399465253152604874 DOCUMENT # en 1.00\n",
+ "1 metadata 4747122830571822511 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 9967082317227599151 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 3123119904347243024 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 2608806305983470192 TEXT #/texts/0 en 0.35\n",
+ "5 semantic 2608806305983470192 TEXT #/texts/0 text 0.96\n",
+ "6 language 4747122830571822511 TEXT #/texts/1 en 0.68\n",
+ "7 semantic 4747122830571822511 TEXT #/texts/1 header 0.86\n",
+ "8 language 4979022925906638436 TEXT #/texts/2 en 0.19\n",
+ "9 semantic 4979022925906638436 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 6628659997364410072 TEXT #/texts/3 en 0.32\n",
+ "11 semantic 6628659997364410072 TEXT #/texts/3 meta-data 1.00\n",
+ "2308.10511.pdf\n",
+ "title: Shrestha Datta\n",
+ "abstract: Computer Science and Engineering Shahjalal University of Science and Technology Sylhet, Bangladesh raisafairoozshafa@gmail.com Tariful Islam Fahim Computer Science and Engineering Shahjalal University of Science and Technology Sylhet, Bangladesh tarifulislamfahim12@gmail.com $^{Abstract}$-Understanding digital documents is like solving a puzzle, especially historical ones. Document Layout Analysis (DLA) helps with this puzzle by dividing documents into sections like paragraphs, images, and tables. This is crucial for machines to read and understand these documents. In the DL Sprint 2.0 competition, we worked on understanding Bangla documents. We used a dataset called BaDLAD with lots of examples. We trained a special model called Mask R-CNN to help with this understanding. We made this model better by step-by-step hyperparameter tuning, and we achieved a good dice score of 0.889. However, not everything went perfectly. We tried using a model trained for English documents, but it didn't fit well with Bangla. This showed us that each language has its own challenges. Our solution for the DL Sprint 2.0 is publicly available at https://www.kaggle.com/competitions/dlsprint2/discussion/432201 along with notebooks, weights, and inference notebook. $^{Index Terms}$-Instant Segmentation, Mask-RCNN, DLA\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 260975602765610135 DOCUMENT # en 0.97\n",
+ "1 metadata 17590598390767860458 DOCUMENT #/texts/2 title 1.00\n",
+ "2 metadata 9809662086485383601 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 14265448797484868543 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 metadata 16405199266218820589 DOCUMENT #/texts/7 abstract 1.00\n",
+ "5 metadata 18136302189533099052 DOCUMENT #/texts/8 abstract 1.00\n",
+ "6 language 11994155766495304257 TEXT #/texts/0 en 0.34\n",
+ "7 semantic 11994155766495304257 TEXT #/texts/0 reference 0.86\n",
+ "8 language 1717063881973427376 TEXT #/texts/1 en 0.52\n",
+ "9 semantic 1717063881973427376 TEXT #/texts/1 header 0.95\n",
+ "10 language 17590598390767860458 TEXT #/texts/2 en 0.72\n",
+ "11 semantic 17590598390767860458 TEXT #/texts/2 reference 0.78\n",
+ "2205.08094.pdf\n",
+ "title: MATrIX-Modality-Aware Transformer for Information eXtraction\n",
+ "abstract: Abstract We present MATrIX-a Modality-Aware Transformer for Information eXtraction in the Visual Document Understanding (VDU) domain. VDU covers information extraction from visually rich documents such as forms, invoices, receipts, tables, graphs, presentations, or advertisements. In these, text semantics and visual information supplement each other to provide a global understanding of the document. MATrIX is pre-trained in an unsupervised way with specifically designed tasks that require the use of multimodal information (spatial, visual, or textual). We consider the spatial and text modalities all at once in a single token set. To make the attention more flexible, we use a learned modality-aware relative bias in the attention mechanism to modulate the attention between the tokens of different modalities. We evaluate MATrIX on 3 different datasets each with strong baselines.\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 18149103541339865680 DOCUMENT # en \n",
+ "1 metadata 12704912113251550677 DOCUMENT #/texts/1 title \n",
+ "2 metadata 11932713160379830690 DOCUMENT #/texts/10 abstract \n",
+ "3 metadata 2977944849444182431 DOCUMENT #/texts/11 abstract \n",
+ "4 language 15328238962955998856 TEXT #/texts/0 en \n",
+ "5 semantic 15328238962955998856 TEXT #/texts/0 reference \n",
+ "6 language 12704912113251550677 TEXT #/texts/1 en \n",
+ "7 semantic 12704912113251550677 TEXT #/texts/1 header \n",
+ "8 language 3511161955042061787 TEXT #/texts/2 de \n",
+ "9 semantic 3511161955042061787 TEXT #/texts/2 meta-data \n",
+ "10 language 351589254607964310 TEXT #/texts/3 de \n",
+ "11 semantic 351589254607964310 TEXT #/texts/3 meta-data \n",
+ "\n",
+ " confidence \n",
+ "0 0.98 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 0.60 \n",
+ "5 0.66 \n",
+ "6 0.45 \n",
+ "7 0.83 \n",
+ "8 0.50 \n",
+ "9 0.99 \n",
+ "10 0.56 \n",
+ "11 0.93 \n",
+ "2010.01762.pdf\n",
+ "title: OLALA : Object-Level Active Learning based Layout Annotation\n",
+ "abstract: Abstract In layout object detection problems, the ground-truth datasets are constructed by annotating object instances individually. Yet active learning for object detection is typically conducted at the image level, not at the object level. Because objects appear with different frequencies across images, image-level active learning may be subject to over-exposure to common objects. This reduces the efficiency of human labeling. This work introduces an Object-Level Active Learning Layout Annotation framework, OLALA, which includes an object scoring method and a prediction correction algorithm. The object scoring method estimates the object prediction informativeness considering both the object category and the location. It selects only the most ambiguous object prediction regions within an image for annotators to label, optimizing the use of the annotation budget. For the unselected model predictions, we propose a correction algorithm to rectify two types of potential errors with minor supervision from ground-truths. The human annotated and model predicted objects are then merged as new image annotations for training the object detection models. In simulated labeling experiments, we show that OLALA helps to create the dataset more efficiently and report strong accuracy improvements of the trained models compared to image-level active learning baselines. The code is available at https://github.com/ lolipopshock/Detectron2 AL.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10724987884045795509 DOCUMENT # en 1.00\n",
+ "1 metadata 2022203075097451439 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 621634545155468654 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 6076357810235503844 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 18121462109731020905 TEXT #/texts/0 en 0.43\n",
+ "5 semantic 18121462109731020905 TEXT #/texts/0 reference 0.94\n",
+ "6 language 2022203075097451439 TEXT #/texts/1 en 0.51\n",
+ "7 semantic 2022203075097451439 TEXT #/texts/1 header 0.93\n",
+ "8 language 10515800068930002976 TEXT #/texts/2 en 0.27\n",
+ "9 semantic 10515800068930002976 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 16070955043266230473 TEXT #/texts/3 en 0.65\n",
+ "11 semantic 16070955043266230473 TEXT #/texts/3 meta-data 0.99\n",
+ "2106.14616.pdf\n",
+ "title: ICDAR 2021 Competition on Scientific Literature Parsing\n",
+ "abstract: ['contain important information related to cutting-edge innovations in diverse domains. Advances in natural language processing have been driving the fast development in automated information extraction from scientific literature. However, scientific literature is often available in unstructured PDF format. While PDF is great for preserving basic visual elements, such as characters, lines, shapes, etc., on a canvas for presentation to humans, automatic processing of the PDF format by machines presents many challenges. With over 2.5 trillion PDF documents in existence, these issues are prevalent in many other important application domains as well. Our ICDAR 2021 Scientific Literature Parsing Competition (ICDAR2021-SLP) aims to drive the advances specifically in document understanding. ICDAR2021-SLP leverages the PubLayNet and PubTabNet datasets, which provide hundreds of thousands of training and evaluation examples. In Task A, Document Layout Recognition, submissions with the highest performance combine object detection and specialised solutions for the different categories. In Task B, Table Recognition, top submissions rely on methods to identify table components and post-processing methods to generate the table structure and content. Results from both tasks show an impressive performance and opens the possibility for high performance practical applications.']\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 17927538766348339794 DOCUMENT # en 1.00\n",
+ "1 metadata 90058776505785949 DOCUMENT #/texts/1 title 1.00\n",
+ "2 language 13303669633711505885 TEXT #/texts/0 en 0.51\n",
+ "3 semantic 13303669633711505885 TEXT #/texts/0 reference 0.67\n",
+ "4 language 90058776505785949 TEXT #/texts/1 en 0.72\n",
+ "5 semantic 90058776505785949 TEXT #/texts/1 header 0.69\n",
+ "6 language 13081534621825993538 TEXT #/texts/2 en 0.42\n",
+ "7 semantic 13081534621825993538 TEXT #/texts/2 meta-data 0.98\n",
+ "8 language 1534861440285625209 TEXT #/texts/3 en 0.20\n",
+ "9 semantic 1534861440285625209 TEXT #/texts/3 meta-data 1.00\n",
+ "10 language 17359734174191143350 TEXT #/texts/4 en 0.87\n",
+ "11 semantic 17359734174191143350 TEXT #/texts/4 meta-data 1.00\n",
+ "2308.01979.pdf\n",
+ "title: Saleem Ahmed() [0000-0001-8648-9625],\n",
+ "abstract: Abstract. We present a comprehensive study of chart visual questionanswering(QA) task, to address the challenges faced in comprehending and extracting data from chart visualizations within documents. Despite efforts to tackle this problem using synthetic charts, solutions are limited by the shortage of annotated real-world data. To fill this gap, we introduce a benchmark and dataset for chart visual QA on real-world charts, offering a systematic analysis of the task and a novel taxonomy for template-based chart question creation. Our contribution includes the introduction of a new answer type, 'list', with both ranked and unranked variations. Our study is conducted on a real-world chart dataset from scientific literature, showcasing higher visual complexity compared to other works. Our focus is on template-based QA and how it can serve as a standard for evaluating the first-order logic capabilities of models. The results of our experiments, conducted on a real-world out-of-distribution dataset, provide a robust evaluation of large-scale pre-trained models and advance the field of chart visual QA and formal logic verification for neural networks in general. Our code and dataset is publicly available $^{1}$. Keywords: Charts and Document Understanding and Reasoning\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 3808109563705575166 DOCUMENT # en 0.99\n",
+ "1 metadata 4019322271052231808 DOCUMENT #/texts/2 title 1.00\n",
+ "2 metadata 11773917095604246068 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 16220849979362681651 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 17985728316496339607 TEXT #/texts/0 en 0.29\n",
+ "5 semantic 17985728316496339607 TEXT #/texts/0 reference 0.86\n",
+ "6 language 9923502157441382971 TEXT #/texts/1 en 0.79\n",
+ "7 semantic 9923502157441382971 TEXT #/texts/1 header 0.70\n",
+ "8 language 4019322271052231808 TEXT #/texts/2 en 0.26\n",
+ "9 semantic 4019322271052231808 TEXT #/texts/2 reference 0.64\n",
+ "10 language 5576611028079447637 TEXT #/texts/3 en 0.46\n",
+ "11 semantic 5576611028079447637 TEXT #/texts/3 meta-data 0.93\n",
+ "2308.15517.pdf\n",
+ "title: Document AI: A Comparative Study of Transformer-Based, Graph-Based Models, and Convolutional Neural Networks For Document Layout Analysis\n",
+ "abstract: ABSTRACT Document AI aims to automatically analyze documents by leveraging natural language processing and computer vision techniques. One of the major tasks of Document AI is document layout analysis, which structures document pages by interpreting the content and spatial relationships of layout, image, and text. This task can be image-centric, wherein the aim is to identify and label various regions such as authors and paragraphs, or text-centric, where the focus is on classifying individual words in a document. Although there are increasingly sophisticated methods for improving layout analysis, doubts remain about the extent to which their findings can be generalized to a broader context. Specifically, prior work developed systems based on very different architectures, such as transformer-based, graph-based, and CNNs. However, no work has mentioned the effectiveness of these models in a comparative analysis. Moreover, while language-independent Document AI models capable of knowledge transfer have been developed, it remains to be investigated to what degree they can effectively transfer knowledge. In this study, we aim to fill these gaps by conducting a comparative evaluation of state-of-the-art models in document layout analysis and investigating the potential of cross-lingual layout analysis by utilizing machine translation techniques. KEYWORDS Document AI, Document Layout Analysis, Vision and Language, Multilingual Document Understanding, Machine Translation\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 13637588366794903612 DOCUMENT # en 0.99\n",
+ "1 metadata 18194790379355975476 DOCUMENT #/texts/0 title 1.00\n",
+ "2 metadata 6741446958383146662 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 15052902537839731467 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 metadata 6741617251885352993 DOCUMENT #/texts/6 abstract 1.00\n",
+ "5 metadata 12872026505660432034 DOCUMENT #/texts/7 abstract 1.00\n",
+ "6 language 18194790379355975476 TEXT #/texts/0 en 0.60\n",
+ "7 semantic 18194790379355975476 TEXT #/texts/0 header 0.70\n",
+ "8 language 11017632467139747605 TEXT #/texts/1 en 0.29\n",
+ "9 semantic 11017632467139747605 TEXT #/texts/1 meta-data 1.00\n",
+ "10 language 1435601097181785708 TEXT #/texts/2 nl 0.10\n",
+ "11 semantic 1435601097181785708 TEXT #/texts/2 meta-data 1.00\n",
+ "2012.14740.pdf\n",
+ "title: LAYOUTLMV2: MULTI-MODAL PRE-TRAINING FOR VISUALLY-RICH DOCUMENT UNDERSTANDING\n",
+ "abstract: ABSTRACT Pre-training of text and layout has proved effective in a variety of visuallyrich document understanding tasks due to its effective model architecture and the advantage of large-scale unlabeled scanned/digital-born documents. In this paper, we present LayoutLMv2 by pre-training text, layout and image in a multi-modal framework, where new model architectures and pre-training tasks are leveraged. Specifically, LayoutLMv2 not only uses the existing masked visual-language modeling task but also the new text-image alignment and textimage matching tasks in the pre-training stage, where cross-modality interaction is better learned. Meanwhile, it also integrates a spatial-aware selfattention mechanism into the Transformer architecture, so that the model can fully understand the relative positional relationship among different text blocks. Experiment results show that LayoutLMv2 outperforms strong baselines and achieves new state-of-the-art results on a wide variety of downstream visuallyrich document understanding tasks, including FUNSD (0.7895 → 0.8420), CORD (0.9493 → 0.9601), SROIE (0.9524 → 0.9781), Kleister-NDA (0.834 → 0.852), RVL-CDIP (0.9443 → 0.9564), and DocVQA (0.7295 → 0.8672).\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 14425372393029709547 DOCUMENT # en \n",
+ "1 metadata 11474741870108700040 DOCUMENT #/texts/1 title \n",
+ "2 metadata 4857877928660915249 DOCUMENT #/texts/12 abstract \n",
+ "3 metadata 11909429981990900791 DOCUMENT #/texts/13 abstract \n",
+ "4 language 9433559876163775546 TEXT #/texts/0 en \n",
+ "5 semantic 9433559876163775546 TEXT #/texts/0 reference \n",
+ "6 language 11474741870108700040 TEXT #/texts/1 en \n",
+ "7 semantic 11474741870108700040 TEXT #/texts/1 header \n",
+ "8 language 17478075665133754721 TEXT #/texts/2 en \n",
+ "9 semantic 17478075665133754721 TEXT #/texts/2 meta-data \n",
+ "10 language 11558241638393646319 TEXT #/texts/3 en \n",
+ "11 semantic 11558241638393646319 TEXT #/texts/3 meta-data \n",
+ "\n",
+ " confidence \n",
+ "0 0.98 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 0.37 \n",
+ "5 0.94 \n",
+ "6 0.37 \n",
+ "7 0.85 \n",
+ "8 0.40 \n",
+ "9 1.00 \n",
+ "10 0.13 \n",
+ "11 1.00 \n",
+ "2304.12506.pdf\n",
+ "title: DualSlide: Global-to-Local Sketching Interface for Slide Content and Layout Design\n",
+ "abstract: Abstract-Online learning and academic conferences have become pervasive and essential for education and professional development, especially since the onset of pandemics. Academic presentations usually require well-designed slides that are easily understood. Sketches that visually represent design intentions and are readily accessible to the average users. To assist non-expert users in creating visually appealing academic slides, we propose DualSlide, a global and local two-stage sketching interface system that provides image retrieval and user guidance. At the global stage, DualSlide provides a heat map canvas to display the distribution of all slide layouts in a dataset, allowing users to explore the reference slides efficiently. At the local stage of the system, detailed references and guidance for designing slide content, such as diagrams and fonts, can be provided. We further propose a sketch-matching algorithm to compare the user's input sketch and similar diagrams. All user guidance can be adapted in real-time editing, and users can design slides with a high degree of freedom. We conducted a user study to verify the effectiveness and usability of the proposed DualSlide system confirming that DualSlide provides high retrieval accuracy and satisfactory design results with a good user experience. Index Terms-two-stage design, sketching interface, slides, layout design\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 5705758540542284734 DOCUMENT # en 1.00\n",
+ "1 metadata 5519579206395144526 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 16967503306961863663 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 6035262646238865268 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 1595675669711621451 TEXT #/texts/0 en 0.45\n",
+ "5 semantic 1595675669711621451 TEXT #/texts/0 text 0.96\n",
+ "6 language 5519579206395144526 TEXT #/texts/1 en 0.63\n",
+ "7 semantic 5519579206395144526 TEXT #/texts/1 header 0.92\n",
+ "8 language 7737451635683353484 TEXT #/texts/2 en 0.45\n",
+ "9 semantic 7737451635683353484 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 4795755608797352799 TEXT #/texts/3 en 0.20\n",
+ "11 semantic 4795755608797352799 TEXT #/texts/3 meta-data 0.94\n",
+ "2008.10831.pdf\n",
+ "title: CDeC-Net: Composite Deformable Cascade Network for Table Detection in Document Images\n",
+ "abstract: Abstract-Localizing page elements/objects such as tables, figures, equations, etc. is the primary step in extracting information from document images. We propose a novel endto-end trainable deep network, (CDeC-Net) for detecting tables present in the documents. The proposed network consists of a multistage extension of Mask R-CNN with a dual backbone having deformable convolution for detecting tables varying in scale with high detection accuracy at higher IoU threshold. We empirically evaluate CDeC-Net on all the publicly available benchmark datasets-ICDAR-2013, ICDAR-2017, ICDAR-2019, UNLV, Marmot, PubLayNet, and TableBank-with extensive experiments. Our solution has three important properties: (i) a single trained model CDeC-Net ‡ performs well across all the popular benchmark datasets; (ii) we report excellent performances across multiple, including higher, thresholds of IoU; (iii) by following the same protocol of the recent papers for each of the benchmarks, we consistently demonstrate the superior quantitative performance. Our code and models will be publicly released for enabling the reproducibility of the results. Keywords-Page object, table detection, Cascade Mask R-CNN, deformable convolution, single model.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 7500632956678550284 DOCUMENT # en 1.00\n",
+ "1 metadata 3886870210686438049 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 7844835822484761539 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 16959079097357691233 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 metadata 10826267580776457888 DOCUMENT #/texts/7 abstract 1.00\n",
+ "5 language 8245647462462407059 TEXT #/texts/0 en 0.42\n",
+ "6 semantic 8245647462462407059 TEXT #/texts/0 reference 0.99\n",
+ "7 language 3886870210686438049 TEXT #/texts/1 en 0.45\n",
+ "8 semantic 3886870210686438049 TEXT #/texts/1 header 0.72\n",
+ "9 language 15487035232066340389 TEXT #/texts/2 en 0.56\n",
+ "10 semantic 15487035232066340389 TEXT #/texts/2 meta-data 0.92\n",
+ "11 language 17396645985426887891 TEXT #/texts/3 en 0.29\n",
+ "2301.11529.pdf\n",
+ "title: PLay: Parametrically Conditioned Layout Generation using Latent Diffusion\n",
+ "abstract: Abstract Layout design is an important task in various design fields, including user interfaces, document, and graphic design. As this task requires tedious manual effort by designers, prior works have attempted to automate this process using generative models, but commonly fell short of providing intuitive user controls and achieving design objectives. In this paper, we build a conditional latent diffusion model, PLay, that generates parametrically conditioned layouts in vector graphic space from user-specified guidelines, which are commonly used by designers for representing their design intents in current practices. Our method outperforms prior works across three datasets on metrics including FID and FD-VG, and in user test. Moreover, it brings a novel and interactive experience to professional layout design processes.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 2466707221960475363 DOCUMENT # en 1.00\n",
+ "1 metadata 2108956444558052725 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 6816454215830934296 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 14315677914332171523 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 7964423042393186425 TEXT #/texts/0 en 0.35\n",
+ "5 semantic 7964423042393186425 TEXT #/texts/0 text 0.97\n",
+ "6 language 2108956444558052725 TEXT #/texts/1 en 0.32\n",
+ "7 semantic 2108956444558052725 TEXT #/texts/1 header 0.76\n",
+ "8 language 1180747206188371815 TEXT #/texts/2 en 0.37\n",
+ "9 semantic 1180747206188371815 TEXT #/texts/2 meta-data 0.98\n",
+ "10 language 6816454215830934296 TEXT #/texts/3 en 0.32\n",
+ "11 semantic 6816454215830934296 TEXT #/texts/3 header 0.93\n",
+ "2308.13769.pdf\n",
+ "title: Bengali Document Layout Analysis with Detectron2\n",
+ "abstract: Abstract-Document digitization is vital for preserving historical records, efficient document management, and advancing OCR (Optical Character Recognition) research. Document Layout Analysis (DLA) involves segmenting documents into meaningful units like text boxes, paragraphs, images, and tables. Challenges arise when dealing with diverse layouts, historical documents, and unique scripts like Bengali, hindered by the lack of comprehensive Bengali DLA datasets. We improved the accuracy of the DLA model for Bengali documents by utilizing advanced Mask R-CNN models available in the Detectron2 library. Our evaluation involved three variants: Mask R-CNN R-50, R-101, and X-101, both with and without pretrained weights from PubLayNet, on the BaDLAD dataset, which contains human-annotated Bengali documents in four categories: text boxes, paragraphs, images, and tables. Results show the effectiveness of these models in accurately segmenting Bengali documents. We discuss speed-accuracy tradeoffs and underscore the significance of pretrained weights. Our findings expand the applicability of Mask R-CNN in document layout analysis, efficient document management, and OCR research while suggesting future avenues for fine-tuning and data augmentation. Index Terms-Mask R-CNN, Instance Segmentation, Transfer Learning\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 17548983273677985646 DOCUMENT # en 1.00\n",
+ "1 metadata 559063730110845019 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 11324618554899354160 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 10644090233689937859 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 metadata 11368196014538905625 DOCUMENT #/texts/8 abstract 1.00\n",
+ "5 metadata 4911548022119256481 DOCUMENT #/texts/9 abstract 1.00\n",
+ "6 language 3042703310976267912 TEXT #/texts/0 en 0.31\n",
+ "7 semantic 3042703310976267912 TEXT #/texts/0 reference 0.86\n",
+ "8 language 559063730110845019 TEXT #/texts/1 en 0.48\n",
+ "9 semantic 559063730110845019 TEXT #/texts/1 header 0.82\n",
+ "10 language 14447214234305273736 TEXT #/texts/2 en 0.23\n",
+ "11 semantic 14447214234305273736 TEXT #/texts/2 meta-data 0.99\n",
+ "2112.12703.pdf\n",
+ "title: Digital Editions as Distant Supervision for Layout Analysis of Printed Books ⋆\n",
+ "abstract: Abstract. Archivists, textual scholars, and historians often produce digital editions of historical documents. Using markup schemes such as those of the Text Encoding Initiative and EpiDoc, these digital editions often record documents' semantic regions (such as notes and figures) and physical features (such as page and line breaks) as well as transcribing their textual content. We describe methods for exploiting this semantic markup as distant supervision for training and evaluating layout analysis models. In experiments with several model architectures on the half-million pages of the Deutsches Textarchiv (DTA), we find a high correlation of these region-level evaluation methods with pixel-level and word-level metrics. We discuss the possibilities for improving accuracy with self-training and the ability of models trained on the DTA to generalize to other historical printed books. Keywords: Layout analysis · Distant supervision · Evaluation.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 15802633760579864106 DOCUMENT # en 0.99\n",
+ "1 metadata 6470581748295618029 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 13796643975352810616 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 3726832405081047624 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 11473093061238217778 TEXT #/texts/0 en 0.59\n",
+ "5 semantic 11473093061238217778 TEXT #/texts/0 reference 0.89\n",
+ "6 language 6470581748295618029 TEXT #/texts/1 en 0.91\n",
+ "7 semantic 6470581748295618029 TEXT #/texts/1 header 0.88\n",
+ "8 language 16387431572513897996 TEXT #/texts/2 en 0.31\n",
+ "9 semantic 16387431572513897996 TEXT #/texts/2 meta-data 0.98\n",
+ "10 language 7570883125013524308 TEXT #/texts/3 en 0.75\n",
+ "11 semantic 7570883125013524308 TEXT #/texts/3 meta-data 0.96\n",
+ "2305.08455.pdf\n",
+ "title: Document Understanding Dataset and Evaluation (DUDE)\n",
+ "abstract: Abstract We call on the Document AI (DocAI) community to reevaluate current methodologies and embrace the challenge of creating more practically-oriented benchmarks. Document Understanding Dataset and Evaluation (DUDE) seeks to remediate the halted research progress in understanding visually-rich documents (VRDs). We present a new dataset with novelties related to types of questions, answers, and document layouts based on multi-industry, multi-domain, and multi-page VRDs of various origins, and dates. Moreover, we are pushing the boundaries of current methods by creating multi-task and multi-domain evaluation setups that more accurately simulate real-world situations where powerful generalization and adaptation under low-resource settings are desired. DUDE aims to set a new standard as a more practical, long-standing benchmark for the community, and we hope that it will lead to future extensions and contributions that address real-world challenges. Finally, our work illustrates the importance of finding more efficient ways to model language, images, and layout in DocAI.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 101647772389983397 DOCUMENT # en 0.99\n",
+ "1 metadata 10331356580876402369 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 14331103659799847498 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 4393248028108810465 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 7099592825139238992 TEXT #/texts/0 en 0.61\n",
+ "5 semantic 7099592825139238992 TEXT #/texts/0 reference 0.66\n",
+ "6 language 10331356580876402369 TEXT #/texts/1 en 0.48\n",
+ "7 semantic 10331356580876402369 TEXT #/texts/1 header 0.88\n",
+ "8 language 9476829149127177963 TEXT #/texts/2 pl 0.18\n",
+ "9 semantic 9476829149127177963 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 14331103659799847498 TEXT #/texts/3 en 0.32\n",
+ "11 semantic 14331103659799847498 TEXT #/texts/3 header 0.93\n",
+ "2212.12975.pdf\n",
+ "title: Interactive Layout Drawing Interface with Shadow Guidance\n",
+ "abstract: ABSTRACT It is difficult to design a visually appealing layout for common users, which takes time even for professional designers. In this paper, we present an interactive layout design system with shadow guidance and layout retrieval to help users obtain satisfactory design results. This study focuses in particular on the design of academic presentation slides. The user may refer to the shadow guidance as a heat map, which is the layout distribution of our gathered data set, using the suggested shadow guidance. The suggested system is datadriven, allowing users to analyze the design data naturally. The layout may then be edited by the user to finalize the layout design. We validated the suggested interface in our user study by comparing it with common design interfaces. The findings show that the suggested interface may achieve high retrieval accuracy while simultaneously offering a pleasant user experience. Keywords: Educational video, slide-based video, user interface, layout design\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 17236209161277235289 DOCUMENT # en 1.00\n",
+ "1 metadata 9715825528859916993 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 2496493818778864004 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 8708013986464472762 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 metadata 16728472205694020197 DOCUMENT #/texts/6 abstract 1.00\n",
+ "5 language 7593916797503527452 TEXT #/texts/0 en 0.67\n",
+ "6 semantic 7593916797503527452 TEXT #/texts/0 text 0.57\n",
+ "7 language 9715825528859916993 TEXT #/texts/1 en 0.64\n",
+ "8 semantic 9715825528859916993 TEXT #/texts/1 header 0.87\n",
+ "9 language 3488045698984828010 TEXT #/texts/2 en 0.84\n",
+ "10 semantic 3488045698984828010 TEXT #/texts/2 meta-data 1.00\n",
+ "11 language 8233996923867261655 TEXT #/texts/3 en 0.65\n",
+ "2306.05749.pdf\n",
+ "title: DocAligner: Annotating Real-world Photographic Document Images by Simply Taking Pictures\n",
+ "abstract: Abstract Recently, there has been a growing interest in research concerning document image analysis and recognition in photographic scenarios. However, the lack of labeled datasets for this emerging challenge poses a significant obstacle, as manual annotation can be time-consuming and impractical. To tackle this issue, we present DocAligner, a novel method that streamlines the manual annotation process to a simple step of taking pictures. DocAligner achieves this by establishing dense correspondence between photographic document images and their clean counterparts. It enables the automatic transfer of existing annotations in clean document images to photographic ones and helps to automatically acquire labels that are unavailable through manual labeling. Considering the distinctive characteristics of document images, DocAligner incorporates several innovative features. First, we propose a non-rigid pre-alignment technique based on the document's edges, which effectively eliminates interference caused by significant global shifts and repetitive patterns present in document images. Second, to handle large shifts and ensure high accuracy, we introduce a hierarchical aligning approach that combines global and local correlation layers. Furthermore, considering the importance of fine-grained elements in document images, we present a details recurrent refinement module to enhance the output in a high-resolution space. To train DocAligner, we construct a synthetic dataset and introduce a self-supervised learning approach to enhance its robustness for real-world data. Through extensive experiments, we demonstrate the effectiveness of DocAligner and the acquired dataset. Datasets and codes will be publicly available.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 16350862388529045349 DOCUMENT # en 1.00\n",
+ "1 metadata 1693234739285510286 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 8284927886365265466 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 12966088567558872775 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 4212173990135466214 TEXT #/texts/0 en 0.32\n",
+ "5 semantic 4212173990135466214 TEXT #/texts/0 reference 0.95\n",
+ "6 language 1693234739285510286 TEXT #/texts/1 en 0.70\n",
+ "7 semantic 1693234739285510286 TEXT #/texts/1 header 0.46\n",
+ "8 language 4770053369267445470 TEXT #/texts/2 en 0.29\n",
+ "9 semantic 4770053369267445470 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 1088813220980863929 TEXT #/texts/3 en 0.48\n",
+ "11 semantic 1088813220980863929 TEXT #/texts/3 meta-data 0.99\n",
+ "2303.11589.pdf\n",
+ "title: LayoutDiffusion: Improving Graphic Layout Generation by Discrete Diffusion Probabilistic Models\n",
+ "abstract: Abstract Creating graphic layouts is a fundamental step in graphic designs. In this work, we present a novel generative model named LayoutDiffusion for automatic layout generation. As layout is typically represented as a sequence of discrete tokens, LayoutDiffusion models layout generation as a discrete denoising diffusion process. It learns to reverse a mild forward process, in which layouts become increasingly chaotic with the growth of forward steps and layouts in the neighboring steps do not differ too much. Designing such a mild forward process is however very challenging as layout has both categorical attributes and ordinal attributes. To tackle the challenge, we summarize three critical factors for achieving a mild forward process for the layout, i.e., legality, coordinate proximity and type disruption. Based on the factors, we propose a block-wise transition matrix coupled with a piece-wise linear noise schedule. Experiments on RICO and PubLayNet datasets show that Layout-Diffusion outperforms state-of-the-art approaches significantly. Moreover, it enables two conditional layout generation tasks in a plug-and-play manner without re-training and achieves better performance than existing methods.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 9641950465761553286 DOCUMENT # en 0.99\n",
+ "1 metadata 3540765956142751839 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 15449882581325865178 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 11577166392640436701 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 6096154047150880050 TEXT #/texts/0 en 0.30\n",
+ "5 semantic 6096154047150880050 TEXT #/texts/0 text 0.89\n",
+ "6 language 3540765956142751839 TEXT #/texts/1 en 0.53\n",
+ "7 semantic 3540765956142751839 TEXT #/texts/1 header 0.83\n",
+ "8 language 16684261228906829665 TEXT #/texts/2 en 0.49\n",
+ "9 semantic 16684261228906829665 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 7858426494994361717 TEXT #/texts/3 ro 0.11\n",
+ "11 semantic 7858426494994361717 TEXT #/texts/3 meta-data 0.98\n",
+ "2012.08191.pdf\n",
+ "title: docExtractor: An off-the-shelf historical document element extraction\n",
+ "abstract: Abstract-We present docExtractor, a generic approach for extracting visual elements such as text lines or illustrations from historical documents without requiring any real data annotation. We demonstrate it provides high-quality performances as an offthe-shelf system across a wide variety of datasets and leads to results on par with state-of-the-art when fine-tuned. We argue that the performance obtained without fine-tuning on a specific dataset is critical for applications, in particular in digital humanities, and that the line-level page segmentation we address is the most relevant for a general purpose element extraction engine. We rely on a fast generator of rich synthetic documents and design a fully convolutional network, which we show to generalize better than a detection-based approach. Furthermore, we introduce a new public dataset dubbed IlluHisDoc dedicated to the fine evaluation of illustration segmentation in historical documents. Index Terms-deep learning, document layout analysis, historical document, page segmentation, text line detection, synthetic data\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 12746519129587114900 DOCUMENT # en 0.99\n",
+ "1 metadata 6487608977047476565 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 5025046227996738281 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 336290989734687421 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 16017635334618188321 TEXT #/texts/0 en 0.50\n",
+ "5 semantic 16017635334618188321 TEXT #/texts/0 reference 0.99\n",
+ "6 language 6487608977047476565 TEXT #/texts/1 en 0.74\n",
+ "7 semantic 6487608977047476565 TEXT #/texts/1 text 0.49\n",
+ "8 language 5569259395429831475 TEXT #/texts/2 en 0.58\n",
+ "9 semantic 5569259395429831475 TEXT #/texts/2 meta-data 0.98\n",
+ "10 language 1878354837183421307 TEXT #/texts/3 fr 0.23\n",
+ "11 semantic 1878354837183421307 TEXT #/texts/3 meta-data 0.95\n",
+ "2106.11539.pdf\n",
+ "title: DocFormer: End-to-End Transformer for Document Understanding\n",
+ "abstract: Abstract We present DocFormer-a multi-modal transformer based architecture for the task of Visual Document Understanding (VDU). VDU is a challenging problem which aims to understand documents in their varied formats (forms, receipts etc.) and layouts. In addition, DocFormer is pre-trained in an unsupervised fashion using carefully designed tasks which encourage multi-modal interaction. DocFormer uses text, vision and spatial features and combines them using a novel multi-modal self-attention layer. DocFormer also shares learned spatial embeddings across modalities which makes it easy for the model to correlate text to visual tokens and vice versa. DocFormer is evaluated on 4 different datasets each with strong baselines. DocFormer achieves state-of-the-art results on all of them, sometimes beating models 4x its size (in no. of parameters).\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 9235921951217316380 DOCUMENT # en \n",
+ "1 metadata 9729555286773671600 DOCUMENT #/texts/1 title \n",
+ "2 metadata 15482861471896913137 DOCUMENT #/texts/10 abstract \n",
+ "3 metadata 2622808252535089031 DOCUMENT #/texts/11 abstract \n",
+ "4 language 5106629619373635080 TEXT #/texts/0 en \n",
+ "5 semantic 5106629619373635080 TEXT #/texts/0 reference \n",
+ "6 language 9729555286773671600 TEXT #/texts/1 en \n",
+ "7 semantic 9729555286773671600 TEXT #/texts/1 header \n",
+ "8 language 5467895438401112969 TEXT #/texts/2 en \n",
+ "9 semantic 5467895438401112969 TEXT #/texts/2 meta-data \n",
+ "10 language 9961559843998000328 TEXT #/texts/3 en \n",
+ "11 semantic 9961559843998000328 TEXT #/texts/3 meta-data \n",
+ "\n",
+ " confidence \n",
+ "0 0.99 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 0.33 \n",
+ "5 0.95 \n",
+ "6 0.68 \n",
+ "7 0.57 \n",
+ "8 0.62 \n",
+ "9 0.97 \n",
+ "10 0.27 \n",
+ "11 1.00 \n",
+ "2308.12896.pdf\n",
+ "title: Beyond Document Page Classification: Design, Datasets, and Challenges\n",
+ "abstract: Abstract This paper highlights the need to bring document classification benchmarking closer to real-world applications, both in the nature of data tested (X : multi-channel, multipaged, multi-industry; Y : class distributions and label set variety) and in classification tasks considered (f : multipage document, page stream, and document bundle classification,...). We identify the lack of public multi-page document classification datasets, formalize different classification tasks arising in application scenarios, and motivate the value of targeting efficient multi-page document representations. An experimental study on proposed multi-page document classification datasets demonstrates that current benchmarks have become irrelevant and need to be updated to evaluate complete documents, as they naturally occur in practice. This reality check also calls for more mature evaluation methodologies, covering calibration evaluation, inference complexity (time-memory), and a range of realistic distribution shifts (e.g., born-digital vs. scanning noise, shifting page order). Our study ends on a hopeful note by recommending concrete avenues for future improvements.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 4395650766487548677 DOCUMENT # en 0.99\n",
+ "1 metadata 14640984598143256974 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 12465491340188692402 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 10875145229315404845 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 15855267480109489419 TEXT #/texts/0 en 0.35\n",
+ "5 semantic 15855267480109489419 TEXT #/texts/0 reference 0.86\n",
+ "6 language 14640984598143256974 TEXT #/texts/1 en 0.77\n",
+ "7 semantic 14640984598143256974 TEXT #/texts/1 reference 0.56\n",
+ "8 language 6853249753038199094 TEXT #/texts/2 en 0.28\n",
+ "9 semantic 6853249753038199094 TEXT #/texts/2 meta-data 0.97\n",
+ "10 language 6836684947553125863 TEXT #/texts/3 ca 0.14\n",
+ "11 semantic 6836684947553125863 TEXT #/texts/3 meta-data 0.95\n",
+ "2301.10140.pdf\n",
+ "title: The Semantic Scholar Open Data Platform\n",
+ "abstract: Abstract The volume of scientific output is creating an urgent need for automated tools to help scientists keep up with developments in their field. Semantic Scholar (S2) is an open data platform and website aimed at accelerating science by helping scholars discover and understand scientific literature. We combine public and proprietary data sources using state-of-theart techniques for scholarly PDF content extraction and automatic knowledge graph construction to build the Semantic Scholar Academic Graph, the largest open scientific literature graph to-date, with 200M+ papers, 80M+ authors, 550M+ paper-authorship edges, and 2.4B+ citation edges. The graph includes advanced semantic features such as structurally parsed text, natural language summaries, and vector embeddings. In this paper, we describe the components of the S2 data processing pipeline and the associated APIs offered by the platform. We will update this living document to reflect changes as we add new data offerings and improve existing services.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 17344644664439194234 DOCUMENT # en 1.00\n",
+ "1 metadata 4896217369367338170 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 13887372868498991883 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 4492224273364446239 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 10473876935274060974 TEXT #/texts/0 en 0.31\n",
+ "5 semantic 10473876935274060974 TEXT #/texts/0 text 0.95\n",
+ "6 language 4896217369367338170 TEXT #/texts/1 en 0.61\n",
+ "7 semantic 4896217369367338170 TEXT #/texts/1 header 0.90\n",
+ "8 language 11464820173648324489 TEXT #/texts/2 en 0.47\n",
+ "9 semantic 11464820173648324489 TEXT #/texts/2 meta-data 0.85\n",
+ "10 language 14822167066882793830 TEXT #/texts/3 en 0.69\n",
+ "11 semantic 14822167066882793830 TEXT #/texts/3 meta-data 0.69\n",
+ "2212.02896.pdf\n",
+ "title: Multimodal Tree Decoder for Table of Contents Extraction in Document Images\n",
+ "abstract: Abstract-Table of contents (ToC) extraction aims to extract headings of different levels in documents to better understand the outline of the contents, which can be widely used for document understanding and information retrieval. Existing works often use hand-crafted features and predefined rule-based functions to detect headings and resolve the hierarchical relationship between headings. Both the benchmark and research based on deep learning are still limited. Accordingly, in this paper, we first introduce a standard dataset, HierDoc, including image samples from 650 documents of scientific papers with their content labels. Then we propose a novel end-to-end model by using the multimodal tree decoder (MTD) for ToC as a benchmark for HierDoc. The MTD model is mainly composed of three parts, namely encoder, classifier, and decoder. The encoder fuses the multimodality features of vision, text, and layout information for each entity of the document. Then the classifier recognizes and selects the heading entities. Next, to parse the hierarchical relationship between the heading entities, a tree-structured decoder is designed. To evaluate the performance, both the metric of tree-edit-distance similarity (TEDS) and F1-Measure are adopted. Finally, our MTD approach achieves an average TEDS of 87.2% and an average F1-Measure of 88.1% on the test set of HierDoc. The code and dataset will be released at: https://github.com/Pengfei-Hu/MTD.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 4520737201345173879 DOCUMENT # en 0.97\n",
+ "1 metadata 2294829083606373586 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 5961811913560082829 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 language 18342393307934275562 TEXT #/texts/0 en 0.49\n",
+ "4 semantic 18342393307934275562 TEXT #/texts/0 reference 0.89\n",
+ "5 language 2294829083606373586 TEXT #/texts/1 en 0.70\n",
+ "6 semantic 2294829083606373586 TEXT #/texts/1 header 0.49\n",
+ "7 language 6688015588212255585 TEXT #/texts/2 en 0.36\n",
+ "8 semantic 6688015588212255585 TEXT #/texts/2 meta-data 0.99\n",
+ "9 language 8322937833169004557 TEXT #/texts/3 en 0.64\n",
+ "10 semantic 8322937833169004557 TEXT #/texts/3 meta-data 0.95\n",
+ "11 language 16705250730596284103 TEXT #/texts/4 en 0.80\n",
+ "2302.05658.pdf\n",
+ "title: DocILE Benchmark for Document Information Localization and Extraction\n",
+ "abstract: Abstract. This paper introduces the DocILE benchmark with the largest dataset of business documents for the tasks of Key Information Localization and Extraction and Line Item Recognition. It contains 6. 7k annotated business documents, 100k synthetically generated documents, and nearly 1M unlabeled documents for unsupervised pre-training. The dataset has been built with knowledge of domain-and task-specific aspects, resulting in the following key features: (i) annotations in 55 classes, which surpasses the granularity of previously published key information extraction datasets by a large margin; (ii) Line Item Recognition represents a highly practical information extraction task, where key information has to be assigned to items in a table; (iii) documents come from numerous layouts and the test set includes zero-and few-shot cases as well as layouts commonly seen in the training set. The benchmark comes with several baselines, including RoBERTa, LayoutLMv3 and DETRbased Table Transformer; applied to both tasks of the DocILE benchmark, with results shared in this paper, offering a quick starting point for future work. The dataset, baselines and supplementary material are available at https://github.com/rossumai/docile. Keywords: Document AI · Information Extraction · Line Item Recognition · Business Documents · Intelligent Document Processing\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 7009734128893118738 DOCUMENT # en \n",
+ "1 metadata 13493428666814362393 DOCUMENT #/texts/1 title \n",
+ "2 metadata 1681750115897753256 DOCUMENT #/texts/9 abstract \n",
+ "3 metadata 5646429187594938390 DOCUMENT #/texts/10 abstract \n",
+ "4 language 5503286060854998078 TEXT #/texts/0 en \n",
+ "5 semantic 5503286060854998078 TEXT #/texts/0 text \n",
+ "6 language 13493428666814362393 TEXT #/texts/1 en \n",
+ "7 semantic 13493428666814362393 TEXT #/texts/1 header \n",
+ "8 language 172976763706734726 TEXT #/texts/2 en \n",
+ "9 semantic 172976763706734726 TEXT #/texts/2 meta-data \n",
+ "10 language 12947117675211018377 TEXT #/texts/3 en \n",
+ "11 semantic 12947117675211018377 TEXT #/texts/3 meta-data \n",
+ "\n",
+ " confidence \n",
+ "0 0.94 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 0.58 \n",
+ "5 0.83 \n",
+ "6 0.57 \n",
+ "7 0.89 \n",
+ "8 0.15 \n",
+ "9 0.92 \n",
+ "10 0.23 \n",
+ "11 0.73 \n",
+ "2304.13240.pdf\n",
+ "title: Structure Diagram Recognition in Financial Announcements\n",
+ "abstract: Abstract. Accurately extracting structured data from structure diagrams in financial announcements is of great practical importance for building financial knowledge graphs and further improving the efficiency of various financial applications. First, we proposed a new method for recognizing structure diagrams in financial announcements, which can better detect and extract different types of connecting lines, including straight lines, curves, and polylines of different orientations and angles. Second, we developed a semi-automated, two-stage method to efficiently generate the industry's first benchmark of structure diagrams from Chinese financial announcements, where a large number of diagrams were synthesized and annotated using an automated tool to train a preliminary recognition model with fairly good performance, and then a highquality benchmark can be obtained by automatically annotating the realworld structure diagrams using the preliminary model and then making few manual corrections. Finally, we experimentally verified the significant performance advantage of our structure diagram recognition method over previous methods. Keywords: Structure Diagram Recognition · Document AI · Financial Announcements\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 15950461597083458226 DOCUMENT # en 1.00\n",
+ "1 metadata 403958489487654933 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 14621555780754220160 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 14253956465128531592 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 10864193995400050123 TEXT #/texts/0 en 0.53\n",
+ "5 semantic 10864193995400050123 TEXT #/texts/0 reference 0.66\n",
+ "6 language 403958489487654933 TEXT #/texts/1 en 0.69\n",
+ "7 semantic 403958489487654933 TEXT #/texts/1 header 0.85\n",
+ "8 language 3492372939238010805 TEXT #/texts/2 en 0.17\n",
+ "9 semantic 3492372939238010805 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 16586331086933136302 TEXT #/texts/3 en 0.81\n",
+ "11 semantic 16586331086933136302 TEXT #/texts/3 meta-data 0.99\n",
+ "2102.09395.pdf\n",
+ "title: Robust PDF Document Conversion Using Recurrent Neural Networks\n",
+ "abstract: Abstract The number of published PDF documents in both the academic and commercial world has increased exponentially in recent decades. There is a growing need to make their rich content discoverable to information retrieval tools. Achieving high-quality semantic searches demands that a document's structural components such as title, section headers, paragraphs, (nested) lists, tables and figures (including their captions) are properly identified. Unfortunately, the PDF format is known to not conserve such structural information because it simply represents a document as a stream of low-level printing commands, in which one or more characters are placed in a bounding box with a particular styling. In this paper, we present a novel approach to document structure recovery in PDF using recurrent neural networks to process the low-level PDF data representation directly, instead of relying on a visual re-interpretation of the rendered PDF page, as has been proposed in previous literature. We demonstrate how a sequence of PDF printing commands can be used as input into a neural network and how the network can learn to classify each printing command according to its structural function in the page. This approach has three advantages: First, it can distinguish among more fine-grained labels (typically 10-20 labels as opposed to 1-5 with visual methods), which results in a more accurate and detailed document structure resolution. Second, it can take into account the text flow across pages more naturally compared to visual methods because it can concatenate the printing commands of sequential pages. Last, our proposed method needs less memory and it is computationally less expensive than visual methods. This allows us to deploy such models in production environments at a much lower cost. Through extensive architectural search in combination with advanced feature engineering, we were able to implement a model that yields a weighted average F$_{1}$ score of 97% across 17 distinct structural labels. The best model we achieved is currently served in production environments on our Corpus Conversion Service (CCS), which was presented at KDD18. This model enhances the capabilities of CCS significantly, as it eliminates the need for human annotated label ground-truth for every unseen document layout. This proved particularly useful when applied to a huge corpus of PDF articles related to COVID-19.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10561561617789153455 DOCUMENT # en 1.00\n",
+ "1 metadata 7135050673999108316 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 13302319754357243881 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 7392025859030204500 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 11051377048051759528 TEXT #/texts/0 en 0.44\n",
+ "5 semantic 11051377048051759528 TEXT #/texts/0 text 0.97\n",
+ "6 language 7135050673999108316 TEXT #/texts/1 en 0.37\n",
+ "7 semantic 7135050673999108316 TEXT #/texts/1 header 0.94\n",
+ "8 language 15708761523827467165 TEXT #/texts/2 en 0.28\n",
+ "9 semantic 15708761523827467165 TEXT #/texts/2 meta-data 0.66\n",
+ "10 language 10144578681078525297 TEXT #/texts/3 en 0.52\n",
+ "11 semantic 10144578681078525297 TEXT #/texts/3 meta-data 1.00\n",
+ "2104.12756.pdf\n",
+ "title: InfographicVQA\n",
+ "abstract: Abstract Infographics are documents designed to effectively communicate information using a combination of textual, graphical and visual elements. In this work, we explore the automatic understanding of infographic images by using Visual Question Answering technique. To this end, we present InfographicVQA, a new dataset that comprises a diverse collection of infographics along with natural language questions and answers annotations. The collected questions require methods to jointly reason over the document layout, textual content, graphical elements, and data visualizations. We curate the dataset with emphasis on questions that require elementary reasoning and basic arithmetic skills. Finally, we evaluate two strong baselines based on state of the art multi-modal VQA models, and establish baseline performance for the new task. The dataset, code and leaderboard will be made available at docvqa.org\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 7013058411310364900 DOCUMENT # en 1.00\n",
+ "1 metadata 16914697807876545820 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 1451867982113835867 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 5889522086521579528 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 3037774237887954095 TEXT #/texts/0 en 0.52\n",
+ "5 semantic 3037774237887954095 TEXT #/texts/0 text 0.69\n",
+ "6 language 16914697807876545820 TEXT #/texts/1 en 0.44\n",
+ "7 semantic 16914697807876545820 TEXT #/texts/1 header 0.66\n",
+ "8 language 10601015517049298818 TEXT #/texts/2 en 0.24\n",
+ "9 semantic 10601015517049298818 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 9677905795005395855 TEXT #/texts/3 en 0.19\n",
+ "11 semantic 9677905795005395855 TEXT #/texts/3 meta-data 0.93\n",
+ "2212.02623.pdf\n",
+ "title: Unifying Vision, Text, and Layout for Universal Document Processing\n",
+ "abstract: Abstract We propose Universal Document Processing (UDOP), a foundation Document AI model which unifies text, image, and layout modalities together with varied task formats, including document understanding and generation. UDOP leverages the spatial correlation between textual content and document image to model image, text, and layout modalities with one uniform representation. With a novel Vision-Text-Layout Transformer, UDOP unifies pretraining and multi-domain downstream tasks into a prompt-based sequence generation scheme. UDOP is pretrained on both large-scale unlabeled document corpora using innovative self-supervised objectives and diverse labeled data. UDOP also learns to generate document images from text and layout modalities via masked image reconstruction. To the best of our knowledge, this is the first time in the field of document AI that one model simultaneously achieves highquality neural document editing and content customization. Our method sets the state-of-the-art on 9 Document AI tasks, e.g., document understanding and QA, across diverse data domains like finance reports, academic papers, and websites. UDOP ranks first on the leaderboard of the Document Understanding Benchmark (DUE). 1\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 15492257190033828791 DOCUMENT # en 1.00\n",
+ "1 metadata 17694378913530663894 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 5130702399638276543 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 1750046879787500780 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 6822356808946763591 TEXT #/texts/0 en 0.55\n",
+ "5 semantic 6822356808946763591 TEXT #/texts/0 reference 0.89\n",
+ "6 language 17694378913530663894 TEXT #/texts/1 en 0.61\n",
+ "7 semantic 17694378913530663894 TEXT #/texts/1 reference 0.77\n",
+ "8 language 7225642215376316190 TEXT #/texts/2 en 0.46\n",
+ "9 semantic 7225642215376316190 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 3758912565052507091 TEXT #/texts/3 en 0.35\n",
+ "11 semantic 3758912565052507091 TEXT #/texts/3 meta-data 0.96\n",
+ "2111.13809.pdf\n",
+ "title: DOCUMENT LAYOUT ANALYSIS WITH AESTHETIC-GUIDED IMAGE AUGMENTATION\n",
+ "abstract: ABSTRACT Document layout analysis (DLA) plays an important role in information extraction and document understanding. At present, document layout analysis has reached a milestone achievement, however, document layout analysis of non-Manhattan is still a challenge. In this paper, we propose an image layer modeling method to tackle this challenge. To measure the proposed image layer modeling method, we propose a manually-labeled non-Manhattan layout fine-grained segmentation dataset named FPD. As far as we know, FPD is the first manually-labeled non-Manhattan layout fine-grained segmentation dataset. To effectively extract fine-grained features of documents, we propose an edge embedding network named L-E $^{3}$Net. Experimental results prove that our proposed image layer modeling method can better deal with the fine-grained segmented document of the non-Manhattan layout. Index Terms docuemnt layout analysis, data augmentation, deep learning, non-Manhattan layout\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 7685565125117959574 DOCUMENT # en 1.00\n",
+ "1 metadata 17663094003299423450 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 11832164313368010743 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 14954915565413020412 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 metadata 1448756563310952220 DOCUMENT #/texts/7 abstract 1.00\n",
+ "5 language 7446772614954564842 TEXT #/texts/0 en 0.39\n",
+ "6 semantic 7446772614954564842 TEXT #/texts/0 reference 0.91\n",
+ "7 language 17663094003299423450 TEXT #/texts/1 en 0.42\n",
+ "8 semantic 17663094003299423450 TEXT #/texts/1 header 0.95\n",
+ "9 language 2411124507804902582 TEXT #/texts/2 en 0.27\n",
+ "10 semantic 2411124507804902582 TEXT #/texts/2 meta-data 1.00\n",
+ "11 language 13847099027065667571 TEXT #/texts/3 en 0.68\n",
+ "2309.09742.pdf\n",
+ "title: David Tschirschwitz r 0000 ' 0001 ' 5344 ' 4172 $^{s}$,\n",
+ "abstract: Abstract. The reliability of supervised machine learning systems depends on the accuracy and availability of ground truth labels. However, the process of human annotation, being prone to error, introduces the potential for noisy labels, which can impede the practicality of these systems. While training with noisy labels is a significant consideration, the reliability of test data is also crucial to ascertain the dependability of the results. A common approach to addressing this issue is repeated labeling, where multiple annotators label the same example, and their labels are combined to provide a better estimate of the true label. In this paper, we propose a novel localization algorithm that adapts wellestablished ground truth estimation methods for object detection and instance segmentation tasks. The key innovation of our method lies in its ability to transform combined localization and classification tasks into classification-only problems, thus enabling the application of techniques such as Expectation-Maximization (EM) or Majority Voting (MJV). Although our main focus is the aggregation of unique ground truth for test data, our algorithm also shows superior performance during training on the TexBiG dataset, surpassing both noisy label training and label aggregation using Weighted Boxes Fusion (WBF). Our experiments indicate that the benefits of repeated labels emerge under specific dataset and annotation configurations. The key factors appear to be (1) dataset complexity, the (2) annotator consistency, and (3) the given annotation budget constraints. Keywords: Object Detection · Instance Segmentation · Robust Learning.\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 992249970351541192 DOCUMENT # en \n",
+ "1 metadata 9276283848181217609 DOCUMENT #/texts/2 title \n",
+ "2 metadata 16381755428115428238 DOCUMENT #/texts/9 abstract \n",
+ "3 metadata 1019027222051697096 DOCUMENT #/texts/10 abstract \n",
+ "4 language 16338564269424971201 TEXT #/texts/0 en \n",
+ "5 semantic 16338564269424971201 TEXT #/texts/0 reference \n",
+ "6 language 10996720205158029901 TEXT #/texts/1 en \n",
+ "7 semantic 10996720205158029901 TEXT #/texts/1 header \n",
+ "8 language 9276283848181217609 TEXT #/texts/2 de \n",
+ "9 semantic 9276283848181217609 TEXT #/texts/2 reference \n",
+ "10 language 3259182652263725180 TEXT #/texts/3 de \n",
+ "11 semantic 3259182652263725180 TEXT #/texts/3 meta-data \n",
+ "\n",
+ " confidence \n",
+ "0 0.96 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 0.30 \n",
+ "5 0.95 \n",
+ "6 0.74 \n",
+ "7 0.96 \n",
+ "8 0.34 \n",
+ "9 0.58 \n",
+ "10 0.15 \n",
+ "11 0.46 \n",
+ "2208.10970.pdf\n",
+ "title: Doc-GCN: Heterogeneous Graph Convolutional Networks for Document Layout Analysis\n",
+ "abstract: Abstract Recognizing the layout of unstructured digital documents is crucial when parsing the documents into the structured, machine-readable format for downstream applications. Recent studies in Document Layout Analysis usually rely on computer vision models to understand documents while ignoring other information, such as context information or relation of document components, which are vital to capture. Our Doc-GCN presents an effective way to harmonize and integrate heterogeneous aspects for Document Layout Analysis. We first construct graphs to explicitly describe four main aspects, including syntactic, semantic, density, and appearance/visual information. Then, we apply graph convolutional networks for representing each aspect of information and use pooling to integrate them. Finally, we aggregate each aspect and feed them into 2-layer MLPs for document layout component classification. Our Doc-GCN achieves new state-of-the-art results in three widely used DLA datasets.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 2720927874437785770 DOCUMENT # en 1.00\n",
+ "1 metadata 17870169857378124289 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 9077524686609812103 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 14890249552540682664 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 8059319291858021328 TEXT #/texts/0 en 0.51\n",
+ "5 semantic 8059319291858021328 TEXT #/texts/0 reference 0.86\n",
+ "6 language 17870169857378124289 TEXT #/texts/1 en 0.57\n",
+ "7 semantic 17870169857378124289 TEXT #/texts/1 header 0.83\n",
+ "8 language 6152090793369172651 TEXT #/texts/2 en 0.34\n",
+ "9 semantic 6152090793369172651 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 17053512559835824917 TEXT #/texts/3 en 0.65\n",
+ "11 semantic 17053512559835824917 TEXT #/texts/3 meta-data 0.82\n",
+ "2302.01451.pdf\n",
+ "title: CTE: A Dataset for Contextualized Table Extraction\n",
+ "abstract: Abstract Relevant information in documents is often summarized in tables, helping the reader to identify useful facts. Most benchmark datasets support either document layout analysis or table understanding, but lack in providing data to apply both tasks in a unified way. We define the task of Contextualized Table Extraction (CTE), which aims to extract and define the structure of tables considering the textual context of the document. The dataset comprises 75k fully annotated pages of scientific papers, including more than 35k tables. Data are gathered from PubMed Central, merging the information provided by annotations in the PubTables-1M and PubLayNet datasets. The dataset can support CTE and adds new classes to the original ones. The generated annotations can be used to develop end-to-end pipelines for various tasks, including document layout analysis, table detection, structure recognition, and functional analysis. We formally define CTE and evaluation metrics, showing which subtasks can be tackled, describing advantages, limitations, and future works of this collection of data. Annotations and code will be accessible at https://github.com/AILab-UniFI/cte-dataset. Keywords Dataset, Table Extraction, Scientific Paper Analysis, Document Layout Analysis, Benchmark\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10999014387843235765 DOCUMENT # en 0.98\n",
+ "1 metadata 6774018724173639210 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 2501798605750979630 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 12534062044414101616 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 metadata 2501901927569946792 DOCUMENT #/texts/6 abstract 1.00\n",
+ "5 metadata 13016416439003278616 DOCUMENT #/texts/7 abstract 1.00\n",
+ "6 language 2625281664540391426 TEXT #/texts/0 en 0.31\n",
+ "7 semantic 2625281664540391426 TEXT #/texts/0 text 0.99\n",
+ "8 language 6774018724173639210 TEXT #/texts/1 en 0.75\n",
+ "9 semantic 6774018724173639210 TEXT #/texts/1 text 0.57\n",
+ "10 language 14763756518753826937 TEXT #/texts/2 it 0.26\n",
+ "11 semantic 14763756518753826937 TEXT #/texts/2 meta-data 1.00\n",
+ "2301.10781.pdf\n",
+ "title: Generalizability in Document Layout Analysis for Scientific Article Figure & Caption Extraction\n",
+ "abstract: Abstract The lack of generalizability-in which a model trained on one dataset cannot provide accurate results for a different dataset-is a known problem in the field of document layout analysis. Thus, when a model is used to locate important page objects in scientific literature such as figures, tables, captions, and math formulas, the model often cannot be applied successfully to new domains. While several solutions have been proposed, including newer and updated deep learning models, larger handannotated datasets, and the generation of large synthetic datasets, so far there is no 'magic bullet' for translating a model trained on a particular domain or historical time period to a new field. Here we present our ongoing work in translating our document layout analysis model from the historical astrophysical literature to the larger corpus of scientific documents within the HathiTrust U.S. Federal Documents collection. We use this example as an avenue to highlight some of the problems with generalizability in the document layout analysis community and discuss several challenges and possible solutions to address these issues. All code for this work is available on The Reading Time Machine GitHub repository, https://github.com/ReadingTimeMachine/htrc short conf. Keywords: scholarly document processing, document layout analysis, astronomy.\n",
+ " type subj_hash subj_name subj_path label \\\n",
+ "0 language 15211011720834601151 DOCUMENT # en \n",
+ "1 metadata 5738259683770902497 DOCUMENT #/texts/1 title \n",
+ "2 metadata 7687256829341479909 DOCUMENT #/texts/8 abstract \n",
+ "3 metadata 18316644352025785821 DOCUMENT #/texts/9 abstract \n",
+ "4 metadata 3325627418640212133 DOCUMENT #/texts/10 abstract \n",
+ "5 language 7989988473973578225 TEXT #/texts/0 en \n",
+ "6 semantic 7989988473973578225 TEXT #/texts/0 text \n",
+ "7 language 5738259683770902497 TEXT #/texts/1 en \n",
+ "8 semantic 5738259683770902497 TEXT #/texts/1 header \n",
+ "9 language 16699890999419839987 TEXT #/texts/2 en \n",
+ "10 semantic 16699890999419839987 TEXT #/texts/2 meta-data \n",
+ "11 language 13542574747711984673 TEXT #/texts/3 en \n",
+ "\n",
+ " confidence \n",
+ "0 1.00 \n",
+ "1 1.00 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 1.00 \n",
+ "5 0.44 \n",
+ "6 0.95 \n",
+ "7 0.60 \n",
+ "8 0.82 \n",
+ "9 0.52 \n",
+ "10 0.99 \n",
+ "11 0.68 \n",
+ "2102.05533.pdf\n",
+ "title: A view of computational models for image segmentation\n",
+ "abstract: ['Image segmentation is a central topic in image processing and computer vision and a key issue in many applications, e.g., in medical imaging, microscopy, document analysis and remote sensing. According to the human perception, image segmentation is the process of dividing an image into non-overlapping regions. These regions, which may correspond, e.g., to different objects, are fundamental for the correct interpretation and classification of the scene represented by the image. The division into regions is not unique, but it depends on the application, i.e., it must be driven by the final goal of the segmentation and hence by the most significant features with respect to that goal. Thus, image segmentation can be regarded as a strongly ill-posed problem. A classical approach to deal with ill posedness consists in incorporating in the model a-priori information about the solution, e.g., in the form of penalty terms. In this work we provide a brief overview of basic computational models for image segmentation, focusing on edge-based and region-based variational models, as well as on statistical and machine-learning approaches. We also sketch numerical methods that are applied in computing solutions to these models. In our opinion, our view can help the readers identify suitable classes of methods for solving their specific problems.']\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 8334122060317671079 DOCUMENT # en 0.92\n",
+ "1 language 10459924432452545932 TEXT #/texts/0 en 0.82\n",
+ "2 semantic 10459924432452545932 TEXT #/texts/0 header 0.62\n",
+ "3 language 13729361378862501881 TEXT #/texts/1 en 0.84\n",
+ "4 semantic 13729361378862501881 TEXT #/texts/1 header 0.93\n",
+ "5 language 1216235182387265925 TEXT #/texts/2 en 0.19\n",
+ "6 semantic 1216235182387265925 TEXT #/texts/2 meta-data 0.84\n",
+ "7 language 8013272106503804603 TEXT #/texts/3 en 0.76\n",
+ "8 semantic 8013272106503804603 TEXT #/texts/3 meta-data 0.67\n",
+ "9 language 3453923813147997148 TEXT #/texts/4 en 0.35\n",
+ "10 semantic 3453923813147997148 TEXT #/texts/4 text 0.99\n",
+ "11 language 11771141895094630316 TEXT #/texts/5 en 0.58\n",
+ "2108.11591.pdf\n",
+ "title: LayoutReader: Pre-training of Text and Layout for Reading Order Detection\n",
+ "abstract: Abstract Reading order detection is the cornerstone to understanding visually-rich documents (e.g., receipts and forms). Unfortunately, no existing work took advantage of advanced deep learning models because it is too laborious to annotate a large enough dataset. We observe that the reading order of WORD documents is embedded in their XML metadata; meanwhile, it is easy to convert WORD documents to PDFs or images. Therefore, in an automated manner, we construct ReadingBank, a benchmark dataset that contains reading order, text, and layout information for 500,000 document images covering a wide spectrum of document types. This first-ever large-scale dataset unleashes the power of deep neural networks for reading order detection. Specifically, our proposed LayoutReader captures the text and layout information for reading order prediction using the seq2seq model. It performs almost perfectly in reading order detection and significantly improves both open-source and commercial OCR engines in ordering text lines in their results in our experiments. We will release the dataset and model at https:// aka.ms/layoutreader.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 9757730494358412914 DOCUMENT # en 0.99\n",
+ "1 metadata 13558474190134142955 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 13643495312176067581 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 5705766784987579239 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 4713674989249568416 TEXT #/texts/0 en 0.44\n",
+ "5 semantic 4713674989249568416 TEXT #/texts/0 text 0.61\n",
+ "6 language 13558474190134142955 TEXT #/texts/1 en 0.76\n",
+ "7 semantic 13558474190134142955 TEXT #/texts/1 header 0.90\n",
+ "8 language 17626226113250559526 TEXT #/texts/2 en 0.27\n",
+ "9 semantic 17626226113250559526 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 13633992836670036397 TEXT #/texts/3 en 0.61\n",
+ "11 semantic 13633992836670036397 TEXT #/texts/3 meta-data 0.99\n",
+ "2104.08836.pdf\n",
+ "title: LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding ∗\n",
+ "abstract: Abstract Multimodal pre-training with text, layout, and image has achieved SOTA performance for visually-rich document understanding tasks recently, which demonstrates the great potential for joint learning across different modalities. In this paper, we present LayoutXLM, a multimodal pre-trained model for multilingual document understanding, which aims to bridge the language barriers for visually-rich document understanding. To accurately evaluate LayoutXLM, we also introduce a multilingual form understanding benchmark dataset named XFUN, which includes form understanding samples in 7 languages (Chinese, Japanese, Spanish, French, Italian, German, Portuguese), and key-value pairs are manually labeled for each language. Experiment results show that the LayoutXLM model has significantly outperformed the existing SOTA cross-lingual pre-trained models on the XFUN dataset. The pre-trained LayoutXLM model and the XFUN dataset will be publicly available at https: //aka.ms/layoutxlm.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 4883914572144313992 DOCUMENT # en 1.00\n",
+ "1 metadata 14541526077917844680 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 2439660219367115416 DOCUMENT #/texts/6 abstract 1.00\n",
+ "3 metadata 9547735864067414344 DOCUMENT #/texts/7 abstract 1.00\n",
+ "4 language 1421188079197686270 TEXT #/texts/0 en 0.49\n",
+ "5 semantic 1421188079197686270 TEXT #/texts/0 text 0.96\n",
+ "6 language 14541526077917844680 TEXT #/texts/1 en 0.63\n",
+ "7 semantic 14541526077917844680 TEXT #/texts/1 header 0.69\n",
+ "8 language 7935954898418490613 TEXT #/texts/2 en 0.27\n",
+ "9 semantic 7935954898418490613 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 10743865134721424209 TEXT #/texts/3 en 0.27\n",
+ "11 semantic 10743865134721424209 TEXT #/texts/3 meta-data 1.00\n",
+ "2305.15393.pdf\n",
+ "title: LayoutGPT: Compositional Visual Planning and Generation with Large Language Models\n",
+ "abstract: Abstract Attaining a high degree of user controllability in visual generation often requires intricate, fine-grained inputs like layouts. However, such inputs impose a substantial burden on users when compared to simple text inputs. To address the issue, we study how Large Language Models (LLMs) can serve as visual planners by generating layouts from text conditions, and thus collaborate with visual generative models. We propose LayoutGPT, a method to compose in-context visual demonstrations in style sheet language to enhance the visual planning skills of LLMs. LayoutGPT can generate plausible layouts in multiple domains, ranging from 2D images to 3D indoor scenes. LayoutGPT also shows superior performance in converting challenging language concepts like numerical and spatial relations to layout arrangements for faithful text-to-image generation. When combined with a downstream image generation model, LayoutGPT outperforms text-to-image models/systems by 20-40% and achieves comparable performance as human users in designing visual layouts for numerical and spatial correctness. Lastly, Layout-GPT achieves comparable performance to supervised methods in 3D indoor scene synthesis, demonstrating its effectiveness and potential in multiple visual domains. 1 Introduction Can Large Language Models (LLMs) comprehend visual concepts and generate plausible arrangments in visual spaces? Recently, LLMs have shown significant advancement in various reasoning skills [50, 49] that remain challenging to existing visual generative models. For instance, text-to-image generation (T2I) models suffer from generating objects with specified counts, positions, and attributes [10]. 3D scene synthesis models face challenges in preserving furniture within pre-defined room sizes [30]. Addressing these issues necessitates the development of compositional skills that effectively arrange components in a coherent manner, accurately reflecting object specifications and interactions.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 6911408252689886783 DOCUMENT # en 1.00\n",
+ "1 metadata 16832561012421574481 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 3435677435934837381 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 459968366665532336 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 metadata 17723871280452781935 DOCUMENT #/texts/6 abstract 1.00\n",
+ "5 metadata 12745803599622234410 DOCUMENT #/texts/7 abstract 1.00\n",
+ "6 language 10264649267537693906 TEXT #/texts/0 en 0.64\n",
+ "7 semantic 10264649267537693906 TEXT #/texts/0 reference 0.66\n",
+ "8 language 16832561012421574481 TEXT #/texts/1 en 0.62\n",
+ "9 semantic 16832561012421574481 TEXT #/texts/1 header 0.86\n",
+ "10 language 5934779742741270059 TEXT #/texts/2 en 0.53\n",
+ "11 semantic 5934779742741270059 TEXT #/texts/2 meta-data 0.99\n",
+ "2202.12985.pdf\n",
+ "title: OCR-IDL: OCR Annotations for Industry Document Library Dataset\n",
+ "abstract: Abstract. Pretraining has proven successful in Document Intelligence tasks where deluge of documents are used to pretrain the models only later to be finetuned on downstream tasks. One of the problems of the pretraining approaches is the inconsistent usage of pretraining data with different OCR engines leading to incomparable results between models. In other words, it is not obvious whether the performance gain is coming from diverse usage of amount of data and distinct OCR engines or from the proposed models. To remedy the problem, we make public the OCR annotations for IDL documents using commercial OCR engine given their superior performance over open source OCR models. The contributed dataset (OCR-IDL) has an estimated monetary value over 20K US $. It is our hope that OCR-IDL can be a starting point for future works on Document Intelligence. All of our data and its collection process with the annotations can be found in https://github.com/furkanbiten/idl_data.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10847684039601849357 DOCUMENT # en 0.99\n",
+ "1 metadata 7551407551839940680 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 6471885383062386798 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 language 7082209914402450907 TEXT #/texts/0 en 0.38\n",
+ "4 semantic 7082209914402450907 TEXT #/texts/0 text 0.90\n",
+ "5 language 7551407551839940680 TEXT #/texts/1 en 0.34\n",
+ "6 semantic 7551407551839940680 TEXT #/texts/1 header 0.88\n",
+ "7 language 5513468041440136666 TEXT #/texts/2 es 0.15\n",
+ "8 semantic 5513468041440136666 TEXT #/texts/2 meta-data 1.00\n",
+ "9 language 11627341239655523964 TEXT #/texts/3 en 0.70\n",
+ "10 semantic 11627341239655523964 TEXT #/texts/3 meta-data 0.98\n",
+ "11 language 11261052408275609312 TEXT #/texts/4 uk 0.17\n",
+ "2303.14884.pdf\n",
+ "title: A large-scale dataset for end-to-end table recognition in the wild\n",
+ "abstract: ['Table recognition (TR) is one of the research hotspots in pattern recognition, which aims to extract information from tables in an image. Common table recognition tasks include table detection (TD), table structure recognition (TSR) and table content recognition (TCR). TD is to locate tables in the image, TCR recognizes text content, and TSR recognizes spatial ogical structure. Currently, the end-to-end TR in real scenarios, accomplishing the three sub-tasks simultaneously, is yet an unexplored research area. One major factor that inhibits researchers is the lack of a benchmark dataset. To this end, we propose a new large-scale dataset named Table Recognition Set (TabRecSet) with diverse table forms sourcing from multiple scenarios in the wild, providing complete annotation dedicated to end-to-end TR research. It is the largest and first bi-lingual dataset for end-to-end TR, with 38.1K tables in which 20.4K are in English\\\\, and 17.7K are in Chinese. The samples have diverse forms, such as the border-complete and -incomplete table, regular and irregular table (rotated, distorted, etc.). The scenarios are multiple in the wild, varying from scanned to camera-taken images, documents to Excel tables, educational test papers to financial invoices. The annotations are complete, consisting of the table body spatial annotation, cell spatial logical annotation and text content for TD, TSR and TCR, respectively. The spatial annotation utilizes the polygon instead of the bounding box or quadrilateral adopted by most datasets. The polygon spatial annotation is more suitable for irregular tables that are common in wild scenarios. Additionally, we propose a visualized and interactive annotation tool named TableMe to improve the efficiency and quality of table annotation.']\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 8114940727583702668 DOCUMENT # en 1.00\n",
+ "1 metadata 11716041957983712877 DOCUMENT #/texts/0 title 1.00\n",
+ "2 language 11716041957983712877 TEXT #/texts/0 en 0.78\n",
+ "3 semantic 11716041957983712877 TEXT #/texts/0 text 0.68\n",
+ "4 language 6084627799462099121 TEXT #/texts/1 en 0.44\n",
+ "5 semantic 6084627799462099121 TEXT #/texts/1 meta-data 0.99\n",
+ "6 language 17393083399206941044 TEXT #/texts/2 en 0.77\n",
+ "7 semantic 17393083399206941044 TEXT #/texts/2 meta-data 0.98\n",
+ "8 language 4093452139209713676 TEXT #/texts/3 en 0.41\n",
+ "9 semantic 4093452139209713676 TEXT #/texts/3 meta-data 0.93\n",
+ "10 language 7996752084309247071 TEXT #/texts/4 en 0.26\n",
+ "11 semantic 7996752084309247071 TEXT #/texts/4 meta-data 1.00\n",
+ "2101.06573.pdf\n",
+ "title: Understanding in Artificial Intelligence\n",
+ "abstract: Abstract Current Artificial Intelligence (AI) methods, most based on deep learning, have facilitated progress in several fields, including computer vision and natural language understanding. The progress of these AI methods is measured using benchmarks designed to solve challenging tasks, such as visual question answering. A question remains of how much understanding is leveraged by these methods and how appropriate are the current benchmarks to measure understanding capabilities. To answer these questions, we have analysed existing benchmarks and their understanding capabilities, defined by a set of understanding capabilities, and current research streams. We show how progress has been made in benchmark development to measure understanding capabilities of AI methods and we review as well how current methods develop understanding capabilities. Keywords: Artificial intelligence, Deep-learning, neuro-symbolic, reasoning, understanding, computer vision, natural language processing\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 679268346913823170 DOCUMENT # en 1.00\n",
+ "1 metadata 16418080794343938376 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 11352816888786938849 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 15222897743183409902 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 metadata 15402767724637827274 DOCUMENT #/texts/6 abstract 1.00\n",
+ "5 language 16408750086547568363 TEXT #/texts/0 en 0.32\n",
+ "6 semantic 16408750086547568363 TEXT #/texts/0 text 0.95\n",
+ "7 language 16418080794343938376 TEXT #/texts/1 en 0.75\n",
+ "8 semantic 16418080794343938376 TEXT #/texts/1 header 0.56\n",
+ "9 language 13644035075325621441 TEXT #/texts/2 en 0.23\n",
+ "10 semantic 13644035075325621441 TEXT #/texts/2 meta-data 0.99\n",
+ "11 language 611290635858811423 TEXT #/texts/3 en 0.58\n",
+ "2304.06447.pdf\n",
+ "title: PDF-VQA: A New Dataset for Real-World VQA on PDF Documents\n",
+ "abstract: Abstract. Document-based Visual Question Answering examines the document understanding of document images in conditions of natural language questions. We proposed a new document-based VQA dataset, PDF-VQA, to comprehensively examine the document understanding from various aspects, including document element recognition, document layout structural understanding as well as contextual understanding and key information extraction. Our PDF-VQA dataset extends the current scale of document understanding that limits on the single document page to the new scale that asks questions over the full document of multiple pages. We also propose a new graph-based VQA model that explicitly integrates the spatial and hierarchically structural relationships between different document elements to boost the document structural understanding. The performances are compared with several baselines over different question types and tasks $^{4}$. Keywords: Document Understanding · Document Information Extraction · Visual Question Answering\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 17739815372653637004 DOCUMENT # en 1.00\n",
+ "1 metadata 11359654040391113622 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 16398652216189860809 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 11512328541013711730 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 10248123059142269704 TEXT #/texts/0 en 0.34\n",
+ "5 semantic 10248123059142269704 TEXT #/texts/0 reference 0.95\n",
+ "6 language 11359654040391113622 TEXT #/texts/1 en 0.15\n",
+ "7 semantic 11359654040391113622 TEXT #/texts/1 header 0.99\n",
+ "8 language 9242125684836779999 TEXT #/texts/2 en 0.40\n",
+ "9 semantic 9242125684836779999 TEXT #/texts/2 meta-data 0.99\n",
+ "10 language 4101268396283963173 TEXT #/texts/3 en 0.74\n",
+ "11 semantic 4101268396283963173 TEXT #/texts/3 meta-data 0.36\n",
+ "2101.12741.pdf\n",
+ "title: Post-OCR Paragraph Recognition by Graph Convolutional Networks\n",
+ "abstract: ['We propose a new approach for paragraph recognition in document images by spatial graph convolutional networks (GCN) applied on OCR text boxes. Two steps, namely line splitting and line clustering, are performed to extract paragraphs from the lines in OCR results. Each step uses a beta-skeleton graph constructed from bounding boxes, where the graph edges provide efficient support for graph convolution operations. With only pure layout input features, the GCN model size is 3~4 orders of magnitude smaller compared to R-CNN based models, while achieving comparable or better accuracies on PubLayNet and other datasets. Furthermore, the GCN models show good generalization from synthetic training data to real-world images, and good adaptivity for variable document styles.']\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10969933011407272340 DOCUMENT # en 1.00\n",
+ "1 language 11717495397265653643 TEXT #/texts/0 en 0.48\n",
+ "2 semantic 11717495397265653643 TEXT #/texts/0 text 0.90\n",
+ "3 language 1895072058650522271 TEXT #/texts/1 en 0.94\n",
+ "4 semantic 1895072058650522271 TEXT #/texts/1 text 0.99\n",
+ "5 language 6548777523668082231 TEXT #/texts/2 en 0.80\n",
+ "6 semantic 6548777523668082231 TEXT #/texts/2 header 0.84\n",
+ "7 language 13639445319858036725 TEXT #/texts/3 en 0.61\n",
+ "8 semantic 13639445319858036725 TEXT #/texts/3 meta-data 0.88\n",
+ "9 language 10013140086229577108 TEXT #/texts/4 en 0.88\n",
+ "10 semantic 10013140086229577108 TEXT #/texts/4 text 0.99\n",
+ "11 language 2420190195673594550 TEXT #/texts/5 en 0.80\n",
+ "2202.08125.pdf\n",
+ "title: Processing the structure of documents: Logical Layout Analysis of historical newspapers in French\n",
+ "abstract: Abstract Background. In recent years, libraries and archives led important digitisation campaigns that opened the access to vast collections of historical documents. While such documents are often available as XML ALTO documents, they lack information about their logical structure. In this paper, we address the problem of Logical Layout Analysis applied to historical documents in French. We propose a rule-based method, that we evaluate and compare with two Machine-Learning models, namely RIPPER and Gradient Boosting. Our data set contains French newspapers, periodicals and magazines, published in the first half of the twentieth century in the Franche-Comté Region. Results. Our rule-based system outperforms the two other models in nearly all evaluations. It has especially better Recall results, indicating that our system covers more types of every logical label than the other two models. When comparing RIPPER with Gradient Boosting, we can observe that Gradient Boosting has better Precision scores but RIPPER has better Recall scores. Conclusions. The evaluation shows that our system outperforms the two Machine Learning models, and provides significantly higher Recall. It also confirms that our system can be used to produce annotated data sets that are large enough to envisage Machine Learning or Deep Learning approaches for the task of Logical Layout Analysis. Combining rules and Machine Learning models into hybrid systems could potentially provide even better performances. Furthermore, as the layout in historical documents evolves rapidly, one possible solution to overcome this problem would be to apply Rule Learning algorithms to bootstrap rule sets adapted to different publication periods.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 10694267316762447349 DOCUMENT # en 0.98\n",
+ "1 metadata 17453111564062200966 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 10719077243545767902 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 11121295389236019161 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 metadata 10458261237088064204 DOCUMENT #/texts/7 abstract 1.00\n",
+ "5 metadata 15014609299195796349 DOCUMENT #/texts/8 abstract 1.00\n",
+ "6 language 3140571036692500309 TEXT #/texts/0 en 0.45\n",
+ "7 semantic 3140571036692500309 TEXT #/texts/0 text 0.99\n",
+ "8 language 17453111564062200966 TEXT #/texts/1 en 0.87\n",
+ "9 semantic 17453111564062200966 TEXT #/texts/1 text 0.55\n",
+ "10 language 10183605818128324151 TEXT #/texts/2 de 0.53\n",
+ "11 semantic 10183605818128324151 TEXT #/texts/2 meta-data 0.98\n",
+ "2308.02051.pdf\n",
+ "title: A Graphical Approach to Document Layout Analysis\n",
+ "abstract: Abstract. Document layout analysis (DLA) is the task of detecting the distinct, semantic content within a document and correctly classifying these items into an appropriate category (e.g., text, title, figure). DLA pipelines enable users to convert documents into structured machinereadable formats that can then be used for many useful downstream tasks. Most existing state-of-the-art (SOTA) DLA models represent documents as images, discarding the rich metadata available in electronically generated PDFs. Directly leveraging this metadata, we represent each PDF page as a structured graph and frame the DLA problem as a graph segmentation and classification problem. We introduce the Graph-based Layout Analysis Model (GLAM), a lightweight graph neural network competitive with SOTA models on two challenging DLA datasets-while being an order of magnitude smaller than existing models. In particular, the 4-million parameter GLAM model outperforms the leading 140M+ parameter computer vision-based model on 5 of the 11 classes on the DocLayNet dataset. A simple ensemble of these two models achieves a new state-of-the-art on DocLayNet, increasing mAP from 76.8 to 80.8. Overall, GLAM is over 5 times more efficient than SOTA models, making GLAM a favorable engineering choice for DLA tasks.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 4516578811656693411 DOCUMENT # en 1.00\n",
+ "1 metadata 1547390354026499610 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 7215812434060372258 DOCUMENT #/texts/9 abstract 1.00\n",
+ "3 language 11271486628029795725 TEXT #/texts/0 en 0.45\n",
+ "4 semantic 11271486628029795725 TEXT #/texts/0 reference 0.54\n",
+ "5 language 1547390354026499610 TEXT #/texts/1 en 0.56\n",
+ "6 semantic 1547390354026499610 TEXT #/texts/1 header 0.96\n",
+ "7 language 6590265173522894960 TEXT #/texts/2 en 0.24\n",
+ "8 semantic 6590265173522894960 TEXT #/texts/2 meta-data 1.00\n",
+ "9 language 11511962287349343203 TEXT #/texts/3 en 0.31\n",
+ "10 semantic 11511962287349343203 TEXT #/texts/3 meta-data 1.00\n",
+ "11 language 331836956372778542 TEXT #/texts/4 en 0.23\n",
+ "2006.14615.pdf\n",
+ "title: Layout Generation and Completion with Self-attention\n",
+ "abstract: Abstract. We address the problem of layout generation for diverse domains such as images, documents, and mobile applications. A layout is a set of graphical elements, belonging to one or more categories, placed together in a meaningful way. Generating a new layout or extending an existing layout requires understanding the relationships between these graphical elements. To do this, we propose a novel framework, Layout-Transformer, that leverages a self-attention based approach to learn contextual relationships between layout elements and generate layouts in a given domain. The proposed model improves upon the state-of-the-art approaches in layout generation in four ways. First, our model can generate a new layout either from an empty set or add more elements to a partial layout starting from an initial set of elements. Second, as the approach is attention-based, we can visualize which previous elements the model is attending to predict the next element, thereby providing an interpretable sequence of layout elements. Third, our model can easily scale to support both a large number of element categories and a large number of elements per layout. Finally, the model also produces an embedding for various element categories, which can be used to explore the relationships between the categories. We demonstrate with experiments that our model can produce meaningful layouts in diverse settings such as object bounding boxes in scenes (COCO bounding boxes), documents (PubLayNet), and mobile applications (RICO dataset). Keywords: Generative modeling, Self-attention, Layout generation\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 16078767997603633708 DOCUMENT # en 0.99\n",
+ "1 metadata 3448975359140491406 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 15248218062120468612 DOCUMENT #/texts/4 abstract 1.00\n",
+ "3 metadata 942661215784899310 DOCUMENT #/texts/5 abstract 1.00\n",
+ "4 language 9584954673896534035 TEXT #/texts/0 en 0.34\n",
+ "5 semantic 9584954673896534035 TEXT #/texts/0 reference 1.00\n",
+ "6 language 3448975359140491406 TEXT #/texts/1 en 0.88\n",
+ "7 semantic 3448975359140491406 TEXT #/texts/1 header 0.86\n",
+ "8 language 5240041772907971174 TEXT #/texts/2 en 0.36\n",
+ "9 semantic 5240041772907971174 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 8314509802714261312 TEXT #/texts/3 en 0.63\n",
+ "11 semantic 8314509802714261312 TEXT #/texts/3 meta-data 1.00\n",
+ "2012.06547.pdf\n",
+ "title: LayoutGMN: Neural Graph Matching for Structural Layout Similarity\n",
+ "abstract: Abstract We present a deep neural network to predict structural similarity between 2D layouts by leveraging Graph Matching Networks (GMN). Our network, coined LayoutGMN, learns the layout metric via neural graph matching, using an attention-based GMN designed under a triplet network setting. To train our network, we utilize weak labels obtained by pixel-wise Intersection-over-Union (IoUs) to define the triplet loss. Importantly, LayoutGMN is built with a structural bias which can effectively compensate for the lack of structure awareness in IoUs. We demonstrate this on two prominent forms of layouts, viz., floorplans and UI designs, via retrieval experiments on large-scale datasets. In particular, retrieval results by our network better match human judgement of structural layout similarity compared to both IoUs and other baselines including a state-of-theart method based on graph neural networks and image convolution. In addition, LayoutGMN is the first deep model to offer both metric learning of structural layout similarity and structural matching between layout elements.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 11932898538783523936 DOCUMENT # en 0.99\n",
+ "1 metadata 12407577734524725751 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 15238942460965794110 DOCUMENT #/texts/3 abstract 1.00\n",
+ "3 metadata 18057253837422795236 DOCUMENT #/texts/4 abstract 1.00\n",
+ "4 language 5463348197003769018 TEXT #/texts/0 en 0.39\n",
+ "5 semantic 5463348197003769018 TEXT #/texts/0 reference 0.99\n",
+ "6 language 12407577734524725751 TEXT #/texts/1 en 0.56\n",
+ "7 semantic 12407577734524725751 TEXT #/texts/1 header 0.87\n",
+ "8 language 17514608994654715057 TEXT #/texts/2 en 0.45\n",
+ "9 semantic 17514608994654715057 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 15238942460965794110 TEXT #/texts/3 en 0.32\n",
+ "11 semantic 15238942460965794110 TEXT #/texts/3 header 0.93\n",
+ "1908.07836.pdf\n",
+ "title: PubLayNet: largest dataset ever for document layout analysis\n",
+ "abstract: Abstract-Recognizing the layout of unstructured digital documents is an important step when parsing the documents into structured machine-readable format for downstream applications. Deep neural networks that are developed for computer vision have been proven to be an effective method to analyze layout of document images. However, document layout datasets that are currently publicly available are several magnitudes smaller than established computing vision datasets. Models have to be trained by transfer learning from a base model that is pre-trained on a traditional computer vision dataset. In this paper, we develop the PubLayNet dataset for document layout analysis by automatically matching the XML representations and the content of over 1 million PDF articles that are publicly available on PubMed Central$^{™}$. The size of the dataset is comparable to established computer vision datasets, containing over 360 thousand document images, where typical document layout elements are annotated. The experiments demonstrate that deep neural networks trained on PubLayNet accurately recognize the layout of scientific articles. The pre-trained models are also a more effective base mode for transfer learning on a different document domain. We release the dataset (https://github.com/ibm-aur-nlp/PubLayNet) to support development and evaluation of more advanced models for document layout analysis. Index Terms-automatic annotation, document layout, deep learning, transfer learning\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 444664934372890831 DOCUMENT # en 0.99\n",
+ "1 metadata 11925642137111790531 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 8260381015807345190 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 4125432983079110592 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 13116259115036862914 TEXT #/texts/0 en 0.46\n",
+ "5 semantic 13116259115036862914 TEXT #/texts/0 reference 0.93\n",
+ "6 language 11925642137111790531 TEXT #/texts/1 en 0.58\n",
+ "7 semantic 11925642137111790531 TEXT #/texts/1 header 0.72\n",
+ "8 language 13473377044079016890 TEXT #/texts/2 en 0.58\n",
+ "9 semantic 13473377044079016890 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 13933898933532581931 TEXT #/texts/3 en 0.47\n",
+ "11 semantic 13933898933532581931 TEXT #/texts/3 meta-data 0.99\n",
+ "2108.13297.pdf\n",
+ "title: VTLayout: Fusion of Visual and Text Features for Document Layout Analysis\n",
+ "abstract: Abstract. Documents often contain complex physical structures, which make the Document Layout Analysis (DLA) task challenging. As a preprocessing step for content extraction, DLA has the potential to capture rich information in historical or scientific documents on a large scale. Although many deep-learning-based methods from computer vision have already achieved excellent performance in detecting Figure from documents, they are still unsatisfactory in recognizing the List, Table, Text and Title category blocks in DLA. This paper proposes a VTLayout model fusing the documents' deep visual, shallow visual, and text features to localize and identify different category blocks. The model mainly includes two stages, and the three feature extractors are built in the second stage. In the first stage, the Cascade Mask R-CNN model is applied directly to localize all category blocks of the documents. In the second stage, the deep visual, shallow visual, and text features are extracted for fusion to identify the category blocks of documents. As a result, we strengthen the classification power of different category blocks based on the existing localization technique. The experimental results show that the identification capability of the VTLayout is superior to the most advanced method of DLA based on the PubLayNet dataset, and the F1 score is as high as 0.9599. Keywords: Document Layout Analysis · Fusion of Visual and Text · VTLayout · PubLayNet\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 15662473300130382433 DOCUMENT # en 1.00\n",
+ "1 metadata 6721946344459692653 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 10147356995285361999 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 15912456067780105091 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 7673201184713737662 TEXT #/texts/0 en 0.40\n",
+ "5 semantic 7673201184713737662 TEXT #/texts/0 text 0.61\n",
+ "6 language 6721946344459692653 TEXT #/texts/1 en 0.58\n",
+ "7 semantic 6721946344459692653 TEXT #/texts/1 header 0.89\n",
+ "8 language 18275165193923108925 TEXT #/texts/2 en 0.40\n",
+ "9 semantic 18275165193923108925 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 3124243469042107727 TEXT #/texts/3 en 0.84\n",
+ "11 semantic 3124243469042107727 TEXT #/texts/3 meta-data 0.96\n",
+ "2305.02577.pdf\n",
+ "title: Text Reading Order in Uncontrolled Conditions by Sparse Graph Segmentation\n",
+ "abstract: Abstract. Text reading order is a crucial aspect in the output of an OCR engine, with a large impact on downstream tasks. Its difficulty lies in the large variation of domain specific layout structures, and is further exacerbated by real-world image degradations such as perspective distortions. We propose a lightweight, scalable and generalizable approach to identify text reading order with a multi-modal, multi-task graph convolutional network (GCN) running on a sparse layout based graph. Predictions from the model provide hints of bidimensional relations among text lines and layout region structures, upon which a post-processing cluster-and-sort algorithm generates an ordered sequence of all the text lines. The model is language-agnostic and runs effectively across multilanguage datasets that contain various types of images taken in uncontrolled conditions, and it is small enough to be deployed on virtually any platform including mobile devices. Keywords: Multi-modality, bidimensional ordering relations, graph convolutional networks.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 1972699006411133721 DOCUMENT # en 1.00\n",
+ "1 metadata 7732980062417155571 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 571625923297984197 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 4908119920997196749 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 language 16757843187132766078 TEXT #/texts/0 en 0.58\n",
+ "5 semantic 16757843187132766078 TEXT #/texts/0 reference 0.66\n",
+ "6 language 7732980062417155571 TEXT #/texts/1 en 0.87\n",
+ "7 semantic 7732980062417155571 TEXT #/texts/1 header 0.90\n",
+ "8 language 3313787235241970061 TEXT #/texts/2 en 0.43\n",
+ "9 semantic 3313787235241970061 TEXT #/texts/2 meta-data 0.83\n",
+ "10 language 16094051868833861638 TEXT #/texts/3 en 0.29\n",
+ "11 semantic 16094051868833861638 TEXT #/texts/3 meta-data 0.99\n",
+ "2102.08445.pdf\n",
+ "title: TableLab: An Interactive Table Extraction System with Adaptive Deep Learning\n",
+ "abstract: Additional Key Words and Phrases: Table extraction, neural networks, Label correction ACM Reference Format: Nancy Xin Ru Wang, Douglas Burdick, and Yunyao Li. 2021. TableLab: An Interactive Table Extraction System with Adaptive Deep Learning. In 26th International Conference on Intelligent User Interfaces (IUI '21 Companion), April 14-17, 2021, College Station, TX, USA. ACM, New York, NY, USA, 5 pages. https://doi.org/10.1145/3397482.3450718\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 17938030701221560116 DOCUMENT # en 1.00\n",
+ "1 metadata 13161890226093865876 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 13357791164955825560 DOCUMENT #/texts/5 abstract 1.00\n",
+ "3 metadata 8235759605829279332 DOCUMENT #/texts/6 abstract 1.00\n",
+ "4 metadata 562980825111325059 DOCUMENT #/texts/7 abstract 1.00\n",
+ "5 language 5128146051365197271 TEXT #/texts/0 en 0.58\n",
+ "6 semantic 5128146051365197271 TEXT #/texts/0 text 0.98\n",
+ "7 language 13161890226093865876 TEXT #/texts/1 en 0.64\n",
+ "8 semantic 13161890226093865876 TEXT #/texts/1 reference 0.55\n",
+ "9 language 8981668870332301618 TEXT #/texts/2 en 0.50\n",
+ "10 semantic 8981668870332301618 TEXT #/texts/2 meta-data 1.00\n",
+ "11 language 1596575228035912164 TEXT #/texts/3 en 0.90\n",
+ "2201.01654.pdf\n",
+ "title: TableParser: Automatic Table Parsing with Weak Supervision from Spreadsheets\n",
+ "abstract: Abstract Tables have been an ever-existing structure to store data. There exist now different approaches to store tabular data physically. PDFs, images, spreadsheets, and CSVs are leading examples. Being able to parse table structures and extract content bounded by these structures is of high importance in many applications. In this paper, we devise TableParser, a system capable of parsing tables in both native PDFs and scanned images with high precision. We have conducted extensive experiments to show the efficacy of domain adaptation in developing such a tool. Moreover, we create TableAnnotator and ExcelAnnotator, which constitute a spreadsheet-based weak supervision mechanism and a pipeline to enable table parsing. We share these resources with the research community to facilitate further research in this interesting direction.\n",
+ " type subj_hash subj_name subj_path label confidence\n",
+ "0 language 13912777304373020997 DOCUMENT # en 0.99\n",
+ "1 metadata 901169889309037110 DOCUMENT #/texts/1 title 1.00\n",
+ "2 metadata 7917292835367858114 DOCUMENT #/texts/7 abstract 1.00\n",
+ "3 metadata 6679159056897124658 DOCUMENT #/texts/8 abstract 1.00\n",
+ "4 language 6386611247138565482 TEXT #/texts/0 en 0.18\n",
+ "5 semantic 6386611247138565482 TEXT #/texts/0 text 0.88\n",
+ "6 language 901169889309037110 TEXT #/texts/1 en 0.83\n",
+ "7 semantic 901169889309037110 TEXT #/texts/1 header 0.39\n",
+ "8 language 14716675017513697862 TEXT #/texts/2 en 0.24\n",
+ "9 semantic 14716675017513697862 TEXT #/texts/2 meta-data 1.00\n",
+ "10 language 351740986866721621 TEXT #/texts/3 en 0.70\n",
+ "11 semantic 351740986866721621 TEXT #/texts/3 meta-data 0.99\n"
+ ]
+ }
+ ],
+ "source": [
+ "data_collection = ElasticDataCollectionSource(elastic_id=\"default\", index_key=\"arxiv\")\n",
+ "page_size = 5\n",
+ "\n",
+ "# Prepare the data query\n",
+ "query = DataQuery(\n",
+ " search_query, # The search query to be executed\n",
+ " #source=[\"description.title\", \"description.authors\", \"identifiers\"], # Which fields of documents we want to fetch\n",
+ " limit=page_size, # The size of each request page\n",
+ " coordinates=data_collection # The data collection to be queries\n",
+ ")\n",
+ "\n",
+ "\n",
+ "# [Optional] Compute the number of total results matched. This can be used to monitor the pagination progress.\n",
+ "count_query = deepcopy(query)\n",
+ "count_query.paginated_task.parameters[\"limit\"] = 0\n",
+ "count_results = api.queries.run(count_query)\n",
+ "expected_total = count_results.outputs[\"data_count\"]\n",
+ "expected_pages = (expected_total + page_size - 1) // page_size # this is simply a ceiling formula\n",
+ "\n",
+ "model = init_nlp_model(\"language;reference;metadata\")\n",
+ "\n",
+ "# Iterate through all results by fetching `page_size` results at the same time\n",
+ "all_results = []\n",
+ "cursor = api.queries.run_paginated_query(query)\n",
+ "for result_page in tqdm(cursor, total=expected_pages):\n",
+ " # Iterate through the results of a single page, and add to the total list\n",
+ " for row in result_page.outputs[\"data_outputs\"]:\n",
+ " doc = row[\"_source\"]\n",
+ " print(doc[\"file-info\"][\"filename\"])\n",
+ "\n",
+ " res = model.apply_on_doc(doc)\n",
+ "\n",
+ " if \"title\" in res[\"description\"]:\n",
+ " print(\"title: \", res[\"description\"][\"title\"])\n",
+ "\n",
+ " if \"abstract\" in res[\"description\"]:\n",
+ " print(\"abstract: \", res[\"description\"][\"abstract\"])\n",
+ "\n",
+ " props = pd.DataFrame(res[\"properties\"][\"data\"], columns=res[\"properties\"][\"headers\"])\n",
+ " #print(props[0:12])\n",
+ "\n",
+ " #insts = pd.DataFrame(res[\"instances\"][\"data\"], columns=res[\"instances\"][\"headers\"])\n",
+ "\n",
+ " #doc_insts = insts[insts[\"subj_name\"]==\"DOCUMENT\"][[\"subtype\", \"subj_path\", \"name\"]]\n",
+ " #print(doc_insts)\n",
+ "\n"
+ ]
+ },
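+ {
+ "cell_type": "markdown",
+ "id": "3b7f2c1a",
+ "metadata": {},
+ "source": [
+ "The loop above collects every enriched document via `all_results.append(res)`. As an optional follow-up, the next cell is a minimal sketch that tabulates which documents received a `title` and an `abstract` in their `description`. It only assumes the `all_results` list filled above; the `file-info` section is accessed defensively in case it is not retained in the enriched output."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a94d0e2c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Minimal sketch (not part of the original workflow): summarize the metadata\n",
+ "# detected by the loop above. Assumes `all_results` holds the enriched documents;\n",
+ "# `file-info` is accessed defensively in case it is not retained in the output.\n",
+ "summary = pd.DataFrame(\n",
+ "    [\n",
+ "        {\n",
+ "            \"filename\": res.get(\"file-info\", {}).get(\"filename\", \"<unknown>\"),\n",
+ "            \"has_title\": \"title\" in res.get(\"description\", {}),\n",
+ "            \"has_abstract\": \"abstract\" in res.get(\"description\", {}),\n",
+ "        }\n",
+ "        for res in all_results\n",
+ "    ]\n",
+ ")\n",
+ "summary"
+ ]
+ },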
+ {
+ "cell_type": "markdown",
+ "id": "5f49fced-9e94-40c2-82b6-e0f66331784e",
+ "metadata": {},
+ "source": [
+ "## Extract MetaData from private documents"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "8da5b230-6b56-4d7d-aa40-262549963217",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import json\n",
+ "import argparse\n",
+ "\n",
+ "# Import standard dependenices\n",
+ "from copy import deepcopy\n",
+ "import pandas as pd\n",
+ "from numerize.numerize import numerize\n",
+ "from tqdm import tqdm\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Import the deepsearch-toolkit\n",
+ "import deepsearch as ds\n",
+ "from deepsearch.cps.client.components.elastic import ElasticDataCollectionSource\n",
+ "from deepsearch.cps.queries import DataQuery"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "86670afe-5e93-48eb-90ab-895ea5796903",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_indices_in_project(api, proj_key, coll_name):\n",
+ "\n",
+ " data_indices = api.data_indices.list(proj_key=proj_key)\n",
+ "\n",
+ " for index in data_indices:\n",
+ " if coll_name==index.name:\n",
+ " return index\n",
+ "\n",
+ " print(\"Could not find collection in project. Please select one of the following collections\")\n",
+ " for index in data_indices:\n",
+ " print(\" -> collection: \", index)\n",
+ " \n",
+ " return None\n",
+ "\n",
+ "def search_documents(api, proj_key, coll_name, query, max_docs=100, page_size=1):\n",
+ "\n",
+ " index = get_indices_in_project(api, coll_name=coll_name,\n",
+ " proj_key=proj_key)\n",
+ "\n",
+ " if index==None:\n",
+ " return\n",
+ "\n",
+ " try:\n",
+ " data_query = DataQuery(query, coordinates=index.source, limit=page_size) # The size of each request page)\n",
+ " cursor = api.queries.run_paginated_query(data_query)\n",
+ "\n",
+ " # [Optional] Compute the number of total results matched. This can be used to monitor the pagination progress.\n",
+ " count_query = deepcopy(data_query)\n",
+ " count_query.paginated_task.parameters[\"limit\"] = 0\n",
+ " count_results = api.queries.run(count_query)\n",
+ " expected_total = count_results.outputs[\"data_count\"]\n",
+ " expected_pages = (expected_total + page_size - 1) // page_size # this is simply a ceiling formula\n",
+ "\n",
+ " print(\"#-documents: \", expected_total)\n",
+ "\n",
+ " cur_docs = 0\n",
+ " for result_page in tqdm(cursor):\n",
+ "\n",
+ " if cur_docs>max_docs:\n",
+ " break\n",
+ "\n",
+ " for row in result_page.outputs[\"data_outputs\"]:\n",
+ "\n",
+ " #print(cur_docs, max_docs)\n",
+ " if cur_docs>max_docs:\n",
+ " break\n",
+ "\n",
+ "\n",
+ " yield row[\"_source\"]\n",
+ " cur_docs += 1\n",
+ "\n",
+ " except Exception as e:\n",
+ " print(\" => \", e)"
+ ]
+ },
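+ {
+ "cell_type": "markdown",
+ "id": "5c8e1f90",
+ "metadata": {},
+ "source": [
+ "`search_documents` is a generator: it resolves the data index by name and then yields the raw `_source` of each matching document, page by page, until `max_docs` is reached. The next cell is a minimal usage sketch with placeholder values (the collection name and the match-all query string \"*\"); the actual run on a private collection follows afterwards."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d27b46a3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Minimal usage sketch for the helper above; the collection name and the\n",
+ "# match-all query string \"*\" are placeholders to replace with your own values.\n",
+ "COLL_NAME = \"<your-collection-name>\"  # hypothetical private data collection\n",
+ "\n",
+ "for doc in search_documents(api, PROJ_KEY, COLL_NAME, \"*\", max_docs=3, page_size=1):\n",
+ "    # each `doc` is the raw `_source` of one converted document\n",
+ "    print(doc.get(\"file-info\", {}).get(\"document-hash\"))"
+ ]
+ },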
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "2e2d12e8-00ba-49d3-bc8e-19a257eb85df",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "#-documents: 9\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "1it [00:01, 1.12s/it]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "document-hash: 0f43aba61158df5f5a00d91434bee8dd47e9dad2a6252ab7607408e2e6057b7d\n",
+ "title: \n",
+ " Source area and tectonic provenance of Paleocene-Eocene red bed\n",
+ "clastics from the Kurdistan area NE Iraq: Bulk-rock geochemistry\n",
+ "constraints \n",
+ "\n",
+ "authors: [\n",
+ " {\n",
+ " \"name\": \"Brian G Jones\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Muatasam Mahmood Hassan\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Solomon Buckman\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Ali Ismail Al Jubory\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Sabah Ahmed Ismail\"\n",
+ " }\n",
+ "]\n",
+ "affiliations: [\n",
+ " {\n",
+ " \"name\": \"School of Earth and Environmental Sciences\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"University of Wollongong\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"School of Earth Science\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"School of Earth Science\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"University of Kirkuk\"\n",
+ " }\n",
+ "]\n",
+ "abstract: \n",
+ "\n",
+ "abstract \n",
+ "\n",
+ "Paleocene-Eocene Red Beds exist along a narrow belt in the NW-SE\n",
+ "oriented imbricate zone in northeastern Iraq and are composed of\n",
+ "clastic rocks including conglomerate, sandstone and mudstone. \n",
+ "\n",
+ "Trace elements show that the lower part of the Red Beds (unit one) was\n",
+ "derived mainly from mafic and ultramafic rocks. A decrease in mafic\n",
+ "and ultramafic components in the upper part of the Red Beds is\n",
+ "accompanied by an increase in felsic components indicating the\n",
+ "exposure of both felsic and intermediate igneous bodies in the source\n",
+ "areas. \n",
+ "\n",
+ "Trace elements normalized to upper continental crust confirmed the\n",
+ "mafic and ultramafic source for the lower part of the Red Beds. Unit\n",
+ "two and the overlying unit four reflect a style showing felsic and\n",
+ "mafic trends with transition elements being depleted in these parts.\n",
+ "The intervening unit three shows various patterns partly similar to\n",
+ "units one and two depending on clast abundance. \n",
+ "\n",
+ "The concentrations of rare earth elements in the mudstone reaches up\n",
+ "to 60% of the main chemical elements, therefore it is useful to\n",
+ "concentrate on this facies for geochemical studies. \n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2it [00:01, 1.18it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "document-hash: 44cd3953cb824628f2d7fe8976afc9beb2ed07c26ae83f0c79ca357af85af9d4\n",
+ "title: \n",
+ " Facies analysis and diagenetic features of the Aptian Dariyan\n",
+ "Formation in Zagros Fold-Thrust Belt, SW Iran \n",
+ "\n",
+ "authors: [\n",
+ " {\n",
+ " \"name\": \"Arash Shaabanpour Haghighi\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Mohammad Sahraeyan\"\n",
+ " }\n",
+ "]\n",
+ "affiliations: []\n",
+ "abstract: \n",
+ "\n",
+ "abstract \n",
+ "\n",
+ "The Aptian Dariyan Formation (upper part of the Khami Group), is one\n",
+ "of the important reservoir rocks in the Zagros Fold-Thrust Belt. The\n",
+ "Zagros Fold-Thrust Belt is located on the boundary between the Arabian\n",
+ "and Eurasian lithospheric plates and formed from collision between\n",
+ "Eurasia and advancing Arabia during the Cenozoic. In these studied\n",
+ "area, the Dariyan Formation with a thickness of 136 meters (Fahliyan\n",
+ "section) and 100 meters (Kuh-e-Rahmat section), consists of carbonate\n",
+ "rocks. Based on the facies analysis and sedimentological data, 16\n",
+ "microfacies were identified. The microfacies are attributed to five\n",
+ "facies belts: tidal flat (lime mudstone, dolomitic mudstone and\n",
+ "stromatolitic boundstone), lagoon (bioclastic packstone, orbitolinids\n",
+ "bioclastic packstone and orbitolinids peloidal packstone), shoal\n",
+ "(orbitolinids grainstone and peloidal grainstone), restricted\n",
+ "(peloidal packstone, rudist floatstone/rudstone and orbitolinid\n",
+ "wackestone), and open marine (orbitolinid floatstone, dasycladacean\n",
+ "algae floatstone, bioclast pelagic foraminiferal wackestone/packstone,\n",
+ "pelagic foraminiferal mudstone/wackestone, and calcispere\n",
+ "packstone/wackestone). The depositional model relates to the carbonate\n",
+ "ramp. The allochems of the Dariyan Formation are dominated by\n",
+ "foraminifera, bioclasts and green algae. Peloids, and intraclasts are\n",
+ "less abundant in this formation. Due to the great diversity and\n",
+ "abundance of the foraminifera, this carbonate ramp is referred to as a\n",
+ "''foraminifera-dominated carbonate ramp system''. This carbonate\n",
+ "system reflects a local regression in the Fahliyan section which can\n",
+ "be related to the vertical movement of the Kazeroon Fault. The\n",
+ "carbonates of Dariyan Formation have been affected by a variety of\n",
+ "diagenetic processes such as compaction, dissolution, cementation,\n",
+ "neomorphism, and dolomitization. \n",
+ "\n",
+ "Ó 2014 Elsevier Ltd. All rights reserved. \n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "3it [00:02, 1.26it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "document-hash: 45319f285bb4544209fb74269a72a17c3a3525246945441aec927928a105bf04\n",
+ "title: \n",
+ " Integrated provenance analysis of Zakeen (Devonian) and Faraghan\n",
+ "(early Permian) sandstones in the Zagros belt, SW Iran \n",
+ "\n",
+ "authors: [\n",
+ " {\n",
+ " \"name\": \"S Mohammad Zamanzadeh\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Yousef Zoleikhaei\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Abdolhossein Amini\"\n",
+ " }\n",
+ "]\n",
+ "affiliations: [\n",
+ " {\n",
+ " \"name\": \"College of Science\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"University of Tehran\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Faculty of Geography\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"University of Tehran\"\n",
+ " }\n",
+ "]\n",
+ "abstract: \n",
+ "\n",
+ "abstract \n",
+ "\n",
+ "Successions of a controversial period of time in the Zagros and\n",
+ "Arabian Plate stratigraphic column, including Zakeen (Devonian) and\n",
+ "Faraghan (early Permian) formations are investigated for their\n",
+ "provenance characteristics. Nearly similar depositional environments\n",
+ "of the formations, regardless of 70-80 My hiatus between them, is the\n",
+ "main motivation for this study. Evidence from various methods are put\n",
+ "together to reconstruct a comprehensive image of their provenance.\n",
+ "Results from petrographic and detrital mode analysis indicate a\n",
+ "continental block provenance for of the sandstones of both formations.\n",
+ "In addition, evidence of recycling is evident from some rock fragments\n",
+ "in the conglomeratic facies. Heavy mineral diversities are limited to\n",
+ "the ultra-stable species which represent consistent morphological\n",
+ "characteristics in both formations. However, the values of rutile:\n",
+ "zircon index (RZi) showed intermittent changes from low RZi to high\n",
+ "RZi intervals in both formations. Detrital zircon age data in previous\n",
+ "studies represented the same source for these two formations, which\n",
+ "also remained unchanged from Neo-Proterozoic to late Paleozoic\n",
+ "successions. Zircon grains' morphology, however, showed remarkable\n",
+ "difference between the Zakeen and Faraghan formations on the one hand\n",
+ "and successions deposited in the basin prior to the tectonic movements\n",
+ "of mid-Paleozoic time on the other. Outcomes of this study show that,\n",
+ "although each single technique may shed light on a particular aspect\n",
+ "of the greater provenance problem, by integration of all the data,\n",
+ "important evidence of recycled nature of these successions could be\n",
+ "confirmed. Changes in the thickness of the Paleozoic units, the nature\n",
+ "of their stratal surfaces, along with the information from magmatic\n",
+ "events in the area provide a tectonostratigraphic framework for\n",
+ "northern margin of Gondwana in which the recycled nature of these\n",
+ "successions is justifiable. The recycled nature of the studied\n",
+ "formations on the one hand, and their identical provenance on the\n",
+ "other, raise a challenge for the timing proposed for two tectonic\n",
+ "activities of middle Paleozoic and mid-Carboniferous. \n",
+ "\n",
+ "Ó 2014 Elsevier Ltd. All rights reserved. \n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "4it [00:03, 1.42it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "document-hash: 71b9d4a7505055da7d78886e41abc80602eecdaed0863a0f51add493f38968ba\n",
+ "title: \n",
+ " Multi-phase inversion tectonics related to the Hendijan e Nowrooz e\n",
+ "Khafji Fault activity, Zagros Mountains, SW Iran \n",
+ "\n",
+ "authors: [\n",
+ " {\n",
+ " \"name\": \"Sadjad Kazem Shiroodi\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Mohammad Ghafoori\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Ali Faghih\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Mostafa Ghanadian\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Gholamreza Lashkaripour\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Naser Hafezi Moghadas\"\n",
+ " }\n",
+ "]\n",
+ "affiliations: [\n",
+ " {\n",
+ " \"name\": \"Department of Geology\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Faculty of Sciences\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Ferdowsi University of Mashhad\"\n",
+ " }\n",
+ "]\n",
+ "abstract: \n",
+ "\n",
+ "abstract \n",
+ "\n",
+ "Distinctive characteristics of inverted structures make them important\n",
+ "criteria for the identification of certain structural styles of folded\n",
+ "belts. The interpretation of 3D seismic reflection and well data sheds\n",
+ "new light on the structural evolution and age of inverted structures\n",
+ "associated to the Hendijan$_{e}$Nowrooz $_{e}$Khafji Fault within the\n",
+ "Persian Gulf Basin and northeastern margin of Afro-Arabian plate.\n",
+ "Analysis of thickness variations of growth strata using $_{'}$T-Z\n",
+ "plot$_{'}$ (thickness versus throw plot) method revealed the\n",
+ "kinematics of the fault. Obtained results show that the fault has\n",
+ "experienced a multi-phase evolutionary history over six different\n",
+ "extension and compression deformation events (i.e. positive and\n",
+ "negative inversion) between 252.2 and 11.62 Ma. This cyclic activity\n",
+ "of the growth fault was resulted from alteration of sedimentary\n",
+ "processes during continuous fault slip. The structural development of\n",
+ "the study area both during positive and negative inversion geometry\n",
+ "styles was ultimately controlled by the relative motion between the\n",
+ "Afro-Arabian and Central-Iranian plates. \n",
+ "\n",
+ "© 2015 Elsevier Ltd. All rights reserved. \n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "5it [00:03, 1.57it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "document-hash: 7594495bb7872d4aa3bfa7bacbc4f598fa8c84fddc6c553effaf4f1b101935c0\n",
+ "title: \n",
+ " Lithofacies, architectural elements and tectonic provenance of the\n",
+ "siliciclastic rocks of the Lower Permian Dorud Formation in the Alborz\n",
+ "Mountain Range, Northern Iran \n",
+ "\n",
+ "authors: [\n",
+ " {\n",
+ " \"name\": \"Mojtaba Javidan\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Hosseinali Mokhtarpour\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Mohammad Sahraeyan\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Hojatollah Kheyrandish\"\n",
+ " }\n",
+ "]\n",
+ "affiliations: [\n",
+ " {\n",
+ " \"name\": \"Department of Geology\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"College of Basic Sciences\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Department of Geology\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Department of Geology\"\n",
+ " }\n",
+ "]\n",
+ "abstract: \n",
+ "\n",
+ "abstract \n",
+ "\n",
+ "The siliciclastic deposits of the Lower Permian Dorud Formation widely\n",
+ "crop out in the eastern part of the Alborz Mountain Range (northern\n",
+ "Iran). In order to interpret the sedimentary environments and tectonic\n",
+ "provenance of these deposits, two sections in the Kiyasar and\n",
+ "Talmadareh with 112 and 122 m thickness, respectively; have been\n",
+ "studied. The analysis of lithofacies and architectural elements, leads\n",
+ "to recognition of seven lithofacies (Gmm, Sr, Sl, Sh, Sp, Fl, and Fm),\n",
+ "and four architectural elements (FF, LA, CH, and CR). Based on these\n",
+ "results, the sedimentary environment of these deposits has been\n",
+ "identified as a sandy meandering river. The petrographical analysis\n",
+ "indicates that these sediments were deposited under humid weather in\n",
+ "the craton interior and recycled orogeny tectonic provenance. \n",
+ "\n",
+ "Ó 2015 Elsevier Ltd. All rights reserved. \n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "6it [00:04, 1.64it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "document-hash: 7c5d4947280cec27fbb01892eea145933df0813be615ccd5fb5bb5503254d0f1\n",
+ "title: \n",
+ " Stratigraphy, mineralogy and depositional environment of the evaporite\n",
+ "unit in the As ¸ kale (Erzurum) sub-basin, Eastern Anatolia (Turkey) \n",
+ "\n",
+ "authors: [\n",
+ " {\n",
+ " \"name\": \"Emel Abdio\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Mehmet Arslan\"\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Cahit Helvac\"\n",
+ " }\n",
+ "]\n",
+ "affiliations: []\n",
+ "abstract: \n",
+ "\n",
+ "abstract \n",
+ "\n",
+ "The study area is situated in the As¸ kale sub-basin where the Early-\n",
+ "Middle Miocene aged As¸ kale Formation was deposited in a shallow\n",
+ "marine to lagoonal environment, and consists of interstratifications\n",
+ "of clastic sediments, carbonates and evaporites. The successions of\n",
+ "the As¸ kale Formation can be divided into four main members\n",
+ "interfingering with one another both vertically and laterally, and\n",
+ "composed of the sandstone-mudstone-limestone member, the evaporite\n",
+ "member, the gravelstone-sandstone-mudstone intercalations and the\n",
+ "limestone member. The evaporite unit comprises of secondary gypsum\n",
+ "lithofacies formed by hydration of precursor anhydrite, anhydrite,\n",
+ "gypsum-bearing limestone and claystone in the form of wedges and\n",
+ "lenses. Massive, nodular, nodular-banded, laminated and laminated-\n",
+ "banded gpysum lithofacieses in addition to chicken-wire and rare\n",
+ "entrolithic structures were described, indicating a sabhka or a\n",
+ "shallow water depositional environment. Alabastrine and porphyblastic\n",
+ "textures of gypsum were identified within the all lithofacieses with\n",
+ "abundant amount of anhydrite relics. Additionally, saponite and\n",
+ "illite/smectite, calcite and dolomite, celestite, epsomite were also\n",
+ "observed. Successions of the As¸ kale Formation were deposited in\n",
+ "stable subtropical climatic conditions within rapidly subsiding sub-\n",
+ "basin resulted in conversion of sub-basin to shallow platform and even\n",
+ "in lagoon environment. \n",
+ "\n",
+ "© 2015 Elsevier Ltd. All rights reserved. \n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "6it [00:04, 1.32it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "api = ds.CpsApi.from_env(profile_name=PROFILE_NAME)\n",
+ "\n",
+ "model = init_nlp_model(\"language;reference;metadata\")\n",
+ "\n",
+ "proj_key = \"c4ae6545156c5f99770fdfd161102a01567d8ecd\"\n",
+ "#coll_name = \"GeoArabia\"\n",
+ "#coll_name = \"BasinResearch1\"\n",
+ "coll_name = \"African_ES\"\n",
+ "\n",
+ "query = \"*\"\n",
+ "\n",
+ "for doc in search_documents(api, proj_key, coll_name, query, max_docs=5, page_size=1):\n",
+ " \n",
+ " print(\"document-hash: \", doc[\"file-info\"][\"document-hash\"])\n",
+ " \n",
+ " res = model.apply_on_doc(doc)\n",
+ " #print(res[\"description\"].keys())\n",
+ " \n",
+ " if \"title\" in res[\"description\"]:\n",
+ " text = res[\"description\"][\"title\"]\n",
+ " text = \"\\n\".join(textwrap.wrap(text, width=70))\n",
+ "\n",
+ " print(\"title: \\n\", text, \"\\n\")\n",
+ "\n",
+ " if \"authors\" in res[\"description\"]:\n",
+ " print(\"authors: \", json.dumps(res[\"description\"][\"authors\"], indent=2))\n",
+ "\n",
+ " if \"affiliations\" in res[\"description\"]:\n",
+ " print(\"affiliations: \", json.dumps(res[\"description\"][\"affiliations\"], indent=2))\n",
+ " \n",
+ " if \"abstract\" in res[\"description\"]:\n",
+ "\n",
+ " print(\"abstract: \\n\")\n",
+ " for _ in res[\"description\"][\"abstract\"]:\n",
+ " text = \"\\n\".join(textwrap.wrap(_, width=70))\n",
+ " print(text, \"\\n\")\n"
+ ]
+  }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.18"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "23663f76e1e243f0a6319b8ef58f504b6b45c83666dfefd3138ba8cf69ab01fa"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/poetry.lock b/poetry.lock
index 39dce52..0ea80ee 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2,87 +2,87 @@
[[package]]
name = "aiohttp"
-version = "3.9.3"
+version = "3.9.5"
description = "Async http client/server framework (asyncio)"
optional = false
python-versions = ">=3.8"
files = [
- {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:939677b61f9d72a4fa2a042a5eee2a99a24001a67c13da113b2e30396567db54"},
- {file = "aiohttp-3.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f5cd333fcf7590a18334c90f8c9147c837a6ec8a178e88d90a9b96ea03194cc"},
- {file = "aiohttp-3.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82e6aa28dd46374f72093eda8bcd142f7771ee1eb9d1e223ff0fa7177a96b4a5"},
- {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f56455b0c2c7cc3b0c584815264461d07b177f903a04481dfc33e08a89f0c26b"},
- {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bca77a198bb6e69795ef2f09a5f4c12758487f83f33d63acde5f0d4919815768"},
- {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e083c285857b78ee21a96ba1eb1b5339733c3563f72980728ca2b08b53826ca5"},
- {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab40e6251c3873d86ea9b30a1ac6d7478c09277b32e14745d0d3c6e76e3c7e29"},
- {file = "aiohttp-3.9.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df822ee7feaaeffb99c1a9e5e608800bd8eda6e5f18f5cfb0dc7eeb2eaa6bbec"},
- {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:acef0899fea7492145d2bbaaaec7b345c87753168589cc7faf0afec9afe9b747"},
- {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cd73265a9e5ea618014802ab01babf1940cecb90c9762d8b9e7d2cc1e1969ec6"},
- {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a78ed8a53a1221393d9637c01870248a6f4ea5b214a59a92a36f18151739452c"},
- {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:6b0e029353361f1746bac2e4cc19b32f972ec03f0f943b390c4ab3371840aabf"},
- {file = "aiohttp-3.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7cf5c9458e1e90e3c390c2639f1017a0379a99a94fdfad3a1fd966a2874bba52"},
- {file = "aiohttp-3.9.3-cp310-cp310-win32.whl", hash = "sha256:3e59c23c52765951b69ec45ddbbc9403a8761ee6f57253250c6e1536cacc758b"},
- {file = "aiohttp-3.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:055ce4f74b82551678291473f66dc9fb9048a50d8324278751926ff0ae7715e5"},
- {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6b88f9386ff1ad91ace19d2a1c0225896e28815ee09fc6a8932fded8cda97c3d"},
- {file = "aiohttp-3.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c46956ed82961e31557b6857a5ca153c67e5476972e5f7190015018760938da2"},
- {file = "aiohttp-3.9.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:07b837ef0d2f252f96009e9b8435ec1fef68ef8b1461933253d318748ec1acdc"},
- {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad46e6f620574b3b4801c68255492e0159d1712271cc99d8bdf35f2043ec266"},
- {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ed3e046ea7b14938112ccd53d91c1539af3e6679b222f9469981e3dac7ba1ce"},
- {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:039df344b45ae0b34ac885ab5b53940b174530d4dd8a14ed8b0e2155b9dddccb"},
- {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7943c414d3a8d9235f5f15c22ace69787c140c80b718dcd57caaade95f7cd93b"},
- {file = "aiohttp-3.9.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84871a243359bb42c12728f04d181a389718710129b36b6aad0fc4655a7647d4"},
- {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5eafe2c065df5401ba06821b9a054d9cb2848867f3c59801b5d07a0be3a380ae"},
- {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9d3c9b50f19704552f23b4eaea1fc082fdd82c63429a6506446cbd8737823da3"},
- {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:f033d80bc6283092613882dfe40419c6a6a1527e04fc69350e87a9df02bbc283"},
- {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:2c895a656dd7e061b2fd6bb77d971cc38f2afc277229ce7dd3552de8313a483e"},
- {file = "aiohttp-3.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1f5a71d25cd8106eab05f8704cd9167b6e5187bcdf8f090a66c6d88b634802b4"},
- {file = "aiohttp-3.9.3-cp311-cp311-win32.whl", hash = "sha256:50fca156d718f8ced687a373f9e140c1bb765ca16e3d6f4fe116e3df7c05b2c5"},
- {file = "aiohttp-3.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:5fe9ce6c09668063b8447f85d43b8d1c4e5d3d7e92c63173e6180b2ac5d46dd8"},
- {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:38a19bc3b686ad55804ae931012f78f7a534cce165d089a2059f658f6c91fa60"},
- {file = "aiohttp-3.9.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:770d015888c2a598b377bd2f663adfd947d78c0124cfe7b959e1ef39f5b13869"},
- {file = "aiohttp-3.9.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee43080e75fc92bf36219926c8e6de497f9b247301bbf88c5c7593d931426679"},
- {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52df73f14ed99cee84865b95a3d9e044f226320a87af208f068ecc33e0c35b96"},
- {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc9b311743a78043b26ffaeeb9715dc360335e5517832f5a8e339f8a43581e4d"},
- {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b955ed993491f1a5da7f92e98d5dad3c1e14dc175f74517c4e610b1f2456fb11"},
- {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:504b6981675ace64c28bf4a05a508af5cde526e36492c98916127f5a02354d53"},
- {file = "aiohttp-3.9.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fe5571784af92b6bc2fda8d1925cccdf24642d49546d3144948a6a1ed58ca5"},
- {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ba39e9c8627edc56544c8628cc180d88605df3892beeb2b94c9bc857774848ca"},
- {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e5e46b578c0e9db71d04c4b506a2121c0cb371dd89af17a0586ff6769d4c58c1"},
- {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:938a9653e1e0c592053f815f7028e41a3062e902095e5a7dc84617c87267ebd5"},
- {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:c3452ea726c76e92f3b9fae4b34a151981a9ec0a4847a627c43d71a15ac32aa6"},
- {file = "aiohttp-3.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ff30218887e62209942f91ac1be902cc80cddb86bf00fbc6783b7a43b2bea26f"},
- {file = "aiohttp-3.9.3-cp312-cp312-win32.whl", hash = "sha256:38f307b41e0bea3294a9a2a87833191e4bcf89bb0365e83a8be3a58b31fb7f38"},
- {file = "aiohttp-3.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:b791a3143681a520c0a17e26ae7465f1b6f99461a28019d1a2f425236e6eedb5"},
- {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0ed621426d961df79aa3b963ac7af0d40392956ffa9be022024cd16297b30c8c"},
- {file = "aiohttp-3.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7f46acd6a194287b7e41e87957bfe2ad1ad88318d447caf5b090012f2c5bb528"},
- {file = "aiohttp-3.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:feeb18a801aacb098220e2c3eea59a512362eb408d4afd0c242044c33ad6d542"},
- {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f734e38fd8666f53da904c52a23ce517f1b07722118d750405af7e4123933511"},
- {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b40670ec7e2156d8e57f70aec34a7216407848dfe6c693ef131ddf6e76feb672"},
- {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdd215b7b7fd4a53994f238d0f46b7ba4ac4c0adb12452beee724ddd0743ae5d"},
- {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:017a21b0df49039c8f46ca0971b3a7fdc1f56741ab1240cb90ca408049766168"},
- {file = "aiohttp-3.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e99abf0bba688259a496f966211c49a514e65afa9b3073a1fcee08856e04425b"},
- {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:648056db9a9fa565d3fa851880f99f45e3f9a771dd3ff3bb0c048ea83fb28194"},
- {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8aacb477dc26797ee089721536a292a664846489c49d3ef9725f992449eda5a8"},
- {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:522a11c934ea660ff8953eda090dcd2154d367dec1ae3c540aff9f8a5c109ab4"},
- {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:5bce0dc147ca85caa5d33debc4f4d65e8e8b5c97c7f9f660f215fa74fc49a321"},
- {file = "aiohttp-3.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b4af9f25b49a7be47c0972139e59ec0e8285c371049df1a63b6ca81fdd216a2"},
- {file = "aiohttp-3.9.3-cp38-cp38-win32.whl", hash = "sha256:298abd678033b8571995650ccee753d9458dfa0377be4dba91e4491da3f2be63"},
- {file = "aiohttp-3.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:69361bfdca5468c0488d7017b9b1e5ce769d40b46a9f4a2eed26b78619e9396c"},
- {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0fa43c32d1643f518491d9d3a730f85f5bbaedcbd7fbcae27435bb8b7a061b29"},
- {file = "aiohttp-3.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:835a55b7ca49468aaaac0b217092dfdff370e6c215c9224c52f30daaa735c1c1"},
- {file = "aiohttp-3.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:06a9b2c8837d9a94fae16c6223acc14b4dfdff216ab9b7202e07a9a09541168f"},
- {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abf151955990d23f84205286938796c55ff11bbfb4ccfada8c9c83ae6b3c89a3"},
- {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59c26c95975f26e662ca78fdf543d4eeaef70e533a672b4113dd888bd2423caa"},
- {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f95511dd5d0e05fd9728bac4096319f80615aaef4acbecb35a990afebe953b0e"},
- {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:595f105710293e76b9dc09f52e0dd896bd064a79346234b521f6b968ffdd8e58"},
- {file = "aiohttp-3.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7c8b816c2b5af5c8a436df44ca08258fc1a13b449393a91484225fcb7545533"},
- {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f1088fa100bf46e7b398ffd9904f4808a0612e1d966b4aa43baa535d1b6341eb"},
- {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f59dfe57bb1ec82ac0698ebfcdb7bcd0e99c255bd637ff613760d5f33e7c81b3"},
- {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:361a1026c9dd4aba0109e4040e2aecf9884f5cfe1b1b1bd3d09419c205e2e53d"},
- {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:363afe77cfcbe3a36353d8ea133e904b108feea505aa4792dad6585a8192c55a"},
- {file = "aiohttp-3.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e2c45c208c62e955e8256949eb225bd8b66a4c9b6865729a786f2aa79b72e9d"},
- {file = "aiohttp-3.9.3-cp39-cp39-win32.whl", hash = "sha256:f7217af2e14da0856e082e96ff637f14ae45c10a5714b63c77f26d8884cf1051"},
- {file = "aiohttp-3.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:27468897f628c627230dba07ec65dc8d0db566923c48f29e084ce382119802bc"},
- {file = "aiohttp-3.9.3.tar.gz", hash = "sha256:90842933e5d1ff760fae6caca4b2b3edba53ba8f4b71e95dacf2818a2aca06f7"},
+ {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7"},
+ {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c"},
+ {file = "aiohttp-3.9.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ae79c1bc12c34082d92bf9422764f799aee4746fd7a392db46b7fd357d4a17a"},
+ {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d3ebb9e1316ec74277d19c5f482f98cc65a73ccd5430540d6d11682cd857430"},
+ {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84dabd95154f43a2ea80deffec9cb44d2e301e38a0c9d331cc4aa0166fe28ae3"},
+ {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a02fbeca6f63cb1f0475c799679057fc9268b77075ab7cf3f1c600e81dd46b"},
+ {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c26959ca7b75ff768e2776d8055bf9582a6267e24556bb7f7bd29e677932be72"},
+ {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:714d4e5231fed4ba2762ed489b4aec07b2b9953cf4ee31e9871caac895a839c0"},
+ {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7a6a8354f1b62e15d48e04350f13e726fa08b62c3d7b8401c0a1314f02e3558"},
+ {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c413016880e03e69d166efb5a1a95d40f83d5a3a648d16486592c49ffb76d0db"},
+ {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ff84aeb864e0fac81f676be9f4685f0527b660f1efdc40dcede3c251ef1e867f"},
+ {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ad7f2919d7dac062f24d6f5fe95d401597fbb015a25771f85e692d043c9d7832"},
+ {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:702e2c7c187c1a498a4e2b03155d52658fdd6fda882d3d7fbb891a5cf108bb10"},
+ {file = "aiohttp-3.9.5-cp310-cp310-win32.whl", hash = "sha256:67c3119f5ddc7261d47163ed86d760ddf0e625cd6246b4ed852e82159617b5fb"},
+ {file = "aiohttp-3.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:471f0ef53ccedec9995287f02caf0c068732f026455f07db3f01a46e49d76bbb"},
+ {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ae53e33ee7476dd3d1132f932eeb39bf6125083820049d06edcdca4381f342"},
+ {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c088c4d70d21f8ca5c0b8b5403fe84a7bc8e024161febdd4ef04575ef35d474d"},
+ {file = "aiohttp-3.9.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:639d0042b7670222f33b0028de6b4e2fad6451462ce7df2af8aee37dcac55424"},
+ {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f26383adb94da5e7fb388d441bf09c61e5e35f455a3217bfd790c6b6bc64b2ee"},
+ {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66331d00fb28dc90aa606d9a54304af76b335ae204d1836f65797d6fe27f1ca2"},
+ {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ff550491f5492ab5ed3533e76b8567f4b37bd2995e780a1f46bca2024223233"},
+ {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f22eb3a6c1080d862befa0a89c380b4dafce29dc6cd56083f630073d102eb595"},
+ {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a81b1143d42b66ffc40a441379387076243ef7b51019204fd3ec36b9f69e77d6"},
+ {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f64fd07515dad67f24b6ea4a66ae2876c01031de91c93075b8093f07c0a2d93d"},
+ {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:93e22add827447d2e26d67c9ac0161756007f152fdc5210277d00a85f6c92323"},
+ {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:55b39c8684a46e56ef8c8d24faf02de4a2b2ac60d26cee93bc595651ff545de9"},
+ {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4715a9b778f4293b9f8ae7a0a7cef9829f02ff8d6277a39d7f40565c737d3771"},
+ {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:afc52b8d969eff14e069a710057d15ab9ac17cd4b6753042c407dcea0e40bf75"},
+ {file = "aiohttp-3.9.5-cp311-cp311-win32.whl", hash = "sha256:b3df71da99c98534be076196791adca8819761f0bf6e08e07fd7da25127150d6"},
+ {file = "aiohttp-3.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:88e311d98cc0bf45b62fc46c66753a83445f5ab20038bcc1b8a1cc05666f428a"},
+ {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:c7a4b7a6cf5b6eb11e109a9755fd4fda7d57395f8c575e166d363b9fc3ec4678"},
+ {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0a158704edf0abcac8ac371fbb54044f3270bdbc93e254a82b6c82be1ef08f3c"},
+ {file = "aiohttp-3.9.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d153f652a687a8e95ad367a86a61e8d53d528b0530ef382ec5aaf533140ed00f"},
+ {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82a6a97d9771cb48ae16979c3a3a9a18b600a8505b1115cfe354dfb2054468b4"},
+ {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60cdbd56f4cad9f69c35eaac0fbbdf1f77b0ff9456cebd4902f3dd1cf096464c"},
+ {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8676e8fd73141ded15ea586de0b7cda1542960a7b9ad89b2b06428e97125d4fa"},
+ {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da00da442a0e31f1c69d26d224e1efd3a1ca5bcbf210978a2ca7426dfcae9f58"},
+ {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18f634d540dd099c262e9f887c8bbacc959847cfe5da7a0e2e1cf3f14dbf2daf"},
+ {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:320e8618eda64e19d11bdb3bd04ccc0a816c17eaecb7e4945d01deee2a22f95f"},
+ {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:2faa61a904b83142747fc6a6d7ad8fccff898c849123030f8e75d5d967fd4a81"},
+ {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:8c64a6dc3fe5db7b1b4d2b5cb84c4f677768bdc340611eca673afb7cf416ef5a"},
+ {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:393c7aba2b55559ef7ab791c94b44f7482a07bf7640d17b341b79081f5e5cd1a"},
+ {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c671dc117c2c21a1ca10c116cfcd6e3e44da7fcde37bf83b2be485ab377b25da"},
+ {file = "aiohttp-3.9.5-cp312-cp312-win32.whl", hash = "sha256:5a7ee16aab26e76add4afc45e8f8206c95d1d75540f1039b84a03c3b3800dd59"},
+ {file = "aiohttp-3.9.5-cp312-cp312-win_amd64.whl", hash = "sha256:5ca51eadbd67045396bc92a4345d1790b7301c14d1848feaac1d6a6c9289e888"},
+ {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:694d828b5c41255e54bc2dddb51a9f5150b4eefa9886e38b52605a05d96566e8"},
+ {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0605cc2c0088fcaae79f01c913a38611ad09ba68ff482402d3410bf59039bfb8"},
+ {file = "aiohttp-3.9.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4558e5012ee03d2638c681e156461d37b7a113fe13970d438d95d10173d25f78"},
+ {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dbc053ac75ccc63dc3a3cc547b98c7258ec35a215a92bd9f983e0aac95d3d5b"},
+ {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4109adee842b90671f1b689901b948f347325045c15f46b39797ae1bf17019de"},
+ {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6ea1a5b409a85477fd8e5ee6ad8f0e40bf2844c270955e09360418cfd09abac"},
+ {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3c2890ca8c59ee683fd09adf32321a40fe1cf164e3387799efb2acebf090c11"},
+ {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3916c8692dbd9d55c523374a3b8213e628424d19116ac4308e434dbf6d95bbdd"},
+ {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8d1964eb7617907c792ca00b341b5ec3e01ae8c280825deadbbd678447b127e1"},
+ {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d5ab8e1f6bee051a4bf6195e38a5c13e5e161cb7bad83d8854524798bd9fcd6e"},
+ {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:52c27110f3862a1afbcb2af4281fc9fdc40327fa286c4625dfee247c3ba90156"},
+ {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:7f64cbd44443e80094309875d4f9c71d0401e966d191c3d469cde4642bc2e031"},
+ {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b4f72fbb66279624bfe83fd5eb6aea0022dad8eec62b71e7bf63ee1caadeafe"},
+ {file = "aiohttp-3.9.5-cp38-cp38-win32.whl", hash = "sha256:6380c039ec52866c06d69b5c7aad5478b24ed11696f0e72f6b807cfb261453da"},
+ {file = "aiohttp-3.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:da22dab31d7180f8c3ac7c7635f3bcd53808f374f6aa333fe0b0b9e14b01f91a"},
+ {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1732102949ff6087589408d76cd6dea656b93c896b011ecafff418c9661dc4ed"},
+ {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c6021d296318cb6f9414b48e6a439a7f5d1f665464da507e8ff640848ee2a58a"},
+ {file = "aiohttp-3.9.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:239f975589a944eeb1bad26b8b140a59a3a320067fb3cd10b75c3092405a1372"},
+ {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b7b30258348082826d274504fbc7c849959f1989d86c29bc355107accec6cfb"},
+ {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2adf5c87ff6d8b277814a28a535b59e20bfea40a101db6b3bdca7e9926bc24"},
+ {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a3d838441bebcf5cf442700e3963f58b5c33f015341f9ea86dcd7d503c07e2"},
+ {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e3a1ae66e3d0c17cf65c08968a5ee3180c5a95920ec2731f53343fac9bad106"},
+ {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c69e77370cce2d6df5d12b4e12bdcca60c47ba13d1cbbc8645dd005a20b738b"},
+ {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf56238f4bbf49dab8c2dc2e6b1b68502b1e88d335bea59b3f5b9f4c001475"},
+ {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d1469f228cd9ffddd396d9948b8c9cd8022b6d1bf1e40c6f25b0fb90b4f893ed"},
+ {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:45731330e754f5811c314901cebdf19dd776a44b31927fa4b4dbecab9e457b0c"},
+ {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3fcb4046d2904378e3aeea1df51f697b0467f2aac55d232c87ba162709478c46"},
+ {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8cf142aa6c1a751fcb364158fd710b8a9be874b81889c2bd13aa8893197455e2"},
+ {file = "aiohttp-3.9.5-cp39-cp39-win32.whl", hash = "sha256:7b179eea70833c8dee51ec42f3b4097bd6370892fa93f510f76762105568cf09"},
+ {file = "aiohttp-3.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:38d80498e2e169bc61418ff36170e0aad0cd268da8b38a17c4cf29d254a8b3f1"},
+ {file = "aiohttp-3.9.5.tar.gz", hash = "sha256:edea7d15772ceeb29db4aff55e482d4bcfb6ae160ce144f2682de02f6d693551"},
]
[package.dependencies]
@@ -159,13 +159,13 @@ files = [
[[package]]
name = "argilla"
-version = "1.24.0"
+version = "1.27.0"
description = "Open-source tool for exploring, labeling, and monitoring data for NLP projects."
optional = false
python-versions = "<3.12,>=3.8"
files = [
- {file = "argilla-1.24.0-py3-none-any.whl", hash = "sha256:862a86439ef283a24a245d67ae92214861b45a28c5d6e7c9a1f15d0a0af44b05"},
- {file = "argilla-1.24.0.tar.gz", hash = "sha256:76a45f9e576d1b503538a21383c860c693f50e010f1ee6fb4fddae978d3b5d2c"},
+ {file = "argilla-1.27.0-py3-none-any.whl", hash = "sha256:ee951b2bf3697a786aeb50cc07dad32b7326c275f227583b809c186f4d80f050"},
+ {file = "argilla-1.27.0.tar.gz", hash = "sha256:8b33a961a6d9cb73ced88b0cf442388441eafa65f143ef049a78a6eb375985ce"},
]
[package.dependencies]
@@ -175,7 +175,7 @@ httpx = ">=0.15,<=0.26"
monotonic = "*"
numpy = "<1.24.0"
packaging = ">=20.0"
-pandas = ">=1.0.0,<2.0.0"
+pandas = ">=1.0.0"
pydantic = ">=1.10.7"
rich = "!=13.1.0"
tqdm = ">=4.27.0"
@@ -183,10 +183,10 @@ typer = ">=0.6.0,<0.10.0"
wrapt = ">=1.13,<1.15"
[package.extras]
-integrations = ["PyYAML (>=5.4.1,<6.1.0)", "cleanlab (>=2.0.0,<2.1.0)", "datasets (>1.17.0,!=2.3.2)", "evaluate", "faiss-cpu", "flair (>=0.12.2)", "flyingsquid", "huggingface-hub (>=0.5.0)", "ipynbname", "openai (>=0.27.10,<1.0.0)", "peft", "pgmpy", "plotly (>=4.1.0)", "sentence-transformers", "sentence-transformers (>=2.0.0,<3.0.0)", "seqeval", "setfit (>=1.0.0)", "snorkel (>=0.9.7)", "spacy (>=3.5.0,<3.7.0)", "spacy-huggingface-hub (>=0.0.10)", "spacy-transformers (>=1.2.5)", "span-marker", "textdescriptives (>=2.7.0,<3.0.0)", "transformers[torch] (>=4.30.0)", "trl (>=0.5.0)"]
-listeners = ["prodict (>=0.8.0,<0.9.0)", "schedule (>=1.1.0,<1.2.0)"]
-postgresql = ["argilla-server[postgresql] (>=1.24.0,<1.25.0)"]
-server = ["argilla-server (>=1.24.0,<1.25.0)"]
+integrations = ["PyYAML (>=5.4.1,<6.1.0)", "datasets (>1.17.0,!=2.3.2)", "evaluate", "faiss-cpu", "flair (>=0.12.2)", "flyingsquid", "huggingface-hub (>=0.5.0)", "ipynbname", "openai (>=0.27.10,<1.0.0)", "peft", "pgmpy", "plotly (>=4.1.0)", "sentence-transformers", "sentence-transformers (>=2.0.0,<3.0.0)", "seqeval", "setfit (>=1.0.0)", "snorkel (>=0.9.7)", "spacy (>=3.5.0,<3.7.0)", "spacy-huggingface-hub (>=0.0.10)", "spacy-transformers (>=1.2.5)", "span-marker", "textdescriptives (>=2.7.0,<3.0.0)", "transformers[torch] (>=4.30.0)", "trl (>=0.5.0)"]
+listeners = ["schedule (>=1.1.0,<1.2.0)"]
+server = ["argilla-server (>=1.27.0,<1.28.0)"]
+server-postgresql = ["argilla-server[postgresql] (>=1.27.0,<1.28.0)"]
tests = ["factory-boy (>=3.2.1,<3.3.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock"]
[[package]]
@@ -329,13 +329,13 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p
[[package]]
name = "babel"
-version = "2.14.0"
+version = "2.15.0"
description = "Internationalization utilities"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "Babel-2.14.0-py3-none-any.whl", hash = "sha256:efb1a25b7118e67ce3a259bed20545c29cb68be8ad2c784c83689981b7a57287"},
- {file = "Babel-2.14.0.tar.gz", hash = "sha256:6919867db036398ba21eb5c7a0f6b28ab8cbc3ae7a73a44ebe34ae74a4e7d363"},
+ {file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"},
+ {file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"},
]
[package.dependencies]
@@ -783,13 +783,13 @@ files = [
[[package]]
name = "comm"
-version = "0.2.1"
+version = "0.2.2"
description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc."
optional = false
python-versions = ">=3.8"
files = [
- {file = "comm-0.2.1-py3-none-any.whl", hash = "sha256:87928485c0dfc0e7976fd89fc1e187023cf587e7c353e4a9b417555b44adf021"},
- {file = "comm-0.2.1.tar.gz", hash = "sha256:0bc91edae1344d39d3661dcbc36937181fdaddb304790458f8b044dbc064b89a"},
+ {file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"},
+ {file = "comm-0.2.2.tar.gz", hash = "sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e"},
]
[package.dependencies]
@@ -985,23 +985,23 @@ files = [
[[package]]
name = "deepsearch-glm"
-version = "0.16.2"
+version = "0.18.4"
description = "Graph Language Models"
optional = false
python-versions = "<4.0,>=3.8"
files = [
- {file = "deepsearch_glm-0.16.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:bccc2b9878cbe313008ad094ee90bb391c77da4e1c3cf7b67be16daa8b0dd4de"},
- {file = "deepsearch_glm-0.16.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:5c4eaf57924b4fb4cee2e672416d162fc96f205f6631fbe53dfccf229beb489a"},
- {file = "deepsearch_glm-0.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e39e0d8ee4bd8cac9940e4b9ae131f8632bba42ea1784f14165eed0b78ac8fa"},
- {file = "deepsearch_glm-0.16.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:2548f896356b9cdd58822ce00278942ada42dc21f193047ab7e9fc60d3196674"},
- {file = "deepsearch_glm-0.16.2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:b5539843d34ad81982abf985a8625b3dfa3c4be4efb5a71797e18e29e9b8bccc"},
- {file = "deepsearch_glm-0.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:465f0207883886b941704c1c31854b3179fa0e5c4bf9b9ebd4c916e7cb3eb1f5"},
- {file = "deepsearch_glm-0.16.2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:4879fddf93ea0381b6f4cdd3025fe4cb4ac70f5266e61b49a88dd65d3ef90765"},
- {file = "deepsearch_glm-0.16.2-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:6b8db679215ab1766207e65ce65cb2b43aa1f3e0245ef41e8767804f299d68c4"},
- {file = "deepsearch_glm-0.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3b6d88ef34368809265678fd38cc8303ef64c8292aec7be5605a557e4f6492e"},
- {file = "deepsearch_glm-0.16.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a784ef29f56af82111a0cac4f5002cafa48a807a9024ccda9623648041c38eb8"},
- {file = "deepsearch_glm-0.16.2-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:0df47564b3a3009b15c099237d6b3a7ab0a13b0b544760a1115ee5ba5c0a6a85"},
- {file = "deepsearch_glm-0.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4eb4a93ee6de1d013d2a0b46a7bd5f2ed5fab6e028f7deee61b11a709bd7cebe"},
+ {file = "deepsearch_glm-0.18.4-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ad88c5bf3c203174ef81e0699405aec0f5386130cbc6a975b165f81887bc1a52"},
+ {file = "deepsearch_glm-0.18.4-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:21d51a0671f0713d23be57030287a0f907f4a5f0627a45ea07e2caf54129a71a"},
+ {file = "deepsearch_glm-0.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fc853941ea751a15f65e83f9bee9f988d0ecac4b28fac067b2aab49e15edb74"},
+ {file = "deepsearch_glm-0.18.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:cab5e577cf724343f2a5987ff4488c69e86a2dbca8cb0359c9243a07c6cd7d69"},
+ {file = "deepsearch_glm-0.18.4-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:dda02391306d657a884b12f21cc3d1228663f940ec6001c833893dd2844bcc25"},
+ {file = "deepsearch_glm-0.18.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dccd4286a93ee1a216acba27e1fc76f5d14e280d968998cfeae11a00ad1b6cb"},
+ {file = "deepsearch_glm-0.18.4-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:cf38368bc72eab673459ea0fc96c02b1f3ae120df2d9443e1a63e010764ac1e9"},
+ {file = "deepsearch_glm-0.18.4-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:d3fd83ea3b2bce11bac1d710f12547728f4dd48bfaa8bd472366ef144469d52c"},
+ {file = "deepsearch_glm-0.18.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fb4bfd43ac3b996cdd151c35e94fa399953ee3952d7e86390a825880ece95f3"},
+ {file = "deepsearch_glm-0.18.4-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:57cb67e435cacb6c4a6b6a9109d943267c493ebbba252a88ca40909976f60225"},
+ {file = "deepsearch_glm-0.18.4-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:edc399939b6464f96600d2f23796ae2641d668fb794b77199e87abdef77f8853"},
+ {file = "deepsearch_glm-0.18.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00ad8d932e7f0d1be4fd99fc0d4c8d50cb1ff10764f146b6ecb310a1379123d4"},
]
[package.dependencies]
@@ -1098,13 +1098,13 @@ files = [
[[package]]
name = "exceptiongroup"
-version = "1.2.0"
+version = "1.2.1"
description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
files = [
- {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"},
- {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"},
+ {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"},
+ {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"},
]
[package.extras]
@@ -1140,69 +1140,69 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc
[[package]]
name = "filelock"
-version = "3.13.1"
+version = "3.14.0"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.8"
files = [
- {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"},
- {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"},
+ {file = "filelock-3.14.0-py3-none-any.whl", hash = "sha256:43339835842f110ca7ae60f1e1c160714c5a6afd15a2873419ab185334975c0f"},
+ {file = "filelock-3.14.0.tar.gz", hash = "sha256:6ea72da3be9b8c82afd3edcf99f2fffbb5076335a5ae4d03248bb5b6c3eae78a"},
]
[package.extras]
-docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.24)"]
-testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"]
+docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"]
+testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"]
typing = ["typing-extensions (>=4.8)"]
[[package]]
name = "fonttools"
-version = "4.49.0"
+version = "4.51.0"
description = "Tools to manipulate font files"
optional = false
python-versions = ">=3.8"
files = [
- {file = "fonttools-4.49.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d970ecca0aac90d399e458f0b7a8a597e08f95de021f17785fb68e2dc0b99717"},
- {file = "fonttools-4.49.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac9a745b7609f489faa65e1dc842168c18530874a5f5b742ac3dd79e26bca8bc"},
- {file = "fonttools-4.49.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ba0e00620ca28d4ca11fc700806fd69144b463aa3275e1b36e56c7c09915559"},
- {file = "fonttools-4.49.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdee3ab220283057e7840d5fb768ad4c2ebe65bdba6f75d5d7bf47f4e0ed7d29"},
- {file = "fonttools-4.49.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ce7033cb61f2bb65d8849658d3786188afd80f53dad8366a7232654804529532"},
- {file = "fonttools-4.49.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:07bc5ea02bb7bc3aa40a1eb0481ce20e8d9b9642a9536cde0218290dd6085828"},
- {file = "fonttools-4.49.0-cp310-cp310-win32.whl", hash = "sha256:86eef6aab7fd7c6c8545f3ebd00fd1d6729ca1f63b0cb4d621bccb7d1d1c852b"},
- {file = "fonttools-4.49.0-cp310-cp310-win_amd64.whl", hash = "sha256:1fac1b7eebfce75ea663e860e7c5b4a8831b858c17acd68263bc156125201abf"},
- {file = "fonttools-4.49.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:edc0cce355984bb3c1d1e89d6a661934d39586bb32191ebff98c600f8957c63e"},
- {file = "fonttools-4.49.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:83a0d9336de2cba86d886507dd6e0153df333ac787377325a39a2797ec529814"},
- {file = "fonttools-4.49.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36c8865bdb5cfeec88f5028e7e592370a0657b676c6f1d84a2108e0564f90e22"},
- {file = "fonttools-4.49.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33037d9e56e2562c710c8954d0f20d25b8386b397250d65581e544edc9d6b942"},
- {file = "fonttools-4.49.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8fb022d799b96df3eaa27263e9eea306bd3d437cc9aa981820850281a02b6c9a"},
- {file = "fonttools-4.49.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:33c584c0ef7dc54f5dd4f84082eabd8d09d1871a3d8ca2986b0c0c98165f8e86"},
- {file = "fonttools-4.49.0-cp311-cp311-win32.whl", hash = "sha256:cbe61b158deb09cffdd8540dc4a948d6e8f4d5b4f3bf5cd7db09bd6a61fee64e"},
- {file = "fonttools-4.49.0-cp311-cp311-win_amd64.whl", hash = "sha256:fc11e5114f3f978d0cea7e9853627935b30d451742eeb4239a81a677bdee6bf6"},
- {file = "fonttools-4.49.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d647a0e697e5daa98c87993726da8281c7233d9d4ffe410812a4896c7c57c075"},
- {file = "fonttools-4.49.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f3bbe672df03563d1f3a691ae531f2e31f84061724c319652039e5a70927167e"},
- {file = "fonttools-4.49.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bebd91041dda0d511b0d303180ed36e31f4f54b106b1259b69fade68413aa7ff"},
- {file = "fonttools-4.49.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4145f91531fd43c50f9eb893faa08399816bb0b13c425667c48475c9f3a2b9b5"},
- {file = "fonttools-4.49.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ea329dafb9670ffbdf4dbc3b0e5c264104abcd8441d56de77f06967f032943cb"},
- {file = "fonttools-4.49.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c076a9e548521ecc13d944b1d261ff3d7825048c338722a4bd126d22316087b7"},
- {file = "fonttools-4.49.0-cp312-cp312-win32.whl", hash = "sha256:b607ea1e96768d13be26d2b400d10d3ebd1456343eb5eaddd2f47d1c4bd00880"},
- {file = "fonttools-4.49.0-cp312-cp312-win_amd64.whl", hash = "sha256:a974c49a981e187381b9cc2c07c6b902d0079b88ff01aed34695ec5360767034"},
- {file = "fonttools-4.49.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b85ec0bdd7bdaa5c1946398cbb541e90a6dfc51df76dfa88e0aaa41b335940cb"},
- {file = "fonttools-4.49.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:af20acbe198a8a790618ee42db192eb128afcdcc4e96d99993aca0b60d1faeb4"},
- {file = "fonttools-4.49.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d418b1fee41a1d14931f7ab4b92dc0bc323b490e41d7a333eec82c9f1780c75"},
- {file = "fonttools-4.49.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b44a52b8e6244b6548851b03b2b377a9702b88ddc21dcaf56a15a0393d425cb9"},
- {file = "fonttools-4.49.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7c7125068e04a70739dad11857a4d47626f2b0bd54de39e8622e89701836eabd"},
- {file = "fonttools-4.49.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29e89d0e1a7f18bc30f197cfadcbef5a13d99806447c7e245f5667579a808036"},
- {file = "fonttools-4.49.0-cp38-cp38-win32.whl", hash = "sha256:9d95fa0d22bf4f12d2fb7b07a46070cdfc19ef5a7b1c98bc172bfab5bf0d6844"},
- {file = "fonttools-4.49.0-cp38-cp38-win_amd64.whl", hash = "sha256:768947008b4dc552d02772e5ebd49e71430a466e2373008ce905f953afea755a"},
- {file = "fonttools-4.49.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:08877e355d3dde1c11973bb58d4acad1981e6d1140711230a4bfb40b2b937ccc"},
- {file = "fonttools-4.49.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fdb54b076f25d6b0f0298dc706acee5052de20c83530fa165b60d1f2e9cbe3cb"},
- {file = "fonttools-4.49.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0af65c720520710cc01c293f9c70bd69684365c6015cc3671db2b7d807fe51f2"},
- {file = "fonttools-4.49.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f255ce8ed7556658f6d23f6afd22a6d9bbc3edb9b96c96682124dc487e1bf42"},
- {file = "fonttools-4.49.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d00af0884c0e65f60dfaf9340e26658836b935052fdd0439952ae42e44fdd2be"},
- {file = "fonttools-4.49.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:263832fae27481d48dfafcc43174644b6706639661e242902ceb30553557e16c"},
- {file = "fonttools-4.49.0-cp39-cp39-win32.whl", hash = "sha256:0404faea044577a01bb82d47a8fa4bc7a54067fa7e324785dd65d200d6dd1133"},
- {file = "fonttools-4.49.0-cp39-cp39-win_amd64.whl", hash = "sha256:b050d362df50fc6e38ae3954d8c29bf2da52be384649ee8245fdb5186b620836"},
- {file = "fonttools-4.49.0-py3-none-any.whl", hash = "sha256:af281525e5dd7fa0b39fb1667b8d5ca0e2a9079967e14c4bfe90fd1cd13e0f18"},
- {file = "fonttools-4.49.0.tar.gz", hash = "sha256:ebf46e7f01b7af7861310417d7c49591a85d99146fc23a5ba82fdb28af156321"},
+ {file = "fonttools-4.51.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:84d7751f4468dd8cdd03ddada18b8b0857a5beec80bce9f435742abc9a851a74"},
+ {file = "fonttools-4.51.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8b4850fa2ef2cfbc1d1f689bc159ef0f45d8d83298c1425838095bf53ef46308"},
+ {file = "fonttools-4.51.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5b48a1121117047d82695d276c2af2ee3a24ffe0f502ed581acc2673ecf1037"},
+ {file = "fonttools-4.51.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:180194c7fe60c989bb627d7ed5011f2bef1c4d36ecf3ec64daec8302f1ae0716"},
+ {file = "fonttools-4.51.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:96a48e137c36be55e68845fc4284533bda2980f8d6f835e26bca79d7e2006438"},
+ {file = "fonttools-4.51.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:806e7912c32a657fa39d2d6eb1d3012d35f841387c8fc6cf349ed70b7c340039"},
+ {file = "fonttools-4.51.0-cp310-cp310-win32.whl", hash = "sha256:32b17504696f605e9e960647c5f64b35704782a502cc26a37b800b4d69ff3c77"},
+ {file = "fonttools-4.51.0-cp310-cp310-win_amd64.whl", hash = "sha256:c7e91abdfae1b5c9e3a543f48ce96013f9a08c6c9668f1e6be0beabf0a569c1b"},
+ {file = "fonttools-4.51.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a8feca65bab31479d795b0d16c9a9852902e3a3c0630678efb0b2b7941ea9c74"},
+ {file = "fonttools-4.51.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8ac27f436e8af7779f0bb4d5425aa3535270494d3bc5459ed27de3f03151e4c2"},
+ {file = "fonttools-4.51.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e19bd9e9964a09cd2433a4b100ca7f34e34731e0758e13ba9a1ed6e5468cc0f"},
+ {file = "fonttools-4.51.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2b92381f37b39ba2fc98c3a45a9d6383bfc9916a87d66ccb6553f7bdd129097"},
+ {file = "fonttools-4.51.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5f6bc991d1610f5c3bbe997b0233cbc234b8e82fa99fc0b2932dc1ca5e5afec0"},
+ {file = "fonttools-4.51.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9696fe9f3f0c32e9a321d5268208a7cc9205a52f99b89479d1b035ed54c923f1"},
+ {file = "fonttools-4.51.0-cp311-cp311-win32.whl", hash = "sha256:3bee3f3bd9fa1d5ee616ccfd13b27ca605c2b4270e45715bd2883e9504735034"},
+ {file = "fonttools-4.51.0-cp311-cp311-win_amd64.whl", hash = "sha256:0f08c901d3866a8905363619e3741c33f0a83a680d92a9f0e575985c2634fcc1"},
+ {file = "fonttools-4.51.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4060acc2bfa2d8e98117828a238889f13b6f69d59f4f2d5857eece5277b829ba"},
+ {file = "fonttools-4.51.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1250e818b5f8a679ad79660855528120a8f0288f8f30ec88b83db51515411fcc"},
+ {file = "fonttools-4.51.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76f1777d8b3386479ffb4a282e74318e730014d86ce60f016908d9801af9ca2a"},
+ {file = "fonttools-4.51.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b5ad456813d93b9c4b7ee55302208db2b45324315129d85275c01f5cb7e61a2"},
+ {file = "fonttools-4.51.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:68b3fb7775a923be73e739f92f7e8a72725fd333eab24834041365d2278c3671"},
+ {file = "fonttools-4.51.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8e2f1a4499e3b5ee82c19b5ee57f0294673125c65b0a1ff3764ea1f9db2f9ef5"},
+ {file = "fonttools-4.51.0-cp312-cp312-win32.whl", hash = "sha256:278e50f6b003c6aed19bae2242b364e575bcb16304b53f2b64f6551b9c000e15"},
+ {file = "fonttools-4.51.0-cp312-cp312-win_amd64.whl", hash = "sha256:b3c61423f22165541b9403ee39874dcae84cd57a9078b82e1dce8cb06b07fa2e"},
+ {file = "fonttools-4.51.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1621ee57da887c17312acc4b0e7ac30d3a4fb0fec6174b2e3754a74c26bbed1e"},
+ {file = "fonttools-4.51.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e9d9298be7a05bb4801f558522adbe2feea1b0b103d5294ebf24a92dd49b78e5"},
+ {file = "fonttools-4.51.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee1af4be1c5afe4c96ca23badd368d8dc75f611887fb0c0dac9f71ee5d6f110e"},
+ {file = "fonttools-4.51.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c18b49adc721a7d0b8dfe7c3130c89b8704baf599fb396396d07d4aa69b824a1"},
+ {file = "fonttools-4.51.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:de7c29bdbdd35811f14493ffd2534b88f0ce1b9065316433b22d63ca1cd21f14"},
+ {file = "fonttools-4.51.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cadf4e12a608ef1d13e039864f484c8a968840afa0258b0b843a0556497ea9ed"},
+ {file = "fonttools-4.51.0-cp38-cp38-win32.whl", hash = "sha256:aefa011207ed36cd280babfaa8510b8176f1a77261833e895a9d96e57e44802f"},
+ {file = "fonttools-4.51.0-cp38-cp38-win_amd64.whl", hash = "sha256:865a58b6e60b0938874af0968cd0553bcd88e0b2cb6e588727117bd099eef836"},
+ {file = "fonttools-4.51.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:60a3409c9112aec02d5fb546f557bca6efa773dcb32ac147c6baf5f742e6258b"},
+ {file = "fonttools-4.51.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f7e89853d8bea103c8e3514b9f9dc86b5b4120afb4583b57eb10dfa5afbe0936"},
+ {file = "fonttools-4.51.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56fc244f2585d6c00b9bcc59e6593e646cf095a96fe68d62cd4da53dd1287b55"},
+ {file = "fonttools-4.51.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d145976194a5242fdd22df18a1b451481a88071feadf251221af110ca8f00ce"},
+ {file = "fonttools-4.51.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5b8cab0c137ca229433570151b5c1fc6af212680b58b15abd797dcdd9dd5051"},
+ {file = "fonttools-4.51.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:54dcf21a2f2d06ded676e3c3f9f74b2bafded3a8ff12f0983160b13e9f2fb4a7"},
+ {file = "fonttools-4.51.0-cp39-cp39-win32.whl", hash = "sha256:0118ef998a0699a96c7b28457f15546815015a2710a1b23a7bf6c1be60c01636"},
+ {file = "fonttools-4.51.0-cp39-cp39-win_amd64.whl", hash = "sha256:599bdb75e220241cedc6faebfafedd7670335d2e29620d207dd0378a4e9ccc5a"},
+ {file = "fonttools-4.51.0-py3-none-any.whl", hash = "sha256:15c94eeef6b095831067f72c825eb0e2d48bb4cea0647c1b05c981ecba2bf39f"},
+ {file = "fonttools-4.51.0.tar.gz", hash = "sha256:dc0673361331566d7a663d7ce0f6fdcbfbdc1f59c6e3ed1165ad7202ca183c68"},
]
[package.extras]
@@ -1318,13 +1318,13 @@ files = [
[[package]]
name = "fsspec"
-version = "2024.2.0"
+version = "2024.3.1"
description = "File-system specification"
optional = false
python-versions = ">=3.8"
files = [
- {file = "fsspec-2024.2.0-py3-none-any.whl", hash = "sha256:817f969556fa5916bc682e02ca2045f96ff7f586d45110fcb76022063ad2c7d8"},
- {file = "fsspec-2024.2.0.tar.gz", hash = "sha256:b6ad1a679f760dda52b1168c859d01b7b80648ea6f7f7c7f5a8a91dc3f3ecb84"},
+ {file = "fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512"},
+ {file = "fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9"},
]
[package.extras]
@@ -1364,13 +1364,13 @@ files = [
[[package]]
name = "httpcore"
-version = "1.0.3"
+version = "1.0.5"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
files = [
- {file = "httpcore-1.0.3-py3-none-any.whl", hash = "sha256:9a6a501c3099307d9fd76ac244e08503427679b1e81ceb1d922485e2f2462ad2"},
- {file = "httpcore-1.0.3.tar.gz", hash = "sha256:5c0f9546ad17dac4d0772b0808856eb616eb8b48ce94f49ed819fd6982a8a544"},
+ {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"},
+ {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"},
]
[package.dependencies]
@@ -1381,7 +1381,7 @@ h11 = ">=0.13,<0.15"
asyncio = ["anyio (>=4.0,<5.0)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
-trio = ["trio (>=0.22.0,<0.24.0)"]
+trio = ["trio (>=0.22.0,<0.26.0)"]
[[package]]
name = "httpx"
@@ -1409,13 +1409,13 @@ socks = ["socksio (==1.*)"]
[[package]]
name = "identify"
-version = "2.5.35"
+version = "2.5.36"
description = "File identification library for Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "identify-2.5.35-py2.py3-none-any.whl", hash = "sha256:c4de0081837b211594f8e877a6b4fad7ca32bbfc1a9307fdd61c28bfe923f13e"},
- {file = "identify-2.5.35.tar.gz", hash = "sha256:10a7ca245cfcd756a554a7288159f72ff105ad233c7c4b9c6f0f4d108f5f6791"},
+ {file = "identify-2.5.36-py2.py3-none-any.whl", hash = "sha256:37d93f380f4de590500d9dba7db359d0d3da95ffe7f9de1753faa159e71e7dfa"},
+ {file = "identify-2.5.36.tar.gz", hash = "sha256:e5e00f54165f9047fbebeb4a560f9acfb8af4c88232be60a488e9b68d122745d"},
]
[package.extras]
@@ -1423,43 +1423,43 @@ license = ["ukkonen"]
[[package]]
name = "idna"
-version = "3.6"
+version = "3.7"
description = "Internationalized Domain Names in Applications (IDNA)"
optional = false
python-versions = ">=3.5"
files = [
- {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"},
- {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"},
+ {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
+ {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
]
[[package]]
name = "importlib-metadata"
-version = "7.0.1"
+version = "7.1.0"
description = "Read metadata from Python packages"
optional = false
python-versions = ">=3.8"
files = [
- {file = "importlib_metadata-7.0.1-py3-none-any.whl", hash = "sha256:4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e"},
- {file = "importlib_metadata-7.0.1.tar.gz", hash = "sha256:f238736bb06590ae52ac1fab06a3a9ef1d8dce2b7a35b5ab329371d6c8f5d2cc"},
+ {file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"},
+ {file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"},
]
[package.dependencies]
zipp = ">=0.5"
[package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
perf = ["ipython"]
-testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"]
+testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"]
[[package]]
name = "importlib-resources"
-version = "6.1.1"
+version = "6.4.0"
description = "Read resources from Python packages"
optional = false
python-versions = ">=3.8"
files = [
- {file = "importlib_resources-6.1.1-py3-none-any.whl", hash = "sha256:e8bf90d8213b486f428c9c39714b920041cb02c184686a3dee24905aaa8105d6"},
- {file = "importlib_resources-6.1.1.tar.gz", hash = "sha256:3893a00122eafde6894c59914446a512f728a0c1a45f9bb9b63721b6bacf0b4a"},
+ {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"},
+ {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"},
]
[package.dependencies]
@@ -1467,17 +1467,17 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
[package.extras]
docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
-testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff", "zipp (>=3.17)"]
+testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"]
[[package]]
name = "ipykernel"
-version = "6.29.2"
+version = "6.29.4"
description = "IPython Kernel for Jupyter"
optional = false
python-versions = ">=3.8"
files = [
- {file = "ipykernel-6.29.2-py3-none-any.whl", hash = "sha256:50384f5c577a260a1d53f1f59a828c7266d321c9b7d00d345693783f66616055"},
- {file = "ipykernel-6.29.2.tar.gz", hash = "sha256:3bade28004e3ff624ed57974948116670604ac5f676d12339693f3142176d3f0"},
+ {file = "ipykernel-6.29.4-py3-none-any.whl", hash = "sha256:1181e653d95c6808039c509ef8e67c4126b3b3af7781496c7cbfb5ed938a27da"},
+ {file = "ipykernel-6.29.4.tar.gz", hash = "sha256:3d44070060f9475ac2092b760123fadf105d2e2493c24848b6691a7c4f42af5c"},
]
[package.dependencies]
@@ -1500,7 +1500,7 @@ cov = ["coverage[toml]", "curio", "matplotlib", "pytest-cov", "trio"]
docs = ["myst-parser", "pydata-sphinx-theme", "sphinx", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "trio"]
pyqt5 = ["pyqt5"]
pyside6 = ["pyside6"]
-test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (==0.23.4)", "pytest-cov", "pytest-timeout"]
+test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (>=0.23.5)", "pytest-cov", "pytest-timeout"]
[[package]]
name = "ipython"
@@ -1623,13 +1623,13 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"]
[[package]]
name = "jinja2"
-version = "3.1.3"
+version = "3.1.4"
description = "A very fast and expressive template engine."
optional = false
python-versions = ">=3.7"
files = [
- {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
- {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
+ {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"},
+ {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"},
]
[package.dependencies]
@@ -1640,29 +1640,26 @@ i18n = ["Babel (>=2.7)"]
[[package]]
name = "joblib"
-version = "1.3.2"
+version = "1.4.2"
description = "Lightweight pipelining with Python functions"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"},
- {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"},
+ {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"},
+ {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
]
[[package]]
name = "json5"
-version = "0.9.17"
+version = "0.9.25"
description = "A Python implementation of the JSON5 data format."
optional = false
python-versions = ">=3.8"
files = [
- {file = "json5-0.9.17-py2.py3-none-any.whl", hash = "sha256:f8ec1ecf985951d70f780f6f877c4baca6a47b6e61e02c4cd190138d10a7805a"},
- {file = "json5-0.9.17.tar.gz", hash = "sha256:717d99d657fa71b7094877b1d921b1cce40ab444389f6d770302563bb7dfd9ae"},
+ {file = "json5-0.9.25-py3-none-any.whl", hash = "sha256:34ed7d834b1341a86987ed52f3f76cd8ee184394906b6e22a1e0deb9ab294e8f"},
+ {file = "json5-0.9.25.tar.gz", hash = "sha256:548e41b9be043f9426776f05df8635a00fe06104ea51ed24b67f908856e151ae"},
]
-[package.extras]
-dev = ["hypothesis"]
-
[[package]]
name = "jsonpointer"
version = "2.4"
@@ -1676,13 +1673,13 @@ files = [
[[package]]
name = "jsonschema"
-version = "4.21.1"
+version = "4.22.0"
description = "An implementation of JSON Schema validation for Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "jsonschema-4.21.1-py3-none-any.whl", hash = "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f"},
- {file = "jsonschema-4.21.1.tar.gz", hash = "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5"},
+ {file = "jsonschema-4.22.0-py3-none-any.whl", hash = "sha256:ff4cfd6b1367a40e7bc6411caec72effadd3db0bbe5017de188f2d6108335802"},
+ {file = "jsonschema-4.22.0.tar.gz", hash = "sha256:5b22d434a45935119af990552c862e5d6d564e8f6601206b305a61fdf661a2b7"},
]
[package.dependencies]
@@ -1742,13 +1739,13 @@ qtconsole = "*"
[[package]]
name = "jupyter-client"
-version = "8.6.0"
+version = "8.6.1"
description = "Jupyter protocol implementation and client libraries"
optional = false
python-versions = ">=3.8"
files = [
- {file = "jupyter_client-8.6.0-py3-none-any.whl", hash = "sha256:909c474dbe62582ae62b758bca86d6518c85234bdee2d908c778db6d72f39d99"},
- {file = "jupyter_client-8.6.0.tar.gz", hash = "sha256:0642244bb83b4764ae60d07e010e15f0e2d275ec4e918a8f7b80fbbef3ca60c7"},
+ {file = "jupyter_client-8.6.1-py3-none-any.whl", hash = "sha256:3b7bd22f058434e3b9a7ea4b1500ed47de2713872288c0d511d19926f99b459f"},
+ {file = "jupyter_client-8.6.1.tar.gz", hash = "sha256:e842515e2bab8e19186d89fdfea7abd15e39dd581f94e399f00e2af5a1652d3f"},
]
[package.dependencies]
@@ -1789,13 +1786,13 @@ test = ["flaky", "pexpect", "pytest"]
[[package]]
name = "jupyter-core"
-version = "5.7.1"
+version = "5.7.2"
description = "Jupyter core package. A base package on which Jupyter projects rely."
optional = false
python-versions = ">=3.8"
files = [
- {file = "jupyter_core-5.7.1-py3-none-any.whl", hash = "sha256:c65c82126453a723a2804aa52409930434598fd9d35091d63dfb919d2b765bb7"},
- {file = "jupyter_core-5.7.1.tar.gz", hash = "sha256:de61a9d7fc71240f688b2fb5ab659fbb56979458dc66a71decd098e03c79e218"},
+ {file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"},
+ {file = "jupyter_core-5.7.2.tar.gz", hash = "sha256:aa5f8d32bbf6b431ac830496da7392035d6f61b4f54872f15c4bd2a9c3f536d9"},
]
[package.dependencies]
@@ -1805,17 +1802,17 @@ traitlets = ">=5.3"
[package.extras]
docs = ["myst-parser", "pydata-sphinx-theme", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"]
-test = ["ipykernel", "pre-commit", "pytest", "pytest-cov", "pytest-timeout"]
+test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout"]
[[package]]
name = "jupyter-events"
-version = "0.9.0"
+version = "0.10.0"
description = "Jupyter Event System library"
optional = false
python-versions = ">=3.8"
files = [
- {file = "jupyter_events-0.9.0-py3-none-any.whl", hash = "sha256:d853b3c10273ff9bc8bb8b30076d65e2c9685579db736873de6c2232dde148bf"},
- {file = "jupyter_events-0.9.0.tar.gz", hash = "sha256:81ad2e4bc710881ec274d31c6c50669d71bbaa5dd9d01e600b56faa85700d399"},
+ {file = "jupyter_events-0.10.0-py3-none-any.whl", hash = "sha256:4b72130875e59d57716d327ea70d3ebc3af1944d3717e5a498b8a06c6c159960"},
+ {file = "jupyter_events-0.10.0.tar.gz", hash = "sha256:670b8229d3cc882ec782144ed22e0d29e1c2d639263f92ca8383e66682845e22"},
]
[package.dependencies]
@@ -1834,13 +1831,13 @@ test = ["click", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (>=0.19.0)", "p
[[package]]
name = "jupyter-lsp"
-version = "2.2.2"
+version = "2.2.5"
description = "Multi-Language Server WebSocket proxy for Jupyter Notebook/Lab server"
optional = false
python-versions = ">=3.8"
files = [
- {file = "jupyter-lsp-2.2.2.tar.gz", hash = "sha256:256d24620542ae4bba04a50fc1f6ffe208093a07d8e697fea0a8d1b8ca1b7e5b"},
- {file = "jupyter_lsp-2.2.2-py3-none-any.whl", hash = "sha256:3b95229e4168355a8c91928057c1621ac3510ba98b2a925e82ebd77f078b1aa5"},
+ {file = "jupyter-lsp-2.2.5.tar.gz", hash = "sha256:793147a05ad446f809fd53ef1cd19a9f5256fd0a2d6b7ce943a982cb4f545001"},
+ {file = "jupyter_lsp-2.2.5-py3-none-any.whl", hash = "sha256:45fbddbd505f3fbfb0b6cb2f1bc5e15e83ab7c79cd6e89416b248cb3c00c11da"},
]
[package.dependencies]
@@ -1849,49 +1846,49 @@ jupyter-server = ">=1.1.2"
[[package]]
name = "jupyter-server"
-version = "2.12.5"
+version = "2.14.0"
description = "The backend—i.e. core services, APIs, and REST endpoints—to Jupyter web applications."
optional = false
python-versions = ">=3.8"
files = [
- {file = "jupyter_server-2.12.5-py3-none-any.whl", hash = "sha256:184a0f82809a8522777cfb6b760ab6f4b1bb398664c5860a27cec696cb884923"},
- {file = "jupyter_server-2.12.5.tar.gz", hash = "sha256:0edb626c94baa22809be1323f9770cf1c00a952b17097592e40d03e6a3951689"},
+ {file = "jupyter_server-2.14.0-py3-none-any.whl", hash = "sha256:fb6be52c713e80e004fac34b35a0990d6d36ba06fd0a2b2ed82b899143a64210"},
+ {file = "jupyter_server-2.14.0.tar.gz", hash = "sha256:659154cea512083434fd7c93b7fe0897af7a2fd0b9dd4749282b42eaac4ae677"},
]
[package.dependencies]
anyio = ">=3.1.0"
-argon2-cffi = "*"
-jinja2 = "*"
+argon2-cffi = ">=21.1"
+jinja2 = ">=3.0.3"
jupyter-client = ">=7.4.4"
jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0"
jupyter-events = ">=0.9.0"
-jupyter-server-terminals = "*"
+jupyter-server-terminals = ">=0.4.4"
nbconvert = ">=6.4.4"
nbformat = ">=5.3.0"
-overrides = "*"
-packaging = "*"
-prometheus-client = "*"
-pywinpty = {version = "*", markers = "os_name == \"nt\""}
+overrides = ">=5.0"
+packaging = ">=22.0"
+prometheus-client = ">=0.9"
+pywinpty = {version = ">=2.0.1", markers = "os_name == \"nt\""}
pyzmq = ">=24"
send2trash = ">=1.8.2"
terminado = ">=0.8.3"
tornado = ">=6.2.0"
traitlets = ">=5.6.0"
-websocket-client = "*"
+websocket-client = ">=1.7"
[package.extras]
docs = ["ipykernel", "jinja2", "jupyter-client", "jupyter-server", "myst-parser", "nbformat", "prometheus-client", "pydata-sphinx-theme", "send2trash", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-openapi (>=0.8.0)", "sphinxcontrib-spelling", "sphinxemoji", "tornado", "typing-extensions"]
-test = ["flaky", "ipykernel", "pre-commit", "pytest (>=7.0)", "pytest-console-scripts", "pytest-jupyter[server] (>=0.4)", "pytest-timeout", "requests"]
+test = ["flaky", "ipykernel", "pre-commit", "pytest (>=7.0,<9)", "pytest-console-scripts", "pytest-jupyter[server] (>=0.7)", "pytest-timeout", "requests"]
[[package]]
name = "jupyter-server-terminals"
-version = "0.5.2"
+version = "0.5.3"
description = "A Jupyter Server Extension Providing Terminals."
optional = false
python-versions = ">=3.8"
files = [
- {file = "jupyter_server_terminals-0.5.2-py3-none-any.whl", hash = "sha256:1b80c12765da979513c42c90215481bbc39bd8ae7c0350b4f85bc3eb58d0fa80"},
- {file = "jupyter_server_terminals-0.5.2.tar.gz", hash = "sha256:396b5ccc0881e550bf0ee7012c6ef1b53edbde69e67cab1d56e89711b46052e8"},
+ {file = "jupyter_server_terminals-0.5.3-py3-none-any.whl", hash = "sha256:41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa"},
+ {file = "jupyter_server_terminals-0.5.3.tar.gz", hash = "sha256:5ae0295167220e9ace0edcfdb212afd2b01ee8d179fe6f23c899590e9b8a5269"},
]
[package.dependencies]
@@ -1904,13 +1901,13 @@ test = ["jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-jupyter[server] (>
[[package]]
name = "jupyterlab"
-version = "4.1.2"
+version = "4.1.8"
description = "JupyterLab computational environment"
optional = false
python-versions = ">=3.8"
files = [
- {file = "jupyterlab-4.1.2-py3-none-any.whl", hash = "sha256:aa88193f03cf4d3555f6712f04d74112b5eb85edd7d222c588c7603a26d33c5b"},
- {file = "jupyterlab-4.1.2.tar.gz", hash = "sha256:5d6348b3ed4085181499f621b7dfb6eb0b1f57f3586857aadfc8e3bf4c4885f9"},
+ {file = "jupyterlab-4.1.8-py3-none-any.whl", hash = "sha256:c3baf3a2f91f89d110ed5786cd18672b9a357129d4e389d2a0dead15e11a4d2c"},
+ {file = "jupyterlab-4.1.8.tar.gz", hash = "sha256:3384aded8680e7ce504fd63b8bb89a39df21c9c7694d9e7dc4a68742cdb30f9b"},
]
[package.dependencies]
@@ -1918,15 +1915,15 @@ async-lru = ">=1.0.0"
httpx = ">=0.25.0"
importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.10\""}
importlib-resources = {version = ">=1.4", markers = "python_version < \"3.9\""}
-ipykernel = "*"
+ipykernel = ">=6.5.0"
jinja2 = ">=3.0.3"
jupyter-core = "*"
jupyter-lsp = ">=2.0.0"
jupyter-server = ">=2.4.0,<3"
-jupyterlab-server = ">=2.19.0,<3"
+jupyterlab-server = ">=2.27.1,<3"
notebook-shim = ">=0.2"
packaging = "*"
-tomli = {version = "*", markers = "python_version < \"3.11\""}
+tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""}
tornado = ">=6.2.0"
traitlets = "*"
@@ -1935,6 +1932,7 @@ dev = ["build", "bump2version", "coverage", "hatch", "pre-commit", "pytest-cov",
docs = ["jsx-lexer", "myst-parser", "pydata-sphinx-theme (>=0.13.0)", "pytest", "pytest-check-links", "pytest-jupyter", "sphinx (>=1.8,<7.3.0)", "sphinx-copybutton"]
docs-screenshots = ["altair (==5.2.0)", "ipython (==8.16.1)", "ipywidgets (==8.1.1)", "jupyterlab-geojson (==3.4.0)", "jupyterlab-language-pack-zh-cn (==4.0.post6)", "matplotlib (==3.8.2)", "nbconvert (>=7.0.0)", "pandas (==2.2.0)", "scipy (==1.12.0)", "vega-datasets (==0.9.0)"]
test = ["coverage", "pytest (>=7.0)", "pytest-check-links (>=0.7)", "pytest-console-scripts", "pytest-cov", "pytest-jupyter (>=0.5.3)", "pytest-timeout", "pytest-tornasync", "requests", "requests-cache", "virtualenv"]
+upgrade-extension = ["copier (>=8.0,<9.0)", "jinja2-time (<0.3)", "pydantic (<2.0)", "pyyaml-include (<2.0)", "tomli-w (<2.0)"]
[[package]]
name = "jupyterlab-pygments"
@@ -1949,13 +1947,13 @@ files = [
[[package]]
name = "jupyterlab-server"
-version = "2.25.3"
+version = "2.27.1"
description = "A set of server components for JupyterLab and JupyterLab like applications."
optional = false
python-versions = ">=3.8"
files = [
- {file = "jupyterlab_server-2.25.3-py3-none-any.whl", hash = "sha256:c48862519fded9b418c71645d85a49b2f0ec50d032ba8316738e9276046088c1"},
- {file = "jupyterlab_server-2.25.3.tar.gz", hash = "sha256:846f125a8a19656611df5b03e5912c8393cea6900859baa64fa515eb64a8dc40"},
+ {file = "jupyterlab_server-2.27.1-py3-none-any.whl", hash = "sha256:f5e26156e5258b24d532c84e7c74cc212e203bff93eb856f81c24c16daeecc75"},
+ {file = "jupyterlab_server-2.27.1.tar.gz", hash = "sha256:097b5ac709b676c7284ac9c5e373f11930a561f52cd5a86e4fc7e5a9c8a8631d"},
]
[package.dependencies]
@@ -1971,7 +1969,7 @@ requests = ">=2.31"
[package.extras]
docs = ["autodoc-traits", "jinja2 (<3.2.0)", "mistune (<4)", "myst-parser", "pydata-sphinx-theme", "sphinx", "sphinx-copybutton", "sphinxcontrib-openapi (>0.8)"]
openapi = ["openapi-core (>=0.18.0,<0.19.0)", "ruamel-yaml"]
-test = ["hatch", "ipykernel", "openapi-core (>=0.18.0,<0.19.0)", "openapi-spec-validator (>=0.6.0,<0.8.0)", "pytest (>=7.0)", "pytest-console-scripts", "pytest-cov", "pytest-jupyter[server] (>=0.6.2)", "pytest-timeout", "requests-mock", "ruamel-yaml", "sphinxcontrib-spelling", "strict-rfc3339", "werkzeug"]
+test = ["hatch", "ipykernel", "openapi-core (>=0.18.0,<0.19.0)", "openapi-spec-validator (>=0.6.0,<0.8.0)", "pytest (>=7.0,<8)", "pytest-console-scripts", "pytest-cov", "pytest-jupyter[server] (>=0.6.2)", "pytest-timeout", "requests-mock", "ruamel-yaml", "sphinxcontrib-spelling", "strict-rfc3339", "werkzeug"]
[[package]]
name = "jupyterlab-widgets"
@@ -2099,17 +2097,123 @@ files = [
[[package]]
name = "langcodes"
-version = "3.3.0"
+version = "3.4.0"
description = "Tools for labeling human languages with IETF language tags"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
+files = [
+ {file = "langcodes-3.4.0-py3-none-any.whl", hash = "sha256:10a4cc078b8e8937d8485d3352312a0a89a3125190db9f2bb2074250eef654e9"},
+ {file = "langcodes-3.4.0.tar.gz", hash = "sha256:ae5a77d1a01d0d1e91854a671890892b7ce9abb601ab7327fc5c874f899e1979"},
+]
+
+[package.dependencies]
+language-data = ">=1.2"
+
+[package.extras]
+build = ["build", "twine"]
+test = ["pytest", "pytest-cov"]
+
+[[package]]
+name = "language-data"
+version = "1.2.0"
+description = "Supplementary data about languages used by the langcodes module"
+optional = false
+python-versions = "*"
files = [
- {file = "langcodes-3.3.0-py3-none-any.whl", hash = "sha256:4d89fc9acb6e9c8fdef70bcdf376113a3db09b67285d9e1d534de6d8818e7e69"},
- {file = "langcodes-3.3.0.tar.gz", hash = "sha256:794d07d5a28781231ac335a1561b8442f8648ca07cd518310aeb45d6f0807ef6"},
+ {file = "language_data-1.2.0-py3-none-any.whl", hash = "sha256:77d5cab917f91ee0b2f1aa7018443e911cf8985ef734ca2ba3940770f6a3816b"},
+ {file = "language_data-1.2.0.tar.gz", hash = "sha256:82a86050bbd677bfde87d97885b17566cfe75dad3ac4f5ce44b52c28f752e773"},
]
+[package.dependencies]
+marisa-trie = ">=0.7.7"
+
[package.extras]
-data = ["language-data (>=1.1,<2.0)"]
+build = ["build", "twine"]
+test = ["pytest", "pytest-cov"]
+
+[[package]]
+name = "marisa-trie"
+version = "1.1.0"
+description = "Static memory-efficient and fast Trie-like structures for Python."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "marisa-trie-1.1.0.tar.gz", hash = "sha256:5bf43ed0cf36af4578fe7b034cf95f532439766516680e4bd603723611ebd56b"},
+ {file = "marisa_trie-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ed1b37ef1444083ab11e15d9150861874d8dd7be70c8899eccf1b986d37823a5"},
+ {file = "marisa_trie-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:119366f9db9f53242439f69c3d49a3f1a3912970bc29b9af6ed9b6d0b7ef8c9e"},
+ {file = "marisa_trie-1.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6964bfa23af502591094712e79886974a631d8047eb72cdf646babc62b03ae5e"},
+ {file = "marisa_trie-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab8ec133daabb288e832d448fdff2e71756e7ba5ea7ff1b7b7645b010b2c23ac"},
+ {file = "marisa_trie-1.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:61a52a0e5ef404bfdcc2117cd39cb572595ff01f73f27feb5fc9e92889adbae0"},
+ {file = "marisa_trie-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9ce60c2ed4f4138ef78e346d43b105185977c6be7bce0609b48bb14092110612"},
+ {file = "marisa_trie-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3b90a422eb660bd111ffe54290bfbabf98a30fccfe8a594a512b3ba81fda8aa5"},
+ {file = "marisa_trie-1.1.0-cp310-cp310-win32.whl", hash = "sha256:6b92cd77787aeb92fd815a5ad00d4828f528d30032c1314d5f17571afe125cbe"},
+ {file = "marisa_trie-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:d415c11aada47f7f4afb818ce92e46c8f1b55611d325c09df7070088cfaa24bb"},
+ {file = "marisa_trie-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:68a71ebb12498ad82e1579f41efe52c91839d92c0823a79389924057094c0a68"},
+ {file = "marisa_trie-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1de6df9686175feb48f1e271a9252f6bf7ce1a4669a5bab3a97dffb8b11b13e6"},
+ {file = "marisa_trie-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:789374ab88afe9e8ecfbd03a213f7b11fbefb3a8286c8fad88a2da0d7e5e0ef9"},
+ {file = "marisa_trie-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0f1b05f7dcde6ca2b460126519a37707fde53808b9e29e6d5b44de737262104"},
+ {file = "marisa_trie-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:312e414001e5777506f459fa3032c3a5827e80a32babfd44ab528dd0fb824e61"},
+ {file = "marisa_trie-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:571f68432d3dbf06b715cbb6aed1eed9898c149619045d65e6d82407d4eb4c9e"},
+ {file = "marisa_trie-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c1d98fe7da386c7f789526d8cf0b824b87fa1019e52619f8ad5e877912cc0f71"},
+ {file = "marisa_trie-1.1.0-cp311-cp311-win32.whl", hash = "sha256:953400c8d7639349df9ef3f899f67c158852416a0470e7221fb06f19e3b1d0f6"},
+ {file = "marisa_trie-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:c423e651daec5931371fa3d480fb5ac59164ed7dea968d8f51b1ba369bac4975"},
+ {file = "marisa_trie-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9f4a37a17b9a551d1678b909c44841109b9979d12e72a9ed6e922a51f41889f1"},
+ {file = "marisa_trie-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bbc118727474d710851db69d2762b4a3936ad1d2ffebb519c3f8f42a925fa118"},
+ {file = "marisa_trie-1.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8c74557386eb62ce6526a9d0ad44410530e973feee5e0cabebf57b4d72696b2a"},
+ {file = "marisa_trie-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4af7893ffc7099b68fd9d667fecc50d38e3e49405fcd6be97bc5ec72816ffa2"},
+ {file = "marisa_trie-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:690eb9af9c0f4c677b74077843d0afafd08e543cdb3905b8a354aa0b0a2c06c3"},
+ {file = "marisa_trie-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7e1771bedce1d9c37931c5efffac23aaed32f1364b99420673fa9417a0b5a6f1"},
+ {file = "marisa_trie-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:38a64b1b4cbab19c23cfabed654c99e072af1c574f54b57ededd81357382d679"},
+ {file = "marisa_trie-1.1.0-cp312-cp312-win32.whl", hash = "sha256:92cfb535174d711c3dbb3a9f3bbbd5abd180e778cd8ba2839a34565294c33190"},
+ {file = "marisa_trie-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:9f0cd1d11f7f7022a044a32a59632f18af91ee31fa84ff98c914cb5b9fae449d"},
+ {file = "marisa_trie-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1d95308e0453302706d5246935beb9e3255c20238a633d0637b3d345de428aa3"},
+ {file = "marisa_trie-1.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dbff54cf950dccc8bded31ad130571330efd1d6849fbcc7825e62ac5063bd0a"},
+ {file = "marisa_trie-1.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c14e494b28f78f806f5320f02b8625770d598bff0a4ea45f825f55257efcaf52"},
+ {file = "marisa_trie-1.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2484e83b9c233b337f45bb09740a74aeb510081856cdd4b293b48b970c710c1d"},
+ {file = "marisa_trie-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f661d79e5fef5c38ab41fd5a16c29f8bd9d46a0de6c407b88ebbf24c7637ac84"},
+ {file = "marisa_trie-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:5998b16395cefd76c52ce8cae35b837254ff097d3a357023f592218ff9d2112b"},
+ {file = "marisa_trie-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0b5d97515d9d65f237049ba01d385455fe5cc8dfb9c97b4a5b976382b9aff6c1"},
+ {file = "marisa_trie-1.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:4407e7ec82cdb501015188f1895bbdcac1a5ecb0e5ecc5cbbba028d5940499f2"},
+ {file = "marisa_trie-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:de62a115afd157fe6cfc8e4194905605c4603c6664eac30788f3f6866b67345f"},
+ {file = "marisa_trie-1.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d7e17abb08ada031c86835e358242b6a2dc6645e1a872e30e1ce1c1b1cd6317d"},
+ {file = "marisa_trie-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac288cb48e09927d96d00f4b2ad7bbfad91ce2e20fc6e6bb8b61dda05dbc28d2"},
+ {file = "marisa_trie-1.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da0d59b93a327d772b49d9a79ef11f2e1c23aaafcefeab95376447794318d189"},
+ {file = "marisa_trie-1.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d810f95a548751484bd57cfe5940ea5423d4e39678a10c9582b3f102fac27bbe"},
+ {file = "marisa_trie-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:521a954dd469a336e3c8a307f7fe7ba272032d77cc8f801edebf2d11549ac1c2"},
+ {file = "marisa_trie-1.1.0-cp38-cp38-win32.whl", hash = "sha256:1b25422875673ca5a15e236f2158f6a277f7252057272bb0b51272f4a9d3c401"},
+ {file = "marisa_trie-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:c80b85559e09ec7f69b9f623ea06fd5cfe25ead20bb4a09c20e879cd1851db35"},
+ {file = "marisa_trie-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:844a56eebe32b098b6d97af28bfa9ca576400b5560be8a09c021a521faadee4a"},
+ {file = "marisa_trie-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:917ef793e0e90bd01fc436cebf93707de1ac31f2feadc4d4b0ddbdb9522617d5"},
+ {file = "marisa_trie-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e09cb17288a5a43431e23737d2d91bd54e6d694380740267960dbc7ab96ad69d"},
+ {file = "marisa_trie-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5353d3c8c48524aac40c325794d6227c59e517a68746d3a0524608a20438a1e9"},
+ {file = "marisa_trie-1.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d4dd18d1c67a949eeaba16385ab2c1a3e1eb7a2acb982c3744193a59df30cfd"},
+ {file = "marisa_trie-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:4a9a17507211700c77849d1caf4e6a64756536e491327cda3ea12259ce70eae5"},
+ {file = "marisa_trie-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6699b0d3ca090172713bfbb9ef8063bfe27cae8d05121d5f71d1c4048b57936d"},
+ {file = "marisa_trie-1.1.0-cp39-cp39-win32.whl", hash = "sha256:b4450a4917af45614edc3da1ab1b927b96de01e5742719c330e6d4a0e36fee7d"},
+ {file = "marisa_trie-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:89ba0ba6a05683d1ea966afe7aeae114d13fd8f354c6692a90bc2b181657ccbf"},
+ {file = "marisa_trie-1.1.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:10665a17a7965c2a49b2dda6beb14cf206f6932f013ca0337105a8744d67395d"},
+ {file = "marisa_trie-1.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86365aac6dde7228b0090d0e993f3ed557a43111cbe3b397f1bad77febbab342"},
+ {file = "marisa_trie-1.1.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:086d7c2b45b03185c70431450e7b92e76d3f3333074bf9b3aabb2eb6e1b85f89"},
+ {file = "marisa_trie-1.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9e5450eb023bf7a232cdaaf18fbe67fe45ed724d5cb30dd35f48c3a723ad3a4f"},
+ {file = "marisa_trie-1.1.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:206db942691d82310cdb6c59e34acbe648766ddb569c13de8b534e17892c608c"},
+ {file = "marisa_trie-1.1.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ff2e12de8aea7fde90b4128bb8340a99cfb4a55e4c41b6336d187660e899385"},
+ {file = "marisa_trie-1.1.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b8652141e4623b36017275a6ae6efe7a2ece3b304b984d4f66acb620a78eed9"},
+ {file = "marisa_trie-1.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:7916ddd3cf621a20285256e4e5e5e7e6c86aa29356faa31cc8de535b8b71afe3"},
+ {file = "marisa_trie-1.1.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c57f2d6caa71829973a18b80c70b422337328686d3c7ea4519082f0b291fa01"},
+ {file = "marisa_trie-1.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd45429b25098034a9ca2fc78877e3edc9d59f88ca8b3c69cff5f299c728d771"},
+ {file = "marisa_trie-1.1.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71ee2edb2574b87a2173d64dd3f79c8e1af2e8d7bd1469bdcfe5fd895ada913a"},
+ {file = "marisa_trie-1.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:427ce824566382309a300a8d080a84ccf6795325204c834839bdcb41203591f4"},
+ {file = "marisa_trie-1.1.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37fcb2265d73a5c04829b25af7cdf819a27d71a898a6e1b54822e006f1843c94"},
+ {file = "marisa_trie-1.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b34ea73a92c35577171bf9d8216e6c57acdf08b77b5d84f1efad8cf721159da"},
+ {file = "marisa_trie-1.1.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fdd7445f2f2785c02c18d46acf0c14baffafa6e7e73b3e9052b512e1f7dadbb3"},
+ {file = "marisa_trie-1.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e0f4c47fca455bd75cab9e2181138d3978721ed546e2ed18e83b0852c49eca4f"},
+]
+
+[package.dependencies]
+setuptools = "*"
+
+[package.extras]
+test = ["hypothesis", "pytest", "readme-renderer"]
[[package]]
name = "markdown-it-py"
@@ -2274,13 +2378,13 @@ python-dateutil = ">=2.7"
[[package]]
name = "matplotlib-inline"
-version = "0.1.6"
+version = "0.1.7"
description = "Inline Matplotlib backend for Jupyter"
optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.8"
files = [
- {file = "matplotlib-inline-0.1.6.tar.gz", hash = "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304"},
- {file = "matplotlib_inline-0.1.6-py3-none-any.whl", hash = "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311"},
+ {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"},
+ {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"},
]
[package.dependencies]
@@ -2496,13 +2600,13 @@ files = [
[[package]]
name = "nbclient"
-version = "0.9.0"
+version = "0.9.1"
description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor."
optional = false
python-versions = ">=3.8.0"
files = [
- {file = "nbclient-0.9.0-py3-none-any.whl", hash = "sha256:a3a1ddfb34d4a9d17fc744d655962714a866639acd30130e9be84191cd97cd15"},
- {file = "nbclient-0.9.0.tar.gz", hash = "sha256:4b28c207877cf33ef3a9838cdc7a54c5ceff981194a82eac59d558f05487295e"},
+ {file = "nbclient-0.9.1-py3-none-any.whl", hash = "sha256:2c50a866e8dd6c5f655de47d2e252c82d2ebe978574e760ac229f5950593a434"},
+ {file = "nbclient-0.9.1.tar.gz", hash = "sha256:4f7b78c6c2a380e228f8a3bb469b847cb24e5b8ad6fda410691b5621e05ce5a2"},
]
[package.dependencies]
@@ -2514,17 +2618,17 @@ traitlets = ">=5.4"
[package.extras]
dev = ["pre-commit"]
docs = ["autodoc-traits", "mock", "moto", "myst-parser", "nbclient[test]", "sphinx (>=1.7)", "sphinx-book-theme", "sphinxcontrib-spelling"]
-test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>=7.0.0)", "pytest (>=7.0)", "pytest-asyncio", "pytest-cov (>=4.0)", "testpath", "xmltodict"]
+test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>=7.0.0)", "pytest (>=7.0,<8)", "pytest-asyncio", "pytest-cov (>=4.0)", "testpath", "xmltodict"]
[[package]]
name = "nbconvert"
-version = "7.16.1"
+version = "7.16.4"
description = "Converting Jupyter Notebooks (.ipynb files) to other formats. Output formats include asciidoc, html, latex, markdown, pdf, py, rst, script. nbconvert can be used both as a Python library (`import nbconvert`) or as a command line tool (invoked as `jupyter nbconvert ...`)."
optional = false
python-versions = ">=3.8"
files = [
- {file = "nbconvert-7.16.1-py3-none-any.whl", hash = "sha256:3188727dffadfdc9c6a1c7250729063d7bc78b355ad7aa023138afa030d1cd07"},
- {file = "nbconvert-7.16.1.tar.gz", hash = "sha256:e79e6a074f49ba3ed29428ed86487bf51509d9aab613bd8522ac08f6d28fd7fd"},
+ {file = "nbconvert-7.16.4-py3-none-any.whl", hash = "sha256:05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3"},
+ {file = "nbconvert-7.16.4.tar.gz", hash = "sha256:86ca91ba266b0a448dc96fa6c5b9d98affabde2867b363258703536807f9f7f4"},
]
[package.dependencies]
@@ -2546,29 +2650,29 @@ tinycss2 = "*"
traitlets = ">=5.1"
[package.extras]
-all = ["nbconvert[docs,qtpdf,serve,test,webpdf]"]
+all = ["flaky", "ipykernel", "ipython", "ipywidgets (>=7.5)", "myst-parser", "nbsphinx (>=0.2.12)", "playwright", "pydata-sphinx-theme", "pyqtwebengine (>=5.15)", "pytest (>=7)", "sphinx (==5.0.2)", "sphinxcontrib-spelling", "tornado (>=6.1)"]
docs = ["ipykernel", "ipython", "myst-parser", "nbsphinx (>=0.2.12)", "pydata-sphinx-theme", "sphinx (==5.0.2)", "sphinxcontrib-spelling"]
-qtpdf = ["nbconvert[qtpng]"]
+qtpdf = ["pyqtwebengine (>=5.15)"]
qtpng = ["pyqtwebengine (>=5.15)"]
serve = ["tornado (>=6.1)"]
-test = ["flaky", "ipykernel", "ipywidgets (>=7.5)", "pytest"]
+test = ["flaky", "ipykernel", "ipywidgets (>=7.5)", "pytest (>=7)"]
webpdf = ["playwright"]
[[package]]
name = "nbformat"
-version = "5.9.2"
+version = "5.10.4"
description = "The Jupyter Notebook format"
optional = false
python-versions = ">=3.8"
files = [
- {file = "nbformat-5.9.2-py3-none-any.whl", hash = "sha256:1c5172d786a41b82bcfd0c23f9e6b6f072e8fb49c39250219e4acfff1efe89e9"},
- {file = "nbformat-5.9.2.tar.gz", hash = "sha256:5f98b5ba1997dff175e77e0c17d5c10a96eaed2cbd1de3533d1fc35d5e111192"},
+ {file = "nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b"},
+ {file = "nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a"},
]
[package.dependencies]
-fastjsonschema = "*"
+fastjsonschema = ">=2.15"
jsonschema = ">=2.6"
-jupyter-core = "*"
+jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0"
traitlets = ">=5.1"
[package.extras]
@@ -2636,13 +2740,13 @@ setuptools = "*"
[[package]]
name = "notebook"
-version = "7.1.0"
+version = "7.1.3"
description = "Jupyter Notebook - A web-based notebook environment for interactive computing"
optional = false
python-versions = ">=3.8"
files = [
- {file = "notebook-7.1.0-py3-none-any.whl", hash = "sha256:a8fa4ccb5e5fe220f29d9900337efd7752bc6f2efe004d6f320db01f7743adc9"},
- {file = "notebook-7.1.0.tar.gz", hash = "sha256:99caf01ff166b1cc86355c9b37c1ba9bf566c1d7fc4ab57bb6f8f24e36c4260e"},
+ {file = "notebook-7.1.3-py3-none-any.whl", hash = "sha256:919b911e59f41f6e3857ce93c9d93535ba66bb090059712770e5968c07e1004d"},
+ {file = "notebook-7.1.3.tar.gz", hash = "sha256:41fcebff44cf7bb9377180808bcbae066629b55d8c7722f1ebbe75ca44f9cfc1"},
]
[package.dependencies]
@@ -2809,13 +2913,13 @@ files = [
[[package]]
name = "packaging"
-version = "23.2"
+version = "24.0"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.7"
files = [
- {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
- {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
+ {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"},
+ {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
]
[[package]]
@@ -2878,18 +2982,18 @@ files = [
[[package]]
name = "parso"
-version = "0.8.3"
+version = "0.8.4"
description = "A Python Parser"
optional = false
python-versions = ">=3.6"
files = [
- {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"},
- {file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"},
+ {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"},
+ {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"},
]
[package.extras]
-qa = ["flake8 (==3.8.3)", "mypy (==0.782)"]
-testing = ["docopt", "pytest (<6.0.0)"]
+qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
+testing = ["docopt", "pytest"]
[[package]]
name = "pathspec"
@@ -2929,79 +3033,80 @@ files = [
[[package]]
name = "pillow"
-version = "10.2.0"
+version = "10.3.0"
description = "Python Imaging Library (Fork)"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pillow-10.2.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:7823bdd049099efa16e4246bdf15e5a13dbb18a51b68fa06d6c1d4d8b99a796e"},
- {file = "pillow-10.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:83b2021f2ade7d1ed556bc50a399127d7fb245e725aa0113ebd05cfe88aaf588"},
- {file = "pillow-10.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fad5ff2f13d69b7e74ce5b4ecd12cc0ec530fcee76356cac6742785ff71c452"},
- {file = "pillow-10.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da2b52b37dad6d9ec64e653637a096905b258d2fc2b984c41ae7d08b938a67e4"},
- {file = "pillow-10.2.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:47c0995fc4e7f79b5cfcab1fc437ff2890b770440f7696a3ba065ee0fd496563"},
- {file = "pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:322bdf3c9b556e9ffb18f93462e5f749d3444ce081290352c6070d014c93feb2"},
- {file = "pillow-10.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:51f1a1bffc50e2e9492e87d8e09a17c5eea8409cda8d3f277eb6edc82813c17c"},
- {file = "pillow-10.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:69ffdd6120a4737710a9eee73e1d2e37db89b620f702754b8f6e62594471dee0"},
- {file = "pillow-10.2.0-cp310-cp310-win32.whl", hash = "sha256:c6dafac9e0f2b3c78df97e79af707cdc5ef8e88208d686a4847bab8266870023"},
- {file = "pillow-10.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:aebb6044806f2e16ecc07b2a2637ee1ef67a11840a66752751714a0d924adf72"},
- {file = "pillow-10.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:7049e301399273a0136ff39b84c3678e314f2158f50f517bc50285fb5ec847ad"},
- {file = "pillow-10.2.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:35bb52c37f256f662abdfa49d2dfa6ce5d93281d323a9af377a120e89a9eafb5"},
- {file = "pillow-10.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c23f307202661071d94b5e384e1e1dc7dfb972a28a2310e4ee16103e66ddb67"},
- {file = "pillow-10.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:773efe0603db30c281521a7c0214cad7836c03b8ccff897beae9b47c0b657d61"},
- {file = "pillow-10.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11fa2e5984b949b0dd6d7a94d967743d87c577ff0b83392f17cb3990d0d2fd6e"},
- {file = "pillow-10.2.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:716d30ed977be8b37d3ef185fecb9e5a1d62d110dfbdcd1e2a122ab46fddb03f"},
- {file = "pillow-10.2.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a086c2af425c5f62a65e12fbf385f7c9fcb8f107d0849dba5839461a129cf311"},
- {file = "pillow-10.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c8de2789052ed501dd829e9cae8d3dcce7acb4777ea4a479c14521c942d395b1"},
- {file = "pillow-10.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:609448742444d9290fd687940ac0b57fb35e6fd92bdb65386e08e99af60bf757"},
- {file = "pillow-10.2.0-cp311-cp311-win32.whl", hash = "sha256:823ef7a27cf86df6597fa0671066c1b596f69eba53efa3d1e1cb8b30f3533068"},
- {file = "pillow-10.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:1da3b2703afd040cf65ec97efea81cfba59cdbed9c11d8efc5ab09df9509fc56"},
- {file = "pillow-10.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:edca80cbfb2b68d7b56930b84a0e45ae1694aeba0541f798e908a49d66b837f1"},
- {file = "pillow-10.2.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:1b5e1b74d1bd1b78bc3477528919414874748dd363e6272efd5abf7654e68bef"},
- {file = "pillow-10.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0eae2073305f451d8ecacb5474997c08569fb4eb4ac231ffa4ad7d342fdc25ac"},
- {file = "pillow-10.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7c2286c23cd350b80d2fc9d424fc797575fb16f854b831d16fd47ceec078f2c"},
- {file = "pillow-10.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e23412b5c41e58cec602f1135c57dfcf15482013ce6e5f093a86db69646a5aa"},
- {file = "pillow-10.2.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:52a50aa3fb3acb9cf7213573ef55d31d6eca37f5709c69e6858fe3bc04a5c2a2"},
- {file = "pillow-10.2.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:127cee571038f252a552760076407f9cff79761c3d436a12af6000cd182a9d04"},
- {file = "pillow-10.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8d12251f02d69d8310b046e82572ed486685c38f02176bd08baf216746eb947f"},
- {file = "pillow-10.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:54f1852cd531aa981bc0965b7d609f5f6cc8ce8c41b1139f6ed6b3c54ab82bfb"},
- {file = "pillow-10.2.0-cp312-cp312-win32.whl", hash = "sha256:257d8788df5ca62c980314053197f4d46eefedf4e6175bc9412f14412ec4ea2f"},
- {file = "pillow-10.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:154e939c5f0053a383de4fd3d3da48d9427a7e985f58af8e94d0b3c9fcfcf4f9"},
- {file = "pillow-10.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:f379abd2f1e3dddb2b61bc67977a6b5a0a3f7485538bcc6f39ec76163891ee48"},
- {file = "pillow-10.2.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8373c6c251f7ef8bda6675dd6d2b3a0fcc31edf1201266b5cf608b62a37407f9"},
- {file = "pillow-10.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:870ea1ada0899fd0b79643990809323b389d4d1d46c192f97342eeb6ee0b8483"},
- {file = "pillow-10.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4b6b1e20608493548b1f32bce8cca185bf0480983890403d3b8753e44077129"},
- {file = "pillow-10.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3031709084b6e7852d00479fd1d310b07d0ba82765f973b543c8af5061cf990e"},
- {file = "pillow-10.2.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:3ff074fc97dd4e80543a3e91f69d58889baf2002b6be64347ea8cf5533188213"},
- {file = "pillow-10.2.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:cb4c38abeef13c61d6916f264d4845fab99d7b711be96c326b84df9e3e0ff62d"},
- {file = "pillow-10.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b1b3020d90c2d8e1dae29cf3ce54f8094f7938460fb5ce8bc5c01450b01fbaf6"},
- {file = "pillow-10.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:170aeb00224ab3dc54230c797f8404507240dd868cf52066f66a41b33169bdbe"},
- {file = "pillow-10.2.0-cp38-cp38-win32.whl", hash = "sha256:c4225f5220f46b2fde568c74fca27ae9771536c2e29d7c04f4fb62c83275ac4e"},
- {file = "pillow-10.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:0689b5a8c5288bc0504d9fcee48f61a6a586b9b98514d7d29b840143d6734f39"},
- {file = "pillow-10.2.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:b792a349405fbc0163190fde0dc7b3fef3c9268292586cf5645598b48e63dc67"},
- {file = "pillow-10.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c570f24be1e468e3f0ce7ef56a89a60f0e05b30a3669a459e419c6eac2c35364"},
- {file = "pillow-10.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8ecd059fdaf60c1963c58ceb8997b32e9dc1b911f5da5307aab614f1ce5c2fb"},
- {file = "pillow-10.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c365fd1703040de1ec284b176d6af5abe21b427cb3a5ff68e0759e1e313a5e7e"},
- {file = "pillow-10.2.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:70c61d4c475835a19b3a5aa42492409878bbca7438554a1f89d20d58a7c75c01"},
- {file = "pillow-10.2.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b6f491cdf80ae540738859d9766783e3b3c8e5bd37f5dfa0b76abdecc5081f13"},
- {file = "pillow-10.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d189550615b4948f45252d7f005e53c2040cea1af5b60d6f79491a6e147eef7"},
- {file = "pillow-10.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:49d9ba1ed0ef3e061088cd1e7538a0759aab559e2e0a80a36f9fd9d8c0c21591"},
- {file = "pillow-10.2.0-cp39-cp39-win32.whl", hash = "sha256:babf5acfede515f176833ed6028754cbcd0d206f7f614ea3447d67c33be12516"},
- {file = "pillow-10.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:0304004f8067386b477d20a518b50f3fa658a28d44e4116970abfcd94fac34a8"},
- {file = "pillow-10.2.0-cp39-cp39-win_arm64.whl", hash = "sha256:0fb3e7fc88a14eacd303e90481ad983fd5b69c761e9e6ef94c983f91025da869"},
- {file = "pillow-10.2.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:322209c642aabdd6207517e9739c704dc9f9db943015535783239022002f054a"},
- {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3eedd52442c0a5ff4f887fab0c1c0bb164d8635b32c894bc1faf4c618dd89df2"},
- {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb28c753fd5eb3dd859b4ee95de66cc62af91bcff5db5f2571d32a520baf1f04"},
- {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:33870dc4653c5017bf4c8873e5488d8f8d5f8935e2f1fb9a2208c47cdd66efd2"},
- {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3c31822339516fb3c82d03f30e22b1d038da87ef27b6a78c9549888f8ceda39a"},
- {file = "pillow-10.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a2b56ba36e05f973d450582fb015594aaa78834fefe8dfb8fcd79b93e64ba4c6"},
- {file = "pillow-10.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d8e6aeb9201e655354b3ad049cb77d19813ad4ece0df1249d3c793de3774f8c7"},
- {file = "pillow-10.2.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:2247178effb34a77c11c0e8ac355c7a741ceca0a732b27bf11e747bbc950722f"},
- {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15587643b9e5eb26c48e49a7b33659790d28f190fc514a322d55da2fb5c2950e"},
- {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753cd8f2086b2b80180d9b3010dd4ed147efc167c90d3bf593fe2af21265e5a5"},
- {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7c8f97e8e7a9009bcacbe3766a36175056c12f9a44e6e6f2d5caad06dcfbf03b"},
- {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d1b35bcd6c5543b9cb547dee3150c93008f8dd0f1fef78fc0cd2b141c5baf58a"},
- {file = "pillow-10.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe4c15f6c9285dc54ce6553a3ce908ed37c8f3825b5a51a15c91442bb955b868"},
- {file = "pillow-10.2.0.tar.gz", hash = "sha256:e87f0b2c78157e12d7686b27d63c070fd65d994e8ddae6f328e0dcf4a0cd007e"},
+ {file = "pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45"},
+ {file = "pillow-10.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c"},
+ {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78618cdbccaa74d3f88d0ad6cb8ac3007f1a6fa5c6f19af64b55ca170bfa1edf"},
+ {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261ddb7ca91fcf71757979534fb4c128448b5b4c55cb6152d280312062f69599"},
+ {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ce49c67f4ea0609933d01c0731b34b8695a7a748d6c8d186f95e7d085d2fe475"},
+ {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b14f16f94cbc61215115b9b1236f9c18403c15dd3c52cf629072afa9d54c1cbf"},
+ {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d33891be6df59d93df4d846640f0e46f1a807339f09e79a8040bc887bdcd7ed3"},
+ {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b50811d664d392f02f7761621303eba9d1b056fb1868c8cdf4231279645c25f5"},
+ {file = "pillow-10.3.0-cp310-cp310-win32.whl", hash = "sha256:ca2870d5d10d8726a27396d3ca4cf7976cec0f3cb706debe88e3a5bd4610f7d2"},
+ {file = "pillow-10.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f"},
+ {file = "pillow-10.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:ccce24b7ad89adb5a1e34a6ba96ac2530046763912806ad4c247356a8f33a67b"},
+ {file = "pillow-10.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795"},
+ {file = "pillow-10.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57"},
+ {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdcbb4068117dfd9ce0138d068ac512843c52295ed996ae6dd1faf537b6dbc27"},
+ {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9797a6c8fe16f25749b371c02e2ade0efb51155e767a971c61734b1bf6293994"},
+ {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:9e91179a242bbc99be65e139e30690e081fe6cb91a8e77faf4c409653de39451"},
+ {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b87bd9d81d179bd8ab871603bd80d8645729939f90b71e62914e816a76fc6bd"},
+ {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:81d09caa7b27ef4e61cb7d8fbf1714f5aec1c6b6c5270ee53504981e6e9121ad"},
+ {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c"},
+ {file = "pillow-10.3.0-cp311-cp311-win32.whl", hash = "sha256:7161ec49ef0800947dc5570f86568a7bb36fa97dd09e9827dc02b718c5643f09"},
+ {file = "pillow-10.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:8eb0908e954d093b02a543dc963984d6e99ad2b5e36503d8a0aaf040505f747d"},
+ {file = "pillow-10.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:4e6f7d1c414191c1199f8996d3f2282b9ebea0945693fb67392c75a3a320941f"},
+ {file = "pillow-10.3.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:e46f38133e5a060d46bd630faa4d9fa0202377495df1f068a8299fd78c84de84"},
+ {file = "pillow-10.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:50b8eae8f7334ec826d6eeffaeeb00e36b5e24aa0b9df322c247539714c6df19"},
+ {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d3bea1c75f8c53ee4d505c3e67d8c158ad4df0d83170605b50b64025917f338"},
+ {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19aeb96d43902f0a783946a0a87dbdad5c84c936025b8419da0a0cd7724356b1"},
+ {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74d28c17412d9caa1066f7a31df8403ec23d5268ba46cd0ad2c50fb82ae40462"},
+ {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ff61bfd9253c3915e6d41c651d5f962da23eda633cf02262990094a18a55371a"},
+ {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d886f5d353333b4771d21267c7ecc75b710f1a73d72d03ca06df49b09015a9ef"},
+ {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b5ec25d8b17217d635f8935dbc1b9aa5907962fae29dff220f2659487891cd3"},
+ {file = "pillow-10.3.0-cp312-cp312-win32.whl", hash = "sha256:51243f1ed5161b9945011a7360e997729776f6e5d7005ba0c6879267d4c5139d"},
+ {file = "pillow-10.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:412444afb8c4c7a6cc11a47dade32982439925537e483be7c0ae0cf96c4f6a0b"},
+ {file = "pillow-10.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:798232c92e7665fe82ac085f9d8e8ca98826f8e27859d9a96b41d519ecd2e49a"},
+ {file = "pillow-10.3.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:4eaa22f0d22b1a7e93ff0a596d57fdede2e550aecffb5a1ef1106aaece48e96b"},
+ {file = "pillow-10.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cd5e14fbf22a87321b24c88669aad3a51ec052eb145315b3da3b7e3cc105b9a2"},
+ {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1530e8f3a4b965eb6a7785cf17a426c779333eb62c9a7d1bbcf3ffd5bf77a4aa"},
+ {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d512aafa1d32efa014fa041d38868fda85028e3f930a96f85d49c7d8ddc0383"},
+ {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:339894035d0ede518b16073bdc2feef4c991ee991a29774b33e515f1d308e08d"},
+ {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:aa7e402ce11f0885305bfb6afb3434b3cd8f53b563ac065452d9d5654c7b86fd"},
+ {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0ea2a783a2bdf2a561808fe4a7a12e9aa3799b701ba305de596bc48b8bdfce9d"},
+ {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c78e1b00a87ce43bb37642c0812315b411e856a905d58d597750eb79802aaaa3"},
+ {file = "pillow-10.3.0-cp38-cp38-win32.whl", hash = "sha256:72d622d262e463dfb7595202d229f5f3ab4b852289a1cd09650362db23b9eb0b"},
+ {file = "pillow-10.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:2034f6759a722da3a3dbd91a81148cf884e91d1b747992ca288ab88c1de15999"},
+ {file = "pillow-10.3.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2ed854e716a89b1afcedea551cd85f2eb2a807613752ab997b9974aaa0d56936"},
+ {file = "pillow-10.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dc1a390a82755a8c26c9964d457d4c9cbec5405896cba94cf51f36ea0d855002"},
+ {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4203efca580f0dd6f882ca211f923168548f7ba334c189e9eab1178ab840bf60"},
+ {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3102045a10945173d38336f6e71a8dc71bcaeed55c3123ad4af82c52807b9375"},
+ {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:6fb1b30043271ec92dc65f6d9f0b7a830c210b8a96423074b15c7bc999975f57"},
+ {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8"},
+ {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b09b86b27a064c9624d0a6c54da01c1beaf5b6cadfa609cf63789b1d08a797b9"},
+ {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d3b2348a78bc939b4fed6552abfd2e7988e0f81443ef3911a4b8498ca084f6eb"},
+ {file = "pillow-10.3.0-cp39-cp39-win32.whl", hash = "sha256:45ebc7b45406febf07fef35d856f0293a92e7417ae7933207e90bf9090b70572"},
+ {file = "pillow-10.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:0ba26351b137ca4e0db0342d5d00d2e355eb29372c05afd544ebf47c0956ffeb"},
+ {file = "pillow-10.3.0-cp39-cp39-win_arm64.whl", hash = "sha256:50fd3f6b26e3441ae07b7c979309638b72abc1a25da31a81a7fbd9495713ef4f"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:6b02471b72526ab8a18c39cb7967b72d194ec53c1fd0a70b050565a0f366d355"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8ab74c06ffdab957d7670c2a5a6e1a70181cd10b727cd788c4dd9005b6a8acd9"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2ec1e921fd07c7cda7962bad283acc2f2a9ccc1b971ee4b216b75fad6f0463"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c8e73e99da7db1b4cad7f8d682cf6abad7844da39834c288fbfa394a47bbced"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:16563993329b79513f59142a6b02055e10514c1a8e86dca8b48a893e33cf91e3"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:aff76a55a8aa8364d25400a210a65ff59d0168e0b4285ba6bf2bd83cf675ba32"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b7bc2176354defba3edc2b9a777744462da2f8e921fbaf61e52acb95bafa9828"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:793b4e24db2e8742ca6423d3fde8396db336698c55cd34b660663ee9e45ed37f"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c83341b89884e2b2e55886e8fbbf37c3fa5efd6c8907124aeb72f285ae5696e5"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1a1d1915db1a4fdb2754b9de292642a39a7fb28f1736699527bb649484fb966a"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a0eaa93d054751ee9964afa21c06247779b90440ca41d184aeb5d410f20ff591"},
+ {file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"},
]
[package.extras]
@@ -3040,13 +3145,13 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co
[[package]]
name = "pluggy"
-version = "1.4.0"
+version = "1.5.0"
description = "plugin and hook calling mechanisms for python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"},
- {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"},
+ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
+ {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
]
[package.extras]
@@ -3200,43 +3305,43 @@ tests = ["pytest"]
[[package]]
name = "pybind11"
-version = "2.11.1"
+version = "2.12.0"
description = "Seamless operability between C++11 and Python"
optional = false
python-versions = ">=3.6"
files = [
- {file = "pybind11-2.11.1-py3-none-any.whl", hash = "sha256:33cdd02a6453380dd71cc70357ce388ad1ee8d32bd0e38fc22b273d050aa29b3"},
- {file = "pybind11-2.11.1.tar.gz", hash = "sha256:00cd59116a6e8155aecd9174f37ba299d1d397ed4a6b86ac1dfe01b3e40f2cc4"},
+ {file = "pybind11-2.12.0-py3-none-any.whl", hash = "sha256:df8d60b94f9e714d81013db233393d430ebf9f3551642b82291cf1b14d1afdbd"},
+ {file = "pybind11-2.12.0.tar.gz", hash = "sha256:5e3c557a84b06b969247630407fc4d985bed157b4253b13153b8e8e165e0c3dc"},
]
[package.extras]
-global = ["pybind11-global (==2.11.1)"]
+global = ["pybind11-global (==2.12.0)"]
[[package]]
name = "pycparser"
-version = "2.21"
+version = "2.22"
description = "C parser in Python"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+python-versions = ">=3.8"
files = [
- {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
- {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
+ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
+ {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
]
[[package]]
name = "pydantic"
-version = "2.6.1"
+version = "2.7.1"
description = "Data validation using Python type hints"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pydantic-2.6.1-py3-none-any.whl", hash = "sha256:0b6a909df3192245cb736509a92ff69e4fef76116feffec68e93a567347bae6f"},
- {file = "pydantic-2.6.1.tar.gz", hash = "sha256:4fd5c182a2488dc63e6d32737ff19937888001e2a6d86e94b3f233104a5d1fa9"},
+ {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"},
+ {file = "pydantic-2.7.1.tar.gz", hash = "sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"},
]
[package.dependencies]
annotated-types = ">=0.4.0"
-pydantic-core = "2.16.2"
+pydantic-core = "2.18.2"
typing-extensions = ">=4.6.1"
[package.extras]
@@ -3244,90 +3349,90 @@ email = ["email-validator (>=2.0.0)"]
[[package]]
name = "pydantic-core"
-version = "2.16.2"
-description = ""
+version = "2.18.2"
+description = "Core functionality for Pydantic validation and serialization"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pydantic_core-2.16.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3fab4e75b8c525a4776e7630b9ee48aea50107fea6ca9f593c98da3f4d11bf7c"},
- {file = "pydantic_core-2.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8bde5b48c65b8e807409e6f20baee5d2cd880e0fad00b1a811ebc43e39a00ab2"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2924b89b16420712e9bb8192396026a8fbd6d8726224f918353ac19c4c043d2a"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16aa02e7a0f539098e215fc193c8926c897175d64c7926d00a36188917717a05"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:936a787f83db1f2115ee829dd615c4f684ee48ac4de5779ab4300994d8af325b"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:459d6be6134ce3b38e0ef76f8a672924460c455d45f1ad8fdade36796df1ddc8"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9ee4febb249c591d07b2d4dd36ebcad0ccd128962aaa1801508320896575ef"},
- {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40a0bd0bed96dae5712dab2aba7d334a6c67cbcac2ddfca7dbcc4a8176445990"},
- {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:870dbfa94de9b8866b37b867a2cb37a60c401d9deb4a9ea392abf11a1f98037b"},
- {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:308974fdf98046db28440eb3377abba274808bf66262e042c412eb2adf852731"},
- {file = "pydantic_core-2.16.2-cp310-none-win32.whl", hash = "sha256:a477932664d9611d7a0816cc3c0eb1f8856f8a42435488280dfbf4395e141485"},
- {file = "pydantic_core-2.16.2-cp310-none-win_amd64.whl", hash = "sha256:8f9142a6ed83d90c94a3efd7af8873bf7cefed2d3d44387bf848888482e2d25f"},
- {file = "pydantic_core-2.16.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:406fac1d09edc613020ce9cf3f2ccf1a1b2f57ab00552b4c18e3d5276c67eb11"},
- {file = "pydantic_core-2.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce232a6170dd6532096cadbf6185271e4e8c70fc9217ebe105923ac105da9978"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a90fec23b4b05a09ad988e7a4f4e081711a90eb2a55b9c984d8b74597599180f"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8aafeedb6597a163a9c9727d8a8bd363a93277701b7bfd2749fbefee2396469e"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9957433c3a1b67bdd4c63717eaf174ebb749510d5ea612cd4e83f2d9142f3fc8"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0d7a9165167269758145756db43a133608a531b1e5bb6a626b9ee24bc38a8f7"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dffaf740fe2e147fedcb6b561353a16243e654f7fe8e701b1b9db148242e1272"},
- {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8ed79883b4328b7f0bd142733d99c8e6b22703e908ec63d930b06be3a0e7113"},
- {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cf903310a34e14651c9de056fcc12ce090560864d5a2bb0174b971685684e1d8"},
- {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:46b0d5520dbcafea9a8645a8164658777686c5c524d381d983317d29687cce97"},
- {file = "pydantic_core-2.16.2-cp311-none-win32.whl", hash = "sha256:70651ff6e663428cea902dac297066d5c6e5423fda345a4ca62430575364d62b"},
- {file = "pydantic_core-2.16.2-cp311-none-win_amd64.whl", hash = "sha256:98dc6f4f2095fc7ad277782a7c2c88296badcad92316b5a6e530930b1d475ebc"},
- {file = "pydantic_core-2.16.2-cp311-none-win_arm64.whl", hash = "sha256:ef6113cd31411eaf9b39fc5a8848e71c72656fd418882488598758b2c8c6dfa0"},
- {file = "pydantic_core-2.16.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:88646cae28eb1dd5cd1e09605680c2b043b64d7481cdad7f5003ebef401a3039"},
- {file = "pydantic_core-2.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7b883af50eaa6bb3299780651e5be921e88050ccf00e3e583b1e92020333304b"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bf26c2e2ea59d32807081ad51968133af3025c4ba5753e6a794683d2c91bf6e"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99af961d72ac731aae2a1b55ccbdae0733d816f8bfb97b41909e143de735f522"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02906e7306cb8c5901a1feb61f9ab5e5c690dbbeaa04d84c1b9ae2a01ebe9379"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5362d099c244a2d2f9659fb3c9db7c735f0004765bbe06b99be69fbd87c3f15"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac426704840877a285d03a445e162eb258924f014e2f074e209d9b4ff7bf380"},
- {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b94cbda27267423411c928208e89adddf2ea5dd5f74b9528513f0358bba019cb"},
- {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6db58c22ac6c81aeac33912fb1af0e930bc9774166cdd56eade913d5f2fff35e"},
- {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396fdf88b1b503c9c59c84a08b6833ec0c3b5ad1a83230252a9e17b7dfb4cffc"},
- {file = "pydantic_core-2.16.2-cp312-none-win32.whl", hash = "sha256:7c31669e0c8cc68400ef0c730c3a1e11317ba76b892deeefaf52dcb41d56ed5d"},
- {file = "pydantic_core-2.16.2-cp312-none-win_amd64.whl", hash = "sha256:a3b7352b48fbc8b446b75f3069124e87f599d25afb8baa96a550256c031bb890"},
- {file = "pydantic_core-2.16.2-cp312-none-win_arm64.whl", hash = "sha256:a9e523474998fb33f7c1a4d55f5504c908d57add624599e095c20fa575b8d943"},
- {file = "pydantic_core-2.16.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:ae34418b6b389d601b31153b84dce480351a352e0bb763684a1b993d6be30f17"},
- {file = "pydantic_core-2.16.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:732bd062c9e5d9582a30e8751461c1917dd1ccbdd6cafb032f02c86b20d2e7ec"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b52776a2e3230f4854907a1e0946eec04d41b1fc64069ee774876bbe0eab55"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ef551c053692b1e39e3f7950ce2296536728871110e7d75c4e7753fb30ca87f4"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ebb892ed8599b23fa8f1799e13a12c87a97a6c9d0f497525ce9858564c4575a4"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa6c8c582036275997a733427b88031a32ffa5dfc3124dc25a730658c47a572f"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ba0884a91f1aecce75202473ab138724aa4fb26d7707f2e1fa6c3e68c84fbf"},
- {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7924e54f7ce5d253d6160090ddc6df25ed2feea25bfb3339b424a9dd591688bc"},
- {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69a7b96b59322a81c2203be537957313b07dd333105b73db0b69212c7d867b4b"},
- {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7e6231aa5bdacda78e96ad7b07d0c312f34ba35d717115f4b4bff6cb87224f0f"},
- {file = "pydantic_core-2.16.2-cp38-none-win32.whl", hash = "sha256:41dac3b9fce187a25c6253ec79a3f9e2a7e761eb08690e90415069ea4a68ff7a"},
- {file = "pydantic_core-2.16.2-cp38-none-win_amd64.whl", hash = "sha256:f685dbc1fdadb1dcd5b5e51e0a378d4685a891b2ddaf8e2bba89bd3a7144e44a"},
- {file = "pydantic_core-2.16.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:55749f745ebf154c0d63d46c8c58594d8894b161928aa41adbb0709c1fe78b77"},
- {file = "pydantic_core-2.16.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b30b0dd58a4509c3bd7eefddf6338565c4905406aee0c6e4a5293841411a1286"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18de31781cdc7e7b28678df7c2d7882f9692ad060bc6ee3c94eb15a5d733f8f7"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5864b0242f74b9dd0b78fd39db1768bc3f00d1ffc14e596fd3e3f2ce43436a33"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8f9186ca45aee030dc8234118b9c0784ad91a0bb27fc4e7d9d6608a5e3d386c"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc6f6c9be0ab6da37bc77c2dda5f14b1d532d5dbef00311ee6e13357a418e646"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa057095f621dad24a1e906747179a69780ef45cc8f69e97463692adbcdae878"},
- {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ad84731a26bcfb299f9eab56c7932d46f9cad51c52768cace09e92a19e4cf55"},
- {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3b052c753c4babf2d1edc034c97851f867c87d6f3ea63a12e2700f159f5c41c3"},
- {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e0f686549e32ccdb02ae6f25eee40cc33900910085de6aa3790effd391ae10c2"},
- {file = "pydantic_core-2.16.2-cp39-none-win32.whl", hash = "sha256:7afb844041e707ac9ad9acad2188a90bffce2c770e6dc2318be0c9916aef1469"},
- {file = "pydantic_core-2.16.2-cp39-none-win_amd64.whl", hash = "sha256:9da90d393a8227d717c19f5397688a38635afec89f2e2d7af0df037f3249c39a"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f60f920691a620b03082692c378661947d09415743e437a7478c309eb0e4f82"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:47924039e785a04d4a4fa49455e51b4eb3422d6eaacfde9fc9abf8fdef164e8a"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6294e76b0380bb7a61eb8a39273c40b20beb35e8c87ee101062834ced19c545"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe56851c3f1d6f5384b3051c536cc81b3a93a73faf931f404fef95217cf1e10d"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9d776d30cde7e541b8180103c3f294ef7c1862fd45d81738d156d00551005784"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:72f7919af5de5ecfaf1eba47bf9a5d8aa089a3340277276e5636d16ee97614d7"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:4bfcbde6e06c56b30668a0c872d75a7ef3025dc3c1823a13cf29a0e9b33f67e8"},
- {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ff7c97eb7a29aba230389a2661edf2e9e06ce616c7e35aa764879b6894a44b25"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9b5f13857da99325dcabe1cc4e9e6a3d7b2e2c726248ba5dd4be3e8e4a0b6d0e"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a7e41e3ada4cca5f22b478c08e973c930e5e6c7ba3588fb8e35f2398cdcc1545"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60eb8ceaa40a41540b9acae6ae7c1f0a67d233c40dc4359c256ad2ad85bdf5e5"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7beec26729d496a12fd23cf8da9944ee338c8b8a17035a560b585c36fe81af20"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:22c5f022799f3cd6741e24f0443ead92ef42be93ffda0d29b2597208c94c3753"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:eca58e319f4fd6df004762419612122b2c7e7d95ffafc37e890252f869f3fb2a"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed957db4c33bc99895f3a1672eca7e80e8cda8bd1e29a80536b4ec2153fa9804"},
- {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:459c0d338cc55d099798618f714b21b7ece17eb1a87879f2da20a3ff4c7628e2"},
- {file = "pydantic_core-2.16.2.tar.gz", hash = "sha256:0ba503850d8b8dcc18391f10de896ae51d37fe5fe43dbfb6a35c5c5cad271a06"},
+ {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"},
+ {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"},
+ {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"},
+ {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"},
+ {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"},
+ {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"},
+ {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"},
+ {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"},
+ {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"},
+ {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"},
+ {file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"},
+ {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"},
+ {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"},
+ {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"},
+ {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"},
+ {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"},
+ {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"},
+ {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"},
+ {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"},
+ {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"},
+ {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"},
+ {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"},
+ {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"},
+ {file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"},
+ {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"},
+ {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"},
+ {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"},
+ {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"},
+ {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"},
+ {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"},
+ {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"},
+ {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"},
+ {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"},
+ {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"},
+ {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"},
+ {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"},
+ {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"},
+ {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"},
+ {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"},
+ {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"},
+ {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"},
+ {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"},
+ {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"},
+ {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"},
+ {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"},
+ {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"},
+ {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"},
+ {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"},
+ {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"},
+ {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"},
+ {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"},
+ {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"},
+ {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"},
+ {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"},
+ {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"},
+ {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"},
+ {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"},
+ {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"},
+ {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"},
+ {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"},
+ {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"},
+ {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"},
+ {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"},
+ {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"},
+ {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"},
+ {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"},
+ {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"},
+ {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"},
+ {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"},
+ {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"},
+ {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"},
+ {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"},
+ {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"},
+ {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"},
+ {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"},
+ {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"},
+ {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"},
+ {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"},
+ {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"},
]
[package.dependencies]
@@ -3335,28 +3440,27 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
[[package]]
name = "pygments"
-version = "2.17.2"
+version = "2.18.0"
description = "Pygments is a syntax highlighting package written in Python."
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "pygments-2.17.2-py3-none-any.whl", hash = "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c"},
- {file = "pygments-2.17.2.tar.gz", hash = "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367"},
+ {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"},
+ {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"},
]
[package.extras]
-plugins = ["importlib-metadata"]
windows-terminal = ["colorama (>=0.4.6)"]
[[package]]
name = "pyparsing"
-version = "3.1.1"
+version = "3.1.2"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
optional = false
python-versions = ">=3.6.8"
files = [
- {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"},
- {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"},
+ {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"},
+ {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"},
]
[package.extras]
@@ -3364,13 +3468,13 @@ diagrams = ["jinja2", "railroad-diagrams"]
[[package]]
name = "python-dateutil"
-version = "2.8.2"
+version = "2.9.0.post0"
description = "Extensions to the standard Python datetime module"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
files = [
- {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
- {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
+ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
+ {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
]
[package.dependencies]
@@ -3454,17 +3558,17 @@ files = [
[[package]]
name = "pywinpty"
-version = "2.0.12"
+version = "2.0.13"
description = "Pseudo terminal support for Windows from Python."
optional = false
python-versions = ">=3.8"
files = [
- {file = "pywinpty-2.0.12-cp310-none-win_amd64.whl", hash = "sha256:21319cd1d7c8844fb2c970fb3a55a3db5543f112ff9cfcd623746b9c47501575"},
- {file = "pywinpty-2.0.12-cp311-none-win_amd64.whl", hash = "sha256:853985a8f48f4731a716653170cd735da36ffbdc79dcb4c7b7140bce11d8c722"},
- {file = "pywinpty-2.0.12-cp312-none-win_amd64.whl", hash = "sha256:1617b729999eb6713590e17665052b1a6ae0ad76ee31e60b444147c5b6a35dca"},
- {file = "pywinpty-2.0.12-cp38-none-win_amd64.whl", hash = "sha256:189380469ca143d06e19e19ff3fba0fcefe8b4a8cc942140a6b863aed7eebb2d"},
- {file = "pywinpty-2.0.12-cp39-none-win_amd64.whl", hash = "sha256:7520575b6546db23e693cbd865db2764097bd6d4ef5dc18c92555904cd62c3d4"},
- {file = "pywinpty-2.0.12.tar.gz", hash = "sha256:8197de460ae8ebb7f5d1701dfa1b5df45b157bb832e92acba316305e18ca00dd"},
+ {file = "pywinpty-2.0.13-cp310-none-win_amd64.whl", hash = "sha256:697bff211fb5a6508fee2dc6ff174ce03f34a9a233df9d8b5fe9c8ce4d5eaf56"},
+ {file = "pywinpty-2.0.13-cp311-none-win_amd64.whl", hash = "sha256:b96fb14698db1284db84ca38c79f15b4cfdc3172065b5137383910567591fa99"},
+ {file = "pywinpty-2.0.13-cp312-none-win_amd64.whl", hash = "sha256:2fd876b82ca750bb1333236ce98488c1be96b08f4f7647cfdf4129dfad83c2d4"},
+ {file = "pywinpty-2.0.13-cp38-none-win_amd64.whl", hash = "sha256:61d420c2116c0212808d31625611b51caf621fe67f8a6377e2e8b617ea1c1f7d"},
+ {file = "pywinpty-2.0.13-cp39-none-win_amd64.whl", hash = "sha256:71cb613a9ee24174730ac7ae439fd179ca34ccb8c5349e8d7b72ab5dea2c6f4b"},
+ {file = "pywinpty-2.0.13.tar.gz", hash = "sha256:c34e32351a3313ddd0d7da23d27f835c860d32fe4ac814d372a3ea9594f41dde"},
]
[[package]]
@@ -3528,104 +3632,99 @@ files = [
[[package]]
name = "pyzmq"
-version = "25.1.2"
+version = "26.0.3"
description = "Python bindings for 0MQ"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
files = [
- {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:e624c789359f1a16f83f35e2c705d07663ff2b4d4479bad35621178d8f0f6ea4"},
- {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:49151b0efece79f6a79d41a461d78535356136ee70084a1c22532fc6383f4ad0"},
- {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9a5f194cf730f2b24d6af1f833c14c10f41023da46a7f736f48b6d35061e76e"},
- {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:faf79a302f834d9e8304fafdc11d0d042266667ac45209afa57e5efc998e3872"},
- {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f51a7b4ead28d3fca8dda53216314a553b0f7a91ee8fc46a72b402a78c3e43d"},
- {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0ddd6d71d4ef17ba5a87becf7ddf01b371eaba553c603477679ae817a8d84d75"},
- {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:246747b88917e4867e2367b005fc8eefbb4a54b7db363d6c92f89d69abfff4b6"},
- {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:00c48ae2fd81e2a50c3485de1b9d5c7c57cd85dc8ec55683eac16846e57ac979"},
- {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a68d491fc20762b630e5db2191dd07ff89834086740f70e978bb2ef2668be08"},
- {file = "pyzmq-25.1.2-cp310-cp310-win32.whl", hash = "sha256:09dfe949e83087da88c4a76767df04b22304a682d6154de2c572625c62ad6886"},
- {file = "pyzmq-25.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:fa99973d2ed20417744fca0073390ad65ce225b546febb0580358e36aa90dba6"},
- {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:82544e0e2d0c1811482d37eef297020a040c32e0687c1f6fc23a75b75db8062c"},
- {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:01171fc48542348cd1a360a4b6c3e7d8f46cdcf53a8d40f84db6707a6768acc1"},
- {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc69c96735ab501419c432110016329bf0dea8898ce16fab97c6d9106dc0b348"},
- {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3e124e6b1dd3dfbeb695435dff0e383256655bb18082e094a8dd1f6293114642"},
- {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7598d2ba821caa37a0f9d54c25164a4fa351ce019d64d0b44b45540950458840"},
- {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d1299d7e964c13607efd148ca1f07dcbf27c3ab9e125d1d0ae1d580a1682399d"},
- {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4e6f689880d5ad87918430957297c975203a082d9a036cc426648fcbedae769b"},
- {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cc69949484171cc961e6ecd4a8911b9ce7a0d1f738fcae717177c231bf77437b"},
- {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9880078f683466b7f567b8624bfc16cad65077be046b6e8abb53bed4eeb82dd3"},
- {file = "pyzmq-25.1.2-cp311-cp311-win32.whl", hash = "sha256:4e5837af3e5aaa99a091302df5ee001149baff06ad22b722d34e30df5f0d9097"},
- {file = "pyzmq-25.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:25c2dbb97d38b5ac9fd15586e048ec5eb1e38f3d47fe7d92167b0c77bb3584e9"},
- {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:11e70516688190e9c2db14fcf93c04192b02d457b582a1f6190b154691b4c93a"},
- {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:313c3794d650d1fccaaab2df942af9f2c01d6217c846177cfcbc693c7410839e"},
- {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b3cbba2f47062b85fe0ef9de5b987612140a9ba3a9c6d2543c6dec9f7c2ab27"},
- {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc31baa0c32a2ca660784d5af3b9487e13b61b3032cb01a115fce6588e1bed30"},
- {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c9087b109070c5ab0b383079fa1b5f797f8d43e9a66c07a4b8b8bdecfd88ee"},
- {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f8429b17cbb746c3e043cb986328da023657e79d5ed258b711c06a70c2ea7537"},
- {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5074adeacede5f810b7ef39607ee59d94e948b4fd954495bdb072f8c54558181"},
- {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7ae8f354b895cbd85212da245f1a5ad8159e7840e37d78b476bb4f4c3f32a9fe"},
- {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b264bf2cc96b5bc43ce0e852be995e400376bd87ceb363822e2cb1964fcdc737"},
- {file = "pyzmq-25.1.2-cp312-cp312-win32.whl", hash = "sha256:02bbc1a87b76e04fd780b45e7f695471ae6de747769e540da909173d50ff8e2d"},
- {file = "pyzmq-25.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:ced111c2e81506abd1dc142e6cd7b68dd53747b3b7ae5edbea4578c5eeff96b7"},
- {file = "pyzmq-25.1.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:7b6d09a8962a91151f0976008eb7b29b433a560fde056ec7a3db9ec8f1075438"},
- {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:967668420f36878a3c9ecb5ab33c9d0ff8d054f9c0233d995a6d25b0e95e1b6b"},
- {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5edac3f57c7ddaacdb4d40f6ef2f9e299471fc38d112f4bc6d60ab9365445fb0"},
- {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0dabfb10ef897f3b7e101cacba1437bd3a5032ee667b7ead32bbcdd1a8422fe7"},
- {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:2c6441e0398c2baacfe5ba30c937d274cfc2dc5b55e82e3749e333aabffde561"},
- {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:16b726c1f6c2e7625706549f9dbe9b06004dfbec30dbed4bf50cbdfc73e5b32a"},
- {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:a86c2dd76ef71a773e70551a07318b8e52379f58dafa7ae1e0a4be78efd1ff16"},
- {file = "pyzmq-25.1.2-cp36-cp36m-win32.whl", hash = "sha256:359f7f74b5d3c65dae137f33eb2bcfa7ad9ebefd1cab85c935f063f1dbb245cc"},
- {file = "pyzmq-25.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:55875492f820d0eb3417b51d96fea549cde77893ae3790fd25491c5754ea2f68"},
- {file = "pyzmq-25.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b8c8a419dfb02e91b453615c69568442e897aaf77561ee0064d789705ff37a92"},
- {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8807c87fa893527ae8a524c15fc505d9950d5e856f03dae5921b5e9aa3b8783b"},
- {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5e319ed7d6b8f5fad9b76daa0a68497bc6f129858ad956331a5835785761e003"},
- {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:3c53687dde4d9d473c587ae80cc328e5b102b517447456184b485587ebd18b62"},
- {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9add2e5b33d2cd765ad96d5eb734a5e795a0755f7fc49aa04f76d7ddda73fd70"},
- {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e690145a8c0c273c28d3b89d6fb32c45e0d9605b2293c10e650265bf5c11cfec"},
- {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:00a06faa7165634f0cac1abb27e54d7a0b3b44eb9994530b8ec73cf52e15353b"},
- {file = "pyzmq-25.1.2-cp37-cp37m-win32.whl", hash = "sha256:0f97bc2f1f13cb16905a5f3e1fbdf100e712d841482b2237484360f8bc4cb3d7"},
- {file = "pyzmq-25.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6cc0020b74b2e410287e5942e1e10886ff81ac77789eb20bec13f7ae681f0fdd"},
- {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:bef02cfcbded83473bdd86dd8d3729cd82b2e569b75844fb4ea08fee3c26ae41"},
- {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e10a4b5a4b1192d74853cc71a5e9fd022594573926c2a3a4802020360aa719d8"},
- {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8c5f80e578427d4695adac6fdf4370c14a2feafdc8cb35549c219b90652536ae"},
- {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5dde6751e857910c1339890f3524de74007958557593b9e7e8c5f01cd919f8a7"},
- {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea1608dd169da230a0ad602d5b1ebd39807ac96cae1845c3ceed39af08a5c6df"},
- {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0f513130c4c361201da9bc69df25a086487250e16b5571ead521b31ff6b02220"},
- {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:019744b99da30330798bb37df33549d59d380c78e516e3bab9c9b84f87a9592f"},
- {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2e2713ef44be5d52dd8b8e2023d706bf66cb22072e97fc71b168e01d25192755"},
- {file = "pyzmq-25.1.2-cp38-cp38-win32.whl", hash = "sha256:07cd61a20a535524906595e09344505a9bd46f1da7a07e504b315d41cd42eb07"},
- {file = "pyzmq-25.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb7e49a17fb8c77d3119d41a4523e432eb0c6932187c37deb6fbb00cc3028088"},
- {file = "pyzmq-25.1.2-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:94504ff66f278ab4b7e03e4cba7e7e400cb73bfa9d3d71f58d8972a8dc67e7a6"},
- {file = "pyzmq-25.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6dd0d50bbf9dca1d0bdea219ae6b40f713a3fb477c06ca3714f208fd69e16fd8"},
- {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:004ff469d21e86f0ef0369717351073e0e577428e514c47c8480770d5e24a565"},
- {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c0b5ca88a8928147b7b1e2dfa09f3b6c256bc1135a1338536cbc9ea13d3b7add"},
- {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c9a79f1d2495b167119d02be7448bfba57fad2a4207c4f68abc0bab4b92925b"},
- {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:518efd91c3d8ac9f9b4f7dd0e2b7b8bf1a4fe82a308009016b07eaa48681af82"},
- {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1ec23bd7b3a893ae676d0e54ad47d18064e6c5ae1fadc2f195143fb27373f7f6"},
- {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db36c27baed588a5a8346b971477b718fdc66cf5b80cbfbd914b4d6d355e44e2"},
- {file = "pyzmq-25.1.2-cp39-cp39-win32.whl", hash = "sha256:39b1067f13aba39d794a24761e385e2eddc26295826530a8c7b6c6c341584289"},
- {file = "pyzmq-25.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:8e9f3fabc445d0ce320ea2c59a75fe3ea591fdbdeebec5db6de530dd4b09412e"},
- {file = "pyzmq-25.1.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a8c1d566344aee826b74e472e16edae0a02e2a044f14f7c24e123002dcff1c05"},
- {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:759cfd391a0996345ba94b6a5110fca9c557ad4166d86a6e81ea526c376a01e8"},
- {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c61e346ac34b74028ede1c6b4bcecf649d69b707b3ff9dc0fab453821b04d1e"},
- {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cb8fc1f8d69b411b8ec0b5f1ffbcaf14c1db95b6bccea21d83610987435f1a4"},
- {file = "pyzmq-25.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3c00c9b7d1ca8165c610437ca0c92e7b5607b2f9076f4eb4b095c85d6e680a1d"},
- {file = "pyzmq-25.1.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:df0c7a16ebb94452d2909b9a7b3337940e9a87a824c4fc1c7c36bb4404cb0cde"},
- {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:45999e7f7ed5c390f2e87ece7f6c56bf979fb213550229e711e45ecc7d42ccb8"},
- {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ac170e9e048b40c605358667aca3d94e98f604a18c44bdb4c102e67070f3ac9b"},
- {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1b604734bec94f05f81b360a272fc824334267426ae9905ff32dc2be433ab96"},
- {file = "pyzmq-25.1.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a793ac733e3d895d96f865f1806f160696422554e46d30105807fdc9841b9f7d"},
- {file = "pyzmq-25.1.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0806175f2ae5ad4b835ecd87f5f85583316b69f17e97786f7443baaf54b9bb98"},
- {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ef12e259e7bc317c7597d4f6ef59b97b913e162d83b421dd0db3d6410f17a244"},
- {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea253b368eb41116011add00f8d5726762320b1bda892f744c91997b65754d73"},
- {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b9b1f2ad6498445a941d9a4fee096d387fee436e45cc660e72e768d3d8ee611"},
- {file = "pyzmq-25.1.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:8b14c75979ce932c53b79976a395cb2a8cd3aaf14aef75e8c2cb55a330b9b49d"},
- {file = "pyzmq-25.1.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:889370d5174a741a62566c003ee8ddba4b04c3f09a97b8000092b7ca83ec9c49"},
- {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a18fff090441a40ffda8a7f4f18f03dc56ae73f148f1832e109f9bffa85df15"},
- {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99a6b36f95c98839ad98f8c553d8507644c880cf1e0a57fe5e3a3f3969040882"},
- {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4345c9a27f4310afbb9c01750e9461ff33d6fb74cd2456b107525bbeebcb5be3"},
- {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3516e0b6224cf6e43e341d56da15fd33bdc37fa0c06af4f029f7d7dfceceabbc"},
- {file = "pyzmq-25.1.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:146b9b1f29ead41255387fb07be56dc29639262c0f7344f570eecdcd8d683314"},
- {file = "pyzmq-25.1.2.tar.gz", hash = "sha256:93f1aa311e8bb912e34f004cf186407a4e90eec4f0ecc0efd26056bf7eda0226"},
+ {file = "pyzmq-26.0.3-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:44dd6fc3034f1eaa72ece33588867df9e006a7303725a12d64c3dff92330f625"},
+ {file = "pyzmq-26.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:acb704195a71ac5ea5ecf2811c9ee19ecdc62b91878528302dd0be1b9451cc90"},
+ {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dbb9c997932473a27afa93954bb77a9f9b786b4ccf718d903f35da3232317de"},
+ {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6bcb34f869d431799c3ee7d516554797f7760cb2198ecaa89c3f176f72d062be"},
+ {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ece17ec5f20d7d9b442e5174ae9f020365d01ba7c112205a4d59cf19dc38ee"},
+ {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ba6e5e6588e49139a0979d03a7deb9c734bde647b9a8808f26acf9c547cab1bf"},
+ {file = "pyzmq-26.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3bf8b000a4e2967e6dfdd8656cd0757d18c7e5ce3d16339e550bd462f4857e59"},
+ {file = "pyzmq-26.0.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2136f64fbb86451dbbf70223635a468272dd20075f988a102bf8a3f194a411dc"},
+ {file = "pyzmq-26.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e8918973fbd34e7814f59143c5f600ecd38b8038161239fd1a3d33d5817a38b8"},
+ {file = "pyzmq-26.0.3-cp310-cp310-win32.whl", hash = "sha256:0aaf982e68a7ac284377d051c742610220fd06d330dcd4c4dbb4cdd77c22a537"},
+ {file = "pyzmq-26.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:f1a9b7d00fdf60b4039f4455afd031fe85ee8305b019334b72dcf73c567edc47"},
+ {file = "pyzmq-26.0.3-cp310-cp310-win_arm64.whl", hash = "sha256:80b12f25d805a919d53efc0a5ad7c0c0326f13b4eae981a5d7b7cc343318ebb7"},
+ {file = "pyzmq-26.0.3-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:a72a84570f84c374b4c287183debc776dc319d3e8ce6b6a0041ce2e400de3f32"},
+ {file = "pyzmq-26.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7ca684ee649b55fd8f378127ac8462fb6c85f251c2fb027eb3c887e8ee347bcd"},
+ {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e222562dc0f38571c8b1ffdae9d7adb866363134299264a1958d077800b193b7"},
+ {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f17cde1db0754c35a91ac00b22b25c11da6eec5746431d6e5092f0cd31a3fea9"},
+ {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b7c0c0b3244bb2275abe255d4a30c050d541c6cb18b870975553f1fb6f37527"},
+ {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ac97a21de3712afe6a6c071abfad40a6224fd14fa6ff0ff8d0c6e6cd4e2f807a"},
+ {file = "pyzmq-26.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:88b88282e55fa39dd556d7fc04160bcf39dea015f78e0cecec8ff4f06c1fc2b5"},
+ {file = "pyzmq-26.0.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:72b67f966b57dbd18dcc7efbc1c7fc9f5f983e572db1877081f075004614fcdd"},
+ {file = "pyzmq-26.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f4b6cecbbf3b7380f3b61de3a7b93cb721125dc125c854c14ddc91225ba52f83"},
+ {file = "pyzmq-26.0.3-cp311-cp311-win32.whl", hash = "sha256:eed56b6a39216d31ff8cd2f1d048b5bf1700e4b32a01b14379c3b6dde9ce3aa3"},
+ {file = "pyzmq-26.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:3191d312c73e3cfd0f0afdf51df8405aafeb0bad71e7ed8f68b24b63c4f36500"},
+ {file = "pyzmq-26.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:b6907da3017ef55139cf0e417c5123a84c7332520e73a6902ff1f79046cd3b94"},
+ {file = "pyzmq-26.0.3-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:068ca17214038ae986d68f4a7021f97e187ed278ab6dccb79f837d765a54d753"},
+ {file = "pyzmq-26.0.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7821d44fe07335bea256b9f1f41474a642ca55fa671dfd9f00af8d68a920c2d4"},
+ {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eeb438a26d87c123bb318e5f2b3d86a36060b01f22fbdffd8cf247d52f7c9a2b"},
+ {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:69ea9d6d9baa25a4dc9cef5e2b77b8537827b122214f210dd925132e34ae9b12"},
+ {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7daa3e1369355766dea11f1d8ef829905c3b9da886ea3152788dc25ee6079e02"},
+ {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6ca7a9a06b52d0e38ccf6bca1aeff7be178917893f3883f37b75589d42c4ac20"},
+ {file = "pyzmq-26.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1b7d0e124948daa4d9686d421ef5087c0516bc6179fdcf8828b8444f8e461a77"},
+ {file = "pyzmq-26.0.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e746524418b70f38550f2190eeee834db8850088c834d4c8406fbb9bc1ae10b2"},
+ {file = "pyzmq-26.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:6b3146f9ae6af82c47a5282ac8803523d381b3b21caeae0327ed2f7ecb718798"},
+ {file = "pyzmq-26.0.3-cp312-cp312-win32.whl", hash = "sha256:2b291d1230845871c00c8462c50565a9cd6026fe1228e77ca934470bb7d70ea0"},
+ {file = "pyzmq-26.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:926838a535c2c1ea21c903f909a9a54e675c2126728c21381a94ddf37c3cbddf"},
+ {file = "pyzmq-26.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:5bf6c237f8c681dfb91b17f8435b2735951f0d1fad10cc5dfd96db110243370b"},
+ {file = "pyzmq-26.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c0991f5a96a8e620f7691e61178cd8f457b49e17b7d9cfa2067e2a0a89fc1d5"},
+ {file = "pyzmq-26.0.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dbf012d8fcb9f2cf0643b65df3b355fdd74fc0035d70bb5c845e9e30a3a4654b"},
+ {file = "pyzmq-26.0.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:01fbfbeb8249a68d257f601deb50c70c929dc2dfe683b754659569e502fbd3aa"},
+ {file = "pyzmq-26.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c8eb19abe87029c18f226d42b8a2c9efdd139d08f8bf6e085dd9075446db450"},
+ {file = "pyzmq-26.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5344b896e79800af86ad643408ca9aa303a017f6ebff8cee5a3163c1e9aec987"},
+ {file = "pyzmq-26.0.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:204e0f176fd1d067671157d049466869b3ae1fc51e354708b0dc41cf94e23a3a"},
+ {file = "pyzmq-26.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a42db008d58530efa3b881eeee4991146de0b790e095f7ae43ba5cc612decbc5"},
+ {file = "pyzmq-26.0.3-cp37-cp37m-win32.whl", hash = "sha256:8d7a498671ca87e32b54cb47c82a92b40130a26c5197d392720a1bce1b3c77cf"},
+ {file = "pyzmq-26.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:3b4032a96410bdc760061b14ed6a33613ffb7f702181ba999df5d16fb96ba16a"},
+ {file = "pyzmq-26.0.3-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:2cc4e280098c1b192c42a849de8de2c8e0f3a84086a76ec5b07bfee29bda7d18"},
+ {file = "pyzmq-26.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5bde86a2ed3ce587fa2b207424ce15b9a83a9fa14422dcc1c5356a13aed3df9d"},
+ {file = "pyzmq-26.0.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:34106f68e20e6ff253c9f596ea50397dbd8699828d55e8fa18bd4323d8d966e6"},
+ {file = "pyzmq-26.0.3-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ebbbd0e728af5db9b04e56389e2299a57ea8b9dd15c9759153ee2455b32be6ad"},
+ {file = "pyzmq-26.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6b1d1c631e5940cac5a0b22c5379c86e8df6a4ec277c7a856b714021ab6cfad"},
+ {file = "pyzmq-26.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e891ce81edd463b3b4c3b885c5603c00141151dd9c6936d98a680c8c72fe5c67"},
+ {file = "pyzmq-26.0.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9b273ecfbc590a1b98f014ae41e5cf723932f3b53ba9367cfb676f838038b32c"},
+ {file = "pyzmq-26.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b32bff85fb02a75ea0b68f21e2412255b5731f3f389ed9aecc13a6752f58ac97"},
+ {file = "pyzmq-26.0.3-cp38-cp38-win32.whl", hash = "sha256:f6c21c00478a7bea93caaaef9e7629145d4153b15a8653e8bb4609d4bc70dbfc"},
+ {file = "pyzmq-26.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:3401613148d93ef0fd9aabdbddb212de3db7a4475367f49f590c837355343972"},
+ {file = "pyzmq-26.0.3-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:2ed8357f4c6e0daa4f3baf31832df8a33334e0fe5b020a61bc8b345a3db7a606"},
+ {file = "pyzmq-26.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c1c8f2a2ca45292084c75bb6d3a25545cff0ed931ed228d3a1810ae3758f975f"},
+ {file = "pyzmq-26.0.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:b63731993cdddcc8e087c64e9cf003f909262b359110070183d7f3025d1c56b5"},
+ {file = "pyzmq-26.0.3-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b3cd31f859b662ac5d7f4226ec7d8bd60384fa037fc02aee6ff0b53ba29a3ba8"},
+ {file = "pyzmq-26.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:115f8359402fa527cf47708d6f8a0f8234f0e9ca0cab7c18c9c189c194dbf620"},
+ {file = "pyzmq-26.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:715bdf952b9533ba13dfcf1f431a8f49e63cecc31d91d007bc1deb914f47d0e4"},
+ {file = "pyzmq-26.0.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:e1258c639e00bf5e8a522fec6c3eaa3e30cf1c23a2f21a586be7e04d50c9acab"},
+ {file = "pyzmq-26.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:15c59e780be8f30a60816a9adab900c12a58d79c1ac742b4a8df044ab2a6d920"},
+ {file = "pyzmq-26.0.3-cp39-cp39-win32.whl", hash = "sha256:d0cdde3c78d8ab5b46595054e5def32a755fc028685add5ddc7403e9f6de9879"},
+ {file = "pyzmq-26.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:ce828058d482ef860746bf532822842e0ff484e27f540ef5c813d516dd8896d2"},
+ {file = "pyzmq-26.0.3-cp39-cp39-win_arm64.whl", hash = "sha256:788f15721c64109cf720791714dc14afd0f449d63f3a5487724f024345067381"},
+ {file = "pyzmq-26.0.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c18645ef6294d99b256806e34653e86236eb266278c8ec8112622b61db255de"},
+ {file = "pyzmq-26.0.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e6bc96ebe49604df3ec2c6389cc3876cabe475e6bfc84ced1bf4e630662cb35"},
+ {file = "pyzmq-26.0.3-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:971e8990c5cc4ddcff26e149398fc7b0f6a042306e82500f5e8db3b10ce69f84"},
+ {file = "pyzmq-26.0.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8416c23161abd94cc7da80c734ad7c9f5dbebdadfdaa77dad78244457448223"},
+ {file = "pyzmq-26.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:082a2988364b60bb5de809373098361cf1dbb239623e39e46cb18bc035ed9c0c"},
+ {file = "pyzmq-26.0.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d57dfbf9737763b3a60d26e6800e02e04284926329aee8fb01049635e957fe81"},
+ {file = "pyzmq-26.0.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:77a85dca4c2430ac04dc2a2185c2deb3858a34fe7f403d0a946fa56970cf60a1"},
+ {file = "pyzmq-26.0.3-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4c82a6d952a1d555bf4be42b6532927d2a5686dd3c3e280e5f63225ab47ac1f5"},
+ {file = "pyzmq-26.0.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4496b1282c70c442809fc1b151977c3d967bfb33e4e17cedbf226d97de18f709"},
+ {file = "pyzmq-26.0.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:e4946d6bdb7ba972dfda282f9127e5756d4f299028b1566d1245fa0d438847e6"},
+ {file = "pyzmq-26.0.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:03c0ae165e700364b266876d712acb1ac02693acd920afa67da2ebb91a0b3c09"},
+ {file = "pyzmq-26.0.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:3e3070e680f79887d60feeda051a58d0ac36622e1759f305a41059eff62c6da7"},
+ {file = "pyzmq-26.0.3-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6ca08b840fe95d1c2bd9ab92dac5685f949fc6f9ae820ec16193e5ddf603c3b2"},
+ {file = "pyzmq-26.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e76654e9dbfb835b3518f9938e565c7806976c07b37c33526b574cc1a1050480"},
+ {file = "pyzmq-26.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:871587bdadd1075b112e697173e946a07d722459d20716ceb3d1bd6c64bd08ce"},
+ {file = "pyzmq-26.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d0a2d1bd63a4ad79483049b26514e70fa618ce6115220da9efdff63688808b17"},
+ {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0270b49b6847f0d106d64b5086e9ad5dc8a902413b5dbbb15d12b60f9c1747a4"},
+ {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:703c60b9910488d3d0954ca585c34f541e506a091a41930e663a098d3b794c67"},
+ {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74423631b6be371edfbf7eabb02ab995c2563fee60a80a30829176842e71722a"},
+ {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4adfbb5451196842a88fda3612e2c0414134874bffb1c2ce83ab4242ec9e027d"},
+ {file = "pyzmq-26.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3516119f4f9b8671083a70b6afaa0a070f5683e431ab3dc26e9215620d7ca1ad"},
+ {file = "pyzmq-26.0.3.tar.gz", hash = "sha256:dba7d9f2e047dfa2bca3b01f4f84aa5246725203d6284e3790f2ca15fba6b40a"},
]
[package.dependencies]
@@ -3633,13 +3732,13 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""}
[[package]]
name = "qtconsole"
-version = "5.5.1"
+version = "5.5.2"
description = "Jupyter Qt console"
optional = false
-python-versions = ">= 3.8"
+python-versions = ">=3.8"
files = [
- {file = "qtconsole-5.5.1-py3-none-any.whl", hash = "sha256:8c75fa3e9b4ed884880ff7cea90a1b67451219279ec33deaee1d59e3df1a5d2b"},
- {file = "qtconsole-5.5.1.tar.gz", hash = "sha256:a0e806c6951db9490628e4df80caec9669b65149c7ba40f9bf033c025a5b56bc"},
+ {file = "qtconsole-5.5.2-py3-none-any.whl", hash = "sha256:42d745f3d05d36240244a04e1e1ec2a86d5d9b6edb16dbdef582ccb629e87e0b"},
+ {file = "qtconsole-5.5.2.tar.gz", hash = "sha256:6b5fb11274b297463706af84dcbbd5c92273b1f619e6d25d08874b0a88516989"},
]
[package.dependencies]
@@ -3712,13 +3811,13 @@ Pillow = "*"
[[package]]
name = "referencing"
-version = "0.33.0"
+version = "0.35.1"
description = "JSON Referencing + Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "referencing-0.33.0-py3-none-any.whl", hash = "sha256:39240f2ecc770258f28b642dd47fd74bc8b02484de54e1882b74b35ebd779bd5"},
- {file = "referencing-0.33.0.tar.gz", hash = "sha256:c775fedf74bc0f9189c2a3be1c12fd03e8c23f4d371dce795df44e06c5b412f7"},
+ {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"},
+ {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"},
]
[package.dependencies]
@@ -3773,13 +3872,13 @@ files = [
[[package]]
name = "rich"
-version = "13.7.0"
+version = "13.7.1"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
optional = false
python-versions = ">=3.7.0"
files = [
- {file = "rich-13.7.0-py3-none-any.whl", hash = "sha256:6da14c108c4866ee9520bbffa71f6fe3962e193b7da68720583850cd4548e235"},
- {file = "rich-13.7.0.tar.gz", hash = "sha256:5cb5123b5cf9ee70584244246816e9114227e0b98ad9176eede6ad54bf5403fa"},
+ {file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"},
+ {file = "rich-13.7.1.tar.gz", hash = "sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432"},
]
[package.dependencies]
@@ -3985,13 +4084,13 @@ test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo
[[package]]
name = "send2trash"
-version = "1.8.2"
+version = "1.8.3"
description = "Send file to trash natively under Mac OS X, Windows and Linux"
optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
files = [
- {file = "Send2Trash-1.8.2-py3-none-any.whl", hash = "sha256:a384719d99c07ce1eefd6905d2decb6f8b7ed054025bb0e618919f945de4f679"},
- {file = "Send2Trash-1.8.2.tar.gz", hash = "sha256:c132d59fa44b9ca2b1699af5c86f57ce9f4c5eb56629d5d55fbb7a35f84e2312"},
+ {file = "Send2Trash-1.8.3-py3-none-any.whl", hash = "sha256:0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9"},
+ {file = "Send2Trash-1.8.3.tar.gz", hash = "sha256:b18e7a3966d99871aefeb00cfbcfdced55ce4871194810fc71f4aa484b953abf"},
]
[package.extras]
@@ -4001,19 +4100,19 @@ win32 = ["pywin32"]
[[package]]
name = "setuptools"
-version = "69.1.0"
+version = "69.5.1"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
optional = false
python-versions = ">=3.8"
files = [
- {file = "setuptools-69.1.0-py3-none-any.whl", hash = "sha256:c054629b81b946d63a9c6e732bc8b2513a7c3ea645f11d0139a2191d735c60c6"},
- {file = "setuptools-69.1.0.tar.gz", hash = "sha256:850894c4195f09c4ed30dba56213bf7c3f21d86ed6bdaafb5df5972593bfc401"},
+ {file = "setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"},
+ {file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"},
]
[package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
-testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
-testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
+testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
+testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
[[package]]
name = "shellingham"
@@ -4167,13 +4266,13 @@ webhdfs = ["requests"]
[[package]]
name = "sniffio"
-version = "1.3.0"
+version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
files = [
- {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"},
- {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
+ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
+ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
[[package]]
@@ -4381,13 +4480,13 @@ widechars = ["wcwidth"]
[[package]]
name = "terminado"
-version = "0.18.0"
+version = "0.18.1"
description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library."
optional = false
python-versions = ">=3.8"
files = [
- {file = "terminado-0.18.0-py3-none-any.whl", hash = "sha256:87b0d96642d0fe5f5abd7783857b9cab167f221a39ff98e3b9619a788a3c0f2e"},
- {file = "terminado-0.18.0.tar.gz", hash = "sha256:1ea08a89b835dd1b8c0c900d92848147cef2537243361b2e3f4dc15df9b6fded"},
+ {file = "terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0"},
+ {file = "terminado-0.18.1.tar.gz", hash = "sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e"},
]
[package.dependencies]
@@ -4509,24 +4608,24 @@ torch = ["torch (>=1.6.0)"]
[[package]]
name = "threadpoolctl"
-version = "3.3.0"
+version = "3.5.0"
description = "threadpoolctl"
optional = false
python-versions = ">=3.8"
files = [
- {file = "threadpoolctl-3.3.0-py3-none-any.whl", hash = "sha256:6155be1f4a39f31a18ea70f94a77e0ccd57dced08122ea61109e7da89883781e"},
- {file = "threadpoolctl-3.3.0.tar.gz", hash = "sha256:5dac632b4fa2d43f42130267929af3ba01399ef4bd1882918e92dbc30365d30c"},
+ {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"},
+ {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"},
]
[[package]]
name = "tinycss2"
-version = "1.2.1"
+version = "1.3.0"
description = "A tiny CSS parser"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "tinycss2-1.2.1-py3-none-any.whl", hash = "sha256:2b80a96d41e7c3914b8cda8bc7f705a4d9c49275616e886103dd839dfc847847"},
- {file = "tinycss2-1.2.1.tar.gz", hash = "sha256:8cff3a8f066c2ec677c06dbc7b45619804a6938478d9d73c284b29d14ecb0627"},
+ {file = "tinycss2-1.3.0-py3-none-any.whl", hash = "sha256:54a8dbdffb334d536851be0226030e9505965bb2f30f21a4a82c55fb2a80fae7"},
+ {file = "tinycss2-1.3.0.tar.gz", hash = "sha256:152f9acabd296a8375fbca5b84c961ff95971fcfc32e79550c8df8e29118c54d"},
]
[package.dependencies]
@@ -4534,7 +4633,7 @@ webencodings = ">=0.4"
[package.extras]
doc = ["sphinx", "sphinx_rtd_theme"]
-test = ["flake8", "isort", "pytest"]
+test = ["pytest", "ruff"]
[[package]]
name = "tokenize-rt"
@@ -4600,13 +4699,13 @@ opt-einsum = ["opt-einsum (>=3.3)"]
[[package]]
name = "torch-geometric"
-version = "2.5.0"
+version = "2.5.3"
description = "Graph Neural Network Library for PyTorch"
optional = false
python-versions = ">=3.8"
files = [
- {file = "torch_geometric-2.5.0-py3-none-any.whl", hash = "sha256:9322fef81189d870b08b5a8b44957c768c9e0ba431a81e6bd0d7cb39fcb87de8"},
- {file = "torch_geometric-2.5.0.tar.gz", hash = "sha256:f4f4a57fd885c74e982d570df3a259ffff5adbb4ad7a3a95aabf5ace5fa7a240"},
+ {file = "torch_geometric-2.5.3-py3-none-any.whl", hash = "sha256:8277abfc12600b0e8047e0c3ea2d55cc43f08c1448e73e924de827c15d0b5f85"},
+ {file = "torch_geometric-2.5.3.tar.gz", hash = "sha256:ad0761650c8fa56cdc46ee61c564fd4995f07f079965fe732b3a76d109fd3edc"},
]
[package.dependencies]
@@ -4651,13 +4750,13 @@ files = [
[[package]]
name = "tqdm"
-version = "4.66.2"
+version = "4.66.4"
description = "Fast, Extensible Progress Meter"
optional = false
python-versions = ">=3.7"
files = [
- {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"},
- {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"},
+ {file = "tqdm-4.66.4-py3-none-any.whl", hash = "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644"},
+ {file = "tqdm-4.66.4.tar.gz", hash = "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb"},
]
[package.dependencies]
@@ -4671,28 +4770,28 @@ telegram = ["requests"]
[[package]]
name = "traitlets"
-version = "5.14.1"
+version = "5.14.3"
description = "Traitlets Python configuration system"
optional = false
python-versions = ">=3.8"
files = [
- {file = "traitlets-5.14.1-py3-none-any.whl", hash = "sha256:2e5a030e6eff91737c643231bfcf04a65b0132078dad75e4936700b213652e74"},
- {file = "traitlets-5.14.1.tar.gz", hash = "sha256:8585105b371a04b8316a43d5ce29c098575c2e477850b62b848b964f1444527e"},
+ {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"},
+ {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"},
]
[package.extras]
docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"]
-test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"]
+test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"]
[[package]]
name = "typer"
-version = "0.9.0"
+version = "0.9.4"
description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
optional = false
python-versions = ">=3.6"
files = [
- {file = "typer-0.9.0-py3-none-any.whl", hash = "sha256:5d96d986a21493606a358cae4461bd8cdf83cbf33a5aa950ae629ca3b51467ee"},
- {file = "typer-0.9.0.tar.gz", hash = "sha256:50922fd79aea2f4751a8e0408ff10d2662bd0c8bbfa84755a699f3bada2978b2"},
+ {file = "typer-0.9.4-py3-none-any.whl", hash = "sha256:aa6c4a4e2329d868b80ecbaf16f807f2b54e192209d7ac9dd42691d63f7a54eb"},
+ {file = "typer-0.9.4.tar.gz", hash = "sha256:f714c2d90afae3a7929fcd72a3abb08df305e1ff61719381384211c4070af57f"},
]
[package.dependencies]
@@ -4706,28 +4805,28 @@ typing-extensions = ">=3.7.4.3"
all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"]
doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"]
-test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
+test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.971)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
[[package]]
name = "types-python-dateutil"
-version = "2.8.19.20240106"
+version = "2.9.0.20240316"
description = "Typing stubs for python-dateutil"
optional = false
python-versions = ">=3.8"
files = [
- {file = "types-python-dateutil-2.8.19.20240106.tar.gz", hash = "sha256:1f8db221c3b98e6ca02ea83a58371b22c374f42ae5bbdf186db9c9a76581459f"},
- {file = "types_python_dateutil-2.8.19.20240106-py3-none-any.whl", hash = "sha256:efbbdc54590d0f16152fa103c9879c7d4a00e82078f6e2cf01769042165acaa2"},
+ {file = "types-python-dateutil-2.9.0.20240316.tar.gz", hash = "sha256:5d2f2e240b86905e40944dd787db6da9263f0deabef1076ddaed797351ec0202"},
+ {file = "types_python_dateutil-2.9.0.20240316-py3-none-any.whl", hash = "sha256:6b8cb66d960771ce5ff974e9dd45e38facb81718cc1e208b10b1baccbfdbee3b"},
]
[[package]]
name = "typing-extensions"
-version = "4.9.0"
+version = "4.11.0"
description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false
python-versions = ">=3.8"
files = [
- {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"},
- {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"},
+ {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"},
+ {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"},
]
[[package]]
@@ -4762,13 +4861,13 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[[package]]
name = "virtualenv"
-version = "20.25.0"
+version = "20.26.1"
description = "Virtual Python Environment builder"
optional = false
python-versions = ">=3.7"
files = [
- {file = "virtualenv-20.25.0-py3-none-any.whl", hash = "sha256:4238949c5ffe6876362d9c0180fc6c3a824a7b12b80604eeb8085f2ed7460de3"},
- {file = "virtualenv-20.25.0.tar.gz", hash = "sha256:bf51c0d9c7dd63ea8e44086fa1e4fb1093a31e963b86959257378aef020e1f1b"},
+ {file = "virtualenv-20.26.1-py3-none-any.whl", hash = "sha256:7aa9982a728ae5892558bff6a2839c00b9ed145523ece2274fad6f414690ae75"},
+ {file = "virtualenv-20.26.1.tar.gz", hash = "sha256:604bfdceaeece392802e6ae48e69cec49168b9c5f4a44e483963f9242eb0e78b"},
]
[package.dependencies]
@@ -4777,7 +4876,7 @@ filelock = ">=3.12.2,<4"
platformdirs = ">=3.9.1,<5"
[package.extras]
-docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
+docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
[[package]]
@@ -4855,29 +4954,29 @@ files = [
[[package]]
name = "websocket-client"
-version = "1.7.0"
+version = "1.8.0"
description = "WebSocket client for Python with low level API options"
optional = false
python-versions = ">=3.8"
files = [
- {file = "websocket-client-1.7.0.tar.gz", hash = "sha256:10e511ea3a8c744631d3bd77e61eb17ed09304c413ad42cf6ddfa4c7787e8fe6"},
- {file = "websocket_client-1.7.0-py3-none-any.whl", hash = "sha256:f4c3d22fec12a2461427a29957ff07d35098ee2d976d3ba244e688b8b4057588"},
+ {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"},
+ {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"},
]
[package.extras]
-docs = ["Sphinx (>=6.0)", "sphinx-rtd-theme (>=1.1.0)"]
+docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"]
optional = ["python-socks", "wsaccel"]
test = ["websockets"]
[[package]]
name = "wheel"
-version = "0.42.0"
+version = "0.43.0"
description = "A built-package format for Python"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "wheel-0.42.0-py3-none-any.whl", hash = "sha256:177f9c9b0d45c47873b619f5b650346d632cdc35fb5e4d25058e09c9e581433d"},
- {file = "wheel-0.42.0.tar.gz", hash = "sha256:c45be39f7882c9d34243236f2d63cbd58039e360f85d0913425fbd7ceea617a8"},
+ {file = "wheel-0.43.0-py3-none-any.whl", hash = "sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81"},
+ {file = "wheel-0.43.0.tar.gz", hash = "sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85"},
]
[package.extras]
@@ -5163,20 +5262,21 @@ multidict = ">=4.0"
[[package]]
name = "zipp"
-version = "3.17.0"
+version = "3.18.1"
description = "Backport of pathlib-compatible object wrapper for zip files"
optional = false
python-versions = ">=3.8"
files = [
- {file = "zipp-3.17.0-py3-none-any.whl", hash = "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31"},
- {file = "zipp-3.17.0.tar.gz", hash = "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0"},
+ {file = "zipp-3.18.1-py3-none-any.whl", hash = "sha256:206f5a15f2af3dbaee80769fb7dc6f249695e940acca08dfb2a4769fe61e538b"},
+ {file = "zipp-3.18.1.tar.gz", hash = "sha256:2884ed22e7d8961de1c9a05142eb69a247f120291bc0206a00a7642f09b5b715"},
]
[package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
-testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
[metadata]
lock-version = "2.0"
python-versions = ">= 3.8, <3.11"
-content-hash = "fd5f2d6a718f50247e5ae782dba09a51968ef8424b1e88ce67fe54b90bad6afb"
+content-hash = "73f1b0b84cdeb292efcf668a0a3deeb2ac3f76ebcc3ced6791f681792f17d3b8"
+
diff --git a/pyproject.toml b/pyproject.toml
index e9681a6..9459b1d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,7 @@ python-dotenv = "^1.0.0"
nbclient = "^0.9.0"
pandas = "^1.5.1"
argilla = "^1.24.0"
-deepsearch-glm = "v0.16.2"
+deepsearch-glm = "v0.18.4"
[tool.poetry.group.dev.dependencies]
black = {extras = ["jupyter"], version = "^22.1.0"}