docs: python pipelines (#1613)

IgnatovFedor · web-flow · commit cbff0bb45fce · 2023-01-09T14:29:56.000+03:00
diff --git a/README.md b/README.md
@@ -181,6 +181,9 @@ from deeppavlov import evaluate_model
 model = evaluate_model(<config_path>, install=True, download=True)
 ```
 
+DeepPavlov also [allows](https://docs.deeppavlov.ai/en/master/intro/python.html) you to build a model from components for
+inference using Python.
+
 ## License
 
 DeepPavlov is Apache 2.0 - licensed.
diff --git a/deeppavlov/_meta.py b/deeppavlov/_meta.py
@@ -1,4 +1,4 @@
-__version__ = '1.0.1'
+__version__ = '1.0.2'
 __author__ = 'Neural Networks and Deep Learning lab, MIPT'
 __description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
 __keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
diff --git a/docs/index.rst b/docs/index.rst
@@ -9,6 +9,7 @@ Welcome to DeepPavlov's documentation!
    QuickStart <intro/quick_start>
    General concepts <intro/overview>
    Configuration file <intro/configuration>
+   Python pipelines <intro/python.ipynb>
    Models overview <features/overview>
 
 
diff --git a/docs/intro/python.ipynb b/docs/intro/python.ipynb
@@ -0,0 +1,141 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6d5cd16b",
+   "metadata": {},
+   "source": [
+    "#### Python pipelines"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "da10fd80",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deeppavlov/DeepPavlov/blob/master/docs/intro/python.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d55ebe35",
+   "metadata": {},
+   "source": [
+    "Models can be built and used directly in Python, without .json configuration files.\n",
+    "\n",
+    "The code below is an alternative to building the [insults_kaggle_bert](https://github.com/deeppavlov/DeepPavlov/blob/master/deeppavlov/configs/classifiers/insults_kaggle_bert.json) model from its configuration file and using it with:\n",
+    "\n",
+    "```python\n",
+    "from deeppavlov import build_model\n",
+    "\n",
+    "model = build_model('insults_kaggle_bert', download=True)\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fa1db63b",
+   "metadata": {},
+   "source": [
+    "First, define variables for the model components and download the model data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d6671e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from deeppavlov.core.commands.utils import expand_path\n",
+    "from deeppavlov.download import download_resource\n",
+    "\n",
+    "\n",
+    "# Directory that receives the downloaded classifier data; the components\n",
+    "# in the next cells read their files from its insults_kaggle_torch_bert subdirectory.\n",
+    "classifiers_path = expand_path('~/.deeppavlov/models/classifiers')\n",
+    "model_path = classifiers_path / 'insults_kaggle_torch_bert'\n",
+    "\n",
+    "# Transformer name shared by the preprocessor and the classifier.\n",
+    "transformer_name = 'bert-base-uncased'\n",
+    "\n",
+    "# Fetch the model data. The destination is passed as a one-element set, not as a bare path.\n",
+    "model_data_url = 'http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_torch_bert_v5.tar.gz'\n",
+    "download_resource(model_data_url, {classifiers_path})\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "332d644e",
+   "metadata": {},
+   "source": [
+    "Then, initialize model components."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "809c31ad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from deeppavlov.core.data.simple_vocab import SimpleVocabulary\n",
+    "from deeppavlov.models.classifiers.proba2labels import Proba2Labels\n",
+    "from deeppavlov.models.preprocessors.torch_transformers_preprocessor import TorchTransformersPreprocessor\n",
+    "from deeppavlov.models.torch_bert.torch_transformers_classifier import TorchTransformersClassifierModel\n",
+    "\n",
+    "\n",
+    "# First pipeline step: produces the features consumed by the classifier.\n",
+    "preprocessor = TorchTransformersPreprocessor(vocab_file=transformer_name, max_seq_length=64)\n",
+    "\n",
+    "# The class vocabulary is loaded from and saved to the same file.\n",
+    "classes_dict_path = model_path / 'classes.dict'\n",
+    "classes_vocab = SimpleVocabulary(load_path=classes_dict_path, save_path=classes_dict_path)\n",
+    "\n",
+    "classifier = TorchTransformersClassifierModel(\n",
+    "    n_classes=classes_vocab.len,  # number of classes comes from the vocabulary\n",
+    "    return_probas=True,\n",
+    "    pretrained_bert=transformer_name,\n",
+    "    save_path=model_path / 'model',\n",
+    "    optimizer_parameters={'lr': 1e-05}\n",
+    ")\n",
+    "\n",
+    "# Presumably converts class probabilities into the id of the most probable class - TODO confirm.\n",
+    "proba2labels = Proba2Labels(max_proba=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "87e8ec20",
+   "metadata": {},
+   "source": [
+    "Finally, create the model from the components. ``Element`` is a wrapper for a component: it receives the component and the names of its incoming and outgoing arguments. ``Model`` combines the ``Element`` objects into a pipeline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "acfe29de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from deeppavlov import Element, Model\n",
+    "\n",
+    "# Each Element maps named inputs (x) to named outputs (out); the out names\n",
+    "# of one step are the x names of the next one.\n",
+    "pipeline_elements = [\n",
+    "    Element(component=preprocessor, x=['x'], out=['bert_features']),\n",
+    "    Element(component=classifier, x=['bert_features'], out=['y_pred_probas']),\n",
+    "    Element(component=proba2labels, x=['y_pred_probas'], out=['y_pred_ids']),\n",
+    "    Element(component=classes_vocab, x=['y_pred_ids'], out=['y_pred_labels'])\n",
+    "]\n",
+    "\n",
+    "# The model takes 'x' and returns the output of the last step.\n",
+    "model = Model(x=['x'], out=['y_pred_labels'], pipe=pipeline_elements)\n",
+    "\n",
+    "model(['you are stupid', 'you are smart'])"
+   ]
+  }
+ ],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = '1.0.1'`
	`1`	`+__version__ = '1.0.2'`
`2`	`2`	`__author__ = 'Neural Networks and Deep Learning lab, MIPT'`
`3`	`3`	`__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'`
`4`	`4`	`__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']`