Update docs

Beat Buesser · Beat Buesser · commit c810d53aabc7 · 2020-09-18T21:47:40.000+01:00
Signed-off-by: Beat Buesser <beat.buesser@ie.ibm.com>
diff --git a/art/estimators/speech_recognition/pytorch_deep_speech.py b/art/estimators/speech_recognition/pytorch_deep_speech.py
@@ -277,11 +277,11 @@ def predict(
         :type transcription_output: `bool`
         :return: Probability (if transcription_output is None or False) or transcription (if transcription_output is
                  True) predictions:
-                    - Probability return is a tuple of (probs, sizes), where:
-                        - probs is the probability of characters of shape (nb_samples, seq_length, nb_classes).
-                        - sizes is the real sequence length of shape (nb_samples,).
-                    - Transcription return is a numpy array of characters. A possible example of a transcription return
-                      is `np.array(['SIXTY ONE', 'HELLO'])`.
+                 - Probability return is a tuple of (probs, sizes), where `probs` is the probability of characters of
+                 shape (nb_samples, seq_length, nb_classes) and `sizes` is the real sequence length of shape
+                 (nb_samples,).
+                 - Transcription return is a numpy array of transcribed strings. A possible example of a transcription
+                 return is `np.array(['SIXTY ONE', 'HELLO'])`.
         """
         import torch  # lgtm [py/repeated-import]
 
@@ -529,11 +529,11 @@ def transform_model_input(
         :param real_lengths: Real lengths of original sequences.
         :return: A tuple of inputs and targets in the model space with the original index
                  `(inputs, targets, input_percentages, target_sizes, batch_idx)`, where:
-                    - inputs: model inputs of shape (nb_samples, nb_frequencies, seq_length).
-                    - targets: ground truth targets of shape (sum over nb_samples of real seq_lengths).
-                    - input_percentages: percentages of real inputs in inputs.
-                    - target_sizes: list of real seq_lengths.
-                    - batch_idx: original index of inputs.
+                 - inputs: model inputs of shape (nb_samples, nb_frequencies, seq_length).
+                 - targets: ground truth targets of shape (sum over nb_samples of real seq_lengths).
+                 - input_percentages: percentages of real inputs in inputs.
+                 - target_sizes: list of real seq_lengths.
+                 - batch_idx: original index of inputs.
         """
         import torch  # lgtm [py/repeated-import]
         import torchaudio
diff --git a/docs/index.rst b/docs/index.rst
@@ -80,6 +80,7 @@ Supported Machine Learning Libraries
    modules/estimators/generation
    modules/estimators/object_detection
    modules/estimators/regression
+   modules/estimators/speech_recognition
    modules/metrics
    modules/wrappers
    modules/data_generators
diff --git a/docs/modules/estimators/speech_recognition.rst b/docs/modules/estimators/speech_recognition.rst
@@ -0,0 +1,17 @@
+:mod:`art.estimators.speech_recognition`
+========================================
+.. automodule:: art.estimators.speech_recognition
+
+Mixin Base Class Speech Recognizer
+----------------------------------
+.. autoclass:: SpeechRecognizerMixin
+   :members:
+   :special-members: __init__
+   :inherited-members:
+
+Speech Recognizer Deep Speech
+-----------------------------
+.. autoclass:: PyTorchDeepSpeech
+   :members:
+   :special-members: __init__
+   :inherited-members: