Merge pull request #27 from JuliaAI/a

ablaom · web-flow · commit f44ad05a1477 · 2024-01-10T08:35:11.000+13:00
Fix a docstring and the whitespace in the docstrings
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "MLJText"
 uuid = "5e27fcf9-6bac-46ba-8580-b5712f3d6387"
 authors = ["Chris Alexander <uvapazzo@gmail.com>, Anthony D. Blaom <anthony.blaom@gmail.com>"]
-version = "0.2.1"
+version = "0.2.2"
 
 [deps]
 CorpusLoaders = "214a0ac2-f95b-54f7-a80b-442ed9c2c9e8"
diff --git a/src/bm25_transformer.jl b/src/bm25_transformer.jl
@@ -137,21 +137,21 @@ In MLJ or MLJBase, bind an instance `model` to data with
 
     mach = machine(model, X)
 
-$DOC_IDF
+$DOC_TRANSFORMER_INPUTS
 
 Train the machine using `fit!(mach, rows=...)`.
 
 # Hyper-parameters
 
-- `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider.
-  Terms that occur in `> max_doc_freq` documents will not be considered by the
-  transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than
-  90% of the documents will be removed.
+- `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms
+  that occur in `> max_doc_freq` documents will not be considered by the transformer. For
+  example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the
+  documents will be removed.
 
-- `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider.
-  Terms that occur in `< max_doc_freq` documents will not be considered by the
-  transformer. A value of 0.01 means that only terms that are at least in 1% of the
-  documents will be included.
+- `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms
+  that occur in `< min_doc_freq` documents will not be considered by the transformer. A
+  value of 0.01 means that only terms that are in at least 1% of the documents will be
+  included.
 
 - `κ=2`: The term frequency saturation characteristic. Higher values represent slower
   saturation. What we mean by saturation is the degree to which a term occurring extra
diff --git a/src/count_transformer.jl b/src/count_transformer.jl
@@ -94,15 +94,15 @@ Train the machine using `fit!(mach, rows=...)`.
 
 # Hyper-parameters
 
-- `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider.
-  Terms that occur in `> max_doc_freq` documents will not be considered by the
-  transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than
-  90% of the documents will be removed.
-
-- `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider.
-  Terms that occur in `< max_doc_freq` documents will not be considered by the
-  transformer. A value of 0.01 means that only terms that are at least in 1% of the
-  documents will be included.
+- `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms
+  that occur in `> max_doc_freq` documents will not be considered by the transformer. For
+  example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the
+  documents will be removed.
+
+- `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms
+  that occur in `< min_doc_freq` documents will not be considered by the transformer. A
+  value of 0.01 means that only terms that are in at least 1% of the documents will be
+  included.
 
 # Operations