Skip to content

Commit 142752b

Browse files
committed
minor change to docstring and var names
1 parent b7abc37 commit 142752b

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

src/bm25_transformer.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ to 2.
2121
The larger β is, the more strongly document length influences the overall score. The default value is 0.75.
2222
2323
For more explanations, please see:
24-
http://ethen8181.github.io/machine-learning/search/bm25_intro.html
25-
https://en.wikipedia.org/wiki/Okapi_BM25
26-
https://nlp.stanford.edu/IR-book/html/htmledition/okapi-bm25-a-non-binary-model-1.html
24+
- http://ethen8181.github.io/machine-learning/search/bm25_intro.html
25+
- https://en.wikipedia.org/wiki/Okapi_BM25
26+
- https://nlp.stanford.edu/IR-book/html/htmledition/okapi-bm25-a-non-binary-model-1.html
2727
2828
The parameters `max_doc_freq` and `min_doc_freq` restrict the vocabulary
2929
that the transformer will consider. `max_doc_freq` indicates that terms in only
@@ -81,8 +81,8 @@ function build_bm25!(doc_term_mat::SparseMatrixCSC{T},
8181
p, n = size(doc_term_mat)
8282

8383
# TF tells us what proportion of a document is defined by a term
84-
words_in_documents = F.(sum(doc_term_mat, dims=1))
85-
ln = words_in_documents./mean(words_in_documents)
84+
words_in_documents = F.(sum(doc_term_mat; dims=1))
85+
ln = words_in_documents ./ mean(words_in_documents)
8686
oneval = one(F)
8787

8888
for i = 1:n

src/tfidf_transformer.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,22 +62,22 @@ end
6262

6363
get_result(::TfidfTransformer, idf::Vector{Float64}, vocab::Vector{String}) = TfidfTransformerResult(vocab, idf)
6464

65-
function build_tfidf!(dtm::SparseMatrixCSC{T},
65+
function build_tfidf!(doc_term_mat::SparseMatrixCSC{T},
6666
tfidf::SparseMatrixCSC{F},
6767
idf_vector::Vector{F}) where {T <: Real, F <: AbstractFloat}
68-
rows = rowvals(dtm)
69-
dtmvals = nonzeros(dtm)
68+
rows = rowvals(doc_term_mat)
69+
dtmvals = nonzeros(doc_term_mat)
7070
tfidfvals = nonzeros(tfidf)
7171
@assert size(dtmvals) == size(tfidfvals)
7272

73-
p, n = size(dtm)
73+
p, n = size(doc_term_mat)
7474

7575
# TF tells us what proportion of a document is defined by a term
76-
words_in_documents = F.(sum(dtm, dims=1))
76+
words_in_documents = F.(sum(doc_term_mat; dims=1))
7777
oneval = one(F)
7878

7979
@inbounds for i = 1:n
80-
for j in nzrange(dtm, i)
80+
for j in nzrange(doc_term_mat, i)
8181
row = rows[j]
8282
tfidfvals[j] = dtmvals[j] / max(words_in_documents[i], oneval) * idf_vector[row]
8383
end

0 commit comments

Comments
 (0)