Skip to content

Commit 32dd01d

Browse files
authored
Merge pull request #1431 from indentlabs/claude/audit-document-analysis-gJPFq
Document analysis general bugfixes
2 parents a70c216 + a8838e4 commit 32dd01d

26 files changed

+1144
-132
lines changed

app/controllers/document_analyses_controller.rb

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,11 @@ def hub
4040
end
4141

4242
def index
43-
@document_analyses = DocumentAnalysis.all
43+
if user_signed_in?
44+
@document_analyses = DocumentAnalysis.joins(:document).where(documents: { user_id: current_user.id })
45+
else
46+
@document_analyses = DocumentAnalysis.joins(:document).where(documents: { privacy: 'public' })
47+
end
4448
end
4549

4650
def show
@@ -57,13 +61,18 @@ def style
5761
# end
5862

5963
def sentiment
60-
@document_sentiment_color = (@analysis.sentiment_score < 0) ? 'blue' : 'green'
64+
unless @analysis&.has_sentiment_scores?
65+
redirect_to analysis_document_path(@document), notice: "Sentiment data is not yet available for this document."
66+
return
67+
end
68+
69+
@document_sentiment_color = (@analysis.sentiment_score.to_f < 0) ? 'blue' : 'green'
6170
@document_emotion_data = Hash[{
62-
"Anger" => (100 * @analysis.anger_score).round(1),
63-
"Fear" => (100 * @analysis.fear_score).round(1),
64-
"Sadness" => (100 * @analysis.sadness_score).round(1),
65-
"Disgust" => (100 * @analysis.disgust_score).round(1),
66-
"Joy" => (100 * @analysis.joy_score).round(1)
71+
"Anger" => (100 * (@analysis.anger_score || 0)).round(1),
72+
"Fear" => (100 * (@analysis.fear_score || 0)).round(1),
73+
"Sadness" => (100 * (@analysis.sadness_score || 0)).round(1),
74+
"Disgust" => (100 * (@analysis.disgust_score || 0)).round(1),
75+
"Joy" => (100 * (@analysis.joy_score || 0)).round(1)
6776
}.sort_by(&:second).reverse]
6877
@document_dominant_emotion = @document_emotion_data.keys.first
6978
@document_secondary_emotion = @document_emotion_data.keys.second

app/jobs/document_mention_job.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def perform(*args)
2121
# by the document owner, or else people could add arbitrary pages to quick reference
2222
# to view them.
2323
next unless Rails.application.config.content_types[:all].map(&:name).include?(entity_type)
24-
related_page = entity_type.constantize.find(id)
24+
related_page = entity_type.constantize.find_by(id: id)
25+
next unless related_page.present?
2526
# todo we could also work off privacy here, so people could add public pages to quick reference
2627
# -- we'd have to delete those quick-references whenever a page went private though
2728
next unless related_page.user_id == document.user_id

app/models/concerns/has_parts_of_speech.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def interrogatives
7575
end
7676

7777
def numbers
78-
@numbers ||= text.strip
78+
@numbers ||= plaintext.strip
7979
.split(' ')
8080
.select { |w| is_numeric?(w) }
8181
.uniq

app/models/documents/document_analysis.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ class DocumentAnalysis < ApplicationRecord
1414

1515
# usage: analysis.pos_percentage(:adjective) => 23.4
1616
def pos_percentage(pos_symbol)
17+
return 0.0 if word_count.nil? || word_count == 0
1718
(send(pos_symbol.to_s + '_count').to_f / word_count * 100).round(2)
1819
end
1920

app/models/documents/document_entity.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,15 @@ def linked_name_if_possible
5353
end
5454

5555
def dominant_emotion
56-
return { unknown: 1 } if emotions.values.uniq == [0]
56+
return [[:unknown, 1]] if emotions.values.compact.empty? || emotions.values.uniq == [0]
5757

58-
emotions.sort_by { |emotion, score| score }.reverse
58+
emotions.sort_by { |emotion, score| score.to_f }.reverse
5959
end
6060

6161
def recessive_emotion
62-
return { unknown: 1 } if emotions.values.uniq == [0]
62+
return [[:unknown, 1]] if emotions.values.compact.empty? || emotions.values.uniq == [0]
6363

64-
emotions.sort_by { |emotion, score| score }
64+
emotions.sort_by { |emotion, score| score.to_f }
6565
end
6666

6767
def emotions

app/services/documents/analysis/content_service.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def self.analyze(analysis_id)
2222
analysis.save!
2323
end
2424

25-
def self.adult_content?(matchlist=:hate, content)
25+
def self.adult_content?(content, matchlist: :hate)
2626
LanguageFilter::Filter.new(matchlist: matchlist.to_sym).matched(content)
2727
end
2828
end

app/services/documents/analysis/flesch_kincaid_service.rb

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@ module Documents
22
module Analysis
33
class FleschKincaidService < Service
44
def self.grade_level(document)
5-
@flesch_kincaid_grade_level ||= [
5+
return 0 if document.words.length == 0 || document.sentences.length == 0
6+
[
67
0.38 * (document.words.length.to_f / document.sentences.length),
78
11.18 * (document.word_syllables.sum.to_f / document.words.length),
89
-15.59
910
].sum.clamp(0, 16)
1011
end
1112

1213
def self.age_minimum(document)
13-
@flesch_kincaid_age_minimum ||= case reading_ease(document)
14+
case reading_ease(document)
1415
when (90..100) then 11
1516
when (71..89) then 12
1617
when (67..69) then 13
@@ -26,7 +27,8 @@ def self.age_minimum(document)
2627
end
2728

2829
def self.reading_ease(document)
29-
@flesch_kincaid_reading_ease ||= [
30+
return 0 if document.words.length == 0 || document.sentences.length == 0
31+
[
3032
206.835,
3133
-(1.015 * document.words.length.to_f / document.sentences.length),
3234
-(84.6 * document.word_syllables.sum.to_f / document.words.length)

app/services/documents/analysis/readability_service.rb

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,22 @@ def self.analyze(analysis_id)
2626
end
2727

2828
def self.forcast_grade_level(document)
29-
@forcast_grade_level ||= 20 - (((document.words_with_syllables(1).length.to_f / document.words.length) * 150) / 10.0).clamp(1, 16)
29+
return 1 if document.words.length == 0
30+
(20 - (((document.words_with_syllables(1).length.to_f / document.words.length) * 150) / 10.0)).clamp(1, 16)
3031
end
3132

3233
def self.coleman_liau_index(document)
33-
@coleman_liau_index ||= [
34+
return 1 if document.words.length == 0 || document.sentences.length == 0
35+
[
3436
0.0588 * 100 * document.characters.reject { |l| [" ", "\t", "\r", "\n"].include?(l) }.length.to_f / document.words.length,
3537
-0.296 * 100/ (document.words.length.to_f / document.sentences.length),
3638
-15.8
3739
].sum.clamp(1, 16)
3840
end
3941

4042
def self.automated_readability_index(document)
41-
@automated_readability_index ||= [
43+
return 1 if document.words.length == 0 || document.sentences.length == 0
44+
[
4245
4.71 * document.characters.reject(&:blank?).length.to_f / document.words.length,
4346
0.5 * document.words.length.to_f / document.sentences.length,
4447
-21.43
@@ -47,19 +50,21 @@ def self.automated_readability_index(document)
4750

4851
def self.gunning_fog_index(document)
4952
#todo GFI word/suffix exclusions
50-
@gunning_fog_index ||= 0.4 * (document.words.length.to_f/ document.sentences.length + 100 * (document.complex_words.length.to_f / document.words.length)).clamp(1, 16)
53+
return 1 if document.words.length == 0 || document.sentences.length == 0
54+
(0.4 * (document.words.length.to_f/ document.sentences.length + 100 * (document.complex_words.length.to_f / document.words.length))).clamp(1, 16)
5155
end
5256

5357
def self.smog_grade(document)
54-
@smog_grade ||= (1.043 * Math.sqrt(document.complex_words.length.to_f * (30.0 / document.sentences.length)) + 3.1291).clamp(1, 16)
58+
return 1 if document.sentences.length == 0
59+
(1.043 * Math.sqrt(document.complex_words.length.to_f * (30.0 / document.sentences.length)) + 3.1291).clamp(1, 16)
5560
end
5661

5762
def self.linsear_write_grade(document)
58-
@linsear_write_grade ||= TextStat.linsear_write_formula(document.plaintext).clamp(1, 16)
63+
TextStat.linsear_write_formula(document.plaintext).clamp(1, 16)
5964
end
6065

6166
def self.dale_chall_grade(document)
62-
@dale_chall_grade ||= TextStat.dale_chall_readability_score(document.plaintext).clamp(1, 10)
67+
TextStat.dale_chall_readability_score(document.plaintext).clamp(1, 10)
6368
end
6469

6570
# deprecated in favor of TextStat.text_standard(text, float_output=False)

app/services/documents/analysis/syllables_service.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@ class SyllablesService < Service
66
}
77

88
def self.count(word)
9-
word.downcase.gsub!(/[^a-z]/, '')
9+
word = word.downcase.gsub(/[^a-z]/, '')
1010

1111
return 1 if word.length <= 3
1212
return SYLLABLE_COUNT_OVERRIDES[word] if SYLLABLE_COUNT_OVERRIDES.key?(word)
1313

14-
word.sub(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, '').sub!(/^y/, '')
15-
word.scan(/[aeiouy]{1,2}/).length
14+
word = word.sub(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, '').sub(/^y/, '')
15+
count = word.scan(/[aeiouy]{1,2}/).length
16+
count > 0 ? count : 1
1617
end
1718
end
1819
end

test/fixtures/document_analyses.yml

Lines changed: 77 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2,60 +2,89 @@
22

33
one:
44
document: one
5-
word_count: 1
5+
word_count: 65
66
page_count: 1
7-
paragraph_count: 1
8-
character_count: 1
9-
readability_score: 1
10-
combined_average_reading_level: 1.5
11-
flesch_kincaid_grade_level: 1
12-
flesch_kincaid_age_minimum: 1
13-
flesch_kincaid_reading_ease: 1.5
14-
forcast_grade_level: 1.5
15-
coleman_liau_index: 1.5
16-
automated_readability_index: 1.5
17-
gunning_fog_index: 1.5
18-
adjective_count: 1
19-
noun_count: 1
20-
verb_count: 1
21-
pronoun_count: 1
22-
preposition_count: 1
23-
conjunction_count: 1
24-
adverb_count: 1
25-
determiner_count: 1
26-
n_syllable_words:
27-
words_used_once_count: 1
28-
words_used_repeatedly_count: 1
29-
simple_words_count: 1
30-
complex_words_count: 1
31-
words_per_sentence:
7+
paragraph_count: 2
8+
character_count: 350
9+
sentence_count: 3
10+
readability_score: 55
11+
combined_average_reading_level: 8.5
12+
flesch_kincaid_grade_level: 8
13+
flesch_kincaid_age_minimum: 13
14+
flesch_kincaid_reading_ease: 65.0
15+
forcast_grade_level: 9.5
16+
coleman_liau_index: 7.5
17+
automated_readability_index: 8.0
18+
gunning_fog_index: 10.5
19+
smog_grade: 9.0
20+
linsear_write_grade: 7.0
21+
dale_chall_grade: 6.5
22+
adjective_count: 5
23+
noun_count: 15
24+
verb_count: 12
25+
pronoun_count: 4
26+
preposition_count: 6
27+
conjunction_count: 3
28+
adverb_count: 2
29+
determiner_count: 8
30+
proper_noun_count: 1
31+
interrogative_count: 0
32+
n_syllable_words: {"1": 40, "2": 15, "3": 8, "4": 2}
33+
words_used_once_count: 45
34+
words_used_repeatedly_count: 20
35+
simple_words_count: 50
36+
complex_words_count: 15
37+
unique_simple_words_count: 40
38+
unique_complex_words_count: 12
39+
words_per_sentence: [20, 25, 20]
40+
sentiment_score: 0.25
41+
sentiment_label: "positive"
42+
language: "en"
43+
joy_score: 0.65
44+
sadness_score: 0.12
45+
fear_score: 0.05
46+
disgust_score: 0.03
47+
anger_score: 0.08
48+
completed_at: "2026-03-01 12:00:00"
49+
queued_at: "2026-03-01 11:55:00"
50+
progress: 100
3251

3352
two:
3453
document: two
35-
word_count: 1
54+
word_count: 9
3655
page_count: 1
3756
paragraph_count: 1
38-
character_count: 1
39-
readability_score: 1
40-
combined_average_reading_level: 1.5
41-
flesch_kincaid_grade_level: 1
42-
flesch_kincaid_age_minimum: 1
43-
flesch_kincaid_reading_ease: 1.5
44-
forcast_grade_level: 1.5
45-
coleman_liau_index: 1.5
46-
automated_readability_index: 1.5
47-
gunning_fog_index: 1.5
48-
adjective_count: 1
49-
noun_count: 1
57+
character_count: 50
58+
sentence_count: 1
59+
readability_score: 80
60+
combined_average_reading_level: 3.0
61+
flesch_kincaid_grade_level: 3
62+
flesch_kincaid_age_minimum: 11
63+
flesch_kincaid_reading_ease: 90.0
64+
forcast_grade_level: 4.0
65+
coleman_liau_index: 3.5
66+
automated_readability_index: 3.0
67+
gunning_fog_index: 4.0
68+
smog_grade: 3.0
69+
linsear_write_grade: 3.0
70+
dale_chall_grade: 3.0
71+
adjective_count: 2
72+
noun_count: 3
5073
verb_count: 1
51-
pronoun_count: 1
74+
pronoun_count: 0
5275
preposition_count: 1
53-
conjunction_count: 1
54-
adverb_count: 1
76+
conjunction_count: 0
77+
adverb_count: 0
5578
determiner_count: 1
56-
n_syllable_words:
57-
words_used_once_count: 1
58-
words_used_repeatedly_count: 1
59-
simple_words_count: 1
60-
complex_words_count: 1
61-
words_per_sentence:
79+
proper_noun_count: 0
80+
interrogative_count: 0
81+
n_syllable_words: {"1": 7, "2": 2}
82+
words_used_once_count: 9
83+
words_used_repeatedly_count: 0
84+
simple_words_count: 9
85+
complex_words_count: 0
86+
unique_simple_words_count: 9
87+
unique_complex_words_count: 0
88+
words_per_sentence: [9]
89+
progress: 100
90+
queued_at: "2026-03-01 11:55:00"

0 commit comments

Comments
 (0)