Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 47 additions & 38 deletions lib/importer/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,33 +57,57 @@ def get_json(url, params = {})

protected


def run_after_import_hooks(resource)
# Runs hooks, logs any import issues, verifies imported keys, and handles Uzbek Latin conversion
def run_after_import_hooks(resource, source_key: nil, imported_keys: nil)
resource.run_draft_import_hooks

if @issues.present?
issues_group = @issues.group_by do |issue|
issue[:tag]
end

issues_group.keys.each do |issue_tag|
issue_description = issues_group[issue_tag].map do |issue|
issue[:text]
end

issues_group = @issues.group_by { |issue| issue[:tag] }
issues_group.each do |issue_tag, issues|
descriptions = issues.map { |i| i[:text] }.uniq
AdminTodo.create(
is_finished: false,
tags: issue_tag,
resource_content_id: resource.id,
description: "#{resource.source_slug} parse issues in #{resource.name}(#{resource.id}). <div>#{issue_tag}</div>\n#{issue_description.uniq.join(', ')}"
description: "#{resource.source_slug} parse issues in #{resource.name}(#{resource.id}). <div>#{issue_tag}</div>\n#{descriptions.join(', ')}"
)
end
end

if resource.id == 127
# This is Ubzek translation, we've a Latin version of this translation
# update the latin version too
# Verify JSON keys vs imported keys
if source_key && imported_keys
begin
sources = get_json('https://tafsir.app/sources/sources.json')
json_entry = sources[source_key]
json_keys = json_entry && json_entry['keys']

if json_keys.is_a?(Array)
missing = json_keys - imported_keys.keys
extra = imported_keys.keys - json_keys

log_message "\n=== Import Verification for '#{source_key}' ==="
log_message "Keys in JSON: #{json_keys.inspect}"
log_message "Imported keys: #{imported_keys.keys.inspect}"

if missing.any?
log_message "Missing keys: #{missing.inspect}"
else
log_message "No missing keys. All JSON keys imported."
end

if extra.any?
log_message "Extra imported keys not in JSON: #{extra.inspect}"
end
else
log_message "No 'keys' array found in sources.json for '#{source_key}'"
end
rescue => e
log_message "Error during import verification for '#{source_key}': #{e.message}"
end
end

if resource.id == 127
# This is Uzbek translation, we have a Latin version too
latin = ResourceContent.find(55)
latin_footnote = ResourceContent.find(195)
converter = Utils::CyrillicToLatin.new
Expand All @@ -108,10 +132,10 @@ def run_after_import_hooks(resource)
)

draft_translation.foot_notes.each do |foot_note|
text = converter.to_latin(foot_note.draft_text)
note_text = converter.to_latin(foot_note.draft_text)
foot_note.update_columns(
draft_text: text,
text_matched: text == foot_note.current_text
draft_text: note_text,
text_matched: note_text == foot_note.current_text
)
end
end
Expand All @@ -120,14 +144,12 @@ def run_after_import_hooks(resource)
resource.save(validate: false)
end


# Emits a diagnostic/progress message to standard output.
#
# @param message [String] the text to print
def log_message(message)
  $stdout.puts(message)
end

# Records an import problem in the @issues buffer and immediately
# echoes it to the console log.
#
# @param issue [Hash] expects :tag and :text keys
def log_issue(issue)
  @issues.push(issue)
  log_message("#{issue[:tag]}: #{issue[:text]}")
end

Expand All @@ -150,33 +172,20 @@ def sanitize(text)
end

# Returns +text+ with any invalid byte sequences replaced and
# leading/trailing whitespace removed.
#
# @param text [String] possibly mis-encoded input
# @return [String] a valid-encoding, stripped copy
def fix_encoding(text)
  # String#scrub substitutes invalid bytes with the Unicode replacement
  # character, so the result always has a valid encoding.
  text = text.valid_encoding? ? text : text.scrub
  # NOTE(review): String#strip does not remove U+00A0 (non-breaking space);
  # the regex-based trim removed here was dead code, but confirm NBSP
  # trimming is no longer required.
  text.strip
end

# Splits +text+ into non-blank paragraphs on runs of newlines.
#
# @param text [String, nil] raw text (may be blank)
# @return [Array<String>] paragraphs with blank entries removed
def split_paragraphs(text)
  return [] if text.blank?

  # Split on one or more newlines, tolerating surrounding carriage
  # returns (\r\n line endings). The duplicate split that used the
  # broken /\r?\n+r?/ pattern (literal 'r') was dead code and is removed.
  text.to_str.split(/\r?\n+\r?/).select(&:present?)
end

# Formats +text+ as HTML: a single paragraph is returned stripped and
# unwrapped; multiple paragraphs are each wrapped in <p> tags.
#
# The leftover diff residue here was a real bug as written: the dead
# else-branch ran map! (mutating +paragraphs+) and the trailing map then
# wrapped every paragraph in <p> a second time. Only the corrected
# single-pass version is kept.
#
# @param text [String, nil]
# @return [String] bare text for one paragraph, html_safe markup otherwise
def simple_format(text)
  paragraphs = split_paragraphs(text)
  # Single paragraph: return the bare text without <p> wrapping.
  return paragraphs.first.strip if paragraphs.size == 1

  paragraphs.map { |para| "<p>#{para.strip}</p>" }.join('').html_safe
end
end
end
55 changes: 26 additions & 29 deletions lib/importer/tafsir_app.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,25 +77,23 @@ def import(key)
verses_imported = {}

Verse.order('verse_index ASC').find_each do |verse|
next if verses_imported[verse.id].present?
next if verses_imported[verse.id]

result = fetch_tafsir(key, verse)

if tafsir = import_tafsir(verse, result, resource_content)
tafsir.ayah_group_ids.each do |id|
verses_imported[id] = true
end
tafsir.ayah_group_ids.each { |id| verses_imported[id] = true }
else
log_issue({tag: 'missing-tafsir', text: verse.verse_key })
log_issue({ tag: 'missing-tafsir', text: verse.verse_key })
end
end

run_after_import_hooks(resource_content)
# Pass source_key and imported_keys for verification
run_after_import_hooks(resource_content, source_key: key, imported_keys: verses_imported)
end

protected


def import_tafsir(verse, tafsir_json, resource_content)
draft_tafsir = Draft::Tafsir
.where(
Expand All @@ -107,44 +105,44 @@ def import_tafsir(verse, tafsir_json, resource_content)
source_text = tafsir_json['data'].to_s.strip

if source_text.present?
if resource_content.tafsir_app_key == 'iraab-graphs'
text = source_text
else
text = sanitize_text(source_text)
end
text = if resource_content.tafsir_app_key == 'iraab-graphs'
source_text
else
sanitize_text(source_text)
end

draft_tafsir.set_meta_value('source_data', { text: source_text })
existing_tafsir = Tafsir.for_verse(verse, resource_content)

draft_tafsir.tafsir_id = existing_tafsir&.id
draft_tafsir.current_text = existing_tafsir&.text
draft_tafsir.draft_text = text
draft_tafsir.text_matched = existing_tafsir&.text == text

draft_tafsir.verse_key = verse.verse_key
draft_tafsir.tafsir_id = existing_tafsir&.id
draft_tafsir.current_text = existing_tafsir&.text
draft_tafsir.draft_text = text
draft_tafsir.text_matched = existing_tafsir&.text == text

draft_tafsir.verse_key = verse.verse_key
draft_tafsir.group_verse_key_from = group_verses.first.verse_key
draft_tafsir.group_verse_key_to = group_verses.last.verse_key
draft_tafsir.group_verses_count = group_verses.size
draft_tafsir.start_verse_id = group_verses.first.id
draft_tafsir.end_verse_id = group_verses.last.id
draft_tafsir.group_tafsir_id = verse.id
draft_tafsir.group_verse_key_to = group_verses.last.verse_key
draft_tafsir.group_verses_count = group_verses.size
draft_tafsir.start_verse_id = group_verses.first.id
draft_tafsir.end_verse_id = group_verses.last.id
draft_tafsir.group_tafsir_id = verse.id

draft_tafsir.save(validate: false)

puts "#{verse.verse_key} - #{draft_tafsir.id}"
draft_tafsir
else
log_message "Tafsir is missing for ayah #{verse.verse_key}"
nil
end
end

def fetch_tafsir(key, verse)
url = "https://tafsir.app/get.php?src=#{key}&s=#{verse.chapter_id}&a=#{verse.verse_number}&ver=1"
data = get_json(url)

data['count'] = 0 if data['count'].blank?
data['ayahs_start'] = verse.verse_number if data['ayahs_start'].blank?
data['count'] ||= 0
data['ayahs_start'] ||= verse.verse_number

data
rescue RestClient::NotFound
Expand All @@ -154,7 +152,8 @@ def fetch_tafsir(key, verse)

# Fetches the consecutive run of ayahs grouped under a single tafsir
# entry, scoped to the chapter of +verse+.
#
# The raw span was diff residue and not valid Ruby (the old one-line hash
# argument sat above the new two-line version with no separating comma);
# this reconstructs the intended query.
#
# @param verse [Verse] verse whose chapter_id scopes the lookup
# @param start_ayah [Integer] first ayah number of the group
# @param count [Integer] additional ayahs the group spans (inclusive range)
# @return [ActiveRecord::Relation] matching verses ordered by verse_index
def find_ayah_group(verse, start_ayah, count)
  Verse.where(
    chapter_id: verse.chapter_id,
    verse_number: start_ayah..(start_ayah + count)
  ).order('verse_index ASC')
end

Expand All @@ -180,10 +179,8 @@ def sanitize_text(text)

doc.css('a[href^="/quran-roots/"]').each do |a_tag|
if a_tag['href'] =~ %r{^/quran-roots/(.+)}
root_text = $1
# Create a new <root> element with the extracted text
new_node = Nokogiri::XML::Node.new('root', doc)
new_node.content = root_text
new_node.content = $1
a_tag.replace(new_node)
end
end
Expand Down