88require "set"
99require 'fileutils'
1010require 'yaml'
11+ require 'json'
1112require 'diffy'
1213require_relative "version"
1314require_relative 'asciidoctor-extensions'
@@ -95,6 +96,86 @@ def extract_headings(html)
9596 headings
9697end
9798
# Build a term => definition lookup table from a rendered glossary page.
#
# Every <dt> containing an anchor whose id starts with "def_" is treated as a
# glossary term; the element immediately following that <dt> must be the <dd>
# holding its definition.
#
# @param content [String] rendered HTML, optionally preceded by a front matter
#   block delimited by "---" lines
# @param lang [String] language code; decides which glossary page the
#   in-page "#def_*" links inside definitions are rewritten to point at
# @return [Hash{String => String}] term name => definition HTML
# @raise [RuntimeError] when a term's <dt> is not directly followed by a <dd>
def extract_glossary_from_html(content, lang = 'en')
  # Skip front matter. Only attempted when the document really opens with a
  # "---" line, and the split is limited to three pieces so that any later
  # "---" line in the body cannot truncate the content.
  if content =~ /\A\s*^---$/
    body = content.split(/^---$/, 3)[2]
    content = body if body
  end

  doc = Nokogiri::HTML::DocumentFragment.parse(content)

  # Definitions are shown on other pages, so in-page anchors must become
  # links to the glossary page of the same language.
  glossary_url = lang == 'en' ? '/docs/gitglossary' : "/docs/gitglossary/#{lang}"

  # Hack to handle the one entry that declares an alternative spelling in its
  # heading ("also" / "aussi"): register the definition under both names.
  alternate_spellings = {
    'tree-ish (also treeish)' => ['tree-ish', 'treeish'],
    'arbre-esque (aussi arbresque)' => ['arbre-esque', 'arbresque']
  }

  glossary = {}

  doc.css('dt').each do |dt|
    # The attribute selector already guarantees the "def_" id prefix.
    next unless dt.at_css('a[id^="def_"]')

    term_name = dt.text.strip
    term_names = alternate_spellings.fetch(term_name) { [term_name] }

    dd = dt.next_element
    raise 'Expected dd' unless dd&.name == 'dd'

    # Re-parse the definition on its own so the links can be rewritten without
    # touching the source document.
    definition_fragment = Nokogiri::HTML::DocumentFragment.parse(dd.inner_html.strip)
    definition_fragment.css('a[href^="#def_"]').each do |link|
      link['href'] = "#{glossary_url}#{link['href']}"
      link['target'] = '_blank'
    end
    definition = definition_fragment.to_html

    term_names.each { |term| glossary[term] = definition }
  end

  glossary
end
151+
# Write one pretty-printed JSON file per language under
# <SITE_ROOT>/static/js/glossary/<lang>.json.
#
# Nothing is created when the given hash is empty.
#
# @param glossary_data_by_lang [Hash{String => Hash}] language code => glossary
#   (term => definition HTML)
# @return [void]
def save_glossary_files(glossary_data_by_lang)
  return if glossary_data_by_lang.empty?

  # File.join keeps the path valid whether or not SITE_ROOT ends with a slash.
  glossary_dir = File.join(SITE_ROOT.to_s, 'static', 'js', 'glossary')
  FileUtils.mkdir_p(glossary_dir)

  glossary_data_by_lang.each do |lang, glossary_data|
    output_file = File.join(glossary_dir, "#{lang}.json")
    puts " saving glossary data to #{output_file} (#{glossary_data.size} terms)"
    File.write(output_file, JSON.pretty_generate(glossary_data))
  end
end
164+
# Wrap escaped placeholders such as "&lt;tree-ish&gt;" in a
# <span class="hover-term"> when the placeholder name is a known glossary term.
#
# Only entity-escaped angle brackets are considered, so real HTML tags in the
# rendered page are never touched. Placeholders whose name is not a key of the
# glossary for +lang+ are left exactly as they were.
#
# @param html [String] rendered HTML of a documentation page
# @param glossary_data_by_lang [Hash{String => Hash}] language code => glossary
#   (term => definition HTML)
# @param lang [String] language whose glossary is consulted; a language with
#   no glossary results in no replacements
# @return [String] a new string; +html+ itself is not modified
def mark_glossary_tooltips(html, glossary_data_by_lang, lang)
  current_glossary = glossary_data_by_lang[lang] || {}

  # [^&]+ stops at the next entity, so a match can never run past the closing
  # "&gt;" of the placeholder it started in.
  html.gsub(/&lt;([^&]+)&gt;/) do |match|
    term = Regexp.last_match(1)
    # Only mark terms that exist in the glossary
    if current_glossary.key?(term)
      "<span class=\"hover-term\" data-term=\"#{term}\">&lt;#{term}&gt;</span>"
    else
      match
    end
  end
end
178+
98179def index_l10n_doc ( filter_tags , doc_list , get_content )
99180 rebuild = ENV . fetch ( "REBUILD_DOC" , nil )
100181 rerun = ENV [ "RERUN" ] || rebuild || false
@@ -139,8 +220,15 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
139220 end
140221
141222 check_paths = Set . new ( [ ] )
223+ glossary_data_by_lang = { }
224+
225+ # Process glossary docs first so that we can use the parsed glossary to mark
226+ # tooltip items in the other documents
227+ glossary_docs = doc_files . select { |entry | File . basename ( entry [ 0 ] , ".#{ ext } " ) == 'gitglossary' }
228+ other_docs = doc_files . reject { |entry | File . basename ( entry [ 0 ] , ".#{ ext } " ) == 'gitglossary' }
229+ ordered_docs = glossary_docs + other_docs
142230
143- doc_files . each do |entry |
231+ ordered_docs . each do |entry |
144232 full_path , sha = entry
145233 ids = Set . new ( [ ] )
146234 lang = File . dirname ( full_path )
@@ -177,6 +265,12 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
177265 next if !rerun && lang_data [ lang ] == asciidoc_sha
178266
179267 html = asciidoc . render
268+
269+ if path == 'gitglossary'
270+ glossary_data_by_lang [ lang ] = extract_glossary_from_html ( html , lang )
271+ puts " extracted #{ glossary_data_by_lang [ lang ] . size } glossary terms for #{ lang } "
272+ end
273+
180274 html . gsub! ( /linkgit:(\S +?)\[ (\d +)\] / ) do |line |
181275 x = /^linkgit:(\S +?)\[ (\d +)\] / . match ( line )
182276 relurl = "docs/#{ x [ 1 ] . gsub ( /-/ , '-' ) } /#{ lang } "
@@ -223,6 +317,8 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
223317 "#{ before } {{< relurl \" #{ after } \" >}}"
224318 end
225319
320+ html = mark_glossary_tooltips ( html , glossary_data_by_lang , lang )
321+
226322 # Write <docname>/<lang>.html
227323 front_matter = {
228324 "category" => "manual" ,
@@ -248,6 +344,8 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
248344 lang_data [ lang ] = asciidoc_sha
249345 end
250346
347+ save_glossary_files ( glossary_data_by_lang )
348+
251349 # In some cases, translations are not complete. As a consequence, some
252350 # translated manual pages may point to other translated manual pages that do
253351 # not exist. In these cases, redirect to the English version.
@@ -432,8 +530,15 @@ def index_doc(filter_tags, doc_list, get_content)
432530 end
433531
434532 check_paths = Set . new ( [ ] )
533+ glossary_data_by_lang = { }
435534
436- doc_files . each do |entry |
535+ # Process glossary docs first so that we can use the parsed glossary to mark
536+ # tooltip items in the other documents
537+ glossary_docs = doc_files . select { |entry | File . basename ( entry [ 0 ] . sub ( /\. adoc$/ , '.txt' ) , '.txt' ) == 'gitglossary' }
538+ other_docs = doc_files . reject { |entry | File . basename ( entry [ 0 ] . sub ( /\. adoc$/ , '.txt' ) , '.txt' ) == 'gitglossary' }
539+ ordered_docs = glossary_docs + other_docs
540+
541+ ordered_docs . each do |entry |
437542 path , sha = entry
438543 txt_path = path . sub ( /\. adoc$/ , '.txt' )
439544 ids = Set . new ( [ ] )
@@ -482,6 +587,12 @@ def index_doc(filter_tags, doc_list, get_content)
482587
483588 # Generate HTML
484589 html = asciidoc . render
590+
591+ if docname == 'gitglossary'
592+ glossary_data_by_lang [ 'en' ] = extract_glossary_from_html ( html , 'en' )
593+ puts " extracted #{ glossary_data_by_lang [ 'en' ] . size } glossary terms for 'en'"
594+ end
595+
485596 html . gsub! ( /linkgit:+(\S +?)\[ (\d +)\] / ) do |line |
486597 x = /^linkgit:+(\S +?)\[ (\d +)\] / . match ( line )
487598 if x [ 1 ] == "curl"
@@ -522,6 +633,8 @@ def index_doc(filter_tags, doc_list, get_content)
522633 "#{ before } {{< relurl \" #{ after } \" >}}"
523634 end
524635
636+ html = mark_glossary_tooltips ( html , glossary_data_by_lang , 'en' )
637+
525638 doc_versions = version_map . keys . sort { |a , b | Version . version_to_num ( a ) <=> Version . version_to_num ( b ) }
526639 doc_version_index = doc_versions . index ( version )
527640
@@ -640,6 +753,9 @@ def index_doc(filter_tags, doc_list, get_content)
640753 end
641754 end
642755 end
756+
757+ save_glossary_files ( glossary_data_by_lang )
758+
643759 data [ "latest-version" ] = version if !data [ "latest-version" ] || Version . version_to_num ( data [ "latest-version" ] ) < Version . version_to_num ( version )
644760 end
645761
0 commit comments