88require "set"
99require 'fileutils'
1010require 'yaml'
11+ require 'json'
1112require 'diffy'
1213require_relative "version"
1314require_relative 'asciidoctor-extensions'
@@ -95,6 +96,68 @@ def extract_headings(html)
9596 headings
9697end
9798
# Builds a term => definition map from rendered glossary HTML.
#
# Every <dt> that contains an anchor whose id starts with "def_" is treated as
# a glossary term; the inner HTML of the element that follows it (which must
# be a <dd>) is stored as its definition.
#
# @param content [String] rendered HTML, optionally preceded by a front-matter
#   block delimited by lines consisting of "---"
# @return [Hash{String => String}] term name => definition HTML (stripped)
# @raise [RuntimeError] if a glossary <dt> is not directly followed by a <dd>
def extract_glossary_from_html(content)
  # Skip front matter. The limit of 3 keeps any later "---" line inside the
  # body instead of truncating the document there, and the blank-head check
  # avoids discarding content when the document does not start with "---".
  head, _front_matter, body = content.split(/^---$/, 3)
  content = body if body && head.strip.empty?

  doc = Nokogiri::HTML::DocumentFragment.parse(content)

  doc.css('dt').each_with_object({}) do |dt, glossary|
    # The selector already guarantees the id starts with "def_".
    next unless dt.css('a[id^="def_"]').first

    term_name = dt.text.strip

    # Entries written as "term (also alias)", e.g. "tree-ish (also treeish)",
    # are indexed under both spellings.
    alias_match = /\A(.+?)\s+\(also\s+(.+)\)\z/.match(term_name)
    term_names = alias_match ? alias_match.captures : [term_name]

    dd = dt.next_element
    raise "Expected dd after glossary term #{term_name.inspect}" unless dd&.name == 'dd'

    definition = dd.inner_html.strip
    term_names.each { |term| glossary[term] = definition }
  end
end
133+
# Writes one JSON file per language with that language's glossary data.
#
# Files are written to <SITE_ROOT>/static/js/glossary/<lang>.json; the
# directory is created if needed. SITE_ROOT is a constant defined outside
# this method. Nothing is written (and no directory is created) when the
# input is empty.
#
# @param glossary_data_by_lang [Hash{String => Hash}] language code =>
#   glossary hash (term => definition)
# @return [Hash, nil] the input hash, or nil when it is empty
def save_glossary_files(glossary_data_by_lang)
  return if glossary_data_by_lang.empty?

  # File.join yields the same path whether or not SITE_ROOT ends with a slash.
  glossary_dir = File.join(SITE_ROOT, 'static', 'js', 'glossary')
  FileUtils.mkdir_p(glossary_dir)

  glossary_data_by_lang.each do |lang, glossary_data|
    output_file = File.join(glossary_dir, "#{lang}.json")
    puts " saving glossary data to #{output_file} (#{glossary_data.size} terms)"
    File.write(output_file, JSON.generate(glossary_data))
  end
end
146+
# Wraps glossary placeholders in the rendered HTML with tooltip markup.
#
# A placeholder is an HTML-escaped angle-bracket token such as
# "&lt;tree-ish&gt;". When its inner text is a key of the glossary for +lang+,
# it is wrapped in <span class="hover-term" data-term="...">; every other
# occurrence is left untouched. Real HTML tags are never matched because only
# the escaped "&lt;" / "&gt;" delimiters are recognised.
#
# @param html [String] rendered HTML
# @param glossary_data_by_lang [Hash{String => Hash}] language code =>
#   glossary hash (term => definition)
# @param lang [String] language whose glossary is consulted; a missing
#   language is treated as an empty glossary
# @return [String] a new string with known terms wrapped
def mark_glossary_tooltips(html, glossary_data_by_lang, lang)
  current_glossary = glossary_data_by_lang[lang] || {}

  # [^&] keeps a match from running past the closing "&gt;" (or any other
  # entity), so each placeholder is matched on its own.
  html.gsub(/&lt;([^&]+)&gt;/) do |match|
    term = Regexp.last_match(1)

    # Only mark terms that exist in the glossary
    if current_glossary.key?(term)
      # A double quote is the only character that could break out of the attribute.
      attr_value = term.gsub('"', '&quot;')
      %(<span class="hover-term" data-term="#{attr_value}">&lt;#{term}&gt;</span>)
    else
      match
    end
  end
end
160+
98161def index_l10n_doc ( filter_tags , doc_list , get_content )
99162 rebuild = ENV . fetch ( "REBUILD_DOC" , nil )
100163 rerun = ENV [ "RERUN" ] || rebuild || false
@@ -139,8 +202,15 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
139202 end
140203
141204 check_paths = Set . new ( [ ] )
205+ glossary_data_by_lang = { }
142206
143- doc_files . each do |entry |
207+ # Process glossary docs first so that we can use the parsed glossary to mark
208+ # tooltip items in the other documents
209+ glossary_docs = doc_files . select { |entry | File . basename ( entry [ 0 ] , ".#{ ext } " ) == 'gitglossary' }
210+ other_docs = doc_files . reject { |entry | File . basename ( entry [ 0 ] , ".#{ ext } " ) == 'gitglossary' }
211+ ordered_docs = glossary_docs + other_docs
212+
213+ ordered_docs . each do |entry |
144214 full_path , sha = entry
145215 ids = Set . new ( [ ] )
146216 lang = File . dirname ( full_path )
@@ -177,6 +247,12 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
177247 next if !rerun && lang_data [ lang ] == asciidoc_sha
178248
179249 html = asciidoc . render
250+
251+ if path == 'gitglossary'
252+ glossary_data_by_lang [ lang ] = extract_glossary_from_html ( html )
253+ puts " extracted #{ glossary_data_by_lang [ lang ] . size } glossary terms for #{ lang } "
254+ end
255+
180256 html . gsub! ( /linkgit:(\S +?)\[ (\d +)\] / ) do |line |
181257 x = /^linkgit:(\S +?)\[ (\d +)\] / . match ( line )
182258 relurl = "docs/#{ x [ 1 ] . gsub ( /-/ , '-' ) } /#{ lang } "
@@ -223,6 +299,8 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
223299 "#{ before } {{< relurl \" #{ after } \" >}}"
224300 end
225301
302+ html = mark_glossary_tooltips ( html , glossary_data_by_lang , lang )
303+
226304 # Write <docname>/<lang>.html
227305 front_matter = {
228306 "category" => "manual" ,
@@ -248,6 +326,8 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
248326 lang_data [ lang ] = asciidoc_sha
249327 end
250328
329+ save_glossary_files ( glossary_data_by_lang )
330+
251331 # In some cases, translations are not complete. As a consequence, some
252332 # translated manual pages may point to other translated manual pages that do
253333 # not exist. In these cases, redirect to the English version.
@@ -432,8 +512,15 @@ def index_doc(filter_tags, doc_list, get_content)
432512 end
433513
434514 check_paths = Set . new ( [ ] )
515+ glossary_data_by_lang = { }
516+
517+ # Process glossary docs first so that we can use the parsed glossary to mark
518+ # tooltip items in the other documents
519+ glossary_docs = doc_files . select { |entry | File . basename ( entry [ 0 ] . sub ( /\. adoc$/ , '.txt' ) , '.txt' ) == 'gitglossary' }
520+ other_docs = doc_files . reject { |entry | File . basename ( entry [ 0 ] . sub ( /\. adoc$/ , '.txt' ) , '.txt' ) == 'gitglossary' }
521+ ordered_docs = glossary_docs + other_docs
435522
436- doc_files . each do |entry |
523+ ordered_docs . each do |entry |
437524 path , sha = entry
438525 txt_path = path . sub ( /\. adoc$/ , '.txt' )
439526 ids = Set . new ( [ ] )
@@ -482,6 +569,12 @@ def index_doc(filter_tags, doc_list, get_content)
482569
483570 # Generate HTML
484571 html = asciidoc . render
572+
573+ if docname == 'gitglossary'
574+ glossary_data_by_lang [ 'en' ] = extract_glossary_from_html ( html )
575+ puts " extracted #{ glossary_data_by_lang [ 'en' ] . size } glossary terms for 'en'"
576+ end
577+
485578 html . gsub! ( /linkgit:+(\S +?)\[ (\d +)\] / ) do |line |
486579 x = /^linkgit:+(\S +?)\[ (\d +)\] / . match ( line )
487580 if x [ 1 ] == "curl"
@@ -522,6 +615,8 @@ def index_doc(filter_tags, doc_list, get_content)
522615 "#{ before } {{< relurl \" #{ after } \" >}}"
523616 end
524617
618+ html = mark_glossary_tooltips ( html , glossary_data_by_lang , 'en' )
619+
525620 doc_versions = version_map . keys . sort { |a , b | Version . version_to_num ( a ) <=> Version . version_to_num ( b ) }
526621 doc_version_index = doc_versions . index ( version )
527622
@@ -640,6 +735,9 @@ def index_doc(filter_tags, doc_list, get_content)
640735 end
641736 end
642737 end
738+
739+ save_glossary_files ( glossary_data_by_lang )
740+
643741 data [ "latest-version" ] = version if !data [ "latest-version" ] || Version . version_to_num ( data [ "latest-version" ] ) < Version . version_to_num ( version )
644742 end
645743
0 commit comments