88require  "set" 
99require  'fileutils' 
1010require  'yaml' 
11+ require  'json' 
1112require  'diffy' 
1213require_relative  "version" 
1314require_relative  'asciidoctor-extensions' 
@@ -95,6 +96,86 @@ def extract_headings(html)
9596  headings 
9697end 
9798
# Builds a term => definition lookup from the rendered gitglossary page.
#
# Every <dt> containing an anchor whose id starts with "def_" is treated as a
# glossary term; the <dd> that immediately follows it supplies the definition.
# In-page links inside a definition ("#def_...") are rewritten to point at the
# glossary page and to open in a new tab.
#
# @param content [String] rendered HTML, optionally preceded by front matter
# @param lang [String] language code; any value other than 'en' produces a
#   language-specific glossary URL
# @return [Hash{String => String}] term name => definition HTML
# @raise [RuntimeError] if a term's <dt> is not directly followed by a <dd>
def extract_glossary_from_html(content, lang = 'en')
  # Skip front matter. The limit of 3 keeps the whole body in one piece even
  # when the body itself contains further "---" lines.
  content = content.split(/^---$/, 3)[2] || content

  # Fix up the links because they're going to be on a different page.
  # The target is the same for every term, so it is computed once up front.
  glossary_url = lang == 'en' ? '/docs/gitglossary' : "/docs/gitglossary/#{lang}"

  # Hack: headings that list an alternative spelling in parentheses are
  # registered under both spellings.
  aliased_terms = {
    'tree-ish (also treeish)' => %w[tree-ish treeish],
    'arbre-esque (aussi arbresque)' => %w[arbre-esque arbresque]
  }

  glossary = {}

  Nokogiri::HTML::DocumentFragment.parse(content).css('dt').each do |dt|
    # Only <dt> elements carrying a "def_*" anchor are glossary terms.
    next unless dt.at_css('a[id^="def_"]')

    term_name = dt.text.strip
    dd = dt.next_element
    raise 'Expected dd' unless dd&.name == 'dd'

    # Re-parse the definition so the rewritten links do not alter the source
    # document.
    definition_fragment = Nokogiri::HTML::DocumentFragment.parse(dd.inner_html.strip)
    definition_fragment.css('a[href^="#def_"]').each do |link|
      link['href'] = "#{glossary_url}#{link['href']}"
      link['target'] = '_blank'
    end
    definition = definition_fragment.to_html

    aliased_terms.fetch(term_name, [term_name]).each do |term|
      glossary[term] = definition
    end
  end

  glossary
end
151+ 
# Writes one pretty-printed JSON file per language with that language's
# glossary data. Does nothing when there is no glossary data at all.
#
# NOTE(review): the string literals in this method were cut off in the copy it
# was restored from. The directory below SITE_ROOT and the wording of the log
# line after "saving glossary data to" are best guesses -- confirm them
# against whatever reads these files before relying on them.
#
# @param glossary_data_by_lang [Hash{String => Hash}] language code => glossary
#   (term => definition HTML)
# @return [Hash, nil] the input hash, or nil when it is empty
def save_glossary_files(glossary_data_by_lang)
  return if glossary_data_by_lang.empty?

  glossary_dir = "#{SITE_ROOT}external/docs/static/js/glossary"
  FileUtils.mkdir_p(glossary_dir)

  glossary_data_by_lang.each do |lang, glossary_data|
    output_file = File.join(glossary_dir, "#{lang}.json")
    puts "   saving glossary data to #{output_file} (#{glossary_data.size} terms)"
    File.write(output_file, JSON.pretty_generate(glossary_data))
  end
end
164+ 
# Wraps escaped angle-bracket placeholders (e.g. "&lt;tree-ish&gt;") whose
# name is a glossary term in a <span class="hover-term"> element carrying the
# term in its data-term attribute. Placeholders that are not glossary terms
# are left untouched.
#
# NOTE(review): the delimiters are matched in their entity-encoded form
# (&lt; / &gt;). The capture excludes "&", which only makes sense for
# entity-delimited text, and matching literal "<...>" would hit real tags.
# Confirm against the rendered HTML.
#
# @param html [String] rendered HTML of a documentation page
# @param glossary_data_by_lang [Hash{String => Hash}] language code => glossary
# @param lang [String] language whose glossary should be consulted
# @return [String] a new string with known terms wrapped; html is not mutated
def mark_glossary_tooltips(html, glossary_data_by_lang, lang)
  current_glossary = glossary_data_by_lang[lang] || {}

  html.gsub(/&lt;([^&]+)&gt;/) do |match|
    term = Regexp.last_match(1)
    # Only mark terms that exist in the glossary
    if current_glossary.key?(term)
      "<span class=\"hover-term\" data-term=\"#{term}\">&lt;#{term}&gt;</span>"
    else
      match
    end
  end
end
178+ 
98179def  index_l10n_doc ( filter_tags ,  doc_list ,  get_content ) 
99180  rebuild  =  ENV . fetch ( "REBUILD_DOC" ,  nil ) 
100181  rerun  =  ENV [ "RERUN" ]  || rebuild  || false 
@@ -139,8 +220,15 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
139220    end 
140221
141222    check_paths  =  Set . new ( [ ] ) 
223+     glossary_data_by_lang  =  { } 
224+ 
225+     # Process glossary docs first so that we can use the parsed glossary to mark 
226+     # tooltip items in the other documents 
227+     glossary_docs  =  doc_files . select  {  |entry | File . basename ( entry [ 0 ] ,  ".#{ ext }  )  == 'gitglossary'  } 
228+     other_docs  =  doc_files . reject  {  |entry | File . basename ( entry [ 0 ] ,  ".#{ ext }  )  == 'gitglossary'  } 
229+     ordered_docs  =  glossary_docs  + other_docs 
142230
143-     doc_files . each  do  |entry |
231+     ordered_docs . each  do  |entry |
144232      full_path ,  sha  =  entry 
145233      ids  =  Set . new ( [ ] ) 
146234      lang  =  File . dirname ( full_path ) 
@@ -177,6 +265,12 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
177265      next  if  !rerun  && lang_data [ lang ]  == asciidoc_sha 
178266
179267      html  =  asciidoc . render 
268+ 
269+       if  path  == 'gitglossary' 
270+         glossary_data_by_lang [ lang ]  =  extract_glossary_from_html ( html ,  lang ) 
271+         puts  "   extracted #{ glossary_data_by_lang [ lang ] . size } #{ lang }  
272+       end 
273+ 
180274      html . gsub! ( /linkgit:(\S +?)\[ (\d +)\] / )  do  |line |
181275        x  =  /^linkgit:(\S +?)\[ (\d +)\] / . match ( line ) 
182276        relurl  =  "docs/#{ x [ 1 ] . gsub ( /-/ ,  '-' ) } #{ lang }  
@@ -223,6 +317,8 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
223317        "#{ before } \" #{ after } \"  >}}" 
224318      end 
225319
320+       html  =  mark_glossary_tooltips ( html ,  glossary_data_by_lang ,  lang ) 
321+ 
226322      # Write <docname>/<lang>.html 
227323      front_matter  =  { 
228324        "category"  =>  "manual" , 
@@ -248,6 +344,8 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
248344      lang_data [ lang ]  =  asciidoc_sha 
249345    end 
250346
347+     save_glossary_files ( glossary_data_by_lang ) 
348+ 
251349    # In some cases, translations are not complete. As a consequence, some 
252350    # translated manual pages may point to other translated manual pages that do 
253351    # not exist. In these cases, redirect to the English version. 
@@ -432,8 +530,15 @@ def index_doc(filter_tags, doc_list, get_content)
432530      end 
433531
434532      check_paths  =  Set . new ( [ ] ) 
533+       glossary_data_by_lang  =  { } 
435534
436-       doc_files . each  do  |entry |
535+       # Process glossary docs first so that we can use the parsed glossary to mark 
536+       # tooltip items in the other documents 
537+       glossary_docs  =  doc_files . select  {  |entry | File . basename ( entry [ 0 ] . sub ( /\. adoc$/ ,  '.txt' ) ,  '.txt' )  == 'gitglossary'  } 
538+       other_docs  =  doc_files . reject  {  |entry | File . basename ( entry [ 0 ] . sub ( /\. adoc$/ ,  '.txt' ) ,  '.txt' )  == 'gitglossary'  } 
539+       ordered_docs  =  glossary_docs  + other_docs 
540+ 
541+       ordered_docs . each  do  |entry |
437542        path ,  sha  =  entry 
438543        txt_path  =  path . sub ( /\. adoc$/ ,  '.txt' ) 
439544        ids  =  Set . new ( [ ] ) 
@@ -482,6 +587,12 @@ def index_doc(filter_tags, doc_list, get_content)
482587
483588        # Generate HTML 
484589        html  =  asciidoc . render 
590+ 
591+         if  docname  == 'gitglossary' 
592+           glossary_data_by_lang [ 'en' ]  =  extract_glossary_from_html ( html ,  'en' ) 
593+           puts  "   extracted #{ glossary_data_by_lang [ 'en' ] . size }  
594+         end 
595+ 
485596        html . gsub! ( /linkgit:+(\S +?)\[ (\d +)\] / )  do  |line |
486597          x  =  /^linkgit:+(\S +?)\[ (\d +)\] / . match ( line ) 
487598          if  x [ 1 ]  == "curl" 
@@ -522,6 +633,8 @@ def index_doc(filter_tags, doc_list, get_content)
522633          "#{ before } \" #{ after } \"  >}}" 
523634        end 
524635
636+         html  =  mark_glossary_tooltips ( html ,  glossary_data_by_lang ,  'en' ) 
637+ 
525638        doc_versions  =  version_map . keys . sort { |a ,  b | Version . version_to_num ( a )  <=> Version . version_to_num ( b ) } 
526639        doc_version_index  =  doc_versions . index ( version ) 
527640
@@ -640,6 +753,9 @@ def index_doc(filter_tags, doc_list, get_content)
640753        end 
641754      end 
642755    end 
756+ 
757+     save_glossary_files ( glossary_data_by_lang ) 
758+ 
643759    data [ "latest-version" ]  =  version  if  !data [ "latest-version" ]  || Version . version_to_num ( data [ "latest-version" ] )  < Version . version_to_num ( version ) 
644760  end 
645761
0 commit comments