66import base64
77import datetime
88import hashlib
9+ import itertools
910import json
1011import logging
1112import os
# NOTE(review): presumably the cap on concurrently running greenlets -- confirm
# against the code that reads it.
CONCURRENCY = 5

# Names of the files that get special treatment during an update: the tags
# file, the main help file, the FAQ (downloaded from its own base URL rather
# than from the project's 'runtime/doc' directory) and matchit.txt (which lives
# in 'runtime/pack/dist/opt/matchit/doc').
TAGS_NAME = "tags"
HELP_NAME = "help.txt"
FAQ_NAME = "vim_faq.txt"
MATCHIT_NAME = "matchit.txt"

# Directory entries that count as documentation items: any '*.txt' file whose
# stem consists of word characters and dashes, or the 'tags' file.
DOC_ITEM_RE = re.compile(r"(?:[-\w]+\.txt|tags)$")

# A version tag: an optional leading 'v' followed by the version proper, which
# must start with a digit; the version (without the 'v') is captured in group 1.
VERSION_TAG_RE = re.compile(r"v?(\d[\w.+-]+)$")
7375 }
7476 }
7577 """ ,
76- "GetDir" : """
77- query GetDir($org: String!, $repo: String!, $expr: String!) {
78+ "GetDirs" : """
79+ query GetDirs($org: String!, $repo: String!,
80+ $expr1: String!, $expr2: String!) {
7881 repository(owner: $org, name: $repo) {
79- object(expression: $expr) {
82+ dir1: object(expression: $expr1) {
83+ ... on Tree {
84+ entries {
85+ type
86+ name
87+ oid
88+ }
89+ }
90+ }
91+ dir2: object(expression: $expr2) {
8092 ... on Tree {
8193 entries {
8294 type
@@ -185,11 +197,10 @@ def _init_g(self, wipe):
185197 id = self ._project , last_update_time = datetime .datetime .utcnow ()
186198 )
187199
188- logging .info (
189- "%s global info: %s" ,
190- self ._project ,
191- ", " .join ("{} = {}" .format (n , getattr (g , n )) for n in g ._properties .keys ()),
200+ gs = ", " .join (
201+ f"{ n } = { getattr (g , n )} " for n in g ._properties .keys () # noqa: SIM118
192202 )
203+ logging .info ("%s global info: %s" , self ._project , gs )
193204
194205 return g
195206
@@ -210,7 +221,7 @@ def _do_update_vim(self, no_rfi):
210221 is_new_vim_version = self ._g .vim_version_tag != old_vim_version_tag
211222
212223 if is_master_updated :
213- # Kick off retrieval of 'runtime/ doc' dir listing in GitHub. This is against
224+ # Kick off retrieval of doc dirs listing in GitHub. This is against
214225 # the 'master' branch, since the docs often get updated after the tagged
215226 # commits that introduce the relevant changes.
216227 docdir_greenlet = self ._spawn (self ._list_docs_dir , self ._g .master_sha )
@@ -223,11 +234,9 @@ def _do_update_vim(self, no_rfi):
223234
224235 # Kick off FAQ download (this also writes the raw file to the datastore, if
225236 # modified)
226- faq_greenlet = self ._spawn (
227- self ._get_file , FAQ_NAME , "http" , base_url = FAQ_BASE_URL
228- )
237+ faq_greenlet = self ._spawn (self ._get_file , FAQ_NAME , "http" )
229238
230- # Iterate over 'runtime/ doc' dir listing (which also updates the items in
239+ # Iterate over doc dirs listing (which also updates the items in
231240 # 'self._rfi_map') and collect list of new/modified files
232241 if docdir_greenlet is None :
233242 logging .info ("No need to get new doc dir listing" )
@@ -246,31 +255,37 @@ def _do_update_vim(self, no_rfi):
246255 faq_result = None
247256 faq_greenlet = self ._spawn (self ._get_file , FAQ_NAME , "db" )
248257
249- # Get tags file from GitHub or datastore, depending on whether it was changed
250- if TAGS_NAME in updated_file_names :
251- updated_file_names .remove (TAGS_NAME )
252- tags_greenlet = self ._spawn (self ._get_file , TAGS_NAME , "http,db" )
253- else :
254- tags_greenlet = self ._spawn (self ._get_file , TAGS_NAME , "db" )
258+ # Get these files from GitHub or datastore, depending on whether they were
259+ # changed
260+ content_needed_greenlets = {}
261+ for name in (TAGS_NAME , MATCHIT_NAME ):
262+ if name in updated_file_names :
263+ updated_file_names .remove (name )
264+ sources = "http,db"
265+ else :
266+ sources = "db"
267+ content_needed_greenlets [name ] = self ._spawn (self ._get_file , name , sources )
255268
256269 if faq_result is None :
257270 faq_result = faq_greenlet .get ()
258271
259- tags_result = tags_greenlet .get ()
272+ tags_result = content_needed_greenlets [TAGS_NAME ].get ()
273+ matchit_result = content_needed_greenlets [MATCHIT_NAME ].get ()
260274
261275 logging .info ("Beginning vimhelp-to-HTML translations" )
262276
263277 self ._g .last_update_time = datetime .datetime .utcnow ()
264278
265279 # Construct the vimhelp-to-html translator, providing it the tags file content,
266- # and adding on the FAQ for extra tags
280+ # and adding on the FAQ and matchit.txt for extra tags
267281 self ._h2h = vimh2h .VimH2H (
268282 mode = "online" ,
269283 project = "vim" ,
270284 version = version_from_tag (self ._g .vim_version_tag ),
271285 tags = tags_result .content .decode (),
272286 )
273287 self ._h2h .add_tags (FAQ_NAME , faq_result .content .decode ())
288+ self ._h2h .add_tags (MATCHIT_NAME , matchit_result .content .decode ())
274289
275290 greenlets = []
276291
@@ -290,6 +305,10 @@ def track_spawn(f, *args, **kwargs):
290305 if faq_result .is_modified or tags_result .is_modified :
291306 track_spawn (self ._translate , FAQ_NAME , faq_result .content )
292307
308+ # Likewise for matchit.txt
309+ if matchit_result .is_modified or tags_result .is_modified :
310+ track_spawn (self ._translate , MATCHIT_NAME , matchit_result .content )
311+
293312 # If we found a new vim version, ensure we translate help.txt, since we're
294313 # displaying the current vim version in the rendered help.txt.html
295314 if is_new_vim_version :
@@ -322,7 +341,7 @@ def _do_update_neovim(self, no_rfi):
322341 # Kick off retrieval of all RawFileInfo entities from the Datastore
323342 rfi_greenlet = self ._spawn (self ._get_all_rfi , no_rfi )
324343
325- # Kick off retrieval of 'runtime/ doc' dir listing in GitHub for the current
344+ # Kick off retrieval of doc dirs listing in GitHub for the current
326345 # version.
327346 docdir_greenlet = self ._spawn (self ._list_docs_dir , self ._g .vim_version_tag )
328347
@@ -337,7 +356,7 @@ def _do_update_neovim(self, no_rfi):
337356 version = version_from_tag (self ._g .vim_version_tag ),
338357 )
339358
340- # Iterate over 'runtime/ doc' dir listing (which also updates the items in
359+ # Iterate over doc dirs listing (which also updates the items in
341360 # 'self._rfi_map'), kicking off retrieval of files and addition of help tags to
342361 # 'self._h2h'; file retrieval also includes writing the raw file to the
343362 # datastore if modified
@@ -436,17 +455,19 @@ def _get_git_refs(self):
436455 def _list_docs_dir (self , git_ref ):
437456 """
438457 Generator that yields '(name: str, is_modified: bool)' pairs on iteration,
439- representing the set of filenames in the 'runtime/doc' directory of the current
458+ representing the set of filenames in the 'runtime/doc' and
459+ 'runtime/pack/dist/opt/matchit/doc' directories of the current
440460 project, and whether each one is new/modified or not.
441461 'git_ref' is the Git ref to use when looking up the directory.
442462 This function both reads and writes 'self._rfi_map'.
443463 """
444464 response = self ._github_graphql_request (
445- "GetDir " ,
465+ "GetDirs " ,
446466 variables = {
447467 "org" : self ._project ,
448468 "repo" : self ._project ,
449- "expr" : git_ref + ":runtime/doc" ,
469+ "expr1" : git_ref + ":runtime/doc" ,
470+ "expr2" : git_ref + ":runtime/pack/dist/opt/matchit/doc" ,
450471 },
451472 etag = self ._g .docdir_etag ,
452473 )
@@ -458,11 +479,13 @@ def _list_docs_dir(self, git_ref):
458479 etag = response .header ("ETag" )
459480 self ._g .docdir_etag = etag .encode () if etag is not None else None
460481 logging .info ("%s doc dir modified, new etag is %s" , self ._project , etag )
461- resp = json .loads (response .body )["data" ]
462- for item in resp ["repository" ]["object" ]["entries" ]:
482+ resp = json .loads (response .body )["data" ]["repository" ]
483+ done = set () # "tags" filename exists in both dirs, only want first one
484+ for item in itertools .chain (resp ["dir1" ]["entries" ], resp ["dir2" ]["entries" ]):
463485 name = item ["name" ]
464- if item ["type" ] != "blob" or not DOC_ITEM_RE .match (name ):
486+ if item ["type" ] != "blob" or not DOC_ITEM_RE .match (name ) or name in done :
465487 continue
488+ done .add (name )
466489 git_sha = item ["oid" ].encode ()
467490 rfi = self ._rfi_map .get (name )
468491 if rfi is None :
@@ -533,7 +556,7 @@ def _get_file_and_add_tags(self, name, sources):
533556 result = self ._get_file (name , sources )
534557 self ._h2h .add_tags (name , result .content .decode ())
535558
536- def _get_file (self , name , sources , base_url = None ):
559+ def _get_file (self , name , sources ):
537560 """
538561 Get file with given 'name' via HTTP and/or from the Datastore, based on
539562 'sources', which should be one of "http", "db", "http,db". If a new/modified
@@ -544,7 +567,7 @@ def _get_file(self, name, sources, base_url=None):
544567 sources_set = set (sources .split ("," ))
545568
546569 if "http" in sources_set :
547- url = ( base_url or self ._download_url_base ()) + name
570+ url = self ._download_url ( name )
548571 headers = {}
549572 if rfi is None :
550573 rfi = self ._rfi_map [name ] = RawFileInfo (
@@ -570,12 +593,15 @@ def _get_file(self, name, sources, base_url=None):
570593
571594 return result
572595
573- def _download_url_base (self ):
574- sha = self ._g .master_sha if self ._project == "vim" else self ._g .vim_version_tag
575- return (
576- GITHUB_DOWNLOAD_URL_BASE
577- + f"{ self ._project } /{ self ._project } /{ sha } /runtime/doc/"
578- )
596+ def _download_url (self , name ):
597+ if name == FAQ_NAME :
598+ return FAQ_BASE_URL + FAQ_NAME
599+ ref = self ._g .master_sha if self ._project == "vim" else self ._g .vim_version_tag
600+ base = f"{ GITHUB_DOWNLOAD_URL_BASE } { self ._project } /{ self ._project } /{ ref } "
601+ if name == MATCHIT_NAME :
602+ return f"{ base } /runtime/pack/dist/opt/matchit/doc/{ name } "
603+ else :
604+ return f"{ base } /runtime/doc/{ name } "
579605
580606 def _translate (self , name , content ):
581607 """
@@ -586,7 +612,7 @@ def _translate(self, name, content):
586612 logging .info (
587613 "Saving HTML translation of '%s:%s' to Datastore" , self ._project , name
588614 )
589- save_transactional ([phead ] + pparts )
615+ save_transactional ([phead , * pparts ] )
590616
591617 def _get_all_rfi (self , no_rfi ):
592618 if no_rfi :
@@ -656,7 +682,7 @@ def to_html(project, name, content, h2h):
656682def save_raw_file (rfi , content ):
657683 rfi_id = rfi .key .id ()
658684 project , name = rfi_id .split (":" )
659- if project == "neovim" or name in (HELP_NAME , FAQ_NAME , TAGS_NAME ):
685+ if project == "neovim" or name in (HELP_NAME , FAQ_NAME , TAGS_NAME , MATCHIT_NAME ):
660686 logging .info ("Saving raw file '%s' (info and content) to Datastore" , rfi_id )
661687 rfc = RawFileContent (
662688 id = rfi_id , project = project , data = content , encoding = b"UTF-8"
@@ -685,7 +711,7 @@ def version_from_tag(version_tag):
685711
686712
def sha1(content):
    """
    Return the raw (binary, not hex-encoded) SHA-1 digest of 'content', which
    must be a bytes-like object.
    """
    # The linter's insecure-hash warning (S324) is suppressed deliberately here.
    digest = hashlib.sha1()  # noqa: S324
    digest.update(content)
    return digest.digest()
691717
0 commit comments