2020
2121import os
2222import re
23+ import gzip
24+ # import json
25+ import rapidjson as json
2326import time
2427import logging
2528
4043cfgfact_file_name = 'cfg.nt.gz'
4144
4245
46+ GMAP_JSON_FILE_NAME = 'gmap.json.gz'
47+ GDIFF_JSON_FILE_NAME = 'gdiff.json.gz'
48+
49+
4350RETRY_COUNT = 3
4451
4552DEFAULT_FACT_SIZE_THRESH = 10000
@@ -217,7 +224,7 @@ def get_cache_dir1_(diff_cmd, a,
217224
218225 opts = cache_opt + hash_opt
219226
220- cmd = f'{ diff_cmd } -parseonly{ opts } -getcache { a } '
227+ cmd = f'{ diff_cmd } -parseonly{ opts } -getcache " { a } " '
221228
222229 if not quiet :
223230 logger .info (f'cmd: "{ cmd } "' )
@@ -249,7 +256,7 @@ def get_cache_dir(diff_cmd, a1, a2,
249256
250257 opts = cache_opt + hash_opt
251258
252- cmd = f'{ diff_cmd } { opts } -getcache { a1 } { a2 } '
259+ cmd = f'{ diff_cmd } { opts } -getcache " { a1 } " " { a2 } " '
253260
254261 if not quiet :
255262 logger .info (f'cmd: "{ cmd } "' )
@@ -280,6 +287,20 @@ def get_fact_proj_roots_opt(fact_proj_roots):
280287 return ' ' .join (li )
281288
282289
def read_json(data_path):
    """Load a JSON document from *data_path*, handling gzip transparently.

    A path ending in ``.gz`` is opened with ``gzip.open``; any other path is
    opened with the builtin ``open``.

    Returns the decoded object, or ``None`` when the file cannot be opened or
    parsed (the failure is logged as a warning rather than raised).
    """
    opener = gzip.open if data_path.endswith('.gz') else open
    try:
        # Open both kinds of file in text mode with an explicit encoding:
        # JSON is UTF-8, and the builtin open() would otherwise fall back to
        # the locale's default encoding for the uncompressed case.
        with opener(data_path, 'rt', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        # Deliberately best-effort: callers get None instead of an exception.
        logger.warning(f'{data_path}: {e}')

    return None
302+
303+
283304def read_file (r , name_pat_list , stat_paths , retry_count = RETRY_COUNT ):
284305 count = 0
285306 stat_paths = stat_paths * (int (retry_count / len (stat_paths )) + 1 )
@@ -444,9 +465,7 @@ def diffts(diff_cmd, file1, file2,
444465 if load_fact :
445466 logger .info ('loading fact' )
446467 if fact_versions :
447- fact_opt = \
448- ' -fact -fact:add-versions {}' \
449- .format (get_fact_versions_opt (fact_versions ))
468+ fact_opt = f' -fact -fact:add-versions { get_fact_versions_opt (fact_versions )} '
450469
451470 fact_opt += ' -fact:encoding:' + fact_encoding
452471 fact_opt += ' -fact:hash:' + fact_hash_algo
@@ -467,16 +486,13 @@ def diffts(diff_cmd, file1, file2,
467486 fact_opt += f' -fact:project { fact_proj } '
468487
469488 if fact_proj_roots :
470- fact_opt += \
471- ' {}' .format (get_fact_proj_roots_opt (fact_proj_roots ))
489+ fact_opt += f' { get_fact_proj_roots_opt (fact_proj_roots )} '
472490
473491 if fact_into_virtuoso :
474- fact_opt += \
475- f' -fact:into-virtuoso { fact_into_virtuoso } '
492+ fact_opt += f' -fact:into-virtuoso { fact_into_virtuoso } '
476493
477494 if fact_into_directory :
478- fact_opt += \
479- f' -fact:into-directory { fact_into_directory } '
495+ fact_opt += f' -fact:into-directory { fact_into_directory } '
480496
481497 if fact_for_delta :
482498 fact_opt += ' -fact:delta'
@@ -523,7 +539,7 @@ def diffts(diff_cmd, file1, file2,
523539 cmd = '' .join ((diff_cmd ,
524540 cache_opt , cachedir_opt , prep_opt , prem_opt , fact_opt ,
525541 dumpccs_opt , check_opt , other_opts ))
526- cmd += f' { file1 } { file2 } '
542+ cmd += f' " { file1 } " " { file2 } " '
527543
528544 logger .info (f'cmd="{ cmd } "' )
529545
@@ -535,6 +551,10 @@ def diffts(diff_cmd, file1, file2,
535551 'ninserts' : 0 ,
536552 'ndeletes' : 0 ,
537553 'nrelabels' : 0 ,
554+ 'nmoves' : 0 ,
555+ 'nmovrels' : 0 ,
556+ 'nnodes1' : 0 ,
557+ 'nnodes2' : 0 ,
538558 # 'exitcode': 0,
539559 }
540560
@@ -553,23 +573,47 @@ def diffast(file1, file2, **options):
553573
554574
def diffast_get_cache_dir1(file, **options):
    """Call ``get_cache_dir1_`` for *file* using ``diffast_cmd``.

    The caller's *options* are copied, so the passed-in mapping is never
    modified.  Any ``usecache`` entry is dropped before forwarding, and
    ``quiet`` defaults to ``True`` unless the caller supplied it.
    """
    forwarded = dict(options)
    # NOTE(review): presumably get_cache_dir1_ does not accept 'usecache' --
    # confirm against its signature.
    forwarded.pop('usecache', None)
    forwarded.setdefault('quiet', True)
    return get_cache_dir1_(diffast_cmd, file, **forwarded)
557582
558583
def diffast_get_cache_dir(file1, file2, **options):
    """Call ``get_cache_dir`` for the pair (*file1*, *file2*) using ``diffast_cmd``.

    The caller's *options* are copied, so the passed-in mapping is never
    modified.  Any ``usecache`` entry is dropped before forwarding, and
    ``quiet`` defaults to ``True`` unless the caller supplied it.
    """
    forwarded = dict(options)
    # NOTE(review): presumably get_cache_dir does not accept 'usecache' --
    # confirm against its signature.
    forwarded.pop('usecache', None)
    forwarded.setdefault('quiet', True)
    return get_cache_dir(diffast_cmd, file1, file2, **forwarded)
591+
592+
def diffast_get_gmap(file1, file2, **options):
    """Read ``GMAP_JSON_FILE_NAME`` from the cache directory of a file pair.

    The cache directory is obtained from ``diffast_get_cache_dir`` and the
    file inside it is loaded with ``read_json``, whose result is returned
    unchanged.
    """
    # NOTE(review): assumes diffast_get_cache_dir returns a path string;
    # os.path.join would raise on None -- confirm.
    return read_json(
        os.path.join(
            diffast_get_cache_dir(file1, file2, **options),
            GMAP_JSON_FILE_NAME,
        )
    )
598+
599+
def diffast_get_gdiff(file1, file2, **options):
    """Read ``GDIFF_JSON_FILE_NAME`` from the cache directory of a file pair.

    The cache directory is obtained from ``diffast_get_cache_dir`` and the
    file inside it is loaded with ``read_json``, whose result is returned
    unchanged.
    """
    # NOTE(review): assumes diffast_get_cache_dir returns a path string;
    # os.path.join would raise on None -- confirm.
    return read_json(
        os.path.join(
            diffast_get_cache_dir(file1, file2, **options),
            GDIFF_JSON_FILE_NAME,
        )
    )
561605
562606
def dump_unparsed(path, to_path, quiet=False):
    """Run ``diffast_cmd`` in parse-only mode on *path*, dumping the source to *to_path*.

    The command clears the cache (``-clearcache``), parses only
    (``-parseonly``) and passes *to_path* to ``-dump:src:out``.  Unless
    *quiet* is true the command line is logged at INFO level.

    Returns whatever ``proc.system`` returns for the command.
    """
    # Local import so this block does not depend on the module's import list.
    from shlex import quote

    # Wrapping the paths in literal double quotes copes with spaces but not
    # with '"', '$' or backticks; shlex.quote escapes every shell-special
    # character.
    # NOTE(review): assumes proc.system parses the string with POSIX shell
    # rules -- confirm.
    cmd = (
        f'{diffast_cmd} -clearcache -parseonly'
        f' -dump:src:out {quote(to_path)} {quote(path)}'
    )
    if not quiet:
        logger.info('cmd="{}"'.format(cmd))

    return proc.system(cmd, quiet=quiet)
569613
570614
571615def patchast (path , delta_path , out_path , quiet = False ):
572- cmd = f'{ patchast_cmd } -o { out_path } { path } { delta_path } '
616+ cmd = f'{ patchast_cmd } -o " { out_path } " " { path } " " { delta_path } " '
573617 if not quiet :
574618 logger .info (f'cmd="{ cmd } "' )
575619
0 commit comments