2323from xml .etree import ElementTree as CET
2424import zipfile
2525
26- from six .moves .urllib .request import urlopen
27- from six .moves .urllib .error import URLError
28-
2926from . import __version__ , CONFIG_DIR , query_yes_no
3027from .prepare import run as prepare_pronom_to_fido
31- from .pronomutils import get_local_pronom_versions
32- from .pronom .soap import get_pronom_sig_version , get_pronom_signature
28+ from .versions import get_local_versions
29+ from .pronom .soap import get_pronom_sig_version , get_pronom_signature , NS
30+ from .pronom .http import get_sig_xml_for_puid
3331
3432
3533DEFAULTS = {
@@ -56,57 +54,18 @@ def run(defaults=None):
5654 defaults = defaults or DEFAULTS
5755 try :
5856 print ("Contacting PRONOM..." )
59- currentVersion = get_pronom_sig_version ()
60- if not currentVersion :
61- sys .exit ('Failed to obtain PRONOM signature file version number, please try again.' )
62-
63- print ("Querying latest signaturefile version..." )
64- signatureFile = os .path .join (CONFIG_DIR , defaults ['signatureFileName' ].format (currentVersion ))
65- if os .path .isfile (signatureFile ):
66- print ("You already have the latest PRONOM signature file, version" , currentVersion )
67- if not query_yes_no ("Update anyway?" ):
68- sys .exit ('Aborting update...' )
69-
70- print ("Downloading signature file version {}..." .format (currentVersion ))
71- currentFile , _ = get_pronom_signature ()
72- if not currentFile :
73- sys .exit ('Failed to obtain PRONOM signature file, please try again.' )
74- print ("Writing {0}..." .format (defaults ['signatureFileName' ].format (currentVersion )))
75- with open (signatureFile , 'w' ) as file_ :
76- file_ .write (currentFile )
77-
57+ currentVersion , signatureFile = sig_version_check (defaults )
58+ download_sig_file (defaults , currentVersion , signatureFile )
7859 print ("Extracting PRONOM PUID's from signature file..." )
7960 tree = CET .parse (signatureFile )
80- puids = []
81- for node in tree .iter ("{http://www.nationalarchives.gov.uk/pronom/SignatureFile}FileFormat" ):
82- puids .append (node .get ("PUID" ))
83- print ("Found {} PRONOM PUID's" .format (len (puids )))
84-
85- print ("Downloading signatures can take a while" )
86- if not query_yes_no ("Continue and download signatures?" ):
87- sys .exit ('Aborting update...' )
88- tmpdir = defaults ['tmp_dir' ]
89- resume_download = False
90- if os .path .isdir (tmpdir ):
91- print ("Found previously created temporary folder for download:" , tmpdir )
92- resume_download = query_yes_no ('Do you want to resume download (yes) or start over (no)?' )
93- if resume_download :
94- print ("Resuming download..." )
95- else :
96- print ("Creating temporary folder for download:" , tmpdir )
97- try :
98- os .mkdir (tmpdir )
99- except OSError :
100- pass
101- if not os .path .isdir (tmpdir ):
102- sys .stderr .write ("Failed to create temporary folder for PUID's, using: " + tmpdir )
103-
104- download_signatures (defaults , puids , resume_download , tmpdir )
105- create_zip_file (defaults , puids , currentVersion , tmpdir )
61+ format_eles = tree .findall ('.//sig:FileFormat' , NS )
62+ print ("Found {} PRONOM FileFormat elements" .format (len (format_eles )))
63+ tmpdir , resume = init_sig_download (defaults )
64+ download_signatures (defaults , format_eles , resume , tmpdir )
65+ create_zip_file (defaults , format_eles , currentVersion , tmpdir )
10666 if defaults ['deleteTempDirectory' ]:
10767 print ("Deleting temporary folder and files..." )
10868 rmtree (tmpdir , ignore_errors = True )
109-
11069 update_versions_xml (defaults , currentVersion )
11170
11271 # TODO: there should be a check here to handle prepare.main exit() signal (-1/0/1/...)
@@ -118,47 +77,106 @@ def run(defaults=None):
11877 sys .exit ('Aborting update...' )
11978
12079
121- def download_signatures (defaults , puids , resume_download , tmpdir ):
def sig_version_check(defaults):
    """
    Determine the latest PRONOM signature file version and its local path.

    Asks PRONOM for the current signature file version, builds the local
    file path from CONFIG_DIR and the ``signatureFileName`` template in
    ``defaults`` and, when a file already exists at that path, asks the
    user whether to update anyway.

    Return a tuple of (version, signature file path).

    Exits the process via ``sys.exit`` when no version could be obtained or
    when the user declines to update an already present signature file.
    """
    print("Contacting PRONOM...")
    latest_version = get_pronom_sig_version()
    if not latest_version:
        sys.exit('Failed to obtain PRONOM signature file version number, please try again.')

    print("Querying latest signaturefile version...")
    sig_file_name = defaults['signatureFileName'].format(latest_version)
    sig_file_path = os.path.join(CONFIG_DIR, sig_file_name)
    if not os.path.isfile(sig_file_path):
        # Nothing stored locally for this version yet, no confirmation needed.
        return latest_version, sig_file_path

    print("You already have the latest PRONOM signature file, version", latest_version)
    if not query_yes_no("Update anyway?"):
        sys.exit('Aborting update...')
    return latest_version, sig_file_path
94+
95+
def download_sig_file(defaults, version, signatureFile):
    """
    Fetch the PRONOM signature file and store it at signatureFile.

    ``version`` is only used for the progress messages; the content itself
    comes from ``get_pronom_signature()``, whose first return value is
    written to ``signatureFile`` in text mode.

    Exits the process via ``sys.exit`` when no content was obtained.
    """
    print("Downloading signature file version {}...".format(version))
    sig_content, _ignored = get_pronom_signature()
    if not sig_content:
        sys.exit('Failed to obtain PRONOM signature file, please try again.')

    target_name = defaults['signatureFileName'].format(version)
    print("Writing {0}...".format(target_name))
    with open(signatureFile, 'w') as sig_out:
        sig_out.write(sig_content)
105+
106+
def init_sig_download(defaults):
    """
    Prepare the download of the individual PRONOM signatures.

    Asks the user to confirm the download, then either offers to resume
    into an existing temporary directory or tries to create a new one.

    Return a tuple of the temp directory for writing and the resume flag
    (the user's answer when the directory already existed, otherwise False).

    Exits the process via ``sys.exit`` when the user declines the download.
    """
    print("Downloading signatures can take a while")
    if not query_yes_no("Continue and download signatures?"):
        sys.exit('Aborting update...')

    download_dir = defaults['tmp_dir']
    if os.path.isdir(download_dir):
        print("Found previously created temporary folder for download:", download_dir)
        resume_answer = query_yes_no('Do you want to resume download (yes) or start over (no)?')
        if resume_answer:
            print("Resuming download...")
        return download_dir, resume_answer

    print("Creating temporary folder for download:", download_dir)
    try:
        os.mkdir(download_dir)
    except OSError:
        # Deliberately ignored: the directory check below reports a failure.
        pass
    if not os.path.isdir(download_dir):
        sys.stderr.write("Failed to create temporary folder for PUID's, using: " + download_dir)
    return download_dir, False
133+
134+
def download_signatures(defaults, format_eles, resume, tmpdir):
    """
    Download PRONOM signatures and write to individual files.

    ``format_eles`` is the sequence of FileFormat elements to process,
    ``tmpdir`` the directory the signature files are written to and
    ``resume`` whether files already present in ``tmpdir`` are kept.
    Progress is printed after every element.

    The pause of ``defaults['http_throttle']`` seconds is only taken after
    an element that was actually requested; files skipped because of
    ``resume`` cause no HTTP traffic and therefore need no throttling.
    """
    print("Downloading signatures, one moment please...")
    numberPuids = len(format_eles)
    one_percent = float(numberPuids) / 100
    for numfiles, format_ele in enumerate(format_eles, start=1):
        # Mirror the skip condition used by download_sig so that we know
        # whether this iteration is going to hit the network at all.
        _, puidFileName = get_puid_file_name(format_ele)
        already_downloaded = os.path.isfile(os.path.join(tmpdir, puidFileName)) and resume
        download_sig(format_ele, tmpdir, resume)
        percent = int(float(numfiles) / one_percent)
        print(r"{}/{} files [{}%]".format(numfiles, numberPuids, percent))
        if not already_downloaded:
            time.sleep(defaults['http_throttle'])
    print("100%")
150148
151149
152- def create_zip_file (defaults , puids , currentVersion , tmpdir ):
def download_sig(format_ele, tmpdir, resume):
    """
    Fetch a single PRONOM signature and store it in tmpdir.

    The PUID and the target file name are derived from the FileFormat
    element ``format_ele``. When ``resume`` is set and the target file is
    already present, nothing is downloaded.

    Any exception raised while fetching the signature is reported on
    stderr and the process is ended via ``sys.exit``.
    """
    puid, sig_file_name = get_puid_file_name(format_ele)
    target_path = os.path.join(tmpdir, sig_file_name)
    already_present = os.path.isfile(target_path)
    if already_present and resume:
        # Keep the file from the interrupted run.
        return

    try:
        sig_xml = get_sig_xml_for_puid(puid)
    except Exception as err:
        sys.stderr.write("Failed to download signature file:" + puid)
        sys.stderr.write("Error:" + str(err))
        sys.exit('Please restart and resume download.')

    # Written in binary mode; presumably get_sig_xml_for_puid returns bytes
    # (TODO confirm against the pronom.http module).
    with open(target_path, 'wb') as sig_out:
        sig_out.write(sig_xml)
170+
171+ def create_zip_file (defaults , format_eles , currentVersion , tmpdir ):
153172 """Create zip file of signatures."""
154173 print ("Creating PRONOM zip..." )
155174 compression = zipfile .ZIP_DEFLATED if 'zlib' in sys .modules else zipfile .ZIP_STORED
156175 modes = {zipfile .ZIP_DEFLATED : 'deflated' , zipfile .ZIP_STORED : 'stored' }
157176 zf = zipfile .ZipFile (os .path .join (CONFIG_DIR , defaults ['pronomZipFileName' ].format (currentVersion )), mode = 'w' )
158177 print ("Adding files with compression mode" , modes [compression ])
159- for puid in puids :
160- puidType , puidNum = puid .split ("/" )
161- puidFileName = "puid.{}.{}.xml" .format (puidType , puidNum )
178+ for format_ele in format_eles :
179+ _ , puidFileName = get_puid_file_name (format_ele )
162180 filename = os .path .join (tmpdir , puidFileName )
163181 if os .path .isfile (filename ):
164182 zf .write (filename , arcname = puidFileName , compress_type = compression )
@@ -167,10 +185,17 @@ def create_zip_file(defaults, puids, currentVersion, tmpdir):
167185 zf .close ()
168186
169187
def get_puid_file_name(format_ele):
    """
    Return a tuple of PUID and PUID file name derived from format_ele.

    ``format_ele`` must offer ``get('PUID')`` returning a PUID of the form
    ``<type>/<number>``; the file name is ``puid.<type>.<number>.xml``.

    Raises ValueError when the PUID attribute is missing or does not
    consist of exactly two parts separated by "/".
    """
    puid = format_ele.get('PUID')
    if puid is None:
        # Without this check the failure would surface as an opaque
        # AttributeError on None.split().
        raise ValueError("FileFormat element has no PUID attribute")
    parts = puid.split("/")
    if len(parts) != 2:
        raise ValueError("Unexpected PUID format: {!r}".format(puid))
    puidType, puidNum = parts
    return puid, 'puid.{}.{}.xml'.format(puidType, puidNum)
193+
194+
170195def update_versions_xml (defaults , currentVersion ):
171196 """Create new versions identified sig XML file."""
172197 print ('Updating versions.xml...' )
173- versions = get_local_pronom_versions ()
198+ versions = get_local_versions ()
174199 versions .pronom_version = str (currentVersion )
175200 versions .pronom_signature = "formats-v" + str (currentVersion ) + ".xml"
176201 versions .pronom_container_signature = defaults ['containerVersion' ]
@@ -188,7 +213,6 @@ def main():
188213 args = parser .parse_args ()
189214 opts = DEFAULTS .copy ()
190215 opts .update (vars (args ))
191-
192216 run (opts )
193217
194218
0 commit comments