Skip to content

Commit 68eb347

Browse files
Decompress ZIP files to calculate SS checksums.
1 parent d32a821 commit 68eb347

File tree

2 files changed

+93
-20
lines changed

2 files changed

+93
-20
lines changed

resources/scrap.py

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import time
3131
import urllib
3232
import urlparse
33+
import zipfile
3334

3435
# --- AEL packages ---
3536
from .constants import *
@@ -3318,23 +3319,19 @@ def get_candidates(self, search_term, rom_FN, rom_checksums_FN, platform, status
33183319
log_debug('ScreenScraper.get_candidates() Scraper disabled. Returning empty data.')
33193320
return None
33203321

3321-
# Prepare data for scraping.
3322-
rombase = rom_FN.getBase()
3323-
rompath = rom_FN.getPath()
3324-
romchecksums_path = rom_checksums_FN.getPath()
3325-
33263322
# --- Get candidates ---
33273323
# ScreenScraper jeuInfos.php returns absolutely everything about a single ROM, including
33283324
# metadata, artwork, etc. jeuInfos.php returns one game or nothing at all.
33293325
# ScreenScraper returns only one game or nothing at all.
3326+
rompath = rom_FN.getPath()
3327+
romchecksums_path = rom_checksums_FN.getPath()
33303328
scraper_platform = AEL_platform_to_ScreenScraper(platform)
3331-
log_debug('ScreenScraper.get_candidates() rombase "{}"'.format(rombase))
33323329
log_debug('ScreenScraper.get_candidates() rompath "{}"'.format(rompath))
33333330
log_debug('ScreenScraper.get_candidates() romchecksums "{}"'.format(romchecksums_path))
33343331
log_debug('ScreenScraper.get_candidates() AEL platform "{}"'.format(platform))
33353332
log_debug('ScreenScraper.get_candidates() SS platform "{}"'.format(scraper_platform))
33363333
candidate_list = self._search_candidates_jeuInfos(
3337-
rombase, rompath, romchecksums_path, platform, scraper_platform, status_dic)
3334+
rom_FN, rom_checksums_FN, platform, scraper_platform, status_dic)
33383335
# _search_candidates_jeuRecherche() does not work for get_metadata() and get_assets()
33393336
# because jeu_dic is not introduced in the internal cache.
33403337
# candidate_list = self._search_candidates_jeuRecherche(
@@ -3559,8 +3556,7 @@ def debug_game_search(self, search_term, rombase_noext, platform, status_dic):
35593556
self._dump_json_debug('ScreenScraper_gameSearch.json', json_data)
35603557

35613558
# Call to ScreenScraper jeuInfos.php.
3562-
def _search_candidates_jeuInfos(self, rombase, rompath, romchecksums_path, platform,
3563-
scraper_platform, status_dic):
3559+
def _search_candidates_jeuInfos(self, rom_FN, rom_checksums_FN, platform, scraper_platform, status_dic):
35643560
# --- Test data ---
35653561
# * Example from ScreenScraper API info page.
35663562
# #crc=50ABC90A&systemeid=1&romtype=rom&romnom=Sonic%20The%20Hedgehog%202%20(World).zip&romtaille=749652
@@ -3585,17 +3581,16 @@ def _search_candidates_jeuInfos(self, rombase, rompath, romchecksums_path, platf
35853581

35863582
# --- IMPORTANT ---
35873583
# ScreenScraper requires all CRC, MD5 and SHA1 and the correct file size of the
3588-
# files scraped. Put these data in a SS checksums cache so it is calculated once for
3589-
# every file.
3584+
# files scraped.
35903585
if self.debug_checksums_flag:
35913586
# Use fake checksums when developing the scraper with fake 0-sized files.
35923587
log_info('Using debug checksums and not computing real ones.')
35933588
checksums = {
3594-
'crc' : self.debug_crc, 'md5' : self.debug_md5,
3595-
'sha1' : self.debug_sha1, 'size' : self.debug_size,
3589+
'crc' : self.debug_crc, 'md5' : self.debug_md5, 'sha1' : self.debug_sha1,
3590+
'size' : self.debug_size, 'rom_name' : rom_FN.getBase(),
35963591
}
35973592
else:
3598-
checksums = misc_calculate_checksums(romchecksums_path)
3593+
checksums = self._get_SS_checksum(rom_checksums_FN)
35993594
if checksums is None:
36003595
status_dic['status'] = False
36013596
status_dic['msg'] = 'Error computing file checksums.'
@@ -3608,8 +3603,8 @@ def _search_candidates_jeuInfos(self, rombase, rompath, romchecksums_path, platf
36083603
crc_str = checksums['crc']
36093604
md5_str = checksums['md5']
36103605
sha1_str = checksums['sha1']
3611-
# rom_name = urllib.quote(rombase)
3612-
rom_name = urllib.quote_plus(rombase)
3606+
# rom_name = urllib.quote(checksums['rom_name'])
3607+
rom_name = urllib.quote_plus(checksums['rom_name'])
36133608
rom_size = checksums['size']
36143609
# log_debug('ScreenScraper._search_candidates_jeuInfos() ssid "{0}"'.format(self.ssid))
36153610
# log_debug('ScreenScraper._search_candidates_jeuInfos() ssid "{0}"'.format('***'))
@@ -3854,6 +3849,46 @@ def _retrieve_all_assets(self, jeu_dic, status_dic):
38543849

38553850
return asset_list
38563851

3852+
# 1) If rom_checksums_FN is a ZIP file and contains one and only one file, then consider that
3853+
# file the ROM, decompress in memory and calculate the checksums.
3854+
# 2) If rom_checksums_FN is a standard file or 1) fails then calculate the checksums of
3855+
# the file.
3856+
# 3) Return a checksums dictionary if everything is OK. Return None in case of any error.
3857+
def _get_SS_checksum(self, rom_checksums_FN):
    """Compute the checksums dictionary used to query ScreenScraper for a ROM.

    If the file name ends in '.zip' and the archive contains exactly one member, that
    member is decompressed in memory and the checksums of the decompressed bytes are
    computed; 'rom_name' is then the member name. Otherwise (not a ZIP, or a ZIP with
    a different number of members) the checksums of the file itself are computed and
    'rom_name' is the file base name.

    rom_checksums_FN -- file name object providing getBase() and getPath().

    Returns the checksums dictionary with the extra key 'rom_name', or None if the file
    is not a valid ZIP, the archive cannot be read, or the checksums cannot be computed.
    """
    f_basename = rom_checksums_FN.getBase()
    f_path = rom_checksums_FN.getPath()
    log_debug('_get_SS_checksum() Processing "{}"'.format(f_path))
    if f_basename.lower().endswith('.zip'):
        log_debug('_get_SS_checksum() ZIP file detected.')
        if not zipfile.is_zipfile(f_path):
            log_error('zipfile.is_zipfile() returns False. Bad ZIP file.')
            return None
        log_debug('_get_SS_checksum() ZIP file seems to be correct.')
        file_bytes = None
        try:
            # The with statement guarantees the archive handle is closed, also on error.
            with zipfile.ZipFile(f_path) as zip_file:
                namelist = zip_file.namelist()
                # log_variable('namelist', namelist)
                if len(namelist) == 1:
                    log_debug('_get_SS_checksum() ZIP file has one file only.')
                    log_debug('_get_SS_checksum() Decompressing file "{}"'.format(namelist[0]))
                    file_bytes = zip_file.read(namelist[0])
        except (zipfile.BadZipfile, IOError, RuntimeError):
            # Corrupted, unreadable or encrypted archive. The caller treats None as
            # "error computing file checksums".
            log_error('_get_SS_checksum() Error reading ZIP file "{}"'.format(f_path))
            return None
        # Compare against None because an empty member decompresses to an empty string.
        if file_bytes is not None:
            log_debug('_get_SS_checksum() Decompressed size is {} bytes'.format(len(file_bytes)))
            checksums = misc_calculate_stream_checksums(file_bytes)
            checksums['rom_name'] = namelist[0]
            log_debug('_get_SS_checksum() ROM name is "{}"'.format(checksums['rom_name']))
            return checksums
        log_debug('_get_SS_checksum() ZIP file has {} files.'.format(len(namelist)))
        log_debug('_get_SS_checksum() Computing checksum of whole ZIP file.')
    else:
        log_debug('_get_SS_checksum() File is not ZIP. Computing checksum of whole file.')

    # Otherwise calculate checksums of the whole file.
    checksums = misc_calculate_file_checksums(f_path)
    if checksums is None:
        # misc_calculate_file_checksums() returns None on any error. Propagate it
        # instead of failing with a TypeError when adding the 'rom_name' key.
        log_error('_get_SS_checksum() Error computing checksums of "{}"'.format(f_path))
        return None
    checksums['rom_name'] = f_basename
    log_debug('_get_SS_checksum() ROM name is "{}"'.format(checksums['rom_name']))

    return checksums
3891+
38573892
# ScreenScraper URLs have the developer password and the user password.
38583893
# Clean URLs for safe logging.
38593894
def _clean_URL_for_log(self, url):

resources/utils.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -651,7 +651,7 @@ def misc_generate_random_SID():
651651
#
652652
# Lazy function (generator) to read a file piece by piece. Default chunk size: 8k.
653653
#
654-
def misc_read_in_chunks(file_object, chunk_size = 8192):
654+
def misc_read_file_in_chunks(file_object, chunk_size = 8192):
655655
while True:
656656
data = file_object.read(chunk_size)
657657
if not data: break
@@ -664,14 +664,14 @@ def misc_read_in_chunks(file_object, chunk_size = 8192):
664664
# https://stackoverflow.com/questions/519633/lazy-method-for-reading-big-file-in-python
665665
# https://stackoverflow.com/questions/1742866/compute-crc-of-file-in-python
666666
#
667-
def misc_calculate_checksums(full_file_path):
667+
def misc_calculate_file_checksums(full_file_path):
668668
log_debug('Computing checksums "{}"'.format(full_file_path))
669669
try:
670670
f = open(full_file_path, 'rb')
671671
crc_prev = 0
672672
md5 = hashlib.md5()
673673
sha1 = hashlib.sha1()
674-
for piece in misc_read_in_chunks(f):
674+
for piece in misc_read_file_in_chunks(f):
675675
crc_prev = zlib.crc32(piece, crc_prev)
676676
md5.update(piece)
677677
sha1.update(piece)
@@ -680,7 +680,7 @@ def misc_calculate_checksums(full_file_path):
680680
sha1_digest = sha1.hexdigest()
681681
size = os.path.getsize(full_file_path)
682682
except:
683-
log_debug('(Exception) In misc_calculate_checksums()')
683+
log_debug('(Exception) In misc_calculate_file_checksums()')
684684
log_debug('Returning None')
685685
return None
686686
checksums = {
@@ -692,6 +692,44 @@ def misc_calculate_checksums(full_file_path):
692692

693693
return checksums
694694

695+
#
# Lazy function (generator) to read an in-memory bytes string piece by piece.
# Default chunk size: 8k. Pieces are yielded in order, the last one may be shorter than
# chunk_size, and an empty input yields nothing.
# Raises ValueError if chunk_size is not positive (the loop would never advance).
#
def misc_read_bytes_in_chunks(file_bytes, chunk_size = 8192):
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive integer')
    file_length = len(file_bytes)
    start_index = 0
    while start_index < file_length:
        end_index = start_index + chunk_size
        # Slicing past the end is safe: the last piece is simply shorter.
        yield file_bytes[start_index:end_index]
        start_index = end_index
704+
705+
def misc_calculate_stream_checksums(file_bytes):
    """Compute the CRC32, MD5 and SHA1 checksums and the size of an in-memory bytes string.

    file_bytes -- the complete data as a bytes string. It is processed in one go.

    Returns a dictionary with keys 'crc', 'md5' and 'sha1' (upper-case hexadecimal
    strings, the CRC zero-padded to 8 digits) and 'size' (length of file_bytes in bytes).
    """
    size = len(file_bytes)
    log_debug('Computing checksums of bytes stream ({} bytes)...'.format(size))
    # Mask to 32 bits so the CRC is formatted as an unsigned number even if
    # zlib.crc32() returns a negative value.
    crc_digest = '{:08X}'.format(zlib.crc32(file_bytes, 0) & 0xFFFFFFFF)
    md5_digest = hashlib.md5(file_bytes).hexdigest()
    sha1_digest = hashlib.sha1(file_bytes).hexdigest()

    checksums = {
        'crc'  : crc_digest,
        'md5'  : md5_digest.upper(),
        'sha1' : sha1_digest.upper(),
        'size' : size,
    }

    return checksums
732+
695733
# -------------------------------------------------------------------------------------------------
696734
# Filesystem helper class
697735
# This class always takes and returns Unicode string paths. Decoding to UTF-8 must be done in

0 commit comments

Comments
 (0)