Skip to content

Commit cdced0b

Browse files
committed
parallel discovery
1 parent 6036b15 commit cdced0b

File tree

1 file changed

+161
-137
lines changed

1 file changed

+161
-137
lines changed

src/pyatsimagebuilder/utils.py

Lines changed: 161 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import yaml
1313
import sys
1414

15+
from concurrent.futures import ThreadPoolExecutor
16+
1517
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
1618
# Let every record from DEBUG upwards through this logger.
logger.setLevel(logging.DEBUG)
1719
# NOTE: this runs at import time and attaches a stdout handler to the module
# logger, so importing the module is enough to start echoing its log records
# to standard output.
logger.addHandler(logging.StreamHandler(sys.stdout))
@@ -347,82 +349,87 @@ def discover_jobs(jobfiles,
347349
return job_paths
348350

349351

352+
def parse_manifest(manifest_file, jobs, search_path, relative_path=None, repo_data=None):
    """ Parse a single manifest file and append the resulting job to ``jobs``

    The manifest is loaded as YAML, annotated with ``file``, ``run_type``,
    ``job_type`` and (when a matching repo is found) ``repo_path``, and its
    ``profiles`` / ``runtimes`` mappings are converted to lists of dicts that
    each carry a ``name`` key. A ``DEFAULT`` profile is always created from
    the top level ``arguments`` and the ``system`` runtime environment.

    Nothing is appended (and the problem is logged) when the file cannot be
    read or parsed, is empty, has no ``type``, or fails during processing.

    Arguments:
        manifest_file: path of the manifest file to load
        jobs (list): list the parsed manifest dict is appended to
        search_path: passed to ``to_image_path`` when ``relative_path`` is set
        relative_path (str): when set, ``file`` is stored as the result of
            ``to_image_path`` instead of the plain manifest path
        repo_data (iterable): repo path prefixes; the first one that the
            manifest ``file`` starts with is stored as ``repo_path``.
            ``None`` is treated as "no repos".

    Returns:
        None: the result is delivered through ``jobs``
    """
    try:
        with open(manifest_file) as f:
            manifest_data = yaml.safe_load(f.read())
    except yaml.error.YAMLError as e:
        logger.error('Error loading manifest file {} from yaml\n{}'.format(
            manifest_file, str(e)))
        return
    except OSError as e:
        # An unreadable or missing file must be reported here: this function
        # may run on a worker thread whose exception nobody inspects.
        logger.error('Error reading manifest file {}\n{}'.format(
            manifest_file, str(e)))
        return

    if manifest_data is None:
        logger.warning(f'No manifest data from file {manifest_file}')
        return

    try:
        if relative_path:
            manifest_data['file'] = to_image_path(str(manifest_file),
                                                  search_path,
                                                  relative_path)
        else:
            manifest_data['file'] = str(manifest_file)

        # Find any repo containing this manifest file. repo_data defaults to
        # None, so guard the iteration instead of failing with a TypeError.
        for repo in repo_data or ():
            if manifest_data['file'].startswith(repo):
                manifest_data['repo_path'] = repo
                break

        manifest_data['run_type'] = 'manifest'
        manifest_data['job_type'] = manifest_data.pop('type', None)
        if not manifest_data['job_type']:
            logger.warning(f'No job type specified in {manifest_file}')
            return

        # Pop runtimes and profiles to add them back later as lists. A key
        # that is present but empty in the YAML loads as None, so fall back
        # to an empty mapping rather than rejecting the whole manifest.
        runtimes = manifest_data.pop('runtimes', None) or {}
        profiles = manifest_data.pop('profiles', None) or {}

        # Create default profile from top level arguments and system
        # environment
        default_arguments = manifest_data.pop('arguments', {})
        default_environment = runtimes.get('system', {}).get('environment', {})
        profiles['DEFAULT'] = {
            'runtime': 'system',
            'arguments': default_arguments,
            'environment': default_environment,
        }

        # Update profiles with environment from runtimes
        for profile in profiles.values():
            runtime = profile.get('runtime', 'system')
            if runtime in runtimes:
                environment = runtimes[runtime].get('environment', {})
                if environment:
                    profile['environment'] = environment

        # Convert profiles from hierarchical dict to list of dict
        manifest_data['profiles'] = []
        for profile_name, profile in profiles.items():
            profile['name'] = profile_name
            manifest_data['profiles'].append(profile)

        # Convert runtimes from hierarchical dict to list of dict
        manifest_data['runtimes'] = []
        for runtime_name, runtime_data in runtimes.items():
            runtime_data['name'] = runtime_name
            manifest_data['runtimes'].append(runtime_data)

        jobs.append(manifest_data)

    except Exception:
        # Best effort: a malformed manifest is logged with its traceback and
        # skipped so that it does not abort discovery of the others.
        logger.exception('Error processing manifest file {}'.format(
            manifest_file))
423+
424+
350425
def parse_manifests(manifests, search_path, relative_path=None, repo_data=None,
                    max_workers=50):
    """ Parse manifest files concurrently and return the resulting jobs

    Every manifest is handed to ``parse_manifest`` on a thread pool. Each
    worker appends into its own private list and the lists are merged in the
    order of ``manifests``, so the returned list does not depend on thread
    scheduling.

    Arguments:
        manifests (iterable): manifest file paths to parse
        search_path: forwarded unchanged to ``parse_manifest``
        relative_path (str): forwarded unchanged to ``parse_manifest``
        repo_data (dict): forwarded to ``parse_manifest``; ``None`` is
            replaced by an empty dict
        max_workers (int): maximum number of worker threads

    Returns:
        list: manifest dicts appended by ``parse_manifest``, in the order of
        ``manifests``; manifests that produced nothing are simply absent
    """
    if repo_data is None:
        repo_data = {}

    submitted = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for manifest in manifests:
            # One private result list per manifest keeps the workers from
            # sharing mutable state and preserves the input order.
            bucket = []
            future = executor.submit(parse_manifest, manifest, bucket,
                                     search_path, relative_path, repo_data)
            submitted.append((manifest, bucket, future))

    # Leaving the ``with`` block waits for all workers, so every future is
    # finished by the time it is inspected here.
    jobs = []
    for manifest, bucket, future in submitted:
        error = future.exception()
        if error is not None:
            # Without this check an exception raised in a worker would be
            # discarded together with its future.
            logger.error('Error processing manifest file {}'.format(manifest),
                         exc_info=error)
            continue
        jobs.extend(bucket)

    return jobs
428435

@@ -527,82 +534,99 @@ def _process_clean_file(profile, yaml_contents):
527534
'clean-file': _process_clean_file
528535
}
529536

530-
def discover_yamls(manifests, search_path, relative_path=None):
531-
""" Discover yaml files referenced in manifest files and extract key
537+
def discover_yamls_from_manifest(manifest, search_path, relative_path=None):
    """ Collect and process the YAML files one manifest refers to

    For every profile of the manifest a ``yaml_files`` list is created. Each
    profile argument that has an entry in ``yaml_processors`` and whose value
    is a string ending in ``.yaml`` is resolved relative to the manifest
    directory, recorded in ``yaml_files`` and, when the file exists and loads
    to non-empty content, passed to its processor together with the profile.

    Arguments:
        manifest (dict): manifest object, modified in place
        search_path (Path): pathlib Path object with the directory to start discovery from
        relative_path (str): String with the directory search results will be relative to
    """
    base_dir = os.path.dirname(manifest['file'])

    for profile in manifest['profiles']:
        profile['yaml_files'] = []

        if not isinstance(profile.get('arguments'), dict):
            continue

        for arg_name, arg_value in profile['arguments'].items():
            # Only arguments with a registered processor (testbed and clean
            # files) are of interest; other yaml files are never loaded.
            if arg_name not in yaml_processors:
                continue

            if not isinstance(arg_value, str):
                continue
            if not arg_value.lower().endswith('.yaml'):
                continue

            # Values starting with a variable cannot be resolved here.
            if arg_value.startswith('$'):
                continue

            # An absolute path is only usable when it lies under the
            # relative path, i.e. when it will be reachable in the image.
            if arg_value.startswith('/') and not (
                    relative_path and arg_value.startswith(relative_path)):
                continue

            # Absolute path built from the manifest directory. This is the
            # location inside the built image, not in the build environment.
            yaml_path = os.path.abspath(os.path.join(base_dir, arg_value))
            if relative_path:
                # Map it back to a real path in the build environment.
                yaml_path = to_image_path(yaml_path, relative_path, search_path)

            # Record the reference whether or not the file exists; what
            # matters is that the manifest points at it.
            profile['yaml_files'].append(yaml_path)

            if not os.path.isfile(yaml_path):
                # The path referenced by the manifest does not exist.
                logger.warning(f'Could not find YAML file {arg_value} from '
                               f'manifest {manifest["file"]}')
                continue

            try:
                with open(yaml_path) as handle:
                    # An empty file loads as None; normalise it to {}.
                    loaded = yaml.safe_load(handle.read()) or {}
            except Exception:
                logger.exception(f'Error loading YAML file {arg_value} from '
                                 f'manifest {manifest["file"]}')
                continue

            handler = yaml_processors.get(arg_name)
            if not (handler and loaded):
                continue

            try:
                handler(profile, loaded)
            except Exception:
                # Problem processing this specific type of YAML file.
                logger.exception(f'Error processing {arg_name} {arg_value} from '
                                 f'manifest {manifest["file"]}')
613+
614+
615+
def discover_yamls(manifests, search_path, relative_path=None, max_workers=50):
    """ Discover yaml files referenced in manifest files and extract key
    information

    Each manifest is processed by ``discover_yamls_from_manifest`` on a
    thread pool. An exception raised while processing one manifest is logged
    with its traceback and does not stop the remaining manifests.

    Arguments:
        manifests (list): list of contents of discovered manifests
        search_path (Path): pathlib Path object with the directory to start discovery from
        relative_path (str): String with the directory search results will be relative to
        max_workers (int): maximum number of worker threads

    Returns:
        the ``manifests`` object that was passed in
    """
    logger.info('Discovering YAML files from manifests')

    submitted = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for manifest in manifests:
            future = executor.submit(discover_yamls_from_manifest,
                                     manifest,
                                     search_path,
                                     relative_path)
            submitted.append((manifest, future))

    # All workers have finished once the ``with`` block is left. Inspect each
    # future so that a failure in a worker is reported instead of being
    # discarded together with the future.
    for manifest, future in submitted:
        try:
            future.result()
        except Exception:
            if isinstance(manifest, dict):
                source = manifest.get('file', '<unknown>')
            else:
                source = repr(manifest)
            logger.exception(
                'Error discovering YAML files from manifest {}'.format(source))

    return manifests

0 commit comments

Comments
 (0)