|
12 | 12 | import yaml |
13 | 13 | import sys |
14 | 14 |
|
| 15 | +from concurrent.futures import ThreadPoolExecutor |
| 16 | + |
# Module-level logger: everything from DEBUG upwards is echoed to stdout.
_stdout_handler = logging.StreamHandler(sys.stdout)
logger = logging.getLogger(__name__)
logger.addHandler(_stdout_handler)
logger.setLevel(logging.DEBUG)
@@ -347,82 +349,87 @@ def discover_jobs(jobfiles, |
347 | 349 | return job_paths |
348 | 350 |
|
349 | 351 |
|
def parse_manifest(manifest_file, jobs, search_path, relative_path=None, repo_data=None):
    """ Parse a single manifest file and append the resulting job to `jobs`

    The manifest is loaded from YAML, annotated with its file path, repo
    path, run type and job type, and its `profiles` and `runtimes` mappings
    are flattened into lists of dicts (each carrying a `name` key). A
    `DEFAULT` profile is always created from the top level `arguments` and
    the `system` runtime environment.

    Any failure is logged and the manifest is skipped; nothing is raised.

    Arguments:
        manifest_file (Path or str): manifest file to load
        jobs (list): list the parsed manifest dict is appended to
        search_path (Path): pathlib Path object with the directory discovery started from
        relative_path (str): String with the directory search results will be relative to
        repo_data (dict): repo paths, used to find the repo containing the manifest
    """
    # The default is None, but repo_data is iterated below.
    if repo_data is None:
        repo_data = {}

    try:
        with open(manifest_file) as f:
            manifest_data = yaml.safe_load(f.read())
    except yaml.error.YAMLError as e:
        logger.error('Error loading manifest file {} from yaml\n{}'.format(
            manifest_file, str(e)))
        return
    except OSError as e:
        # An unreadable file must not vanish silently when this function
        # runs inside a worker thread.
        logger.error('Error reading manifest file {}\n{}'.format(
            manifest_file, str(e)))
        return

    if manifest_data is None:
        logger.warning(f'No manifest data from file {manifest_file}')
        return

    try:
        if relative_path:
            manifest_data['file'] = to_image_path(str(manifest_file),
                                                  search_path,
                                                  relative_path)
        else:
            manifest_data['file'] = str(manifest_file)

        # Find any repo containing this manifest file
        for repo in repo_data:
            if manifest_data['file'].startswith(repo):
                manifest_data['repo_path'] = repo
                break

        manifest_data['run_type'] = 'manifest'
        manifest_data['job_type'] = manifest_data.pop('type', None)
        if not manifest_data['job_type']:
            logger.warning(f'No job type specified in {manifest_file}')
            return

        # Pop runtimes and profiles to add them back later as lists. An
        # empty `runtimes:` / `profiles:` key loads as None, so fall back to
        # an empty mapping.
        runtimes = manifest_data.pop('runtimes', None) or {}
        profiles = manifest_data.pop('profiles', None) or {}

        # Create default profile from top level arguments and system environment
        default_arguments = manifest_data.pop('arguments', {})
        default_environment = runtimes.get('system', {}).get('environment', {})
        profiles['DEFAULT'] = {
            'runtime': 'system',
            'arguments': default_arguments,
            'environment': default_environment,
        }

        # Update profiles with environment from runtimes
        for profile in profiles.values():
            runtime = profile.get('runtime', 'system')
            if runtime in runtimes:
                environment = runtimes[runtime].get('environment', {})
                if environment:
                    profile['environment'] = environment

        # Convert profiles from hierarchical dict to list of dict
        manifest_data['profiles'] = []
        for profile_name, profile in profiles.items():
            profile['name'] = profile_name
            manifest_data['profiles'].append(profile)

        # Convert runtimes from hierarchical dict to list of dict
        manifest_data['runtimes'] = []
        for runtime_name, runtime in runtimes.items():
            runtime['name'] = runtime_name
            manifest_data['runtimes'].append(runtime)

        jobs.append(manifest_data)

    except Exception:
        # Best effort: a malformed manifest is logged with its traceback and
        # skipped so the remaining manifests are still processed.
        logger.exception('Error processing manifest file {}'.format(
            manifest_file))
| 423 | + |
| 424 | + |
def parse_manifests(manifests, search_path, relative_path=None, repo_data=None,
                    max_workers=50):
    """ Parse manifest files concurrently and return the resulting jobs

    Each manifest is parsed by `parse_manifest` in a worker thread. Every
    worker writes to its own result list and the lists are merged in input
    order, so the returned jobs follow the order of `manifests` regardless
    of which thread finishes first.

    Arguments:
        manifests (list): manifest files to parse
        search_path (Path): pathlib Path object with the directory discovery started from
        relative_path (str): String with the directory search results will be relative to
        repo_data (dict): repo paths, used to find the repo containing each manifest
        max_workers (int): maximum number of worker threads

    Returns:
        list of parsed manifest dicts, one per successfully parsed manifest
    """
    if repo_data is None:
        repo_data = {}

    # Materialise so the input can be paired with its results afterwards
    manifests = list(manifests)
    results = [[] for _ in manifests]

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(parse_manifest, manifest, result,
                            search_path, relative_path, repo_data)
            for manifest, result in zip(manifests, results)
        ]

    # Leaving the `with` block waits for all workers, so every future is
    # done here and exception() returns immediately.
    jobs = []
    for manifest, future, result in zip(manifests, futures, results):
        error = future.exception()
        if error is not None:
            # An exception raised inside a future is otherwise dropped
            # without a trace.
            logger.error('Error parsing manifest file {}'.format(manifest),
                         exc_info=error)
            continue
        jobs.extend(result)

    return jobs
428 | 435 |
|
@@ -527,82 +534,99 @@ def _process_clean_file(profile, yaml_contents): |
527 | 534 | 'clean-file': _process_clean_file |
528 | 535 | } |
529 | 536 |
|
530 | | -def discover_yamls(manifests, search_path, relative_path=None): |
531 | | - """ Discover yaml files referenced in manifest files and extract key |
def discover_yamls_from_manifest(manifest, search_path, relative_path=None):
    """ Discover yaml files referenced by one manifest and extract key
    information

    For every profile of the manifest a `yaml_files` list is created and
    filled with the resolved path of each referenced yaml file whose argument
    name has an entry in `yaml_processors`. Files that exist are loaded and
    handed to the matching processor together with the profile.

    Arguments:
        manifest (dict): manifest object
        search_path (Path): pathlib Path object with the directory to start discovery from
        relative_path (str): String with the directory search results will be relative to
    """
    base_dir = os.path.dirname(manifest['file'])

    for profile in manifest['profiles']:
        profile['yaml_files'] = []

        arguments = profile.get('arguments')
        if not isinstance(arguments, dict):
            continue

        for arg_name, arg_value in arguments.items():
            # Only testbed and clean files are of interest; other yaml
            # files do not need to be loaded
            if arg_name not in yaml_processors:
                continue

            is_yaml_ref = (isinstance(arg_value, str)
                           and arg_value.lower().endswith('.yaml'))
            if not is_yaml_ref:
                continue

            # Skip references that start with a variable
            if arg_value.startswith('$'):
                continue

            # Skip absolute paths unless they start with the relative path,
            # in which case the file should be accessible in the image
            if arg_value.startswith('/') and not (
                    relative_path and arg_value.startswith(relative_path)):
                continue

            # Absolute path built from the manifest directory. This is the
            # path inside the built image, not in the build environment
            yaml_path = os.path.abspath(os.path.join(base_dir, arg_value))
            if relative_path:
                # Map to the real location in the build environment
                yaml_path = to_image_path(yaml_path, relative_path, search_path)

            # Record the reference whether or not the file exists
            profile['yaml_files'].append(yaml_path)

            if not os.path.isfile(yaml_path):
                logger.warning(f'Could not find YAML file {arg_value} from '
                               f'manifest {manifest["file"]}')
                continue

            try:
                with open(yaml_path) as stream:
                    # An empty file loads as None; treat it as an empty dict
                    contents = yaml.safe_load(stream.read()) or {}
            except Exception:
                logger.exception(f'Error loading YAML file {arg_value} from '
                                 f'manifest {manifest["file"]}')
                continue

            handler = yaml_processors.get(arg_name)
            if not (handler and contents):
                continue

            try:
                handler(profile, contents)
            except Exception:
                # Problem processing the specific type of YAML file
                logger.exception(f'Error processing {arg_name} {arg_value} from '
                                 f'manifest {manifest["file"]}')
| 613 | + |
| 614 | + |
def discover_yamls(manifests, search_path, relative_path=None, max_workers=50):
    """ Discover yaml files referenced in manifest files and extract key
    information

    Each manifest is handled by `discover_yamls_from_manifest` in a worker
    thread and is updated in place. A failure in one manifest is logged and
    does not stop the others.

    Arguments:
        manifests (list): list of contents of discovered manifests
        search_path (Path): pathlib Path object with the directory to start discovery from
        relative_path (str): String with the directory search results will be relative to
        max_workers (int): maximum number of worker threads

    Returns:
        the `manifests` argument, with its manifest dicts updated in place
    """
    logger.info('Discovering YAML files from manifests')

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        submitted = [
            (manifest, executor.submit(discover_yamls_from_manifest,
                                       manifest,
                                       search_path,
                                       relative_path))
            for manifest in manifests
        ]

    # Leaving the `with` block waits for all workers. An exception raised
    # inside a future is otherwise dropped without a trace, so report it.
    for manifest, future in submitted:
        error = future.exception()
        if error is not None:
            source = manifest.get('file') if isinstance(manifest, dict) else manifest
            logger.error(f'Error discovering YAML files from manifest {source}',
                         exc_info=error)

    return manifests
0 commit comments