|
28 | 28 | # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 |
|
30 | 30 | import configparser |
31 | | -import errno |
32 | 31 | import json |
33 | 32 | import logging |
34 | 33 | import math |
35 | 34 | import os |
36 | | -import shutil |
37 | 35 | import sys |
38 | 36 | import time |
39 | 37 | from io import BytesIO |
40 | 38 | from pathlib import Path |
41 | | -from typing import Any, Dict, List, Optional, Tuple, Union |
| 39 | +from typing import Any, Dict, List, Tuple, Union |
42 | 40 |
|
43 | 41 | import torch |
44 | 42 | import torch.nn as nn |
45 | | -from torch import Tensor |
46 | 43 |
|
47 | 44 | from compressai_vision.codecs.utils import FpnUtils |
48 | 45 | from compressai_vision.model_wrappers import BaseWrapper |
|
53 | 50 |
|
54 | 51 | from .encdec_utils import * |
55 | 52 | from .encdec_utils.png_yuv import PngFilesToYuvFileConverter, YuvFileToPngFilesConverter |
56 | | -from .utils import ( |
57 | | - MIN_MAX_DATASET, |
58 | | - compute_frame_resolution, |
59 | | - min_max_inv_normalization, |
60 | | - min_max_normalization, |
61 | | - tensor_to_tiled, |
62 | | - tiled_to_tensor, |
63 | | -) |
| 53 | +from .utils import MIN_MAX_DATASET, min_max_inv_normalization, min_max_normalization |
64 | 54 |
|
65 | 55 |
|
66 | 56 | def get_filesize(filepath: Union[Path, str]) -> int: |
@@ -405,231 +395,6 @@ def get_decode_cmd( |
405 | 395 | self.logger.debug(cmd) |
406 | 396 | return cmd |
407 | 397 |
|
def convert_input_to_yuv(self, input: Dict, file_prefix: str):
    """
    Converts the input image or video to YUV format using ffmpeg, or uses an existing YUV if available.

    Args:
        input (Dict): A dictionary containing information about the input. It should have the following keys:
            - file_names (List[str]): A list of file names for the input. If it contains more than one file, it is considered a video.
            - last_frame (int): The last frame number of the video (read in video mode only).
            - frame_skip (int): The number of frames to skip in the video (read in video mode only).
            - org_input_size (Dict[str, int]): A dictionary containing the width and height of the input.
        file_prefix (str): The prefix for the output file name.

    Returns:
        Tuple[str, int, int, int, str]: A tuple containing the following:
            - yuv_in_path (str): The path to the converted YUV input file.
            - nb_frames (int): The number of frames in the input (1 for a single image).
            - frame_width (int): The frame width, rounded up to an even value.
            - frame_height (int): The frame height, rounded up to an even value.
            - file_prefix (str): The updated file prefix.

    Raises:
        AssertionError: If the number of PNG images in the input folder does not match the
            expected number of frames, or if a pre-existing YUV file does not have the expected size.
        ValueError: If a pre-existing YUV file is found but the configured chroma format has
            no known size ratio to validate it against.
    """
    file_names = input["file_names"]

    # A single image is exactly one frame. Without this default the image
    # branch reached the final return with nb_frames unbound.
    nb_frames = 1
    yuv_file = None
    if len(file_names) > 1:  # video
        frames_dir = Path(file_names[0]).parent
        # NOTE: using glob for now, should be more robust and look at skipped
        # NOTE: somewhat rigid pattern (lowercase png)
        filename_pattern = f"{str(frames_dir)}/*.png"
        nb_frames = input["last_frame"] - input["frame_skip"]
        images_in_folder = sum(1 for _ in frames_dir.glob("*.png"))
        assert (
            images_in_folder == nb_frames
        ), f"input folder contains {images_in_folder} images, {nb_frames} were expected"

        input_info = [
            "-pattern_type",
            "glob",
            "-i",
            filename_pattern,
        ]

        # A ready-made YUV is looked up next to the sequence folder
        # (<parent of the frames folder>.yuv).
        yuv_file = Path(f"{frames_dir.parent}.yuv")
        print(f"Checking if YUV is available: {yuv_file}")
        if not yuv_file.is_file():
            yuv_file = None
    else:
        input_info = ["-i", file_names[0]]

    chroma_format = self.enc_cfgs["chroma_format"]
    input_bitdepth = self.enc_cfgs["input_bitdepth"]

    # Dimensions are rounded up to even values, matching the ffmpeg pad filter below.
    frame_width = math.ceil(input["org_input_size"]["width"] / 2) * 2
    frame_height = math.ceil(input["org_input_size"]["height"] / 2) * 2
    file_prefix = f"{file_prefix}_{frame_width}x{frame_height}_{self.frame_rate}fps_{input_bitdepth}bit_p{chroma_format}"
    yuv_in_path = f"{file_prefix}_input.yuv"

    pix_fmt_suffix = "10le" if input_bitdepth == 10 else ""
    chroma_format = "gray" if chroma_format == "400" else f"yuv{chroma_format}p"

    # Use existing YUV (if found):
    if yuv_file is not None:
        # Total samples stored per luma sample for each planar layout.
        samples_per_luma_sample = {
            "gray": 1.0,
            "yuv420p": 1.5,
            "yuv422p": 2.0,
            "yuv444p": 3.0,
        }
        if chroma_format not in samples_per_luma_sample:
            raise ValueError(
                f"Cannot validate pre-existing YUV file {yuv_file}: unsupported chroma format {chroma_format}"
            )
        size = yuv_file.stat().st_size
        bytes_per_sample = (input_bitdepth + 7) >> 3
        expected_size = int(
            frame_width
            * frame_height
            * samples_per_luma_sample[chroma_format]
            * bytes_per_sample
            * nb_frames
        )
        assert (
            size == expected_size
        ), f"YUV found for input but expected size of {expected_size} bytes differs from actual size of {size} bytes"
        shutil.copy(yuv_file, yuv_in_path)
        print(f"Using pre-existing YUV file: {yuv_file}")
        return (yuv_in_path, nb_frames, frame_width, frame_height, file_prefix)

    # TODO (fracape)
    # we don't enable skipping frames (codec.skip_n_frames) nor use n_frames_to_be_encoded in video mode

    convert_cmd = [
        "ffmpeg",
        "-y",
        "-hide_banner",
        "-loglevel",
        f"{self.ffmpeg_loglevel}",
    ]
    convert_cmd += input_info
    convert_cmd += [
        "-vf",
        "pad=ceil(iw/2)*2:ceil(ih/2)*2",
        "-f",
        "rawvideo",
        "-pix_fmt",
        f"{chroma_format}{pix_fmt_suffix}",
        "-dst_range",
        "1",  # (fracape) convert to full range for now
    ]

    convert_cmd.append(yuv_in_path)
    self.logger.debug(convert_cmd)

    run_cmdline(convert_cmd)

    return (yuv_in_path, nb_frames, frame_width, frame_height, file_prefix)
514 | | - |
def convert_yuv_to_pngs(
    self,
    output_file_prefix: str,
    dec_path: str,
    yuv_dec_path: Path,
    org_img_size: Dict = None,
    vcm_mode: bool = False,
):
    """
    Converts a YUV file to a series of PNG images using ffmpeg.

    Args:
        output_file_prefix (str): The prefix of the output file name.
        dec_path (str): The path to the directory where the PNG images will be saved.
        yuv_dec_path (Path): The path to the input YUV file (a str or a Path is accepted).
            The part of the path after the last "qp" is parsed for the raw video info.
        org_img_size (Dict, optional): The original image size ("width"/"height"). When given and
            different from the size parsed from the YUV name, the PNG is cropped to it. Defaults to None.
        vcm_mode (bool, optional): When True, "-src_range 1" is not passed to ffmpeg and
            "-vsync 1" is added. Defaults to False.

    Returns:
        None

    Raises:
        AssertionError: If the video format is not YUV420.
        ValueError: If ``self.datacatalog`` is not one of the supported catalogs.
    """
    # The path is used with str.split below; normalize so a real Path works too.
    yuv_dec_path = str(yuv_dec_path)
    video_info = get_raw_video_file_info(yuv_dec_path.split("qp")[-1])
    frame_width = video_info["width"]
    frame_height = video_info["height"]

    assert (
        "420" in video_info["format"].value
    ), f"Only support yuv420, but got {video_info['format']}"
    pix_fmt_suffix = "10le" if video_info["bitdepth"] == 10 else ""
    chroma_format = "yuv420p"

    convert_cmd = [
        "ffmpeg",
        "-y",
        "-hide_banner",
        "-loglevel",
        "error",
        "-f",
        "rawvideo",
        "-pix_fmt",
        f"{chroma_format}{pix_fmt_suffix}",
        "-s",
        f"{frame_width}x{frame_height}",
    ]
    if not vcm_mode:
        convert_cmd.extend(
            [
                "-src_range",
                "1",  # (fracape) assume dec yuv is full range for now
            ]
        )
    convert_cmd.extend(
        [
            "-i",
            f"{yuv_dec_path}",
            "-pix_fmt",
            "rgb24",
        ]
    )
    if vcm_mode:
        convert_cmd.extend(
            [
                "-vsync",
                "1",
            ]
        )

    # TODO (fracape) hacky, clean this
    if self.datacatalog == "MPEGOIV6":
        output_png = f"{dec_path}/{output_file_prefix}.png"
    elif self.datacatalog == "SFUHW":
        prefix = output_file_prefix.split("qp")[0]
        output_png = f"{dec_path}/{prefix}%03d.png"
        convert_cmd += ["-start_number", "0"]
    elif self.datacatalog in ["MPEGHIEVE"]:
        convert_cmd += ["-start_number", "0"]
        output_png = f"{dec_path}/%06d.png"
    elif self.datacatalog in ["MPEGTVDTRACKING"]:
        convert_cmd += ["-start_number", "1"]
        output_png = f"{dec_path}/%06d.png"
    else:
        # Previously this fell through and failed later with an unbound output_png.
        raise ValueError(
            f"Unsupported datacatalog for YUV to PNG conversion: {self.datacatalog}"
        )
    convert_cmd.append(output_png)

    run_cmdline(convert_cmd)

    if org_img_size is None:
        return

    discrepancy = (
        frame_height != org_img_size["height"]
        or frame_width != org_img_size["width"]
    )
    if not discrepancy:
        return

    self.logger.warning(
        f"Different original input size found. It must be {org_img_size['width']}x{org_img_size['height']}, but {frame_width}x{frame_height} are parsed from YUV"
    )
    self.logger.warning(
        f"Use {org_img_size['width']}x{org_img_size['height']}, instead of {frame_width}x{frame_height}"
    )

    # Crop into a temporary file, then move it over the original name.
    # NOTE(review): for catalogs whose output_png is a printf-style pattern
    # ("%03d"/"%06d"), unlink/rename below act on the literal pattern path —
    # confirm this branch is only reached for single-image outputs.
    final_png = f"{dec_path}/{Path(output_png).stem}_tmp.png"

    convert_cmd = [
        "ffmpeg",
        "-y",
        "-hide_banner",
        "-loglevel",
        "error",
        "-i",
        output_png,
        "-vf",
        f"crop={org_img_size['width']}:{org_img_size['height']}",
        final_png,  # no name change
    ]
    run_cmdline(convert_cmd)

    Path(output_png).unlink()
    Path(final_png).rename(output_png)
632 | | - |
633 | 398 | def encode( |
634 | 399 | self, |
635 | 400 | x: Dict, |
@@ -849,7 +614,11 @@ def decode( |
849 | 614 | self.logger.debug(f"dec_time:{dec_time}") |
850 | 615 |
|
851 | 616 | self.convert_yuv_to_pngs( |
852 | | - output_file_prefix, dec_path, yuv_dec_path, org_img_size, vcm_mode |
| 617 | + output_file_prefix, |
| 618 | + dec_path, |
| 619 | + yuv_dec_path, |
| 620 | + org_img_size=org_img_size, |
| 621 | + vcm_mode=vcm_mode, |
853 | 622 | ) |
854 | 623 |
|
855 | 624 | # output the list of file paths for each frame |
|
0 commit comments