|
| 1 | +import unittest |
| 2 | +import torch |
| 3 | +import transformers |
| 4 | +from onnx_diagnostic.ext_test_case import ( |
| 5 | + ExtTestCase, |
| 6 | + hide_stdout, |
| 7 | + requires_diffusers, |
| 8 | + requires_torch, |
| 9 | +) |
| 10 | +from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs |
| 11 | +from onnx_diagnostic.torch_export_patches import torch_export_patches |
| 12 | +from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str |
| 13 | + |
| 14 | + |
class TestTasksImageToVideo(ExtTestCase):
    """Export smoke test for a model reported under the ``image-to-video`` task."""

    @hide_stdout()
    @requires_diffusers("0.35")
    @requires_torch("2.8.99")
    def test_cosmos_predict(self):
        """Build a reduced Cosmos-Predict2 transformer, run it, then export it.

        The test requests an untrained model together with two sets of inputs,
        checks the reported task, calls the model on both input sets, and
        finally runs ``torch.export.export`` with the dynamic shapes returned
        alongside the model. The exported program itself is not inspected; the
        test passes when no step raises.
        """
        # Reduced configuration values forwarded verbatim to the config class.
        reduced_settings = {
            "_diffusers_version": "0.34.0.dev0",
            "_class_name": "CosmosTransformer3DModel",
            "max_size": [128, 240, 240],
            "text_embed_dim": 128,
            "use_cache": True,
            "in_channels": 3,
            "out_channels": 16,
            "num_layers": 2,
            "model_type": "dia",
            "patch_size": [1, 2, 2],
            "rope_scale": [1.0, 3.0, 3.0],
            "attention_head_dim": 16,
            "mlp_ratio": 0.4,
            "initializer_range": 0.02,
            "num_attention_heads": 16,
            "is_encoder_decoder": True,
            "adaln_lora_dim": 16,
            "concat_padding_mask": True,
            "extra_pos_embed_type": None,
        }
        config = transformers.DiaConfig(**reduced_settings)

        bundle = get_untrained_model_with_inputs(
            "nvidia/Cosmos-Predict2-2B-Video2World",
            verbose=1,
            add_second_input=True,
            subfolder="transformer",
            config=config,
            inputs_kwargs={"image_height": 8 * 50, "image_width": 8 * 80},
        )
        self.assertEqual(bundle["task"], "image-to-video")

        model = bundle["model"]
        first_inputs = bundle["inputs"]
        dynamic_shapes = bundle["dynamic_shapes"]

        # Eager calls on both input sets before attempting the export.
        model(**first_inputs)
        model(**bundle["inputs2"])

        with torch.fx.experimental._config.patch(backed_size_oblivious=True):
            with torch_export_patches(
                patch_transformers=True,
                patch_diffusers=True,
                verbose=10,
                stop_if_static=1,
            ):
                torch.export.export(
                    model,
                    (),
                    kwargs=first_inputs,
                    dynamic_shapes=use_dyn_not_str(dynamic_shapes),
                    strict=False,
                )
| 63 | + |
| 64 | + |
# Allow running this test module directly as a script; verbosity=2 makes
# unittest print one line per test instead of a single dot.
if __name__ == "__main__":
    unittest.main(verbosity=2)
0 commit comments