4848 pa .field ("fps" , pa .float64 ()),
4949])
5050
# Schema for I2V validation records.
# Each tensor is stored as a (<name>_bytes, <name>_shape, <name>_dtype) triple:
# the raw buffer plus the shape and dtype information needed to load it.
# Fields are written as (name, type) tuples; pa.schema() builds each one the
# same way pa.field(name, type) does.
pyarrow_schema_i2v_validation = pa.schema([
    ("id", pa.string()),

    # --- Image/Video VAE latents ---
    # shape e.g. [C, T, H, W] or [C, H, W]; dtype e.g. 'float32'
    ("vae_latent_bytes", pa.binary()),
    ("vae_latent_shape", pa.list_(pa.int64())),
    ("vae_latent_dtype", pa.string()),

    # --- Text encoder output tensor ---
    # shape e.g. [SeqLen, Dim]; dtype e.g. 'bfloat16' or 'float32'
    ("text_embedding_bytes", pa.binary()),
    ("text_embedding_shape", pa.list_(pa.int64())),
    ("text_embedding_dtype", pa.string()),

    # --- Text attention mask ---
    # shape e.g. [SeqLen]; dtype e.g. 'bool' or 'int8'
    ("text_attention_mask_bytes", pa.binary()),
    ("text_attention_mask_shape", pa.list_(pa.int64())),
    ("text_attention_mask_dtype", pa.string()),

    # --- I2V: CLIP feature tensor ---
    ("clip_feature_bytes", pa.binary()),
    ("clip_feature_shape", pa.list_(pa.int64())),
    ("clip_feature_dtype", pa.string()),

    # --- I2V validation: image payload ---
    # NOTE(review): presumably the serialized PIL input image - confirm
    # against the code that writes these records.
    ("pil_image_bytes", pa.binary()),
    ("pil_image_shape", pa.list_(pa.int64())),
    ("pil_image_dtype", pa.string()),

    # --- Metadata ---
    ("file_name", pa.string()),
    ("caption", pa.string()),
    ("media_type", pa.string()),  # 'image' or 'video'
    ("width", pa.int64()),
    ("height", pa.int64()),

    # --- Video-specific (can be null/default for images) ---
    ("num_frames", pa.int64()),  # frames processed, e.g. 1 for image, N for video
    ("duration_sec", pa.float64()),
    ("fps", pa.float64()),
])

92+
5193pyarrow_schema_t2v = pa .schema ([
5294 pa .field ("id" , pa .string ()),
5395 # --- Image/Video VAE latents ---
80122 pa .field ("num_frames" , pa .int64 ()),
81123 pa .field ("duration_sec" , pa .float64 ()),
82124 pa .field ("fps" , pa .float64 ()),
83- ])
125+ ])
126+
# Schema for T2V validation records.
# Each tensor is stored as a (<name>_bytes, <name>_shape, <name>_dtype) triple:
# the raw buffer plus the shape and dtype information needed to load it.
# Fields are written as (name, type) tuples; pa.schema() builds each one the
# same way pa.field(name, type) does.
pyarrow_schema_t2v_validation = pa.schema([
    ("id", pa.string()),

    # --- Image/Video VAE latents ---
    # shape e.g. [C, T, H, W] or [C, H, W]; dtype e.g. 'float32'
    ("vae_latent_bytes", pa.binary()),
    ("vae_latent_shape", pa.list_(pa.int64())),
    ("vae_latent_dtype", pa.string()),

    # --- Text encoder output tensor ---
    # shape e.g. [SeqLen, Dim]; dtype e.g. 'bfloat16' or 'float32'
    ("text_embedding_bytes", pa.binary()),
    ("text_embedding_shape", pa.list_(pa.int64())),
    ("text_embedding_dtype", pa.string()),

    # --- Text attention mask ---
    # shape e.g. [SeqLen]; dtype e.g. 'bool' or 'int8'
    ("text_attention_mask_bytes", pa.binary()),
    ("text_attention_mask_shape", pa.list_(pa.int64())),
    ("text_attention_mask_dtype", pa.string()),

    # --- Metadata ---
    ("file_name", pa.string()),
    ("caption", pa.string()),
    ("media_type", pa.string()),  # 'image' or 'video'
    ("width", pa.int64()),
    ("height", pa.int64()),

    # --- Video-specific (can be null/default for images) ---
    ("num_frames", pa.int64()),  # frames processed, e.g. 1 for image, N for video
    ("duration_sec", pa.float64()),
    ("fps", pa.float64()),
])
0 commit comments