Skip to content

Commit 40cb633

Browse files
Resolve a few warnings in the docs build (#1120)
Signed-off-by: James Bourbeau <[email protected]>
Co-authored-by: Sarah Yurick <[email protected]>
1 parent 95a695b commit 40cb633

File tree

2 files changed

+5
-34
lines changed

2 files changed

+5
-34
lines changed

nemo_curator/stages/deduplication/fuzzy/connected_components.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,7 @@
2424
from nemo_curator.backends.experimental.utils import RayStageSpecKeys
2525
from nemo_curator.stages.base import ProcessingStage
2626
from nemo_curator.stages.deduplication.fuzzy.utils import CURATOR_FUZZY_DUPLICATE_GROUP_FIELD
27-
from nemo_curator.stages.deduplication.id_generator import (
28-
CURATOR_DEDUP_ID_STR,
29-
)
27+
from nemo_curator.stages.deduplication.id_generator import CURATOR_DEDUP_ID_STR
3028
from nemo_curator.stages.deduplication.io_utils import DeduplicationIO
3129
from nemo_curator.stages.resources import Resources
3230
from nemo_curator.tasks.file_group import FileGroupTask
@@ -40,8 +38,8 @@ class ConnectedComponentsStage(ProcessingStage[FileGroupTask, FileGroupTask], De
4038
def __init__(
4139
self,
4240
output_path: str,
43-
source_field: str = f"{CURATOR_DEDUP_ID_STR}_x",
44-
destination_field: str = f"{CURATOR_DEDUP_ID_STR}_y",
41+
source_field: str | None = None,
42+
destination_field: str | None = None,
4543
read_kwargs: dict | None = None,
4644
write_kwargs: dict | None = None,
4745
):
@@ -54,8 +52,8 @@ def __init__(
5452
write_kwargs: Keyword arguments to pass for writing the output files.
5553
"""
5654

57-
self.source_field = source_field
58-
self.destination_field = destination_field
55+
self.source_field = source_field or f"{CURATOR_DEDUP_ID_STR}_x"
56+
self.destination_field = destination_field or f"{CURATOR_DEDUP_ID_STR}_y"
5957
self.read_kwargs = read_kwargs if read_kwargs is not None else {}
6058
self.write_kwargs = write_kwargs if write_kwargs is not None else {}
6159

nemo_curator/stages/video/io/clip_writer.py

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -334,33 +334,6 @@ def _write_clip_mp4(self, clip: Clip, *, filtered: bool = False) -> ClipStats:
334334
clip_stats.num_passed += 1
335335
return clip_stats
336336

337-
def _write_clip_embedding_to_buffer(self, clip: Clip) -> ClipStats:
338-
clip_stats = ClipStats()
339-
if clip.intern_video_2_embedding is not None:
340-
self._iv2_embedding_buffer.append(
341-
{
342-
"id": str(clip.uuid),
343-
"embedding": clip.intern_video_2_embedding.reshape(-1).tolist(),
344-
},
345-
)
346-
elif self.generate_embeddings and self.embedding_algorithm == "internvideo2":
347-
logger.error(
348-
f"Clip {clip.uuid} from {clip.source_video} has no InternVideo2 embedding, skip adding to buffer"
349-
)
350-
if clip.cosmos_embed1_embedding is not None:
351-
self._ce1_embedding_buffer.append(
352-
{
353-
"id": str(clip.uuid),
354-
"embedding": clip.cosmos_embed1_embedding.reshape(-1).tolist(),
355-
},
356-
)
357-
elif self.generate_embeddings and self.embedding_algorithm == "cosmos-embed1":
358-
logger.error(
359-
f"Clip {clip.uuid} from {clip.source_video} has no Cosmos-Embed1 embedding, skip adding to buffer"
360-
)
361-
362-
return clip_stats
363-
364337
def _write_clip_embedding(self, clip: Clip) -> ClipStats:
365338
clip_stats = ClipStats()
366339
if clip.intern_video_2_embedding is not None:

0 commit comments

Comments
 (0)