
Commit 2d90b4b

Update user-facing variable names and add override checks for name, resources, and batch_size (#1223)
* Add override checks for `name`, `resources`, and `batch_size`
* Add pytests
* Rename `name`, `resources`, and `batch_size` to be user-facing
* Fix some tests
* Small update
* Revert DocumentFilter changes
* Revert DocumentModifier changes
* Update GPU test
* Add Abhinav's suggestion

Signed-off-by: Sarah Yurick <[email protected]>
1 parent cebf111 commit 2d90b4b

File tree

110 files changed: +380, −273 lines


api-design.md

Lines changed: 2 additions & 2 deletions
```diff
@@ -110,11 +110,11 @@ class ProcessingStage(ABC, Generic[X, Y], metaclass=StageMeta):
 
     @property
     @abstractmethod
-    def name(self) -> str:
+    def _name(self) -> str:
         """Unique name for this stage."""

     @property
-    def resources(self) -> Resources:
+    def _resources(self) -> Resources:
         """Resource requirements for this stage."""
         return Resources(cpus=1.0)
```
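The diff above turns `name` and `resources` into internal `_name`/`_resources` defaults so the user-facing spellings stay plain attributes. A minimal sketch of how such an override check might work — the class names, default values, and `__init_subclass__` logic here are illustrative assumptions, not the library's actual implementation:

```python
class Resources:
    """Illustrative stand-in for the library's Resources type."""

    def __init__(self, cpus: float = 1.0):
        self.cpus = cpus


class ProcessingStage:
    # User-facing fields: plain class attributes, overridable by assignment.
    name: str = "processing_stage"
    resources: Resources = Resources(cpus=1.0)

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # Override check: the user-facing fields must remain plain
        # attributes; redefining them as properties would silently
        # break `stage.name = ...` style configuration.
        for attr in ("name", "resources"):
            if isinstance(cls.__dict__.get(attr), property):
                raise TypeError(
                    f"{cls.__name__}: override '{attr}' as a class "
                    "attribute, not a property"
                )


class GoodStage(ProcessingStage):
    name = "good_stage"
    resources = Resources(cpus=2.0)
```

Defining a subclass with `name` as a `@property` raises `TypeError` at class-creation time, which is the kind of early failure the override checks in this commit aim for.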
docs/about/concepts/image/data-loading-concepts.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -68,7 +68,7 @@ pipeline.add_stage(FilePartitioningStage(
 
 # Load images with DALI
 pipeline.add_stage(ImageReaderStage(
-    task_batch_size=100,
+    batch_size=100,
     verbose=True,
     num_threads=8,
     num_gpus_per_worker=0.25,
```
docs/about/concepts/video/abstractions.md

Lines changed: 2 additions & 5 deletions
````diff
@@ -58,11 +58,8 @@ Composite stages provide a user-facing convenience API and decompose into one or
 
 ```python
 class MyStage(ProcessingStage[X, Y]):
-    @property
-    def name(self) -> str: ...
-
-    @property
-    def resources(self) -> Resources: ...
+    name: str = "..."
+    resources: Resources = Resources(...)

     def inputs(self) -> tuple[list[str], list[str]]: ...
     def outputs(self) -> tuple[list[str], list[str]]: ...
````

docs/about/release-notes/migration-guide.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -247,7 +247,7 @@ In the new version, data loading is encapsulated in a dedicated pipeline stage (
 
 ```python
 # New: Read images from webdataset tar files
 read_stage = ImageReaderStage(
-    task_batch_size=args.task_batch_size,
+    batch_size=args.batch_size,
     num_threads=16,
     num_gpus_per_worker=0.25,
 )
````
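For callers migrating from the old keyword, a thin compatibility shim could map `task_batch_size` to `batch_size` with a deprecation warning. This wrapper is a sketch for illustration only — it is not part of the library, and the warning text is an assumption:

```python
import warnings


def make_reader_kwargs(**kwargs) -> dict:
    """Translate the deprecated `task_batch_size` keyword to `batch_size`."""
    if "task_batch_size" in kwargs:
        warnings.warn(
            "`task_batch_size` is deprecated; use `batch_size` instead",
            DeprecationWarning,
            stacklevel=2,
        )
        # Move the value over to the new, user-facing name.
        kwargs["batch_size"] = kwargs.pop("task_batch_size")
    return kwargs


# Usage: ImageReaderStage(**make_reader_kwargs(task_batch_size=100, num_threads=16))
```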

docs/curate-images/load-data/tar-archives.md

Lines changed: 6 additions & 6 deletions
````diff
@@ -63,7 +63,7 @@ pipeline.add_stage(FilePartitioningStage(
 
 # Stage 2: Read JPEG images from tar files using DALI
 pipeline.add_stage(ImageReaderStage(
-    task_batch_size=100,
+    batch_size=100,
     verbose=True,
     num_threads=16,
     num_gpus_per_worker=0.25,
@@ -77,7 +77,7 @@ results = pipeline.run()
 
 - `file_paths`: Path to directory containing tar files
 - `files_per_partition`: Number of tar files to process per partition (controls parallelism)
-- `task_batch_size`: Number of images per ImageBatch for processing
+- `batch_size`: Number of images per ImageBatch for processing

 ---

@@ -152,7 +152,7 @@ The `ImageReaderStage` is the core component that handles tar archive loading wi
   - Type
   - Default
   - Description
-* - `task_batch_size`
+* - `batch_size`
   - int
   - 100
   - Number of images per ImageBatch for processing
@@ -205,7 +205,7 @@ ImageObject(
 
 ```python
 # Optimal configuration for GPU acceleration
 pipeline.add_stage(ImageReaderStage(
-    task_batch_size=256,  # Larger batches for GPU throughput
+    batch_size=256,  # Larger batches for GPU throughput
     num_threads=16,  # More threads for I/O parallelism
     num_gpus_per_worker=0.5,  # Allocate more GPU memory
     verbose=True,
@@ -217,7 +217,7 @@ pipeline.add_stage(ImageReaderStage(
 
 ```python
 # Optimized for CPU decoding
 pipeline.add_stage(ImageReaderStage(
-    task_batch_size=64,  # Smaller batches to avoid memory pressure
+    batch_size=64,  # Smaller batches to avoid memory pressure
     num_threads=8,  # Fewer threads for CPU processing
     num_gpus_per_worker=0,  # No GPU allocation
     verbose=True,
@@ -228,7 +228,7 @@ pipeline.add_stage(ImageReaderStage(
 
 - **GPU Acceleration**: Use a GPU-enabled environment for optimal performance. The stage automatically detects CUDA availability and uses GPU decoding when possible.
 - **Parallelism Control**: Adjust `files_per_partition` to control how many tar files are processed together. Lower values increase parallelism but may increase overhead.
-- **Batch Size Tuning**: Increase `task_batch_size` for better throughput, but ensure sufficient memory is available.
+- **Batch Size Tuning**: Increase `batch_size` for better throughput, but ensure sufficient memory is available.
 - **Thread Configuration**: Adjust `num_threads` for I/O operations based on your storage system's characteristics.

 ---
````
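The batch-size tuning advice in the tar-archives doc can be made concrete with a back-of-envelope memory estimate. The helper below is purely illustrative arithmetic (the function name, the 50% safety margin, and the uint8 RGB assumption are all assumptions, not library behavior):

```python
def max_batch_size(mem_bytes: int, height: int, width: int,
                   channels: int = 3, dtype_bytes: int = 1,
                   safety: float = 0.5) -> int:
    """Estimate how many decoded images fit in mem_bytes,
    reserving a safety margin for intermediate decode buffers."""
    per_image = height * width * channels * dtype_bytes
    return max(1, int(mem_bytes * safety) // per_image)


# e.g. 8 GiB of GPU memory, 1080p RGB uint8 images:
print(max_batch_size(8 * 1024**3, 1080, 1920))  # → 690
```

Estimates like this only bound the decoded-image buffers; actual headroom also depends on the decoder and any downstream stages holding the same batch.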

docs/curate-images/process-data/embeddings/clip-embedder.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -45,7 +45,7 @@ pipeline.add_stage(FilePartitioningStage(
 
 # Stage 2: Read images
 pipeline.add_stage(ImageReaderStage(
-    task_batch_size=100,
+    batch_size=100,
    num_gpus_per_worker=0.25,
 ))
```

docs/curate-images/process-data/filters/aesthetic.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -51,7 +51,7 @@ pipeline.add_stage(FilePartitioningStage(
 
 # Stage 2: Read images
 pipeline.add_stage(ImageReaderStage(
-    task_batch_size=100,
+    batch_size=100,
    num_gpus_per_worker=0.25,
 ))
```

docs/curate-images/process-data/filters/nsfw.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -51,7 +51,7 @@ pipeline.add_stage(FilePartitioningStage(
 
 # Stage 2: Read images
 pipeline.add_stage(ImageReaderStage(
-    task_batch_size=100,
+    batch_size=100,
    num_gpus_per_worker=0.25,
 ))
```

docs/curate-images/save-export.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -86,7 +86,7 @@ pipeline.add_stage(FilePartitioningStage(
 ))
 
 pipeline.add_stage(ImageReaderStage(
-    task_batch_size=100,
+    batch_size=100,
    num_threads=16,
    num_gpus_per_worker=0.25,
 ))
```

docs/curate-images/tutorials/beginner.md

Lines changed: 2 additions & 2 deletions
````diff
@@ -105,7 +105,7 @@ Load images from tar archives and extract metadata.
 
 ```python
 pipeline.add_stage(ImageReaderStage(
-    task_batch_size=100,  # Images per batch
+    batch_size=100,  # Images per batch
     verbose=True,
     num_threads=16,  # I/O threads
     num_gpus_per_worker=0.25,
@@ -216,7 +216,7 @@ def create_image_curation_pipeline():
 ))
 
 pipeline.add_stage(ImageReaderStage(
-    task_batch_size=100,
+    batch_size=100,
     verbose=True,
     num_threads=16,
     num_gpus_per_worker=0.25,
````
