@@ -150,7 +150,6 @@ def run_downsample_if_req(
height_sharding=None,
packer_l1_accum_enabled=True,
enable_act_double_buffer=False,
- enable_split_reader=False,
):
if self.downsample:
logger.debug(f"Running downsample")
@@ -180,7 +179,6 @@ def run_downsample_if_req(
if input_width < 56
else False,
enable_weights_double_buffer=True if input_width < 56 else False,
- enable_split_reader=enable_split_reader,
full_inner_dim=True,
),
}
@@ -217,7 +215,6 @@ def __call__(
eltwise_binary_out_in_place=True,
packer_l1_acc=True,
enable_act_double_buffer=False,
- enable_split_reader=False,
ops_parallel_config=None,
layer_module=None,
):
@@ -287,7 +284,6 @@ def __call__(
height_sharding,
packer_l1_accum_enabled=packer_l1_acc,
enable_act_double_buffer=False,
- enable_split_reader=enable_split_reader,
)
if layer_module and layer_module == "layer4_module1":
if ops_parallel_config and "layer4_module1_downsample" not in ops_parallel_config:
@@ -331,7 +327,6 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
enable_act_double_buffer=enable_act_double_buffer,
enable_weights_double_buffer=True,
- enable_split_reader=enable_split_reader,
full_inner_dim=True,
),
}
@@ -439,7 +434,6 @@ def __call__(
height_sharding,
packer_l1_accum_enabled=packer_l1_acc,
enable_act_double_buffer=enable_act_double_buffer,
- enable_split_reader=enable_split_reader,
)

assert ds_out is not None, "ds_out is None"
@@ -578,7 +572,6 @@ def __init__(
deallocate_activation=dealloc_input,
act_block_h_override=act_block_h_override,
enable_act_double_buffer=is_wormhole_b0() or is_blackhole(),
- enable_split_reader=True,
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
reshard_if_not_optimal=False,
# otherwise act block h is not big enough for the reuse
@@ -812,7 +805,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
reshard_if_not_optimal=reshard,
height_sharding=height_shard,
enable_act_double_buffer=True,
- enable_split_reader=True,
)

if is_first_run:
@@ -833,7 +825,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=False,
- enable_split_reader=True,
layer_module="layer1_module2",
)

@@ -845,7 +836,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=False,
- enable_split_reader=True,
layer_module="layer1_module3",
)

@@ -864,7 +854,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
reshard_if_not_optimal=reshard,
height_sharding=height_shard,
enable_act_double_buffer=True,
- enable_split_reader=True,
layer_module="layer2_module1",
)

@@ -886,7 +875,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=True,
- enable_split_reader=True,
layer_module="layer2_module2",
)

@@ -898,7 +886,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=True,
- enable_split_reader=True,
layer_module="layer2_module3",
)

@@ -910,7 +897,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=True,
- enable_split_reader=True,
layer_module="layer2_module4",
)

@@ -931,7 +917,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
reshard_if_not_optimal=reshard,
height_sharding=height_shard,
enable_act_double_buffer=True,
- enable_split_reader=False,
)

if is_first_run:
@@ -952,7 +937,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=True,
- enable_split_reader=False,
)

logger.debug(f"==== Running layer 3 module 3")
@@ -963,7 +947,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=True,
- enable_split_reader=False,
layer_module="layer3_module3",
)

@@ -975,7 +958,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=True,
- enable_split_reader=False,
layer_module="layer3_module4",
)

@@ -987,7 +969,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=True,
- enable_split_reader=False,
layer_module="layer3_module5",
)

@@ -1000,7 +981,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_width,
eltwise_binary_out_in_place=True,
enable_act_double_buffer=True,
- enable_split_reader=False,
)

reshard = is_blackhole() and self.batch_size == 20
@@ -1031,7 +1011,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
reshard_if_not_optimal=reshard,
height_sharding=height_shard,
enable_act_double_buffer=True,
- enable_split_reader=False,
ops_parallel_config=ops_parallel_config,
layer_module="layer4_module1",
)
@@ -1044,7 +1023,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=True,
- enable_split_reader=False,
layer_module="layer4_module2",
)

@@ -1056,7 +1034,6 @@ def run(self, input_tensor, device, ops_parallel_config) -> ttnn.Tensor:
x_height,
x_width,
enable_act_double_buffer=True,
- enable_split_reader=False,
layer_module="layer4_module3",
)

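Net effect of the commit: the enable_split_reader keyword is removed from the helper signatures (run_downsample_if_req, __call__), from the conv configuration built in __init__, and from every call site in run, while all other keyword arguments stay as they were. A minimal sketch of that pattern follows; the function body and shortened argument list are stand-ins for illustration, not the actual model code.

# Hypothetical stand-in showing the change applied throughout this commit:
# the keyword is dropped from the signature, so callers simply omit it.
def run_downsample_if_req(
    x,
    height_sharding=None,
    packer_l1_accum_enabled=True,
    enable_act_double_buffer=False,
    # enable_split_reader=False,  <- parameter removed by this commit
):
    # Placeholder body; the real helper runs the downsample conv when required.
    return x

# Call sites omit the removed keyword and keep the remaining arguments unchanged.
ds_out = run_downsample_if_req(
    object(),
    height_sharding=None,
    packer_l1_accum_enabled=True,
    enable_act_double_buffer=False,
)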