Commit e07d15e

Olivia-liu authored and facebook-github-bot committed
Remove 0 bytes lines; merge getitem with batch_norm nodes
Differential Revision: D65121374
1 parent: 41a57e6 · commit: e07d15e

3 files changed, 27 insertions(+), 12 deletions(-)

exir/memory_planning.py

Lines changed: 2 additions & 0 deletions
@@ -421,6 +421,7 @@ def collect_specs_from_nodes( # noqa: C901
                 if spec in unique_spec:
                     continue
                 else:
+                    spec.node_name = node.name
                     unique_spec.add(spec)
             yield spec
 
@@ -501,6 +502,7 @@ def pick_shared_obj(
     """
     # TODO: do better than linear scan
     picked = None
+    # Go over all tensors and figure out which of them can share memory without conflicts.
     for sobj in shared_objects:
         if spec.lifetime[0] > sobj.last_used_index:
             if picked is None or _size_abs_dif(sobj, spec) < _size_abs_dif(
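
Together with the exir/tensor.py change below, these hunks record which FX node first produced each deduplicated spec. A minimal sketch of the dedup-and-tag pattern; FakeSpec and collect_specs are stand-ins for the real TensorSpec and collect_specs_from_nodes, and the node names are illustrative only:

# Minimal sketch, not the real ExecuTorch types.
class FakeSpec:
    def __init__(self) -> None:
        self.node_name = None  # mirrors the attribute added in exir/tensor.py

def collect_specs(nodes_to_specs):
    # Yield each spec once, tagged with the first node that produced it.
    unique_spec = set()
    for node_name, spec in nodes_to_specs:
        if spec in unique_spec:
            continue
        else:
            spec.node_name = node_name  # first producer wins
            unique_spec.add(spec)
        yield spec

# A batch_norm output and the getitem that unpacks it can share one spec:
shared = FakeSpec()
specs = list(collect_specs([("batch_norm", shared), ("getitem", shared)]))
assert len(specs) == 1 and specs[0].node_name == "batch_norm"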

exir/tensor.py

Lines changed: 1 addition & 0 deletions
@@ -146,6 +146,7 @@ def __init__(
         self.is_sparse = is_sparse
         self.init_mem_planning_fields()
         self.shape_dynamism: TensorShapeDynamism = determine_tensor_dynanism(self.shape)
+        self.node_name = None
 
     @property
     def allocated_memory(self) -> int:
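
Note that node_name defaults to None and is only filled in when collect_specs_from_nodes visits the spec, so consumers should treat None as "not yet attributed". A hedged sketch (the helper name is hypothetical):

def display_name(spec) -> str:
    # spec is any TensorSpec-like object; None means memory planning
    # has not tagged it yet.
    return spec.node_name if spec.node_name is not None else "<unattributed>"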

util/activation_memory_profiler.py

Lines changed: 24 additions & 12 deletions
@@ -28,6 +28,7 @@ class Allocation:
     size_bytes: int
     fqn: str
     file_and_line_num: str
+    overlap_with_subsequent: bool = False
 
 
 @dataclass
@@ -76,17 +77,18 @@ def create_tensor_allocation_info(graph: torch.fx.Graph) -> List[MemoryTimeline]
             # pyre-ignore
             memory_timeline[j] = MemoryTimeline()
         # pyre-ignore
-        memory_timeline[j].allocations.append(
-            Allocation(
-                node.name,
-                node.target,
-                tensor_spec.mem_id,
-                tensor_spec.mem_offset,
-                size,
-                fqn,
-                stack_trace,
-            )
-        )
+        if size != 0:
+            memory_timeline[j].allocations.append(
+                Allocation(
+                    node.name,
+                    node.target,
+                    tensor_spec.mem_id,
+                    tensor_spec.mem_offset,
+                    size,
+                    fqn,
+                    stack_trace,
+                ))
+
     # pyre-ignore
     return memory_timeline
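
The new `if size != 0:` guard is the "remove 0 bytes lines" half of the commit title: zero-sized tensors no longer add rows to the memory timeline. A self-contained sketch of the effect, with a simplified stand-in for Allocation and made-up node names:

from dataclasses import dataclass

@dataclass
class Row:
    # simplified stand-in for the profiler's Allocation dataclass
    name: str
    size_bytes: int

specs = [("conv_out", 4096), ("getitem_alias", 0), ("bn_out", 4096)]
timeline = [Row(name, size) for name, size in specs if size != 0]
assert [r.name for r in timeline] == ["conv_out", "bn_out"]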

@@ -137,6 +139,15 @@ def generate_memory_trace(
             start_time = 0
             if memory_timeline_event is None:
                 continue
+            # "Collapse" tensors that share the same memory space at a given time.
+            for index, element in enumerate(memory_timeline_event.allocations):
+                if index == len(memory_timeline_event.allocations) - 1:
+                    break
+                if (
+                    element.memory_offset
+                    == memory_timeline_event.allocations[index + 1].memory_offset
+                ):
+                    element.overlap_with_subsequent = True
             for allocation in memory_timeline_event.allocations:
                 e = {}
                 e["name"] = allocation.name
@@ -159,7 +170,8 @@ def generate_memory_trace(
                 e["args"]["fqn"] = f"{allocation.fqn}"
                 e["args"]["source"] = f"{allocation.file_and_line_num}"
                 e["args"]["bytes"] = allocation.size_bytes
-                start_time += allocation_size_kb
+                if not allocation.overlap_with_subsequent:
+                    start_time += allocation_size_kb
                 trace_events.append(e)
             tid += 1
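
Downstream, the flag keeps the stacked trace from double-counting: a marked allocation does not advance start_time, so it is drawn at the same vertical offset as the allocation sharing its storage. A sketch of the accumulation; names and sizes are illustrative, not from this diff:

rows = [("bn_out", 4, True), ("getitem", 4, False), ("relu_out", 8, False)]
start_time = 0
offsets = []
for name, allocation_size_kb, overlap_with_subsequent in rows:
    offsets.append((name, start_time))  # offset assigned before advancing
    if not overlap_with_subsequent:
        start_time += allocation_size_kb

# bn_out and getitem stack at the same offset; relu_out starts after them
assert offsets == [("bn_out", 0), ("getitem", 0), ("relu_out", 4)]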
