# BUILD.bazel — tensor accelerator demo targets: mac_pe, systolic_array, tensor_processing_cluster, tensor_accelerator_top.
load("//:defs.bzl", "demo_hierarchical", "demo_sram")
load("//:gallery.bzl", "demo_gallery_image", "demo_stage_images")
# Shared flow-variable overrides. Each design target in this file merges FAST
# into its own `arguments` dict. All values are strings.
FAST = {
    # Global-placement modes: routability- and timing-driven both set to "0".
    "GPL_ROUTABILITY_DRIVEN": "0",
    "GPL_TIMING_DRIVEN": "0",

    # Repair passes flagged as skipped.
    "SKIP_CTS_REPAIR_TIMING": "1",
    "SKIP_INCREMENTAL_REPAIR": "1",
    "SKIP_LAST_GASP": "1",

    # Deliberately blank. Presumably this disables fill-cell and tap-cell
    # insertion — TODO confirm against the flow's variable documentation.
    "FILL_CELLS": "",
    "TAPCELL_TCL": "",

    # NOTE(review): a positive slack margin appears to request over-fixing of
    # setup violations rather than an early exit — confirm that 1000 is the
    # intended value for a "fast" profile.
    "SETUP_SLACK_MARGIN": "1000",
}
# Level 0 — MAC Processing Element: 748 cells, instantiated 16x by systolic_array.
# Power grid comes from the platform's BLOCK_grid_strategy: M1/M2 followpins,
# with M5 pins reached through an M4/M5 ring.
demo_sram(
    name = "mac_pe",
    # presumably the flow stage the macro abstract is generated from — confirm in //:defs.bzl
    abstract_stage = "place",
    # FAST overrides first, then the block-specific settings (routing capped at M5).
    arguments = dict(
        FAST,
        CORE_UTILIZATION = "40",
        PLACE_DENSITY = "0.65",
        MAX_ROUTING_LAYER = "M5",
        PDN_TCL = "$(PLATFORM_DIR)/openRoad/pdn/BLOCK_grid_strategy.tcl",
    ),
    mock_area = 0.5,
    sources = {
        "SDC_FILE": [":constraints.sdc"],
        "IO_CONSTRAINTS": [":mac_pe-io.tcl"],
    },
    substeps = True,
    verilog_files = ["@tensor_accelerator//:mac_pe_rtl"],
)
# Level 1 — Systolic array: mac_pe macros plus skew/deskew registers.
# Power grid comes from the platform's BLOCKS_grid_strategy:
# mac_pe M5 pins → M5/M6 straps → M6 pins.
demo_sram(
    name = "systolic_array",
    # CORE_UTILIZATION is blanked; the floorplan is given explicitly through
    # DIE_AREA / CORE_AREA. Routing is capped at M6.
    arguments = dict(
        FAST,
        SYNTH_HIERARCHICAL = "1",
        CORE_UTILIZATION = "",
        DIE_AREA = "0 0 80 80",
        CORE_AREA = "3 3 77 77",
        PLACE_DENSITY = "0.85",
        MACRO_PLACE_HALO = "1 1",
        MAX_ROUTING_LAYER = "M6",
        PLACE_PINS_ARGS = "-annealing",
        PDN_TCL = "$(PLATFORM_DIR)/openRoad/pdn/BLOCKS_grid_strategy.tcl",
    ),
    # Consumes the abstract produced for the level-0 block.
    macros = [":mac_pe_generate_abstract"],
    mock_area = 0.5,
    sources = {
        "SDC_FILE": [":constraints.sdc"],
        "IO_CONSTRAINTS": [":systolic_array-io.tcl"],
    },
    substeps = True,
    verilog_files = ["@tensor_accelerator//:systolic_array_rtl"],
)
# Level 2 — Tensor Processing Cluster (TPC): replicated 4x in the top module.
# The systolic_array macro exposes M6, so the TPC uses M7/M8 with its pins on M8.
demo_sram(
    name = "tensor_processing_cluster",
    # CORE_UTILIZATION is blanked; the floorplan is given explicitly through
    # DIE_AREA / CORE_AREA. Routing is capped at M8.
    arguments = dict(
        FAST,
        SYNTH_HIERARCHICAL = "1",
        CORE_UTILIZATION = "",
        DIE_AREA = "0 0 210 210",
        CORE_AREA = "5 5 205 205",
        MACRO_PLACE_HALO = "5 5",
        MAX_ROUTING_LAYER = "M8",
        PLACE_PINS_ARGS = "-annealing",
    ),
    # Consumes the abstract produced for the level-1 block.
    macros = [":systolic_array_generate_abstract"],
    mock_area = 0.5,
    # PDN_TCL is supplied here as a local file label, not as a path in `arguments`.
    sources = {
        "SDC_FILE": [":constraints.sdc"],
        "IO_CONSTRAINTS": [":tpc-io.tcl"],
        "PDN_TCL": [":pdn-tpc.tcl"],
    },
    substeps = True,
    verilog_files = ["@tensor_accelerator//:tpc_rtl"],
)
# Level 3 — Top: 4x TPC plus the global command processor and NoC crossbar.
# Uses the full metal stack (M1-M9); the TPC macros expose M8.
demo_hierarchical(
    name = "tensor_accelerator_top",
    # CORE_UTILIZATION is blanked; the floorplan is given explicitly through
    # DIE_AREA / CORE_AREA. No MAX_ROUTING_LAYER override is set at this level.
    arguments = dict(
        FAST,
        SYNTH_HIERARCHICAL = "1",
        MACRO_PLACE_HALO = "5 5",
        PLACE_PINS_ARGS = "-annealing",
        CORE_UTILIZATION = "",
        DIE_AREA = "0 0 350 350",
        CORE_AREA = "5 5 345 345",
        PLACE_DENSITY = "0.65",
    ),
    # Consumes the abstract produced for the level-2 block.
    macros = [":tensor_processing_cluster_generate_abstract"],
    # PDN_TCL is supplied as a local file label; no IO_CONSTRAINTS entry at this level.
    sources = {
        "SDC_FILE": [":constraints.sdc"],
        "PDN_TCL": [":top-pdn.tcl"],
    },
    substeps = True,
    verilog_files = ["@tensor_accelerator//:top_rtl"],
)
# Gallery image for the top-level design, sourced from its place-stage target.
demo_gallery_image(
    name = "tensor_accelerator_top_gallery",
    src = ":tensor_accelerator_top_place",
    stage = "place",
)
# Per-stage images of the top-level design for floorplan, place and cts.
demo_stage_images(
    name = "tensor_accelerator_top_images",
    module = "tensor_accelerator_top",
    stages = ["floorplan", "place", "cts"],
)