@@ -19,30 +19,30 @@ namespace snps_arc::metaware::mli::ref {
 
 MoveBroadcast::MoveBroadcast(void* kernel_private_data_buffer, size_t size, uint64_t membases[], int num_mems) {
   MLI_ASSERT(size == sizeof(MoveBroadcastPrivateData));
-  MoveBroadcastPrivateData private_data;
-  memcpy(&private_data, kernel_private_data_buffer, sizeof(MoveBroadcastPrivateData));
-  MLI_ASSERT(private_data.kernel_id == kMoveBroadcastId);
-  MLI_ASSERT(private_data.size == sizeof(MoveBroadcastPrivateData));
-
-  m_src = TensorIterator<InternalBuffer, kMoveBroadcastRank, kMoveBroadcastIterRank>(private_data.src, membases, num_mems);
-  m_dst = TensorIterator<InternalBuffer, kMoveBroadcastRank, kMoveBroadcastIterRank>(private_data.dst, membases, num_mems);
-  m_src.Reset();
-  m_dst.Reset();
+  MoveBroadcastPrivateData private_buffer;
+  memcpy(&private_buffer, kernel_private_data_buffer, sizeof(MoveBroadcastPrivateData));
+  MLI_ASSERT(private_buffer.kernel_id == kMoveBroadcastId);
+  MLI_ASSERT(private_buffer.size == sizeof(MoveBroadcastPrivateData));
+
+  m_src = private_buffer.src;
+  m_dst = private_buffer.dst;
+  m_tile_src = Tensor<InternalBuffer, kMoveBroadcastRank>(m_src.GetSubTensor(), membases, num_mems);
+  m_tile_dst = Tensor<InternalBuffer, kMoveBroadcastRank>(m_dst.GetSubTensor(), membases, num_mems);
 }
 
 template <typename buf_T, unsigned N>
-int32_t tensor_read(TensorIterator<buf_T, N, N> tsr, uint32_t *index) {
+int32_t tensor_read(Tensor<buf_T, N> tsr, uint32_t *index) {
   int32_t result = 0;
-  int32_t offset = tsr.get_tensor().get_offset(index);
-  switch (tsr.get_tensor().get_elem_size()) {
+  int32_t offset = tsr.get_offset(index);
+  switch (tsr.get_elem_size()) {
     case sizeof(int8_t):
-      result = tsr.get_tensor().template read<int8_t>(offset);
+      result = tsr.template read<int8_t>(offset);
       break;
     case sizeof(int16_t):
-      result = tsr.get_tensor().template read<int16_t>(offset);
+      result = tsr.template read<int16_t>(offset);
       break;
     case sizeof(int32_t):
-      result = tsr.get_tensor().template read<int32_t>(offset);
+      result = tsr.template read<int32_t>(offset);
       break;
     default:
       MLI_ASSERT(false);
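Note on the helper above: tensor_read dispatches on the tensor's runtime element size rather than a compile-time type, so one templated helper serves int8, int16, and int32 tensors, and the value is widened to int32_t so the core loop can stay type-agnostic. Below is a minimal standalone sketch of the same dispatch pattern, assuming a raw byte buffer and byte offsets in place of the Tensor/InternalBuffer machinery (read_element and its parameters are illustrative names, not part of the MLI API):

#include <cassert>
#include <cstdint>
#include <cstring>

// Read one element of a given size from a byte buffer and widen it to int32_t,
// mirroring the switch-on-elem-size dispatch in tensor_read above.
int32_t read_element(const uint8_t *buf, int32_t byte_offset, uint32_t elem_size) {
  switch (elem_size) {
    case sizeof(int8_t): {
      int8_t v;
      std::memcpy(&v, buf + byte_offset, sizeof(v));
      return v;  // implicit sign extension to int32_t
    }
    case sizeof(int16_t): {
      int16_t v;
      std::memcpy(&v, buf + byte_offset, sizeof(v));
      return v;
    }
    case sizeof(int32_t): {
      int32_t v;
      std::memcpy(&v, buf + byte_offset, sizeof(v));
      return v;
    }
    default:
      assert(false);  // unsupported element size
      return 0;
  }
}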
@@ -51,17 +51,17 @@ int32_t tensor_read(TensorIterator<buf_T, N, N> tsr, uint32_t *index) {
5151}
5252
5353template <typename buf_T, unsigned N>
54- void tensor_write (TensorIterator <buf_T, N , N> tsr, uint32_t *index, int32_t value) {
55- int32_t offset = tsr.get_tensor (). get_offset (index);
56- switch (tsr.get_tensor (). get_elem_size ()) {
54+ void tensor_write (Tensor <buf_T, N> tsr, uint32_t *index, int32_t value) {
55+ int32_t offset = tsr.get_offset (index);
56+ switch (tsr.get_elem_size ()) {
5757 case sizeof (int8_t ):
58- tsr.get_tensor (). template write <int8_t >(offset, value);
58+ tsr.template write <int8_t >(offset, value);
5959 break ;
6060 case sizeof (int16_t ):
61- tsr.get_tensor (). template write <int16_t >(offset, value);
61+ tsr.template write <int16_t >(offset, value);
6262 break ;
6363 case sizeof (int32_t ):
64- tsr.get_tensor (). template write <int32_t >(offset, value);
64+ tsr.template write <int32_t >(offset, value);
6565 break ;
6666 default :
6767 MLI_ASSERT (false );
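Both helpers rely on get_offset(index) to linearize the N-dimensional index before the typed read/write. A plausible sketch of that mapping, assuming the usual dot product of the index with per-dimension memory strides (the mem_stride array and the function name are assumptions for illustration, not the actual Tensor internals):

#include <cstdint>

// Linearize an N-dimensional index into a flat offset using memory strides.
// For a contiguous row-major 2x3 tensor, strides {3, 1} map index {1, 2} to 5.
int32_t offset_from_index(const uint32_t *index, const int32_t *mem_stride, uint32_t rank) {
  int32_t offset = 0;
  for (uint32_t i = 0; i < rank; i++) {
    offset += static_cast<int32_t>(index[i]) * mem_stride[i];
  }
  return offset;
}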
@@ -70,19 +70,19 @@ void tensor_write(TensorIterator<buf_T, N, N> tsr, uint32_t *index, int32_t valu
7070
7171// Move Broadcast Core Function
7272template <typename buf_T, unsigned N>
73- void MoveBroadcast::MoveBroadcastRun (TensorIterator <buf_T, N, N> src, TensorIterator <buf_T, N, N> dst) {
73+ void MoveBroadcast::MoveBroadcastRun (Tensor <buf_T, N> & src, Tensor <buf_T, N> & dst) {
7474 uint32_t src_idx[N] = {0 };
7575 uint32_t dst_idx[N] = {0 };
7676 uint32_t src_shape[N] = {0 };
7777 uint32_t dst_shape[N] = {0 };
78- uint32_t src_rank = src.get_tensor (). get_rank ();
79- uint32_t dst_rank = dst.get_tensor (). get_rank ();
78+ uint32_t src_rank = src.get_rank ();
79+ uint32_t dst_rank = dst.get_rank ();
8080
8181 MLI_ASSERT (src_rank == dst_rank);
8282
8383 // get shapes
84- src.get_full_shape (src_shape);
85- dst.get_full_shape (dst_shape);
84+ src.get_dims (src_shape);
85+ dst.get_dims (dst_shape);
8686
   // For tensors with rank less than MLI_MAX_RANK, the missing dimensions are implicitly set to 1
   for (uint32_t i = src_rank; i < kMoveBroadcastRank; i++) {
@@ -96,12 +96,14 @@ void MoveBroadcast::MoveBroadcastRun(TensorIterator<buf_T, N, N> src, TensorIter
     for (int d1_cnt = 0; d1_cnt < (int)dst_shape[1]; d1_cnt++) {
       for (int d2_cnt = 0; d2_cnt < (int)dst_shape[2]; d2_cnt++) {
         for (int d3_cnt = 0; d3_cnt < (int)dst_shape[3]; d3_cnt++) {
-          for (int d4_cnt = 0; d4_cnt < (int)dst_shape[4]; d4_cnt++) {
+          // ToDo: when mli_tensor takes [rank=5]
+          // for (int d4_cnt = 0; d4_cnt < (int)dst_shape[4]; d4_cnt++) {
           dst_idx[0] = d0_cnt;
           dst_idx[1] = d1_cnt;
           dst_idx[2] = d2_cnt;
           dst_idx[3] = d3_cnt;
-          dst_idx[4] = d4_cnt;
+          // dst_idx[4] = d4_cnt;
+
           // inner loop for the move broadcast.
           for (uint32_t i = 0; i < dst_rank; i++) {
             if (src_shape[i] != dst_shape[i]) {
@@ -114,15 +116,15 @@ void MoveBroadcast::MoveBroadcastRun(TensorIterator<buf_T, N, N> src, TensorIter
           }
           int32_t value = tensor_read<buf_T, N>(src, src_idx);
           tensor_write<buf_T, N>(dst, dst_idx, value);
-          }
+          // }
         }
       }
     }
   }
 }
 
-mli_status MoveBroadcast::Issue() {
-  MoveBroadcastRun<InternalBuffer, kMoveBroadcastRank>(m_src, m_dst);
+mli_status MoveBroadcast::Issue() {
+  MoveBroadcastRun<InternalBuffer, kMoveBroadcastRank>(m_tile_src, m_tile_dst);
   return MLI_STATUS_OK;
 }
 
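The index mapping in that inner loop is the whole broadcast rule: wherever src_shape[i] differs from dst_shape[i] (which the kernel expects to mean src_shape[i] == 1), the source index is pinned to 0, so the single source element is replicated along that axis. A self-contained illustration on plain arrays, not the kernel itself:

#include <cstdint>
#include <cstdio>

// Broadcast a 1x3 source into a 2x3 destination by pinning the source index
// to 0 along the mismatched axis, as MoveBroadcastRun's inner loop does.
int main() {
  uint32_t src_shape[2] = {1, 3};
  uint32_t dst_shape[2] = {2, 3};
  int32_t src[1][3] = {{10, 20, 30}};
  int32_t dst[2][3];

  for (uint32_t r = 0; r < dst_shape[0]; r++) {
    for (uint32_t c = 0; c < dst_shape[1]; c++) {
      uint32_t sr = (src_shape[0] != dst_shape[0]) ? 0u : r;
      uint32_t sc = (src_shape[1] != dst_shape[1]) ? 0u : c;
      dst[r][c] = src[sr][sc];
    }
  }
  // dst is now {{10, 20, 30}, {10, 20, 30}}
  printf("%d %d %d\n", dst[1][0], dst[1][1], dst[1][2]);  // prints: 10 20 30
  return 0;
}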
@@ -131,7 +133,33 @@ mli_status MoveBroadcast::Prefetch() {
 }
 
 mli_status MoveBroadcast::Update() {
+  m_src.Next();
+  m_dst.Next();
+
+  const auto src_tile_tensor = m_src.GetSubTensor();
+  uint32_t src_tile_shape[kMoveBroadcastRank];
+  src_tile_tensor.get_dims(src_tile_shape);
+  m_tile_src = Tensor<InternalBuffer, kMoveBroadcastRank>(m_tile_src, src_tile_shape);
+
+  const auto dst_tile_tensor = m_dst.GetSubTensor();
+  uint32_t dst_tile_shape[kMoveBroadcastRank];
+  dst_tile_tensor.get_dims(dst_tile_shape);
+  m_tile_dst = Tensor<InternalBuffer, kMoveBroadcastRank>(m_tile_dst, dst_tile_shape);
+
   return MLI_STATUS_OK;
 }
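With tiling, Issue() now moves only the current tile (m_tile_src/m_tile_dst), and Update() advances both TensorIterators and re-derives the tile tensors' shapes, so edge tiles smaller than the nominal tile size are handled. A hedged sketch of how a caller might drive this, assuming the caller knows the total tile count (num_tiles below is an assumption, not an MLI API):

// Typical tile-by-tile driver loop over the kernel's run interface.
for (uint32_t tile = 0; tile < num_tiles; tile++) {
  kernel.Prefetch();  // currently a no-op, kept for the common kernel interface
  kernel.Issue();     // broadcast the current tile from m_tile_src to m_tile_dst
  kernel.Update();    // advance the iterators and resize the tile tensors
}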
 
+void MoveBroadcast::GetIOSizesAndOffsets(uint32_t src_size[kMoveBroadcastRank], uint32_t dst_size[kMoveBroadcastRank],
+                                         int32_t src_offsets[kMoveBroadcastRank], int32_t dst_offsets[kMoveBroadcastRank]) {
+
+  m_src.get_pos(src_offsets);
+  m_dst.get_pos(dst_offsets);
+
+  const auto src_tile_tensor = m_src.GetSubTensor();
+  src_tile_tensor.get_dims(src_size);
+
+  const auto dst_tile_tensor = m_dst.GetSubTensor();
+  dst_tile_tensor.get_dims(dst_size);
+}
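GetIOSizesAndOffsets exposes the current tile's position and shape, presumably so a host-side data-movement layer can stage the right slices before Issue(). A usage sketch (the surrounding buffer-staging code is assumed, not part of this PR):

// Query where the current tiles sit inside the full tensors and how big they are.
uint32_t src_size[kMoveBroadcastRank], dst_size[kMoveBroadcastRank];
int32_t src_offsets[kMoveBroadcastRank], dst_offsets[kMoveBroadcastRank];
kernel.GetIOSizesAndOffsets(src_size, dst_size, src_offsets, dst_offsets);
// e.g. DMA src_size elements starting at src_offsets into the kernel's input buffer.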
+
 } // namespace snps_arc::metaware::mli::ref