@@ -71,6 +71,8 @@ CodeContainer::CodeContainer()
7171 fNumPassives(0 ),
7272 fSubContainerType(kInt ),
7373 fGeneratedSR(false ),
74+ fComputeByBlock(false ),
75+ fComputeBlockIndex(" " ),
7476 fExtGlobalDeclarationInstructions(IB::genBlockInst()),
7577 fGlobalDeclarationInstructions(IB::genBlockInst()),
7678 fDeclarationInstructions(IB::genBlockInst()),
@@ -123,6 +125,80 @@ CodeContainer::CodeContainer()
123125 }
124126}
125127
128+ void CodeContainer::setComputeByBlock (bool b)
129+ {
130+ fComputeByBlock = b;
131+ if (b && fComputeBlockIndex .empty ()) {
132+ fComputeBlockIndex = gGlobal ->getFreshID (" index" );
133+ }
134+ }
135+
136+ BlockInst* CodeContainer::generateComputeBlockLoop (bool loop_var_in_bytes)
137+ {
138+ BlockInst* block = IB::genBlockInst ();
139+
140+ if (!fComputeByBlock ) {
141+ block->merge (fComputeBlockInstructions );
142+ block->pushBackInst (fCurLoop ->generateScalarLoop (fFullCount , loop_var_in_bytes));
143+ block->merge (fPostComputeBlockInstructions );
144+ return block;
145+ }
146+
147+ ValueInst* fullcount_value = IB::genLoadFunArgsVar (fFullCount );
148+ ValueInst* vec_size = IB::genInt32NumInst (gGlobal ->gVecSize );
149+
150+ ValueInst* byte_size = nullptr ;
151+ ValueInst* step = nullptr ;
152+
153+ // int fullcount = count;
154+ DeclareVarInst* fullcount = nullptr ;
155+ if (loop_var_in_bytes) {
156+ int byte_size_int = 1 << (gGlobal ->gFloatSize + 1 );
157+ byte_size = IB::genInt32NumInst (byte_size_int);
158+ fullcount = IB::genDecStackVar (" fullcount" , IB::genInt32Typed (),
159+ IB::genMul (byte_size, fullcount_value));
160+ step = IB::genInt32NumInst (gGlobal ->gVecSize * byte_size_int);
161+ } else {
162+ fullcount = IB::genDecStackVar (" fullcount" , IB::genInt32Typed (), fullcount_value);
163+ step = vec_size;
164+ }
165+ block->pushBackInst (fullcount);
166+
167+ // for (int index = 0; index < fullcount; index += gVecSize)
168+ if (fComputeBlockIndex .empty ()) {
169+ fComputeBlockIndex = gGlobal ->getFreshID (" index" );
170+ }
171+ DeclareVarInst* index_decl =
172+ IB::genDecLoopVar (fComputeBlockIndex , IB::genInt32Typed (), IB::genInt32NumInst (0 ));
173+
174+ BlockInst* loop_code = IB::genBlockInst ();
175+
176+ // int count = min(gVecSize, fullcount - index);
177+ ValueInst* remaining = IB::genSub (fullcount->load (), index_decl->load ());
178+ ValueInst* count_base = remaining;
179+ if (loop_var_in_bytes) {
180+ count_base = IB::genDiv (remaining, byte_size);
181+ }
182+ Values min_fun_args;
183+ min_fun_args.push_back (vec_size);
184+ min_fun_args.push_back (count_base);
185+ ValueInst* count_val = IB::genFunCallInst (" min_i" , min_fun_args);
186+ DeclareVarInst* count_decl = IB::genDecStackVar (" count" , IB::genInt32Typed (), count_val);
187+ loop_code->pushBackInst (count_decl);
188+
189+ loop_code->merge (fComputeBlockInstructions );
190+ loop_code->pushBackInst (
191+ fCurLoop ->generateScalarLoop (count_decl->load (), loop_var_in_bytes));
192+ loop_code->merge (fPostComputeBlockInstructions );
193+
194+ ValueInst* loop_end = IB::genLessThan (index_decl->load (), fullcount->load ());
195+ StoreVarInst* loop_inc = index_decl->store (IB::genAdd (index_decl->load (), step));
196+ ForLoopInst* loop = IB::genForLoopInst (index_decl, loop_end, loop_inc, loop_code, false );
197+ block->pushBackInst (loop);
198+
199+ return block;
200+ }
201+
// Definition of ZoneArray::gInternalMemorySize, starting at 0.
// NOTE(review): the unit and the code updating this value are not visible here.
126202int ZoneArray::gInternalMemorySize = 0 ;
127203
128204CodeContainer::~CodeContainer ()
0 commit comments