Skip to content

Commit 4563745

Browse files
stefan-iligcbot
authored and committed
Cache pseudo declarations
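Currently, pseudo declarations are stored in a map from the caller (the call instruction) to a PseudoDcls struct. Checking whether a declaration is a pseudo declaration therefore requires iterating through the whole map and comparing against each field of every struct. We add additional hash sets that cache these declarations to get fast lookups.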
1 parent a3d528d commit 4563745

File tree

3 files changed

+29
-47
lines changed

3 files changed

+29
-47
lines changed

visa/FlowGraph.cpp

Lines changed: 13 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -8,32 +8,19 @@ SPDX-License-Identifier: MIT
88

99

1010
#include "FlowGraph.h"
11-
#include "BitSet.h"
1211
#include "BuildIR.h"
1312
#include "CFGStructurizer.h"
1413
#include "DebugInfo.h"
1514
#include "G4_Kernel.hpp"
1615
#include "Option.h"
17-
#include "PhyRegUsage.h"
1816
#include "visa_wa.h"
1917

20-
#include "BinaryEncodingIGA.h"
21-
#include "iga/IGALibrary/api/iga.h"
22-
#include "iga/IGALibrary/api/iga.hpp"
23-
2418
#include <algorithm>
2519
#include <chrono>
2620
#include <cstdlib>
27-
#include <fstream>
28-
#include <functional>
2921
#include <iostream>
3022
#include <iterator>
3123
#include <random>
32-
#include <set>
33-
#include <sstream>
34-
#include <string>
35-
#include <unordered_map>
36-
#include <unordered_set>
3724

3825
using namespace vISA;
3926

@@ -3678,6 +3665,7 @@ void FlowGraph::addSaveRestorePseudoDeclares(IR_Builder &builder) {
36783665
pseudoVCEDcl = builder.createDeclare(
36793666
"VCE_SAVE", G4_GRF, builder.numEltPerGRF<Type_UD>(),
36803667
static_cast<unsigned short>(numRowsVCE), Type_UD);
3668+
pseudoDcls.insert(pseudoVCEDcl);
36813669
} else {
36823670
pseudoVCEDcl->getRegVar()->setPhyReg(NULL, 0);
36833671
}
@@ -3710,11 +3698,22 @@ void FlowGraph::addSaveRestorePseudoDeclares(IR_Builder &builder) {
37103698
name = builder.getNameString(64, "SFLAG_%d", i);
37113699
G4_Declare *saveFLAG = builder.createDeclare(
37123700
name, G4_FLAG, (uint16_t)builder.getNumFlagRegisters(), 1, Type_UW);
3713-
fcallToPseudoDclMap[callSite->asCFInst()] = {VCA, saveA0, saveFLAG};
3701+
fillPseudoDclMap(callSite->asCFInst(), VCA, saveA0, saveFLAG);
37143702
i++;
37153703
}
37163704
}
37173705

3706+
// Records the pseudo declares of one call site: stores {VCA, saveA0, saveFlag}
// in fcallToPseudoDclMap under cfInst and mirrors them into the lookup caches
// (all three into pseudoDcls, VCA additionally into pseudoVCADcls).
void FlowGraph::fillPseudoDclMap(G4_InstCF *cfInst, G4_Declare *VCA,
3707+
G4_Declare *saveA0, G4_Declare *saveFlag) {
3708+
fcallToPseudoDclMap[cfInst] = {VCA, saveA0, saveFlag};
3709+
pseudoDcls.insert({VCA, saveA0, saveFlag});
3710+
pseudoVCADcls.insert(VCA);
3711+
// Consistency check: three cached declares per map entry. NOTE(review): the
// extra "+ 1" presumably accounts for pseudoVCEDcl, which
// addSaveRestorePseudoDeclares inserts into pseudoDcls -- confirm.
vISA_ASSERT((3 * fcallToPseudoDclMap.size() + 1) == pseudoDcls.size(),
3712+
"Found inconsistency between fcallToPseudoDclMap and pseudoDcls");
3713+
// Consistency check: exactly one cached VCA declare per map entry.
vISA_ASSERT(fcallToPseudoDclMap.size() == pseudoVCADcls.size(),
3714+
"Found inconsistency between fcallToPseudoDclMap and pseudoVCADcls");
3715+
}
3716+
37183717
//
37193718
// Since we don't do SIMD augmentation in RA for CM, we have to add an edge
37203719
// between the then and else block of an if branch to ensure that liveness is

visa/FlowGraph.h

Lines changed: 10 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,9 @@ SPDX-License-Identifier: MIT
1111

1212
#include "Assertions.h"
1313
#include "G4_BB.hpp"
14+
#include "G4_Declare.h"
1415
#include "G4_IR.hpp"
1516
#include "LoopAnalysis.h"
16-
#include "RelocationInfo.h"
1717

1818
#include <list>
1919
#include <map>
@@ -257,6 +257,10 @@ class FlowGraph {
257257
// TODO: remove this in favor of LoopAnalysis.
258258
Loop naturalLoops;
259259

260+
// Caches to speed up the lookup of pseudo declares
261+
std::unordered_set<const G4_Declare *> pseudoDcls;
262+
std::unordered_set<const G4_Declare *> pseudoVCADcls;
263+
260264
public:
261265
Mem_Manager &mem; // mem mananger for creating BBs & starting IP table
262266
INST_LIST_NODE_ALLOCATOR &instListAlloc;
@@ -293,7 +297,7 @@ class FlowGraph {
293297
G4_Declare *Flag;
294298
};
295299

296-
std::unordered_map<G4_InstCF *, struct PseudoDcls> fcallToPseudoDclMap;
300+
std::unordered_map<G4_InstCF *, PseudoDcls> fcallToPseudoDclMap;
297301

298302
// offset in unit of OW
299303
unsigned callerSaveAreaOffset = 0;
@@ -376,12 +380,7 @@ class FlowGraph {
376380
return dcl == pseudoVCEDcl;
377381
}
378382
bool isPseudoVCADcl(const G4_Declare *dcl) const {
379-
for (const auto &iter : fcallToPseudoDclMap) {
380-
if (iter.second.VCA == dcl) {
381-
return true;
382-
}
383-
}
384-
return false;
383+
return pseudoVCADcls.count(dcl) == 1;
385384
}
386385
bool isPseudoA0Dcl(const G4_Declare *dcl) const {
387386
for (const auto &iter : fcallToPseudoDclMap) {
@@ -403,16 +402,7 @@ class FlowGraph {
403402
if (!getHasStackCalls() && !getIsStackCallFunc()) {
404403
return false;
405404
}
406-
if (isPseudoVCEDcl(dcl)) {
407-
return true;
408-
}
409-
for (const auto &iter : fcallToPseudoDclMap) {
410-
if (iter.second.A0 == dcl || iter.second.Flag == dcl ||
411-
iter.second.VCA == dcl) {
412-
return true;
413-
}
414-
}
415-
return false;
405+
return pseudoDcls.count(dcl) == 1;
416406
}
417407

418408
//
@@ -681,6 +671,8 @@ class FlowGraph {
681671
using BBPrePostIDMap = std::unordered_map<G4_BB *, std::array<uint32_t, 2>>;
682672
void DFSTraverse(G4_BB *bb, unsigned &preId, unsigned &postId, FuncInfo *fn,
683673
BBPrePostIDMap &BBIdMap);
674+
void fillPseudoDclMap(G4_InstCF *cfInst, G4_Declare *VCA, G4_Declare *saveA0,
675+
G4_Declare *saveFlag);
684676

685677
}; // FlowGraph
686678

visa/GraphColor.cpp

Lines changed: 6 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -4945,23 +4945,14 @@ void Augmentation::handleSIMDIntf(G4_Declare *firstDcl, G4_Declare *secondDcl,
49454945
return;
49464946
}
49474947

4948-
auto contain = [](const auto &C, auto pred) {
4949-
return std::find_if(C.cbegin(), C.cend(), pred) != C.cend();
4950-
};
4951-
49524948
bool isFirstDcl = true;
4949+
bool isPseudoVCADcl = kernel.fg.isPseudoVCADcl(firstDcl);
4950+
if (!isPseudoVCADcl){
4951+
isPseudoVCADcl = kernel.fg.isPseudoVCADcl(secondDcl);
4952+
isFirstDcl = false;
4953+
}
49534954

4954-
auto pred = [firstDcl, secondDcl, &isFirstDcl](const auto &el) {
4955-
if (el.second.VCA == firstDcl)
4956-
return true;
4957-
if (el.second.VCA == secondDcl) {
4958-
isFirstDcl = false;
4959-
return true;
4960-
}
4961-
return false;
4962-
};
4963-
4964-
if (contain(kernel.fg.fcallToPseudoDclMap, pred)) {
4955+
if (isPseudoVCADcl) {
49654956
// Mark intf for following pattern:
49664957
// V33 =
49674958
// ...

0 commit comments

Comments
 (0)