Skip to content

Commit 21b6fc3

Browse files
authored
Merge pull request #3 from sirzooro/optimizations2
Optimizations2
2 parents 3689bff + 422243d commit 21b6fc3

File tree

13 files changed

+873
-428
lines changed

13 files changed

+873
-428
lines changed

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# RakeSearch
2+
Rake search of Diagonal Latin Squares
3+
4+
## Compilation
5+
6+
The application is compiled using gcc and make. The Windows version is compiled using the MinGW gcc cross-compiler.
7+
8+
To compile everything, you must have the BOINC client libraries. Make sure you compile them using the same gcc version as this app; otherwise you may get link errors.
9+
10+
When compiling the app for x86/x86_64, you will need gcc 8.x (I used gcc 8.2). Older gcc versions may not support some options used in the Makefile.
11+
12+
To compile the app, first enter the RakeSearch/RakeDiagSearch/RakeDiagSearch directory. Open the Makefile and update the `BOINC_DIR` variable so that it points to the place where you have the BOINC client library and its include files. After doing so, type `make` to start compilation.
13+
14+
The Makefile supports a number of extra parameters. Here are the ones used for x86 and x86_64:
15+
16+
- `SSE2=1` - enable SSE2 instructions (x86 and x86_64)
17+
- `AVX=1` - enable AVX instructions (x86_64 only)
18+
- `AVX2=1` - enable AVX2 and BMI1/2 instructions (x86_64 only)
19+
- `AVX512=1` - enable AVX512 instructions (x86_64 only)
20+
21+
Note: SSE2 is always enabled on x86_64, because support for it is part of the AMD64 specification.
22+
23+
You can also specify target platform:
24+
- `M32=1` - compile 32-bit app version (used for Linux app)
25+
- `MinGW64=1` - compile a 64-bit app for Windows using the MinGW cross-compiler
26+
- `MinGW32=1` - compile a 32-bit app for Windows using the MinGW cross-compiler
27+
28+
You can also compile the app for ARM (32-bit) and AARCH64 (64-bit). ARM may support NEON instructions, so there is a compilation option `NEON=1` to enable them. AARCH64 always supports NEON, so there is no special option for it.

RakeDiagSearch/RakeDiagSearch/Generator.cpp

Lines changed: 168 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424

2525
#define ffs __builtin_ffs
2626

27+
// Square::Empty is equal to -1, all other values are non-negative.
28+
// CPU sets sign bit in status register automatically when executing instructions,
29+
// so sign check instead of value check can give faster code.
30+
#define IsCellEmpty(val) ((val) < 0)
31+
2732

2833
using namespace std;
2934

@@ -265,7 +270,54 @@ void Generator::Read(std::istream& is)
265270

266271
// Read the number of generated squares
267272
is >> squaresCount;
268-
273+
274+
// Data loaded. Perform necessary post-loading tasks.
275+
if (cellId == cellsInPath - 1)
276+
{
277+
// Start from WU
278+
// Convert old checkpoint format to new one if used
279+
int row = path[cellsInPath - 2][0], col = path[cellsInPath - 2][1];
280+
if (0 != cellsHistory[row][col])
281+
{
282+
int tmpColumns[Rank];
283+
int tmpRows[Rank];
284+
memcpy(tmpColumns, columns, sizeof(columns));
285+
memcpy(tmpRows, rows, sizeof(rows));
286+
287+
// Convert cellsHistory into candidates
288+
for (int i = cellsInPath - 1; i >= 0; --i)
289+
{
290+
row = path[i][0];
291+
col = path[i][1];
292+
int bit = 1 << newSquare.Matrix[row][col];
293+
tmpColumns[col] |= bit;
294+
tmpRows[row] |= bit;
295+
cellsHistory[row][col] &= tmpColumns[col] & tmpRows[row];
296+
297+
// Update rows/cols data for last cell in path, it is no longer set
298+
if (i == cellsInPath - 1)
299+
{
300+
columns[col] = tmpColumns[col];
301+
rows[row] = tmpRows[row];
302+
}
303+
}
304+
}
305+
}
306+
else
307+
{
308+
// Start from checkpoint
309+
// Check if there are no cells on diagonals in path
310+
for (int i = 0; i < cellsInPath; i++)
311+
{
312+
int row = path[i][0], col = path[i][1];
313+
if ((row == col) || (row == Rank - 1 - col))
314+
{
315+
std::cerr << "Error: Cell on diagonal in path! R=" << row << " C=" << col << std::endl;
316+
return;
317+
}
318+
}
319+
}
320+
269321
// Set initialization flag
270322
isInitialized = Yes;
271323
}
@@ -447,160 +499,160 @@ void Generator::CreateCheckpoint()
447499
void Generator::Start()
448500
{
449501
// Check value of keyValue and pass result as a type to StartImpl
450-
if (keyValue == Square::Empty)
502+
if (IsCellEmpty(keyValue))
451503
StartImpl<true_type>();
452504
else
453505
StartImpl<false_type>();
454506
}
455507

456508
// Actual implementation of the squares generation
509+
// Note: values on diagonal are preset in WU, so corresponding parts of code are commented out.
510+
// It turned out that it was quite costly to have instructions which were doing nothing.
457511
template<typename IsKeyValueEmpty>
458512
inline void Generator::StartImpl()
459513
{
460514
int cellValue; // New value for the cell
461-
int oldCellValue; // Old value from the cell
515+
int cellValueCandidates; // Candidates for value for the cell
462516

463517
// Create constant copies of used fields to speedup calculations
464-
const int cellsInPath = this->cellsInPath;
518+
const int_fast32_t cellsInPath = this->cellsInPath;
465519
const int keyValue = this->keyValue;
466-
const int keyRowId = this->keyRowId;
467-
const int keyColumnId = this->keyColumnId;
520+
const int_fast32_t keyRowId = this->keyRowId;
521+
const int_fast32_t keyColumnId = this->keyColumnId;
522+
523+
// Use registers for local variables instead of memory
524+
int_fast32_t rowId, columnId;
525+
int_fast32_t cellId = this->cellId;
526+
527+
// Checkpoint may be written after new ODLS is created only.
528+
// Class members moved to registers above are constant in checkpoint
529+
// file, so they can be set to proper values here.
530+
this->rowId = path[cellsInPath - 1][0];
531+
this->columnId = path[cellsInPath - 1][1];
532+
this->cellId = cellsInPath - 1;
533+
534+
// Selection of the value for the next cell
535+
// Read coordinates of the cell
536+
rowId = path[cellId][0];
537+
columnId = path[cellId][1];
538+
539+
// Generate new value for the cell (rowId, columnId)
540+
// Select the value for the cell
541+
// Check the i value for possibility to be written into the cell (rowId, columnId)
542+
cellValueCandidates = columns[columnId] & rows[rowId];
468543

469544
if (isInitialized == Yes)
470545
{
546+
// Check if there are no candidates at the beginning, or if calculations are resumed from checkpoint
547+
if ((cellId == cellsInPath - 1) || (0 == cellValueCandidates))
548+
goto StepDown;
549+
471550
// Selection of the cells values
472551
while(1)
473552
{
474-
// Selection of the value for the next cell
475-
// Read coordinates of the cell
476-
rowId = path[cellId][0];
477-
columnId = path[cellId][1];
478-
479-
// Generate new value for the cell (rowId, columnId)
480-
// Select the value for the cell
481-
// Check the i value for possibility to be written into the cell (rowId, columnId)
482-
cellValue = columns[columnId] & rows[rowId] & cellsHistory[rowId][columnId];
483-
484-
// Test the value: has it been used in diagonals
485-
// Test the main diagonal
486-
if(columnId == rowId)
487-
{
488-
cellValue &= primary;
489-
}
490-
491-
// Test the secondary diagonal
492-
if (rowId == Rank - 1 - columnId)
493-
{
494-
cellValue &= secondary;
495-
}
496-
497553
// Process the search result
498-
if (cellValue)
554+
// 1st loop (used to be "if (cellValueCandidates)" part) - handle case when at least one cell value candidate is present
555+
while (1)
499556
{
500-
// Get index of first bit set
501-
cellValue = ffs(cellValue) - 1;
502-
// Process the new found value
503-
// Read the current value
504-
oldCellValue = newSquare.Matrix[rowId][columnId];
505-
// Write the new value
506-
// Write the value into the square
507-
newSquare.Matrix[rowId][columnId] = cellValue;
508-
// Mark the value in columns
509-
SetUsed(columns[columnId], cellValue);
510-
// Mark the value in rows
511-
SetUsed(rows[rowId], cellValue);
512-
// Mark the value in diagonals
513-
if (rowId == columnId)
514-
{
515-
SetUsed(primary, cellValue);
516-
}
517-
if (rowId == Rank - 1 - columnId)
518-
{
519-
SetUsed(secondary, cellValue);
520-
}
521-
// Mark the value in the history of cell values
522-
SetUsed(cellsHistory[rowId][columnId], cellValue);
523-
524-
// Restore the previous value without clearing the history (because we are working with this cell)
525-
if (oldCellValue != Square::Empty)
557+
// Extract lowest bit set
558+
int bit = (-cellValueCandidates) & cellValueCandidates;
559+
560+
// Write the value into the square
561+
newSquare.Matrix[rowId][columnId] = __builtin_ctz(bit);
562+
563+
// Process the finish of the square generation
564+
if (cellId == cellsInPath - 1)
565+
{
566+
// Process the found square
567+
ProcessSquare();
568+
569+
// Check the finish condition of search
570+
if (!IsKeyValueEmpty::value)
526571
{
527-
// Restore the value into columns
528-
SetFree(columns[columnId], oldCellValue);
529-
// Restore the value into rows
530-
SetFree(rows[rowId], oldCellValue);
531-
// Restore the value into diagonals
532-
if (rowId == columnId)
533-
{
534-
SetFree(primary, oldCellValue);
535-
}
536-
if (rowId == Rank - 1 - columnId)
572+
// Set the flag if the terminal value is other
573+
if (newSquare.Matrix[keyRowId][keyColumnId] == keyValue)
537574
{
538-
SetFree(secondary, oldCellValue);
575+
break;
539576
}
540577
}
541-
542-
// Process the finish of the square generation
543-
if (cellId == cellsInPath - 1)
544-
{
545-
// Process the found square
546-
ProcessSquare();
547-
}
548-
else
578+
579+
break;
580+
}
581+
else
582+
{
583+
// Mark the value in columns
584+
columns[columnId] &= ~bit;
585+
// Mark the value in rows
586+
rows[rowId] &= ~bit;
587+
588+
// Mark the value in the history of cell values
589+
cellsHistory[rowId][columnId] = cellValueCandidates & ~bit;
590+
591+
// Step forward
592+
cellId++;
593+
594+
// Check the finish condition of search
595+
if (!IsKeyValueEmpty::value)
549596
{
550-
// Step forward
551-
cellId++;
597+
// Set the flag if the terminal value is other
598+
if (newSquare.Matrix[keyRowId][keyColumnId] == keyValue)
599+
{
600+
break;
601+
}
552602
}
603+
604+
// Selection of the value for the next cell
605+
// Read coordinates of the cell
606+
rowId = path[cellId][0];
607+
columnId = path[cellId][1];
608+
609+
// Generate new value for the cell (rowId, columnId)
610+
// Select the value for the cell
611+
// Check the i value for possibility to be written into the cell (rowId, columnId)
612+
cellValueCandidates = columns[columnId] & rows[rowId];
613+
614+
if (!cellValueCandidates)
615+
break;
616+
}
553617
}
554-
else
618+
619+
// 2nd loop (used to be "else" part) - handle case when there are no cell value candidates
620+
StepDown:
621+
while (1)
555622
{
556-
// Process the fact of not-founding a new value in the cell (rowId; columnId)
557-
// Restore the previous value from the square into arrays
558-
// Read the current value
559-
cellValue = newSquare.Matrix[rowId][columnId];
560-
// Restore the value into auxilary arrays
561-
if (cellValue != Square::Empty)
562-
{
563-
// Restore the value into columns
564-
SetFree(columns[columnId], cellValue);
565-
// Restore the value into rows
566-
SetFree(rows[rowId], cellValue);
567-
// Restore the value into diagonals
568-
if (rowId == columnId)
569-
{
570-
SetFree(primary, cellValue);
571-
}
572-
if (rowId == Rank - 1 - columnId)
573-
{
574-
SetFree(secondary, cellValue);
575-
}
576-
// Reset the cell of the square
577-
newSquare.Matrix[rowId][columnId] = Square::Empty;
578-
// Clear the history of the cell (rowId, columnId)
579-
cellsHistory[rowId][columnId] = AllBitsMask(Rank);
580-
}
581-
582623
// Step backward
583624
cellId--;
584625

585626
// Check the finish condition of search
586627
if (IsKeyValueEmpty::value)
587628
{
588629
// Set the flag if the terminal value is "-1" which means we must leave the cell
589-
if (cellId < 0 && newSquare.Matrix[keyRowId][keyColumnId] == Square::Empty)
630+
if (cellId < 0 /*&& IsCellEmpty(newSquare.Matrix[keyRowId][keyColumnId])*/)
590631
{
591-
break;
632+
return;
592633
}
593634
}
594-
}
595-
596-
// Check the finish condition of search
597-
if (!IsKeyValueEmpty::value)
598-
{
599-
// Set the flag if the terminal value is other
600-
if (newSquare.Matrix[keyRowId][keyColumnId] == keyValue)
601-
{
602-
break;
603-
}
635+
636+
// Selection of the value for the next cell
637+
// Read coordinates of the cell
638+
rowId = path[cellId][0];
639+
columnId = path[cellId][1];
640+
641+
// Process the fact of not finding a new value in the cell (rowId; columnId)
642+
// Restore the previous value from the square into arrays
643+
// Read the current value
644+
cellValue = newSquare.Matrix[rowId][columnId];
645+
646+
// Restore the value into auxiliary arrays
647+
// Restore the value into columns
648+
SetFree(columns[columnId], cellValue);
649+
// Restore the value into rows
650+
SetFree(rows[rowId], cellValue);
651+
652+
cellValueCandidates = cellsHistory[rowId][columnId];
653+
654+
if (cellValueCandidates)
655+
break;
604656
}
605657
}
606658
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Various helper stuff
2+
3+
# if !defined Helpers_h
4+
# define Helpers_h
5+
6+
#ifdef __AVX512F__
7+
#define ALIGNED __attribute__((aligned(64)))
8+
#elif defined (__SSE2__) || defined(__ARM_NEON)
9+
#define ALIGNED __attribute__((aligned(32)))
10+
#else
11+
#define ALIGNED
12+
#endif
13+
14+
# endif

0 commit comments

Comments
 (0)