Skip to content

Commit d336e5a

Browse files
automated evaluation result saving
1 parent 7885f16 commit d336e5a

File tree

8 files changed

+97
-30
lines changed

8 files changed

+97
-30
lines changed
File renamed without changes.
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
#./bin/daphne --timing --second-read-opt test/api/cli/io/evalReadFrame.daphne
2-
readFrame("data_1000r_10c_NUMBER.csv");
2+
readFrame("evaluation/data_1000r_10c_NUMBER.csv");

evaluation/evalReadFrame2.daphne

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
readFrame("evaluation/data_1000r_1000c_NUMBER.csv");

src/runtime/local/io/ReadCsvFile.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -360,8 +360,6 @@ template <> struct ReadCsvFile<Frame> {
360360
rawCols[i] = reinterpret_cast<uint8_t *>(res->getColumnRaw(i));
361361
colTypes[i] = res->getColumnType(i);
362362
}
363-
using clock = std::chrono::high_resolution_clock;
364-
auto time = clock::now();
365363
// Determine if any optimized branch should be used.
366364
bool useOptimized = false;
367365
bool useBin = false;
@@ -375,6 +373,8 @@ template <> struct ReadCsvFile<Frame> {
375373
fName = daphneFile;
376374
}
377375
}
376+
using clock = std::chrono::high_resolution_clock;
377+
auto time = clock::now();
378378
if (useOptimized) {
379379
if (useBin) {
380380
try {

test/api/cli/io/ReadOptimizationEvaluation.cpp

Lines changed: 93 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,23 @@
2424
#include <regex>
2525
#include <string>
2626

27+
/**
 * @brief Ensures a DaphneDSL script that reads the given CSV file exists.
 *
 * Creates `evaluationDir` (including missing parents). If the script file is
 * already present it is left untouched and its path is returned; otherwise a
 * one-line script of the form `readFrame("<evaluationDir>/<csvFilename>");`
 * is written.
 *
 * Directory and file names are joined as filesystem paths, so `evaluationDir`
 * may be passed with or without a trailing separator.
 *
 * @param evaluationDir Directory that holds the CSV file and receives the script.
 * @param csvFilename   Name of the CSV file, relative to `evaluationDir`.
 * @param daphneScript  File name of the script, relative to `evaluationDir`.
 * @return Path of the existing or newly created script file.
 * @throws std::runtime_error if the script file cannot be created or written.
 * @throws std::filesystem::filesystem_error if the directory cannot be created.
 */
std::string createDaphneScript(const std::string &evaluationDir,
                               const std::string &csvFilename,
                               const std::string &daphneScript) {
    const std::filesystem::path dir(evaluationDir);
    std::filesystem::create_directories(dir); // ensure directory exists

    // operator/ inserts a separator only when one is missing, so both
    // "evaluation" and "evaluation/" yield "evaluation/<name>".
    const std::string daphneFilePath = (dir / daphneScript).string();
    if (std::filesystem::exists(daphneFilePath)) {
        // An existing script is reused as-is; its content is not validated.
        return daphneFilePath;
    }

    std::ofstream ofs(daphneFilePath);
    if (!ofs) {
        throw std::runtime_error("Could not create Daphne script file: " + daphneFilePath);
    }
    ofs << "readFrame(\"" << (dir / csvFilename).string() << "\");";
    ofs.close();
    if (!ofs) {
        // close() flushes; a failure here means the script is missing or truncated.
        throw std::runtime_error("Could not write Daphne script file: " + daphneFilePath);
    }
    return daphneFilePath;
}
43+
2744
template <typename... Args>
2845
std::string runDaphneEval( const std::string &scriptFilePath, Args... args) {
2946
std::stringstream out;
@@ -36,7 +53,7 @@ std::string runDaphneEval( const std::string &scriptFilePath, Args... args) {
3653
// output.
3754
CHECK(status == StatusCode::SUCCESS);
3855
//std::cout << out.str() << std::endl;
39-
CHECK(err.str() == "");
56+
//CHECK(err.str() == "");
4057
return out.str()+err.str();
4158
}
4259
// New data structure for timing values.
@@ -121,21 +138,16 @@ TimingData extractTiming(const std::string &output, bool expectWriteTime = false
121138
return timingData;
122139
}
123140

124-
void writeResultsToFile(const std::string& feature, const std::string &csvFilename, bool opt, bool firstRead, std::string timing, std::string readTime, std::string writeTime) {
125-
// default results file where all timings are stored
126-
const std::string resultsFile = "evaluation_results_"+ feature + ".csv";
141+
void writeResultsToFile(const std::string& feature, const std::string &csvFilename, bool opt, bool firstRead, const TimingData &timingData) {
142+
const std::string resultsFile = "evaluation/evaluation_results_" + feature + ".csv";
127143
bool fileExists = std::filesystem::exists(resultsFile);
128144
std::ofstream ofs(resultsFile, std::ios::app);
129145
if (!ofs) {
130146
throw std::runtime_error("Could not open " + resultsFile + " for writing.");
131147
}
132-
// Write CSV header if file did not already exist
133148
if (!fileExists) {
134-
ofs << "CSVFile,OptEnabled,NumCols,NumRows,FileType,Timing,ReadTime,WriteTime\n";
149+
ofs << "CSVFile,OptEnabled,FirstRead,NumCols,NumRows,FileType,ReadTime,WriteTime,StartupSeconds,ParsingSeconds,CompilationSeconds,ExecutionSeconds,TotalSeconds,WriteTime\n";
135150
}
136-
// Create string representation for bool and FileType.
137-
std::string optStr = opt? "true" : "false";
138-
std::string firstReadStr = firstRead? "true" : "false";
139151

140152
// Extract numRows, numCols, and FileType from the filename.
141153
// Expected format: data_<numRows>r_<numCols>c_<FileType>.csv
@@ -154,51 +166,106 @@ void writeResultsToFile(const std::string& feature, const std::string &csvFilena
154166
std::string type = "";
155167
if (parts.size() >= 4) {
156168
// parts[0] is "data", parts[1] is "<numRows>r", parts[2] is "<numCols>c", parts[3] is "<FileType>"
157-
std::string rowToken = parts[1]; // e.g. "100r"
169+
std::string rowToken = parts[1]; // e.g. "1000r"
158170
std::string colToken = parts[2]; // e.g. "10c"
159171
if (!rowToken.empty() && rowToken.back() == 'r') {
160-
rowToken.pop_back(); // remove the trailing 'r'
172+
rowToken.pop_back(); // remove trailing 'r'
161173
}
162174
if (!colToken.empty() && colToken.back() == 'c') {
163-
colToken.pop_back(); // remove the trailing 'c'
175+
colToken.pop_back(); // remove trailing 'c'
164176
}
165177
numRows = std::stoi(rowToken);
166178
numCols = std::stoi(colToken);
167179
type = parts[3];
168180
}
169181

170-
// Append the result line. Use csvFilename (i.e. the name of the read CSV file) as identifier.
182+
std::string optStr = opt ? "true" : "false";
183+
std::string firstReadStr = firstRead ? "true" : "false";
171184
ofs << csvFilename << ","
172185
<< optStr << ","
173186
<< firstReadStr << ","
174187
<< numCols << ","
175188
<< numRows << ","
176189
<< type << ","
177-
<< timing << ","
178-
<< readTime << ","
179-
<< writeTime << "\n";
190+
<< timingData.readTime << ","
191+
<< timingData.writeTime << ","
192+
<< timingData.startupSeconds << ","
193+
<< timingData.parsingSeconds << ","
194+
<< timingData.compilationSeconds << ","
195+
<< timingData.executionSeconds << ","
196+
<< timingData.totalSeconds << "\n";
180197
ofs.close();
181198
}
182199

183-
const std::string dirPath = "test/api/cli/io/";
200+
void runEvalTestCase(const std::string &csvFilename,
201+
std::string daphneScript= "",
202+
const std::string &dirPath= "evaluation/"
203+
) {
204+
// Remove potential binary output file.
205+
std::filesystem::remove(dirPath + csvFilename + ".dbdf");
206+
if (daphneScript.empty()) {
207+
daphneScript = createDaphneScript(dirPath, csvFilename, csvFilename+".daphne");
208+
}else{
209+
daphneScript = dirPath + daphneScript;
210+
}
211+
212+
// Normal read for comparison.
213+
std::string output = runDaphneEval(daphneScript, "--timing");
214+
std::cout << output << std::endl;
215+
TimingData timingData = extractTiming(output);
216+
writeResultsToFile("binopt", csvFilename, false, true, timingData);
217+
218+
// Build binary file and positional map on first read.
219+
output = runDaphneEval(daphneScript, "--timing", "--second-read-opt");
220+
std::cout << output << std::endl;
221+
timingData = extractTiming(output, true);
222+
writeResultsToFile("binopt", csvFilename, true, true, timingData);
223+
CHECK(std::filesystem::exists(dirPath + csvFilename + ".dbdf"));
224+
225+
// Subsequent read.
226+
output = runDaphneEval( daphneScript, "--timing", "--second-read-opt");
227+
std::cout << output << std::endl;
228+
timingData = extractTiming(output);
229+
writeResultsToFile("binopt", csvFilename, true, false, timingData);
230+
}
231+
232+
// Evaluation run for the 1000 x 10 CSV file using the checked-in script
// evalReadFrame.daphne (resolved relative to runEvalTestCase's default directory).
TEST_CASE("EvalTestCaseVariant60KB", TAG_IO) {
    runEvalTestCase("data_1000r_10c_NUMBER.csv", "evalReadFrame.daphne");
}
238+
239+
// Evaluation run for the 1000 x 1000 CSV file. No script name is passed, so
// runEvalTestCase obtains "<csvFilename>.daphne" via createDaphneScript.
TEST_CASE("EvalTestCaseVariant6MB", TAG_IO) {
    const std::string csvFilename = "data_1000r_1000c_NUMBER.csv";
    runEvalTestCase(csvFilename);
}
245+
246+
const std::string dirPath = "evaluation/";
184247
// Three-step evaluation of the binary read optimization for the 1000 x 10 CSV:
// plain read, first read with --second-read-opt (must create the .dbdf file),
// and a subsequent read with --second-read-opt. Each step's timings are
// appended to the "binopt" results file.
TEST_CASE("evalFrameFromCSVBinOpt", TAG_IO) {
    // assumes file has been generated via create_csv.py in /daphne
    const std::string filename = "data_1000r_10c_NUMBER.csv";
    const std::string scriptPath = dirPath + "evalReadFrame.daphne";
    const std::string binaryPath = dirPath + filename + ".dbdf";
    std::filesystem::remove(binaryPath);

    // normal read for comparison
    std::string output = runDaphneEval(scriptPath, "--timing");
    std::cout << output << std::endl;
    TimingData timingData = extractTiming(output);
    writeResultsToFile("binopt", filename, false, true, timingData);

    // build binary file and positional map on first read
    output = runDaphneEval(scriptPath, "--timing", "--second-read-opt");
    std::cout << output << std::endl;
    timingData = extractTiming(output, true);
    writeResultsToFile("binopt", filename, true, true, timingData);
    // Without the binary file the next step would not measure an optimized read.
    REQUIRE(std::filesystem::exists(binaryPath));

    // subsequent read
    output = runDaphneEval(scriptPath, "--timing", "--second-read-opt");
    std::cout << output << std::endl;
    timingData = extractTiming(output);
    writeResultsToFile("binopt", filename, true, false, timingData);
}

test/api/cli/io/evalReadFrame2.daphne

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)