Skip to content

Commit ad0a46d

Browse files
committed
Test for checking results aws
Added a query generator which generates equivalent queries for AWS and Ceph. After running these queries, their results are matched against each other. Signed-off-by: Girjesh Rajoria <grajoria@redhat.com>
1 parent a8e6ec6 commit ad0a46d

File tree

4 files changed

+409
-0
lines changed

4 files changed

+409
-0
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#include <iostream>
2+
#include <fstream>
3+
4+
using namespace std;
5+
6+
/**
 * Turns the AWS-dialect queries in "aws_queries.txt" (one query per line)
 * into "aws_cmds.sh", a shell script with one
 * `aws s3api select-object-content` invocation per query.
 *
 * The bucket name and object key are read from standard input. The output of
 * query number i (1-based) is directed to "aws_results/output<i>.csv".
 *
 * @return 0 on success, 1 if a file cannot be opened/written or the
 *         bucket/key could not be read from standard input.
 */
int main()
{
    // Single-quote a value for /bin/sh. Inside single quotes nothing is
    // special, so the only character needing care is the quote itself, which
    // is emitted as '\'' (close quote, escaped quote, reopen quote).
    const auto shell_quote = [](const std::string& raw) {
        std::string quoted = "'";
        for (const char ch : raw)
        {
            if (ch == '\'')
                quoted += "'\\''";
            else
                quoted += ch;
        }
        quoted += "'";
        return quoted;
    };

    std::fstream query_file("aws_queries.txt", std::ios::in);
    if (!query_file.is_open())
    {
        std::cerr << "Unable to open aws_queries.txt for reading\n";
        return 1;
    }

    std::fstream cmd_file("aws_cmds.sh", std::ios::out);
    if (!cmd_file.is_open())
    {
        std::cerr << "Unable to open aws_cmds.sh for writing\n";
        return 1;
    }

    // Script preamble: trace commands, stop on the first failure, and make
    // sure the results directory exists.
    cmd_file << "#!/bin/sh\nset -x\nset -e\n\n";
    cmd_file << "mkdir -p aws_results\n";

    std::string bucket, csv_file;
    std::cout << "Enter bucket name: ";
    std::cin >> bucket;
    std::cout << "Enter file name: ";
    std::cin >> csv_file;
    if (!std::cin)
    {
        std::cerr << "Failed to read bucket name / file name from standard input\n";
        return 1;
    }

    std::string query;
    for (int i = 1; std::getline(query_file, query); ++i)
    {
        // The queries contain single-quoted SQL literals, so the expression
        // (like the bucket and key) goes through shell_quote rather than
        // being pasted between double quotes.
        cmd_file << "aws s3api select-object-content --bucket " << shell_quote(bucket)
                 << " --key " << shell_quote(csv_file)
                 << " --expression-type 'SQL'"
                 << " --input-serialization '{\"CSV\": {}, \"CompressionType\": \"NONE\"}'"
                 << " --output-serialization '{\"CSV\": {}}'"
                 << " --profile openshift-dev"
                 << " --expression " << shell_quote(query)
                 << " \"aws_results/output" << i << ".csv\"\n";
    }

    cmd_file.close();
    query_file.close();
    if (cmd_file.fail())
    {
        std::cerr << "Error while writing aws_cmds.sh\n";
        return 1;
    }
    return 0;
}
Lines changed: 325 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,325 @@
1+
#include <iostream>
2+
#include <fstream>
3+
#include <vector>
4+
#include <bits/stdc++.h>
5+
#define NUM_COLUMN 3
6+
7+
using namespace std;
8+
9+
// Kinds of expression that random_query_expr() can be asked to produce.
enum Return_type
{
    INTEGER = 0,     // integer-valued expression
    STRING = 1,      // string-valued expression
    TIMESTAMP = 2,   // timestamp-valued expression
    MIX_COL_NUM = 3, // arithmetic over a mix of columns and numeric literals
    COLUMN = 4,      // a single column converted to int
    NUMBER = 5       // a single random numeric literal
};
15+
16+
// Returns one of the arithmetic operator characters '+', '-', '*', '/',
// selected with rand().
auto random_arth_op = []() {
    const std::string operators{"+-*/"};
    const auto pick = rand() % operators.size();
    return operators[pick];
};
17+
18+
// Returns one comparison operator token (">", "<", ">=", "<=", "==", "!="),
// selected with rand().
auto random_compare_op = []() {
    const std::vector<std::string> comparators{">", "<", ">=", "<=", "==", "!="};
    const auto pick = rand() % comparators.size();
    return comparators[pick];
};
22+
23+
// Returns one date-part keyword ("year", "month", "day", "hour", "minute",
// "second"), selected with rand().
auto random_date_part = []() {
    const std::vector<std::string> parts{"year", "month", "day", "hour", "minute", "second"};
    const auto pick = rand() % parts.size();
    return parts[pick];
};
27+
28+
/*auto random_date_part_extract = []()
29+
{vector<string> op={"year", "month", "day", "hour", "minute", "second",
30+
"timezone_hour", "timezone_minute"};
31+
return op[ rand() % op.size() ];
32+
};*/
33+
34+
/**
 * Builds a random timestamp literal of the form
 * "YYYY-MM-DDTHH:MM:SS.<fraction>Z".
 *
 * The year lies in 1900..1999, the day in 1..28, and the fraction is a value
 * in 0..999999 printed without zero padding. Month, day, hour, minute and
 * second are zero-padded to two digits.
 *
 * @param aws_expr  Receives the generated literal.
 * @return The same literal that was stored in aws_expr.
 */
std::string random_timestamp_string(std::string& aws_expr)
{
    // Draw the fields in the order they appear in the literal.
    const int year = rand() % 100 + 1900;
    const int month = 1 + rand() % 12;
    const int day = 1 + rand() % 28;
    const int hour = rand() % 24;
    const int minute = rand() % 60;
    const int second = rand() % 60;
    const int fraction = rand() % 1000000;

    std::ostringstream out;
    out << std::setfill('0');
    out << year << '-' << std::setw(2) << month << '-' << std::setw(2) << day;
    out << 'T' << std::setw(2) << hour << ':' << std::setw(2) << minute << ':'
        << std::setw(2) << second;
    out << '.' << fraction << 'Z';

    aws_expr = out.str();
    return aws_expr;
}
49+
50+
/**
 * Builds a random timestamp format string by concatenating between 0 and 9
 * tokens drawn from a fixed list of format specifiers and separators.
 *
 * @return The concatenated tokens; empty when zero tokens are drawn.
 */
std::string random_tm_format_string()
{
    const std::vector<std::string> tokens{
        "yyyyy ", "yyyy ", "yyy ", "yy ", "y ",
        "MMMMM ", "MMMM ", "MMM ", "MM ", "M ",
        "dd ", "d ", "a ",
        "hh ", "h ", "HH ", "H ",
        "mm ", "m ", "ss ", "s ",
        "SSSSSSSSS ", "SSSSSS ", "SSSSS ", "SSS ", "SS ", "S ",
        "n ", ": ", "- ", " "};

    std::string format;
    for (int remaining = rand() % 10; remaining > 0; --remaining)
    {
        format += tokens[rand() % tokens.size()];
    }
    return format;
}
65+
66+
string random_col(string& aws_expr)
67+
{
68+
int num = 1 + (rand() % NUM_COLUMN);
69+
aws_expr = "cast(_" + to_string(num) + " as int)";
70+
return "int(_" + to_string(num) + ")";
71+
}
72+
73+
/**
 * Picks a random integer literal N in 1..10.
 *
 * @param aws_expr  Receives the AWS form, the bare literal "N".
 * @return The Ceph form, "int(N)".
 */
std::string random_number(std::string& aws_expr)
{
    const std::string literal = std::to_string(rand() % 10 + 1);
    aws_expr = literal;
    return "int(" + literal + ")";
}
79+
80+
string random_num_expr(int depth, string& aws_expr)
81+
{
82+
string aws_expr1, aws_expr2, ceph_expr, op;
83+
if (depth == 0)
84+
{
85+
ceph_expr = random_number(aws_expr1);
86+
aws_expr = aws_expr1;
87+
return ceph_expr;
88+
}
89+
op = random_arth_op();
90+
ceph_expr = random_num_expr(depth-1, aws_expr1) + op +
91+
random_num_expr(depth-1, aws_expr2);
92+
aws_expr = aws_expr1 + op + aws_expr2;
93+
return ceph_expr;
94+
}
95+
96+
string random_num_col_expr(int depth, string& aws_expr)
97+
{
98+
string aws_expr1, aws_expr2, ceph_expr, op;
99+
if (depth == 0)
100+
{
101+
if ((rand() % 2) == 0)
102+
{
103+
ceph_expr = random_col(aws_expr1);
104+
aws_expr = aws_expr1;
105+
return ceph_expr;
106+
}
107+
else
108+
{
109+
ceph_expr = random_number(aws_expr1);
110+
aws_expr = aws_expr1;
111+
return ceph_expr;
112+
}
113+
}
114+
op = random_arth_op();
115+
ceph_expr = random_num_col_expr(depth-1, aws_expr1) + op +
116+
random_num_col_expr(depth-1, aws_expr2);
117+
aws_expr = aws_expr1 + op + aws_expr2;
118+
return ceph_expr;
119+
}
120+
121+
string random_query_expr(int depth, string& input_str, int type, string& aws_expr)
122+
{
123+
string ceph_expr;
124+
if (depth == 0)
125+
{
126+
switch (type)
127+
{
128+
case INTEGER:
129+
ceph_expr = random_number(aws_expr);
130+
break;
131+
case STRING:
132+
ceph_expr = "\'" + input_str + "\'";
133+
aws_expr = "\'" + input_str + "\'";
134+
break;
135+
case MIX_COL_NUM:
136+
ceph_expr = random_num_col_expr(depth, aws_expr);
137+
break;
138+
case TIMESTAMP:
139+
ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr) + "\')";
140+
aws_expr = "to_timestamp(\'" + aws_expr + "\')";
141+
break;
142+
}
143+
return ceph_expr;
144+
}
145+
146+
int option;
147+
if (type == INTEGER) //return type is int
148+
{
149+
string ceph_col, aws_col, aws_expr1, aws_expr2, op1, op2;
150+
switch (option = rand() % 9)
151+
{
152+
case 0:
153+
ceph_col = random_col(aws_col);
154+
op1 = random_arth_op();
155+
op2 = random_arth_op();
156+
ceph_expr = "int(avg(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) +
157+
") " + op2 + " " + random_num_expr(depth-1, aws_expr2) + ")";
158+
aws_expr = "cast((avg(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2 +
159+
") as int)";
160+
break;
161+
case 1:
162+
ceph_col = random_col(aws_col);
163+
op1 = random_arth_op();
164+
op2 = random_arth_op();
165+
ceph_expr = "count(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) +
166+
") " + op2 + " " + random_num_expr(depth-1, aws_expr2);
167+
aws_expr = "count(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
168+
break;
169+
case 2:
170+
ceph_col = random_col(aws_col);
171+
op1 = random_arth_op();
172+
op2 = random_arth_op();
173+
ceph_expr = "max(" + ceph_col + op1 + random_num_col_expr(depth-1,aws_expr1) + ") " +
174+
op2 + " " + random_num_expr(depth-1, aws_expr2);
175+
aws_expr = "max(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
176+
break;
177+
case 3:
178+
ceph_col = random_col(aws_col);
179+
op1 = random_arth_op();
180+
op2 = random_arth_op();
181+
ceph_expr = "min(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + ") " +
182+
op2 + " " + random_num_expr(depth-1, aws_expr2);
183+
aws_expr = "min(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
184+
break;
185+
case 4:
186+
ceph_col = random_col(aws_col);
187+
op1 = random_arth_op();
188+
op2 = random_arth_op();
189+
ceph_expr = "sum(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) +
190+
") " + op2 + " " + random_num_expr(depth-1, aws_expr2);
191+
aws_expr = "sum(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
192+
break;
193+
case 5:
194+
ceph_expr = "char_length(" + random_query_expr(depth-1, input_str, STRING,
195+
aws_expr1) + ")";
196+
aws_expr = "char_length(" + aws_expr1 + ")";
197+
break;
198+
case 6:
199+
ceph_expr = "character_length(" + random_query_expr(depth-1, input_str, STRING,
200+
aws_expr1) + ")";
201+
aws_expr = "character_length(" + aws_expr1 + ")";
202+
break;
203+
case 7:
204+
op1 = random_date_part();
205+
ceph_expr = "extract(" + op1 + " from " + random_query_expr(depth-1, input_str,
206+
TIMESTAMP, aws_expr1) + ")";
207+
aws_expr = "extract(" + op1 + " from " + aws_expr1 + ")";
208+
break;
209+
case 8:
210+
op1 = random_date_part();
211+
ceph_expr = "date_diff(" + op1 + ", " + random_query_expr(depth-1, input_str,
212+
TIMESTAMP, aws_expr1) + ", " + random_query_expr(depth-1, input_str,
213+
TIMESTAMP, aws_expr2) + ")";
214+
aws_expr = "date_diff(" + op1 + ", " + aws_expr1 + ", " + aws_expr2 + ")";
215+
break;
216+
}
217+
}
218+
else if (type == STRING) // return type is string
219+
{
220+
string aws_expr1, aws_expr2, aws_expr3;
221+
switch (option = rand() % 4)
222+
{
223+
case 0:
224+
ceph_expr = "lower(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) +
225+
")";
226+
aws_expr = "lower(" + aws_expr1 + ")";
227+
break;
228+
case 1:
229+
ceph_expr = "upper(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) +
230+
")";
231+
aws_expr = "upper(" + aws_expr1 + ")";
232+
break;
233+
case 2:
234+
ceph_expr = "substring(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) +
235+
", " + random_query_expr(depth-1, input_str, INTEGER, aws_expr2) + ", " +
236+
random_query_expr(depth-1, input_str, INTEGER, aws_expr3) + ")";
237+
aws_expr = "substring(" + aws_expr1 + ", " + aws_expr2 + ", " + aws_expr3 + ")";
238+
break;
239+
case 3:
240+
aws_expr2 = random_tm_format_string();
241+
ceph_expr = "to_string(" + random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr1)
242+
+ ", \'" + aws_expr2 + "\')";
243+
aws_expr = "to_string(" + aws_expr1 + ", \'" + aws_expr2 + "\')";
244+
break;
245+
}
246+
}
247+
else if (type == TIMESTAMP) // return type is TIMESTAMP
248+
{
249+
string aws_expr1, aws_expr2, date_part;
250+
switch (option = rand() % 2)
251+
{
252+
case 0:
253+
date_part = random_date_part();
254+
ceph_expr = "date_add(" + date_part + ", " + random_number(aws_expr1) + ", " +
255+
random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr2) + ")";
256+
aws_expr = "date_add(" + date_part + ", " + aws_expr1 + ", " + aws_expr2 + ")";
257+
break;
258+
case 1:
259+
ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr1) + "\')";
260+
aws_expr = "to_timestamp(\'" + aws_expr1 + "\')";
261+
break;
262+
}
263+
}
264+
else if (type == MIX_COL_NUM)
265+
{
266+
ceph_expr = random_num_col_expr(depth-1, aws_expr);
267+
}
268+
else if (type == COLUMN) // return type integer column number
269+
{
270+
ceph_expr = random_col(aws_expr);
271+
}
272+
else if (type == NUMBER) // return type randon number
273+
{
274+
ceph_expr = random_number(aws_expr);
275+
}
276+
else
277+
{
278+
aws_expr = "error";
279+
ceph_expr = "error";
280+
}
281+
return ceph_expr;
282+
}
283+
284+
int main()
285+
{
286+
srand(time(0));
287+
int reps, depth;
288+
fstream query_file, aws_query_file;
289+
query_file.open("queries.txt", ios::out);
290+
aws_query_file.open("aws_queries.txt", ios::out);
291+
string input_str = " %%AbCdEfGhIjKlMnOpQrStUvWxYz## ";
292+
cout << "Enter number of quries to be generated: ";
293+
cin >> reps;
294+
cout << "Enter depth of queries to be generated: ";
295+
cin >> depth;
296+
if(query_file.is_open() && aws_query_file.is_open()) //checking whether the file is open
297+
{
298+
while (reps)
299+
{
300+
string aws_expr;
301+
int type;
302+
string ceph_query = "select ";
303+
string aws_query = "select ";
304+
/*int projection = rand() % 4;
305+
while (projection > 1)
306+
{
307+
type = rand() % 4;
308+
ceph_query = ceph_query + random_query_expr(depth, input_str,
309+
type, aws_expr) + ", ";
310+
aws_query = aws_query + aws_expr + ", ";
311+
projection--;
312+
}*/
313+
type = rand() % 4;
314+
ceph_query = ceph_query + random_query_expr(depth, input_str, type,
315+
aws_expr)+ " from stdin;";
316+
aws_query = aws_query + aws_expr + " from s3object;";
317+
query_file << ceph_query << endl;
318+
aws_query_file << aws_query <<endl;
319+
reps--;
320+
}
321+
query_file.close();
322+
}
323+
return 0;
324+
}
325+

test/queries_generator/run.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/sh
# Builds the two generator programs, runs them to produce the query files and
# the AWS command script, then executes that script.
set -x # trace every command
set -e # stop at the first failing command

# Compile the generators.
g++ -o queries_generator queries_generator.cpp
g++ -o generate_aws_cmds generate_aws_cmds.cpp

# Run them in order; generate_aws_cmds presumably consumes the aws_queries.txt
# file written by queries_generator and emits aws_cmds.sh.
./queries_generator
./generate_aws_cmds

# The generated script is created without the execute bit.
chmod +x aws_cmds.sh

./aws_cmds.sh

0 commit comments

Comments
 (0)