-
Notifications
You must be signed in to change notification settings - Fork 621
Expand file tree
/
Copy pathDataSets.java
More file actions
88 lines (77 loc) · 3.37 KB
/
DataSets.java
File metadata and controls
88 lines (77 loc) · 3.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
package com.clickhouse.benchmark.data;
import com.clickhouse.benchmark.clients.BenchmarkBase;
import com.clickhouse.client.api.metadata.TableSchema;
import com.clickhouse.data.ClickHouseFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Registry of the data sets known to the benchmarks, plus static helpers to parse a table schema
 * from a CREATE TABLE statement, prepare the tables of a data set, and serialize rows.
 */
public class DataSets {
    private static final Logger LOGGER = LoggerFactory.getLogger(DataSets.class);

    /** Keyword that must follow the parenthesis closing the column list of a CREATE TABLE statement. */
    private static final String ENGINE_KEYWORD = "Engine";

    /** Data sets by lookup name; immutable once the class is initialized. */
    private static final Map<String, DataSet> sets;
    static {
        sets = Map.of(
                "simple", new SimpleDataSet()
        );
    }

    /**
     * Looks up a registered data set.
     * @param name - the name the data set was registered under (currently only "simple")
     * @return the matching DataSet, or null when no data set is registered under that name
     */
    public static DataSet from(String name) {
        return sets.get(name);
    }

    /**
     * A simple way to parse a CREATE TABLE statement into a TableSchema. Each column is expected to be on a new line (i.e. 'ColumnName ColumnType,\n').
     * The column list is taken to start after the first '(' and to end at the first ')' that is followed by
     * whitespace and the keyword "Engine" (in any letter case). Blank lines inside the column list are ignored.
     * Any parsing problem is logged and results in an empty schema rather than an exception.
     * @param createTableStatement - a CREATE TABLE statement
     * @return TableSchema holding the parsed columns; empty when the statement is null, empty or cannot be parsed
     */
    public static TableSchema parseSchema(String createTableStatement) {//TODO: Consider replacing this, as it's not very robust.
        TableSchema schema = new TableSchema();
        if (createTableStatement == null || createTableStatement.isEmpty()) {
            return schema;
        }

        try {
            // indexOf returns -1 when there is no '(', in which case the scan starts at index 0.
            int columnsStart = createTableStatement.indexOf('(') + 1;
            int columnsEnd = findColumnListEnd(createTableStatement, columnsStart);
            if (columnsEnd < 0) {
                LOGGER.error("Error parsing schema: no ') Engine' clause found in statement");
                return new TableSchema();
            }

            String columns = createTableStatement.substring(columnsStart, columnsEnd).trim();
            try (BufferedReader reader = new BufferedReader(new StringReader(columns))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    line = line.trim();
                    if (line.isEmpty()) {
                        continue; // Tolerate blank lines between column definitions
                    }

                    int separator = indexOfWhitespace(line);
                    if (separator < 0) {
                        // A definition without a type cannot be mapped to a column; give up on the whole statement.
                        LOGGER.error("Error parsing schema: column definition '{}' has no type", line);
                        return new TableSchema();
                    }

                    String name = line.substring(0, separator);
                    String type = line.substring(separator + 1).trim();
                    if (type.endsWith(",")) {//Removing trailing comma
                        type = type.substring(0, type.length() - 1).trim();
                    }
                    schema.addColumn(name, type);
                }
            }
        } catch (Exception e) {
            // Never hand back a partially populated schema.
            LOGGER.error("Error parsing schema", e);
            return new TableSchema();
        }

        return schema;
    }

    /**
     * Finds the parenthesis that closes the column list: the first ')' at or after fromIndex that is
     * followed by at least one whitespace character and then the "Engine" keyword (case-insensitive).
     * @return index of that ')', or -1 when there is none
     */
    private static int findColumnListEnd(String statement, int fromIndex) {
        int close = statement.indexOf(')', fromIndex);
        while (close >= 0) {
            int next = close + 1;
            while (next < statement.length() && Character.isWhitespace(statement.charAt(next))) {
                next++;
            }
            boolean hasGap = next > close + 1;
            if (hasGap && statement.regionMatches(true, next, ENGINE_KEYWORD, 0, ENGINE_KEYWORD.length())) {
                return close;
            }
            close = statement.indexOf(')', close + 1);
        }
        return -1;
    }

    /** Returns the index of the first whitespace character in text, or -1 when it contains none. */
    private static int indexOfWhitespace(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (Character.isWhitespace(text.charAt(i))) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Creates the table of a data set and fills it with the data set's own input stream.
     * When insertData is false, the inserted rows are additionally loaded through
     * BenchmarkBase.loadClickHouseRecords and the table is truncated afterwards.
     * NOTE(review): the flag reads as inverted (data is always inserted first); this looks deliberate,
     * presumably so that records can be loaded before the table is emptied - confirm with the callers.
     * @param set - the data set whose table should be prepared
     * @param insertData - false to load the records and leave the table empty, true to keep the inserted rows
     */
    public static void initializeTables(DataSet set, boolean insertData) {
        BenchmarkBase.runQuery(set.getCreateTableString(), true);
        ClickHouseFormat format = set.getFormat();

        BenchmarkBase.insertData(set.getTableName(), set.getInputStream(format), format);
        if (!insertData) {
            BenchmarkBase.loadClickHouseRecords(set.getTableName(), set);
            BenchmarkBase.runQuery("TRUNCATE TABLE " + set.getTableName(), true);
        }
    }

    /**
     * Serializes rows into the given format, one UTF-8 encoded byte array per row.
     * Only JSONEachRow is supported: every row becomes a single-line JSON object terminated by a newline,
     * with all values written as JSON strings (a null value is written as the string "null").
     * Keys and values are escaped, so quotes, backslashes and line breaks in the data cannot break the record.
     * @param data - rows as column-name to value maps
     * @param format - target format
     * @return one byte array per row, in the order of data
     * @throws IllegalArgumentException when the format is not supported
     */
    public static List<byte[]> convert(List<Map<String, Object>> data, ClickHouseFormat format) {
        List<byte[]> bytes = new ArrayList<>(data.size());
        switch (format) {
            case JSONEachRow:
                for (Map<String, Object> row : data) {
                    String json = row.entrySet().stream()
                            .map(entry -> "\"" + escapeJson(entry.getKey()) + "\":\"" + escapeJson(entry.getValue()) + "\"")
                            .collect(Collectors.joining(", ", "{", "}"));
                    json += "\n";
                    bytes.add(json.getBytes(StandardCharsets.UTF_8));
                }
                return bytes;
            default:
                throw new IllegalArgumentException("Unsupported format: " + format);
        }
    }

    /**
     * Renders a value as the content of a JSON string literal (without the surrounding quotes).
     * Quotes, backslashes and control characters are escaped; null is rendered as "null".
     */
    private static String escapeJson(Object value) {
        String text = String.valueOf(value);
        StringBuilder out = new StringBuilder(text.length() + 8);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '"':
                    out.append("\\\"");
                    break;
                case '\\':
                    out.append("\\\\");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                case '\b':
                    out.append("\\b");
                    break;
                case '\f':
                    out.append("\\f");
                    break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters must be written as a four-digit hex escape.
                        out.append(String.format("\\u%04x", (int) c));
                    } else {
                        out.append(c);
                    }
                    break;
            }
        }
        return out.toString();
    }
}