# Export the most recent acceptable benchmark run of every system on one
# machine type into <system>/results/<machine>.json.
#
# Step 1 (clickhouse-client): list the systems that have at least one "good"
#         run on ${machine} within the window `time >= today() - 2`.
# Step 2 (clickhouse-local, once per system): render the newest good run as a
#         JSON document and write it next to the system's existing results.
#
# A run is "good" when all of the following hold (see the `good` alias):
#   - a load time could be parsed from the log,
#   - exactly 43 runtime rows were parsed,
#   - the parsed data size is at least 5000000000,
#   - at least one parsed runtime value is greater than 1.
#
# NOTE(review): ${system} is taken from log content and is interpolated into
# SQL text and into file paths below. If the logs can come from untrusted
# senders, validate the name against the known system directories first.
machine='t3a.small'

clickhouse-client --query "

SELECT system FROM (

WITH
    extract(content, 'System: ([^\n]+)') AS system,
    extract(content, 'Machine: ([^\n]+)') AS machine,

    toUInt64OrZero(extract(content, 'Disk usage after: (\d+)'))
        - toUInt64OrZero(extract(content, 'Disk usage before: (\d+)')) AS disk_space_diff,
    toUInt64OrZero(extract(content, 'Total time: (\d+)')) AS total_time,

    -- Sums of every matching entry; NULL when the log has no such entry.
    match(content, 'Load time:\s*(?:COPY \d+\n)?(\d+)')
        ? arraySum(x -> toFloat64(x), extractAll(content, 'Load time:\s*(?:COPY \d+\n)?(\d+)'))
        : NULL AS load_time,
    match(content, 'Data size: *(\d+)')
        ? arraySum(x -> toUInt64(x), extractAll(content, 'Data size: *(\d+)'))
        : NULL AS data_size,

    -- One array of three strings (number or null) per bracketed result line.
    extractAllGroups(content, '\n *\[([\d\.]+|null),\s*([\d\.]+|null),\s*([\d\.]+|null)\]') AS runtimes,

    load_time IS NOT NULL AND length(runtimes) = 43 AND data_size >= 5000000000
        AND arrayExists(x -> arrayExists(y -> toFloat64OrZero(y) > 1, x), runtimes) AS good

SELECT time, system, machine, total_time, disk_space_diff, load_time, data_size, length(runtimes), runtimes
FROM sink.data
WHERE time >= today() - 2 AND content NOT LIKE 'Cloud-init%' AND good AND machine = '${machine}'
ORDER BY time DESC LIMIT 1 BY system

)

" | while read -r system; do
    # -r keeps backslashes in the name intact; quoting prevents word splitting
    # and glob expansion when the name is printed or used in a path.
    printf '%s\n' "$system"

    # The template file supplies the per-system metadata (system, proprietary,
    # tuned, tags), copied verbatim as raw JSON values.
    # NOTE(review): remote('127.0.0.2', ...) presumably reaches the same server
    # that step 1 queried - confirm.
    # stdin is redirected from /dev/null as a guard so that nothing inside the
    # loop can consume the remaining system names coming through the pipe.
    clickhouse-local --query "

WITH file('${system}/results/c6a.4xlarge.json') AS template

SELECT '{
 \"system\": ' || visitParamExtractRaw(template, 'system') || ',
 \"date\": \"' || time::Date || '\",
 \"machine\": \"' || machine || '\",
 \"cluster_size\": 1,
 \"proprietary\": ' || visitParamExtractRaw(template, 'proprietary') || ',
 \"tuned\": ' || visitParamExtractRaw(template, 'tuned') || ',
 \"tags\": ' || visitParamExtractRaw(template, 'tags') || ',
 \"load_time\": ' || load_time || ',
 \"data_size\": ' || data_size || ',
 \"result\": ' || runtimes_formatted || '
}
'

FROM (

WITH
    extract(content, 'System: ([^\n]+)') AS system,
    extract(content, 'Machine: ([^\n]+)') AS machine,

    toUInt64OrZero(extract(content, 'Disk usage after: (\d+)'))
        - toUInt64OrZero(extract(content, 'Disk usage before: (\d+)')) AS disk_space_diff,
    toUInt64OrZero(extract(content, 'Total time: (\d+)')) AS total_time,

    match(content, 'Load time:\s*(?:COPY \d+\n)?(\d+)')
        ? arraySum(x -> toFloat64(x), extractAll(content, 'Load time:\s*(?:COPY \d+\n)?(\d+)'))
        : NULL AS load_time,
    match(content, 'Data size: *(\d+)')
        ? arraySum(x -> toUInt64(x), extractAll(content, 'Data size: *(\d+)'))
        : NULL AS data_size,

    extractAllGroups(content, '\n *\[([\d\.]+|null),\s*([\d\.]+|null),\s*([\d\.]+|null)\]') AS runtimes,

    -- Re-render the runtimes as a JSON array of arrays: null stays null,
    -- numeric values are rounded to 3 decimal places.
    '[\n' || arrayStringConcat(arrayMap(x -> ' [' || arrayStringConcat(arrayMap(v -> v == 'null' ? v : round(v::Float64, 3)::String, x), ', ') || ']', runtimes), ',\n') || '\n]' AS runtimes_formatted,

    load_time IS NOT NULL AND length(runtimes) = 43 AND data_size >= 5000000000
        AND arrayExists(x -> arrayExists(y -> toFloat64OrZero(y) > 1, x), runtimes) AS good

SELECT time, system, machine, total_time, disk_space_diff, load_time, data_size, length(runtimes), runtimes, runtimes_formatted
FROM remote('127.0.0.2', sink.data)
WHERE time >= today() - 2 AND content NOT LIKE 'Cloud-init%' AND good AND machine = '${machine}' AND system = '${system}'
ORDER BY time DESC LIMIT 1
)

INTO OUTFILE '${system}/results/${machine}.json' TRUNCATE
FORMAT RawBLOB

" </dev/null
done
0 commit comments