Skip to content

Commit 2a9f00b

Browse files
authored
Merge pull request ClickHouse#361 from nkunal/main
2 parents 345c03a + 0451066 commit 2a9f00b

File tree

6 files changed

+159
-218
lines changed

6 files changed

+159
-218
lines changed

siglens/benchmark.sh

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#!/bin/bash
22

3-
# Requires at least 700GB of free disk space on the main partition for the dataset, intermediate files, and SigLens data.
3+
# Requires at least 300GB of free disk space on the main partition for the dataset, intermediate files, and SigLens data.
44

55
echo "Install prerequisites"
66
sudo apt-get install --yes git golang
77

88
echo "Get and build SigLens"
9-
git clone https://github.com/siglens/siglens.git --branch 1.0.25
9+
git clone https://github.com/siglens/siglens.git --branch 1.0.41
1010
cd siglens
1111
go mod tidy
1212
go build -o siglens cmd/siglens/main.go
@@ -15,18 +15,10 @@ cd ..
1515

1616
echo "Download and unzip dataset"
1717
wget --continue https://datasets.clickhouse.com/hits_compatible/hits.json.gz
18-
gzip -d -f hits.json.gz
19-
20-
# Add the _index line, convert UserID from string to number, and preprocess the dataset for loading
21-
python3 fix_hits.py
22-
23-
echo "Split into 10 files to increase parallelism"
24-
rm hits.json
25-
split -l 20000000 sighits.json splithits_
26-
rm sighits.json
18+
gzip -d hits.json.gz
2719

2820
echo "Load data into SigLens, this can take a few hours"
29-
time python3 send_data.py
21+
time python3 send_datawithactionline.py
3022

3123
echo "Run queries"
3224
./run.sh

siglens/fix_hits.py

Lines changed: 0 additions & 59 deletions
This file was deleted.

siglens/queries.spl

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,38 @@ AdvEngineID != 0 | stats count
66
* | stats dc(SearchPhrase)
77
* | stats min(EventDate), max(EventDate)
88
AdvEngineID != 0 | stats count as cnt by AdvEngineID | sort -cnt
9-
* | stats dc(UserID) as u BY RegionID | sort -u | head 10
10-
* | stats sum(AdvEngineID), count as c, avg(ResolutionWidth), dc(UserID) by RegionID | sort -c | head 10
11-
MobilePhoneModel != \"\" | stats dc(UserID) as u by MobilePhoneModel | sort -u | head 10
12-
MobilePhoneModel != \"\" | stats dc(UserID) as u by MobilePhone, MobilePhoneModel | sort -u | head 10
13-
SearchPhrase != \"\" | stats count as c by SearchPhrase | sort -c | head 10
14-
SearchPhrase != \"\" | stats dc(UserID) as u by SearchPhrase | sort -u | head 10
15-
SearchPhrase != \"\" | stats count as c by SearchEngineID, SearchPhrase | sort -c | head 10
16-
* | stats count as cnt by UserID | sort -cnt | head 10
17-
* | stats count as cnt by UserID, SearchPhrase | sort -cnt | head 10
9+
* | stats dc(UserID) as u BY RegionID | sort 10 -u
10+
* | stats sum(AdvEngineID), count as c, avg(ResolutionWidth), dc(UserID) by RegionID | sort 10 -c
11+
MobilePhoneModel != \"\" | stats dc(UserID) as u by MobilePhoneModel | sort 10 -u
12+
MobilePhoneModel != \"\" | stats dc(UserID) as u by MobilePhone, MobilePhoneModel | sort 10 -u
13+
SearchPhrase != \"\" | stats count as c by SearchPhrase | sort 10 -c
14+
SearchPhrase != \"\" | stats dc(UserID) as u by SearchPhrase | sort 10 -u
15+
SearchPhrase != \"\" | stats count as c by SearchEngineID, SearchPhrase | sort 10 -c
16+
* | stats count as cnt by UserID | sort 10 -cnt
17+
* | stats count as cnt by UserID, SearchPhrase | sort 10 -cnt
1818
* | stats count by UserID, SearchPhrase | head 10
1919
null
2020
UserID=435090932899640449 | fields UserID
2121
* | regex URL = \".*google.*\" | stats count
22-
SearchPhrase != \"\" | regex URL = \".*google.*\" | stats count as c, min(eval(URL)) by SearchPhrase | sort -c | head 10
23-
SearchPhrase != \"\" | regex Title = \".*Google.*\" | regex URL != \".*\\.google\\..*\" | stats count as c, min(eval(URL)), min(eval(Title)), dc(UserID) by SearchPhrase | sort -c | head 10
24-
* | regex URL = \".*google.*\" | sort str(EventTime) | head 10
25-
SearchPhrase != \"\" | sort str(EventTime) | head 10 | fields SearchPhrase
26-
SearchPhrase != \"\" | sort str(SearchPhrase) | head 10 | fields SearchPhrase
27-
SearchPhrase != \"\" | sort str(EventTime), str(SearchPhrase) | head 10 | fields SearchPhrase
28-
URL != \"\" | stats avg(eval(len(URL))) as l, count as c by CounterID | where c > 100000 | sort -l | head 25
29-
null
22+
SearchPhrase != \"\" | regex URL = \".*google.*\" | stats count as c, min(eval(URL)) by SearchPhrase | sort 10 -c
23+
SearchPhrase != \"\" | regex Title = \".*Google.*\" | regex URL != \".*\\.google\\..*\" | stats count as c, min(eval(URL)), min(eval(Title)), dc(UserID) by SearchPhrase | sort 10 -c
24+
* | regex URL = \".*google.*\" | sort 10 str(EventTime)
25+
SearchPhrase != \"\" | sort 10 str(EventTime) | fields SearchPhrase
26+
SearchPhrase != \"\" | sort 10 str(SearchPhrase) | fields SearchPhrase
27+
SearchPhrase != \"\" | sort 10 str(EventTime), str(SearchPhrase) | fields SearchPhrase
28+
URL != \"\" | stats avg(eval(len(URL))) as l, count as c by CounterID | where c > 100000 | sort 25 -l
29+
Referer != \"\" | rex field=Referer \"^https?://(?:www\\.)?(?<k>[^/]+)\" | stats avg(eval(len(Referer))) as l, count as c, min(eval(Referer)) by k | where c > 100000 | sort 25 -l
3030
* | stats sum(ResolutionWidth) as sum, count as cnt | eval sum2 = sum + cnt*2, sum3 = sum + cnt*3, sum4 = sum + cnt*4, sum5 = sum + cnt*5, sum6 = sum + cnt*6, sum7 = sum + cnt*7, sum8 = sum + cnt*8, sum9 = sum + cnt*9, sum10 = sum + cnt*10, sum11 = sum + cnt*11, sum12 = sum + cnt*12, sum13 = sum + cnt*13, sum14 = sum + cnt*14, sum15 = sum + cnt*15, sum16 = sum + cnt*16, sum17 = sum + cnt*17, sum18 = sum + cnt*18, sum19 = sum + cnt*19, sum20 = sum + cnt*20, sum21 = sum + cnt*21, sum22 = sum + cnt*22, sum23 = sum + cnt*23, sum24 = sum + cnt*24, sum25 = sum + cnt*25, sum26 = sum + cnt*26, sum27 = sum + cnt*27, sum28 = sum + cnt*28, sum29 = sum + cnt*29, sum30 = sum + cnt*30, sum31 = sum + cnt*31, sum32 = sum + cnt*32, sum33 = sum + cnt*33, sum34 = sum + cnt*34, sum35 = sum + cnt*35, sum36 = sum + cnt*36, sum37 = sum + cnt*37, sum38 = sum + cnt*38, sum39 = sum + cnt*39, sum40 = sum + cnt*40, sum41 = sum + cnt*41, sum42 = sum + cnt*42, sum43 = sum + cnt*43, sum44 = sum + cnt*44, sum45 = sum + cnt*45, sum46 = sum + cnt*46, sum47 = sum + cnt*47, sum48 = sum + cnt*48, sum49 = sum + cnt*49, sum50 = sum + cnt*50, sum51 = sum + cnt*51, sum52 = sum + cnt*52, sum53 = sum + cnt*53, sum54 = sum + cnt*54, sum55 = sum + cnt*55, sum56 = sum + cnt*56, sum57 = sum + cnt*57, sum58 = sum + cnt*58, sum59 = sum + cnt*59, sum60 = sum + cnt*60, sum61 = sum + cnt*61, sum62 = sum + cnt*62, sum63 = sum + cnt*63, sum64 = sum + cnt*64, sum65 = sum + cnt*65, sum66 = sum + cnt*66, sum67 = sum + cnt*67, sum68 = sum + cnt*68, sum69 = sum + cnt*69, sum70 = sum + cnt*70, sum71 = sum + cnt*71, sum72 = sum + cnt*72, sum73 = sum + cnt*73, sum74 = sum + cnt*74, sum75 = sum + cnt*75, sum76 = sum + cnt*76, sum77 = sum + cnt*77, sum78 = sum + cnt*78, sum79 = sum + cnt*79, sum80 = sum + cnt*80, sum81 = sum + cnt*81, sum82 = sum + cnt*82, sum83 = sum + cnt*83, sum84 = sum + cnt*84, sum85 = sum + cnt*85, sum86 = sum + cnt*86, sum87 = sum + cnt*87, sum88 = sum + cnt*88, sum89 = sum + cnt*89 | fields sum, sum2, 
sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18, sum19, sum20, sum21, sum22, sum23, sum24, sum25, sum26, sum27, sum28, sum29, sum30, sum31, sum32, sum33, sum34, sum35, sum36, sum37, sum38, sum39, sum40, sum41, sum42, sum43, sum44, sum45, sum46, sum47, sum48, sum49, sum50, sum51, sum52, sum53, sum54, sum55, sum56, sum57, sum58, sum59, sum60, sum61, sum62, sum63, sum64, sum65, sum66, sum67, sum68, sum69, sum70, sum71, sum72, sum73, sum74, sum75, sum76, sum77, sum78, sum79, sum80, sum81, sum82, sum83, sum84, sum85, sum86, sum87, sum88, sum89
31-
SearchPhrase != \"\" | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by SearchEngineID, ClientIP | sort -c | head 10
32-
SearchPhrase != \"\" | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by WatchID, ClientIP | sort -c | head 10
33-
* | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by WatchID, ClientIP | sort -c | head 10
34-
* | stats count as c by URL | sort -c | head 10
35-
* | eval n = 1 | stats count as c by n, URL | sort -c | head 10
36-
* | eval cp = ClientIP-1, cp2 = ClientIP-2, cp3 = ClientIP-3 | stats count as c by ClientIP, cp, cp2, cp3 | sort -c | head 10
37-
CounterID = 62 AND DontCountHits = 0 AND IsRefresh = 0 AND URL != \"\" | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URL | sort -PageViews | head 10
38-
CounterID = 62 AND DontCountHits = 0 AND IsRefresh = 0 AND Title != \"\" | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by Title | sort -PageViews | head 10
39-
CounterID = 62 AND IsRefresh = 0 AND IsLink != 0 AND IsDownload = 0 | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URL | sort -PageViews | head 1010 | tail 10 | tail 10
40-
CounterID = 62 AND IsRefresh = 0 | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | eval Src=if(SearchEngineID=0 AND AdvEngineID=0, Referer, \"\") | rename URL as Dst | stats count as PageViews by TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst | sort -PageViews | head 1010 | tail 10 | tail 10
41-
CounterID = 62 AND IsRefresh = 0 AND RefererHash = \"3594120000172545465\" | where TraficSourceID in(-1, 6) | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URLHash, EventDate | sort -PageViews | head 110 | tail 10 | tail 10
42-
CounterID = 62 AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = \"2868770270353813622\" | where strptime(EventDate,\"%Y-%m-%d\") >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND strptime(EventDate,\"%Y-%m-%d\") <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by WindowClientWidth, WindowClientHeight | sort -PageViews | head 10010 | tail 10 | tail 10
43-
null
31+
SearchPhrase != \"\" | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by SearchEngineID, ClientIP | sort 10 -c
32+
SearchPhrase != \"\" | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by WatchID, ClientIP | sort 10 -c
33+
* | stats count as c, sum(IsRefresh), avg(ResolutionWidth) by WatchID, ClientIP | sort 10 -c
34+
* | stats count as c by URL | sort 10 -c
35+
* | stats count AS c by URL | sort 10 -c | eval n=1 | fields n, URL, c
36+
* | stats count as c by ClientIP | sort 10 -c | eval cp = ClientIP-1, cp2 = ClientIP-2, cp3 = ClientIP-3
37+
CounterID = 62 AND DontCountHits = 0 AND IsRefresh = 0 AND URL != \"\" | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URL | sort 10 -PageViews
38+
CounterID = 62 AND DontCountHits = 0 AND IsRefresh = 0 AND Title != \"\" | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by Title | sort 10 -PageViews
39+
CounterID = 62 AND IsRefresh = 0 AND IsLink != 0 AND IsDownload = 0 | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URL | sort 1010 -PageViews | tail 10 | tail 10
40+
CounterID = 62 AND IsRefresh = 0 | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | eval Src=if(SearchEngineID=0 AND AdvEngineID=0, Referer, \"\") | rename URL as Dst | stats count as PageViews by TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst | sort 1010 -PageViews | tail 10 | tail 10
41+
CounterID = 62 AND IsRefresh = 0 AND RefererHash = \"3594120000172545465\" | where TraficSourceID in(-1, 6) | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by URLHash, EventDate | sort 110 -PageViews | tail 10 | tail 10
42+
CounterID = 62 AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = \"2868770270353813622\" | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | stats count as PageViews by WindowClientWidth, WindowClientHeight | sort 10010 -PageViews | tail 10 | tail 10
43+
CounterID=62 DontCountHits = 0 IsRefresh = 0 | eval ptime = strptime(EventDate,\"%Y-%m-%d\") | where ptime >= strptime(\"2013-07-01\", \"%Y-%m-%d\") AND ptime <= strptime(\"2013-07-31\", \"%Y-%m-%d\") | eval truncTime = strftime(tonumber(strptime(EventTime, \"%Y-%m-%d %H:%M:%S\")), \"%Y-%m-%d %H:%M:00\") | stats count as PageViews by truncTime | sort 1010 -truncTime | tail 10 | tail 10

siglens/results/c6a.4xlarge.json

Lines changed: 45 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,54 @@
11
{
22
"system": "SigLens",
3-
"date": "2025-03-07",
4-
"machine": "c6a.4xlarge, 700gb gp2",
3+
"date": "2025-05-05",
4+
"machine": "c6a.4xlarge, 300gb gp2",
55
"cluster_size": 1,
66
"tags": ["Go", "logs", "search", "SigLens", "observability"],
7-
"load_time": 6345.886,
7+
"load_time": 5198.83,
88
"data_size": 28396387,
99
"result": [
10-
[0.166, 0.079, 0.090],
11-
[0.730, 0.423, 0.411],
12-
[0.076, 0.081, 0.082],
13-
[0.081, 0.082, 0.080],
14-
[0.079, 0.083, 0.081],
15-
[0.081, 0.081, 0.082],
16-
[0.083, 0.081, 0.085],
17-
[0.449, 0.903, 0.284],
18-
[6.341, 5.590, 5.617],
19-
[7.673, 7.360, 7.446],
20-
[1.265, 0.673, 0.653],
21-
[1.272, 0.690, 0.700],
22-
[0.257, 0.222, 0.244],
23-
[0.272, 0.307, 0.346],
24-
[0.276, 0.266, 0.244],
25-
[0.159, 0.165, 0.273],
26-
[0.247, 0.229, 0.220],
27-
[0.241, 0.327, 0.308],
10+
[0.209, 0.082, 0.082],
11+
[0.602, 0.308, 0.306],
12+
[0.162, 0.082, 0.079],
13+
[0.182, 0.074, 0.080],
14+
[0.172, 0.081, 0.086],
15+
[0.155, 0.075, 0.086],
16+
[0.168, 0.083, 0.080],
17+
[0.625, 0.886, 0.178],
18+
[5.415, 4.735, 4.679],
19+
[7.755, 6.464, 6.525],
20+
[2.299, 1.369, 0.639],
21+
[9.720, 0.687, 0.683],
22+
[0.318, 0.223, 0.243],
23+
[0.407, 0.264, 0.259],
24+
[0.372, 0.243, 0.240],
25+
[0.262, 0.181, 0.181],
26+
[0.350, 0.233, 0.229],
27+
[0.349, 0.218, 0.218],
2828
[null, null, null],
29-
[1.251, 0.478, 0.479],
30-
[32.722, 30.116, 29.918],
31-
[6.781, 2.027, 1.952],
32-
[33.342, 5.674, 5.526],
33-
[44.928, 7.360, 7.137],
34-
[26.121, 23.556, 23.550],
35-
[22.963, 23.171, 23.025],
36-
[30.059, 30.029, 29.992],
37-
[14.537, 12.449, 12.391],
38-
[null, null, null],
39-
[0.141, 0.131, 0.123],
40-
[0.314, 0.270, 0.273],
41-
[0.407, 0.352, 0.381],
42-
[0.532, 0.528, 0.554],
43-
[0.346, 0.333, 0.419],
44-
[65.397, 65.075, 66.119],
45-
[95.028, 94.013, 93.870],
46-
[10.960, 9.286, 9.354],
47-
[10.594, 9.325, 9.323],
48-
[3.163, 2.150, 2.136],
49-
[68.890, 68.193, 68.379],
50-
[2.111, 1.500, 1.496],
51-
[2.427, 1.745, 1.738],
52-
[null, null, null]
29+
[1.020, 1.066, 0.256],
30+
[10.224, 2.242, 2.237],
31+
[13.308, 2.421, 1.683],
32+
[23.581, 3.912, 2.874],
33+
[14.173, 1.890, 1.362],
34+
[12.512, 8.067, 8.055],
35+
[10.056, 7.677, 7.659],
36+
[12.969, 10.032, 10.020],
37+
[12.546, 6.239, 5.523],
38+
[131.366, 129.492, 128.019],
39+
[0.177, 0.095, 0.094],
40+
[0.464, 0.283, 0.273],
41+
[0.651, 0.355, 0.325],
42+
[0.673, 0.475, 0.448],
43+
[0.525, 0.328, 0.333],
44+
[0.536, 0.344, 0.317],
45+
[0.261, 0.183, 0.189],
46+
[12.303, 6.702, 6.646],
47+
[11.801, 6.439, 6.456],
48+
[2.272, 1.155, 1.140],
49+
[7.274, 5.705, 5.726],
50+
[2.006, 1.113, 1.106],
51+
[4.910, 2.525, 2.540],
52+
[10.214, 7.921, 7.923]
5353
]
5454
}

siglens/send_data.py

Lines changed: 0 additions & 72 deletions
This file was deleted.

0 commit comments

Comments
 (0)