Skip to content

Commit 228f239

Browse files
committed
Add Ursa
1 parent 9a22401 commit 228f239

File tree

6 files changed

+273
-0
lines changed

6 files changed

+273
-0
lines changed

ursa/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Ursa
2+
3+
Ursa is a research project based on ClickHouse that aims to make ClickHouse faster than Umbra.
4+
5+
The goal is to research all possible optimizations, even if they are backward-incompatible with the original ClickHouse. If
6+
some of them prove useful and provide significant performance improvements, they can be backported to ClickHouse while keeping backward compatibility.

ursa/benchmark.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/bash
#
# End-to-end benchmark driver for Ursa: downloads the ursa binary, starts a
# server in the background, creates and loads the "hits" table, runs the
# query set through run.sh and finally prints the table size in bytes.
#
# Usage: ./benchmark.sh [RUN_TYPE]
#   RUN_TYPE is forwarded unchanged to run.sh (empty when omitted).

# Stop at the first failing step: continuing after a failed download or a
# failed table creation would only produce meaningless benchmark numbers.
set -euo pipefail

# Install

wget "https://ursa-private-builds.s3.eu-central-1.amazonaws.com/ursa"
chmod +x ursa

./ursa server &
SERVER_PID=$!

# Poll once per second until the server answers a trivial query. If the
# server process is gone there is nothing to wait for, so abort instead of
# looping forever.
until ./ursa client --query "SELECT 1"; do
    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        echo "Error: ursa server exited before accepting connections" >&2
        exit 1
    fi
    sleep 1
done

# Load the data

./ursa client < create.sql

wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
gzip -d hits.tsv.gz

# --time makes the client report the elapsed time of the INSERT.
./ursa client --time --query "INSERT INTO hits FORMAT TSV" < hits.tsv

# Run the queries

# ${1:-} keeps the "no argument" case working under `set -u`.
./run.sh "${1:-}"

./ursa client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'"

ursa/create.sql

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
CREATE TABLE hits
2+
(
3+
WatchID BIGINT NOT NULL,
4+
JavaEnable SMALLINT NOT NULL,
5+
Title TEXT NOT NULL,
6+
GoodEvent SMALLINT NOT NULL,
7+
EventTime TIMESTAMP NOT NULL,
8+
EventDate Date NOT NULL,
9+
CounterID INTEGER NOT NULL,
10+
ClientIP INTEGER NOT NULL,
11+
RegionID INTEGER NOT NULL,
12+
UserID BIGINT NOT NULL,
13+
CounterClass SMALLINT NOT NULL,
14+
OS SMALLINT NOT NULL,
15+
UserAgent SMALLINT NOT NULL,
16+
URL TEXT NOT NULL,
17+
Referer TEXT NOT NULL,
18+
IsRefresh SMALLINT NOT NULL,
19+
RefererCategoryID SMALLINT NOT NULL,
20+
RefererRegionID INTEGER NOT NULL,
21+
URLCategoryID SMALLINT NOT NULL,
22+
URLRegionID INTEGER NOT NULL,
23+
ResolutionWidth SMALLINT NOT NULL,
24+
ResolutionHeight SMALLINT NOT NULL,
25+
ResolutionDepth SMALLINT NOT NULL,
26+
FlashMajor SMALLINT NOT NULL,
27+
FlashMinor SMALLINT NOT NULL,
28+
FlashMinor2 TEXT NOT NULL,
29+
NetMajor SMALLINT NOT NULL,
30+
NetMinor SMALLINT NOT NULL,
31+
UserAgentMajor SMALLINT NOT NULL,
32+
UserAgentMinor VARCHAR(255) NOT NULL,
33+
CookieEnable SMALLINT NOT NULL,
34+
JavascriptEnable SMALLINT NOT NULL,
35+
IsMobile SMALLINT NOT NULL,
36+
MobilePhone SMALLINT NOT NULL,
37+
MobilePhoneModel TEXT NOT NULL,
38+
Params TEXT NOT NULL,
39+
IPNetworkID INTEGER NOT NULL,
40+
TraficSourceID SMALLINT NOT NULL,
41+
SearchEngineID SMALLINT NOT NULL,
42+
SearchPhrase TEXT NOT NULL,
43+
AdvEngineID SMALLINT NOT NULL,
44+
IsArtifical SMALLINT NOT NULL,
45+
WindowClientWidth SMALLINT NOT NULL,
46+
WindowClientHeight SMALLINT NOT NULL,
47+
ClientTimeZone SMALLINT NOT NULL,
48+
ClientEventTime TIMESTAMP NOT NULL,
49+
SilverlightVersion1 SMALLINT NOT NULL,
50+
SilverlightVersion2 SMALLINT NOT NULL,
51+
SilverlightVersion3 INTEGER NOT NULL,
52+
SilverlightVersion4 SMALLINT NOT NULL,
53+
PageCharset TEXT NOT NULL,
54+
CodeVersion INTEGER NOT NULL,
55+
IsLink SMALLINT NOT NULL,
56+
IsDownload SMALLINT NOT NULL,
57+
IsNotBounce SMALLINT NOT NULL,
58+
FUniqID BIGINT NOT NULL,
59+
OriginalURL TEXT NOT NULL,
60+
HID INTEGER NOT NULL,
61+
IsOldCounter SMALLINT NOT NULL,
62+
IsEvent SMALLINT NOT NULL,
63+
IsParameter SMALLINT NOT NULL,
64+
DontCountHits SMALLINT NOT NULL,
65+
WithHash SMALLINT NOT NULL,
66+
HitColor CHAR NOT NULL,
67+
LocalEventTime TIMESTAMP NOT NULL,
68+
Age SMALLINT NOT NULL,
69+
Sex SMALLINT NOT NULL,
70+
Income SMALLINT NOT NULL,
71+
Interests SMALLINT NOT NULL,
72+
Robotness SMALLINT NOT NULL,
73+
RemoteIP INTEGER NOT NULL,
74+
WindowName INTEGER NOT NULL,
75+
OpenerName INTEGER NOT NULL,
76+
HistoryLength SMALLINT NOT NULL,
77+
BrowserLanguage TEXT NOT NULL,
78+
BrowserCountry TEXT NOT NULL,
79+
SocialNetwork TEXT NOT NULL,
80+
SocialAction TEXT NOT NULL,
81+
HTTPError SMALLINT NOT NULL,
82+
SendTiming INTEGER NOT NULL,
83+
DNSTiming INTEGER NOT NULL,
84+
ConnectTiming INTEGER NOT NULL,
85+
ResponseStartTiming INTEGER NOT NULL,
86+
ResponseEndTiming INTEGER NOT NULL,
87+
FetchTiming INTEGER NOT NULL,
88+
SocialSourceNetworkID SMALLINT NOT NULL,
89+
SocialSourcePage TEXT NOT NULL,
90+
ParamPrice BIGINT NOT NULL,
91+
ParamOrderID TEXT NOT NULL,
92+
ParamCurrency TEXT NOT NULL,
93+
ParamCurrencyID SMALLINT NOT NULL,
94+
OpenstatServiceName TEXT NOT NULL,
95+
OpenstatCampaignID TEXT NOT NULL,
96+
OpenstatAdID TEXT NOT NULL,
97+
OpenstatSourceID TEXT NOT NULL,
98+
UTMSource TEXT NOT NULL,
99+
UTMMedium TEXT NOT NULL,
100+
UTMCampaign TEXT NOT NULL,
101+
UTMContent TEXT NOT NULL,
102+
UTMTerm TEXT NOT NULL,
103+
FromTag TEXT NOT NULL,
104+
HasGCLID SMALLINT NOT NULL,
105+
RefererHash BIGINT NOT NULL,
106+
URLHash BIGINT NOT NULL,
107+
CLID INTEGER NOT NULL,
108+
PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID)
109+
)
110+
ENGINE = MergeTree;

ursa/queries.sql

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
SELECT COUNT(*) FROM hits;
2+
SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
3+
SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
4+
SELECT AVG(UserID) FROM hits;
5+
SELECT COUNT(DISTINCT UserID) FROM hits;
6+
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
7+
SELECT MIN(EventDate), MAX(EventDate) FROM hits;
8+
SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
9+
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
10+
SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
11+
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
12+
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
13+
SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
14+
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
15+
SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
16+
SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
17+
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
18+
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
19+
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
20+
SELECT UserID FROM hits WHERE UserID = 435090932899640449;
21+
SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
22+
SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
23+
SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
24+
SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
25+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
26+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
27+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
28+
SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
29+
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
30+
SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), 
SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;
31+
SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
32+
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
33+
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
34+
SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
35+
SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
36+
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
37+
SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
38+
SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
39+
SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
40+
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
41+
SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
42+
SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
43+
SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;

ursa/results/c6a.4xlarge.json

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
{
2+
"system": "Ursa",
3+
"date": "2025-02-02",
4+
"machine": "c6a.4xlarge, 500gb gp2",
5+
"cluster_size": 1,
6+
"comment": "",
7+
"tags": ["C++", "column-oriented", "ClickHouse derivative"],
8+
"load_time": 501.085,
9+
"data_size": 15572828006,
10+
"result": [
11+
[0.008, 0.001, 0.001],
12+
[0.004, 0.001, 0.001],
13+
[0.075, 0.021, 0.020],
14+
[0.116, 0.029, 0.028],
15+
[0.533, 0.339, 0.335],
16+
[0.617, 0.407, 0.406],
17+
[0.003, 0.001, 0.002],
18+
[0.044, 0.017, 0.016],
19+
[0.564, 0.500, 0.480],
20+
[0.651, 0.526, 0.522],
21+
[0.228, 0.147, 0.145],
22+
[0.233, 0.158, 0.156],
23+
[0.973, 0.522, 0.517],
24+
[1.612, 0.791, 0.780],
25+
[0.860, 0.552, 0.574],
26+
[0.640, 0.498, 0.504],
27+
[2.016, 1.521, 1.490],
28+
[1.448, 0.852, 0.861],
29+
[4.423, 3.075, 3.110],
30+
[0.085, 0.002, 0.002],
31+
[9.239, 0.293, 0.296],
32+
[10.738, 0.106, 0.098],
33+
[13.580, 0.684, 0.688],
34+
[10.951, 0.356, 0.356],
35+
[1.985, 0.157, 0.150],
36+
[0.953, 0.136, 0.132],
37+
[2.472, 0.158, 0.152],
38+
[0.364, 0.111, 0.112],
39+
[8.527, 4.933, 4.929],
40+
[0.056, 0.028, 0.027],
41+
[0.379, 0.284, 0.289],
42+
[3.390, 0.492, 0.488],
43+
[5.369, 2.974, 2.955],
44+
[10.013, 2.717, 2.719],
45+
[9.974, 2.731, 2.729],
46+
[0.480, 0.349, 0.347],
47+
[0.070, 0.033, 0.034],
48+
[0.039, 0.020, 0.019],
49+
[0.051, 0.016, 0.016],
50+
[0.101, 0.066, 0.064],
51+
[0.037, 0.013, 0.016],
52+
[0.024, 0.011, 0.012],
53+
[0.024, 0.009, 0.009]
54+
]
55+
}
56+

ursa/run.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/bash
#
# Runs every query from queries[-RUN_TYPE].sql (one query per line) TRIES
# times through `./ursa client` and reports the client's --time output.
#
# Usage: ./run.sh [tuned|tuned-memory]
#
# Environment:
#   FQDN      Server host. When empty or unset the client connects to
#             localhost and the OS page cache is dropped before each query.
#   PASSWORD  Client password. When non-empty, --secure is passed as well.
#
# Output:
#   stdout      One "[r1, r2, r3]," row per query; a failed try is "null".
#   stderr      The client's error text for every failed try.
#   result.csv  Appended "query_number,try,result" rows ("null" on failure).

# Determine which set of files to use depending on the type of run.
# SUFFIX is initialised explicitly so a value inherited from the environment
# cannot silently select a different query file.
SUFFIX=""
case "${1:-}" in
    "")
        ;;
    tuned|tuned-memory)
        SUFFIX="-$1"
        ;;
    *)
        echo "Error: command line argument must be one of {'', 'tuned', 'tuned-memory'}" >&2
        exit 1
        ;;
esac

QUERIES_FILE="queries${SUFFIX}.sql"
if [[ ! -r "$QUERIES_FILE" ]]; then
    echo "Error: cannot read ${QUERIES_FILE}" >&2
    exit 1
fi

TRIES=3
QUERY_NUM=1

# Connection arguments are built once, as an array, so that values containing
# spaces survive and --secure is added only when a password is supplied.
CLIENT_ARGS=(--host "${FQDN:-localhost}" --password "${PASSWORD:-}")
if [[ -n "${PASSWORD:-}" ]]; then
    CLIENT_ARGS+=(--secure)
fi

# `IFS= read -r` keeps each query byte-for-byte: without -r the backslashes
# in regex literals (e.g. '\1', 'www\.') would be stripped before the query
# reaches the client. The `|| [[ -n ... ]]` part handles a final line that
# lacks a trailing newline.
while IFS= read -r query || [[ -n "$query" ]]; do
    # Only a local run (no FQDN given) can flush the page cache.
    if [[ -z "${FQDN:-}" ]]; then
        sync
        echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
    fi

    printf '['
    for ((i = 1; i <= TRIES; i++)); do
        # stderr is merged into the capture because that is where the output
        # of --time is collected from. The exit status is tested directly on
        # the assignment, so a failing query is actually detected.
        if RES=$(./ursa client "${CLIENT_ARGS[@]}" --time --format=Null --query="$query" --progress 0 2>&1); then
            printf '%s' "$RES"
        else
            printf 'Query %s, try %s failed: %s\n' "$QUERY_NUM" "$i" "$RES" >&2
            RES="null"
            printf 'null'
        fi
        if ((i != TRIES)); then
            printf ', '
        fi

        printf '%s,%s,%s\n' "$QUERY_NUM" "$i" "$RES" >> result.csv
    done
    echo "],"

    QUERY_NUM=$((QUERY_NUM + 1))
done < "$QUERIES_FILE"

0 commit comments

Comments
 (0)