Skip to content

Commit 578d921

Browse files
committed
update sql and result
1 parent 88270e7 commit 578d921

File tree

7 files changed

+54
-10
lines changed

7 files changed

+54
-10
lines changed

doris/ddl_materialized.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ CREATE TABLE bluesky (
33
operation VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data, '$.commit.operation')) NULL,
44
collection VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data, '$.commit.collection')) NULL,
55
did VARCHAR(100) GENERATED ALWAYS AS (get_json_string(data,'$.did')) NOT NULL,
6-
time_us BIGINT GENERATED ALWAYS AS (get_json_bigint(data, '$.time_us')) NOT NULL,
6+
-- Event timestamp derived from the JSON field time_us (microseconds since the epoch).
-- Declared as DATETIME(6) so the microsecond precision produced by from_microsecond() is kept.
-- A bare DATETIME has scale 0 and would store whole seconds only, which would skew MILLISECONDS_DIFF results.
time DATETIME(6) GENERATED ALWAYS AS (from_microsecond(get_json_bigint(data, '$.time_us'))) NOT NULL,
77
`data` variant NOT NULL
88
)
99
DUPLICATE KEY (kind, operation, collection)

doris/queries_default.sql

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
SELECT get_json_string(data, 'commit.collection') AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC;
2-
SELECT get_json_string(data, 'commit.collection') AS event, COUNT(*) AS count, COUNT(DISTINCT get_json_string(data, 'did')) AS users FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' GROUP BY event ORDER BY count DESC;
3-
SELECT get_json_string(data, 'commit.collection') AS event, HOUR(from_microsecond(get_json_int(data, 'time_us'))) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' AND get_json_string(data, 'commit.collection') IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event;
4-
SELECT get_json_string(data, 'did') AS user_id, MIN(from_microsecond(get_json_int(data, 'time_us'))) AS first_post_ts FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' AND get_json_string(data, 'commit.collection') = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3;
5-
SELECT get_json_string(data, 'did') AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(get_json_int(data, 'time_us'))),MIN(from_microsecond(get_json_int(data, 'time_us')))) AS activity_span FROM bluesky WHERE get_json_string(data, 'kind') = 'commit' AND get_json_string(data, 'commit.operation') = 'create' AND get_json_string(data, 'commit.collection') = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3;
1+
SELECT CAST(data['commit']['collection'] AS TEXT) AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC;
2+
SELECT CAST(data['commit']['collection'] AS TEXT) AS event, COUNT(*) AS count, COUNT(DISTINCT CAST(data['did'] AS TEXT)) AS users FROM bluesky WHERE CAST(data['kind'] AS TEXT) = 'commit' AND CAST(data['commit']['operation'] AS TEXT) = 'create' GROUP BY event ORDER BY count DESC;
3+
SELECT CAST(data['commit']['collection'] AS TEXT) AS event, HOUR(from_microsecond(CAST(data['time_us'] AS BIGINT))) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE CAST(data['kind'] AS TEXT) = 'commit' AND CAST(data['commit']['operation'] AS TEXT) = 'create' AND CAST(data['commit']['collection'] AS TEXT) IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event;
4+
SELECT CAST(data['did'] AS TEXT) AS user_id, MIN(from_microsecond(CAST(data['time_us'] AS BIGINT))) AS first_post_ts FROM bluesky WHERE CAST(data['kind'] AS TEXT) = 'commit' AND CAST(data['commit']['operation'] AS TEXT) = 'create' AND CAST(data['commit']['collection'] AS TEXT) = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3;
5+
SELECT CAST(data['did'] AS TEXT) AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(CAST(data['time_us'] AS BIGINT))), MIN(from_microsecond(CAST(data['time_us'] AS BIGINT)))) AS activity_span FROM bluesky WHERE CAST(data['kind'] AS TEXT) = 'commit' AND CAST(data['commit']['operation'] AS TEXT) = 'create' AND CAST(data['commit']['collection'] AS TEXT) = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3;

doris/queries_materialized.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
SELECT collection AS event, COUNT(*) AS count FROM bluesky GROUP BY event ORDER BY count DESC;
22
SELECT collection AS event, COUNT(*) AS count, COUNT(DISTINCT did) AS users FROM bluesky WHERE kind = 'commit' AND operation = 'create' GROUP BY event ORDER BY count DESC;
3-
SELECT collection AS event, HOUR(from_microsecond(time_us)) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event;
4-
SELECT did AS user_id, MIN(from_microsecond(time_us)) AS first_post_ts FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3;
5-
SELECT did AS user_id, MILLISECONDS_DIFF(MAX(from_microsecond(time_us)),MIN(from_microsecond(time_us))) AS activity_span FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3;
3+
SELECT collection AS event, HOUR(time) AS hour_of_day, COUNT(*) AS count FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection IN ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like') GROUP BY event, hour_of_day ORDER BY hour_of_day, event;
4+
SELECT did AS user_id, MIN(time) AS first_post_ts FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY first_post_ts ASC LIMIT 3;
5+
SELECT did AS user_id, MILLISECONDS_DIFF(MAX(time),MIN(time)) AS activity_span FROM bluesky WHERE kind = 'commit' AND operation = 'create' AND collection = 'app.bsky.feed.post' GROUP BY user_id ORDER BY activity_span DESC LIMIT 3;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"system": "Apache Doris (default)",
3+
"version": "doris-3.0.5-rc01-e277cfb83f",
4+
"os": "Ubuntu 24.04",
5+
"date": "2025-05-07",
6+
"machine": "m6i.8xlarge, 10000gib gp3",
7+
"retains_structure": "yes",
8+
"tags": [
9+
],
10+
"dataset_size": 1000000000,
11+
"num_loaded_documents": 999999994,
12+
"total_size": 214692530225,
13+
"result": [
14+
[4.83,1.69,1.69],
15+
[221.55,11.01,10.25],
16+
[27.94,7.46,7.44],
17+
[234.42,6.37,5.72],
18+
[235.32,6.14,5.88]
19+
]
20+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"system": "Apache Doris (materialized)",
3+
"version": "doris-3.0.5-rc01-e277cfb83f",
4+
"os": "Ubuntu 24.04",
5+
"date": "2025-05-07",
6+
"machine": "m6i.8xlarge, 10000gib gp3",
7+
"retains_structure": "yes",
8+
"tags": [
9+
],
10+
"dataset_size": 1000000000,
11+
"num_loaded_documents": 999999245,
12+
"total_size": 215010357805,
13+
"result": [
14+
[1.67,1.50,1.50],
15+
[119.07,5.62,5.38],
16+
[14.23,1.55,1.56],
17+
[11.38,0.51,0.50],
18+
[0.54,0.53,0.53]
19+
]
20+
}

doris/run_queries.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ cat $QUERIES_FILE | while read -r query; do
2828

2929
# Execute the query multiple times
3030
for i in $(seq 1 $TRIES); do
31-
RESP=$(mysql -vvv -h127.1 -P9030 -uroot "$DB_NAME" -e "$query" | perl -nle 'if (/\((?:(\d+) min )?(\d+\.\d+) sec\)/) { $t = ($1 || 0) * 60 + $2; print $t }' ||:)
31+
# Run the query through the mysql client (-vvv) against 127.1:9030 as root and keep only the timing.
# The perl filter matches "(S.SS sec)" or "(N min S.SS sec)", converts it to seconds
# (minutes * 60 + seconds) and prints it with two decimals, one value per matching line.
# The trailing `||:` forces a zero exit status, so a failed run leaves RESP empty instead of erroring.
RESP=$(mysql -vvv -h127.1 -P9030 -uroot "$DB_NAME" -e "$query" | perl -nle 'if (/\((?:(\d+) min )?(\d+\.\d+) sec\)/) { $t = ($1 || 0) * 60 + $2; printf "%.2f\n", $t }' ||:)
3232
echo "Response time: ${RESP} s"
3333
done;
3434
done;

doris/start.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#!/bin/bash
22

3+
# Export JAVA_HOME (OpenJDK 17, Debian/Ubuntu amd64 path) so the processes started below inherit it.
# NOTE(review): the path is hard-coded; presumably matches the JDK installed by the setup scripts — confirm.
export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64"
4+
# Raise the kernel limit on memory-map areas per process to 2000000 for the running system.
# NOTE(review): requires root and is not checked for failure; presumably the value the Doris BE expects — confirm against the Doris deployment docs.
sysctl -w vm.max_map_count=2000000
5+
# Raise the open-file-descriptor limit for this shell; processes launched below inherit it.
# NOTE(review): fails (unchecked) if the hard limit is lower than 655350.
ulimit -n 655350
6+
37
# Start the Doris BE process in daemon mode.
# The path is quoted so it survives spaces, and `:?` aborts with a clear message when
# DORIS_FULL_NAME is unset or empty instead of trying to run /be/bin/start_be.sh.
"${DORIS_FULL_NAME:?DORIS_FULL_NAME must be set to the Doris install directory}/be/bin/start_be.sh" --daemon
48
# Start the Doris FE process in daemon mode.
# The path is quoted so it survives spaces, and `:?` aborts with a clear message when
# DORIS_FULL_NAME is unset or empty instead of trying to run /fe/bin/start_fe.sh.
"${DORIS_FULL_NAME:?DORIS_FULL_NAME must be set to the Doris install directory}/fe/bin/start_fe.sh" --daemon
59

0 commit comments

Comments
 (0)