Skip to content

Commit 3c8fecd

Browse files
authored
Merge pull request #53 from scality/improvement/WKBCH-13-clickhouse
WKBCH-13: Add Clickhouse for bucket logging
2 parents 5867821 + 0bf77e3 commit 3c8fecd

19 files changed

+375
-0
lines changed

cmd/config.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ type EnvironmentConfig struct {
5757
Redis RedisConfig `yaml:"redis"`
5858
Utapi UtapiConfig `yaml:"utapi"`
5959
MigrationTools MigrationToolsConfig `yaml:"migration_tools"`
60+
Clickhouse ClickhouseConfig `yaml:"clickhouse"`
6061
}
6162

6263
type GlobalConfig struct {
@@ -70,6 +71,7 @@ type FeatureConfig struct {
7071
CrossRegionReplication CrossRegionReplicationFeatureConfig `yaml:"cross_region_replication"`
7172
Utapi UtapiFeatureConfig `yaml:"utapi"`
7273
Migration MigrationFeatureConfig `yaml:"migration"`
74+
AccessLogging AccessLoggingFeatureConfig `yaml:"access_logging"`
7375
}
7476

7577
type ScubaFeatureConfig struct {
@@ -212,6 +214,15 @@ type RedisConfig struct {
212214
LogLevel string `yaml:"log_level"`
213215
}
214216

217+
// ClickhouseConfig holds the Clickhouse section of an environment
// configuration; EnvironmentConfig exposes it under the `clickhouse` YAML key.
type ClickhouseConfig struct {
218+
// Image is read from the `image` YAML key.
// NOTE(review): presumably the container image the Clickhouse Dockerfiles build on — confirm against the templates.
Image string `yaml:"image"`
219+
// LogLevel is read from `log_level`; LoadEnvironmentConfig replaces an empty
// value with Global.LogLevel.
LogLevel string `yaml:"log_level"`
220+
}
221+
222+
// AccessLoggingFeatureConfig is the `access_logging` entry of FeatureConfig
// and switches the access-logging feature on or off.
type AccessLoggingFeatureConfig struct {
223+
// Enabled is false in DefaultEnvironmentConfig; when true, getComposeProfiles
// appends the "feature-access-logging" compose profile.
Enabled bool `yaml:"enabled"`
224+
}
225+
215226
func DefaultEnvironmentConfig() EnvironmentConfig {
216227
return EnvironmentConfig{
217228
Global: GlobalConfig{
@@ -237,6 +248,9 @@ func DefaultEnvironmentConfig() EnvironmentConfig {
237248
CrossRegionReplication: CrossRegionReplicationFeatureConfig{
238249
Enabled: false,
239250
},
251+
AccessLogging: AccessLoggingFeatureConfig{
252+
Enabled: false,
253+
},
240254
},
241255
Cloudserver: CloudserverConfig{},
242256
S3Metadata: MetadataConfig{
@@ -272,6 +286,7 @@ func DefaultEnvironmentConfig() EnvironmentConfig {
272286
},
273287
Utapi: UtapiConfig{},
274288
MigrationTools: MigrationToolsConfig{},
289+
Clickhouse: ClickhouseConfig{},
275290
}
276291
}
277292

@@ -348,5 +363,9 @@ func LoadEnvironmentConfig(path string) (EnvironmentConfig, error) {
348363
cfg.MigrationTools.LogLevel = cfg.Global.LogLevel
349364
}
350365

366+
if cfg.Clickhouse.LogLevel == "" {
367+
cfg.Clickhouse.LogLevel = cfg.Global.LogLevel
368+
}
369+
351370
return cfg, nil
352371
}

cmd/configure.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ func createLogDirectories(envDir string) error {
4040
filepath.Join(envDir, "logs", "scuba"),
4141
filepath.Join(envDir, "logs", "backbeat"),
4242
filepath.Join(envDir, "logs", "migration-tools"),
43+
filepath.Join(envDir, "logs", "clickhouse-shard-1"),
44+
filepath.Join(envDir, "logs", "clickhouse-shard-2"),
4345
}
4446

4547
for _, dir := range logDirs {
@@ -71,6 +73,7 @@ func configureEnv(cfg EnvironmentConfig, envDir string) error {
7173
generateKafkaConfig,
7274
generateUtapiConfig,
7375
generateMigrationToolsConfig,
76+
generateClickhouseConfig,
7477
}
7578

7679
configDir := filepath.Join(envDir, "config")
@@ -199,3 +202,23 @@ func generateMigrationToolsConfig(cfg EnvironmentConfig, path string) error {
199202

200203
return renderTemplates(cfg, "templates/migration-tools", filepath.Join(path, "migration-tools"), templates)
201204
}
205+
206+
// generateClickhouseConfig hands the fixed list of Clickhouse template names
// below to renderTemplates, using "templates/clickhouse" as the template
// location and filepath.Join(path, "clickhouse") as the target, and returns
// whatever error renderTemplates reports.
// It is registered in configureEnv's generator list.
// NOTE(review): renderTemplates is not visible here; it presumably renders each
// named template with cfg into the target directory — confirm.
func generateClickhouseConfig(cfg EnvironmentConfig, path string) error {
207+
// Names are relative to the template directory; the init.d/ entries are the
// numbered SQL files of the schema setup.
templates := []string{
208+
"Dockerfile.shard",
209+
"Dockerfile.setup",
210+
"entrypoint.sh",
211+
"cluster-config.xml",
212+
"ports-shard-1.xml",
213+
"ports-shard-2.xml",
214+
"init-schema.sh",
215+
"init.d/01-create-database.sql",
216+
"init.d/02-create-ingest-table.sql",
217+
"init.d/03-create-storage-table.sql",
218+
"init.d/04-create-offsets-table.sql",
219+
"init.d/05-create-distributed-tables.sql",
220+
"init.d/06-create-materialized-view.sql",
221+
}
222+
223+
return renderTemplates(cfg, "templates/clickhouse", filepath.Join(path, "clickhouse"), templates)
224+
}

cmd/util.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ func getComposeProfiles(cfg EnvironmentConfig) []string {
8888
profiles = append(profiles, "feature-crr")
8989
}
9090

91+
if cfg.Features.AccessLogging.Enabled {
92+
profiles = append(profiles, "feature-access-logging")
93+
}
94+
9195
return profiles
9296
}
9397

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Image that layers the schema init script and its SQL files on top of
# BASE_IMAGE and runs the script as the default command.
# NOTE(review): the file name is not visible in this view; presumably this is
# the Dockerfile.setup template — confirm.
ARG BASE_IMAGE
2+
FROM $BASE_IMAGE
3+
4+
USER root
5+
6+
# Copy the init script with mode 755 so it can be executed directly by CMD.
COPY --chmod=755 init-schema.sh /opt/
7+
# The SQL files land in /opt/init.d/, the directory init-schema.sh iterates over.
COPY init.d/*.sql /opt/init.d/
8+
9+
CMD ["/opt/init-schema.sh"]
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Image that adds a custom entrypoint script on top of BASE_IMAGE.
# NOTE(review): the file name is not visible in this view; presumably this is
# the Dockerfile.shard template — confirm.
ARG BASE_IMAGE
2+
FROM $BASE_IMAGE
3+
4+
# The entrypoint starts as root: it chowns the data and log directories before
# switching to the clickhouse user.
USER root
5+
6+
COPY --chmod=755 entrypoint.sh /usr/local/bin/
7+
8+
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<?xml version="1.0"?>
2+
<clickhouse>
<!--
  Declares the cluster "workbench_cluster" with two shards of one replica each.
  Both replicas are addressed on 127.0.0.1 (ports 9002 and 9003) as user
  "default" with an empty password.
  NOTE(review): the loopback addresses presumably rely on both shard servers
  sharing one network namespace - confirm against the compose setup.
-->
3+
<remote_servers>
4+
<workbench_cluster>
5+
<shard>
6+
<replica>
7+
<host>127.0.0.1</host>
8+
<port>9002</port>
9+
<user>default</user>
10+
<password></password>
11+
</replica>
12+
</shard>
13+
<shard>
14+
<replica>
15+
<host>127.0.0.1</host>
16+
<port>9003</port>
17+
<user>default</user>
18+
<password></password>
19+
</replica>
20+
</shard>
21+
</workbench_cluster>
22+
</remote_servers>
23+
</clickhouse>

templates/clickhouse/entrypoint.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/sh
# Container entrypoint: hands ownership of the ClickHouse data and log
# directories to the clickhouse user, then replaces this shell with
# clickhouse-server running as that user.
2+
# Abort immediately if any command below fails (e.g. a chown error).
set -e
3+
4+
# Fix permissions on data and log directories
# (recursive chown to clickhouse:clickhouse).
5+
chown -R clickhouse:clickhouse /var/lib/clickhouse
6+
chown -R clickhouse:clickhouse /var/log/clickhouse-server
7+
8+
# Switch to clickhouse user and start server.
# Both the outer and the inner command use exec, so no intermediate shell
# process is left between the container's init process and clickhouse-server.
9+
exec su clickhouse -s /bin/sh -c 'exec /usr/bin/clickhouse-server --config-file=/etc/clickhouse-server/config.xml'
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env sh
# Schema bootstrap: blocks until the ClickHouse endpoints 127.0.0.1:9002 and
# 127.0.0.1:9003 both answer "SELECT 1", then feeds every /opt/init.d/*.sql
# file to each endpoint, in the order the shell expands the glob.
# NOTE(review): the two wait loops have no retry limit, so the script never
# exits if a shard stays unreachable - confirm that this is intended.
2+
# Stop at the first failing command (a failing SQL file aborts the run).
set -e
3+
# Trace every command to the output for debugging.
set -x
4+
5+
echo "[clickhouse-setup] Starting schema initialization..."
6+
7+
# Wait for both shards to be ready
# (poll every 2 seconds; client output is discarded, only the exit status matters).
8+
echo "[clickhouse-setup] Waiting for shard 1..."
9+
until clickhouse-client --host 127.0.0.1 --port 9002 --query "SELECT 1" > /dev/null 2>&1; do
10+
echo "[clickhouse-setup] Shard 1 not ready, waiting 2s..."
11+
sleep 2
12+
done
13+
echo "[clickhouse-setup] Shard 1 is ready!"
14+
15+
echo "[clickhouse-setup] Waiting for shard 2..."
16+
until clickhouse-client --host 127.0.0.1 --port 9003 --query "SELECT 1" > /dev/null 2>&1; do
17+
echo "[clickhouse-setup] Shard 2 not ready, waiting 2s..."
18+
sleep 2
19+
done
20+
echo "[clickhouse-setup] Shard 2 is ready!"
21+
22+
# Execute SQL files on both shards
# Each file is applied to shard 1 first, then to shard 2, before moving on to
# the next file; --multiquery lets one file carry several statements.
23+
for sql_file in /opt/init.d/*.sql; do
24+
filename=$(basename "$sql_file")
25+
echo "[clickhouse-setup] Executing $filename on shard 1..."
26+
clickhouse-client --host 127.0.0.1 --port 9002 --multiquery < "$sql_file"
27+
28+
echo "[clickhouse-setup] Executing $filename on shard 2..."
29+
clickhouse-client --host 127.0.0.1 --port 9003 --multiquery < "$sql_file"
30+
done
31+
32+
echo "[clickhouse-setup] Schema initialization completed successfully!"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
-- Creates the `logs` database that holds the access-log tables; IF NOT EXISTS
-- makes the statement safe to re-run.
CREATE DATABASE IF NOT EXISTS logs;
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
-- Ingest table for access-log records. It uses the Null engine (see the final
-- line), so this table does not keep the rows written to it.
-- NOTE(review): presumably a materialized view (06-create-materialized-view.sql)
-- forwards inserted rows to the storage table - confirm against that file.
-- NOTE(review): column naming mixes camelCase with snake_case (elapsed_ms,
-- req_id); presumably it mirrors the producer's field names - confirm before renaming.
CREATE TABLE IF NOT EXISTS logs.access_logs_ingest
2+
(
3+
-- Common
4+
timestamp DateTime,
5+
insertedAt DateTime DEFAULT now(),
6+
hostname LowCardinality(String),
7+
8+
-- Analytics
9+
action LowCardinality(String),
10+
accountName String,
11+
accountDisplayName String,
12+
userName String,
13+
clientPort UInt32,
14+
httpMethod LowCardinality(String),
15+
bytesDeleted UInt64,
16+
bytesReceived UInt64,
17+
bodyLength UInt64,
18+
contentLength UInt64,
19+
elapsed_ms Float32,
20+
21+
-- AWS access server logs fields https://docs.aws.amazon.com/AmazonS3/latest/userguide/LogFormat.html
22+
startTime DateTime64(3), -- AWS "Time" field
23+
requester String,
24+
operation String,
25+
requestURI String,
26+
errorCode String,
27+
objectSize UInt64,
28+
totalTime Float32,
29+
turnAroundTime Float32,
30+
referer String,
31+
userAgent String,
32+
versionId String,
33+
signatureVersion LowCardinality(String),
34+
cipherSuite LowCardinality(String),
35+
authenticationType LowCardinality(String),
36+
hostHeader String,
37+
tlsVersion LowCardinality(String),
38+
aclRequired LowCardinality(String),
39+
40+
-- Shared between AWS access server logs and Analytics logs
41+
bucketOwner String, -- AWS "Bucket Owner" field
42+
bucketName String, -- AWS "Bucket" field
43+
req_id String, -- AWS "Request ID" field
44+
bytesSent UInt64, -- AWS "Bytes Sent" field
45+
clientIP String, -- AWS "Remote IP" field
46+
httpCode UInt16, -- AWS "HTTP Status" field
47+
objectKey String, -- AWS "Key" field
48+
49+
-- Scality server access logs extra fields.
50+
logFormatVersion LowCardinality(String),
51+
loggingEnabled Bool,
52+
loggingTargetBucket String,
53+
loggingTargetPrefix String,
54+
awsAccessKeyID String,
55+
raftSessionID UInt16
56+
)
57+
-- Null engine: data written to this table is discarded by the table itself.
Engine = Null();

0 commit comments

Comments
 (0)