Skip to content

Commit fcf1f01

Browse files
Mirror prod ClickHouse: UDFs, pricing dict, span counts MV; seed + docs (#1249)
* clickhouse: add only migrations + docs; drop unrelated formatting changes Co-Authored-By: Alex <[email protected]> * docs: add exact prod ClickHouse mirroring steps (schema dump, full pricing import, verification) Co-Authored-By: Alex <[email protected]> * docs: simplify with Quick Start; move detailed prod parity to Advanced section Co-Authored-By: Alex <[email protected]> * docs(clickhouse): simplify for newbies; remove prod-dump; add offline full pricing seed (0004) Co-Authored-By: Alex <[email protected]> --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Alex <[email protected]>
1 parent 37a63b6 commit fcf1f01

File tree

5 files changed

+121
-0
lines changed

5 files changed

+121
-0
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
-- Pricing source table: one price pair per model name.
-- The *_per_1k columns hold the price for 1,000 tokens; the cost UDFs in this
-- migration divide token counts by 1000 before multiplying by these values.
-- This table is the SOURCE of the otel_2.model_costs_dict dictionary.
CREATE TABLE IF NOT EXISTS otel_2.model_costs_source
(
    model                  String,
    prompt_cost_per_1k     Float64,
    completion_cost_per_1k Float64
)
ENGINE = MergeTree()
ORDER BY (model);
9+
10+
-- Drop any previous definition first so the CREATE below always applies.
DROP DICTIONARY IF EXISTS otel_2.model_costs_dict;

-- Key/value lookup of per-model prices, loaded from otel_2.model_costs_source.
-- The key is a String, hence the COMPLEX_KEY_HASHED layout.
-- The lifetime is zero on both bounds, and the seed scripts issue
-- SYSTEM RELOAD DICTIONARY explicitly after inserting rows.
CREATE DICTIONARY otel_2.model_costs_dict
(
    model                  String,
    prompt_cost_per_1k     Float64,
    completion_cost_per_1k Float64
)
PRIMARY KEY model
SOURCE(CLICKHOUSE(
    HOST 'localhost'
    PORT 9000
    USER 'default'
    DB 'otel_2'
    TABLE 'model_costs_source'
))
LIFETIME(MIN 0 MAX 0)
LAYOUT(COMPLEX_KEY_HASHED());
21+
22+
-- normalize_model_name(model) -> String
-- Lower-cases the model name. The bare names 'sonar' and 'sonar-pro'
-- (compared case-insensitively) are mapped to their 'perplexity/'-prefixed
-- form; every other name is returned lower-cased and otherwise unchanged.
DROP FUNCTION IF EXISTS normalize_model_name;
CREATE FUNCTION normalize_model_name AS (model) ->
    CASE
        WHEN lower(model) = 'sonar-pro' THEN 'perplexity/sonar-pro'
        WHEN lower(model) = 'sonar'     THEN 'perplexity/sonar'
        ELSE lower(model)
    END;
29+
30+
-- calculate_prompt_cost(tokens, model) -> Float64
-- Prompt-side cost of a call: (tokens / 1000) * prompt_cost_per_1k for the
-- normalized model name, rounded to 7 decimal places.
-- Returns 0 when tokens <= 0, when model is the empty string, or when the
-- model has no entry in the pricing dictionary (dictGetOrDefault default).
--
-- The dictionary name is database-qualified: SQL UDFs are global, and an
-- unqualified name is resolved against the caller's current database, which
-- made the lookup fail for any session not running in otel_2.
DROP FUNCTION IF EXISTS calculate_prompt_cost;
CREATE FUNCTION calculate_prompt_cost AS (tokens, model) ->
    if(
        (tokens > 0) AND (model != ''),
        round(
            (toFloat64(tokens) / 1000)
                * dictGetOrDefault(
                    'otel_2.model_costs_dict',
                    'prompt_cost_per_1k',
                    normalize_model_name(model),
                    0.
                ),
            7
        ),
        0.
    );
35+
36+
-- calculate_completion_cost(tokens, model) -> Float64
-- Completion-side cost of a call: (tokens / 1000) * completion_cost_per_1k
-- for the normalized model name, rounded to 7 decimal places.
-- Returns 0 when tokens <= 0, when model is the empty string, or when the
-- model has no entry in the pricing dictionary (dictGetOrDefault default).
--
-- The dictionary name is database-qualified: SQL UDFs are global, and an
-- unqualified name is resolved against the caller's current database, which
-- made the lookup fail for any session not running in otel_2.
DROP FUNCTION IF EXISTS calculate_completion_cost;
CREATE FUNCTION calculate_completion_cost AS (tokens, model) ->
    if(
        (tokens > 0) AND (model != ''),
        round(
            (toFloat64(tokens) / 1000)
                * dictGetOrDefault(
                    'otel_2.model_costs_dict',
                    'completion_cost_per_1k',
                    normalize_model_name(model),
                    0.
                ),
            7
        ),
        0.
    );
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
-- Target table for the span-count materialized view.
-- span_count_state is a partial aggregate state for count(), keyed by
-- (project_id, TraceId); read it with countMerge(span_count_state).
CREATE TABLE IF NOT EXISTS otel_2.trace_span_counts
(
    project_id       String,
    TraceId          String,
    span_count_state AggregateFunction(count)
)
ENGINE = AggregatingMergeTree()
ORDER BY (project_id, TraceId);
9+
10+
-- Recreate the view so the definition below is always the one in effect.
DROP VIEW IF EXISTS otel_2.mv_trace_span_counts;

-- Writes a count() state per (project_id, TraceId) into
-- otel_2.trace_span_counts, reading from otel_2.otel_traces.
-- project_id is taken from the 'agentops.project.id' resource attribute.
CREATE MATERIALIZED VIEW otel_2.mv_trace_span_counts
TO otel_2.trace_span_counts
AS
SELECT
    ResourceAttributes['agentops.project.id'] AS project_id,
    TraceId,
    countState() AS span_count_state
FROM otel_2.otel_traces
GROUP BY
    project_id,
    TraceId;
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- Starter pricing seed (small set of models).
-- Idempotent: a row is inserted only when its model name is not already in
-- otel_2.model_costs_source. MergeTree does not enforce key uniqueness, so a
-- plain INSERT ... VALUES duplicated keys every time the seed was re-run or
-- combined with another seed containing the same models.
-- Existing rows are left untouched; prices already in the table are not updated.
INSERT INTO otel_2.model_costs_source (model, prompt_cost_per_1k, completion_cost_per_1k)
SELECT
    model,
    prompt_cost_per_1k,
    completion_cost_per_1k
FROM values(
    'model String, prompt_cost_per_1k Float64, completion_cost_per_1k Float64',
    ('gpt-4o-mini', 0.00015, 0.00060),
    ('gpt-4o', 0.00500, 0.01500),
    ('claude-3-5-sonnet', 0.00300, 0.01500),
    ('perplexity/sonar', 0.00010, 0.00040),
    ('perplexity/sonar-pro', 0.00050, 0.00150)
) AS seed
WHERE seed.model NOT IN (SELECT model FROM otel_2.model_costs_source);

-- The dictionary is not refreshed automatically; reload it so the new rows are visible.
SYSTEM RELOAD DICTIONARY otel_2.model_costs_dict;
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
2+
-- Full offline pricing seed (provider-prefixed model names).
-- Idempotent: a row is inserted only when its model name is not already in
-- otel_2.model_costs_source. MergeTree does not enforce key uniqueness, so a
-- plain INSERT ... VALUES duplicated keys every time the seed was re-run or
-- combined with another seed containing the same models.
-- Existing rows are left untouched; prices already in the table are not updated.
INSERT INTO otel_2.model_costs_source (model, prompt_cost_per_1k, completion_cost_per_1k)
SELECT
    model,
    prompt_cost_per_1k,
    completion_cost_per_1k
FROM values(
    'model String, prompt_cost_per_1k Float64, completion_cost_per_1k Float64',
    ('ai21/jamba-instruct', 0.00100, 0.00400),
    ('ai21/jamba-1-5-large', 0.00250, 0.01000),
    ('anthropic/claude-3-5-haiku', 0.00080, 0.00400),
    ('anthropic/claude-3-5-sonnet', 0.00300, 0.01500),
    ('anthropic/claude-3-opus', 0.01500, 0.07500),
    ('anthropic/claude-3-sonnet', 0.00300, 0.01500),
    ('anthropic/claude-3-haiku', 0.00025, 0.00125),
    ('openai/gpt-4o', 0.00500, 0.01500),
    ('openai/gpt-4o-mini', 0.00015, 0.00060),
    ('openai/gpt-4.1', 0.01000, 0.03000),
    ('openai/gpt-4o-realtime', 0.01500, 0.06000),
    ('openai/gpt-3.5-turbo', 0.00050, 0.00150),
    ('perplexity/sonar', 0.00010, 0.00040),
    ('perplexity/sonar-pro', 0.00050, 0.00150),
    ('mistral/mistral-large', 0.00300, 0.01200),
    ('mistral/mistral-small', 0.00020, 0.00060),
    ('groq/llama-3.1-8b-instant', 0.00005, 0.00010),
    ('groq/llama-3.1-70b-versatile', 0.00059, 0.00079),
    ('google/gemini-1.5-pro', 0.00125, 0.00500),
    ('google/gemini-1.5-flash', 0.000075, 0.00030)
) AS seed
WHERE seed.model NOT IN (SELECT model FROM otel_2.model_costs_source);

-- The dictionary is not refreshed automatically; reload it so the new rows are visible.
SYSTEM RELOAD DICTIONARY otel_2.model_costs_dict;

docs/local_clickhouse_setup.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,33 @@ python examples/openai/openai_example_sync.py
4141
- ClickHouse:
4242
curl -s -u default:password "http://localhost:8123/?query=SHOW%20TABLES%20FROM%20otel_2"
4343
curl -s -u default:password "http://localhost:8123/?query=SELECT%20count()%20FROM%20otel_2.otel_traces%20WHERE%20TraceId%20=%20'<TRACE_ID>'"
44+
45+
Quick Start (Local ClickHouse)
46+
47+
Run these to get OSS working fast:
48+
1) Ensure ClickHouse is running (HTTP 8123, native 9000)
49+
2) Create DB and base schema
50+
curl -u default:password "http://localhost:8123/?query=CREATE%20DATABASE%20IF%20NOT%20EXISTS%20otel_2"
51+
curl -u default:password --data-binary @app/clickhouse/migrations/0000_init.sql "http://localhost:8123/?query="
52+
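3) Enable costs + span counts (the migration files in steps 3–4 each contain several `;`-separated statements, and the ClickHouse HTTP interface accepts one statement per request; if curl returns "Multi-statements are not allowed", run the file with `clickhouse-client --user default --password password --multiquery < FILE` instead)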
53+
curl -u default:password --data-binary @app/clickhouse/migrations/0001_udfs_and_pricing.sql "http://localhost:8123/?query="
54+
curl -u default:password --data-binary @app/clickhouse/migrations/0002_span_counts_mv.sql "http://localhost:8123/?query="
55+
4) Seed pricing
56+
# Option A (basic): small starter set
57+
curl -u default:password --data-binary @app/clickhouse/migrations/0003_seed_model_costs.sql "http://localhost:8123/?query="
58+
# Option B (full): full offline pricing parity
59+
curl -u default:password --data-binary @app/clickhouse/migrations/0004_seed_model_costs_full.sql "http://localhost:8123/?query="
60+
5) Quick verify
61+
curl -s -u default:password "http://localhost:8123/?query=SHOW%20FUNCTIONS%20LIKE%20'calculate_%25'"
62+
curl -s -u default:password "http://localhost:8123/?query=EXISTS%20TABLE%20otel_2.trace_span_counts"
63+
64+
Advanced (Optional): Deeper Verification
65+
66+
Use these to confirm everything loaded:
67+
- UDFs
68+
curl -s -u default:password "http://localhost:8123/?query=SHOW%20FUNCTIONS%20LIKE%20'normalize_model_name'"
69+
- Dictionary
70+
curl -s -u default:password "http://localhost:8123/?query=SELECT%20name,%20status,%20type%20FROM%20system.dictionaries%20WHERE%20name%3D'model_costs_dict'"
71+
curl -s -u default:password "http://localhost:8123/?query=SELECT%20dictGetOrDefault('otel_2.model_costs_dict','prompt_cost_per_1k','gpt-4o-mini',0.)"
72+
- MV/table
73+
curl -s -u default:password "http://localhost:8123/?query=EXISTS%20TABLE%20otel_2.mv_trace_span_counts"

0 commit comments

Comments
 (0)