Skip to content

Commit 1531386

Browse files
macoboukutaht
andauthored
FULL join for time:hour as well as time:minute (#5715)
* FULL join for time:hour as well as time:minute Follow-up to https://github.com/plausible/analytics/pull/5694/files#r2321567271 A session might be active over multiple hours, but not (currently) reported as such when requesting only specific metrics per hour. This fixes that problem. * Handle full join logic correctly --------- Co-authored-by: Uku Taht <[email protected]>
1 parent d02636d commit 1531386

File tree

5 files changed

+55
-17
lines changed

5 files changed

+55
-17
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ All notable changes to this project will be documented in this file.
3030
- Realtime and hourly graphs of visit duration, views per visit no longer overcount due to long-lasting sessions, instead showing each visit
3131
when they occurred.
3232
- Fixed realtime and hourly graphs of visits overcounting
33+
- When reporting only `visitors` and `visits` per hour, count visits in each hour they were active in.
3334

3435
## v3.0.0 - 2025-04-11
3536

lib/plausible/stats/query_optimizer.ex

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,12 +318,12 @@ defmodule Plausible.Stats.QueryOptimizer do
318318
# Normally we can always LEFT JOIN as this is more performant and tables
319319
# are expected to contain the same dimensions.
320320

321-
# The only exception is using the "time:minute" dimension where the sessions
321+
# The only exception is using the "time:minute"/"time:hour" dimension where the sessions
322322
# subquery might return more rows than the events one. That's because we're
323323
# counting sessions in all time buckets they were active in even if no event
324-
# occurred during that particular minute.
324+
# occurred during that particular bucket.
325325
defp set_sql_join_type(query) do
326-
if "time:minute" in query.dimensions do
326+
if "time:minute" in query.dimensions or "time:hour" in query.dimensions do
327327
Query.set(query, sql_join_type: :full)
328328
else
329329
query

lib/plausible/stats/sql/query_builder.ex

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
185185
)
186186
|> select_join_metrics(query, query.metrics -- [:sample_percent])
187187
end)
188-
|> select_dimensions(main_query)
188+
|> select_dimensions(main_query, queries)
189189
end
190190

191191
# NOTE: Old queries do their own pagination
@@ -223,19 +223,31 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
223223
end)
224224
end
225225

226-
defp select_dimensions(q, query) do
226+
defp select_dimensions(q, query, queries) do
227227
Enum.reduce(query.dimensions, q, fn dimension, q ->
228-
# We generally select dimensions from the left-most table. Only exception is time:minute where
229-
# we use sessions table as sessions are considered on-going during the whole period.
230-
if query.sql_join_type == :full and "time:minute" == dimension do
231-
select_merge_as(q, [..., x], %{
232-
shortname(query, dimension) => field(x, ^shortname(query, dimension))
233-
})
234-
else
235-
select_merge_as(q, [x], %{
236-
shortname(query, dimension) => field(x, ^shortname(query, dimension))
237-
})
228+
case select_from(dimension, query, queries) do
229+
:leftmost_table ->
230+
select_merge_as(q, [x], %{
231+
shortname(query, dimension) => field(x, ^shortname(query, dimension))
232+
})
233+
234+
:rightmost_table ->
235+
select_merge_as(q, [..., x], %{
236+
shortname(query, dimension) => field(x, ^shortname(query, dimension))
237+
})
238238
end
239239
end)
240240
end
241+
242+
defp select_from(dimension, query, queries) do
243+
smeared? = Enum.any?(queries, fn {_table_type, query, _q} -> query.smear_session_metrics end)
244+
245+
cond do
246+
query.sql_join_type == :left -> :leftmost_table
247+
# We generally select dimensions from the left-most table. Only exception is time:minute/time:hour where
248+
# we use sessions table as smeared sessions are considered on-going during the whole period.
249+
dimension in ["time:minute", "time:hour"] and smeared? -> :rightmost_table
250+
true -> :leftmost_table
251+
end
252+
end
241253
end

test/plausible/stats/query_optimizer_test.exs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,12 +365,14 @@ defmodule Plausible.Stats.QueryOptimizerTest do
365365
end
366366

367367
describe "set_sql_join_type" do
368-
test "updates sql_join_type to :full if time:minute dimension is present" do
368+
test "updates sql_join_type to :full if time:minute or time:hour dimension is present" do
369369
assert perform(%{dimensions: ["time:minute"]}).sql_join_type == :full
370+
assert perform(%{dimensions: ["time:hour"]}).sql_join_type == :full
370371
end
371372

372373
test "keeps default sql_join_type otherwise" do
373-
assert perform(%{dimensions: ["time:hour"]}).sql_join_type == :left
374+
assert perform(%{dimensions: ["time:day"]}).sql_join_type == :left
375+
assert perform(%{dimensions: []}).sql_join_type == :left
374376
end
375377
end
376378
end

test/plausible_web/controllers/api/external_stats_controller/query_test.exs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1372,6 +1372,29 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do
13721372
]
13731373
end
13741374

1375+
test "breakdown by time:hour (internal API), counts visitors and visits in all buckets their session was active in",
1376+
%{conn: conn, site: site} do
1377+
populate_stats(site, [
1378+
build(:pageview, user_id: @user_id, timestamp: ~N[2021-01-01 00:20:00]),
1379+
build(:pageview, user_id: @user_id, timestamp: ~N[2021-01-01 00:40:00]),
1380+
build(:pageview, user_id: @user_id, timestamp: ~N[2021-01-01 01:00:00]),
1381+
build(:pageview, user_id: @user_id, timestamp: ~N[2021-01-01 01:20:00])
1382+
])
1383+
1384+
conn =
1385+
post(conn, "/api/v2/query-internal-test", %{
1386+
"site_id" => site.domain,
1387+
"metrics" => ["visitors", "visits", "visit_duration"],
1388+
"date_range" => ["2021-01-01", "2021-01-02"],
1389+
"dimensions" => ["time:hour"]
1390+
})
1391+
1392+
assert json_response(conn, 200)["results"] == [
1393+
%{"dimensions" => ["2021-01-01 00:00:00"], "metrics" => [1, 1, 0]},
1394+
%{"dimensions" => ["2021-01-01 01:00:00"], "metrics" => [1, 1, 3600]}
1395+
]
1396+
end
1397+
13751398
test "shows hourly data for a certain date with time_labels", %{conn: conn, site: site} do
13761399
populate_stats(site, [
13771400
build(:pageview, user_id: @user_id, timestamp: ~N[2021-01-01 00:00:00]),

0 commit comments

Comments
 (0)