diff --git a/src/trace_processor/perfetto_sql/generator/structured_query_generator.cc b/src/trace_processor/perfetto_sql/generator/structured_query_generator.cc index 2091ea5dcc7..393bf0cf65d 100644 --- a/src/trace_processor/perfetto_sql/generator/structured_query_generator.cc +++ b/src/trace_processor/perfetto_sql/generator/structured_query_generator.cc @@ -1202,11 +1202,13 @@ base::StatusOr GeneratorImpl::CreateSlices( // Use _interval_create! macro which delegates to // __intrinsic_interval_create, an O(n+m) two-pointer C++ implementation. - // The macro expects inputs with a `ts` column, so we rename if needed. + // The macro expects inputs with `id` and `ts` columns. We pass a dummy id + // since structured queries don't need start_id/end_id, and exclude those + // columns from the output. return base::StackString<1024>( - "(SELECT * FROM _interval_create!(" - "(SELECT %s AS ts FROM %s), " - "(SELECT %s AS ts FROM %s)))", + "(SELECT ts, dur FROM _interval_create!(" + "(SELECT 1 AS id, %s AS ts FROM %s), " + "(SELECT 1 AS id, %s AS ts FROM %s)))", starts_ts_col.c_str(), starts_table.c_str(), ends_ts_col.c_str(), ends_table.c_str()) .ToStdString(); diff --git a/src/trace_processor/perfetto_sql/generator/structured_query_generator_unittest.cc b/src/trace_processor/perfetto_sql/generator/structured_query_generator_unittest.cc index e8d12905ac8..275a35c6dc2 100644 --- a/src/trace_processor/perfetto_sql/generator/structured_query_generator_unittest.cc +++ b/src/trace_processor/perfetto_sql/generator/structured_query_generator_unittest.cc @@ -4150,7 +4150,7 @@ TEST(StructuredQueryGeneratorTest, ExperimentalCreateSlicesBasic) { sq_2 AS (SELECT * FROM end_events), sq_1 AS (SELECT * FROM start_events), sq_0 AS ( - SELECT * FROM (SELECT * FROM _interval_create!((SELECT ts AS ts FROM sq_1), (SELECT ts AS ts FROM sq_2))) + SELECT * FROM (SELECT ts, dur FROM _interval_create!((SELECT 1 AS id, ts AS ts FROM sq_1), (SELECT 1 AS id, ts AS ts FROM sq_2))) ) SELECT * FROM sq_0 )")); @@ -4200,7 +4200,7 @@ TEST(StructuredQueryGeneratorTest, sq_2 AS (SELECT * FROM slice WHERE name GLOB '*_end'), sq_1 AS (SELECT * FROM slice WHERE name GLOB '*_begin'), sq_0 AS ( - SELECT * FROM (SELECT * FROM _interval_create!((SELECT ts AS ts FROM sq_1), (SELECT ts AS ts FROM sq_2))) + SELECT * FROM (SELECT ts, dur FROM _interval_create!((SELECT 1 AS id, ts AS ts FROM sq_1), (SELECT 1 AS id, ts AS ts FROM sq_2))) ) SELECT * FROM sq_0 )")); @@ -4229,9 +4229,10 @@ TEST(StructuredQueryGeneratorTest, )"); auto ret = gen.Generate(proto.data(), proto.size()); ASSERT_OK_AND_ASSIGN(std::string res, ret); - EXPECT_THAT( - res, testing::HasSubstr("_interval_create!((SELECT acquire_ts AS ts FROM " - "sq_1), (SELECT release_ts AS ts FROM sq_2))")); + EXPECT_THAT(res, + testing::HasSubstr( + "_interval_create!((SELECT 1 AS id, acquire_ts AS ts FROM " + "sq_1), (SELECT 1 AS id, release_ts AS ts FROM sq_2))")); } TEST(StructuredQueryGeneratorTest, ExperimentalCreateSlicesWithFilters) { @@ -4496,8 +4497,9 @@ TEST(StructuredQueryGeneratorTest, auto ret = gen.Generate(proto.data(), proto.size()); ASSERT_OK_AND_ASSIGN(std::string res, ret); // Should default starts_ts_column to "ts" - EXPECT_THAT(res, testing::HasSubstr("_interval_create!((SELECT ts AS ts FROM " - "sq_1), (SELECT ts AS ts FROM sq_2))")); + EXPECT_THAT(res, testing::HasSubstr( + "_interval_create!((SELECT 1 AS id, ts AS ts FROM " + "sq_1), (SELECT 1 AS id, ts AS ts FROM sq_2))")); } TEST(StructuredQueryGeneratorTest, @@ -4523,8 +4525,9 @@ TEST(StructuredQueryGeneratorTest, auto ret = gen.Generate(proto.data(), proto.size()); ASSERT_OK_AND_ASSIGN(std::string res, ret); // Should default ends_ts_column to "ts" - EXPECT_THAT(res, testing::HasSubstr("_interval_create!((SELECT ts AS ts FROM " - "sq_1), (SELECT ts AS ts FROM sq_2))")); + EXPECT_THAT(res, testing::HasSubstr( + "_interval_create!((SELECT 1 AS id, ts AS ts FROM " + "sq_1), (SELECT 1 AS id, ts AS ts FROM sq_2))")); } TEST(StructuredQueryGeneratorTest, @@ -4554,7 +4557,7 @@ TEST(StructuredQueryGeneratorTest, sq_2 AS (SELECT * FROM end_events), sq_1 AS (SELECT * FROM start_events), sq_0 AS ( - SELECT * FROM (SELECT * FROM _interval_create!((SELECT ts AS ts FROM sq_1), (SELECT ts AS ts FROM sq_2))) + SELECT * FROM (SELECT ts, dur FROM _interval_create!((SELECT 1 AS id, ts AS ts FROM sq_1), (SELECT 1 AS id, ts AS ts FROM sq_2))) ) SELECT * FROM sq_0 )")); @@ -4642,8 +4645,9 @@ TEST(StructuredQueryGeneratorTest, auto ret = gen.Generate(proto.data(), proto.size()); ASSERT_OK_AND_ASSIGN(std::string res, ret); // Empty string should default to "ts" - EXPECT_THAT(res, testing::HasSubstr("_interval_create!((SELECT ts AS ts FROM " - "sq_1), (SELECT ts AS ts FROM sq_2))")); + EXPECT_THAT(res, testing::HasSubstr( + "_interval_create!((SELECT 1 AS id, ts AS ts FROM " + "sq_1), (SELECT 1 AS id, ts AS ts FROM sq_2))")); } TEST(StructuredQueryGeneratorTest, ExperimentalCreateSlicesEmptyEndsTsColumn) { @@ -4669,8 +4673,9 @@ TEST(StructuredQueryGeneratorTest, ExperimentalCreateSlicesEmptyEndsTsColumn) { auto ret = gen.Generate(proto.data(), proto.size()); ASSERT_OK_AND_ASSIGN(std::string res, ret); // Empty string should default to "ts" - EXPECT_THAT(res, testing::HasSubstr("_interval_create!((SELECT ts AS ts FROM " - "sq_1), (SELECT ts AS ts FROM sq_2))")); + EXPECT_THAT(res, testing::HasSubstr( + "_interval_create!((SELECT 1 AS id, ts AS ts FROM " + "sq_1), (SELECT 1 AS id, ts AS ts FROM sq_2))")); } TEST(StructuredQueryGeneratorTest, @@ -4714,7 +4719,7 @@ TEST(StructuredQueryGeneratorTest, sq_2 AS (SELECT * FROM end_events WHERE ts < 0), sq_1 AS (SELECT * FROM start_events WHERE ts < 0), sq_0 AS ( - SELECT * FROM (SELECT * FROM _interval_create!((SELECT ts AS ts FROM sq_1), (SELECT ts AS ts FROM sq_2))) + SELECT * FROM (SELECT ts, dur FROM _interval_create!((SELECT 1 AS id, ts AS ts FROM sq_1), (SELECT 1 AS id, ts AS ts FROM sq_2))) ) SELECT * FROM sq_0 )")); @@ -4759,7 +4764,7 @@ TEST(StructuredQueryGeneratorTest, ExperimentalCreateSlicesNoMatchingEnds) { sq_2 AS (SELECT * FROM events WHERE ts > 10000), sq_1 AS (SELECT * FROM events WHERE ts < 1000), sq_0 AS ( - SELECT * FROM (SELECT * FROM _interval_create!((SELECT ts AS ts FROM sq_1), (SELECT ts AS ts FROM sq_2))) + SELECT * FROM (SELECT ts, dur FROM _interval_create!((SELECT 1 AS id, ts AS ts FROM sq_1), (SELECT 1 AS id, ts AS ts FROM sq_2))) ) SELECT * FROM sq_0 )")); diff --git a/src/trace_processor/perfetto_sql/intrinsics/functions/create_intervals.cc b/src/trace_processor/perfetto_sql/intrinsics/functions/create_intervals.cc index ea81951ed59..243c6ffd3fd 100644 --- a/src/trace_processor/perfetto_sql/intrinsics/functions/create_intervals.cc +++ b/src/trace_processor/perfetto_sql/intrinsics/functions/create_intervals.cc @@ -63,8 +63,9 @@ struct IntervalCreate : public sqlite::Function { auto* ends = sqlite::value::Pointer( argv[1], SortedTimestamps::kName); - std::vector col_names{"ts", "dur"}; - std::vector col_types{ColType::kInt64, ColType::kInt64}; + std::vector col_names{"ts", "dur", "start_id", "end_id"}; + std::vector col_types{ColType::kInt64, ColType::kInt64, + ColType::kInt64, ColType::kInt64}; dataframe::AdhocDataframeBuilder builder( col_names, GetUserData(ctx)->pool, @@ -80,7 +81,9 @@ struct IntervalCreate : public sqlite::Function { } const auto& start_ts = starts->timestamps; + const auto& start_ids = starts->ids; const auto& end_ts = ends->timestamps; + const auto& end_ids = ends->ids; // Two-pointer matching: O(n + m). // Both arrays are already sorted (guaranteed by ORDER BY in the SQL macro). @@ -96,6 +99,8 @@ struct IntervalCreate : public sqlite::Function { } builder.PushNonNullUnchecked(0, start_ts[i]); builder.PushNonNullUnchecked(1, end_ts[end_idx] - start_ts[i]); + builder.PushNonNullUnchecked(2, start_ids[i]); + builder.PushNonNullUnchecked(3, end_ids[end_idx]); } SQLITE_ASSIGN_OR_RETURN(ctx, auto ret_table, std::move(builder).Build()); diff --git a/src/trace_processor/perfetto_sql/intrinsics/functions/type_builders.cc b/src/trace_processor/perfetto_sql/intrinsics/functions/type_builders.cc index 9db1884a194..69c69a442c5 100644 --- a/src/trace_processor/perfetto_sql/intrinsics/functions/type_builders.cc +++ b/src/trace_processor/perfetto_sql/intrinsics/functions/type_builders.cc @@ -436,13 +436,13 @@ struct IntervalTreeIntervalsAgg } }; -// An SQL aggregate function which collects timestamps into a vector. +// An SQL aggregate function which collects (id, ts) pairs into vectors. // Used as input to __intrinsic_interval_create. The caller is responsible -// for ensuring timestamps are passed in sorted order (e.g. via ORDER BY). +// for ensuring rows are passed in sorted order by ts (e.g. via ORDER BY). struct TimestampSetAgg : public sqlite::AggregateFunction { static constexpr char kName[] = "__intrinsic_timestamp_set_agg"; - static constexpr int kArgCount = 1; + static constexpr int kArgCount = 2; struct AggCtx : sqlite::AggregateContext { perfetto_sql::SortedTimestamps data; }; @@ -450,7 +450,8 @@ struct TimestampSetAgg static void Step(sqlite3_context* ctx, int argc, sqlite3_value** argv) { PERFETTO_DCHECK(argc == kArgCount); auto& data = AggCtx::GetOrCreateContextForStep(ctx).data; - data.timestamps.push_back(sqlite::value::Int64(argv[0])); + data.ids.push_back(sqlite::value::Int64(argv[0])); + data.timestamps.push_back(sqlite::value::Int64(argv[1])); } static void Final(sqlite3_context* ctx) { diff --git a/src/trace_processor/perfetto_sql/intrinsics/types/sorted_timestamps.h b/src/trace_processor/perfetto_sql/intrinsics/types/sorted_timestamps.h index 8bef9ebbcd4..b498635d0d0 100644 --- a/src/trace_processor/perfetto_sql/intrinsics/types/sorted_timestamps.h +++ b/src/trace_processor/perfetto_sql/intrinsics/types/sorted_timestamps.h @@ -22,13 +22,16 @@ namespace perfetto::trace_processor::perfetto_sql { -// A sorted collection of timestamps used as an intermediate type for the -// interval_create intrinsic function. Timestamps are collected via an -// aggregate function and must be passed in ascending order (via ORDER BY) -// to the scalar function. +// A sorted collection of timestamps (with associated IDs) used as an +// intermediate type for the interval_create intrinsic function. Timestamps +// are collected via an aggregate function and must be passed in ascending +// order (via ORDER BY) to the scalar function. struct SortedTimestamps { static constexpr char kName[] = "SORTED_TIMESTAMPS"; std::vector timestamps; + // Row IDs corresponding 1:1 with timestamps. These are the original row + // IDs from the input table, used to produce start_id/end_id in output. + std::vector ids; }; } // namespace perfetto::trace_processor::perfetto_sql diff --git a/src/trace_processor/perfetto_sql/stdlib/intervals/create_intervals.sql b/src/trace_processor/perfetto_sql/stdlib/intervals/create_intervals.sql index bc44d20d725..77acda531dc 100644 --- a/src/trace_processor/perfetto_sql/stdlib/intervals/create_intervals.sql +++ b/src/trace_processor/perfetto_sql/stdlib/intervals/create_intervals.sql @@ -17,20 +17,22 @@ -- intervals by matching each start with the next end timestamp strictly greater -- than it. -- --- Both input tables must have a column named `ts`. Uses an efficient O(n+m) --- two-pointer algorithm implemented in C++. +-- Both input tables must have columns named `id` and `ts`. Uses an efficient +-- O(n+m) two-pointer algorithm implemented in C++. -- -- Example: -- ``` -- SELECT * FROM _interval_create!( --- (SELECT ts FROM starts_table), --- (SELECT ts FROM ends_table) +-- (SELECT id, ts FROM starts_table), +-- (SELECT id, ts FROM ends_table) -- ) -- ``` CREATE PERFETTO MACRO _interval_create( - -- Table or subquery containing start timestamps (must have a `ts` column). + -- Table or subquery containing start timestamps (must have `id` and `ts` + -- columns). starts_table TableOrSubquery, - -- Table or subquery containing end timestamps (must have a `ts` column). + -- Table or subquery containing end timestamps (must have `id` and `ts` + -- columns). ends_table TableOrSubquery ) -- Table with the schema: @@ -38,18 +40,25 @@ CREATE PERFETTO MACRO _interval_create( -- The start timestamp. -- dur DURATION, -- The duration from start to the matched end. +-- start_id LONG, +-- The id of the matched start row. +-- end_id LONG, +-- The id of the matched end row. RETURNS TableOrSubquery AS ( SELECT c0 AS ts, - c1 AS dur + c1 AS dur, + c2 AS start_id, + c3 AS end_id FROM __intrinsic_table_ptr( __intrinsic_interval_create( ( SELECT - __intrinsic_timestamp_set_agg(ordered_s.ts) + __intrinsic_timestamp_set_agg(ordered_s.id, ordered_s.ts) FROM ( SELECT + id, ts FROM $starts_table ORDER BY @@ -58,9 +67,10 @@ RETURNS TableOrSubquery AS ), ( SELECT - __intrinsic_timestamp_set_agg(ordered_e.ts) + __intrinsic_timestamp_set_agg(ordered_e.id, ordered_e.ts) FROM ( SELECT + id, ts FROM $ends_table ORDER BY @@ -70,5 +80,8 @@ RETURNS TableOrSubquery AS ) ) WHERE - __intrinsic_table_ptr_bind(c0, 'ts') AND __intrinsic_table_ptr_bind(c1, 'dur') + __intrinsic_table_ptr_bind(c0, 'ts') + AND __intrinsic_table_ptr_bind(c1, 'dur') + AND __intrinsic_table_ptr_bind(c2, 'start_id') + AND __intrinsic_table_ptr_bind(c3, 'end_id') ); diff --git a/test/trace_processor/diff_tests/stdlib/intervals/create_intervals_tests.py b/test/trace_processor/diff_tests/stdlib/intervals/create_intervals_tests.py index 7d57166c8fd..423ac3b06c5 100644 --- a/test/trace_processor/diff_tests/stdlib/intervals/create_intervals_tests.py +++ b/test/trace_processor/diff_tests/stdlib/intervals/create_intervals_tests.py @@ -25,19 +25,19 @@ def test_create_intervals_basic(self): trace=TextProto(""), query=""" INCLUDE PERFETTO MODULE intervals.create_intervals; - WITH starts(ts) AS ( - VALUES (100), (200), (300) + WITH starts(id, ts) AS ( + VALUES (10, 100), (11, 200), (12, 300) ), - ends(ts) AS ( - VALUES (150), (250), (350) + ends(id, ts) AS ( + VALUES (20, 150), (21, 250), (22, 350) ) SELECT * FROM _interval_create!(starts, ends) """, out=Csv(""" - "ts","dur" - 100,50 - 200,50 - 300,50 + "ts","dur","start_id","end_id" + 100,50,10,20 + 200,50,11,21 + 300,50,12,22 """)) def test_create_intervals_multiple_starts_same_end(self): @@ -45,19 +45,19 @@ def test_create_intervals_multiple_starts_same_end(self): trace=TextProto(""), query=""" INCLUDE PERFETTO MODULE intervals.create_intervals; - WITH starts(ts) AS ( - VALUES (100), (200), (300) + WITH starts(id, ts) AS ( + VALUES (10, 100), (11, 200), (12, 300) ), - ends(ts) AS ( - VALUES (500) + ends(id, ts) AS ( + VALUES (20, 500) ) SELECT * FROM _interval_create!(starts, ends) """, out=Csv(""" - "ts","dur" - 100,400 - 200,300 - 300,200 + "ts","dur","start_id","end_id" + 100,400,10,20 + 200,300,11,20 + 300,200,12,20 """)) def test_create_intervals_no_matching_ends(self): @@ -65,16 +65,16 @@ def test_create_intervals_no_matching_ends(self): trace=TextProto(""), query=""" INCLUDE PERFETTO MODULE intervals.create_intervals; - WITH starts(ts) AS ( - VALUES (500), (600) + WITH starts(id, ts) AS ( + VALUES (10, 500), (11, 600) ), - ends(ts) AS ( - VALUES (100), (200) + ends(id, ts) AS ( + VALUES (20, 100), (21, 200) ) SELECT * FROM _interval_create!(starts, ends) """, out=Csv(""" - "ts","dur" + "ts","dur","start_id","end_id" """)) def test_create_intervals_interleaved(self): @@ -82,19 +82,19 @@ def test_create_intervals_interleaved(self): trace=TextProto(""), query=""" INCLUDE PERFETTO MODULE intervals.create_intervals; - WITH starts(ts) AS ( - VALUES (10), (30), (50) + WITH starts(id, ts) AS ( + VALUES (1, 10), (2, 30), (3, 50) ), - ends(ts) AS ( - VALUES (20), (40), (60) + ends(id, ts) AS ( + VALUES (4, 20), (5, 40), (6, 60) ) SELECT * FROM _interval_create!(starts, ends) """, out=Csv(""" - "ts","dur" - 10,10 - 30,10 - 50,10 + "ts","dur","start_id","end_id" + 10,10,1,4 + 30,10,2,5 + 50,10,3,6 """)) def test_create_intervals_partial_match(self): @@ -102,19 +102,19 @@ def test_create_intervals_partial_match(self): trace=TextProto(""), query=""" INCLUDE PERFETTO MODULE intervals.create_intervals; - WITH starts(ts) AS ( - VALUES (10), (30), (50), (70) + WITH starts(id, ts) AS ( + VALUES (1, 10), (2, 30), (3, 50), (4, 70) ), - ends(ts) AS ( - VALUES (25), (55) + ends(id, ts) AS ( + VALUES (5, 25), (6, 55) ) SELECT * FROM _interval_create!(starts, ends) """, out=Csv(""" - "ts","dur" - 10,15 - 30,25 - 50,5 + "ts","dur","start_id","end_id" + 10,15,1,5 + 30,25,2,6 + 50,5,3,6 """)) def test_create_intervals_empty_starts(self): @@ -122,16 +122,16 @@ def test_create_intervals_empty_starts(self): trace=TextProto(""), query=""" INCLUDE PERFETTO MODULE intervals.create_intervals; - WITH starts(ts) AS ( - SELECT 0 WHERE 0 + WITH starts(id, ts) AS ( + SELECT 0, 0 WHERE 0 ), - ends(ts) AS ( - VALUES (100), (200) + ends(id, ts) AS ( + VALUES (1, 100), (2, 200) ) SELECT * FROM _interval_create!(starts, ends) """, out=Csv(""" - "ts","dur" + "ts","dur","start_id","end_id" """)) def test_create_intervals_equal_start_and_end(self): @@ -139,18 +139,18 @@ def test_create_intervals_equal_start_and_end(self): trace=TextProto(""), query=""" INCLUDE PERFETTO MODULE intervals.create_intervals; - WITH starts(ts) AS ( - VALUES (100), (200) + WITH starts(id, ts) AS ( + VALUES (1, 100), (2, 200) ), - ends(ts) AS ( - VALUES (100), (200), (300) + ends(id, ts) AS ( + VALUES (3, 100), (4, 200), (5, 300) ) SELECT * FROM _interval_create!(starts, ends) """, out=Csv(""" - "ts","dur" - 100,100 - 200,100 + "ts","dur","start_id","end_id" + 100,100,1,4 + 200,100,2,5 """)) def test_create_intervals_unsorted_input(self): @@ -158,17 +158,17 @@ def test_create_intervals_unsorted_input(self): trace=TextProto(""), query=""" INCLUDE PERFETTO MODULE intervals.create_intervals; - WITH starts(ts) AS ( - VALUES (300), (100), (200) + WITH starts(id, ts) AS ( + VALUES (1, 300), (2, 100), (3, 200) ), - ends(ts) AS ( - VALUES (350), (150), (250) + ends(id, ts) AS ( + VALUES (4, 350), (5, 150), (6, 250) ) SELECT * FROM _interval_create!(starts, ends) """, out=Csv(""" - "ts","dur" - 100,50 - 200,50 - 300,50 + "ts","dur","start_id","end_id" + 100,50,2,5 + 200,50,3,6 + 300,50,1,4 """))