Add function that returns several landmark offsets

MarcialRosales · MarcialRosales · commit 56d966534629 · 2026-03-05T16:37:56.000+01:00
diff --git a/src/osiris_log.erl b/src/osiris_log.erl
@@ -57,6 +57,8 @@
          directory/1,
          delete_directory/1,
          counter_fields/0,
+         stream_offset_landmarks/1,
+         last_offset_and_timestamp/1,
          make_counter/1,
          generate_log/4]).
 
@@ -3504,6 +3506,210 @@ write_in_chunks(ToWrite, MsgsPerChunk, Msg, W0) when ToWrite > 0 ->
 write_in_chunks(_, _, _, W) ->
     W.
 
+%% Scans all index files of the log at Dir (or at the dir of a config map)
+%% and returns five landmarks, each an {Offset, Timestamp} pair:
+%%   first         - the first indexed chunk
+%%   p25, p50, p75 - the chunk whose id is closest to 25%, 50% and 75% of
+%%                   the chunk id range; a percent position may not fall on
+%%                   a chunk boundary, so the nearest chunk is chosen
+%%   last          - the last offset in the log with the last chunk's
+%%                   timestamp; if that lookup fails, the last indexed
+%%                   chunk is used instead
+%% Every index record is read, so the cost grows with the number of chunks.
+%%
+%% Returns {error, empty} when no chunk is indexed. A failure to open an
+%% index file (for instance one removed after the directory was listed) is
+%% returned as {error, Reason} rather than crashing with a case_clause.
+-spec stream_offset_landmarks(file:filename_all() | config()) ->
+    {ok, #{first => {offset(), osiris:timestamp()},
+           last => {offset(), osiris:timestamp()},
+           p25 => {offset(), osiris:timestamp()},
+           p50 => {offset(), osiris:timestamp()},
+           p75 => {offset(), osiris:timestamp()}}} |
+    {error, empty} |
+    {error, file:posix() | badarg | system_limit}.
+stream_offset_landmarks(#{dir := Dir}) ->
+    stream_offset_landmarks(Dir);
+stream_offset_landmarks(Dir) when ?IS_STRING(Dir) ->
+    IdxFiles = sorted_index_files(Dir),
+    case scan_index_chunks_files(IdxFiles, []) of
+        {ok, []} ->
+            %% No index file contributed a chunk record.
+            {error, empty};
+        {ok, [{FirstOffset, _FirstTs} = First | _] = Chunks} ->
+            {LastChunkId, _LastChunkTs} = LastChunk = lists:last(Chunks),
+            %% The scan only yields the id of the last chunk; ask for the
+            %% offset of its final record and fall back to the chunk itself.
+            Last = case last_offset_and_timestamp_from_files(IdxFiles) of
+                       {ok, L} -> L;
+                       _ -> LastChunk
+                   end,
+            %% With a single chunk Range is 0, every target equals
+            %% FirstOffset and all percentiles resolve to that chunk.
+            Range = LastChunkId - FirstOffset,
+            Targets = [FirstOffset + (Range * 25) div 100,
+                       FirstOffset + (Range * 50) div 100,
+                       FirstOffset + (Range * 75) div 100],
+            [P25, P50, P75] = closest_chunks_to_targets(Chunks, Targets),
+            {ok, #{first => First,
+                   last => Last,
+                   p25 => P25,
+                   p50 => P50,
+                   p75 => P75}};
+        {error, _} = Err ->
+            %% scan_index_chunks_files/2 passes on file:open/2 failures;
+            %% hand them to the caller instead of failing the case.
+            Err
+    end.
+
+%% Returns {ok, {LastOffset, Ts}} for the log in Dir: LastOffset is the offset
+%% of the very last record (not the last chunk id), Ts the last chunk's timestamp.
+-spec last_offset_and_timestamp(file:filename_all()) ->
+    {ok, {offset(), osiris:timestamp()}} | {error, empty}.
+last_offset_and_timestamp(Dir) ->
+    IdxFiles = sorted_index_files(Dir),
+    last_offset_and_timestamp_from_files(IdxFiles).
+
+%% Picks the last entry of non_empty_index_files(IdxFiles) and reads the
+%% last offset and timestamp from it; {error, empty} if there is no such file.
+last_offset_and_timestamp_from_files(IdxFiles) ->
+    case lists:reverse(non_empty_index_files(IdxFiles)) of
+        [LastIdxFile | _] ->
+            last_offset_and_timestamp_from_file(LastIdxFile);
+        [] -> {error, empty}
+    end.
+
+%% Returns {ok, {LastOffset, Timestamp}} for the last record of LastIdxFile,
+%% or {error, empty} if the index cannot be opened, is too short to hold a
+%% record, or its last record has a zero chunk id and timestamp. If the segment
+%% or chunk header is unreadable, the index record's id and timestamp are used.
+last_offset_and_timestamp_from_file(LastIdxFile) ->
+    case file:open(LastIdxFile, [read, raw, binary]) of
+        {ok, IdxFd} ->
+            try last_idx_record_fields(IdxFd) of
+                {ok, ChunkId, IdxTs, FilePos} ->
+                    SegFile = segment_from_index_file(LastIdxFile),
+                    {ok, last_offset_from_segment(SegFile, FilePos, ChunkId, IdxTs)};
+                empty -> {error, empty}
+            after
+                _ = file:close(IdxFd)
+            end;
+        _ -> {error, empty}
+    end.
+
+%% Reads the last whole index record: {ok, ChunkId, Timestamp, FilePos} | empty.
+last_idx_record_fields(IdxFd) ->
+    case position_at_idx_record_boundary(IdxFd, eof) of
+        {ok, Pos} when Pos >= ?IDX_HEADER_SIZE + ?INDEX_RECORD_SIZE_B ->
+            case file:pread(IdxFd, Pos - ?INDEX_RECORD_SIZE_B, ?INDEX_RECORD_SIZE_B) of
+                {ok, <<ChunkId:64/unsigned, IdxTs:64/signed, _Epoch:64/unsigned,
+                       FilePos:32/unsigned, _ChType:8/unsigned>>}
+                  when ChunkId =/= 0 orelse IdxTs =/= 0 ->
+                    {ok, ChunkId, IdxTs, FilePos};
+                _ -> empty
+            end;
+        _ -> empty
+    end.
+
+%% Reads the chunk header at FilePos in SegFile and returns the offset of the
+%% chunk's last record; falls back to the index values when it cannot.
+last_offset_from_segment(SegFile, FilePos, ChunkId, IdxTs) ->
+    case file:open(SegFile, [read, raw, binary]) of
+        {ok, SegFd} ->
+            try file:pread(SegFd, FilePos, ?HEADER_SIZE_B) of
+                {ok, <<_:32, NumRecords:32/unsigned, SegTs:64/signed, _/binary>>} ->
+                    %% NOTE(review): index timestamps below 10^12 are replaced by
+                    %% the chunk header's timestamp - confirm the intent.
+                    Ts = if IdxTs < 1000000000000 -> SegTs; true -> IdxTs end,
+                    {ChunkId + NumRecords - 1, Ts};
+                _ -> {ChunkId, IdxTs}
+            after
+                _ = file:close(SegFd)
+            end;
+        _ -> {ChunkId, IdxTs}
+    end.
+
+%% Collects the chunks of every index file, in the order the files are given.
+scan_index_chunks_files([IdxFile | Rest], Acc) ->
+    case scan_one_index_file(IdxFile) of
+        {ok, Chunks} -> scan_index_chunks_files(Rest, lists:reverse(Chunks, Acc));
+        {error, _} = Err -> Err
+    end;
+scan_index_chunks_files([], Acc) ->
+    %% Acc is kept in reverse order while scanning; flip it once at the end.
+    {ok, lists:reverse(Acc)}.
+
+%% Scans IdxFile from just past its header; open errors are returned as is.
+scan_one_index_file(IdxFile) ->
+    case file:open(IdxFile, [read, raw, binary]) of
+        {error, _} = OpenErr -> OpenErr;
+        {ok, IdxFd} ->
+            try
+                {ok, _} = file:position(IdxFd, ?IDX_HEADER_SIZE),
+                scan_index_records(IdxFd, [])
+            after
+                _ = file:close(IdxFd)
+            end
+    end.
+
+%% Reads fixed-size index records from Fd until eof and returns
+%% {ok, [{ChunkId, Timestamp}]} in file order. Records whose chunk id and
+%% timestamp are both zero, and reads that do not match the full record
+%% layout, are skipped.
+scan_index_records(Fd, Acc) ->
+    case file:read(Fd, ?INDEX_RECORD_SIZE_B) of
+        eof ->
+            {ok, lists:reverse(Acc)};
+        {ok, <<ChunkId:64/unsigned, Timestamp:64/signed, _Epoch:64/unsigned,
+               _FilePos:32/unsigned, _ChType:8/unsigned>>}
+          when ChunkId =/= 0 orelse Timestamp =/= 0 ->
+            scan_index_records(Fd, [{ChunkId, Timestamp} | Acc]);
+        {ok, _Skipped} ->
+            scan_index_records(Fd, Acc)
+    end.
+
+%% Maps the three target offsets [T25, T50, T75] to the chunks whose offsets
+%% are nearest to them, in the same order. Chunks must be non-empty and
+%% sorted by offset. The list is turned into a tuple once so that
+%% closest_to_target/2 can index into it for each target.
+closest_chunks_to_targets(Chunks, [_T25, _T50, _T75] = Targets) ->
+    Indexed = list_to_tuple(Chunks),
+    [closest_to_target(Indexed, Target) || Target <- Targets].
+
+%% Binary search: 1-based index of the first {Offset, _} in the offset-sorted
+%% Tuple with Offset >= Target, or tuple_size(Tuple) + 1 if there is none.
+find_first_ge(Tuple, Target) ->
+    find_first_ge(Tuple, Target, 1, tuple_size(Tuple)).
+
+find_first_ge(Tuple, Target, Low, High) when Low < High ->
+    Mid = (Low + High) div 2,
+    {MidOffset, _} = element(Mid, Tuple),
+    case MidOffset >= Target of
+        true -> find_first_ge(Tuple, Target, Low, Mid);
+        false -> find_first_ge(Tuple, Target, Mid + 1, High)
+    end;
+find_first_ge(Tuple, Target, Low, _High) ->
+    {LowOffset, _} = element(Low, Tuple),
+    case LowOffset >= Target of true -> Low; false -> Low + 1 end.
+
+%% Returns the {Offset, Timestamp} element of Tuple whose offset is nearest to
+%% Target. Tuple must be non-empty and sorted by offset. When Target is
+%% equally far from both neighbours, the one with the higher offset wins.
+closest_to_target(Tuple, Target) ->
+    case find_first_ge(Tuple, Target) of
+        Idx when Idx =< 1 ->
+            element(1, Tuple);
+        Idx when Idx > tuple_size(Tuple) ->
+            element(tuple_size(Tuple), Tuple);
+        Idx ->
+            {AboveOff, _} = Above = element(Idx, Tuple),
+            {BelowOff, _} = Below = element(Idx - 1, Tuple),
+            case abs(AboveOff - Target) =< abs(BelowOff - Target) of
+                true -> Above;
+                false -> Below
+            end
+    end.
+
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
 
diff --git a/test/osiris_log_SUITE.erl b/test/osiris_log_SUITE.erl
@@ -100,7 +100,12 @@ all_tests() ->
      read_ahead_send_file_on_off,
      resolve_offset_spec_empty,
      resolve_offset_spec_empty_directory,
-     resolve_offset_spec
+     resolve_offset_spec,
+     stream_offset_landmarks_empty,
+     stream_offset_landmarks_single_chunk,
+     stream_offset_landmarks_multiple_chunks,
+     stream_offset_landmarks_percentiles,
+     stream_offset_landmarks_config_map
     ].
 
 groups() ->
@@ -2049,6 +2054,104 @@ overview_with_missing_index_at_start(Config) ->
                         filename:join(?config(dir, Config), "*.index")))),
     ok.
 
+stream_offset_landmarks_empty(Config) ->
+    %% A log that was initialised but never written to, and a directory that
+    %% does not exist, both report {error, empty}.
+    EmptyDir = ?config(leader_dir, Config),
+    osiris_log:close(seed_log(EmptyDir, [], Config)),
+    ?assertEqual({error, empty}, osiris_log:stream_offset_landmarks(EmptyDir)),
+    MissingDir = filename:join(?config(priv_dir, Config), "stream_offset_landmarks_empty_nonexistent"),
+    ?assertEqual({error, empty}, osiris_log:stream_offset_landmarks(MissingDir)),
+    ok.
+
+stream_offset_landmarks_single_chunk(Config) ->
+    %% One chunk holding two records: first and all three percentiles are
+    %% the chunk itself (offset 0), while last is the offset of the final
+    %% record (1) with the same timestamp.
+    ChunkTs = now_ms() - 10000,
+    LDir = ?config(leader_dir, Config),
+    Log0 = seed_log(LDir, [{2, ChunkTs, [<<"one">>, <<"two">>]}], Config),
+    osiris_log:close(Log0),
+    {ok, Landmarks} = osiris_log:stream_offset_landmarks(LDir),
+    Chunk = {0, ChunkTs},
+    ?assertMatch(#{first := Chunk,
+                   last := {1, ChunkTs},
+                   p25 := Chunk,
+                   p50 := Chunk,
+                   p75 := Chunk}, Landmarks),
+    ok.
+
+stream_offset_landmarks_multiple_chunks(Config) ->
+    %% Several chunks: by offset the landmarks must be ordered
+    %% first =< p25 =< p50 =< p75 =< last. last is the offset of the very
+    %% last record in the log, not the id of the last chunk: the last chunk
+    %% here holds two records, so last is expected to be 5. The first and
+    %% last timestamps must be the ones the chunks were written with.
+    Now = now_ms(),
+    FirstTs = Now - 10000,
+    LastTs = Now - 3000,
+    EpochChunks =
+        [{1, FirstTs, [<<"one">>]},
+         {1, Now - 8000, [<<"two">>]},
+         {2, Now - 5000, [<<"three">>, <<"four">>]},
+         {2, LastTs, [<<"five">>, <<"six">>]}],
+
+    LDir = ?config(leader_dir, Config),
+    Log0 = seed_log(LDir, EpochChunks, Config),
+    osiris_log:close(Log0),
+    {ok, #{first := {FirstOff, FirstTs},
+           last := {LastOff, LastTs},
+           p25 := {P25Off, _},
+           p50 := {P50Off, _},
+           p75 := {P75Off, _}}} = osiris_log:stream_offset_landmarks(LDir),
+    ?assert(FirstOff =< P25Off),
+    ?assert(P25Off =< P50Off),
+    ?assert(P50Off =< P75Off),
+    ?assert(P75Off =< LastOff),
+    ?assertEqual(FirstOff, 0),
+    ?assertEqual(LastOff, 5),
+    ok.
+
+stream_offset_landmarks_percentiles(Config) ->
+    %% Smallest layout in which the three percentiles do not overlap: five
+    %% single-record chunks starting at offsets 0, 1, 2, 3 and 4, so that
+    %% Range = 4 and the targets T25 = 1, T50 = 2 and T75 = 3 each land on a
+    %% different chunk.
+    Now = now_ms(),
+    [Ts0, Ts1, Ts2, Ts3, Ts4] =
+        [Now - Age || Age <- [10000, 8000, 5000, 3000, 1000]],
+    EpochChunks =
+        [{1, Ts0, [<<"a">>]},
+         {1, Ts1, [<<"b">>]},
+         {1, Ts2, [<<"c">>]},
+         {1, Ts3, [<<"d">>]},
+         {1, Ts4, [<<"e">>]}],
+
+    LDir = ?config(leader_dir, Config),
+    Log0 = seed_log(LDir, EpochChunks, Config),
+    osiris_log:close(Log0),
+    {ok, Landmarks} = osiris_log:stream_offset_landmarks(LDir),
+    %% first and last are the outermost chunks ...
+    #{first := {0, Ts0},
+      last := {4, Ts4}} = Landmarks,
+    %% ... and every percentile resolves to its own chunk.
+    #{p25 := {1, Ts1},
+      p50 := {2, Ts2}} = Landmarks,
+    {3, Ts3} = maps:get(p75, Landmarks).
+
+stream_offset_landmarks_config_map(Config) ->
+    %% Passing a config map that carries the dir key must give the same
+    %% landmarks as passing the directory path itself.
+    EpochChunks = [{1, [<<"a">>]}, {1, [<<"b">>]}],
+    LDir = ?config(leader_dir, Config),
+    Log0 = seed_log(LDir, EpochChunks, Config),
+    osiris_log:close(Log0),
+    Conf = ?config(osiris_conf, Config),
+    {ok, FromPath} = osiris_log:stream_offset_landmarks(LDir),
+    {ok, FromConf} = osiris_log:stream_offset_landmarks(Conf#{dir => LDir}),
+    ?assertEqual(FromPath, FromConf),
+    ok.
+
 read_ahead_send_file(Config) ->
     RAL = 4096, %% read ahead limit
     HS = ?HEADER_SIZE_B,
@@ -2587,7 +2690,7 @@ write_chunk(Conf, Epoch, Now, Records, Trk0, Log0) ->
             %% need to re-init as new epoch
             osiris_log:close(Log1),
             Log = osiris_log:init(Conf#{epoch => Epoch}),
-            {Trk1, osiris_log:write(lists:reverse(Records), Log)}
+            {Trk1, osiris_log:write(lists:reverse(Records), Now, Log)}
     end.
 
 now_ms() ->