|
57 | 57 | directory/1, |
58 | 58 | delete_directory/1, |
59 | 59 | counter_fields/0, |
| 60 | + stream_offset_landmarks/1, |
| 61 | + last_offset_and_timestamp/1, |
60 | 62 | make_counter/1, |
61 | 63 | generate_log/4]). |
62 | 64 |
|
@@ -3504,6 +3506,210 @@ write_in_chunks(ToWrite, MsgsPerChunk, Msg, W0) when ToWrite > 0 -> |
3504 | 3506 | write_in_chunks(_, _, _, W) -> |
3505 | 3507 | W. |
3506 | 3508 |
|
%% Scans every index file of the log at Dir and returns five landmarks, each
%% as an {Offset, Timestamp} pair:
%%   first       - the first chunk found in the index files;
%%   last        - the result of last_offset_and_timestamp_from_files/1, or
%%                 the last scanned chunk when that lookup does not return
%%                 {ok, _};
%%   p25/p50/p75 - the scanned chunk whose offset is closest to 25%, 50% and
%%                 75% of the range between the first and the last chunk
%%                 offsets. A percent position may not fall on a chunk
%%                 boundary, so the chunk with the closest offset is chosen.
%%
%% Accepts either the log directory or a config map holding it under `dir'.
%% Returns {error, empty} when no chunk is found, and passes through the
%% {error, Reason} of an index file that could not be opened.
-spec stream_offset_landmarks(file:filename_all() | config()) ->
    {ok, #{first => {offset(), osiris:timestamp()},
           last => {offset(), osiris:timestamp()},
           p25 => {offset(), osiris:timestamp()},
           p50 => {offset(), osiris:timestamp()},
           p75 => {offset(), osiris:timestamp()}}} |
    {error, empty | file:posix() | badarg | system_limit}.
stream_offset_landmarks(#{dir := Dir}) ->
    stream_offset_landmarks(Dir);
stream_offset_landmarks(Dir) when ?IS_STRING(Dir) ->
    IdxFiles = sorted_index_files(Dir),
    case scan_index_chunks_files(IdxFiles, []) of
        {ok, []} ->
            {error, empty};
        {ok, [First | _] = Chunks} ->
            LastChunk = lists:last(Chunks),
            {FirstOffset, _FirstTs} = First,
            {LastChunkId, _LastChunkTs} = LastChunk,
            Last = case last_offset_and_timestamp_from_files(IdxFiles) of
                       {ok, L} -> L;
                       _ -> LastChunk
                   end,
            %% With a single chunk (or a zero range) every target collapses
            %% to FirstOffset, so no special case is needed.
            Range = LastChunkId - FirstOffset,
            Targets = [FirstOffset + (Range * Pct) div 100
                       || Pct <- [25, 50, 75]],
            [P25, P50, P75] = closest_chunks_to_targets(Chunks, Targets),
            {ok, #{first => First,
                   last => Last,
                   p25 => P25,
                   p50 => P50,
                   p75 => P75}};
        {error, _} = Err ->
            %% An index file could not be opened; report it to the caller
            %% instead of failing with case_clause.
            Err
    end.
| 3564 | + |
%% Looks up the end of the log stored in Dir.
%% On success the result is {ok, {LastOffset, Timestamp}}: LastOffset is the
%% very last offset in the log (the last offset inside the last chunk, not
%% that chunk's first offset) and Timestamp belongs to the last chunk.
%% Returns {error, empty} when no such chunk can be found.
-spec last_offset_and_timestamp(file:filename_all()) ->
    {ok, {offset(), osiris:timestamp()}} | {error, empty}.
last_offset_and_timestamp(Dir) ->
    IdxFiles = sorted_index_files(Dir),
    last_offset_and_timestamp_from_files(IdxFiles).
| 3572 | + |
%% Picks the last entry of non_empty_index_files(IdxFiles) and reads the end
%% of the log from it. Returns {error, empty} when that list is empty.
last_offset_and_timestamp_from_files(IdxFiles) ->
    case non_empty_index_files(IdxFiles) of
        [] ->
            {error, empty};
        Candidates ->
            last_offset_and_timestamp_from_file(lists:last(Candidates))
    end.
| 3581 | + |
%% Reads the last index record of LastIdxFile and resolves it, via the
%% matching segment file, to {ok, {LastOffset, Timestamp}}.
%% Returns {error, empty} when the index file cannot be opened, holds no
%% complete record after its header, or its last record is all zeroes
%% (chunk id and timestamp both 0).
-spec last_offset_and_timestamp_from_file(file:filename_all()) ->
    {ok, {offset(), osiris:timestamp()}} | {error, empty}.
last_offset_and_timestamp_from_file(LastIdxFile) ->
    case file:open(LastIdxFile, [read, raw, binary]) of
        {ok, IdxFd} ->
            try
                case read_tail_idx_record(IdxFd) of
                    {ok, {ChunkId, IdxTs, FilePos}} ->
                        SegFile = segment_from_index_file(LastIdxFile),
                        {ok, tail_chunk_offset_and_timestamp(SegFile,
                                                             ChunkId,
                                                             IdxTs,
                                                             FilePos)};
                    empty ->
                        {error, empty}
                end
            after
                _ = file:close(IdxFd)
            end;
        _ ->
            {error, empty}
    end.

%% Reads the index record that ends at the record boundary found by
%% position_at_idx_record_boundary(IdxFd, eof). Returns
%% {ok, {ChunkId, Timestamp, FilePos}}, or `empty' when there is no full
%% record past the index header, the read fails, or the record has both
%% chunk id and timestamp equal to 0.
read_tail_idx_record(IdxFd) ->
    case position_at_idx_record_boundary(IdxFd, eof) of
        {ok, Pos} when Pos >= ?IDX_HEADER_SIZE + ?INDEX_RECORD_SIZE_B ->
            RecordPos = Pos - ?INDEX_RECORD_SIZE_B,
            case file:pread(IdxFd, RecordPos, ?INDEX_RECORD_SIZE_B) of
                {ok, <<ChunkId:64/unsigned,
                       IdxTs:64/signed,
                       _Epoch:64/unsigned,
                       FilePos:32/unsigned,
                       _ChType:8/unsigned>>}
                  when ChunkId =/= 0 orelse IdxTs =/= 0 ->
                    {ok, {ChunkId, IdxTs, FilePos}};
                _ ->
                    empty
            end;
        _ ->
            empty
    end.

%% Reads the chunk header at FilePos in SegFile and returns
%% {ChunkId + NumRecords - 1, Timestamp}. Best effort: when the segment
%% cannot be opened or the header cannot be read, it falls back to
%% {ChunkId, IdxTs}, i.e. the chunk's first offset and index timestamp.
tail_chunk_offset_and_timestamp(SegFile, ChunkId, IdxTs, FilePos) ->
    case file:open(SegFile, [read, raw, binary]) of
        {ok, SegFd} ->
            try
                case file:pread(SegFd, FilePos, ?HEADER_SIZE_B) of
                    {ok, <<_:32,
                           NumRecords:32/unsigned,
                           SegTs:64/signed,
                           _/binary>>} ->
                        LastOffset = ChunkId + NumRecords - 1,
                        %% NOTE(review): index timestamps below 10^12 are
                        %% replaced by the one in the chunk header -
                        %% presumably they are not plausible millisecond
                        %% epoch values; confirm the intent.
                        Ts = if IdxTs < 1000000000000 -> SegTs;
                                true -> IdxTs
                             end,
                        {LastOffset, Ts};
                    _ ->
                        {ChunkId, IdxTs}
                end
            after
                _ = file:close(SegFd)
            end;
        _ ->
            {ChunkId, IdxTs}
    end.
| 3631 | + |
%% Scans the given index files in order and returns {ok, Chunks} with the
%% {ChunkId, Timestamp} pairs of all files concatenated in file order.
%% Acc holds the pairs collected so far in reverse order. Stops at the first
%% file whose scan fails and returns that {error, _} unchanged.
scan_index_chunks_files([IdxFile | Remaining], Acc) ->
    case scan_one_index_file(IdxFile) of
        {error, _} = Err ->
            Err;
        {ok, FileChunks} ->
            %% Push this file's chunks onto the reversed accumulator.
            scan_index_chunks_files(Remaining, lists:reverse(FileChunks, Acc))
    end;
scan_index_chunks_files([], Acc) ->
    {ok, lists:reverse(Acc)}.
| 3641 | + |
%% Opens IdxFile read-only, skips the index header and collects the
%% records found by scan_index_records/2. Returns its {ok, Chunks} result,
%% or whatever file:open/2 returned when the file could not be opened.
%% The file is always closed before returning.
scan_one_index_file(IdxFile) ->
    %% read_ahead: the scan issues one small sequential read per index
    %% record; buffering avoids an OS read for every record.
    case file:open(IdxFile, [read, raw, binary, read_ahead]) of
        {ok, Fd} ->
            try
                {ok, _} = file:position(Fd, ?IDX_HEADER_SIZE),
                scan_index_records(Fd, [])
            after
                _ = file:close(Fd)
            end;
        Err ->
            Err
    end.
| 3654 | + |
%% Reads index records from the current position of Fd until eof and
%% returns {ok, [{ChunkId, Timestamp}]} in file order. Acc holds the pairs
%% read so far in reverse order.
scan_index_records(Fd, Acc) ->
    case file:read(Fd, ?INDEX_RECORD_SIZE_B) of
        eof ->
            {ok, lists:reverse(Acc)};
        {ok, <<Offset:64/unsigned,
               Ts:64/signed,
               _Epoch:64/unsigned,
               _FilePos:32/unsigned,
               _ChType:8/unsigned>>}
          when not (Offset =:= 0 andalso Ts =:= 0) ->
            scan_index_records(Fd, [{Offset, Ts} | Acc]);
        {ok, _Skipped} ->
            %% Records with chunk id and timestamp both 0, and reads that do
            %% not have the 29-byte record layout, are skipped.
            scan_index_records(Fd, Acc)
    end.
| 3670 | + |
%% For each of the three target offsets, returns the chunk whose offset is
%% closest to it, in the same order as the targets. Chunks must be ordered
%% by offset. The list is converted to a tuple once so that each target can
%% be located with a binary search.
closest_chunks_to_targets(Chunks, [_, _, _] = Targets) ->
    Indexed = list_to_tuple(Chunks),
    [closest_to_target(Indexed, Target) || Target <- Targets].
| 3678 | + |
%% Binary search over a tuple of {Offset, _} pairs ordered by offset.
%% Returns the first 1-based position whose offset is >= Target, or
%% tuple_size(Tuple) + 1 when every offset is smaller than Target.
%% Low and High bound the positions still under consideration.
find_first_ge(Tuple, Target, Low, High) when Low < High ->
    Mid = (Low + High) div 2,
    {MidOffset, _} = element(Mid, Tuple),
    case MidOffset >= Target of
        true ->
            find_first_ge(Tuple, Target, Low, Mid);
        false ->
            find_first_ge(Tuple, Target, Mid + 1, High)
    end;
find_first_ge(Tuple, Target, Low, _High) ->
    {Offset, _} = element(Low, Tuple),
    case Offset >= Target of
        true -> Low;
        false -> Low + 1
    end.

find_first_ge(Tuple, Target) ->
    find_first_ge(Tuple, Target, 1, tuple_size(Tuple)).
| 3694 | + |
%% Returns the {Offset, _} element of Tuple whose offset is nearest to
%% Target; Tuple must be ordered by offset. When the neighbours on either
%% side of Target are equally distant, the one with the higher offset wins.
closest_to_target(Tuple, Target) ->
    Count = tuple_size(Tuple),
    case find_first_ge(Tuple, Target) of
        Pos when Pos =< 1 ->
            %% Target is at or before the first chunk.
            element(1, Tuple);
        Pos when Pos > Count ->
            %% Target is beyond the last chunk.
            element(Count, Tuple);
        Pos ->
            Above = element(Pos, Tuple),
            Below = element(Pos - 1, Tuple),
            {AboveOffset, _} = Above,
            {BelowOffset, _} = Below,
            case abs(AboveOffset - Target) =< abs(BelowOffset - Target) of
                true -> Above;
                false -> Below
            end
    end.
| 3712 | + |
3507 | 3713 | -ifdef(TEST). |
3508 | 3714 | -include_lib("eunit/include/eunit.hrl"). |
3509 | 3715 |
|
|
0 commit comments