@@ -52,10 +52,12 @@ struct tensor_entry {
5252};
5353
// Summary of the values in one tensor slice, as stored in the slice-stats cache.
struct tensor_slice_stats {
    bool   computed      = false; // true once a summary has been produced for the slice
    size_t valid         = 0;     // number of values that contributed to the summary
    float  min           = 0.0f;  // lower bound of the reported value range
    float  max           = 0.0f;  // upper bound of the reported value range
    float  lower_percent = 0.0f;  // percentile (0-100) that `min` was sampled at
    float  upper_percent = 0.0f;  // percentile (0-100) that `max` was sampled at
};
6062
6163struct tokenizer_info {
@@ -86,6 +88,9 @@ struct server_state {
8688 std::mutex mutex;
8789};
8890
// Default percentile bounds, expressed as fractions in [0, 1], used when
// summarizing a slice: the 1st and the 99th percentile.
constexpr float SLICE_PERCENTILE_LOWER = 0.01f;
constexpr float SLICE_PERCENTILE_UPPER = 0.99f;
93+
8994struct model_descriptor {
9095 std::string relative;
9196 std::string name;
@@ -761,6 +766,78 @@ struct tensor_window_result {
761766 std::vector<float > values;
762767};
763768
// Result of compute_percentile_range().
struct percentile_range {
    size_t finite_count = 0;     // number of finite (non-NaN, non-infinite) inputs
    float  lower        = 0.0f;  // value sampled at the lower fraction
    float  upper        = 0.0f;  // value sampled at the upper fraction
    bool   has_range    = false; // true when at least one finite input existed
};

// Samples two percentiles from `values`, ignoring NaN and infinite entries.
//
// values          input samples; the vector itself is not modified.
// lower_fraction  percentile for `lower`, as a fraction; clamped to [0, 1].
// upper_fraction  percentile for `upper`, as a fraction; clamped to [0, 1].
//
// A NaN fraction is treated as 0. Percentiles are linearly interpolated between
// the two nearest sorted samples. When no finite value exists, the result keeps
// has_range == false and lower/upper stay at 0.
percentile_range compute_percentile_range(
        const std::vector<float> & values,
        float lower_fraction,
        float upper_fraction) {
    percentile_range result;

    std::vector<float> finite_values;
    finite_values.reserve(values.size());
    for (const float value : values) {
        if (std::isfinite(value)) {
            finite_values.push_back(value);
        }
    }

    result.finite_count = finite_values.size();
    if (finite_values.empty()) {
        return result;
    }

    std::sort(finite_values.begin(), finite_values.end());

    const size_t last_index = finite_values.size() - 1;

    const auto sample_percentile = [&](float fraction) -> float {
        // Written so that NaN (for which every comparison is false) lands on 0
        // instead of reaching the float -> integer conversion below.
        if (!(fraction > 0.0f)) {
            fraction = 0.0f;
        } else if (fraction > 1.0f) {
            fraction = 1.0f;
        }

        // The rank is computed in double: a float cannot represent every index
        // above 2^24, and a rounded-up rank would index past the end.
        const double rank = static_cast<double>(fraction) * static_cast<double>(last_index);

        // rank is non-negative, so truncation equals floor; both indices are
        // clamped so neither can leave the vector.
        const size_t lower_index = std::min(last_index, static_cast<size_t>(rank));
        const size_t upper_index = std::min(last_index, lower_index + 1);
        const double weight = rank - static_cast<double>(lower_index);

        // Interpolating in double keeps (upper - lower) from overflowing to
        // infinity when the two samples sit at opposite ends of the float range.
        const double lower_value = finite_values[lower_index];
        const double upper_value = finite_values[upper_index];
        return static_cast<float>(lower_value + (upper_value - lower_value) * weight);
    };

    result.lower     = sample_percentile(lower_fraction);
    result.upper     = sample_percentile(upper_fraction);
    result.has_range = true;
    return result;
}
821+
822+ tensor_slice_stats summarize_tensor_slice (
823+ const std::vector<float > & values,
824+ float lower_fraction,
825+ float upper_fraction) {
826+ tensor_slice_stats stats;
827+ stats.computed = true ;
828+ stats.lower_percent = lower_fraction * 100 .0f ;
829+ stats.upper_percent = upper_fraction * 100 .0f ;
830+
831+ const auto percentile = compute_percentile_range (values, lower_fraction, upper_fraction);
832+ stats.valid = percentile.finite_count ;
833+ if (percentile.has_range ) {
834+ stats.min = percentile.lower ;
835+ stats.max = percentile.upper ;
836+ }
837+
838+ return stats;
839+ }
840+
764841struct tensor_tile_result {
765842 size_t x = 0 ;
766843 size_t y = 0 ;
@@ -1086,13 +1163,10 @@ bool tensor_slice_statistics(
10861163 return false ;
10871164 }
10881165
1089- tensor_slice_stats computed;
1090- computed.computed = true ;
1091- computed.valid = slice_window.count ;
1092- if (slice_window.count > 0 ) {
1093- computed.min = slice_window.min ;
1094- computed.max = slice_window.max ;
1095- }
1166+ tensor_slice_stats computed = summarize_tensor_slice (
1167+ slice_window.values ,
1168+ SLICE_PERCENTILE_LOWER,
1169+ SLICE_PERCENTILE_UPPER);
10961170
10971171 store_cached_slice_stats (state, entry, slice_index, computed);
10981172
@@ -1279,28 +1353,33 @@ bool tensor_slice_histogram(
12791353 return true ;
12801354 }
12811355
1356+ tensor_slice_stats stats;
1357+ const bool have_cached_stats = try_get_cached_slice_stats (state, entry, slice_index, stats);
1358+ if (have_cached_stats && (!stats.computed || stats.valid == 0 )) {
1359+ out.slice = slice_index;
1360+ out.bins .assign (bin_count, 0 );
1361+ return true ;
1362+ }
1363+
12821364 tensor_window_result window;
12831365 if (!tensor_window_values (state, entry, base_offset, slice_count, window, error)) {
12841366 return false ;
12851367 }
12861368
1287- if (window.count == 0 || window.values .empty ()) {
1288- return true ;
1289- }
1290-
1291- if (window.count > 0 ) {
1292- tensor_slice_stats computed;
1293- computed.computed = true ;
1294- computed.valid = window.count ;
1295- computed.min = window.min ;
1296- computed.max = window.max ;
1297- store_cached_slice_stats (state, entry, slice_index, computed);
1369+ if (!have_cached_stats) {
1370+ stats = summarize_tensor_slice (window.values , SLICE_PERCENTILE_LOWER, SLICE_PERCENTILE_UPPER);
1371+ store_cached_slice_stats (state, entry, slice_index, stats);
12981372 }
12991373
13001374 out.slice = slice_index;
13011375 out.bins .assign (bin_count, 0 );
1302- out.range_min = window.min ;
1303- out.range_max = window.max ;
1376+
1377+ if (!stats.computed || stats.valid == 0 ) {
1378+ return true ;
1379+ }
1380+
1381+ out.range_min = stats.min ;
1382+ out.range_max = stats.max ;
13041383
13051384 if (!std::isfinite (out.range_min ) || !std::isfinite (out.range_max )) {
13061385 return true ;
@@ -1345,6 +1424,10 @@ bool tensor_slice_histogram(
13451424 continue ;
13461425 }
13471426
1427+ if (value < out.range_min || value > out.range_max ) {
1428+ continue ;
1429+ }
1430+
13481431 const float relative = (value - out.range_min ) * bin_scale;
13491432 size_t index = relative < 0 .0f ? 0 : static_cast <size_t >(relative);
13501433 if (index >= bin_count) {
@@ -1611,6 +1694,24 @@ void setup_routes(httplib::Server & server, std::shared_ptr<server_state> state)
16111694 body[" max" ] = nullptr ;
16121695 }
16131696
1697+ json percentiles;
1698+ const float lower_percent = (stats.computed && stats.lower_percent > 0 .0f )
1699+ ? stats.lower_percent
1700+ : SLICE_PERCENTILE_LOWER * 100 .0f ;
1701+ const float upper_percent = (stats.computed && stats.upper_percent > 0 .0f )
1702+ ? stats.upper_percent
1703+ : SLICE_PERCENTILE_UPPER * 100 .0f ;
1704+ percentiles[" lowerPercent" ] = lower_percent;
1705+ percentiles[" upperPercent" ] = upper_percent;
1706+ if (stats.computed && stats.valid > 0 ) {
1707+ percentiles[" lower" ] = stats.min ;
1708+ percentiles[" upper" ] = stats.max ;
1709+ } else {
1710+ percentiles[" lower" ] = nullptr ;
1711+ percentiles[" upper" ] = nullptr ;
1712+ }
1713+ body[" percentiles" ] = std::move (percentiles);
1714+
16141715 set_json_response (res, body);
16151716 });
16161717 });
0 commit comments