|
| 1 | +#include "../../shared/fd_config.h" |
| 2 | +#include "../../shared/fd_action.h" |
| 3 | +#include "../../../disco/metrics/fd_metrics.h" |
| 4 | + |
| 5 | +#include <errno.h> |
| 6 | +#include <signal.h> |
| 7 | +#include <stdio.h> |
| 8 | +#include <unistd.h> |
| 9 | + |
| 10 | +extern action_t * ACTIONS[]; |
| 11 | + |
| 12 | +static int running = 1; |
| 13 | + |
| 14 | +static void |
| 15 | +exit_signal( int sig FD_PARAM_UNUSED ) { |
| 16 | + running = 0; |
| 17 | +} |
| 18 | + |
| 19 | +static void |
| 20 | +metrics_record_cmd_args( int * pargc, |
| 21 | + char *** pargv, |
| 22 | + args_t * args ) { |
| 23 | + |
| 24 | + if( fd_env_strip_cmdline_contains( pargc, pargv, "--help" ) || |
| 25 | + fd_env_strip_cmdline_contains( pargc, pargv, "-h" ) || |
| 26 | + fd_env_strip_cmdline_contains( pargc, pargv, "help" ) ) { |
| 27 | + fputs( |
| 28 | + "\nUsage: firedancer-dev metrics-record [GLOBAL FLAGS] [FLAGS] metric0 metric1 ... metricN\n" |
| 29 | + "\n" |
| 30 | + "Flags:\n" |
| 31 | + " --topo TOPO Attach to metrics of non-standard topo, such as snapshot-load\n" |
| 32 | + " --interval SECONDS How frequently to print a row. Defaults to 1.0 seconds.\n" |
| 33 | + "\n" |
| 34 | + "Metrics:\n" |
| 35 | + " Selector format: `metric_name[,tile_kind[,tile_kind_id]]`\n" |
| 36 | + "\n" |
| 37 | + " Metrics are primarily identified by their name string. A tile kind string can also\n" |
| 38 | + " be given to limit the given metric to only one specific tile type. Similarly, a\n" |
| 39 | + " tile kind id can be given (only if tile_kind is also given) to limit to a particular\n" |
| 40 | + " tile instance. If these tile kind filters are not given, all matching metrics will\n" |
| 41 | + " be recorded.\n" |
| 42 | + "\n" |
| 43 | + " Examples:\n" |
| 44 | + " tile_pid\n" |
| 45 | + " tile_backpressure_count,gossip\n" |
| 46 | + " tile_status,net,1\n" |
| 47 | + "\n", |
| 48 | + stderr ); |
| 49 | + exit( EXIT_SUCCESS ); |
| 50 | + } |
| 51 | + |
| 52 | + fd_memset( &args->metrics_record, 0, sizeof(args->metrics_record) ); |
| 53 | + fd_cstr_ncpy( args->metrics_record.topo, fd_env_strip_cmdline_cstr( pargc, pargv, "--topo", NULL, "" ), sizeof(args->metrics_record.topo) ); |
| 54 | + |
| 55 | + float _interval = fd_env_strip_cmdline_float( pargc, pargv, "--interval", NULL, 1.0f ); |
| 56 | + args->metrics_record.interval_ns = fd_ulong_max( 1UL, (ulong)(_interval*1.0e9f) ); |
| 57 | + |
| 58 | + ulong const selectors_cnt_max = sizeof(args->metrics_record.selectors)/sizeof(args->metrics_record.selectors[0]); |
| 59 | + while( *pargc ) { |
| 60 | + if( FD_UNLIKELY( args->metrics_record.selectors_cnt>=selectors_cnt_max ) ) FD_LOG_ERR(( "too many metric selectors given %lu", selectors_cnt_max )); |
| 61 | + struct fd_action_metrics_record_selector * selector = &args->metrics_record.selectors[ args->metrics_record.selectors_cnt++ ]; |
| 62 | + |
| 63 | + char * name = *pargv[ 0 ]; |
| 64 | + char * kind = strchr( name, ',' ); |
| 65 | + char * kind_id = NULL; |
| 66 | + if( kind!=NULL ) { |
| 67 | + fd_cstr_fini( kind ); |
| 68 | + kind += 1; |
| 69 | + kind_id = strchr( kind, ',' ); |
| 70 | + if( kind_id!=NULL ) { |
| 71 | + fd_cstr_fini( kind_id ); |
| 72 | + kind_id += 1; |
| 73 | + if( FD_UNLIKELY( NULL!=strchr( kind_id, ',' ) ) ) FD_LOG_ERR(( "invalid metric selector %s %s %s", name, kind, kind_id )); |
| 74 | + } |
| 75 | + } |
| 76 | + *pargc -= 1; |
| 77 | + *pargv += 1; |
| 78 | + |
| 79 | + if( FD_UNLIKELY( NULL==name || strlen( name )>=sizeof(selector->name)) ) FD_LOG_ERR(( "invalid metric selector name %s", name )); |
| 80 | + fd_cstr_ncpy( selector->name, name, sizeof(selector->name) ); |
| 81 | + if( FD_UNLIKELY( NULL!=kind && strlen( kind )>=sizeof(selector->kind)) ) FD_LOG_ERR(( "invalid metric selector kind %s", kind )); |
| 82 | + fd_cstr_ncpy( selector->kind, kind, sizeof(selector->kind) ); |
| 83 | + selector->kind_id = NULL==kind_id ? ULONG_MAX : fd_cstr_to_ulong( kind_id ); |
| 84 | + } |
| 85 | +} |
| 86 | + |
| 87 | +static int |
| 88 | +selector_matches( struct fd_action_metrics_record_selector const * selector, |
| 89 | + char const * metric_name, |
| 90 | + char const * tile_name, |
| 91 | + ulong tile_id ) { |
| 92 | + if( 0!=strcmp( metric_name, selector->name ) ) return 0; |
| 93 | + if( selector->kind[ 0 ] && 0!=strcmp( tile_name, selector->kind ) ) return 0; |
| 94 | + if( ULONG_MAX!=selector->kind_id && tile_id!=selector->kind_id ) return 0; |
| 95 | + return 1; |
| 96 | +} |
| 97 | + |
| 98 | +static void |
| 99 | +reconstruct_topo( fd_config_t * config, |
| 100 | + char const * topo_name ) { |
| 101 | + if( !topo_name[0] ) return; /* keep default action topo */ |
| 102 | + |
| 103 | + action_t const * selected = NULL; |
| 104 | + for( action_t ** a=ACTIONS; a!=NULL; a++ ) { |
| 105 | + action_t const * action = *a; |
| 106 | + if( 0==strcmp( action->name, topo_name ) ) { |
| 107 | + selected = action; |
| 108 | + break; |
| 109 | + } |
| 110 | + } |
| 111 | + |
| 112 | + if( !selected ) FD_LOG_ERR(( "Unknown --topo %s", topo_name )); |
| 113 | + if( !selected->topo ) FD_LOG_ERR(( "Cannot recover topology for --topo %s", topo_name )); |
| 114 | + |
| 115 | + selected->topo( config ); |
| 116 | +} |
| 117 | + |
| 118 | +static void |
| 119 | +metrics_record_cmd_fn( args_t * args, |
| 120 | + fd_config_t * config ) { |
| 121 | + |
| 122 | + struct sigaction sa = { .sa_handler = exit_signal }; |
| 123 | + if( FD_UNLIKELY( sigaction( SIGTERM, &sa, NULL ) ) ) FD_LOG_ERR(( "sigaction(SIGTERM) failed (%i-%s)", errno, fd_io_strerror( errno ) )); |
| 124 | + if( FD_UNLIKELY( sigaction( SIGINT, &sa, NULL ) ) ) FD_LOG_ERR(( "sigaction(SIGINT) failed (%i-%s)", errno, fd_io_strerror( errno ) )); |
| 125 | + |
| 126 | + reconstruct_topo( config, args->metrics_record.topo ); |
| 127 | + |
| 128 | + fd_topo_join_workspaces( &config->topo, FD_SHMEM_JOIN_MODE_READ_ONLY ); |
| 129 | + fd_topo_fill( &config->topo ); |
| 130 | + |
| 131 | + uchar write_buf[ 4096 ]; |
| 132 | + fd_io_buffered_ostream_t out[1]; |
| 133 | + FD_TEST( out==fd_io_buffered_ostream_init( out, STDOUT_FILENO, write_buf, sizeof(write_buf) ) ); |
| 134 | + |
| 135 | + fd_io_buffered_ostream_write( out, "timestamp", 9 ); |
| 136 | + |
| 137 | + ulong metrics_cnt = 0UL; |
| 138 | + struct { |
| 139 | + fd_metrics_meta_t const * meta; |
| 140 | + volatile ulong const * value; |
| 141 | + } metrics[ 4096 ]; |
| 142 | + |
| 143 | + for( ulong i=0UL; i<FD_METRICS_ALL_TOTAL; i++ ) { |
| 144 | + fd_metrics_meta_t const * metric = &FD_METRICS_ALL[ i ]; |
| 145 | + if( metric->type!=FD_METRICS_TYPE_GAUGE && metric->type!=FD_METRICS_TYPE_COUNTER ) continue; |
| 146 | + for( ulong j=0UL; j<config->topo.tile_cnt; j++ ) { |
| 147 | + fd_topo_tile_t const * tile = &config->topo.tiles[ j ]; |
| 148 | + char const * tile_name = tile->metrics_name[ 0 ] ? tile->metrics_name : tile->name; |
| 149 | + for( ulong s=0UL; s<args->metrics_record.selectors_cnt; s++ ) { |
| 150 | + if( FD_LIKELY( !selector_matches( &args->metrics_record.selectors[ s ], metric->name, tile_name, tile->kind_id ) ) ) continue; |
| 151 | + if( FD_UNLIKELY( metrics_cnt>=(sizeof(metrics)/sizeof(metrics[0])) ) ) FD_LOG_ERR(( "too many metrics %lu", metrics_cnt )); |
| 152 | + metrics[ metrics_cnt ].meta = metric; |
| 153 | + metrics[ metrics_cnt ].value = fd_metrics_tile( tile->metrics ) + metric->offset; |
| 154 | + ++metrics_cnt; |
| 155 | + |
| 156 | + char buf[ 1024 ]; |
| 157 | + char * p = fd_cstr_append_printf( fd_cstr_init( buf ), ",%s{kind=%s,kind_id=%lu", metric->name, tile->name, tile->kind_id ); |
| 158 | + if( metric->enum_name ) p = fd_cstr_append_printf( p, ",%s=%s", metric->enum_name, metric->enum_variant ); |
| 159 | + p = fd_cstr_append_char( p, '}' ); |
| 160 | + fd_io_buffered_ostream_write( out, buf, (ulong)(p-buf) ); |
| 161 | + break; |
| 162 | + } |
| 163 | + } |
| 164 | + } |
| 165 | + |
| 166 | + /* TODO: Add support for in/out link metrics */ |
| 167 | + |
| 168 | + for( ulong i=0UL; i<FD_METRICS_TILE_KIND_CNT; i++ ) { |
| 169 | + for( ulong j=0UL; j<FD_METRICS_TILE_KIND_SIZES[ i ]; j++ ) { |
| 170 | + fd_metrics_meta_t const * metric = &FD_METRICS_TILE_KIND_METRICS[ i ][ j ]; |
| 171 | + if( metric->type!=FD_METRICS_TYPE_GAUGE && metric->type!=FD_METRICS_TYPE_COUNTER ) continue; |
| 172 | + for( ulong k=0UL; k<config->topo.tile_cnt; k++ ) { |
| 173 | + fd_topo_tile_t const * tile = &config->topo.tiles[ k ]; |
| 174 | + char const * tile_name = tile->metrics_name[ 0 ] ? tile->metrics_name : tile->name; |
| 175 | + if( 0!=strcmp( tile_name, FD_METRICS_TILE_KIND_NAMES[ i ] ) ) continue; |
| 176 | + for( ulong s=0UL; s<args->metrics_record.selectors_cnt; s++ ) { |
| 177 | + if( FD_LIKELY( !selector_matches( &args->metrics_record.selectors[ s ], metric->name, tile_name, tile->kind_id ) ) ) continue; |
| 178 | + if( FD_UNLIKELY( metrics_cnt>=(sizeof(metrics)/sizeof(metrics[0])) ) ) FD_LOG_ERR(( "too many metrics %lu", metrics_cnt )); |
| 179 | + metrics[ metrics_cnt ].meta = metric; |
| 180 | + metrics[ metrics_cnt ].value = fd_metrics_tile( tile->metrics ) + metric->offset; |
| 181 | + ++metrics_cnt; |
| 182 | + |
| 183 | + char buf[ 1024 ]; |
| 184 | + char * p = fd_cstr_append_printf( fd_cstr_init( buf ), ",%s{kind=%s,kind_id=%lu", metric->name, tile->name, tile->kind_id ); |
| 185 | + if( metric->enum_name ) p = fd_cstr_append_printf( p, ",%s=%s", metric->enum_name, metric->enum_variant ); |
| 186 | + p = fd_cstr_append_char( p, '}' ); |
| 187 | + fd_io_buffered_ostream_write( out, buf, (ulong)(p-buf) ); |
| 188 | + break; |
| 189 | + } |
| 190 | + } |
| 191 | + } |
| 192 | + } |
| 193 | + |
| 194 | + if( FD_UNLIKELY( metrics_cnt==0UL ) ) FD_LOG_ERR(( "no matching metrics found" )); |
| 195 | + fd_io_buffered_ostream_write( out, "\n", 1 ); |
| 196 | + fd_io_buffered_ostream_flush( out ); |
| 197 | + |
| 198 | + ulong count = 0UL, skip = 0UL; |
| 199 | + long const start = fd_log_wallclock(); |
| 200 | + long const interval = (long)args->metrics_record.interval_ns; |
| 201 | + long next = ((start/interval)*interval)+interval; |
| 202 | + while( running ) { |
| 203 | + long now = fd_log_wait_until( next ); |
| 204 | + for( next+=interval; next<=now; next+=interval ) skip++; |
| 205 | + |
| 206 | + char * const b = fd_io_buffered_ostream_peek( out ); |
| 207 | + char * const e = b + fd_io_buffered_ostream_peek_sz( out ); |
| 208 | + char * p = b; |
| 209 | + if( FD_UNLIKELY( e-p<=20L ) ) FD_LOG_ERR(( "increase write buffer size" )); |
| 210 | + p = fd_cstr_append_ulong_as_text( p, ' ', '\0', (ulong)now, fd_ulong_base10_dig_cnt( (ulong)now ) ); |
| 211 | + |
| 212 | + for( ulong i=0UL; i<metrics_cnt; i++ ) { |
| 213 | + ulong value = *metrics[ i ].value; |
| 214 | + switch( metrics[ i ].meta->converter ) { |
| 215 | + case FD_METRICS_CONVERTER_NANOSECONDS: value = fd_metrics_convert_ticks_to_nanoseconds( value ); break; |
| 216 | + case FD_METRICS_CONVERTER_SECONDS: value = (ulong)(fd_metrics_convert_ticks_to_seconds( value ) + 0.5); /* round, not truncate */ break; |
| 217 | + case FD_METRICS_CONVERTER_NONE: break; |
| 218 | + default: FD_LOG_ERR(( "unknown converter %i", metrics[ i ].meta->converter )); |
| 219 | + } |
| 220 | + if( FD_UNLIKELY( e-p<=22L ) ) FD_LOG_ERR(( "increase write buffer size" )); |
| 221 | + p = fd_cstr_append_char( p, ',' ); |
| 222 | + p = fd_cstr_append_ulong_as_text( p, ' ', '\0', value, fd_ulong_base10_dig_cnt( value ) ); |
| 223 | + } |
| 224 | + p = fd_cstr_append_char( p, '\n' ); |
| 225 | + fd_io_buffered_ostream_seek( out, (ulong)(p-b) ); |
| 226 | + fd_io_buffered_ostream_flush( out ); |
| 227 | + count++; |
| 228 | + } |
| 229 | + |
| 230 | + FD_LOG_NOTICE(( "recorded %lu samples in %f seconds", count, (double)(fd_log_wallclock()-start)/1.0e9 )); |
| 231 | + if( skip ) FD_LOG_WARNING(( "skipped %lu samples, try reducing metric count or increasing interval", skip )); |
| 232 | + |
| 233 | + fd_io_buffered_ostream_flush( out ); |
| 234 | + fd_io_buffered_ostream_fini( out ); |
| 235 | + |
| 236 | + fd_topo_leave_workspaces( &config->topo ); |
| 237 | +} |
| 238 | + |
| 239 | +action_t fd_action_metrics_record = { |
| 240 | + .name = "metrics-record", |
| 241 | + .description = "Continuously print a select subset of metrics to STDOUT in CSV format", |
| 242 | + .is_diagnostic = 1, |
| 243 | + .args = metrics_record_cmd_args, |
| 244 | + .fn = metrics_record_cmd_fn, |
| 245 | +}; |
0 commit comments