diff --git a/src/bench_ecmult.c b/src/bench_ecmult.c index bcf8b43153..bd4ad7cdf5 100644 --- a/src/bench_ecmult.c +++ b/src/bench_ecmult.c @@ -16,9 +16,15 @@ #include "scalar_impl.h" #include "ecmult_impl.h" #include "bench.h" +#include "tests_common.h" #define POINTS 32768 +/* Default memory limit (64 MB) */ +#define DEFAULT_MEM_LIMIT (64 * 1024 * 1024) +/* Select bench algorithm automatically */ +#define BENCH_ALGO_AUTO (-1) + static void help(char **argv, int default_iters) { printf("Benchmark EC multiplication algorithms\n"); printf("\n"); @@ -30,23 +36,25 @@ static void help(char **argv, int default_iters) { printf("function name. The letter 'g' indicates that one of the points is the generator.\n"); printf("The benchmarks are divided by the number of points.\n"); printf("\n"); - printf("default (ecmult_multi): picks pippenger_wnaf or strauss_wnaf depending on the\n"); - printf(" batch size\n"); - printf("pippenger_wnaf: for all batch sizes\n"); - printf("strauss_wnaf: for all batch sizes\n"); - printf("simple: multiply and sum each point individually\n"); + printf("default (auto): automatically select best algorithm\n"); + printf("pippenger_wnaf: for all batch sizes\n"); + printf("strauss_wnaf: for all batch sizes\n"); + printf("simple: multiply and sum each point individually\n"); + printf("\n"); } typedef struct { /* Setup once in advance */ secp256k1_context* ctx; - secp256k1_scratch_space* scratch; secp256k1_scalar* scalars; secp256k1_ge* pubkeys; secp256k1_gej* pubkeys_gej; secp256k1_scalar* seckeys; secp256k1_gej* expected_output; - secp256k1_ecmult_multi_func ecmult_multi; + + /* Algorithm selection */ + int forced_algo; + size_t mem_limit; /* Changes per benchmark */ size_t count; @@ -62,6 +70,142 @@ typedef struct { secp256k1_fe* output_xonly; } bench_data; +/* + * ABCD Calibration Benchmarks + * + * Measures the performance of each algorithm at various batch sizes and + * outputs. Use tools/ecmult_multi_calib.py to calculate optimal C and D + * values from the output. + * + * Each algorithm is only calibrated within its optimal batch size range + * to avoid skewing results with uninteresting results far away from that + * range. + */ +static void run_ecmult_multi_calib(bench_data* data) { + static const size_t batch_sizes[] = { + /* Small numbers should help stabilize Strauss intercept */ + 2, 3, 5, 7, 10, 15, 20, 30, 50, 70, + /* Crossover region between Strauss and Pippenger */ + 85, 88, 90, 100, 120, 150, 175, + /* Pippenger windows, getting progressively larger */ + 200, 300, 500, 750, 1000, 1200, 1500, 2000, 3000, 5000, 7500, 10000, 15000, 20000, 30000 + }; + static const size_t n_batch_sizes = sizeof(batch_sizes) / sizeof(batch_sizes[0]); + + static const char* algo_names[] = { + "TRIVIAL", "STRAUSS", "PIPPENGER_1", "PIPPENGER_2", "PIPPENGER_3", "PIPPENGER_4", "PIPPENGER_5", "PIPPENGER_6", "PIPPENGER_7", "PIPPENGER_8", "PIPPENGER_9", "PIPPENGER_10", "PIPPENGER_11", "PIPPENGER_12" + }; + + /* Maximum batch size for Strauss calibration. */ + static const size_t STRAUSS_MAX_CALIB_BATCH = 500; + + /* Per-window min/max batch sizes for Pippenger calibration. 
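+     * For example, PIPPENGER_4 is only timed at batch sizes from 30 to 1000
+     * (see the tables below); outside that range another window or Strauss
+     * is expected to be selected anyway, so such samples would mostly add
+     * noise to the linear fit.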
*/ + static const size_t pippenger_min_calib_batch[12] = { + /*w=1 2 3 4 5 6 7 8 9 10 11 12 */ + 5, 5, 10, 30, 70, 150, 300, 750, 1500, 3000, 7500, 15000 + }; + static const size_t pippenger_max_calib_batch[12] = { + /*w=1 2 3 4 5 6 7 8 9 10 11 12 */ + 100, 200, 500, 1000, 2000, 5000, 10000, 20000, 30000, 30000, 30000, 30000 + }; + + secp256k1_ge *points = NULL; + secp256k1_scalar *scalars = NULL; + secp256k1_gej result; + size_t max_points = batch_sizes[n_batch_sizes - 1]; + int algo; + size_t i, j; + int base_iters = 1000; + + points = (secp256k1_ge *)malloc(max_points * sizeof(secp256k1_ge)); + scalars = (secp256k1_scalar *)malloc(max_points * sizeof(secp256k1_scalar)); + CHECK(points != NULL); + CHECK(scalars != NULL); + + for (i = 0; i < max_points; i++) { + points[i] = data->pubkeys[i % POINTS]; + scalars[i] = data->scalars[i % POINTS]; + } + + printf("# ECMULT_MULTI Calibration Data\n"); + printf("# Format: ALGO,N,TIME_US (microseconds per batch)\n"); + printf("# Copy the DATA section below into the Python script\n"); + printf("#\n"); + printf("# BEGIN DATA\n"); + + /* Measure STRAUSS */ + algo = SECP256K1_ECMULT_MULTI_ALGO_STRAUSS; + for (i = 0; i < n_batch_sizes; i++) { + size_t n = batch_sizes[i]; + int64_t t_start, t_end; + double time_us; + int iters = base_iters; + int iter; + + /* Only run up to the max to not skew result */ + if (n > STRAUSS_MAX_CALIB_BATCH) continue; + + /* Using many iterations in Strauss since batch sizes are small */ + if (n >= 300) iters = base_iters / 2; + if (iters < 100) iters = 100; + + t_start = gettime_i64(); + for (iter = 0; iter < iters; iter++) { + secp256k1_ecmult_multi_internal(&data->ctx->error_callback, algo, + &result, n, points, scalars, NULL); + } + t_end = gettime_i64(); + + time_us = (double)(t_end - t_start) / iters; + printf("%s,%lu,%.3f\n", algo_names[algo], (unsigned long)n, time_us); + } + + /* Measure PIPPENGER variants */ + for (algo = SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_1; + algo <= SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_12; + algo++) { + int window = algo - SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_1; + size_t min_batch = pippenger_min_calib_batch[window]; + size_t max_batch = pippenger_max_calib_batch[window]; + + for (j = 0; j < n_batch_sizes; j++) { + size_t n = batch_sizes[j]; + int64_t t_start, t_end; + double time_us; + int iters = base_iters; + int iter; + + /* Only run for the selected range of each algo */ + if (n < min_batch || n > max_batch) continue; + + /* Limiting iterations to keep run-time managable */ + if (n >= 1000) iters = base_iters / 10; + if (n >= 5000) iters = base_iters / 50; + if (n >= 15000) iters = base_iters / 100; + if (iters < 3) iters = 3; + + t_start = gettime_i64(); + for (iter = 0; iter < iters; iter++) { + secp256k1_ecmult_multi_internal(&data->ctx->error_callback, algo, + &result, n, points, scalars, NULL); + } + t_end = gettime_i64(); + + time_us = (double)(t_end - t_start) / iters; + printf("%s,%lu,%.3f\n", algo_names[algo], (unsigned long)n, time_us); + } + } + + printf("# END DATA\n"); + printf("#\n"); + printf("# To calculate ABCD constants, run:\n"); + printf("# ./bench_ecmult calib 2>&1 | python3 tools/ecmult_multi_calib.py\n"); + printf("#\n"); + + free(points); + free(scalars); +} + /* Hashes x into [0, POINTS) twice and store the result in offset1 and offset2. 
*/ static void hash_into_offset(bench_data* data, size_t x) { data->offset1 = (x * 0x537b7f6f + 0x8f66a481) % POINTS; @@ -214,32 +358,54 @@ static void run_ecmult_bench(bench_data* data, int iters) { run_benchmark(str, bench_ecmult_1p_g, bench_ecmult_setup, bench_ecmult_1p_g_teardown, data, 10, 2*iters); } -static int bench_ecmult_multi_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) { - bench_data* data = (bench_data*)arg; - if (data->includes_g) ++idx; - if (idx == 0) { - *sc = data->scalars[data->offset1]; - *ge = secp256k1_ge_const_g; - } else { - *sc = data->scalars[(data->offset1 + idx) % POINTS]; - *ge = data->pubkeys[(data->offset2 + idx - 1) % POINTS]; - } - return 1; -} - static void bench_ecmult_multi(void* arg, int iters) { bench_data* data = (bench_data*)arg; int includes_g = data->includes_g; int iter; int count = data->count; + size_t n_points = count - includes_g; + secp256k1_ecmult_multi_algo algo; + secp256k1_ge *points = NULL; + secp256k1_scalar *scalars = NULL; + size_t i; iters = iters / data->count; + if (n_points > 0) { + points = (secp256k1_ge *)malloc(n_points * sizeof(secp256k1_ge)); + scalars = (secp256k1_scalar *)malloc(n_points * sizeof(secp256k1_scalar)); + CHECK(points != NULL); + CHECK(scalars != NULL); + } + for (iter = 0; iter < iters; ++iter) { - data->ecmult_multi(&data->ctx->error_callback, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_ecmult_multi_callback, arg, count - includes_g); + const secp256k1_scalar *g_scalar_ptr = NULL; + + if (includes_g) { + g_scalar_ptr = &data->scalars[data->offset1]; + } + + for (i = 0; i < n_points; ++i) { + size_t idx = includes_g ? i + 1 : i; + scalars[i] = data->scalars[(data->offset1 + idx) % POINTS]; + points[i] = data->pubkeys[(data->offset2 + i) % POINTS]; + } + + if (data->forced_algo >= 0) { + algo = data->forced_algo; + } else { + algo = secp256k1_ecmult_multi_select(data->mem_limit, n_points); + } + + CHECK(secp256k1_ecmult_multi_internal(&data->ctx->error_callback, algo, &data->output[iter], + n_points, points, scalars, g_scalar_ptr)); + data->offset1 = (data->offset1 + count) % POINTS; data->offset2 = (data->offset2 + count - 1) % POINTS; } + + free(points); + free(scalars); } static void bench_ecmult_multi_setup(void* arg) { @@ -309,7 +475,7 @@ static void run_ecmult_multi_bench(bench_data* data, size_t count, int includes_ int main(int argc, char **argv) { bench_data data; int i, p; - size_t scratch_size; + int run_calib = 0; int default_iters = 10000; int iters = get_iters(default_iters); @@ -318,7 +484,8 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } - data.ecmult_multi = secp256k1_ecmult_multi_var; + data.forced_algo = BENCH_ALGO_AUTO; + data.mem_limit = DEFAULT_MEM_LIMIT; if (argc > 1) { if(have_flag(argc, argv, "-h") @@ -328,12 +495,19 @@ int main(int argc, char **argv) { return EXIT_SUCCESS; } else if(have_flag(argc, argv, "pippenger_wnaf")) { printf("Using pippenger_wnaf:\n"); - data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single; + /* TODO: Make this a dynamic selection again */ + data.forced_algo = SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_4; } else if(have_flag(argc, argv, "strauss_wnaf")) { printf("Using strauss_wnaf:\n"); - data.ecmult_multi = secp256k1_ecmult_strauss_batch_single; + data.forced_algo = SECP256K1_ECMULT_MULTI_ALGO_STRAUSS; } else if(have_flag(argc, argv, "simple")) { printf("Using simple algorithm:\n"); + data.forced_algo = SECP256K1_ECMULT_MULTI_ALGO_TRIVIAL; + } else 
if(have_flag(argc, argv, "auto")) { + printf("Using automatic algorithm selection:\n"); + data.forced_algo = BENCH_ALGO_AUTO; + } else if(have_flag(argc, argv, "calib")) { + run_calib = 1; } else { fprintf(stderr, "%s: unrecognized argument '%s'.\n\n", argv[0], argv[1]); help(argv, default_iters); @@ -342,12 +516,6 @@ int main(int argc, char **argv) { } data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_NONE); - scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT; - if (!have_flag(argc, argv, "simple")) { - data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size); - } else { - data.scratch = NULL; - } /* Allocate stuff */ data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS); @@ -370,32 +538,32 @@ int main(int argc, char **argv) { } secp256k1_ge_set_all_gej_var(data.pubkeys, data.pubkeys_gej, POINTS); + if (run_calib) { + run_ecmult_multi_calib(&data); + } else { + print_output_table_header_row(); + /* Initialize offset1 and offset2 */ + hash_into_offset(&data, 0); + run_ecmult_bench(&data, iters); - print_output_table_header_row(); - /* Initialize offset1 and offset2 */ - hash_into_offset(&data, 0); - run_ecmult_bench(&data, iters); - - for (i = 1; i <= 8; ++i) { - run_ecmult_multi_bench(&data, i, 1, iters); - } + for (i = 1; i <= 8; ++i) { + run_ecmult_multi_bench(&data, i, 1, iters); + } - /* This is disabled with low count of iterations because the loop runs 77 times even with iters=1 - * and the higher it goes the longer the computation takes(more points) - * So we don't run this benchmark with low iterations to prevent slow down */ - if (iters > 2) { - for (p = 0; p <= 11; ++p) { - for (i = 9; i <= 16; ++i) { - run_ecmult_multi_bench(&data, i << p, 1, iters); + /* This is disabled with low count of iterations because the loop runs 77 times even with iters=1 + * and the higher it goes the longer the computation takes(more points) + * So we don't run this benchmark with low iterations to prevent slow down */ + if (iters > 2) { + for (p = 0; p <= 11; ++p) { + for (i = 9; i <= 16; ++i) { + run_ecmult_multi_bench(&data, i << p, 1, iters); + } } + } else { + printf("Skipping some benchmarks due to SECP256K1_BENCH_ITERS <= 2\n"); } - } else { - printf("Skipping some benchmarks due to SECP256K1_BENCH_ITERS <= 2\n"); } - if (data.scratch != NULL) { - secp256k1_scratch_space_destroy(data.ctx, data.scratch); - } secp256k1_context_destroy(data.ctx); free(data.scalars); free(data.pubkeys); diff --git a/src/ecmult.h b/src/ecmult.h index 8d0a9f4905..06b5223c0b 100644 --- a/src/ecmult.h +++ b/src/ecmult.h @@ -9,7 +9,6 @@ #include "group.h" #include "scalar.h" -#include "scratch.h" #ifndef ECMULT_WINDOW_SIZE # define ECMULT_WINDOW_SIZE 15 @@ -43,19 +42,84 @@ /** Double multiply: R = na*A + ng*G */ static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng); -typedef int (secp256k1_ecmult_multi_callback)(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *data); +/** + * Algorithm identifiers for multi-scalar multiplication. 
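+ * (For the PIPPENGER_n identifiers, the bucket window size is recovered
+ * as algo - SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_1 + 1.)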
+ * + * TRIVIAL: Simple algorithm, no extra memory needed + * STRAUSS: Strauss algorithm (efficient for small batches) + * PIPPENGER_n: Pippenger algorithm with bucket window size n + */ +typedef enum { + SECP256K1_ECMULT_MULTI_ALGO_TRIVIAL = 0, + SECP256K1_ECMULT_MULTI_ALGO_STRAUSS = 1, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_1 = 2, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_2 = 3, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_3 = 4, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_4 = 5, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_5 = 6, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_6 = 7, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_7 = 8, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_8 = 9, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_9 = 10, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_10 = 11, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_11 = 12, + SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_12 = 13 +} secp256k1_ecmult_multi_algo; + +#define SECP256K1_ECMULT_MULTI_NUM_ALGOS 14 /** - * Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai. - * Chooses the right algorithm for a given number of points and scratch space - * size. Resets and overwrites the given scratch space. If the points do not - * fit in the scratch space the algorithm is repeatedly run with batches of - * points. If no scratch space is given then a simple algorithm is used that - * simply multiplies the points with the corresponding scalars and adds them up. - * Returns: 1 on success (including when inp_g_sc is NULL and n is 0) - * 0 if there is not enough scratch space for a single point or - * callback returns 0 + * Calculate max batch size for a given memory limit. + * + * For each algorithm, memory usage is modeled as m(x) = A*x + B and + * running time as c(x) = C*x + D, where x is the batch size. This + * function finds the algorithm that minimizes time per operation + * C + D/x at the maximum batch size x = (mem_limit - B) / A. + * + * Returns: The optimal batch size, or 0 if memory is insufficient. */ -static int secp256k1_ecmult_multi_var(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n); +static size_t secp256k1_ecmult_multi_batch_size(size_t mem_limit); + +/** + * Select the best algorithm for a given batch size within the memory + * limit. + * + * Among algorithms that fit within mem_limit for the given batch_size, + * selects the one that minimizes time per operation C + D/batch_size. + * + * Returns: The optimal algorithm identifier. + */ +static secp256k1_ecmult_multi_algo secp256k1_ecmult_multi_select( + size_t mem_limit, + size_t batch_size +); + +/** + * Multi-multiply: R = scalar_g * G + sum_i scalars[i] * points[i]. + * + * Chooses the right algorithm for the given number of points. + * + * Returns: 1 on success, 0 on memory allocation failure. 
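+ *
+ * scalar_g may be NULL, in which case no G term is added; n_points may be
+ * 0, in which case R is simply scalar_g*G (or infinity if scalar_g is
+ * also NULL).
+ *
+ * Illustrative usage sketch (ctx, n_points, points and scalars are
+ * caller-provided; the 64 MB limit is just an example value):
+ *
+ *     secp256k1_gej r;
+ *     if (!secp256k1_ecmult_multi(&ctx->error_callback, &r, n_points,
+ *                                 points, scalars, NULL,
+ *                                 64 * 1024 * 1024)) {
+ *         return 0;
+ *     }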
+ */ +static int secp256k1_ecmult_multi( + const secp256k1_callback *error_callback, + secp256k1_gej *r, + size_t n_points, + const secp256k1_ge *points, + const secp256k1_scalar *scalars, + const secp256k1_scalar *scalar_g, + size_t mem_limit +); + +/* Only for benchmarks and testing */ +static int secp256k1_ecmult_multi_internal( + const secp256k1_callback *error_callback, + secp256k1_ecmult_multi_algo algo, + secp256k1_gej *r, + size_t n_points, + const secp256k1_ge *points, + const secp256k1_scalar *scalars, + const secp256k1_scalar *scalar_g +); #endif /* SECP256K1_ECMULT_H */ diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index 1a05244c24..8c0d616832 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -45,15 +45,8 @@ #define WNAF_SIZE_BITS(bits, w) CEIL_DIV(bits, w) #define WNAF_SIZE(w) WNAF_SIZE_BITS(WNAF_BITS, w) -/* The number of objects allocated on the scratch space for ecmult_multi algorithms */ -#define PIPPENGER_SCRATCH_OBJECTS 6 -#define STRAUSS_SCRATCH_OBJECTS 5 - #define PIPPENGER_MAX_BUCKET_WINDOW 12 -/* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */ -#define ECMULT_PIPPENGER_THRESHOLD 88 - #define ECMULT_MAX_POINTS_PER_BATCH 5000000 /** Fill a table 'pre_a' with precomputed odd multiples of a. @@ -374,59 +367,6 @@ static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const sec secp256k1_ecmult_strauss_wnaf(&state, r, 1, a, na, ng); } -static size_t secp256k1_strauss_scratch_size(size_t n_points) { - static const size_t point_size = (sizeof(secp256k1_ge) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar); - return n_points*point_size; -} - -static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) { - secp256k1_gej* points; - secp256k1_scalar* scalars; - struct secp256k1_strauss_state state; - size_t i; - const size_t scratch_checkpoint = secp256k1_scratch_checkpoint(error_callback, scratch); - - secp256k1_gej_set_infinity(r); - if (inp_g_sc == NULL && n_points == 0) { - return 1; - } - - /* We allocate STRAUSS_SCRATCH_OBJECTS objects on the scratch space. If these - * allocations change, make sure to update the STRAUSS_SCRATCH_OBJECTS - * constant and strauss_scratch_size accordingly. 
*/ - points = (secp256k1_gej*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_gej)); - scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_scalar)); - state.aux = (secp256k1_fe*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe)); - state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge)); - state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(struct secp256k1_strauss_point_state)); - - if (points == NULL || scalars == NULL || state.aux == NULL || state.pre_a == NULL || state.ps == NULL) { - secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint); - return 0; - } - - for (i = 0; i < n_points; i++) { - secp256k1_ge point; - if (!cb(&scalars[i], &point, i+cb_offset, cbdata)) { - secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint); - return 0; - } - secp256k1_gej_set_ge(&points[i], &point); - } - secp256k1_ecmult_strauss_wnaf(&state, r, n_points, points, scalars, inp_g_sc); - secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint); - return 1; -} - -/* Wrapper for secp256k1_ecmult_multi_func interface */ -static int secp256k1_ecmult_strauss_batch_single(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { - return secp256k1_ecmult_strauss_batch(error_callback, scratch, r, inp_g_sc, cb, cbdata, n, 0); -} - -static size_t secp256k1_strauss_max_points(const secp256k1_callback* error_callback, secp256k1_scratch *scratch) { - return secp256k1_scratch_max_allocation(error_callback, scratch, STRAUSS_SCRATCH_OBJECTS) / secp256k1_strauss_scratch_size(1); -} - /** Convert a number to WNAF notation. * The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)+1) - return_val. * It has the following guarantees: @@ -590,58 +530,6 @@ static int secp256k1_ecmult_pippenger_wnaf(secp256k1_gej *buckets, int bucket_wi return 1; } -/** - * Returns optimal bucket_window (number of bits of a scalar represented by a - * set of buckets) for a given number of points. - */ -static int secp256k1_pippenger_bucket_window(size_t n) { - if (n <= 1) { - return 1; - } else if (n <= 4) { - return 2; - } else if (n <= 20) { - return 3; - } else if (n <= 57) { - return 4; - } else if (n <= 136) { - return 5; - } else if (n <= 235) { - return 6; - } else if (n <= 1260) { - return 7; - } else if (n <= 4420) { - return 9; - } else if (n <= 7880) { - return 10; - } else if (n <= 16050) { - return 11; - } else { - return PIPPENGER_MAX_BUCKET_WINDOW; - } -} - -/** - * Returns the maximum optimal number of points for a bucket_window. 
- */ -static size_t secp256k1_pippenger_bucket_window_inv(int bucket_window) { - switch(bucket_window) { - case 1: return 1; - case 2: return 4; - case 3: return 20; - case 4: return 57; - case 5: return 136; - case 6: return 235; - case 7: return 1260; - case 8: return 1260; - case 9: return 4420; - case 10: return 7880; - case 11: return 16050; - case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; - } - return 0; -} - - SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) { secp256k1_scalar tmp = *s1; secp256k1_scalar_split_lambda(s1, s2, &tmp); @@ -658,212 +546,365 @@ SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, s } /** - * Returns the scratch size required for a given number of points (excluding - * base point G) without considering alignment. + * Algorithm Selection: ABCD Model + * + * For each possible batch algorithm choice, memory usage is + * m(x) = A*x + B and running time is + * c(x) = C*x + D, where x is the batch size. + * + * A = per-point memory (bytes) + * B = fixed memory overhead (bytes) + * C = per-point time cost + * D = fixed time overhead */ -static size_t secp256k1_pippenger_scratch_size(size_t n_points, int bucket_window) { - size_t entries = 2*n_points + 2; - size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int); - return (sizeof(secp256k1_gej) << bucket_window) + sizeof(struct secp256k1_pippenger_state) + entries * entry_size; -} -static int secp256k1_ecmult_pippenger_batch(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) { - const size_t scratch_checkpoint = secp256k1_scratch_checkpoint(error_callback, scratch); - /* Use 2(n+1) with the endomorphism, when calculating batch - * sizes. The reason for +1 is that we add the G scalar to the list of - * other scalars. */ - size_t entries = 2*n_points + 2; - secp256k1_ge *points; - secp256k1_scalar *scalars; - secp256k1_gej *buckets; - struct secp256k1_pippenger_state *state_space; - size_t idx = 0; - size_t point_idx = 0; - int bucket_window; +struct secp256k1_ecmult_multi_abcd { + size_t A; + size_t B; + size_t C; + size_t D; +}; - secp256k1_gej_set_infinity(r); - if (inp_g_sc == NULL && n_points == 0) { - return 1; - } - bucket_window = secp256k1_pippenger_bucket_window(n_points); - - /* We allocate PIPPENGER_SCRATCH_OBJECTS objects on the scratch space. If - * these allocations change, make sure to update the - * PIPPENGER_SCRATCH_OBJECTS constant and pippenger_scratch_size - * accordingly. 
*/ - points = (secp256k1_ge *) secp256k1_scratch_alloc(error_callback, scratch, entries * sizeof(*points)); - scalars = (secp256k1_scalar *) secp256k1_scratch_alloc(error_callback, scratch, entries * sizeof(*scalars)); - state_space = (struct secp256k1_pippenger_state *) secp256k1_scratch_alloc(error_callback, scratch, sizeof(*state_space)); - if (points == NULL || scalars == NULL || state_space == NULL) { - secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint); - return 0; - } - state_space->ps = (struct secp256k1_pippenger_point_state *) secp256k1_scratch_alloc(error_callback, scratch, entries * sizeof(*state_space->ps)); - state_space->wnaf_na = (int *) secp256k1_scratch_alloc(error_callback, scratch, entries*(WNAF_SIZE(bucket_window+1)) * sizeof(int)); - buckets = (secp256k1_gej *) secp256k1_scratch_alloc(error_callback, scratch, ((size_t)1 << bucket_window) * sizeof(*buckets)); - if (state_space->ps == NULL || state_space->wnaf_na == NULL || buckets == NULL) { - secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint); - return 0; - } +/* Strauss per-point memory */ +#define SECP256K1_STRAUSS_POINT_SIZE \ + ((sizeof(secp256k1_ge) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) \ + + sizeof(struct secp256k1_strauss_point_state) \ + + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar)) - if (inp_g_sc != NULL) { - scalars[0] = *inp_g_sc; - points[0] = secp256k1_ge_const_g; - idx++; - secp256k1_ecmult_endo_split(&scalars[0], &scalars[1], &points[0], &points[1]); - idx++; - } +/* Pippenger per-entry memory */ +#define SECP256K1_PIPPENGER_ENTRY_SIZE(w) \ + (sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) \ + + sizeof(struct secp256k1_pippenger_point_state) \ + + WNAF_SIZE((w)+1) * sizeof(int)) + +/* Pippenger per-point memory: 2 entries due to endomorphism */ +#define SECP256K1_PIPPENGER_POINT_SIZE(w) (2 * SECP256K1_PIPPENGER_ENTRY_SIZE(w)) + +/* Pippenger fixed overhead: buckets + state + 2 extra entries */ +#define SECP256K1_PIPPENGER_FIXED_SIZE(w) \ + ((sizeof(secp256k1_gej) << (w)) \ + + 2 * SECP256K1_PIPPENGER_ENTRY_SIZE(w)) + +/* + * ABCD constants for all batch algorithms. 
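+ *
+ * Worked example of how these numbers drive selection: at batch size
+ * x = 100, Strauss models as 109 + 120/100, about 110 per point, while
+ * Pippenger with window 5 models as 86 + 2187/100, about 108, so
+ * PIPPENGER_5 is preferred when it fits in memory. Below roughly 90
+ * points Strauss comes out ahead again, in line with the previous
+ * fixed ECMULT_PIPPENGER_THRESHOLD of 88.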
+ * + * Index 0 = TRIVIAL (no memory, very slow) + * Index 1 = STRAUSS (efficient for small batche sizes) + * Index 2-13 = PIPPENGER with window sizes 1-12 + */ + static const struct secp256k1_ecmult_multi_abcd secp256k1_ecmult_multi_abcds[SECP256K1_ECMULT_MULTI_NUM_ALGOS] = { +/* A (per-point bytes) B (fixed bytes) C D */ + {0, 0, 1000, 0 }, + {SECP256K1_STRAUSS_POINT_SIZE, 0, 109, 120 }, + {SECP256K1_PIPPENGER_POINT_SIZE(1), SECP256K1_PIPPENGER_FIXED_SIZE(1), 197, 403 }, + {SECP256K1_PIPPENGER_POINT_SIZE(2), SECP256K1_PIPPENGER_FIXED_SIZE(2), 148, 590 }, + {SECP256K1_PIPPENGER_POINT_SIZE(3), SECP256K1_PIPPENGER_FIXED_SIZE(3), 117, 877 }, + {SECP256K1_PIPPENGER_POINT_SIZE(4), SECP256K1_PIPPENGER_FIXED_SIZE(4), 100, 1340 }, + {SECP256K1_PIPPENGER_POINT_SIZE(5), SECP256K1_PIPPENGER_FIXED_SIZE(5), 86, 2187 }, + {SECP256K1_PIPPENGER_POINT_SIZE(6), SECP256K1_PIPPENGER_FIXED_SIZE(6), 75, 3703 }, + {SECP256K1_PIPPENGER_POINT_SIZE(7), SECP256K1_PIPPENGER_FIXED_SIZE(7), 66, 6324 }, + {SECP256K1_PIPPENGER_POINT_SIZE(8), SECP256K1_PIPPENGER_FIXED_SIZE(8), 61, 10681 }, + {SECP256K1_PIPPENGER_POINT_SIZE(9), SECP256K1_PIPPENGER_FIXED_SIZE(9), 56, 19223 }, + {SECP256K1_PIPPENGER_POINT_SIZE(10), SECP256K1_PIPPENGER_FIXED_SIZE(10), 53, 36521 }, + {SECP256K1_PIPPENGER_POINT_SIZE(11), SECP256K1_PIPPENGER_FIXED_SIZE(11), 49, 71369 }, + {SECP256K1_PIPPENGER_POINT_SIZE(12), SECP256K1_PIPPENGER_FIXED_SIZE(12), 46, 130576}, +}; - while (point_idx < n_points) { - if (!cb(&scalars[idx], &points[idx], point_idx + cb_offset, cbdata)) { - secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint); - return 0; +static size_t secp256k1_ecmult_multi_batch_size(size_t mem_limit) { + /* We are implicitly using the TRIVIAL algorithm as a fallback + * but we will only use it if no other algorithm fits. If that + * is the case we can use max points as the batch size. */ + size_t best_batch_size = ECMULT_MAX_POINTS_PER_BATCH; + size_t min_optime = SIZE_MAX; + int i; + + for (i = 1 /* ignores TRIVIAL */; i < SECP256K1_ECMULT_MULTI_NUM_ALGOS; i++) { + const struct secp256k1_ecmult_multi_abcd *p = &secp256k1_ecmult_multi_abcds[i]; + size_t A = p->A, B = p->B, C = p->C, D = p->D, optime, batch_size; + + if (mem_limit <= B) continue; + + batch_size = (mem_limit - B) / A; + + if (batch_size == 0) continue; + + optime = C + D / batch_size; + + if (optime < min_optime) { + min_optime = optime; + best_batch_size = batch_size; } - idx++; - secp256k1_ecmult_endo_split(&scalars[idx - 1], &scalars[idx], &points[idx - 1], &points[idx]); - idx++; - point_idx++; } - secp256k1_ecmult_pippenger_wnaf(buckets, bucket_window, state_space, r, scalars, points, idx); - secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint); - return 1; + return best_batch_size; } -/* Wrapper for secp256k1_ecmult_multi_func interface */ -static int secp256k1_ecmult_pippenger_batch_single(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { - return secp256k1_ecmult_pippenger_batch(error_callback, scratch, r, inp_g_sc, cb, cbdata, n, 0); -} +static secp256k1_ecmult_multi_algo secp256k1_ecmult_multi_select(size_t mem_limit, size_t batch_size) { + secp256k1_ecmult_multi_algo best_algo = SECP256K1_ECMULT_MULTI_ALGO_TRIVIAL; + size_t min_optime = SIZE_MAX; + int i; -/** - * Returns the maximum number of points in addition to G that can be used with - * a given scratch space. 
The function ensures that fewer points may also be - * used. - */ -static size_t secp256k1_pippenger_max_points(const secp256k1_callback* error_callback, secp256k1_scratch *scratch) { - size_t max_alloc = secp256k1_scratch_max_allocation(error_callback, scratch, PIPPENGER_SCRATCH_OBJECTS); - int bucket_window; - size_t res = 0; - - for (bucket_window = 1; bucket_window <= PIPPENGER_MAX_BUCKET_WINDOW; bucket_window++) { - size_t n_points; - size_t max_points = secp256k1_pippenger_bucket_window_inv(bucket_window); - size_t space_for_points; - size_t space_overhead; - size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int); - - entry_size = 2*entry_size; - space_overhead = (sizeof(secp256k1_gej) << bucket_window) + entry_size + sizeof(struct secp256k1_pippenger_state); - if (space_overhead > max_alloc) { - break; - } - space_for_points = max_alloc - space_overhead; + /* Use TRIVIAL fallback */ + if (batch_size == 0) return best_algo; - n_points = space_for_points/entry_size; - n_points = n_points > max_points ? max_points : n_points; - if (n_points > res) { - res = n_points; - } - if (n_points < max_points) { - /* A larger bucket_window may support even more points. But if we - * would choose that then the caller couldn't safely use any number - * smaller than what this function returns */ - break; + for (i = 0; i < SECP256K1_ECMULT_MULTI_NUM_ALGOS; i++) { + const struct secp256k1_ecmult_multi_abcd *p = &secp256k1_ecmult_multi_abcds[i]; + size_t A = p->A, B = p->B, C = p->C, D = p->D, optime; + size_t mem_usage = A * batch_size + B; + + if (mem_usage > mem_limit) continue; + + optime = C + D / batch_size; + + if (optime < min_optime) { + min_optime = optime; + best_algo = (secp256k1_ecmult_multi_algo)i; } } - return res; + + return best_algo; } -/* Computes ecmult_multi by simply multiplying and adding each point. Does not - * require a scratch space */ -static int secp256k1_ecmult_multi_simple_var(secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points) { - size_t point_idx; +/* Trivial algorithm: Computes ecmult_multi by simply multiplying and adding each point. 
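+ * It needs no auxiliary memory (A = B = 0 in the ABCD table above), which
+ * is why secp256k1_ecmult_multi_select falls back to it when no other
+ * algorithm fits within mem_limit. Points at infinity are skipped.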
*/ +static int secp256k1_ecmult_multi_trivial( + secp256k1_gej *r, + size_t n, + const secp256k1_ge *points, + const secp256k1_scalar *scalars, + const secp256k1_scalar *scalar_g +) { + size_t i; secp256k1_gej tmpj; secp256k1_gej_set_infinity(r); - secp256k1_gej_set_infinity(&tmpj); - /* r = inp_g_sc*G */ - secp256k1_ecmult(r, &tmpj, &secp256k1_scalar_zero, inp_g_sc); - for (point_idx = 0; point_idx < n_points; point_idx++) { - secp256k1_ge point; + + if (scalar_g != NULL) { + secp256k1_gej_set_infinity(&tmpj); + secp256k1_ecmult(r, &tmpj, &secp256k1_scalar_zero, scalar_g); + } + + for (i = 0; i < n; i++) { secp256k1_gej pointj; - secp256k1_scalar scalar; - if (!cb(&scalar, &point, point_idx, cbdata)) { - return 0; + if (secp256k1_ge_is_infinity(&points[i])) { + continue; } - /* r += scalar*point */ - secp256k1_gej_set_ge(&pointj, &point); - secp256k1_ecmult(&tmpj, &pointj, &scalar, NULL); + secp256k1_gej_set_ge(&pointj, &points[i]); + secp256k1_ecmult(&tmpj, &pointj, &scalars[i], NULL); secp256k1_gej_add_var(r, r, &tmpj, NULL); } + return 1; } -/* Compute the number of batches and the batch size given the maximum batch size and the - * total number of points */ -static int secp256k1_ecmult_multi_batch_size_helper(size_t *n_batches, size_t *n_batch_points, size_t max_n_batch_points, size_t n) { - if (max_n_batch_points == 0) { - return 0; - } - if (max_n_batch_points > ECMULT_MAX_POINTS_PER_BATCH) { - max_n_batch_points = ECMULT_MAX_POINTS_PER_BATCH; - } - if (n == 0) { - *n_batches = 0; - *n_batch_points = 0; +static int secp256k1_ecmult_multi_strauss( + const secp256k1_callback *error_callback, + secp256k1_gej *r, + size_t n, + const secp256k1_ge *points, + const secp256k1_scalar *scalars, + const secp256k1_scalar *scalar_g +) { + struct secp256k1_strauss_state state; + secp256k1_gej *points_gej = NULL; + size_t i; + int ret = 0; + + state.aux = NULL; + state.pre_a = NULL; + state.ps = NULL; + + secp256k1_gej_set_infinity(r); + + if (scalar_g == NULL && n == 0) { return 1; } - /* Compute ceil(n/max_n_batch_points) and ceil(n/n_batches) */ - *n_batches = CEIL_DIV(n, max_n_batch_points); - *n_batch_points = CEIL_DIV(n, *n_batches); - return 1; + + if (n > 0) { + state.aux = (secp256k1_fe *)checked_malloc(error_callback, + n * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe)); + state.pre_a = (secp256k1_ge *)checked_malloc(error_callback, + n * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge)); + state.ps = (struct secp256k1_strauss_point_state *)checked_malloc(error_callback, + n * sizeof(struct secp256k1_strauss_point_state)); + points_gej = (secp256k1_gej *)checked_malloc(error_callback, + n * sizeof(secp256k1_gej)); + + if (state.aux == NULL || state.pre_a == NULL || state.ps == NULL || + points_gej == NULL) { + goto cleanup; + } + + for (i = 0; i < n; i++) { + secp256k1_gej_set_ge(&points_gej[i], &points[i]); + } + } + + secp256k1_ecmult_strauss_wnaf(&state, r, n, points_gej, scalars, scalar_g); + ret = 1; + +cleanup: + free(state.aux); + free(state.pre_a); + free(state.ps); + free(points_gej); + + return ret; } -typedef int (*secp256k1_ecmult_multi_func)(const secp256k1_callback* error_callback, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t); -static int secp256k1_ecmult_multi_var(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { +/* Pippenger algorithm per window size */ +static int 
secp256k1_ecmult_multi_pippenger( + const secp256k1_callback *error_callback, + secp256k1_gej *r, + size_t n, + const secp256k1_ge *points, + const secp256k1_scalar *scalars, + const secp256k1_scalar *scalar_g, + int window_size +) { + size_t entries = 2 * n + 2; + size_t n_wnaf = WNAF_SIZE(window_size + 1); + secp256k1_ge *points_endo = NULL; + secp256k1_scalar *scalars_endo = NULL; + secp256k1_gej *buckets = NULL; + struct secp256k1_pippenger_state state_space; + size_t idx = 0; size_t i; + int ret = 0; - int (*f)(const secp256k1_callback* error_callback, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t, size_t); - size_t n_batches; - size_t n_batch_points; + state_space.ps = NULL; + state_space.wnaf_na = NULL; secp256k1_gej_set_infinity(r); - if (inp_g_sc == NULL && n == 0) { - return 1; - } else if (n == 0) { - secp256k1_ecmult(r, r, &secp256k1_scalar_zero, inp_g_sc); + + if (scalar_g == NULL && n == 0) { return 1; } - if (scratch == NULL) { - return secp256k1_ecmult_multi_simple_var(r, inp_g_sc, cb, cbdata, n); + + if (window_size < 1) window_size = 1; + if (window_size > PIPPENGER_MAX_BUCKET_WINDOW) window_size = PIPPENGER_MAX_BUCKET_WINDOW; + + points_endo = (secp256k1_ge *)checked_malloc(error_callback, + entries * sizeof(secp256k1_ge)); + scalars_endo = (secp256k1_scalar *)checked_malloc(error_callback, + entries * sizeof(secp256k1_scalar)); + state_space.ps = (struct secp256k1_pippenger_point_state *)checked_malloc(error_callback, + entries * sizeof(struct secp256k1_pippenger_point_state)); + state_space.wnaf_na = (int *)checked_malloc(error_callback, + entries * n_wnaf * sizeof(int)); + buckets = (secp256k1_gej *)checked_malloc(error_callback, + ((size_t)1 << window_size) * sizeof(secp256k1_gej)); + + if (points_endo == NULL || scalars_endo == NULL || + state_space.ps == NULL || state_space.wnaf_na == NULL || buckets == NULL) { + goto cleanup; } - /* Compute the batch sizes for Pippenger's algorithm given a scratch space. If it's greater than - * a threshold use Pippenger's algorithm. Otherwise use Strauss' algorithm. - * As a first step check if there's enough space for Pippenger's algo (which requires less space - * than Strauss' algo) and if not, use the simple algorithm. */ - if (!secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, secp256k1_pippenger_max_points(error_callback, scratch), n)) { - return secp256k1_ecmult_multi_simple_var(r, inp_g_sc, cb, cbdata, n); + if (scalar_g != NULL) { + scalars_endo[0] = *scalar_g; + points_endo[0] = secp256k1_ge_const_g; + idx++; + secp256k1_ecmult_endo_split(&scalars_endo[0], &scalars_endo[1], + &points_endo[0], &points_endo[1]); + idx++; } - if (n_batch_points >= ECMULT_PIPPENGER_THRESHOLD) { - f = secp256k1_ecmult_pippenger_batch; - } else { - if (!secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, secp256k1_strauss_max_points(error_callback, scratch), n)) { - return secp256k1_ecmult_multi_simple_var(r, inp_g_sc, cb, cbdata, n); + + for (i = 0; i < n; i++) { + if (secp256k1_ge_is_infinity(&points[i])) { + continue; } - f = secp256k1_ecmult_strauss_batch; + scalars_endo[idx] = scalars[i]; + points_endo[idx] = points[i]; + idx++; + secp256k1_ecmult_endo_split(&scalars_endo[idx - 1], &scalars_endo[idx], + &points_endo[idx - 1], &points_endo[idx]); + idx++; } - for(i = 0; i < n_batches; i++) { - size_t nbp = n < n_batch_points ? 
n : n_batch_points; - size_t offset = n_batch_points*i; - secp256k1_gej tmp; - if (!f(error_callback, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) { - return 0; - } - secp256k1_gej_add_var(r, r, &tmp, NULL); - n -= nbp; + + secp256k1_ecmult_pippenger_wnaf(buckets, window_size, &state_space, r, + scalars_endo, points_endo, idx); + + ret = 1; + +cleanup: + free(points_endo); + free(scalars_endo); + free(state_space.ps); + free(state_space.wnaf_na); + free(buckets); + + return ret; +} + +static int secp256k1_ecmult_multi( + const secp256k1_callback *error_callback, + secp256k1_gej *r, + size_t n_points, + const secp256k1_ge *points, + const secp256k1_scalar *scalars, + const secp256k1_scalar *scalar_g, + size_t mem_limit +) { + secp256k1_ecmult_multi_algo algo = secp256k1_ecmult_multi_select(mem_limit, n_points); + return secp256k1_ecmult_multi_internal(error_callback, algo, r, n_points, points, scalars, scalar_g); +} + + +static int secp256k1_ecmult_multi_internal( + const secp256k1_callback *error_callback, + secp256k1_ecmult_multi_algo algo, + secp256k1_gej *r, + size_t n_points, + const secp256k1_ge *points, + const secp256k1_scalar *scalars, + const secp256k1_scalar *scalar_g +) { + switch (algo) { + case SECP256K1_ECMULT_MULTI_ALGO_TRIVIAL: + return secp256k1_ecmult_multi_trivial(r, n_points, points, scalars, scalar_g); + + case SECP256K1_ECMULT_MULTI_ALGO_STRAUSS: + return secp256k1_ecmult_multi_strauss(error_callback, r, n_points, + points, scalars, scalar_g); + + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_1: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 1); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_2: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 2); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_3: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 3); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_4: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 4); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_5: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 5); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_6: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 6); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_7: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 7); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_8: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 8); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_9: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 9); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_10: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 10); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_11: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 11); + case SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_12: + return secp256k1_ecmult_multi_pippenger(error_callback, r, n_points, + points, scalars, scalar_g, 12); + default: + return secp256k1_ecmult_multi_trivial(r, n_points, points, scalars, scalar_g); } - return 1; } #endif /* SECP256K1_ECMULT_IMPL_H */ diff --git a/src/modules/musig/keyagg_impl.h 
b/src/modules/musig/keyagg_impl.h index e412a27eca..8ddb797f95 100644 --- a/src/modules/musig/keyagg_impl.h +++ b/src/modules/musig/keyagg_impl.h @@ -149,26 +149,13 @@ typedef struct { secp256k1_ge second_pk; } secp256k1_musig_pubkey_agg_ecmult_data; -/* Callback for batch EC multiplication to compute keyaggcoef_0*P0 + keyaggcoef_1*P1 + ... */ -static int secp256k1_musig_pubkey_agg_callback(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *data) { - secp256k1_musig_pubkey_agg_ecmult_data *ctx = (secp256k1_musig_pubkey_agg_ecmult_data *) data; - int ret; - ret = secp256k1_pubkey_load(ctx->ctx, pt, ctx->pks[idx]); -#ifdef VERIFY - /* pubkey_load can't fail because the same pks have already been loaded in - * `musig_compute_pks_hash` (and we test this). */ - VERIFY_CHECK(ret); -#else - (void) ret; -#endif - secp256k1_musig_keyaggcoef_internal(sc, ctx->pks_hash, pt, &ctx->second_pk); - return 1; -} - int secp256k1_musig_pubkey_agg(const secp256k1_context* ctx, secp256k1_xonly_pubkey *agg_pk, secp256k1_musig_keyagg_cache *keyagg_cache, const secp256k1_pubkey * const* pubkeys, size_t n_pubkeys) { secp256k1_musig_pubkey_agg_ecmult_data ecmult_data; secp256k1_gej pkj; secp256k1_ge pkp; + secp256k1_ge *points = NULL; + secp256k1_scalar *scalars = NULL; + size_t mem_limit; size_t i; VERIFY_CHECK(ctx != NULL); @@ -199,14 +186,37 @@ int secp256k1_musig_pubkey_agg(const secp256k1_context* ctx, secp256k1_xonly_pub if (!secp256k1_musig_compute_pks_hash(ctx, ecmult_data.pks_hash, pubkeys, n_pubkeys)) { return 0; } - /* TODO: actually use optimized ecmult_multi algorithms by providing a - * scratch space */ - if (!secp256k1_ecmult_multi_var(&ctx->error_callback, NULL, &pkj, NULL, secp256k1_musig_pubkey_agg_callback, (void *) &ecmult_data, n_pubkeys)) { - /* In order to reach this line with the current implementation of - * ecmult_multi_var one would need to provide a callback that can - * fail. */ + + /* TODO: This follows the discussed approach of letting the users use + * malloc instead of scratch space. However there could also be a simple + * wrapper that abstracts the malloc stuff away. This could be cleaner. */ + points = (secp256k1_ge *)checked_malloc(&ctx->error_callback, n_pubkeys * sizeof(secp256k1_ge)); + scalars = (secp256k1_scalar *)checked_malloc(&ctx->error_callback, n_pubkeys * sizeof(secp256k1_scalar)); + + for (i = 0; i < n_pubkeys; i++) { +#ifdef VERIFY + /* pubkey_load can't fail because the same pks have already been loaded + * in `musig_compute_pks_hash` (and we test this). */ + VERIFY_CHECK(secp256k1_pubkey_load(ctx, &points[i], pubkeys[i])); +#else + (void) secp256k1_pubkey_load(ctx, &points[i], pubkeys[i]); +#endif + secp256k1_musig_keyaggcoef_internal(&scalars[i], ecmult_data.pks_hash, &points[i], &ecmult_data.second_pk); + } + + /* TODO: Assumes that Strauss will be the optimal algorithm almost every + * time. Previously this just used TRIVIAL algorithm by not providing + * scratch space. To be discussed if this should be changed. 
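+     * Sizing mem_limit to exactly n_pubkeys Strauss points guarantees that
+     * STRAUSS always fits; secp256k1_ecmult_multi_select may still pick a
+     * Pippenger window here if one fits in the same budget and models as
+     * faster.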
*/ + mem_limit = SECP256K1_STRAUSS_POINT_SIZE * n_pubkeys; + if (!secp256k1_ecmult_multi(&ctx->error_callback, &pkj, n_pubkeys, points, scalars, NULL, mem_limit)) { + free(points); + free(scalars); return 0; } + + free(points); + free(scalars); + secp256k1_ge_set_gej(&pkp, &pkj); secp256k1_fe_normalize_var(&pkp.y); /* The resulting public key is infinity with negligible probability */ diff --git a/src/tests.c b/src/tests.c index e09f5c7d23..97b8748713 100644 --- a/src/tests.c +++ b/src/tests.c @@ -4644,256 +4644,13 @@ static void run_ecmult_const_tests(void) { ecmult_const_mult_xonly(); } -typedef struct { - secp256k1_scalar *sc; - secp256k1_ge *pt; -} ecmult_multi_data; - -static int ecmult_multi_callback(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *cbdata) { - ecmult_multi_data *data = (ecmult_multi_data*) cbdata; - *sc = data->sc[idx]; - *pt = data->pt[idx]; - return 1; -} - -static int ecmult_multi_false_callback(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *cbdata) { - (void)sc; - (void)pt; - (void)idx; - (void)cbdata; - return 0; -} - -static void test_ecmult_multi(secp256k1_scratch *scratch, secp256k1_ecmult_multi_func ecmult_multi) { - int ncount; - secp256k1_scalar sc[32]; - secp256k1_ge pt[32]; - secp256k1_gej r; - secp256k1_gej r2; - ecmult_multi_data data; - - data.sc = sc; - data.pt = pt; - - /* No points to multiply */ - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, NULL, ecmult_multi_callback, &data, 0)); - - /* Check 1- and 2-point multiplies against ecmult */ - for (ncount = 0; ncount < COUNT; ncount++) { - secp256k1_ge ptg; - secp256k1_gej ptgj; - testutil_random_scalar_order(&sc[0]); - testutil_random_scalar_order(&sc[1]); - - testutil_random_ge_test(&ptg); - secp256k1_gej_set_ge(&ptgj, &ptg); - pt[0] = ptg; - pt[1] = secp256k1_ge_const_g; - - /* only G scalar */ - secp256k1_ecmult(&r2, &ptgj, &secp256k1_scalar_zero, &sc[0]); - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &sc[0], ecmult_multi_callback, &data, 0)); - CHECK(secp256k1_gej_eq_var(&r, &r2)); - - /* 1-point */ - secp256k1_ecmult(&r2, &ptgj, &sc[0], &secp256k1_scalar_zero); - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, 1)); - CHECK(secp256k1_gej_eq_var(&r, &r2)); - - /* Try to multiply 1 point, but callback returns false */ - CHECK(!ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_false_callback, &data, 1)); - - /* 2-point */ - secp256k1_ecmult(&r2, &ptgj, &sc[0], &sc[1]); - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, 2)); - CHECK(secp256k1_gej_eq_var(&r, &r2)); - - /* 2-point with G scalar */ - secp256k1_ecmult(&r2, &ptgj, &sc[0], &sc[1]); - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &sc[1], ecmult_multi_callback, &data, 1)); - CHECK(secp256k1_gej_eq_var(&r, &r2)); - } - - /* Check infinite outputs of various forms */ - for (ncount = 0; ncount < COUNT; ncount++) { - secp256k1_ge ptg; - size_t i, j; - size_t sizes[] = { 2, 10, 32 }; - - for (j = 0; j < 3; j++) { - for (i = 0; i < 32; i++) { - testutil_random_scalar_order(&sc[i]); - secp256k1_ge_set_infinity(&pt[i]); - } - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, sizes[j])); - CHECK(secp256k1_gej_is_infinity(&r)); - } - - for (j = 0; j < 3; j++) { - for (i = 0; i < 32; i++) { - testutil_random_ge_test(&ptg); - pt[i] = ptg; - secp256k1_scalar_set_int(&sc[i], 0); - } - 
CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, sizes[j])); - CHECK(secp256k1_gej_is_infinity(&r)); - } - - for (j = 0; j < 3; j++) { - testutil_random_ge_test(&ptg); - for (i = 0; i < 16; i++) { - testutil_random_scalar_order(&sc[2*i]); - secp256k1_scalar_negate(&sc[2*i + 1], &sc[2*i]); - pt[2 * i] = ptg; - pt[2 * i + 1] = ptg; - } - - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, sizes[j])); - CHECK(secp256k1_gej_is_infinity(&r)); - - testutil_random_scalar_order(&sc[0]); - for (i = 0; i < 16; i++) { - testutil_random_ge_test(&ptg); - - sc[2*i] = sc[0]; - sc[2*i+1] = sc[0]; - pt[2 * i] = ptg; - secp256k1_ge_neg(&pt[2*i+1], &pt[2*i]); - } - - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, sizes[j])); - CHECK(secp256k1_gej_is_infinity(&r)); - } - - testutil_random_ge_test(&ptg); - secp256k1_scalar_set_int(&sc[0], 0); - pt[0] = ptg; - for (i = 1; i < 32; i++) { - pt[i] = ptg; - - testutil_random_scalar_order(&sc[i]); - secp256k1_scalar_add(&sc[0], &sc[0], &sc[i]); - secp256k1_scalar_negate(&sc[i], &sc[i]); - } - - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, 32)); - CHECK(secp256k1_gej_is_infinity(&r)); - } - - /* Check random points, constant scalar */ - for (ncount = 0; ncount < COUNT; ncount++) { - size_t i; - secp256k1_gej_set_infinity(&r); - - testutil_random_scalar_order(&sc[0]); - for (i = 0; i < 20; i++) { - secp256k1_ge ptg; - sc[i] = sc[0]; - testutil_random_ge_test(&ptg); - pt[i] = ptg; - secp256k1_gej_add_ge_var(&r, &r, &pt[i], NULL); - } - - secp256k1_ecmult(&r2, &r, &sc[0], &secp256k1_scalar_zero); - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, 20)); - CHECK(secp256k1_gej_eq_var(&r, &r2)); - } - - /* Check random scalars, constant point */ - for (ncount = 0; ncount < COUNT; ncount++) { - size_t i; - secp256k1_ge ptg; - secp256k1_gej p0j; - secp256k1_scalar rs; - secp256k1_scalar_set_int(&rs, 0); - - testutil_random_ge_test(&ptg); - for (i = 0; i < 20; i++) { - testutil_random_scalar_order(&sc[i]); - pt[i] = ptg; - secp256k1_scalar_add(&rs, &rs, &sc[i]); - } - - secp256k1_gej_set_ge(&p0j, &pt[0]); - secp256k1_ecmult(&r2, &p0j, &rs, &secp256k1_scalar_zero); - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, 20)); - CHECK(secp256k1_gej_eq_var(&r, &r2)); - } - - /* Sanity check that zero scalars don't cause problems */ - for (ncount = 0; ncount < 20; ncount++) { - testutil_random_scalar_order(&sc[ncount]); - testutil_random_ge_test(&pt[ncount]); - } - - secp256k1_scalar_set_int(&sc[0], 0); - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, 20)); - secp256k1_scalar_set_int(&sc[1], 0); - secp256k1_scalar_set_int(&sc[2], 0); - secp256k1_scalar_set_int(&sc[3], 0); - secp256k1_scalar_set_int(&sc[4], 0); - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, 6)); - CHECK(ecmult_multi(&CTX->error_callback, scratch, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, 5)); - CHECK(secp256k1_gej_is_infinity(&r)); - - /* Run through s0*(t0*P) + s1*(t1*P) exhaustively for many small values of s0, s1, t0, t1 */ - { - const size_t TOP = 8; - size_t s0i, s1i; - size_t t0i, t1i; - secp256k1_ge ptg; - secp256k1_gej 
ptgj; - - testutil_random_ge_test(&ptg); - secp256k1_gej_set_ge(&ptgj, &ptg); - - for(t0i = 0; t0i < TOP; t0i++) { - for(t1i = 0; t1i < TOP; t1i++) { - secp256k1_gej t0p, t1p; - secp256k1_scalar t0, t1; - - secp256k1_scalar_set_int(&t0, (t0i + 1) / 2); - secp256k1_scalar_cond_negate(&t0, t0i & 1); - secp256k1_scalar_set_int(&t1, (t1i + 1) / 2); - secp256k1_scalar_cond_negate(&t1, t1i & 1); - - secp256k1_ecmult(&t0p, &ptgj, &t0, &secp256k1_scalar_zero); - secp256k1_ecmult(&t1p, &ptgj, &t1, &secp256k1_scalar_zero); - - for(s0i = 0; s0i < TOP; s0i++) { - for(s1i = 0; s1i < TOP; s1i++) { - secp256k1_scalar tmp1, tmp2; - secp256k1_gej expected, actual; - - secp256k1_ge_set_gej(&pt[0], &t0p); - secp256k1_ge_set_gej(&pt[1], &t1p); - - secp256k1_scalar_set_int(&sc[0], (s0i + 1) / 2); - secp256k1_scalar_cond_negate(&sc[0], s0i & 1); - secp256k1_scalar_set_int(&sc[1], (s1i + 1) / 2); - secp256k1_scalar_cond_negate(&sc[1], s1i & 1); - - secp256k1_scalar_mul(&tmp1, &t0, &sc[0]); - secp256k1_scalar_mul(&tmp2, &t1, &sc[1]); - secp256k1_scalar_add(&tmp1, &tmp1, &tmp2); - - secp256k1_ecmult(&expected, &ptgj, &tmp1, &secp256k1_scalar_zero); - CHECK(ecmult_multi(&CTX->error_callback, scratch, &actual, &secp256k1_scalar_zero, ecmult_multi_callback, &data, 2)); - CHECK(secp256k1_gej_eq_var(&actual, &expected)); - } - } - } - } - } -} - -static int test_ecmult_multi_random(secp256k1_scratch *scratch) { - /* Large random test for ecmult_multi_* functions which exercises: +static int test_ecmult_multi_random(void) { + /* Large random test for ecmult_multi which exercises: * - Few or many inputs (0 up to 128, roughly exponentially distributed). * - Few or many 0*P or a*INF inputs (roughly uniformly distributed). * - Including or excluding an nonzero a*G term (or such a term at all). * - Final expected result equal to infinity or not (roughly 50%). - * - ecmult_multi_var, ecmult_strauss_single_batch, ecmult_pippenger_single_batch + * - Different algorithm selections based on batch size */ /* These 4 variables define the eventual input to the ecmult_multi function. @@ -4911,14 +4668,9 @@ static int test_ecmult_multi_random(secp256k1_scratch *scratch) { secp256k1_ge ge_tmp; /* Variables needed for the actual input to ecmult_multi. */ secp256k1_ge ges[128]; - ecmult_multi_data data; + size_t mem_limit = 1024 * 1024; /* 1 MB */ int i; - /* Which multiplication function to use */ - int fn = testrand_int(3); - secp256k1_ecmult_multi_func ecmult_multi = fn == 0 ? secp256k1_ecmult_multi_var : - fn == 1 ? secp256k1_ecmult_strauss_batch_single : - secp256k1_ecmult_pippenger_batch_single; /* Simulate exponentially distributed num. */ int num_bits = 2 + testrand_int(6); /* Number of (scalar, point) inputs (excluding g). */ @@ -4927,8 +4679,7 @@ static int test_ecmult_multi_random(secp256k1_scratch *scratch) { int num_nonzero = testrand_int(num + 1); /* Whether we're aiming to create an input with nonzero expected result. */ int nonzero_result = testrand_bits(1); - /* Whether we will provide nonzero g multiplicand. In some cases our hand - * is forced here based on num_nonzero and nonzero_result. */ + /* Whether we will provide nonzero g multiplicand. */ int g_nonzero = num_nonzero == 0 ? nonzero_result : num_nonzero == 1 && !nonzero_result ? 1 : (int)testrand_bits(1); @@ -5044,223 +4795,79 @@ static int test_ecmult_multi_random(secp256k1_scratch *scratch) { /* Compute affine versions of all inputs. */ secp256k1_ge_set_all_gej_var(ges, gejs, filled); /* Invoke ecmult_multi code. 
*/ - data.sc = scalars; - data.pt = ges; - CHECK(ecmult_multi(&CTX->error_callback, scratch, &computed, g_scalar_ptr, ecmult_multi_callback, &data, filled)); + CHECK(secp256k1_ecmult_multi(&CTX->error_callback, &computed, filled, ges, scalars, g_scalar_ptr, mem_limit)); mults += num_nonzero + g_nonzero; /* Compare with expected result. */ CHECK(secp256k1_gej_eq_var(&computed, &expected)); return mults; } -static void test_ecmult_multi_batch_single(secp256k1_ecmult_multi_func ecmult_multi) { - secp256k1_scalar sc; - secp256k1_ge pt; - secp256k1_gej r; - ecmult_multi_data data; - secp256k1_scratch *scratch_empty; - - testutil_random_ge_test(&pt); - testutil_random_scalar_order(&sc); - data.sc = ≻ - data.pt = &pt; - - /* Try to multiply 1 point, but scratch space is empty.*/ - scratch_empty = secp256k1_scratch_create(&CTX->error_callback, 0); - CHECK(!ecmult_multi(&CTX->error_callback, scratch_empty, &r, &secp256k1_scalar_zero, ecmult_multi_callback, &data, 1)); - secp256k1_scratch_destroy(&CTX->error_callback, scratch_empty); -} - -static void test_secp256k1_pippenger_bucket_window_inv(void) { - int i; - - CHECK(secp256k1_pippenger_bucket_window_inv(0) == 0); - for(i = 1; i <= PIPPENGER_MAX_BUCKET_WINDOW; i++) { - /* Bucket_window of 8 is not used with endo */ - if (i == 8) { - continue; - } - CHECK(secp256k1_pippenger_bucket_window(secp256k1_pippenger_bucket_window_inv(i)) == i); - if (i != PIPPENGER_MAX_BUCKET_WINDOW) { - CHECK(secp256k1_pippenger_bucket_window(secp256k1_pippenger_bucket_window_inv(i)+1) > i); - } - } -} - -/** - * Probabilistically test the function returning the maximum number of possible points - * for a given scratch space. - */ -static void test_ecmult_multi_pippenger_max_points(void) { - size_t scratch_size = testrand_bits(8); - size_t max_size = secp256k1_pippenger_scratch_size(secp256k1_pippenger_bucket_window_inv(PIPPENGER_MAX_BUCKET_WINDOW-1)+512, 12); - secp256k1_scratch *scratch; - size_t n_points_supported; - int bucket_window = 0; - - for(; scratch_size < max_size; scratch_size+=256) { - size_t i; - size_t total_alloc; - size_t checkpoint; - scratch = secp256k1_scratch_create(&CTX->error_callback, scratch_size); - CHECK(scratch != NULL); - checkpoint = secp256k1_scratch_checkpoint(&CTX->error_callback, scratch); - n_points_supported = secp256k1_pippenger_max_points(&CTX->error_callback, scratch); - if (n_points_supported == 0) { - secp256k1_scratch_destroy(&CTX->error_callback, scratch); - continue; - } - bucket_window = secp256k1_pippenger_bucket_window(n_points_supported); - /* allocate `total_alloc` bytes over `PIPPENGER_SCRATCH_OBJECTS` many allocations */ - total_alloc = secp256k1_pippenger_scratch_size(n_points_supported, bucket_window); - for (i = 0; i < PIPPENGER_SCRATCH_OBJECTS - 1; i++) { - CHECK(secp256k1_scratch_alloc(&CTX->error_callback, scratch, 1)); - total_alloc--; - } - CHECK(secp256k1_scratch_alloc(&CTX->error_callback, scratch, total_alloc)); - secp256k1_scratch_apply_checkpoint(&CTX->error_callback, scratch, checkpoint); - secp256k1_scratch_destroy(&CTX->error_callback, scratch); - } - CHECK(bucket_window == PIPPENGER_MAX_BUCKET_WINDOW); -} - -static void test_ecmult_multi_batch_size_helper(void) { - size_t n_batches, n_batch_points, max_n_batch_points, n; - - max_n_batch_points = 0; - n = 1; - CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 0); - - max_n_batch_points = 1; - n = 0; - CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 
1); - CHECK(n_batches == 0); - CHECK(n_batch_points == 0); - - max_n_batch_points = 2; - n = 5; - CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1); - CHECK(n_batches == 3); - CHECK(n_batch_points == 2); - - max_n_batch_points = ECMULT_MAX_POINTS_PER_BATCH; - n = ECMULT_MAX_POINTS_PER_BATCH; - CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1); - CHECK(n_batches == 1); - CHECK(n_batch_points == ECMULT_MAX_POINTS_PER_BATCH); - - max_n_batch_points = ECMULT_MAX_POINTS_PER_BATCH + 1; - n = ECMULT_MAX_POINTS_PER_BATCH + 1; - CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1); - CHECK(n_batches == 2); - CHECK(n_batch_points == ECMULT_MAX_POINTS_PER_BATCH/2 + 1); - - max_n_batch_points = 1; - n = SIZE_MAX; - CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1); - CHECK(n_batches == SIZE_MAX); - CHECK(n_batch_points == 1); - - max_n_batch_points = 2; - n = SIZE_MAX; - CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1); - CHECK(n_batches == SIZE_MAX/2 + 1); - CHECK(n_batch_points == 2); -} - -/** - * Run secp256k1_ecmult_multi_var with num points and a scratch space restricted to - * 1 <= i <= num points. - */ static void test_ecmult_multi_batching(void) { - static const int n_points = 2*ECMULT_PIPPENGER_THRESHOLD; + static const int n_points = 200; secp256k1_scalar scG; secp256k1_scalar *sc = (secp256k1_scalar *)checked_malloc(&CTX->error_callback, sizeof(secp256k1_scalar) * n_points); secp256k1_ge *pt = (secp256k1_ge *)checked_malloc(&CTX->error_callback, sizeof(secp256k1_ge) * n_points); - secp256k1_gej r; - secp256k1_gej r2; - ecmult_multi_data data; + secp256k1_gej r, r2; int i; - secp256k1_scratch *scratch; + size_t mem_limit; secp256k1_gej_set_infinity(&r2); - /* Get random scalars and group elements and compute result */ + /* Get random scalars and group elements and compute expected result */ testutil_random_scalar_order(&scG); secp256k1_ecmult(&r2, &r2, &secp256k1_scalar_zero, &scG); - for(i = 0; i < n_points; i++) { + for (i = 0; i < n_points; i++) { secp256k1_ge ptg; secp256k1_gej ptgj; testutil_random_ge_test(&ptg); - secp256k1_gej_set_ge(&ptgj, &ptg); pt[i] = ptg; testutil_random_scalar_order(&sc[i]); + secp256k1_gej_set_ge(&ptgj, &ptg); secp256k1_ecmult(&ptgj, &ptgj, &sc[i], NULL); secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL); } - data.sc = sc; - data.pt = pt; - secp256k1_gej_neg(&r2, &r2); - - /* Test with empty scratch space. It should compute the correct result using - * ecmult_mult_simple algorithm which doesn't require a scratch space. */ - scratch = secp256k1_scratch_create(&CTX->error_callback, 0); - CHECK(secp256k1_ecmult_multi_var(&CTX->error_callback, scratch, &r, &scG, ecmult_multi_callback, &data, n_points)); - secp256k1_gej_add_var(&r, &r, &r2, NULL); - CHECK(secp256k1_gej_is_infinity(&r)); - secp256k1_scratch_destroy(&CTX->error_callback, scratch); - - /* Test with space for 1 point in pippenger. That's not enough because - * ecmult_multi selects strauss which requires more memory. It should - * therefore select the simple algorithm. 
*/ - scratch = secp256k1_scratch_create(&CTX->error_callback, secp256k1_pippenger_scratch_size(1, 1) + PIPPENGER_SCRATCH_OBJECTS*ALIGNMENT); - CHECK(secp256k1_ecmult_multi_var(&CTX->error_callback, scratch, &r, &scG, ecmult_multi_callback, &data, n_points)); - secp256k1_gej_add_var(&r, &r, &r2, NULL); - CHECK(secp256k1_gej_is_infinity(&r)); - secp256k1_scratch_destroy(&CTX->error_callback, scratch); - - for(i = 1; i <= n_points; i++) { - if (i > ECMULT_PIPPENGER_THRESHOLD) { - int bucket_window = secp256k1_pippenger_bucket_window(i); - size_t scratch_size = secp256k1_pippenger_scratch_size(i, bucket_window); - scratch = secp256k1_scratch_create(&CTX->error_callback, scratch_size + PIPPENGER_SCRATCH_OBJECTS*ALIGNMENT); - } else { - size_t scratch_size = secp256k1_strauss_scratch_size(i); - scratch = secp256k1_scratch_create(&CTX->error_callback, scratch_size + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); - } - CHECK(secp256k1_ecmult_multi_var(&CTX->error_callback, scratch, &r, &scG, ecmult_multi_callback, &data, n_points)); - secp256k1_gej_add_var(&r, &r, &r2, NULL); - CHECK(secp256k1_gej_is_infinity(&r)); - secp256k1_scratch_destroy(&CTX->error_callback, scratch); + + /* Test with various memory limits */ + for (mem_limit = 1024; mem_limit <= 10 * 1024 * 1024; mem_limit *= 4) { + CHECK(secp256k1_ecmult_multi(&CTX->error_callback, &r, n_points, pt, sc, &scG, mem_limit)); + CHECK(secp256k1_gej_eq_var(&r, &r2)); } + free(sc); free(pt); } static void run_ecmult_multi_tests(void) { - secp256k1_scratch *scratch; int64_t todo = (int64_t)320 * COUNT; + size_t mem_limit, batch_size; + secp256k1_ecmult_multi_algo algo; + + /* Test batch_size computation with various memory limits */ + mem_limit = 1024; /* 1 KB */ + batch_size = secp256k1_ecmult_multi_batch_size(mem_limit); + CHECK(batch_size > 0 || mem_limit == 0); + + mem_limit = 1024 * 1024; /* 1 MB */ + batch_size = secp256k1_ecmult_multi_batch_size(mem_limit); + CHECK(batch_size > 0); + + /* Test algorithm selection */ + algo = secp256k1_ecmult_multi_select(mem_limit, 10); + CHECK(algo >= SECP256K1_ECMULT_MULTI_ALGO_TRIVIAL && + algo <= SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_12); + + algo = secp256k1_ecmult_multi_select(mem_limit, 500); + CHECK(algo >= SECP256K1_ECMULT_MULTI_ALGO_TRIVIAL && + algo <= SECP256K1_ECMULT_MULTI_ALGO_PIPPENGER_12); + + /* Zero batch size */ + algo = secp256k1_ecmult_multi_select(mem_limit, 0); + CHECK(algo == SECP256K1_ECMULT_MULTI_ALGO_TRIVIAL); - test_secp256k1_pippenger_bucket_window_inv(); - test_ecmult_multi_pippenger_max_points(); - scratch = secp256k1_scratch_create(&CTX->error_callback, 819200); - test_ecmult_multi(scratch, secp256k1_ecmult_multi_var); - test_ecmult_multi(NULL, secp256k1_ecmult_multi_var); - test_ecmult_multi(scratch, secp256k1_ecmult_pippenger_batch_single); - test_ecmult_multi_batch_single(secp256k1_ecmult_pippenger_batch_single); - test_ecmult_multi(scratch, secp256k1_ecmult_strauss_batch_single); - test_ecmult_multi_batch_single(secp256k1_ecmult_strauss_batch_single); while (todo > 0) { - todo -= test_ecmult_multi_random(scratch); + todo -= test_ecmult_multi_random(); } - secp256k1_scratch_destroy(&CTX->error_callback, scratch); - - /* Run test_ecmult_multi with space for exactly one point */ - scratch = secp256k1_scratch_create(&CTX->error_callback, secp256k1_strauss_scratch_size(1) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT); - test_ecmult_multi(scratch, secp256k1_ecmult_multi_var); - secp256k1_scratch_destroy(&CTX->error_callback, scratch); - test_ecmult_multi_batch_size_helper(); 
test_ecmult_multi_batching(); } @@ -5417,15 +5024,7 @@ static void run_wnaf(void) { CHECK(secp256k1_scalar_is_zero(&n)); } -static int test_ecmult_accumulate_cb(secp256k1_scalar* sc, secp256k1_ge* pt, size_t idx, void* data) { - const secp256k1_scalar* indata = (const secp256k1_scalar*)data; - *sc = *indata; - *pt = secp256k1_ge_const_g; - CHECK(idx == 0); - return 1; -} - -static void test_ecmult_accumulate(secp256k1_sha256* acc, const secp256k1_scalar* x, secp256k1_scratch* scratch) { +static void test_ecmult_accumulate(secp256k1_sha256* acc, const secp256k1_scalar* x) { /* Compute x*G in 6 different ways, serialize it uncompressed, and feed it into acc. */ secp256k1_gej rj1, rj2, rj3, rj4, rj5, rj6, gj, infj; secp256k1_ge r; @@ -5435,8 +5034,8 @@ static void test_ecmult_accumulate(secp256k1_sha256* acc, const secp256k1_scalar secp256k1_ecmult_gen(&CTX->ecmult_gen_ctx, &rj1, x); secp256k1_ecmult(&rj2, &gj, x, &secp256k1_scalar_zero); secp256k1_ecmult(&rj3, &infj, &secp256k1_scalar_zero, x); - CHECK(secp256k1_ecmult_multi_var(&CTX->error_callback, scratch, &rj4, x, NULL, NULL, 0)); - CHECK(secp256k1_ecmult_multi_var(&CTX->error_callback, scratch, &rj5, &secp256k1_scalar_zero, test_ecmult_accumulate_cb, (void*)x, 1)); + CHECK(secp256k1_ecmult_multi(&CTX->error_callback, &rj4, 0, NULL, NULL, x, 1024*1024)); + CHECK(secp256k1_ecmult_multi(&CTX->error_callback, &rj5, 1, &secp256k1_ge_const_g, x, &secp256k1_scalar_zero, 1024*1024)); secp256k1_ecmult_const(&rj6, &secp256k1_ge_const_g, x); secp256k1_ge_set_gej_var(&r, &rj1); CHECK(secp256k1_gej_eq_ge_var(&rj2, &r)); @@ -5468,7 +5067,6 @@ static void test_ecmult_constants_2bit(void) { secp256k1_sha256 acc; unsigned char b32[32]; int i, j; - secp256k1_scratch_space *scratch = secp256k1_scratch_space_create(CTX, 65536); /* Expected hash of all the computed points; created with an independent * implementation. 
*/ @@ -5481,22 +5079,20 @@ static void test_ecmult_constants_2bit(void) { secp256k1_sha256_initialize(&acc); for (i = 0; i <= 36; ++i) { secp256k1_scalar_set_int(&x, i); - test_ecmult_accumulate(&acc, &x, scratch); + test_ecmult_accumulate(&acc, &x); secp256k1_scalar_negate(&x, &x); - test_ecmult_accumulate(&acc, &x, scratch); + test_ecmult_accumulate(&acc, &x); }; for (i = 0; i < 256; ++i) { for (j = 1; j < 256; j += 2) { int k; secp256k1_scalar_set_int(&x, j); for (k = 0; k < i; ++k) secp256k1_scalar_add(&x, &x, &x); - test_ecmult_accumulate(&acc, &x, scratch); + test_ecmult_accumulate(&acc, &x); } } secp256k1_sha256_finalize(&acc, b32); CHECK(secp256k1_memcmp_var(b32, expected32, 32) == 0); - - secp256k1_scratch_space_destroy(CTX, scratch); } static void test_ecmult_constants_sha(uint32_t prefix, size_t iter, const unsigned char* expected32) { @@ -5512,7 +5108,6 @@ static void test_ecmult_constants_sha(uint32_t prefix, size_t iter, const unsign unsigned char b32[32]; unsigned char inp[6]; size_t i; - secp256k1_scratch_space *scratch = secp256k1_scratch_space_create(CTX, 65536); inp[0] = prefix & 0xFF; inp[1] = (prefix >> 8) & 0xFF; @@ -5520,11 +5115,11 @@ static void test_ecmult_constants_sha(uint32_t prefix, size_t iter, const unsign inp[3] = (prefix >> 24) & 0xFF; secp256k1_sha256_initialize(&acc); secp256k1_scalar_set_int(&x, 0); - test_ecmult_accumulate(&acc, &x, scratch); + test_ecmult_accumulate(&acc, &x); secp256k1_scalar_set_int(&x, 1); - test_ecmult_accumulate(&acc, &x, scratch); + test_ecmult_accumulate(&acc, &x); secp256k1_scalar_negate(&x, &x); - test_ecmult_accumulate(&acc, &x, scratch); + test_ecmult_accumulate(&acc, &x); for (i = 0; i < iter; ++i) { secp256k1_sha256 gen; @@ -5534,12 +5129,10 @@ static void test_ecmult_constants_sha(uint32_t prefix, size_t iter, const unsign secp256k1_sha256_write(&gen, inp, sizeof(inp)); secp256k1_sha256_finalize(&gen, b32); secp256k1_scalar_set_b32(&x, b32, NULL); - test_ecmult_accumulate(&acc, &x, scratch); + test_ecmult_accumulate(&acc, &x); } secp256k1_sha256_finalize(&acc, b32); CHECK(secp256k1_memcmp_var(b32, expected32, 32) == 0); - - secp256k1_scratch_space_destroy(CTX, scratch); } static void run_ecmult_constants(void) { diff --git a/src/tests_exhaustive.c b/src/tests_exhaustive.c index 13bda6119d..b669725328 100644 --- a/src/tests_exhaustive.c +++ b/src/tests_exhaustive.c @@ -183,22 +183,13 @@ static void test_exhaustive_ecmult(const secp256k1_ge *group, const secp256k1_ge } } -typedef struct { - secp256k1_scalar sc[2]; - secp256k1_ge pt[2]; -} ecmult_multi_data; - -static int ecmult_multi_callback(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *cbdata) { - ecmult_multi_data *data = (ecmult_multi_data*) cbdata; - *sc = data->sc[idx]; - *pt = data->pt[idx]; - return 1; -} - static void test_exhaustive_ecmult_multi(const secp256k1_context *ctx, const secp256k1_ge *group) { int i, j, k, x, y; uint64_t iter = 0; - secp256k1_scratch *scratch = secp256k1_scratch_create(&ctx->error_callback, 4096); + secp256k1_scalar scalars[2]; + secp256k1_ge points[2]; + size_t mem_limit = 4096; + for (i = 0; i < EXHAUSTIVE_TEST_ORDER; i++) { for (j = 0; j < EXHAUSTIVE_TEST_ORDER; j++) { for (k = 0; k < EXHAUSTIVE_TEST_ORDER; k++) { @@ -207,22 +198,20 @@ static void test_exhaustive_ecmult_multi(const secp256k1_context *ctx, const sec for (y = 0; y < EXHAUSTIVE_TEST_ORDER; y++) { secp256k1_gej tmp; secp256k1_scalar g_sc; - ecmult_multi_data data; - secp256k1_scalar_set_int(&data.sc[0], i); - secp256k1_scalar_set_int(&data.sc[1], j); + 
secp256k1_scalar_set_int(&scalars[0], i); + secp256k1_scalar_set_int(&scalars[1], j); secp256k1_scalar_set_int(&g_sc, k); - data.pt[0] = group[x]; - data.pt[1] = group[y]; + points[0] = group[x]; + points[1] = group[y]; - secp256k1_ecmult_multi_var(&ctx->error_callback, scratch, &tmp, &g_sc, ecmult_multi_callback, &data, 2); + CHECK(secp256k1_ecmult_multi(&ctx->error_callback, &tmp, 2, points, scalars, &g_sc, mem_limit)); CHECK(secp256k1_gej_eq_ge_var(&tmp, &group[(i * x + j * y + k) % EXHAUSTIVE_TEST_ORDER])); } } } } } - secp256k1_scratch_destroy(&ctx->error_callback, scratch); } static void r_from_k(secp256k1_scalar *r, const secp256k1_ge *group, int k, int* overflow) { diff --git a/tools/ecmult_multi_calib.py b/tools/ecmult_multi_calib.py new file mode 100644 index 0000000000..2e3727990f --- /dev/null +++ b/tools/ecmult_multi_calib.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +import sys +from collections import defaultdict + +def linreg(x, y): + n, sx, sy = len(x), sum(x), sum(y) + sxy, sx2 = sum(a*b for a,b in zip(x,y)), sum(a*a for a in x) + d = n*sx2 - sx*sx + if abs(d) < 1e-10: return sy/n, 0 + return (sy - ((n*sxy - sx*sy)/d)*sx)/n, (n*sxy - sx*sy)/d + +data = defaultdict(list) +for line in sys.stdin: + line = line.strip() + if not line or line.startswith('#'): continue + p = line.split(',') + if len(p) == 3: data[p[0]].append((int(p[1]), float(p[2]))) + +res = {} +for algo, m in data.items(): + if len(m) >= 2: + C, D = linreg([1.0/n for n,_ in m], [t/n for n,t in m]) + res[algo] = (C, D) + +scale = 100.0 / res['PIPPENGER_4'][0] if 'PIPPENGER_4' in res else 1.0 + +print("static const struct secp256k1_ecmult_multi_abcd secp256k1_ecmult_multi_abcds[SECP256K1_ECMULT_MULTI_NUM_ALGOS] = {") +print(" {0, 0, 1000, 0 },") +if 'STRAUSS' in res: + C, D = res['STRAUSS'] + Cs, Ds = max(1,int(C*scale)), max(0,int(D*scale)) if D>0 else 0 + Cstr = f"{Cs}," + print(f" {{SECP256K1_STRAUSS_POINT_SIZE, 0, {Cstr:<6} {Ds:<5}}},") +for i in range(1, 13): + if f'PIPPENGER_{i}' in res: + C, D = res[f'PIPPENGER_{i}'] + Cs, Ds = max(1,int(C*scale)), max(0,int(D*scale)) if D>0 else 0 + ps = f"SECP256K1_PIPPENGER_POINT_SIZE({i})," + fs = f"SECP256K1_PIPPENGER_FIXED_SIZE({i})," + Cstr = f"{Cs}," + print(f" {{{ps:<35} {fs:<35} {Cstr:<6} {Ds:<5}}},") +print("};")
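
For reference, below is a minimal, hypothetical smoke test for the new tools/ecmult_multi_calib.py; it is not part of the patch. The script fits t/n against 1/n, so the intercept of that regression is the per-point cost C and the slope is the fixed per-batch cost D. The snippet fabricates "ALGO,N,TIME" lines from an assumed linear cost model t(n) = C*n + D and pipes them through the script, so the generated table can be eyeballed; the algorithm names, batch sizes, and cost values used here are made up purely for illustration, and the time unit is arbitrary.

#!/usr/bin/env python3
# Hypothetical smoke test for tools/ecmult_multi_calib.py (illustration only,
# not part of the patch). Fabricates timing lines from an assumed cost model
# t(n) = C*n + D and feeds them to the calibration script; with exact linear
# input the regression should recover C and D (up to the script's integer
# scaling).
import subprocess

# Made-up per-point cost C and fixed cost D for two algorithm labels the
# script recognizes. Units are arbitrary; only the linear shape matters.
fake_costs = {"STRAUSS": (1.50, 2.0), "PIPPENGER_4": (1.00, 40.0)}
batch_sizes = [10, 30, 100, 300, 1000]

lines = []
for algo, (c, d) in fake_costs.items():
    for n in batch_sizes:
        # Same comma-separated layout the script parses: name, n, time.
        lines.append(f"{algo},{n},{c * n + d:.3f}")

result = subprocess.run(
    ["python3", "tools/ecmult_multi_calib.py"],
    input="\n".join(lines), capture_output=True, text=True, check=True
)
print(result.stdout)

Since the script normalizes every fitted constant against the PIPPENGER_4 per-point cost, the absolute time unit of the input cancels out; only the relative costs between algorithms affect the emitted table.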