From bae0b587e4faae261de34aab78d148b25774d1e2 Mon Sep 17 00:00:00 2001 From: pcasaretto Date: Thu, 21 Aug 2025 14:12:41 -0300 Subject: [PATCH] range-diff: fix integer overflow in get_correspondences() The get_correspondences() function uses 'int' to store the sum of two 'size_t' values from string_list structures. When processing large patch sets where a->nr + b->nr exceeds INT_MAX, the value wraps to negative, causing invalid array indexing and a segmentation fault. This manifests as a crash at line 356 in range-diff.c when accessing cost[i + n * j] with a negative or incorrectly calculated index. Fix this by using 'size_t' throughout for array sizes and indices. The compute_assignment() function signature is updated to accept size_t parameters, maintaining int only for sentinel values (-1). Note that while this fix prevents the integer overflow and segmentation fault, attempting to process extremely large patch sets will still hit memory limitations. In practice, the process will consume excessive memory and likely be terminated by the system (SIGKILL) before completing. However, fixing the integer overflow is still correct: range-diff now fails in a well-defined way when it exhausts resources, rather than invoking undefined behavior. Signed-off-by: Paulo Casaretto --- linear-assignment.c | 10 ++++++---- linear-assignment.h | 2 +- range-diff.c | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/linear-assignment.c b/linear-assignment.c index 5416cbcf409d26..cf4088c2716a9c 100644 --- a/linear-assignment.c +++ b/linear-assignment.c @@ -12,12 +12,13 @@ * The parameter `cost` is the cost matrix: the cost to assign column j to row * i is `cost[j + column_count * i]. 
*/ -void compute_assignment(int column_count, int row_count, int *cost, +void compute_assignment(size_t column_count, size_t row_count, int *cost, int *column2row, int *row2column) { int *v, *d; int *free_row, free_count = 0, saved_free_count, *pred, *col; - int i, j, phase; + size_t i, j; + int phase; if (column_count < 2) { memset(column2row, 0, sizeof(int) * column_count); @@ -30,8 +31,8 @@ void compute_assignment(int column_count, int row_count, int *cost, ALLOC_ARRAY(v, column_count); /* column reduction */ - for (j = column_count - 1; j >= 0; j--) { - int i1 = 0; + for (j = column_count; j-- > 0; ) { + size_t i1 = 0; for (i = 1; i < row_count; i++) if (COST(j, i1) > COST(j, i)) @@ -132,6 +133,7 @@ void compute_assignment(int column_count, int row_count, int *cost, for (free_count = 0; free_count < saved_free_count; free_count++) { int i1 = free_row[free_count], low = 0, up = 0, last, k; int min, c, u1; + int j; for (j = 0; j < column_count; j++) { d[j] = COST(j, i1) - v[j]; diff --git a/linear-assignment.h b/linear-assignment.h index 1dfea766290d9d..47980ec46a6c7f 100644 --- a/linear-assignment.h +++ b/linear-assignment.h @@ -13,7 +13,7 @@ * assignments (-1 for unassigned, which can happen only if column_count != * row_count). */ -void compute_assignment(int column_count, int row_count, int *cost, +void compute_assignment(size_t column_count, size_t row_count, int *cost, int *column2row, int *row2column); /* The maximal cost in the cost matrix (to prevent integer overflows). */ diff --git a/range-diff.c b/range-diff.c index 8a2dcbee322e72..3228673800dbde 100644 --- a/range-diff.c +++ b/range-diff.c @@ -327,9 +327,9 @@ static int diffsize(const char *a, const char *b) static void get_correspondences(struct string_list *a, struct string_list *b, int creation_factor) { - int n = a->nr + b->nr; + size_t n = a->nr + b->nr; int *cost, c, *a2b, *b2a; - int i, j; + size_t i, j; ALLOC_ARRAY(cost, st_mult(n, n)); ALLOC_ARRAY(a2b, n);