Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions benchmark_analysis.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "b49ae6d6",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"plt.rcParams['figure.figsize'] = (16, 10)\n",
"plt.rcParams['font.size'] = 11"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d236980d",
"metadata": {},
"outputs": [],
"source": [
"def parse_csv(filepath):\n",
"    # Read a benchmark log and return a DataFrame with two columns:\n",
"    # 'Benchmark' (first comma-separated field, stripped) and 'Time'\n",
"    # (third comma-separated field, parsed as float).\n",
"    # The first line of the file is always skipped. Lines that are empty,\n",
"    # contain no comma, end with a comma, have fewer than three fields, or\n",
"    # whose third field is not a number are ignored.\n",
"    with open(filepath, 'r') as f:\n",
"        lines = f.readlines()[1:]\n",
"\n",
"    data = []\n",
"    for line in lines:\n",
"        line = line.strip()\n",
"        if line and ',' in line and not line.endswith(','):\n",
"            parts = line.split(',')\n",
"            if len(parts) >= 3:\n",
"                try:\n",
"                    data.append({'Benchmark': parts[0].strip(), 'Time': float(parts[2])})\n",
"                except ValueError:\n",
"                    # Non-numeric third field (e.g. a column header row): skip it.\n",
"                    continue\n",
"    # Fix the column set so an empty log still yields the columns the merge needs.\n",
"    return pd.DataFrame(data, columns=['Benchmark', 'Time'])\n",
"\n",
"baseline = parse_csv('BASELINE_bench.csv')\n",
"custom = parse_csv('CUSTOM_AVX2_bench.csv')\n",
"\n",
"merged = baseline.merge(custom, on='Benchmark', suffixes=('_baseline', '_custom'))\n",
"merged['improvement'] = ((merged['Time_baseline'] - merged['Time_custom']) / merged['Time_baseline']) * 100"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8442b12d",
"metadata": {},
"outputs": [],
"source": [
"sorted_data = merged.sort_values('improvement', ascending=False)\n",
"top10 = sorted_data.head(10)\n",
"bottom10 = sorted_data.tail(10)\n",
"filtered = pd.concat([top10, bottom10])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa07550a",
"metadata": {},
"outputs": [],
"source": [
"heatmap_data = filtered.set_index('Benchmark')[['improvement']]\n",
"\n",
"plt.figure(figsize=(8, 12))\n",
"sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn', center=0, \n",
" cbar_kws={'label': 'Performance Improvement (%)'})\n",
"plt.title('CUSTOM_AVX2 vs BASELINE Performance (Top/Bottom 10)', fontsize=14, fontweight='bold')\n",
"plt.ylabel('')\n",
"plt.tight_layout()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
57 changes: 57 additions & 0 deletions simd-bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/bin/bash
set -e

options=("OFF" "ON")
BENCH_ITERS=${SECP256K1_BENCH_ITERS:-20000}

GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Prepare the machine for lower-noise measurements. Because of `set -e`, a
# failure of either sudo command below aborts the whole script.
#
# Write 1 to the intel_pstate no_turbo control file.
# NOTE(review): this path only exists with the intel_pstate driver, and the
# previous value is never restored when the script ends — confirm this is intended.
echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo > /dev/null
# Select the "performance" frequency governor for CPU 0.
sudo cpupower -c 0 frequency-set -g performance > /dev/null
# If taskset is available, benchmarks are prefixed with "taskset -c 0";
# otherwise TASKSET_CMD stays unset and the prefix expands to nothing.
command -v taskset > /dev/null && TASKSET_CMD="taskset -c 0"

# Run one benchmark binary and append its output to a log file.
#
# Arguments:
#   $1  build directory (expected two levels below the log location, e.g. build/<config>)
#   $2  name of the binary inside "$1/bin"
#   $3  log file name, resolved relative to two directories above $1
#
# Returns the benchmark's exit status (or 1 if the build directory cannot be
# entered). The caller invokes this function inside an `if`, where `set -e` is
# ignored, so failures must be propagated explicitly: otherwise the status of
# the trailing `echo` would mask a crashing benchmark.
run_bench() {
    local dir=$1 bin=$2 log=$3
    (
        cd "$dir" || exit 1

        rc=0
        # TASKSET_CMD is left unquoted on purpose: it is either empty or the
        # multi-word prefix "taskset -c 0" and must be word-split.
        $TASKSET_CMD env SECP256K1_BENCH_ITERS="$BENCH_ITERS" nice -n 0 "./bin/$bin" >> "../../$log" 2>&1 || rc=$?

        # Blank separator line between binaries, written even after a failure
        # so the log layout stays the same.
        echo "" >> "../../$log"
        exit "$rc"
    )
}

# Run every benchmark binary of one build configuration.
#
# $1 is the configuration name; binaries are taken from build/<config> and
# results are collected in <config>_bench.csv in the current directory.
# Returns 1 if the build directory is missing or any benchmark fails.
bench_all() {
    local config="$1"
    local dir="build/$config"
    local log="${config}_bench.csv"

    [[ -d "$dir" ]] || {
        echo -e "${RED}✖ $config${NC} (no dir)"
        return 1
    }

    # Start a fresh log with a short header followed by a blank line.
    printf '%s\n' \
        "Benchmark results for $config" \
        "Generated on $(date)" \
        "Iterations: $BENCH_ITERS" \
        "" > "$log"

    for bin in bench bench_ecmult bench_internal; do
        if ! run_bench "$dir" "$bin" "$log"; then
            echo -e " ${RED}✖ $bin${NC}"
            return 1
        fi
        echo -e " ${GREEN}✔ $bin${NC}"
    done

    echo -e "${GREEN}✔ $config${NC} (log: $log)"
}

bench_all "BASELINE"
bench_all "CUSTOM_AVX2"

echo -e "\n${YELLOW}All benchmarks successful. Logs in project root${NC}"
30 changes: 30 additions & 0 deletions simd-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
set -e

mkdir -p build

GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Configure and build one configuration with CMake + Ninja.
#
#   $1  configuration name; the build tree is build/<config> and all tool
#       output goes to <config>_build.log in the current directory
#   $2  extra compiler flags appended after "-O3 -mavx2"
#
# Prints a status line and returns 1 if configuring or building fails.
run_build() {
    local name="$1"
    local cflags="-O3 -mavx2 $2"
    local build_dir="build/$name"
    local logfile="${name}_build.log"

    mkdir -p "$build_dir"

    # Run in a subshell so the caller's working directory is untouched.
    if ! (
        cd "$build_dir" &&
        cmake ../.. -G Ninja -DCMAKE_BUILD_TYPE=Release -DSECP256K1_APPEND_CFLAGS="$cflags" > "../../$logfile" 2>&1 &&
        ninja >> "../../$logfile" 2>&1
    ); then
        echo -e "${RED}✖ $name failed${NC}"
        return 1
    fi

    echo -e "${GREEN}✔ $name${NC}"
}

run_build "BASELINE" "-U__AVX2__"
run_build "CUSTOM_AVX2" "-D__AVX2__"

echo -e "\n${YELLOW}All builds done. Logs in project root${NC}"
30 changes: 30 additions & 0 deletions simd-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
set -e

GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Run the ctest suite of one build configuration.
#
# $1 is the configuration name; tests run inside build/<config> and all
# output is written to <config>_test.log in the current directory.
# Returns 1 if the build directory is missing or ctest reports a failure.
run_test() {
    local name="$1"
    local build_dir="build/$name"
    local logfile="${name}_test.log"

    [[ -d "$build_dir" ]] || {
        echo -e "${RED}✖ $name${NC} (no dir)"
        return 1
    }

    # Subshell keeps the caller's working directory unchanged.
    if ! (cd "$build_dir" && ctest --output-on-failure -j"$(nproc)" > "../../$logfile" 2>&1); then
        echo -e "${RED}✖ $name${NC} (log: $logfile)"
        return 1
    fi

    echo -e "${GREEN}✔ $name${NC} (log: $logfile)"
}

run_test "BASELINE"
run_test "CUSTOM_AVX2"

echo -e "\n${YELLOW}All tests passed. Logs in project root${NC}"
81 changes: 45 additions & 36 deletions src/field_10x26_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,20 @@ static void secp256k1_fe_impl_verify(const secp256k1_fe *a) {
#endif

/* Set every limb of r to its per-limb mask multiplied by 2*m: limbs 0..8 use
 * the mask 0x3FFFFFF and limb 9 uses the narrower mask 0x03FFFFF.
 *
 * Each limb is written exactly once. The two products are computed up front
 * with the same unsigned long arithmetic as the literal expressions
 * (0x...UL * 2 * m), so the stored values are unchanged. */
static void secp256k1_fe_impl_get_bounds(secp256k1_fe *r, int m) {
    const unsigned long limb_bound = 0x3FFFFFFUL * 2 * m;
    const unsigned long top_bound = 0x03FFFFFUL * 2 * m;

    r->n[0] = limb_bound;
    r->n[1] = limb_bound;
    r->n[2] = limb_bound;
    r->n[3] = limb_bound;
    r->n[4] = limb_bound;
    r->n[5] = limb_bound;
    r->n[6] = limb_bound;
    r->n[7] = limb_bound;
    r->n[8] = limb_bound;
    r->n[9] = top_bound;
}

static void secp256k1_fe_impl_normalize(secp256k1_fe *r) {
Expand Down Expand Up @@ -257,8 +261,8 @@ static int secp256k1_fe_impl_normalizes_to_zero_var(const secp256k1_fe *r) {
}

/* Set r to the small integer a: limb 0 receives a and limbs 1..9 are cleared.
 * Every limb is stored exactly once; no separate memset pass is needed. */
SECP256K1_INLINE static void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a) {
    r->n[0] = a;
    r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
}

SECP256K1_INLINE static int secp256k1_fe_impl_is_zero(const secp256k1_fe *a) {
Expand All @@ -272,12 +276,11 @@ SECP256K1_INLINE static int secp256k1_fe_impl_is_odd(const secp256k1_fe *a) {

static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
int i;
int diff;
for (i = 9; i >= 0; i--) {
if (a->n[i] > b->n[i]) {
return 1;
}
if (a->n[i] < b->n[i]) {
return -1;
diff = (a->n[i] > b->n[i]) - (a->n[i] < b->n[i]);
if (diff != 0) {
return diff;
}
}
return 0;
Expand Down Expand Up @@ -338,24 +341,30 @@ static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a) {
}

/* Compute r = B - a limb by limb, where B is a fixed per-limb constant scaled
 * by 2*(m+1).
 *
 * Each limb of r is written exactly once and only after the matching limb of
 * a has been read, so the result is also correct when r and a point to the
 * same object. Performing the subtraction a second time in that situation
 * would undo the negation. */
SECP256K1_INLINE static void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m) {
    /* Per-limb minuends, computed once and shared by the checks and the
     * subtractions below. */
    const uint32_t two_m1 = 2 * (m + 1);
    const uint32_t bound1 = 0x3FFFC2FUL * two_m1;
    const uint32_t bound2 = 0x3FFFFBFUL * two_m1;
    const uint32_t bound3 = 0x3FFFFFFUL * two_m1;
    const uint32_t bound4 = 0x03FFFFFUL * two_m1;

    /* For all legal values of m (0..31), the following properties hold: */
    VERIFY_CHECK(bound1 >= 0x3FFFFFFUL * 2 * m);
    VERIFY_CHECK(bound2 >= 0x3FFFFFFUL * 2 * m);
    VERIFY_CHECK(bound3 >= 0x3FFFFFFUL * 2 * m);
    VERIFY_CHECK(bound4 >= 0x03FFFFFUL * 2 * m);

    /* Due to the properties above, the left hand in the subtractions below is never less than
     * the right hand. */
    r->n[0] = bound1 - a->n[0];
    r->n[1] = bound2 - a->n[1];
    r->n[2] = bound3 - a->n[2];
    r->n[3] = bound3 - a->n[3];
    r->n[4] = bound3 - a->n[4];
    r->n[5] = bound3 - a->n[5];
    r->n[6] = bound3 - a->n[6];
    r->n[7] = bound3 - a->n[7];
    r->n[8] = bound3 - a->n[8];
    r->n[9] = bound4 - a->n[9];
}

SECP256K1_INLINE static void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r, int a) {
Expand Down Expand Up @@ -1111,24 +1120,24 @@ static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r,
}

/* Pack the ten limbs of a into the eight words of r.
 *
 * Limb i is placed at bit offset 26*i of the packed value; the shifts below
 * split each limb across the word boundary it straddles. Each output word is
 * stored exactly once.
 * NOTE(review): the shift amounts imply 32-bit output words and limbs that fit
 * in 26 bits (22 for limb 9) — confirm callers pass a normalized element. */
static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
    r->n[0] = a->n[0] | a->n[1] << 26;
    r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
    r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
    r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
    r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
    r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
    r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
    r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
}

static SECP256K1_INLINE void secp256k1_fe_impl_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
r->n[0] = a->n[0] & 0x3FFFFFFUL;
r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
r->n[9] = a->n[7] >> 10;
Expand Down
Loading