Skip to content

Commit e7c78ab

Browse files
feat: Use C and NEON for hashing (optimization: 100x faster on Pixel 8) (#415)
## Summary The Android library now builds and packages a C tile_hash JNI shared library (CMake/NDK with ABI filters) that computes per-tile hashes and tile layout (fixed 64px width; height chosen by nearest divisor), using NEON when available. ## How did you test this change? <!-- Frontend - Leave a screencast or a screenshot to visually describe the changes. --> ## Are there any deployment considerations? <!-- Backend - Do we need to consider migrations or backfilling data? --> <!-- CURSOR_SUMMARY --> --- > [!NOTE] > **Medium Risk** > Introduces new NDK/JNI code and changes the default tiling/hash implementation, which could affect replay diffing correctness or build/ABI compatibility despite added parity tests. > > **Overview** > Session Replay tile hashing is reworked to use a new **C/JNI shared library** (`tile_hash`) with optional **ARM NEON** acceleration, and `TileSignatureManager.compute(bitmap)` now prefers the native path when available. > > Default signature generation changes to a **fixed 64px tile width** (height still chosen by nearest divisor), and the Kotlin fallback hashing is updated to match native byte order so Kotlin/JNI outputs are identical. Data classes (`ImageSignature`, `TileSignature`, `diffRectangle`) are moved into a new `ImageSignature.kt`, and unit + instrumented tests are expanded to validate fixed-64 behavior and native/Kotlin parity. > > Build changes wire up `externalNativeBuild`/CMake for the library module and pin the e2e app’s `launchdarkly-android-client-sdk` dependency to `5.11.0`. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 6368801. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY -->
1 parent b1510ea commit e7c78ab

File tree

13 files changed

+951
-166
lines changed

13 files changed

+951
-166
lines changed

e2e/android/app/build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ dependencies {
9090
// Uncomment to use the publicly released version (note this may be behind branch/main)
9191
// implementation("com.launchdarkly:launchdarkly-observability-android:0.2.0")
9292

93-
implementation("com.launchdarkly:launchdarkly-android-client-sdk")
93+
implementation("com.launchdarkly:launchdarkly-android-client-sdk:5.11.0")
9494

9595
implementation("io.opentelemetry:opentelemetry-api:1.51.0")
9696
implementation("io.opentelemetry:opentelemetry-sdk:1.51.0")
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
package com.example.androidobservability
2+
3+
import android.graphics.Bitmap
4+
import androidx.test.ext.junit.runners.AndroidJUnit4
5+
import com.launchdarkly.observability.replay.capture.TileSignatureManager
6+
import org.junit.Assert.assertEquals
7+
import org.junit.Assert.assertNotNull
8+
import org.junit.Assume.assumeTrue
9+
import org.junit.Test
10+
import org.junit.runner.RunWith
11+
12+
/**
 * Instrumented check that the default [TileSignatureManager.compute] result for a bitmap
 * equals the result of the explicit 64-pixel-wide tiling computed with the expected tile height.
 *
 * The test is skipped (via a JUnit assumption) when the native entry point cannot be
 * reached through reflection or yields `null`.
 */
@RunWith(AndroidJUnit4::class)
class TileHashParityInstrumentedTest {

    @Test
    fun nativeSignaturesParity() {
        val width = 191
        val height = 67

        // Deterministic pseudo-random opaque pixels (LCG step per index, alpha forced to 0xFF).
        val pixels = IntArray(width * height) { index ->
            val rgb = (index * 1103515245 + 12345) and 0x00FFFFFF
            (0xFF shl 24) or rgb
        }
        val bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888).apply {
            setPixels(pixels, 0, width, 0, 0, width, height)
        }

        // Skip rather than fail when the native path is not usable.
        assumeTrue(nativeCompute(bitmap) != null)

        val manager = TileSignatureManager()
        val tileHeight = expectedDefaultTileHeight(height)
        val nativeSig = manager.compute(bitmap)
        val kotlinSig = manager.compute(bitmap, 64, tileHeight)

        assertNotNull(nativeSig)
        assertNotNull(kotlinSig)
        assertEquals(kotlinSig, nativeSig)
    }

    /**
     * Reflectively calls `TileHashNative.nativeCompute(bitmap)`.
     *
     * A static-style invocation is tried first; if that throws, the call is retried on the
     * class's `INSTANCE` field. Any failure along the way yields `null`.
     */
    private fun nativeCompute(bitmap: Bitmap): LongArray? = try {
        val nativeClass = Class.forName("com.launchdarkly.observability.replay.capture.TileHashNative")
        val computeMethod = nativeClass.getDeclaredMethod("nativeCompute", Bitmap::class.java)
        try {
            computeMethod.invoke(null, bitmap) as? LongArray
        } catch (_: Throwable) {
            val singleton = nativeClass.getDeclaredField("INSTANCE").get(null)
            computeMethod.invoke(singleton, bitmap) as? LongArray
        }
    } catch (_: Throwable) {
        null
    }

    /**
     * Picks the tile height for [height]: the preferred value 22 when it divides [height]
     * (or when [height] is not positive), otherwise the divisor of [height] in 22..44 closest
     * to 22, checking the larger candidate first at each distance. Falls back to 22 when no
     * candidate divides [height].
     */
    private fun expectedDefaultTileHeight(height: Int): Int {
        val preferred = 22
        val range = 22..44
        if (height <= 0 || height % preferred == 0) return preferred

        val maxDistance = maxOf(
            kotlin.math.abs(range.first - preferred),
            kotlin.math.abs(range.last - preferred),
        )
        return (1..maxDistance)
            .asSequence()
            .flatMap { offset -> sequenceOf(preferred + offset, preferred - offset) }
            .firstOrNull { candidate -> candidate in range && height % candidate == 0 }
            ?: preferred
    }
}

sdk/@launchdarkly/observability-android/lib/build.gradle.kts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ android {
9191
buildConfigField("String", "OBSERVABILITY_SDK_VERSION", "\"${project.version}\"")
9292
}
9393

94+
externalNativeBuild {
95+
cmake {
96+
path = file("src/main/cpp/CMakeLists.txt")
97+
}
98+
}
99+
94100
buildTypes {
95101
release {
96102
isMinifyEnabled = false
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Builds the tile_hash shared library from the C sources in this directory.
cmake_minimum_required(VERSION 3.22.1)
project(tile_hash C)

# Sources that make up the shared library.
set(TILE_HASH_SOURCES
    nearest_divisor.c
    tile_hash.c
    tile_hash_jni.c
)
add_library(tile_hash SHARED ${TILE_HASH_SOURCES})

# Always compile this target with -O2.
target_compile_options(tile_hash PRIVATE -O2)

# Locate jnigraphics and link the library against it.
find_library(jnigraphics-lib jnigraphics)
target_link_libraries(tile_hash ${jnigraphics-lib})
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#include "nearest_divisor.h"
2+
3+
/* Returns non-zero when `candidate` is positive, lies in [lo, hi] and divides `value`. */
static int divides_within_range(int value, int candidate, int lo, int hi) {
    if (candidate < lo || candidate > hi) return 0;
    if (candidate <= 0) return 0;
    return value % candidate == 0;
}

/*
 * Finds the divisor of `value` inside [rangeLo, rangeHi] that is closest to `preferred`.
 *
 * Candidates are examined in order of increasing distance from `preferred`; at equal
 * distance the larger candidate (preferred + offset) is tried before the smaller one.
 * `preferred` itself is returned when `value` is not positive or when no candidate
 * in the range divides `value`.
 */
int nearest_divisor(int value, int preferred, int rangeLo, int rangeHi) {
    if (value <= 0) {
        return preferred;
    }
    if (divides_within_range(value, preferred, rangeLo, rangeHi)) {
        return preferred;
    }

    /* Largest distance from `preferred` that can still land inside the range. */
    const int above = rangeHi - preferred;
    const int below = preferred - rangeLo;
    const int reach = (below > above) ? below : above;

    for (int step = 1; step <= reach; ++step) {
        const int higher = preferred + step;
        if (divides_within_range(value, higher, rangeLo, rangeHi)) {
            return higher;
        }
        const int lower = preferred - step;
        if (divides_within_range(value, lower, rangeLo, rangeHi)) {
            return lower;
        }
    }
    return preferred;
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#ifndef NEAREST_DIVISOR_H
#define NEAREST_DIVISOR_H

/**
 * Returns the divisor of `value` within [rangeLo, rangeHi] that is nearest to
 * `preferred`, or `preferred` itself when `value` is not positive or no such
 * divisor exists.
 */
int nearest_divisor(int value, int preferred, int rangeLo, int rangeHi);

#endif /* NEAREST_DIVISOR_H */
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#include "tile_hash.h"
2+
#include "nearest_divisor.h"
3+
4+
/* The NEON path is compiled only when the target has NEON and the build is optimized. */
#if defined(__ARM_NEON) && defined(__OPTIMIZE__)
#  define USE_NEON 1
#  include <arm_neon.h>
#else
#  define USE_NEON 0
#endif

/* Tile width in pixels used by the fixed-width fast path. */
#define TILE_W 64

/* Integer types declared with 1-byte alignment, used for loads from byte pointers. */
typedef uint32_t unaligned_u32 __attribute__((aligned(1)));
typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
18+
19+
TileHashResult tile_hash_w64_scalar(const unsigned char *rowPtr,
20+
int rows,
21+
int bytesPerRow) {
22+
uint64_t h0 = UINT64_C(0x517cc1b727220a95);
23+
uint64_t h1 = UINT64_C(0x6c62272e07bb0142);
24+
uint64_t h2 = UINT64_C(0x9e3779b97f4a7c15);
25+
uint64_t h3 = UINT64_C(0xbf58476d1ce4e5b9);
26+
27+
for (int y = 0; y < rows; y++) {
28+
const unsigned char *p = rowPtr;
29+
for (int i = 0; i < 8; i++) {
30+
h0 += *(const unaligned_u64 *)(p);
31+
h1 += *(const unaligned_u64 *)(p + 8);
32+
h2 += *(const unaligned_u64 *)(p + 16);
33+
h3 += *(const unaligned_u64 *)(p + 24);
34+
p += 32;
35+
}
36+
h0 ^= h2; h1 ^= h3;
37+
h2 += h0; h3 += h1;
38+
rowPtr += bytesPerRow;
39+
}
40+
41+
h0 ^= h2; h1 ^= h3;
42+
h0 ^= h0 >> 33; h0 *= UINT64_C(0xff51afd7ed558ccd); h0 ^= h0 >> 33;
43+
h1 ^= h1 >> 29; h1 *= UINT64_C(0xc4ceb9fe1a85ec53); h1 ^= h1 >> 29;
44+
45+
TileHashResult result;
46+
result.hashLo = (int64_t)h0;
47+
result.hashHi = (int64_t)h1;
48+
return result;
49+
}
50+
51+
#if USE_NEON
52+
TileHashResult tile_hash_w64_neon(const unsigned char *rowPtr,
53+
int rows,
54+
int bytesPerRow) {
55+
uint64x2_t s0 = vcombine_u64(vcreate_u64(UINT64_C(0x517cc1b727220a95)),
56+
vcreate_u64(UINT64_C(0x6c62272e07bb0142)));
57+
uint64x2_t s1 = vcombine_u64(vcreate_u64(UINT64_C(0x9e3779b97f4a7c15)),
58+
vcreate_u64(UINT64_C(0xbf58476d1ce4e5b9)));
59+
60+
for (int y = 0; y < rows; y++) {
61+
const unsigned char *p = rowPtr;
62+
for (int i = 0; i < 8; i++) {
63+
s0 = vaddq_u64(s0, vld1q_u64((const uint64_t *)p));
64+
s1 = vaddq_u64(s1, vld1q_u64((const uint64_t *)(p + 16)));
65+
p += 32;
66+
}
67+
s0 = veorq_u64(s0, s1);
68+
s1 = vaddq_u64(s1, s0);
69+
rowPtr += bytesPerRow;
70+
}
71+
72+
uint64_t h0 = vgetq_lane_u64(s0, 0);
73+
uint64_t h1 = vgetq_lane_u64(s0, 1);
74+
uint64_t h2 = vgetq_lane_u64(s1, 0);
75+
uint64_t h3 = vgetq_lane_u64(s1, 1);
76+
77+
h0 ^= h2; h1 ^= h3;
78+
h0 ^= h0 >> 33; h0 *= UINT64_C(0xff51afd7ed558ccd); h0 ^= h0 >> 33;
79+
h1 ^= h1 >> 29; h1 *= UINT64_C(0xc4ceb9fe1a85ec53); h1 ^= h1 >> 29;
80+
81+
TileHashResult result;
82+
result.hashLo = (int64_t)h0;
83+
result.hashHi = (int64_t)h1;
84+
return result;
85+
}
86+
#endif
87+
88+
/*
 * Hashes a 64-pixel-wide tile using the NEON implementation when USE_NEON is
 * set at compile time, and the scalar implementation otherwise.
 */
static inline TileHashResult tile_hash_w64(const unsigned char *rowPtr,
                                           int rows,
                                           int bytesPerRow) {
    TileHashResult hashed;
#if USE_NEON
    hashed = tile_hash_w64_neon(rowPtr, rows, bytesPerRow);
#else
    hashed = tile_hash_w64_scalar(rowPtr, rows, bytesPerRow);
#endif
    return hashed;
}
97+
98+
TileHashResult tile_hash(const void *data,
99+
int startX, int startY,
100+
int endX, int endY,
101+
int bytesPerRow) {
102+
const int byteWidth = (endX - startX) * 4;
103+
const int quads = byteWidth >> 5;
104+
const int remBytes = byteWidth & 31;
105+
const int rem8 = remBytes >> 3;
106+
const int tail = remBytes & 4;
107+
108+
const unsigned char *rowPtr = (const unsigned char *)data
109+
+ (size_t)startY * bytesPerRow
110+
+ (size_t)startX * 4;
111+
112+
uint64_t h0 = UINT64_C(0x517cc1b727220a95);
113+
uint64_t h1 = UINT64_C(0x6c62272e07bb0142);
114+
uint64_t h2 = UINT64_C(0x9e3779b97f4a7c15);
115+
uint64_t h3 = UINT64_C(0xbf58476d1ce4e5b9);
116+
117+
for (int y = startY; y < endY; y++) {
118+
const unsigned char *p = rowPtr;
119+
120+
for (int i = 0; i < quads; i++) {
121+
h0 += *(const unaligned_u64 *)(p);
122+
h1 += *(const unaligned_u64 *)(p + 8);
123+
h2 += *(const unaligned_u64 *)(p + 16);
124+
h3 += *(const unaligned_u64 *)(p + 24);
125+
p += 32;
126+
}
127+
128+
if (rem8 >= 1) h0 += *(const unaligned_u64 *)(p);
129+
if (rem8 >= 2) h1 += *(const unaligned_u64 *)(p + 8);
130+
if (rem8 >= 3) h2 += *(const unaligned_u64 *)(p + 16);
131+
if (tail) h3 += (uint64_t)(*(const unaligned_u32 *)(p + rem8 * 8));
132+
133+
h0 ^= h2; h1 ^= h3;
134+
h2 += h0; h3 += h1;
135+
136+
rowPtr += bytesPerRow;
137+
}
138+
139+
h0 ^= h2; h1 ^= h3;
140+
h0 ^= h0 >> 33; h0 *= UINT64_C(0xff51afd7ed558ccd); h0 ^= h0 >> 33;
141+
h1 ^= h1 >> 29; h1 *= UINT64_C(0xc4ceb9fe1a85ec53); h1 ^= h1 >> 29;
142+
143+
TileHashResult result;
144+
result.hashLo = (int64_t)h0;
145+
result.hashHi = (int64_t)h1;
146+
return result;
147+
}
148+
149+
TileLayout tile_compute_layout(int imageWidth, int imageHeight) {
150+
TileLayout layout;
151+
layout.tileWidth = TILE_W;
152+
layout.tileHeight = nearest_divisor(imageHeight, 22, 22, 44);
153+
layout.columns = (imageWidth + TILE_W - 1) / TILE_W;
154+
layout.rows = (imageHeight + layout.tileHeight - 1) / layout.tileHeight;
155+
return layout;
156+
}
157+
158+
void tile_compute_all(const void *data,
159+
int imageWidth, int imageHeight,
160+
int bytesPerRow,
161+
TileLayout layout,
162+
TileHashResult *out) {
163+
const int fullCols = imageWidth / TILE_W;
164+
int idx = 0;
165+
166+
for (int row = 0; row < layout.rows; row++) {
167+
int startY = row * layout.tileHeight;
168+
int tileRows = layout.tileHeight;
169+
if (startY + tileRows > imageHeight) tileRows = imageHeight - startY;
170+
171+
for (int col = 0; col < fullCols; col++) {
172+
const unsigned char *rowPtr = (const unsigned char *)data
173+
+ (size_t)startY * bytesPerRow
174+
+ (size_t)(col * TILE_W) * 4;
175+
out[idx] = tile_hash_w64(rowPtr, tileRows, bytesPerRow);
176+
idx++;
177+
}
178+
179+
if (fullCols < layout.columns) {
180+
int startX = fullCols * TILE_W;
181+
out[idx] = tile_hash(data, startX, startY, imageWidth, startY + tileRows, bytesPerRow);
182+
idx++;
183+
}
184+
}
185+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#ifndef TILE_HASH_H
#define TILE_HASH_H

#include <stdint.h>

/** 128-bit tile hash, split into two signed 64-bit halves. */
typedef struct {
    int64_t hashLo;
    int64_t hashHi;
} TileHashResult;

/** Tile grid description: number of tile rows/columns and the tile size in pixels. */
typedef struct {
    int rows;
    int columns;
    int tileWidth;
    int tileHeight;
} TileLayout;

/**
 * Fast, non-cryptographic hash of the pixel rectangle
 * [startX, endX) x [startY, endY) in a bitmap with 4 bytes per pixel and
 * `bytesPerRow` bytes between consecutive rows.
 */
TileHashResult tile_hash(const void *data,
                         int startX, int startY,
                         int endX, int endY,
                         int bytesPerRow);

/**
 * Scalar-only variant of tile_hash_w64 (64-pixel-wide tiles), exposed for
 * parity testing.
 */
TileHashResult tile_hash_w64_scalar(const unsigned char *rowPtr,
                                    int rows,
                                    int bytesPerRow);

#if defined(__ARM_NEON) && defined(__OPTIMIZE__)
/**
 * NEON-only variant of tile_hash_w64 (64-pixel-wide tiles), exposed for
 * parity testing. Declared only when the NEON path is compiled in.
 */
TileHashResult tile_hash_w64_neon(const unsigned char *rowPtr,
                                  int rows,
                                  int bytesPerRow);
#endif

/** Computes the tile layout (tile dimensions and row/column counts) for an image. */
TileLayout tile_compute_layout(int imageWidth, int imageHeight);

/**
 * Hashes every tile of the image and writes the results to `out`, which must
 * have room for layout.rows * layout.columns elements.
 */
void tile_compute_all(const void *data,
                      int imageWidth, int imageHeight,
                      int bytesPerRow,
                      TileLayout layout,
                      TileHashResult *out);

#endif /* TILE_HASH_H */

0 commit comments

Comments
 (0)