Skip to content

Commit 0c1764e

Browse files
ensure audit can handle duplicate data
1 parent d31e083 commit 0c1764e

File tree

1 file changed

+33
-16
lines changed

1 file changed

+33
-16
lines changed

code/logic/jellyfish.c

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,39 +1412,60 @@ int fossil_ai_jellyfish_audit(const fossil_ai_jellyfish_chain_t *chain) {
14121412
if (!chain) return -1;
14131413
int anomalies = 0;
14141414

1415-
/* Precompute list of valid commit hashes for quick parent existence tests */
1415+
/* Collect valid hashes + indices for duplicate / parent checks */
14161416
uint8_t valid_hashes[FOSSIL_JELLYFISH_MAX_MEM][FOSSIL_JELLYFISH_HASH_SIZE];
1417+
size_t valid_indices[FOSSIL_JELLYFISH_MAX_MEM];
14171418
size_t valid_count = 0;
14181419
for (size_t i = 0; i < FOSSIL_JELLYFISH_MAX_MEM; ++i) {
14191420
const fossil_ai_jellyfish_block_t *b = &chain->commits[i];
14201421
if (b->attributes.valid) {
14211422
memcpy(valid_hashes[valid_count], b->identity.commit_hash, FOSSIL_JELLYFISH_HASH_SIZE);
1423+
valid_indices[valid_count] = i;
14221424
valid_count++;
14231425
}
14241426
}
14251427

1428+
/* Detect duplicate commit hashes (content-address collision or duplicate IO when hash deterministic) */
1429+
for (size_t i = 0; i < valid_count; ++i) {
1430+
for (size_t j = 0; j < i; ++j) {
1431+
if (memcmp(valid_hashes[i], valid_hashes[j], FOSSIL_JELLYFISH_HASH_SIZE) == 0) {
1432+
anomalies++; /* count each additional duplicate once */
1433+
break;
1434+
}
1435+
}
1436+
}
1437+
1438+
/* Detect duplicate (input,output) pairs even if hashes differ (e.g., non-deterministic hash) */
1439+
for (size_t i = 0; i < valid_count; ++i) {
1440+
const fossil_ai_jellyfish_block_t *bi = &chain->commits[ valid_indices[i] ];
1441+
for (size_t j = 0; j < i; ++j) {
1442+
const fossil_ai_jellyfish_block_t *bj = &chain->commits[ valid_indices[j] ];
1443+
if (strcmp(bi->io.input, bj->io.input) == 0 &&
1444+
strcmp(bi->io.output, bj->io.output) == 0) {
1445+
anomalies++;
1446+
break;
1447+
}
1448+
}
1449+
}
1450+
14261451
for (size_t idx = 0; idx < FOSSIL_JELLYFISH_MAX_MEM; ++idx) {
14271452
const fossil_ai_jellyfish_block_t *b = &chain->commits[idx];
14281453
if (!b->attributes.valid) continue;
14291454

1430-
/* 1. Index consistency */
14311455
if (b->identity.commit_index != idx)
14321456
anomalies++;
14331457

1434-
/* 2. Hash recomputation (content based) */
14351458
uint8_t recomputed[FOSSIL_JELLYFISH_HASH_SIZE];
14361459
fossil_ai_jellyfish_hash(b->io.input, b->io.output, recomputed);
14371460
if (memcmp(recomputed, b->identity.commit_hash, FOSSIL_JELLYFISH_HASH_SIZE) != 0)
14381461
anomalies++;
14391462

1440-
/* Zero hash disallowed */
14411463
int all_zero = 1;
14421464
for (size_t k = 0; k < FOSSIL_JELLYFISH_HASH_SIZE; ++k)
14431465
if (b->identity.commit_hash[k] != 0) { all_zero = 0; break; }
14441466
if (all_zero)
14451467
anomalies++;
14461468

1447-
/* 3. Parent constraints */
14481469
if (b->identity.parent_count > 4)
14491470
anomalies++;
14501471
for (size_t p = 0; p < b->identity.parent_count && p < 4; ++p) {
@@ -1459,36 +1480,33 @@ int fossil_ai_jellyfish_audit(const fossil_ai_jellyfish_chain_t *chain) {
14591480
anomalies++;
14601481
}
14611482

1462-
/* 4. Merge flag consistency */
14631483
if ((b->identity.parent_count >= 2 && !b->identity.is_merge_commit) ||
14641484
(b->identity.parent_count < 2 && b->identity.is_merge_commit))
14651485
anomalies++;
14661486

1467-
/* 5. Length & token bounds */
1468-
size_t real_in_len = strnlen(b->io.input, FOSSIL_JELLYFISH_INPUT_SIZE);
1487+
size_t real_in_len = strnlen(b->io.input, FOSSIL_JELLYFISH_INPUT_SIZE);
14691488
size_t real_out_len = strnlen(b->io.output, FOSSIL_JELLYFISH_OUTPUT_SIZE);
1470-
if (real_in_len != b->io.input_len) anomalies++;
1489+
if (real_in_len != b->io.input_len) anomalies++;
14711490
if (real_out_len != b->io.output_len) anomalies++;
1472-
if (b->io.input_token_count > FOSSIL_JELLYFISH_MAX_TOKENS) anomalies++;
1491+
if (b->io.input_token_count > FOSSIL_JELLYFISH_MAX_TOKENS) anomalies++;
14731492
if (b->io.output_token_count > FOSSIL_JELLYFISH_MAX_TOKENS) anomalies++;
14741493

1475-
/* 6. Confidence range */
14761494
if (b->attributes.confidence < 0.0f || b->attributes.confidence > 1.0f)
14771495
anomalies++;
14781496

1479-
/* 7. Trusted flag heuristic: signed commit type without signature */
14801497
if (b->block_type == JELLY_COMMIT_SIGNED && b->identity.signature_len == 0)
14811498
anomalies++;
14821499

1483-
/* 8. Merge type sanity */
14841500
if (b->block_type == JELLY_COMMIT_MERGE && b->identity.parent_count < 2)
14851501
anomalies++;
14861502
}
14871503

1488-
/* 9. Branch head validity */
14891504
for (size_t br = 0; br < chain->branch_count; ++br) {
14901505
const uint8_t *head = chain->branches[br].head_hash;
1491-
if (memcmp(head, "\0\0\0\0\0\0\0\0", 8) == 0) continue; /* tolerate zeroed */
1506+
int all_zero = 1;
1507+
for (size_t k = 0; k < FOSSIL_JELLYFISH_HASH_SIZE; ++k)
1508+
if (head[k]) { all_zero = 0; break; }
1509+
if (all_zero) continue;
14921510
int found = 0;
14931511
for (size_t vh = 0; vh < valid_count; ++vh) {
14941512
if (memcmp(valid_hashes[vh], head, FOSSIL_JELLYFISH_HASH_SIZE) == 0) { found = 1; break; }
@@ -1497,7 +1515,6 @@ int fossil_ai_jellyfish_audit(const fossil_ai_jellyfish_chain_t *chain) {
14971515
anomalies++;
14981516
}
14991517

1500-
/* 10. Count consistency: chain->count should be >= highest valid index+1 (soft check) */
15011518
size_t highest_valid = 0;
15021519
for (size_t i = 0; i < FOSSIL_JELLYFISH_MAX_MEM; ++i)
15031520
if (chain->commits[i].attributes.valid && i + 1 > highest_valid)

0 commit comments

Comments
 (0)