Skip to content

Commit 3494934

Browse files
unknownclaude
andcommitted
Add example projects and update scoring evaluation with n=57 dataset
- Add A* pathfinding examples (Python + TypeScript) with test suites
- Add ML regression (linear regression) and classification (KNN) examples
- Update scoring-evaluation.md: expanded from n=21 to n=57 usable runs across 5 task types, 2 languages, 4 codebases
- Add statistical tests: Cochran's Q (p<0.001), Wilcoxon signed-rank, Cliff's delta (small effect d=0.183), Spearman rank correlation
- Add scoring-analysis.py script for reproducible statistical analysis
- Key finding: Copeland-Borda agree 81%, Weighted disagrees ~32%, agreement rates differ significantly across method pairs

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 25348a2 commit 3494934

File tree

17 files changed

+1089
-73
lines changed

17 files changed

+1089
-73
lines changed

docs/scoring-evaluation.md

Lines changed: 129 additions & 73 deletions
Large diffs are not rendered by default.

examples/astar-python/grid.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""
2+
Grid-based pathfinding challenge for thinktank ensemble testing.
3+
4+
The grid is a 2D list where:
5+
0 = walkable
6+
1 = obstacle
7+
8+
Implement find_path() to find the shortest path from start to end
9+
using A* or any optimal pathfinding algorithm.
10+
11+
Movement: 4-directional (up, down, left, right). No diagonals.
12+
"""
13+
14+
from typing import Optional
15+
from dataclasses import dataclass
16+
17+
Point = tuple[int, int]
18+
19+
20+
@dataclass
class PathResult:
    """Result returned by find_path() when a route exists.

    The accompanying tests expect ``path`` to begin at the start cell and
    finish at the end cell, both inclusive.
    """

    # Cells of the route in order, each a (row, col) Point.
    path: list[Point]
    # Number of nodes the search explored; the tests only require it to be
    # positive and bounded by the number of grid cells.
    nodes_explored: int
24+
25+
26+
def find_path(grid: list[list[int]], start: Point, end: Point) -> Optional[PathResult]:
    """
    Compute the shortest 4-directional route between two cells of the grid.

    YOUR TASK: fill in this function with A* pathfinding. The heuristic,
    data structures, and optimizations are your choice.

    Args:
        grid: 2D grid (0=walkable, 1=obstacle)
        start: Starting position (row, col)
        end: Target position (row, col)

    Returns:
        A PathResult holding the shortest path and the number of nodes
        explored, or None when the target cannot be reached.

    Raises:
        NotImplementedError: Always, until the stub is implemented.
    """
    # TODO: Implement A* pathfinding
    raise NotImplementedError("This is your task!")

examples/astar-python/run-tests.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/bash
# Run the pathfinding test suite from this script's own directory so the
# relative test path resolves regardless of the caller's working directory.
script_dir="$(dirname "$0")"
cd "$script_dir" && python -m pytest test_pathfinding.py -v
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""Tests for A* pathfinding implementation."""
2+
3+
import time
4+
import unittest
5+
from grid import find_path, Point
6+
7+
8+
def is_valid_path(grid: list[list[int]], path: list[Point]) -> bool:
    """Check that a path stays on the grid, on walkable cells, one step at a time.

    Args:
        grid: 2D grid (0=walkable, 1=obstacle). Rows may differ in length;
            each cell is bounds-checked against its own row.
        path: Sequence of (row, col) cells to validate.

    Returns:
        True when every cell lies inside the grid on a walkable cell and every
        pair of consecutive cells is 4-directionally adjacent; False otherwise.
        An empty path is vacuously valid.
    """
    previous = None
    for r, c in path:
        # Explicit lower bounds: negative indices would otherwise wrap around.
        if not 0 <= r < len(grid):
            return False
        # Compare against this cell's own row so a ragged grid yields False
        # instead of raising IndexError.
        if not 0 <= c < len(grid[r]):
            return False
        if grid[r][c] != 0:
            return False
        if previous is not None:
            pr, pc = previous
            # Manhattan distance of exactly 1 == one up/down/left/right move.
            if abs(r - pr) + abs(c - pc) != 1:
                return False
        previous = (r, c)
    return True
20+
21+
22+
class TestPathfinding(unittest.TestCase):
    """Behavioral tests for find_path(): optimality, validity, and effort bounds."""

    def test_straight_line(self):
        """An open grid yields the direct 5-cell route along the top row."""
        grid = [
            [0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0],
        ]
        result = find_path(grid, (0, 0), (0, 4))
        self.assertIsNotNone(result)
        self.assertEqual(result.path[0], (0, 0))
        self.assertEqual(result.path[-1], (0, 4))
        self.assertEqual(len(result.path), 5)
        self.assertTrue(is_valid_path(grid, result.path))

    def test_around_obstacles(self):
        """A wall in the middle row does not lengthen the route beyond 7 cells."""
        grid = [
            [0, 0, 0, 0, 0],
            [0, 1, 1, 1, 0],
            [0, 0, 0, 0, 0],
        ]
        result = find_path(grid, (0, 0), (2, 4))
        self.assertIsNotNone(result)
        self.assertEqual(result.path[0], (0, 0))
        self.assertEqual(result.path[-1], (2, 4))
        self.assertTrue(is_valid_path(grid, result.path))
        self.assertEqual(len(result.path), 7)

    def test_unreachable(self):
        """A full-height wall separates start and end, so no path exists."""
        grid = [
            [0, 1, 0],
            [0, 1, 0],
            [0, 1, 0],
        ]
        result = find_path(grid, (0, 0), (0, 2))
        self.assertIsNone(result)

    def test_start_equals_end(self):
        """When start == end the path is the single shared cell."""
        grid = [[0, 0], [0, 0]]
        result = find_path(grid, (1, 1), (1, 1))
        self.assertIsNotNone(result)
        self.assertEqual(len(result.path), 1)
        self.assertEqual(result.path[0], (1, 1))

    def test_maze(self):
        """The maze must be solved with the optimal 9-cell route."""
        grid = [
            [0, 1, 0, 0, 0],
            [0, 1, 0, 1, 0],
            [0, 0, 0, 1, 0],
            [1, 1, 0, 0, 0],
            [0, 0, 0, 1, 0],
        ]
        result = find_path(grid, (0, 0), (4, 4))
        self.assertIsNotNone(result)
        self.assertEqual(result.path[0], (0, 0))
        self.assertEqual(result.path[-1], (4, 4))
        self.assertTrue(is_valid_path(grid, result.path))
        # The Manhattan distance is 8 moves, and the route
        # (0,0) (1,0) (2,0) (2,1) (2,2) (3,2) (3,3) (3,4) (4,4) achieves it,
        # so the shortest path has 9 cells (the previous expectation of 13
        # would reject every optimal solver).
        self.assertEqual(len(result.path), 9)

    def test_large_grid_performance(self):
        """A 50x50 grid with a near-full-height wall is solved quickly."""
        size = 50
        grid = [[0] * size for _ in range(size)]
        # Vertical wall in the middle column, leaving gaps only in the first
        # and last rows.
        for i in range(1, size - 1):
            grid[i][size // 2] = 1

        start = time.perf_counter()
        result = find_path(grid, (0, 0), (size - 1, size - 1))
        elapsed = time.perf_counter() - start

        self.assertIsNotNone(result)
        self.assertTrue(is_valid_path(grid, result.path))
        self.assertLess(elapsed, 1.0, f"Should complete in < 1 second (took {elapsed:.3f}s)")
        # The search should not have to explore every cell of the grid.
        self.assertLess(result.nodes_explored, size * size)

    def test_nodes_explored(self):
        """nodes_explored is positive and never exceeds the 9 cells of the grid."""
        grid = [
            [0, 0, 0],
            [0, 0, 0],
            [0, 0, 0],
        ]
        result = find_path(grid, (0, 0), (2, 2))
        self.assertIsNotNone(result)
        self.assertGreater(result.nodes_explored, 0)
        self.assertLessEqual(result.nodes_explored, 9)
106+
107+
108+
if __name__ == "__main__":
109+
unittest.main()

examples/astar/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
node_modules/
2+
dist/

examples/astar/package.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"name": "astar-showcase",
3+
"version": "1.0.0",
4+
"description": "A* pathfinding showcase for thinktank ensemble testing",
5+
"type": "module",
6+
"scripts": {
7+
"test": "npx tsx --test tests/*.test.ts",
8+
"build": "npx tsc"
9+
},
10+
"devDependencies": {
11+
"@types/node": "^25.5.0",
12+
"tsx": "^4.21.0",
13+
"typescript": "^6.0.2"
14+
}
15+
}

examples/astar/run-tests.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/bash
# Run the pathfinding test file from this script's own directory so the
# relative test path resolves regardless of the caller's working directory.
script_dir="$(dirname "$0")"
cd "$script_dir" && npx tsx --test tests/pathfinding.test.ts

examples/astar/src/grid.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/**
2+
* Grid-based pathfinding challenge for thinktank ensemble testing.
3+
*
4+
* The grid is a 2D array where:
5+
* 0 = walkable
6+
* 1 = obstacle
7+
*
8+
* Implement findPath() to find the shortest path from start to end
9+
* using A* or any optimal pathfinding algorithm.
10+
*
11+
* The function should return an array of [row, col] coordinates
12+
* representing the path from start to end (inclusive), or null
13+
* if no path exists.
14+
*
15+
* Movement: 4-directional (up, down, left, right). No diagonals.
16+
*/
17+
18+
export type Point = [number, number];
19+
20+
/** Outcome of a successful findPath() search. */
export interface PathResult {
  /** Cells from start to end (inclusive), each as [row, col]. */
  path: Point[];
  /** Number of nodes explored during the search. */
  nodesExplored: number;
}
24+
25+
/**
 * Compute the shortest 4-directional route between two cells of the grid.
 *
 * YOUR TASK: fill in this function with A* pathfinding. The heuristic,
 * data structures, and optimizations are your choice.
 *
 * @param grid - 2D grid (0=walkable, 1=obstacle)
 * @param start - Starting position [row, col]
 * @param end - Target position [row, col]
 * @returns PathResult with the shortest path and nodes explored, or null if unreachable
 * @throws Error always, until the stub is implemented
 */
export function findPath(grid: number[][], start: Point, end: Point): PathResult | null {
  // TODO: Implement A* pathfinding
  throw new Error("Not implemented — this is your task!");
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import assert from "node:assert/strict";
2+
import { describe, it } from "node:test";
3+
import { findPath, type Point } from "../src/grid.js";
4+
5+
// Helper: a path is valid when every cell is inside the grid and walkable,
// and each cell after the first is exactly one 4-directional step from the
// cell before it.
function isValidPath(grid: number[][], path: Point[]): boolean {
  for (const [idx, [row, col]] of path.entries()) {
    const outsideGrid =
      row < 0 || row >= grid.length || col < 0 || col >= grid[0]!.length;
    if (outsideGrid) return false;
    if (grid[row]![col] !== 0) return false;
    if (idx === 0) continue; // the first cell has no predecessor to compare with
    const [prevRow, prevCol] = path[idx - 1]!;
    const stepLength = Math.abs(row - prevRow) + Math.abs(col - prevCol);
    if (stepLength !== 1) return false; // must be 4-directional adjacent
  }
  return true;
}
20+
21+
// Behavioral tests for findPath(): optimality, validity, and effort bounds.
describe("A* Pathfinding", () => {
  it("finds a straight-line path", () => {
    const grid = [
      [0, 0, 0, 0, 0],
      [0, 0, 0, 0, 0],
      [0, 0, 0, 0, 0],
    ];
    const result = findPath(grid, [0, 0], [0, 4]);
    assert.ok(result, "should find a path");
    assert.deepEqual(result.path[0], [0, 0], "starts at start");
    assert.deepEqual(result.path[result.path.length - 1], [0, 4], "ends at end");
    assert.equal(result.path.length, 5, "shortest path is 5 cells");
    assert.ok(isValidPath(grid, result.path), "path must be valid");
  });

  it("navigates around obstacles", () => {
    const grid = [
      [0, 0, 0, 0, 0],
      [0, 1, 1, 1, 0],
      [0, 0, 0, 0, 0],
    ];
    const result = findPath(grid, [0, 0], [2, 4]);
    assert.ok(result, "should find a path");
    assert.deepEqual(result.path[0], [0, 0]);
    assert.deepEqual(result.path[result.path.length - 1], [2, 4]);
    assert.ok(isValidPath(grid, result.path), "path must be valid");
    assert.equal(result.path.length, 7, "shortest path around obstacle is 7");
  });

  it("returns null for unreachable target", () => {
    const grid = [
      [0, 1, 0],
      [0, 1, 0],
      [0, 1, 0],
    ];
    const result = findPath(grid, [0, 0], [0, 2]);
    assert.equal(result, null, "should return null when target is unreachable");
  });

  it("handles start equals end", () => {
    const grid = [[0, 0], [0, 0]];
    const result = findPath(grid, [1, 1], [1, 1]);
    assert.ok(result, "should find a path");
    assert.equal(result.path.length, 1, "path is just the start/end point");
    assert.deepEqual(result.path[0], [1, 1]);
  });

  it("solves a maze", () => {
    const grid = [
      [0, 1, 0, 0, 0],
      [0, 1, 0, 1, 0],
      [0, 0, 0, 1, 0],
      [1, 1, 0, 0, 0],
      [0, 0, 0, 1, 0],
    ];
    const result = findPath(grid, [0, 0], [4, 4]);
    assert.ok(result, "should find a path through the maze");
    assert.deepEqual(result.path[0], [0, 0]);
    assert.deepEqual(result.path[result.path.length - 1], [4, 4]);
    assert.ok(isValidPath(grid, result.path), "path must be valid");
    // The Manhattan distance is 8 moves, and the route
    // [0,0] [1,0] [2,0] [2,1] [2,2] [3,2] [3,3] [3,4] [4,4] achieves it,
    // so the shortest path has 9 cells (the previous expectation of 13
    // would reject every optimal solver).
    assert.equal(result.path.length, 9, "shortest maze path is 9");
  });

  it("handles large grid efficiently", () => {
    // 50x50 grid with a clear path
    const size = 50;
    const grid: number[][] = Array.from({ length: size }, () =>
      Array.from({ length: size }, () => 0)
    );
    // Add some obstacles but leave a clear path
    for (let i = 1; i < size - 1; i++) {
      grid[i]![Math.floor(size / 2)] = 1; // vertical wall with gap at top and bottom
    }

    const start = performance.now();
    const result = findPath(grid, [0, 0], [size - 1, size - 1]);
    const elapsed = performance.now() - start;

    assert.ok(result, "should find a path on large grid");
    assert.ok(isValidPath(grid, result.path), "path must be valid");
    assert.ok(elapsed < 1000, `should complete in < 1 second (took ${elapsed.toFixed(0)}ms)`);
    assert.ok(result.nodesExplored < size * size, "A* should not explore every cell");
  });

  it("tracks nodes explored accurately", () => {
    const grid = [
      [0, 0, 0],
      [0, 0, 0],
      [0, 0, 0],
    ];
    const result = findPath(grid, [0, 0], [2, 2]);
    assert.ok(result, "should find a path");
    assert.ok(result.nodesExplored > 0, "should explore at least 1 node");
    assert.ok(result.nodesExplored <= 9, "should not explore more than grid size");
  });
});

examples/astar/tsconfig.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"compilerOptions": {
3+
"target": "esnext",
4+
"module": "nodenext",
5+
"strict": true,
6+
"outDir": "dist",
7+
"rootDir": ".",
8+
"lib": ["esnext"],
9+
"types": ["node"]
10+
},
11+
"include": ["src/**/*", "tests/**/*"]
12+
}

0 commit comments

Comments
 (0)