leetgpu-challenges/challenges/hard/36_radix_sort/challenge.py at 2acff33617401d1af7c8335dcd8f0956623e4545 · AlphaGPU/leetgpu-challenges · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import ctypes
from typing import Any, Dict, List

import torch
from core.challenge_base import ChallengeBase


class Challenge(ChallengeBase):
    """Radix Sort challenge: sort a 1-D tensor of uint32 values in ascending order."""

    def __init__(self):
        super().__init__(
            name="Radix Sort",
            atol=1e-05,
            rtol=1e-05,
            num_gpus=1,
            access_tier="free",
        )

    def reference_impl(self, input: torch.Tensor, output: torch.Tensor, N: int):
        """Write the ascending sort of ``input`` into ``output`` in place.

        Both tensors are asserted to be ``torch.uint32`` with shape ``(N,)``.
        """
        assert input.dtype == torch.uint32
        assert output.dtype == torch.uint32
        assert input.shape == output.shape == (N,)

        # torch.sort doesn't support uint32, so sort an int64 copy instead;
        # int64 can hold every uint32 value, so the ordering is unaffected.
        widened = input.to(torch.int64)
        ascending, _ = torch.sort(widened)
        # Narrow back to uint32 while copying into the caller's buffer.
        output.copy_(ascending.to(torch.uint32))

    def get_solve_signature(self) -> Dict[str, tuple]:
        """Return, per argument name, a ``(ctypes type, "in"/"out" tag)`` tuple."""
        uint32_ptr = ctypes.POINTER(ctypes.c_uint32)
        signature = {
            "input": (uint32_ptr, "in"),
            "output": (uint32_ptr, "out"),
            "N": (ctypes.c_int, "in"),
        }
        return signature

    def generate_example_test(self) -> Dict[str, Any]:
        """Return one small fixed case: 8 values plus a zero-filled output buffer."""
        values = [170, 45, 75, 90, 2, 802, 24, 66]
        N = 8
        return {
            "input": torch.tensor(values, device="cuda", dtype=torch.uint32),
            "output": torch.zeros(N, device="cuda", dtype=torch.uint32),
            "N": N,
        }

    def generate_functional_test(self) -> List[Dict[str, Any]]:
        """Return the functional cases: six fixed inputs followed by two random ones."""
        dtype = torch.uint32

        def make_case(data: torch.Tensor) -> Dict[str, Any]:
            # Pair an input tensor with a zeroed output buffer of the same length.
            count = data.numel()
            return {
                "input": data,
                "output": torch.zeros(count, device="cuda", dtype=dtype),
                "N": count,
            }

        fixed_inputs = [
            [170, 45, 75, 90, 2, 802, 24, 66],  # 1: basic example
            [1, 4, 1, 3, 555, 1000, 2],  # 2: duplicate numbers
            [42],  # 3: single element
            [1, 2, 3, 4, 5, 6],  # 4: already sorted
            [6, 5, 4, 3, 2, 1],  # 5: reverse sorted
            [4294967295, 1000000000, 500000000, 2000000000, 100000000],  # 6: large numbers
        ]
        cases = [
            make_case(torch.tensor(values, device="cuda", dtype=dtype)) for values in fixed_inputs
        ]

        # 7: medium random, then 8: large random -- generated in this order.
        random_specs = ((1000001, 1024), (4294967296, 10000))
        for upper_bound, length in random_specs:
            drawn = torch.randint(0, upper_bound, (length,), device="cuda", dtype=dtype)
            cases.append(make_case(drawn))

        return cases

    def generate_performance_test(self) -> Dict[str, Any]:
        """Return a single large random case with 50,000,000 elements."""
        N = 50_000_000
        uint32 = torch.uint32
        random_input = torch.randint(0, 4294967296, (N,), device="cuda", dtype=uint32)
        zeroed_output = torch.zeros(N, device="cuda", dtype=uint32)
        return {"input": random_input, "output": zeroed_output, "N": N}