-
Notifications
You must be signed in to change notification settings - Fork 61
Expand file tree
/
Copy pathchallenge.py
More file actions
109 lines (94 loc) · 3.23 KB
/
challenge.py
File metadata and controls
109 lines (94 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import ctypes
from typing import Any, Dict, List
import torch
from core.challenge_base import ChallengeBase
class Challenge(ChallengeBase):
    """Challenge: count how many elements of an int32 array equal a value K.

    The contract shared by the reference implementation, the ctypes solve
    signature, and the test generators:

    * ``input``  — 1-D int32 tensor of length ``N``
    * ``output`` — int32 tensor of shape ``(1,)`` receiving the count
    * ``N``      — number of elements in ``input``
    * ``K``      — the value to count
    """

    def __init__(self):
        super().__init__(
            name="Count Array Element", atol=1e-05, rtol=1e-05, num_gpus=1, access_tier="free"
        )

    def reference_impl(self, input: torch.Tensor, output: torch.Tensor, N: int, K: int):
        """Write the number of elements of ``input`` equal to ``K`` into ``output[0]``."""
        # Validate input types and shapes.
        # NOTE(review): asserts are stripped under `python -O`; kept as asserts
        # to preserve the exception type the harness may rely on.
        assert input.shape == (N,)
        assert output.shape == (1,)
        assert input.dtype == torch.int32
        assert output.dtype == torch.int32
        # Count elements equal to K. torch.sum over a bool mask yields int64;
        # it is cast to int32 on assignment into `output`.
        equality_tensor = input == K
        output[0] = torch.sum(equality_tensor)

    def get_solve_signature(self) -> Dict[str, tuple]:
        """Describe the C-level solver signature (pointer/scalar, in/out)."""
        return {
            "input": (ctypes.POINTER(ctypes.c_int), "in"),
            "output": (ctypes.POINTER(ctypes.c_int), "out"),
            "N": (ctypes.c_int, "in"),
            "K": (ctypes.c_int, "in"),
        }

    def generate_example_test(self) -> Dict[str, Any]:
        """Small hand-written example shown to users."""
        dtype = torch.int32
        input = torch.tensor([1, 2, 3, 4, 1], device="cuda", dtype=dtype)
        output = torch.empty(1, device="cuda", dtype=dtype)
        return {
            "input": input,
            "output": output,
            "N": 5,
            "K": 1,
        }

    def generate_functional_test(self) -> List[Dict[str, Any]]:
        """Functional cases: small fixed arrays plus random arrays of growing size."""
        dtype = torch.int32
        tests = []
        # basic_example
        tests.append(
            {
                "input": torch.tensor([1, 2, 3, 4, 1], device="cuda", dtype=dtype),
                "output": torch.empty(1, device="cuda", dtype=dtype),
                "N": 5,
                "K": 1,
            }
        )
        # all_same_value: every element matches K
        tests.append(
            {
                "input": torch.tensor([2] * 16, device="cuda", dtype=dtype),
                "output": torch.empty(1, device="cuda", dtype=dtype),
                "N": 16,
                "K": 2,
            }
        )
        # random_small: values drawn from [1, 4], K in range
        # (was mislabeled "increasing_sequence" — the data is random)
        tests.append(
            {
                "input": torch.randint(1, 5, (32,), device="cuda", dtype=dtype),
                "output": torch.empty(1, device="cuda", dtype=dtype),
                "N": 32,
                "K": 4,
            }
        )
        # medium_size: values drawn from [1, 9], K in range
        tests.append(
            {
                "input": torch.randint(1, 10, (1000,), device="cuda", dtype=dtype),
                "output": torch.empty(1, device="cuda", dtype=dtype),
                "N": 1000,
                "K": 5,
            }
        )
        # large_size: values drawn from [1, 999], K in range
        tests.append(
            {
                "input": torch.randint(1, 1000, (100000,), device="cuda", dtype=dtype),
                "output": torch.empty(1, device="cuda", dtype=dtype),
                "N": 100000,
                "K": 501,
            }
        )
        return tests

    def generate_performance_test(self) -> Dict[str, Any]:
        """Large random array for timing; K chosen inside the value range.

        ``torch.randint(1, 100001, ...)`` draws values in [1, 100000] (the
        high bound is exclusive).  The original K of 501010 lay outside that
        range, so the expected count was always 0 and a kernel that
        unconditionally wrote 0 would have passed; K=50101 keeps the expected
        count nonzero.
        """
        dtype = torch.int32
        input = torch.randint(1, 100001, (100000000,), device="cuda", dtype=dtype)
        output = torch.empty(1, device="cuda", dtype=dtype)
        return {
            "input": input,
            "output": output,
            "N": 100000000,
            "K": 50101,
        }