-
Notifications
You must be signed in to change notification settings - Fork 61
Expand file tree
/
Copy pathchallenge.py
More file actions
109 lines (94 loc) · 3.23 KB
/
challenge.py
File metadata and controls
109 lines (94 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import ctypes
from typing import Any, Dict, List
import torch
from core.challenge_base import ChallengeBase
class Challenge(ChallengeBase):
    """Challenge: count how many elements of an int32 array equal a value K.

    The contract shared by the reference implementation, the ctypes solve
    signature, and the test generators:

    * ``input``  — 1-D int32 tensor of length ``N``
    * ``output`` — int32 tensor of shape ``(1,)`` receiving the count
    * ``N``      — number of elements in ``input``
    * ``K``      — the value to count
    """

    def __init__(self):
        super().__init__(
            name="Count Array Element", atol=1e-05, rtol=1e-05, num_gpus=1, access_tier="free"
        )

    def reference_impl(self, input: torch.Tensor, output: torch.Tensor, N: int, K: int):
        """Write the number of elements of ``input`` equal to ``K`` into ``output[0]``."""
        # Validate input types and shapes.
        # NOTE(review): asserts are stripped under `python -O`; kept as asserts
        # to preserve the exception type the harness may rely on.
        assert input.shape == (N,)
        assert output.shape == (1,)
        assert input.dtype == torch.int32
        assert output.dtype == torch.int32
        # Count elements equal to K. torch.sum over a bool mask yields int64;
        # it is cast to int32 on assignment into `output`.
        equality_tensor = input == K
        output[0] = torch.sum(equality_tensor)

    def get_solve_signature(self) -> Dict[str, tuple]:
        """Describe the C-level solver signature (pointer/scalar, in/out)."""
        return {
            "input": (ctypes.POINTER(ctypes.c_int), "in"),
            "output": (ctypes.POINTER(ctypes.c_int), "out"),
            "N": (ctypes.c_int, "in"),
            "K": (ctypes.c_int, "in"),
        }

    def generate_example_test(self) -> Dict[str, Any]:
        """Small hand-written example shown to users."""
        dtype = torch.int32
        input = torch.tensor([1, 2, 3, 4, 1], device="cuda", dtype=dtype)
        output = torch.empty(1, device="cuda", dtype=dtype)
        return {
            "input": input,
            "output": output,
            "N": 5,
            "K": 1,
        }

    def generate_functional_test(self) -> List[Dict[str, Any]]:
        """Functional cases: small fixed arrays plus random arrays of growing size."""
        dtype = torch.int32
        tests = []
        # basic_example
        tests.append(
            {
                "input": torch.tensor([1, 2, 3, 4, 1], device="cuda", dtype=dtype),
                "output": torch.empty(1, device="cuda", dtype=dtype),
                "N": 5,
                "K": 1,
            }
        )
        # all_same_value: every element matches K
        tests.append(
            {
                "input": torch.tensor([2] * 16, device="cuda", dtype=dtype),
                "output": torch.empty(1, device="cuda", dtype=dtype),
                "N": 16,
                "K": 2,
            }
        )
        # random_small: values drawn from [1, 4], K in range
        # (was mislabeled "increasing_sequence" — the data is random)
        tests.append(
            {
                "input": torch.randint(1, 5, (32,), device="cuda", dtype=dtype),
                "output": torch.empty(1, device="cuda", dtype=dtype),
                "N": 32,
                "K": 4,
            }
        )
        # medium_size: values drawn from [1, 9], K in range
        tests.append(
            {
                "input": torch.randint(1, 10, (1000,), device="cuda", dtype=dtype),
                "output": torch.empty(1, device="cuda", dtype=dtype),
                "N": 1000,
                "K": 5,
            }
        )
        # large_size: values drawn from [1, 999], K in range
        tests.append(
            {
                "input": torch.randint(1, 1000, (100000,), device="cuda", dtype=dtype),
                "output": torch.empty(1, device="cuda", dtype=dtype),
                "N": 100000,
                "K": 501,
            }
        )
        return tests

    def generate_performance_test(self) -> Dict[str, Any]:
        """Large random array for timing; K chosen inside the value range.

        ``torch.randint(1, 100001, ...)`` draws values in [1, 100000] (the
        high bound is exclusive).  The original K of 501010 lay outside that
        range, so the expected count was always 0 and a kernel that
        unconditionally wrote 0 would have passed; K=50101 keeps the expected
        count nonzero.
        """
        dtype = torch.int32
        input = torch.randint(1, 100001, (100000000,), device="cuda", dtype=dtype)
        output = torch.empty(1, device="cuda", dtype=dtype)
        return {
            "input": input,
            "output": output,
            "N": 100000000,
            "K": 50101,
        }