-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathruntime.impala
More file actions
121 lines (104 loc) · 5.44 KB
/
runtime.impala
File metadata and controls
121 lines (104 loc) · 5.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
// Declarations of the external runtime entry points, imported with C linkage.
// Each entry pairs the external symbol name (the string literal) with the
// name it is bound to inside Impala code.
extern "C" {
// Runtime / device queries.
fn "anydsl_info" runtime_info() -> ();
fn "anydsl_device_name" runtime_device_name(_device: i32) -> &[u8];
fn "anydsl_device_check_feature_support" runtime_device_check_feature_support(_device: i32, _feature: &[u8]) -> bool;
// Allocation. The wrappers in this file pass (device id, size) and store the
// returned pointer in a Buffer.
// NOTE(review): size is presumably a byte count — confirm against the runtime.
fn "anydsl_alloc" runtime_alloc(i32, i64) -> &[i8];
fn "anydsl_alloc_host" runtime_alloc_host(i32, i64) -> &[i8];
fn "anydsl_alloc_unified" runtime_alloc_unified(i32, i64) -> &[i8];
// Copies. Argument order as used by copy_offset / copy_offset_async in this file:
// (src device, src data, src offset, dst device, dst data, dst offset, size).
fn "anydsl_copy" runtime_copy(i32, &[i8], i64, i32, &[i8], i64, i64) -> ();
fn "anydsl_copy_async" runtime_copy_async(i32, &[i8], i64, i32, &[i8], i64, i64) -> ();
fn "anydsl_get_device_ptr" runtime_get_device_ptr(i32, &[i8]) -> &[i8];
// Release and synchronization. release() in this file passes (device id, data pointer).
fn "anydsl_release" runtime_release(i32, &[i8]) -> ();
fn "anydsl_release_host" runtime_release_host(i32, &[i8]) -> ();
fn "anydsl_synchronize" runtime_synchronize(i32) -> ();
// Random number helpers.
fn "anydsl_random_seed" random_seed(u32) -> ();
fn "anydsl_random_val_f32" random_val_f32() -> f32;
fn "anydsl_random_val_u64" random_val_u64() -> u64;
// Timers, all returning i64.
// NOTE(review): units are presumably micro-/nanoseconds as the names suggest — confirm.
fn "anydsl_get_micro_time" get_micro_time() -> i64;
fn "anydsl_get_nano_time" get_nano_time() -> i64;
fn "anydsl_get_kernel_time" get_kernel_time() -> i64;
// Printing helpers, one per primitive type, plus an explicit flush.
fn "anydsl_print_i16" print_i16(i16) -> ();
fn "anydsl_print_i32" print_i32(i32) -> ();
fn "anydsl_print_i64" print_i64(i64) -> ();
fn "anydsl_print_u16" print_u16(u16) -> ();
fn "anydsl_print_u32" print_u32(u32) -> ();
fn "anydsl_print_u64" print_u64(u64) -> ();
fn "anydsl_print_f32" print_f32(f32) -> ();
fn "anydsl_print_f64" print_f64(f64) -> ();
fn "anydsl_print_char" print_char(u8) -> ();
fn "anydsl_print_string" print_string(&[u8]) -> ();
fn "anydsl_print_flush" print_flush() -> ();
}
// Handle for one runtime allocation: the pointer, the size it was requested
// with, and the device id it was allocated on. Built by alloc / alloc_host /
// alloc_unified and consumed by release and the copy helpers.
struct Buffer {
// Pointer returned by the runtime allocation call.
data : &[i8],
// Size passed to the allocation call.
size : i64,
// Device id passed to the allocation call (see runtime_device for the encoding).
device : i32
}
// Requests `size` from runtime_alloc on device `dev` and returns a Buffer
// recording the pointer together with the size and device it was obtained with.
fn @alloc(dev: i32, size: i64) -> Buffer {
    let ptr = runtime_alloc(dev, size);
    Buffer {
        data   : ptr,
        size   : size,
        device : dev
    }
}
// Same as alloc, but the memory comes from runtime_alloc_host.
// The returned Buffer records the pointer, the requested size and `dev`.
fn @alloc_host(dev: i32, size: i64) -> Buffer {
    let ptr = runtime_alloc_host(dev, size);
    Buffer {
        data   : ptr,
        size   : size,
        device : dev
    }
}
// Same as alloc, but the memory comes from runtime_alloc_unified.
// The returned Buffer records the pointer, the requested size and `dev`.
fn @alloc_unified(dev: i32, size: i64) -> Buffer {
    let ptr = runtime_alloc_unified(dev, size);
    Buffer {
        data   : ptr,
        size   : size,
        device : dev
    }
}
// Gives the buffer's pointer back to the runtime via runtime_release, using the
// device id stored in the buffer.
// NOTE(review): every Buffer goes through runtime_release here, including ones
// built by alloc_host; runtime_release_host is declared but not called in the
// visible part of this file — confirm that this is intended.
fn @release(buf: Buffer) -> () {
    let dev = buf.device;
    let ptr = buf.data;
    runtime_release(dev, ptr)
}
// Packs a platform number and a device index into a single i32 device id:
// the device index is shifted left by 4 bits and the platform is OR-ed in.
// NOTE(review): a platform value above 15 would overlap the device bits.
fn @runtime_device(platform: i32, device: i32) -> i32 {
    let device_bits = device << 4;
    device_bits | platform
}
// Allocates `size` through alloc on device id 0.
// NOTE(review): id 0 is presumably the host CPU, as the function name says — confirm.
fn @alloc_cpu(size: i64) -> Buffer {
    let cpu_dev: i32 = 0;
    alloc(cpu_dev, size)
}
// Allocates `size` through alloc on device `dev` of platform 1
// (the platform number used by all *_cuda helpers in this file).
fn @alloc_cuda(dev: i32, size: i64) -> Buffer {
    let cuda_dev = runtime_device(1, dev);
    alloc(cuda_dev, size)
}
// Allocates `size` through alloc_host for device `dev` of platform 1
// (the platform number used by all *_cuda helpers in this file).
fn @alloc_cuda_host(dev: i32, size: i64) -> Buffer {
    let cuda_dev = runtime_device(1, dev);
    alloc_host(cuda_dev, size)
}
// Allocates `size` through alloc_unified for device `dev` of platform 1
// (the platform number used by all *_cuda helpers in this file).
fn @alloc_cuda_unified(dev: i32, size: i64) -> Buffer {
    let cuda_dev = runtime_device(1, dev);
    alloc_unified(cuda_dev, size)
}
// Calls runtime_synchronize for device `dev` of platform 1
// (the platform number used by all *_cuda helpers in this file).
fn @synchronize_cuda(dev: i32) -> () {
    let cuda_dev = runtime_device(1, dev);
    runtime_synchronize(cuda_dev)
}
// Allocates `size` through alloc on device `dev` of platform 2
// (the platform number used by the *_opencl helpers in this file).
fn @alloc_opencl(dev: i32, size: i64) -> Buffer {
    let opencl_dev = runtime_device(2, dev);
    alloc(opencl_dev, size)
}
// Allocates `size` through alloc_unified for device `dev` of platform 2
// (the platform number used by the *_opencl helpers in this file).
fn @alloc_opencl_unified(dev: i32, size: i64) -> Buffer {
    let opencl_dev = runtime_device(2, dev);
    alloc_unified(opencl_dev, size)
}
// Calls runtime_synchronize for device `dev` of platform 2
// (the platform number used by the *_opencl helpers in this file).
fn @synchronize_opencl(dev: i32) -> () {
    let opencl_dev = runtime_device(2, dev);
    runtime_synchronize(opencl_dev)
}
// Allocates `size` through alloc on device `dev` of platform 2.
// This is the same platform number the *_opencl helpers in this file use.
fn @alloc_hls(dev: i32, size: i64) -> Buffer {
    let hls_dev = runtime_device(2, dev);
    alloc(hls_dev, size)
}
// Allocates `size` through alloc_unified for device `dev` of platform 2.
// This is the same platform number the *_opencl helpers in this file use.
fn @alloc_hls_unified(dev: i32, size: i64) -> Buffer {
    let hls_dev = runtime_device(2, dev);
    alloc_unified(hls_dev, size)
}
// Calls runtime_synchronize for device `dev` of platform 2.
// This is the same platform number the *_opencl helpers in this file use.
fn @synchronize_hls(dev: i32) -> () {
    let hls_dev = runtime_device(2, dev);
    runtime_synchronize(hls_dev)
}
// Allocates `size` through alloc on device `dev` of platform 3
// (the platform number used by all *_hsa helpers in this file).
fn @alloc_hsa(dev: i32, size: i64) -> Buffer {
    let hsa_dev = runtime_device(3, dev);
    alloc(hsa_dev, size)
}
// Allocates `size` through alloc_host for device `dev` of platform 3
// (the platform number used by all *_hsa helpers in this file).
fn @alloc_hsa_host(dev: i32, size: i64) -> Buffer {
    let hsa_dev = runtime_device(3, dev);
    alloc_host(hsa_dev, size)
}
// Allocates `size` through alloc_unified for device `dev` of platform 3
// (the platform number used by all *_hsa helpers in this file).
fn @alloc_hsa_unified(dev: i32, size: i64) -> Buffer {
    let hsa_dev = runtime_device(3, dev);
    alloc_unified(hsa_dev, size)
}
// Calls runtime_synchronize for device `dev` of platform 3
// (the platform number used by all *_hsa helpers in this file).
fn @synchronize_hsa(dev: i32) -> () {
    let hsa_dev = runtime_device(3, dev);
    runtime_synchronize(hsa_dev)
}
// Copies the whole source buffer into the destination buffer via runtime_copy:
// both offsets are zero and the amount copied is src.size.
// NOTE(review): dst.size is never consulted here — confirm that callers
// guarantee the destination is at least as large as the source.
fn @copy(src: Buffer, dst: Buffer) -> () {
    let start = 0i64;
    let count = src.size;
    runtime_copy(src.device, src.data, start, dst.device, dst.data, start, count)
}
// Copies `size` from `src` starting at `off_src` into `dst` starting at
// `off_dst` via runtime_copy. Offsets and size are forwarded unchanged;
// neither buffer's recorded size is checked here.
fn @copy_offset(src: Buffer, off_src: i64, dst: Buffer, off_dst: i64, size: i64) -> () {
    let src_dev = src.device;
    let dst_dev = dst.device;
    runtime_copy(src_dev, src.data, off_src, dst_dev, dst.data, off_dst, size)
}
// Same as copy, but issued through runtime_copy_async: both offsets are zero
// and the amount copied is src.size.
// NOTE(review): dst.size is never consulted here — confirm that callers
// guarantee the destination is at least as large as the source.
fn @copy_async(src: Buffer, dst: Buffer) -> () {
    let start = 0i64;
    let count = src.size;
    runtime_copy_async(src.device, src.data, start, dst.device, dst.data, start, count)
}
// Same as copy_offset, but issued through runtime_copy_async. Offsets and size
// are forwarded unchanged; neither buffer's recorded size is checked here.
fn @copy_offset_async(src: Buffer, off_src: i64, dst: Buffer, off_dst: i64, size: i64) -> () {
    let src_dev = src.device;
    let dst_dev = dst.device;
    runtime_copy_async(src_dev, src.data, off_src, dst_dev, dst.data, off_dst, size)
}
// range, range_step, unroll, unroll_step, etc.
// Calls body(i) for i = lower, lower+step, lower+2*step, ... as long as
// i < upper. The loop is written as recursion: each level runs the body once
// and then recurses with the lower bound advanced by `step`.
// NOTE(review): the `@(?lower & ?upper & ?step)` filter presumably asks the
// partial evaluator to specialize (and thereby unroll) the recursion only when
// all three values are known at compile time — confirm against the Impala docs.
// There is no guard on `step`: with step <= 0 and lower < upper the lower
// bound never moves toward `upper`.
fn @(?lower & ?upper & ?step) unroll_step(lower: i32, upper: i32, @step: i32, body: fn(i32) -> ()) -> () {
if lower < upper {
// NOTE(review): `@@` presumably forces partial evaluation of this call — confirm.
@@body(lower);
unroll_step(lower+step, upper, step, body)
}
}
// Calls body(i) for i = upper, upper-step, upper-2*step, ... as long as
// i > lower. Note the bounds: `upper` itself is visited and `lower` is not,
// which differs from unroll_step, where the lower bound is visited and the
// upper bound is not.
// NOTE(review): the `@(?upper & ?lower & ?step)` filter presumably asks the
// partial evaluator to specialize (and thereby unroll) the recursion only when
// all three values are known at compile time — confirm against the Impala docs.
// There is no guard on `step`: with step <= 0 and upper > lower the upper
// bound never moves toward `lower`.
fn @(?upper & ?lower & ?step) unroll_step_rev(upper: i32, lower: i32, @step: i32, body: fn(i32) -> ()) -> () {
if upper > lower {
// NOTE(review): `@@` presumably forces partial evaluation of this call — confirm.
@@body(upper);
unroll_step_rev(upper-step, lower, step, body)
}
}
// Calls body(i) for i = lower, lower+1, ... while i < upper, by delegating to
// unroll_step with a step of 1.
// NOTE(review): the `$` on both bounds presumably prevents them from being
// treated as compile-time values, so this variant is not unrolled — confirm
// against the Impala docs.
fn @range(lower: i32, upper: i32, body: fn(i32) -> ()) -> () { unroll_step($lower, $upper, 1, body) }
// Calls body(i) for i = lower, lower+step, ... while i < upper, by delegating
// to unroll_step with the caller's step.
// NOTE(review): the `$` on both bounds presumably prevents them from being
// treated as compile-time values, so this variant is not unrolled — confirm
// against the Impala docs.
fn @range_step(lower: i32, upper: i32, step: i32, body: fn(i32) -> ()) -> () { unroll_step($lower, $upper, step, body) }
// Calls body(i) for i = upper, upper-1, ... while i > lower, by delegating to
// unroll_step_rev with a step of 1. As in unroll_step_rev, `upper` is visited
// and `lower` is not.
// NOTE(review): the `$` on both bounds presumably prevents them from being
// treated as compile-time values, so this variant is not unrolled — confirm
// against the Impala docs.
fn @range_rev(upper: i32, lower: i32, body: fn(i32) -> ()) -> () { unroll_step_rev($upper, $lower, 1, body) }
// Calls body(i) for i = lower, lower+1, ... while i < upper, by delegating to
// unroll_step with a step of 1. Unlike range, the bounds are passed through
// without the `$` annotation.
fn @unroll(lower: i32, upper: i32, body: fn(i32) -> ()) -> () { unroll_step(lower, upper, 1, body) }
// Calls body(i) for i = upper, upper-1, ... while i > lower, by delegating to
// unroll_step_rev with a step of 1. As in unroll_step_rev, `upper` is visited
// and `lower` is not. Unlike range_rev, the bounds are passed through without
// the `$` annotation.
fn @unroll_rev(upper: i32, lower: i32, body: fn(i32) -> ()) -> () { unroll_step_rev(upper, lower, 1, body) }