-
Notifications
You must be signed in to change notification settings - Fork 89
Expand file tree
/
Copy pathexecution_common.hpp
More file actions
146 lines (127 loc) · 3.47 KB
/
execution_common.hpp
File metadata and controls
146 lines (127 loc) · 3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#ifndef MSCCLPP_EXECUTION_COMMON_HPP_
#define MSCCLPP_EXECUTION_COMMON_HPP_
#include <cstdint>

#include <mscclpp/memory_channel.hpp>
#include <mscclpp/port_channel.hpp>
#include <mscclpp/switch_channel.hpp>
namespace mscclpp {
// Number of slots in each per-kind channel array of `Channels` and `RemoteBuffers`.
constexpr int MAX_CHANNEL = 16;
// Number of slots in each per-operation array of `Operation` (buffer refs, channel indexes, offsets, sizes).
constexpr int MAX_CHANNEL_PER_OPERATION = 8;
// Number of `Operation` entries stored in a `DeviceExecutionPlan`.
constexpr int MAX_OPERATION = 64;
// Upper bound on device syncers (not referenced elsewhere in this header).
constexpr int MAX_DEVICE_SYNCERS = 16;
// Number of slots in `Operation::deviceSemaphoreIds`.
constexpr int MAX_DEVICE_SEMAPHORES = 16;
// Predefined scratch size: 1 << 26 = 67108864 bytes (64 MiB).
constexpr uint32_t PREDEFINED_SCRATCH_SIZE = 1 << 26;
// Original, misspelled name of PREDEFINED_SCRATCH_SIZE. Kept as an alias so existing code keeps compiling;
// new code should use the correctly spelled constant.
constexpr uint32_t PREDFINED_SCRATCH_SIZE = PREDEFINED_SCRATCH_SIZE;
// Kind of buffer, stored in one byte. The concrete kinds are numbered from 0; NONE takes the
// largest uint8_t value (255) so it never collides with them.
enum class BufferType : uint8_t {
  INPUT = 0,
  OUTPUT = 1,
  SCRATCH = 2,
  NONE = UINT8_MAX,
};
// Kind of channel, stored in one byte. Values are written out explicitly and equal the
// declaration-order numbering (NONE is 0).
enum class ChannelType : uint8_t {
  NONE = 0,
  MEMORY = 1,
  PORT = 2,
  SWITCH = 3,
};
// NOTE(chhwang): any modification here requires corresponding updates in `tools/npkit/npkit_trace_generator.py`,
// as well as to NPKIT_EVENT_EXECUTOR_OP_BASE_EXIT in npkit_event.hpp.
// Operation codes, stored in one byte. Every value is written out explicitly and equals the
// enumerator's position in declaration order (0..25), so inserting or reordering entries
// changes the numeric codes.
enum class OperationType : uint8_t {
  NOP = 0,
  BARRIER = 1,
  PUT = 2,
  PUT_PACKETS = 3,
  READ_PUT_PACKETS = 4,
  PUT_WITH_SIGNAL = 5,
  PUT_WITH_SIGNAL_AND_FLUSH = 6,
  GET = 7,
  COPY = 8,
  COPY_PACKETS = 9,
  UNPACK_PACKETS = 10,
  SIGNAL = 11,
  WAIT = 12,
  FLUSH = 13,
  REDUCE = 14,
  REDUCE_PACKETS = 15,
  REDUCE_SEND = 16,
  REDUCE_SEND_PACKETS = 17,
  READ_REDUCE = 18,
  READ_REDUCE_SEND = 19,
  MULTI_LOAD_REDUCE_STORE = 20,
  RELAXED_SIGNAL = 21,
  RELAXED_WAIT = 22,
  PIPELINE = 23,
  SEM_RELEASE = 24,
  SEM_ACQUIRE = 25,
};
// Device handles for the channels of a plan, grouped by kind. Each kind has a fixed array of
// MAX_CHANNEL slots. Member order and types determine the struct layout, which is embedded in
// DeviceExecutionPlan.
struct Channels {
  // Handles for memory channels.
  mscclpp::DeviceHandle<mscclpp::BaseMemoryChannel> memoryChannels[MAX_CHANNEL];
  // Handles for port channels.
  mscclpp::DeviceHandle<mscclpp::BasePortChannel> portChannels[MAX_CHANNEL];
  // Handles for switch channels (the member keeps the "nvls" name).
  mscclpp::DeviceHandle<mscclpp::SwitchChannel> nvlsChannels[MAX_CHANNEL];
};
// Remote buffer descriptions, kept separately for memory channels and port channels.
// Each array has MAX_CHANNEL slots.
// NOTE(review): presumably slot i of each array pairs with slot i of its sibling array (type + pointer,
// type + id) — confirm against the code that fills this struct.
struct RemoteBuffers {
  // Buffers accessed via memory channels: the buffer kind and a raw pointer to the buffer.
  BufferType memoryChannelBufferTypes[MAX_CHANNEL];
  void* memoryChannelBufferPtrs[MAX_CHANNEL];
  // Buffers accessed via port channels: the buffer kind and a MemoryId instead of a pointer.
  BufferType portChannelBufferTypes[MAX_CHANNEL];
  MemoryId portChannelBufferIds[MAX_CHANNEL];
};
// One-byte buffer reference that can be read either as a raw id or as a BufferType.
// Both members occupy the same byte (BufferType's underlying type is uint8_t), so writing one
// overwrites the other.
union BufferRef {
  // Raw numeric view of the byte.
  uint8_t id;
  // The same byte viewed as a buffer kind.
  BufferType type;
};
// One entry of DeviceExecutionPlan::operations: an operation code, the kind of channel it uses,
// and its operands. The operands are stored in anonymous unions, so several alternative layouts
// share the same bytes and only one alternative per union is meaningful for a given operation.
// NOTE(review): which alternative applies is presumably decided by `type` / `channelType` — confirm
// against the code that reads these fields.
// DeviceExecutionPlan documents this struct as 160 bytes; changing members or their order changes
// that size. Anonymous structs inside unions are a compiler extension rather than standard C++.
struct Operation {
  OperationType type;
  ChannelType channelType;
  // Input operands: either per-slot buffer references, or a single (index, buffer type) pair.
  // The pair overlays the first two bytes of inputBufferRefs.
  union {
    BufferRef inputBufferRefs[MAX_CHANNEL_PER_OPERATION];
    struct {
      uint8_t nvlsInputIndex;
      BufferType nvlsInputBufferType;
    };
  };
  // Output operands: same two alternatives as the input union above.
  union {
    BufferRef outputBufferRefs[MAX_CHANNEL_PER_OPERATION];
    struct {
      uint8_t nvlsOutputIndex;
      BufferType nvlsOutputBufferType;
    };
  };
  // Operation parameters: four alternative layouts sharing the same storage.
  union {
    // Layout 1: per-slot channel indexes, offsets and sizes, plus the number of valid entries.
    struct {
      uint8_t channelIndexes[MAX_CHANNEL_PER_OPERATION];
      uint32_t inputOffsets[MAX_CHANNEL_PER_OPERATION];
      uint32_t outputOffsets[MAX_CHANNEL_PER_OPERATION];
      uint32_t inputBufferSizes[MAX_CHANNEL_PER_OPERATION];
      uint32_t outputBufferSizes[MAX_CHANNEL_PER_OPERATION];
      uint8_t nChannels;
      uint8_t nInputs;
      uint8_t nOutputs;
    };
    // Layout 2: unit size, iteration count and operation count.
    // NOTE(review): looks like the parameters of OperationType::PIPELINE — confirm.
    struct {
      uint32_t unitSize;
      uint32_t nIterations;
      uint8_t nOperations;
    };
    // Layout 3: a device syncer index and a thread block count.
    // NOTE(review): looks like the parameters of OperationType::BARRIER — confirm.
    struct {
      uint32_t deviceSyncerIndex;
      uint32_t nThreadBlocks;
    };
    // Layout 4: up to MAX_DEVICE_SEMAPHORES semaphore ids and how many of them are used.
    // NOTE(review): looks like the parameters of SEM_RELEASE / SEM_ACQUIRE — confirm.
    struct {
      uint32_t deviceSemaphoreIds[MAX_DEVICE_SEMAPHORES];
      uint32_t nDeviceSemaphores;
    };
  };
};
// Fixed-size execution plan: channel counts, the channel handles, the remote buffer descriptions
// and up to MAX_OPERATION operations. The struct is aligned to 16 bytes.
// Size accounting: 4 (counters) + 2016 (channels 1792 + remoteBuffers 224) + 10240 (operations)
// + 12 (padding) = 12272 bytes.
// NOTE(review): the 1792- and 224-byte figures depend on types declared in other headers
// (DeviceHandle<...>, MemoryId); re-check them whenever those types change.
struct __attribute__((aligned(16))) DeviceExecutionPlan {
  uint8_t nMemoryChannels;              // 1 byte
  uint8_t nPortChannels;                // 1 byte
  uint16_t nOperations;                 // 2 bytes
  Channels channels;                    // 1792 bytes
  RemoteBuffers remoteBuffers;          // 224 bytes
  Operation operations[MAX_OPERATION];  // 64 * 160 = 10240 bytes
};
} // namespace mscclpp
#endif // MSCCLPP_EXECUTION_COMMON_HPP_