Skip to content

Commit febba14

Browse files
hanchengsshaulnv
authored and committed
Add support for Cambricon devices
To support Cambricon MLUs, configure using --enable-mlu --with-mlu=</usr/local/neuware>. Run bandwidth tests with Cambricon MLUs direct by specifying --use_mlu=<device_id>. Signed-off-by: hancheng <[email protected]>
1 parent 2e3aa2f commit febba14

File tree

6 files changed

+286
-4
lines changed

6 files changed

+286
-4
lines changed

Makefile.am

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ AUTOMAKE_OPTIONS= subdir-objects
3535

3636
noinst_LIBRARIES = libperftest.a
3737
libperftest_a_SOURCES = src/get_clock.c src/perftest_communication.c src/perftest_parameters.c src/perftest_resources.c src/perftest_counters.c src/host_memory.c src/mmap_memory.c
38-
noinst_HEADERS = src/get_clock.h src/perftest_communication.h src/perftest_parameters.h src/perftest_resources.h src/perftest_counters.h src/memory.h src/host_memory.h src/mmap_memory.h src/cuda_memory.h src/rocm_memory.h src/neuron_memory.h src/hl_memory.h
38+
noinst_HEADERS = src/get_clock.h src/perftest_communication.h src/perftest_parameters.h src/perftest_resources.h src/perftest_counters.h src/memory.h src/host_memory.h src/mmap_memory.h src/cuda_memory.h src/rocm_memory.h src/neuron_memory.h src/hl_memory.h src/mlu_memory.h
3939

4040
if CUDA
4141
libperftest_a_SOURCES += src/cuda_memory.c
@@ -49,6 +49,10 @@ if NEURON
4949
libperftest_a_SOURCES += src/neuron_memory.c
5050
endif
5151

52+
if MLU
53+
libperftest_a_SOURCES += src/mlu_memory.c
54+
endif
55+
5256
if HABANALABS
5357
libperftest_a_SOURCES += src/hl_memory.c
5458
endif

configure.ac

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,33 @@ AS_IF([test "x$enable_hl" = xyes], [
340340

341341
AM_CONDITIONAL([HABANALABS], [test x$enable_hl = xyes])
342342

343+
dnl --enable-mlu: build the Cambricon MLU memory backend (off by default).
AC_ARG_ENABLE([mlu],
	[AS_HELP_STRING([--enable-mlu],
		[Enable MLU benchmarks])
	],
	[],
	[enable_mlu=no])

dnl --with-mlu=PATH: add the MLU installation's include and lib64 directories
dnl to the search paths. The library itself is not added here; the
dnl AC_SEARCH_LIBS check below appends -lcndrv to LIBS, which lands after the
dnl object files on the link line.
AC_ARG_WITH([mlu],
	[AS_HELP_STRING([--with-mlu=@<:@MLU installation path@:>@],
		[Provide path to MLU installation])
	],
	[AS_CASE([$with_mlu],
		[yes|no], [],
		[CPPFLAGS="-I$with_mlu/include $CPPFLAGS"
		 LDFLAGS="-L$with_mlu/lib64 -Wl,-rpath=$with_mlu/lib64 $LDFLAGS"])
	])

dnl When MLU support is requested, require both the header and the library.
AS_IF([test "x$enable_mlu" = xyes], [
	AC_DEFINE([HAVE_MLU], [1], [Enable MLU benchmarks])
	AC_CHECK_HEADERS([cn_api.h], [],
		[AC_MSG_ERROR([could not find cn_api.h in include path])])
	AC_SEARCH_LIBS([cnMalloc], [cndrv], [],
		[AC_MSG_ERROR([could not find library, cndrv])])
])

AM_CONDITIONAL([MLU], [test "x$enable_mlu" = xyes])
369+
343370
AC_TRY_LINK([#include <infiniband/verbs.h>],
344371
[struct ibv_qp_attr *attr; int x = attr->rate_limit;],[HAVE_PACKET_PACING=yes], [HAVE_PACKET_PACING=no])
345372
AM_CONDITIONAL([HAVE_PACKET_PACING],[test "x$HAVE_PACKET_PACING" = "xyes"])

src/mlu_memory.c

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
2+
/*
3+
* Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved.
4+
*/
5+
6+
#include <stdio.h>
7+
#include <stdlib.h>
8+
#include <errno.h>
9+
#include <cn_api.h>
10+
#include "mlu_memory.h"
11+
#include "perftest_parameters.h"
12+
13+
/*
 * Translate a CNDrv result code into its symbolic name.
 *
 * Returns the string produced by cnGetErrorName(), or a fixed placeholder
 * when the lookup fails or yields no string, so the result is always safe to
 * pass to a "%s" conversion.
 */
static inline const char *getErrorName(CNresult error)
{
	const char *str = NULL;

	if (cnGetErrorName(error, &str) != CN_SUCCESS || str == NULL) {
		return "UNKNOWN_ERROR";
	}
	return str;
}
19+
20+
/*
 * Translate a CNDrv result code into its human-readable description.
 *
 * Returns the string produced by cnGetErrorString(), or a fixed placeholder
 * when the lookup fails or yields no string, so the result is always safe to
 * pass to a "%s" conversion.
 */
static inline const char *getErrorString(CNresult error)
{
	const char *str = NULL;

	if (cnGetErrorString(error, &str) != CN_SUCCESS || str == NULL) {
		return "unknown error";
	}
	return str;
}
26+
27+
/*
 * Evaluate a CNDrv call exactly once. If it returns anything other than
 * CN_SUCCESS, report the calling function, line, numeric code, error name and
 * error string on stderr and terminate the process with a failing exit
 * status.
 */
#define ERROR_CHECK(ret)										\
	do {												\
		CNresult cn_rc_ = (ret);								\
		if (cn_rc_ != CN_SUCCESS) {								\
			fprintf(stderr,									\
				"error occur, func: %s, line: %d, ret:%d, cn_error_code:%s, cn_error_string:%s\n", \
				__func__, __LINE__, (int)cn_rc_,					\
				getErrorName(cn_rc_), getErrorString(cn_rc_));				\
			exit(EXIT_FAILURE);								\
		}											\
	} while (0)
37+
38+
/* Granularity (64 KiB) to which MLU buffer sizes are rounded up before cnMalloc(). */
#define ACCEL_PAGE_SIZE (64 * 1024)
39+
40+
41+
struct mlu_memory_ctx {
42+
struct memory_ctx base;
43+
int device_id;
44+
CNdev cnDevice;
45+
CNcontext cnContext;
46+
47+
};
48+
49+
50+
static int init_mlu(struct mlu_memory_ctx *ctx)
51+
{
52+
int mlu_device_id = ctx->device_id;
53+
int mlu_pci_bus_id;
54+
int mlu_pci_device_id;
55+
int index;
56+
CNdev cn_device;
57+
58+
printf("initializing MLU\n");
59+
CNresult error = cnInit(0);
60+
if (error != CN_SUCCESS) {
61+
printf("cnInit(0) returned %d\n", error);
62+
return FAILURE;
63+
}
64+
65+
int deviceCount = 0;
66+
error = cnDeviceGetCount(&deviceCount);
67+
if (error != CN_SUCCESS) {
68+
printf("cnDeviceGetCount() returned %d\n", error);
69+
return FAILURE;
70+
}
71+
72+
if (deviceCount == 0) {
73+
printf("There are no available device(s) that support MLU\n");
74+
return FAILURE;
75+
}
76+
if (mlu_device_id >= deviceCount) {
77+
fprintf(stderr, "No such device ID (%d) exists in system\n", mlu_device_id);
78+
return FAILURE;
79+
}
80+
81+
printf("Listing all MLU devices in system:\n");
82+
for (index = 0; index < deviceCount; index++) {
83+
ERROR_CHECK(cnDeviceGet(&cn_device, index));
84+
cnDeviceGetAttribute(&mlu_pci_bus_id, CN_DEVICE_ATTRIBUTE_PCI_BUS_ID , cn_device);
85+
cnDeviceGetAttribute(&mlu_pci_device_id, CN_DEVICE_ATTRIBUTE_PCI_DEVICE_ID , cn_device);
86+
printf("MLU device %d: PCIe address is %02X:%02X\n", index, (unsigned int)mlu_pci_bus_id, (unsigned int)mlu_pci_device_id);
87+
}
88+
89+
printf("\nPicking device No. %d\n", mlu_device_id);
90+
91+
ERROR_CHECK(cnDeviceGet(&ctx->cnDevice, mlu_device_id));
92+
93+
char name[128];
94+
ERROR_CHECK(cnDeviceGetName(name, sizeof(name), mlu_device_id));
95+
printf("[pid = %d, dev = %ld] device name = [%s]\n", getpid(), ctx->cnDevice, name);
96+
printf("creating MLU Ctx\n");
97+
98+
/* Create context */
99+
error = cnCtxCreate(&ctx->cnContext, 0, ctx->cnDevice);
100+
if (error != CN_SUCCESS) {
101+
printf("cnCtxCreate() error=%d\n", error);
102+
return FAILURE;
103+
}
104+
105+
printf("making it the current MLU Ctx\n");
106+
error = cnCtxSetCurrent(ctx->cnContext);
107+
if (error != CN_SUCCESS) {
108+
printf("cnCtxSetCurrent() error=%d\n", error);
109+
return FAILURE;
110+
}
111+
112+
return SUCCESS;
113+
}
114+
115+
static void free_mlu(struct mlu_memory_ctx *ctx)
116+
{
117+
printf("destroying current MLU Ctx\n");
118+
ERROR_CHECK(cnCtxDestroy(ctx->cnContext));
119+
}
120+
121+
int mlu_memory_init(struct memory_ctx *ctx) {
122+
struct mlu_memory_ctx *mlu_ctx = container_of(ctx, struct mlu_memory_ctx, base);
123+
int return_value = 0;
124+
125+
return_value = init_mlu(mlu_ctx);
126+
127+
if (return_value) {
128+
fprintf(stderr, "Couldn't initialize mlu device : %d\n", return_value);
129+
return FAILURE;
130+
}
131+
132+
return SUCCESS;
133+
}
134+
135+
int mlu_memory_destroy(struct memory_ctx *ctx) {
136+
struct mlu_memory_ctx *mlu_ctx = container_of(ctx, struct mlu_memory_ctx, base);
137+
138+
free_mlu(mlu_ctx);
139+
free(mlu_ctx);
140+
return SUCCESS;
141+
}
142+
143+
int mlu_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t size, int *dmabuf_fd,
144+
uint64_t *dmabuf_offset, void **addr, bool *can_init) {
145+
CNresult error;
146+
size_t buf_size = (size + ACCEL_PAGE_SIZE - 1) & ~(ACCEL_PAGE_SIZE - 1);
147+
148+
CNaddr mlu_addr;
149+
printf("cnMalloc() of a %lu bytes MLU buffer\n", size);
150+
151+
error = cnMalloc(&mlu_addr, buf_size);
152+
if (error != CN_SUCCESS) {
153+
printf("cnMalloc error=%d\n", error);
154+
return FAILURE;
155+
}
156+
157+
printf("allocated %lu bytes of MLU buffer at %ld\n", (unsigned long)buf_size, mlu_addr);
158+
*addr = (void *)mlu_addr;
159+
*can_init = false;
160+
return SUCCESS;
161+
}
162+
163+
int mlu_memory_free_buffer(struct memory_ctx *ctx, int dmabuf_fd, void *addr, uint64_t size)
164+
{
165+
CNaddr mlu_addr = (CNaddr)addr;
166+
printf("deallocating MLU buffer %016lx\n", mlu_addr);
167+
cnFree(mlu_addr);
168+
return SUCCESS;
169+
}
170+
171+
void *mlu_memory_copy_host_buffer(void *dest, const void *src, size_t size) {
172+
cnMemcpy((CNaddr) dest, (CNaddr) src, size);
173+
return dest;
174+
}
175+
176+
void *mlu_memory_copy_buffer_to_buffer(void *dest, const void *src, size_t size) {
177+
cnMemcpyDtoD((CNaddr) dest, (CNaddr) src, size);
178+
return dest;
179+
}
180+
181+
/* Report whether MLU memory is supported; this implementation always answers true. */
bool mlu_memory_supported() {
	const bool supported = true;

	return supported;
}
184+
185+
struct memory_ctx *mlu_memory_create(struct perftest_parameters *params) {
186+
struct mlu_memory_ctx *ctx;
187+
188+
ALLOCATE(ctx, struct mlu_memory_ctx, 1);
189+
ctx->base.init = mlu_memory_init;
190+
ctx->base.destroy = mlu_memory_destroy;
191+
ctx->base.allocate_buffer = mlu_memory_allocate_buffer;
192+
ctx->base.free_buffer = mlu_memory_free_buffer;
193+
ctx->base.copy_host_to_buffer = mlu_memory_copy_host_buffer;
194+
ctx->base.copy_buffer_to_host = mlu_memory_copy_host_buffer;
195+
ctx->base.copy_buffer_to_buffer = mlu_memory_copy_buffer_to_buffer;
196+
ctx->device_id = params->mlu_device_id;
197+
198+
return &ctx->base;
199+
}

src/mlu_memory.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
2+
/*
3+
* Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved.
4+
*/
5+
6+
#ifndef MLU_MEMORY_H
7+
#define MLU_MEMORY_H
8+
9+
#include <stddef.h>
10+
#include <stdint.h>
11+
#include "memory.h"
12+
#include "config.h"
13+
14+
15+
struct perftest_parameters;

/*
 * MLU memory backend entry points.
 *
 * With HAVE_MLU defined these are plain prototypes for functions implemented
 * in another source file. Without it, header-local stubs report that MLU
 * memory is unsupported. The stubs are static inline so that every
 * translation unit including this header gets its own private copy and no
 * duplicate external symbol is emitted.
 */
#ifdef HAVE_MLU

/* Returns true when MLU memory can be used. */
bool mlu_memory_supported();

/* Creates an MLU memory context configured from params. */
struct memory_ctx *mlu_memory_create(struct perftest_parameters *params);

#else

/* Stub: MLU support was not compiled in. */
static inline bool mlu_memory_supported() {
	return false;
}

/* Stub: no context can be created without MLU support; always returns NULL. */
static inline struct memory_ctx *mlu_memory_create(struct perftest_parameters *params) {
	(void)params;
	return NULL;
}

#endif
33+
34+
#endif /* MLU_MEMORY_H */

src/perftest_parameters.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "rocm_memory.h"
1919
#include "neuron_memory.h"
2020
#include "hl_memory.h"
21+
#include "mlu_memory.h"
2122
#include<math.h>
2223
#ifdef HAVE_RO
2324
#include <stdbool.h>
@@ -590,6 +591,11 @@ static void usage(const char *argv0, VerbType verb, TestType tst, int connection
590591
printf(" Use selected Habana Labs device for RDMA testing\n");
591592
}
592593

594+
if (mlu_memory_supported()) {
595+
printf(" --use_mlu=<mlu device id>");
596+
printf(" Use selected MLU device for MLUDirect RDMA testing\n");
597+
}
598+
593599
printf(" --use_hugepages ");
594600
printf(" Use Hugepages instead of contig, memalign allocations.\n");
595601
}
@@ -810,6 +816,7 @@ static void init_perftest_params(struct perftest_parameters *user_param)
810816
user_param->use_cuda_dmabuf = 0;
811817
user_param->rocm_device_id = 0;
812818
user_param->neuron_core_id = 0;
819+
user_param->mlu_device_id = 0;
813820
user_param->mmap_file = NULL;
814821
user_param->mmap_offset = 0;
815822
user_param->iters_per_port[0] = 0;
@@ -2273,6 +2280,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
22732280
static int use_neuron_flag = 0;
22742281
static int use_neuron_dmabuf_flag = 0;
22752282
static int use_hl_flag = 0;
2283+
static int use_mlu_flag = 0;
22762284
static int disable_pcir_flag = 0;
22772285
static int mmap_file_flag = 0;
22782286
static int mmap_offset_flag = 0;
@@ -2431,6 +2439,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
24312439
{ .name = "use_neuron", .has_arg = 1, .flag = &use_neuron_flag, .val = 1},
24322440
{ .name = "use_neuron_dmabuf", .has_arg = 0, .flag = &use_neuron_dmabuf_flag, .val = 1},
24332441
{ .name = "use_hl", .has_arg = 1, .flag = &use_hl_flag, .val = 1},
2442+
{ .name = "use_mlu", .has_arg = 1, .flag = &use_mlu_flag, .val = 1},
24342443
{ .name = "mmap", .has_arg = 1, .flag = &mmap_file_flag, .val = 1},
24352444
{ .name = "mmap-offset", .has_arg = 1, .flag = &mmap_offset_flag, .val = 1},
24362445
{ .name = "ipv6", .has_arg = 0, .flag = &ipv6_flag, .val = 1},
@@ -2861,13 +2870,14 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
28612870
(use_rocm_flag && !rocm_memory_supported()) ||
28622871
(use_neuron_flag && !neuron_memory_supported()) ||
28632872
(use_neuron_dmabuf_flag && !neuron_memory_dmabuf_supported()) ||
2864-
(use_hl_flag && !hl_memory_supported())) {
2873+
(use_hl_flag && !hl_memory_supported()) ||
2874+
(use_mlu_flag && !mlu_memory_supported())) {
28652875
printf(" Unsupported memory type\n");
28662876
return FAILURE;
28672877
}
28682878
/* Memory types are mutually exclusive, make sure we were not already asked to use a different memory type. */
28692879
if (user_param->memory_type != MEMORY_HOST &&
2870-
(mmap_file_flag || use_rocm_flag || use_neuron_flag || use_hl_flag ||
2880+
(mmap_file_flag || use_mlu_flag || use_rocm_flag || use_neuron_flag || use_hl_flag ||
28712881
((use_cuda_flag || use_cuda_bus_id_flag) && user_param->memory_type != MEMORY_CUDA))) {
28722882
fprintf(stderr, " Can't use multiple memory types\n");
28732883
return FAILURE;
@@ -2925,6 +2935,12 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
29252935
user_param->memory_create = hl_memory_create;
29262936
use_hl_flag = 0;
29272937
}
2938+
if (use_mlu_flag) {
2939+
CHECK_VALUE_NON_NEGATIVE(user_param->mlu_device_id,int,"MLU device",not_int_ptr);
2940+
user_param->memory_type = MEMORY_MLU;
2941+
user_param->memory_create = mlu_memory_create;
2942+
use_mlu_flag = 0;
2943+
}
29282944
if (flow_label_flag) {
29292945
CHECK_VALUE(user_param->flow_label,int,"flow label",not_int_ptr);
29302946
if (user_param->connection_type == RawEth && user_param->flow_label < 0) {

src/perftest_parameters.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,8 @@ enum memory_type {
441441
MEMORY_CUDA,
442442
MEMORY_ROCM,
443443
MEMORY_NEURON,
444-
MEMORY_HL
444+
MEMORY_HL,
445+
MEMORY_MLU
445446
};
446447

447448
struct perftest_parameters {
@@ -574,6 +575,7 @@ struct perftest_parameters {
574575
int neuron_core_id;
575576
int use_neuron_dmabuf;
576577
char *hl_device_bus_id;
578+
int mlu_device_id;
577579
char *mmap_file;
578580
unsigned long mmap_offset;
579581
/* New test params format pilot. Will be used in all flags soon. */

0 commit comments

Comments
 (0)