Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 88 additions & 49 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,70 +1,109 @@
cmake_minimum_required(VERSION 3.18)

project(pulp-nnx
VERSION 0.3.0
DESCRIPTION "Kernel library for PULP-based NN accelerators."
LANGUAGES C)
VERSION 0.3.0
DESCRIPTION "Kernel library for PULP-based NN accelerators."
LANGUAGES C)

add_library(pulp-nnx STATIC)
add_library(pulp-nnx INTERFACE)

target_sources(pulp-nnx PRIVATE util/pulp_nnx_util.c util/hwpe.c)
target_include_directories(pulp-nnx PUBLIC inc util)
add_library(pulp-nnx-hal STATIC)

target_sources(pulp-nnx-hal PRIVATE util/pulp_nnx_util.c util/hwpe.c)
target_include_directories(pulp-nnx-hal PUBLIC inc util)

option(USE_NE16 "Use the NE16 accelerator.")
option(USE_NEUREKA "Use the N-EUREKA accelerator.")
option(USE_NEUREKA_V2 "Use the N-EUREKA v2 accelerator.")
option(ENABLE_BSP "Enable the build of the BSP for your chosen accelerator. Requires the PULP-SDK.")

if (NOT ${USE_NE16} AND NOT ${USE_NEUREKA} AND NOT ${USE_NEUREKA_V2})
message(FATAL_ERROR "[PULP-NNX] No accelerator in use. Please set an appropriate USE_<acc> option.")
message(FATAL_ERROR "[PULP-NNX] No accelerator in use. Please set an appropriate USE_<acc> option.")
endif()

if(${ENABLE_BSP})
add_library(pulp-nnx-bsp STATIC)
endif()

if (${USE_NE16})
message(STATUS "[PULP-NNX] Using the NE16 accelerator.")
target_sources(pulp-nnx
PRIVATE
ne16/bsp/ne16_pulp_bsp.c
ne16/hal/ne16.c
ne16/hal/ne16_task.c
src/pulp_nnx_ne16.c
)
target_include_directories(pulp-nnx
PUBLIC
ne16/bsp
ne16/hal
ne16/gvsoc
)
message(STATUS "[PULP-NNX] Using the NE16 accelerator.")
target_sources(pulp-nnx-hal
PRIVATE
ne16/hal/ne16.c
ne16/hal/ne16_task.c
)
target_include_directories(pulp-nnx-hal
PUBLIC
ne16/hal
ne16/gvsoc
)
if(${ENABLE_BSP})
target_sources(pulp-nnx-bsp
PRIVATE
ne16/bsp/ne16_pulp_bsp.c
src/pulp_nnx_ne16.c
)
target_include_directories(pulp-nnx-bsp
PUBLIC
ne16/bsp
)
endif()

endif()

if (${USE_NEUREKA})
message(STATUS "[PULP-NNX] Using the N-EUREKA accelerator.")
target_sources(pulp-nnx
PRIVATE
neureka/bsp/neureka_siracusa_bsp.c
neureka/hal/neureka.c
neureka/hal/neureka_task.c
src/pulp_nnx_neureka.c
)
target_include_directories(pulp-nnx
PUBLIC
neureka/bsp
neureka/hal
neureka/gvsoc
)
message(STATUS "[PULP-NNX] Using the N-EUREKA accelerator.")
target_sources(pulp-nnx-hal
PRIVATE
neureka/hal/neureka.c
neureka/hal/neureka_task.c
)
target_include_directories(pulp-nnx-hal
PUBLIC
neureka/hal
neureka/gvsoc
)
if(${ENABLE_BSP})
target_sources(pulp-nnx-bsp
PRIVATE
neureka/bsp/neureka_siracusa_bsp.c
src/pulp_nnx_neureka.c
)
target_include_directories(pulp-nnx-bsp
PUBLIC
neureka/bsp
)
endif()

endif()

if (${USE_NEUREKA_V2})
message(STATUS "[PULP-NNX] Using the N-EUREKA v2 accelerator.")
target_sources(pulp-nnx
PRIVATE
neureka_v2/bsp/neureka_v2_siracusa_bsp.c
neureka_v2/hal/neureka_v2.c
neureka_v2/hal/neureka_v2_task.c
src/pulp_nnx_neureka_v2.c
)
target_include_directories(pulp-nnx
PUBLIC
neureka_v2/bsp
neureka_v2/hal
neureka_v2/gvsoc
)
message(STATUS "[PULP-NNX] Using the N-EUREKA v2 accelerator.")
target_sources(pulp-nnx-hal
PRIVATE
neureka_v2/hal/neureka_v2.c
neureka_v2/hal/neureka_v2_task.c
)
target_include_directories(pulp-nnx-hal
PUBLIC
neureka_v2/hal
neureka_v2/gvsoc
)
if(${ENABLE_BSP})
target_sources(pulp-nnx-bsp
PRIVATE
neureka_v2/bsp/neureka_v2_pulp_bsp.c
src/pulp_nnx_neureka_v2.c
)
target_include_directories(pulp-nnx-bsp
PUBLIC
neureka_v2/bsp
)
endif()

endif()

target_link_libraries(pulp-nnx INTERFACE pulp-nnx-hal)
if(${ENABLE_BSP})
target_link_libraries(pulp-nnx INTERFACE pulp-nnx-bsp)
endif()
1 change: 0 additions & 1 deletion inc/pulp_nnx_neureka.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
*/

#include "neureka.h"
#include "neureka_siracusa_bsp.h"
#include "neureka_task.h"
#include <stdint.h>

Expand Down
9 changes: 6 additions & 3 deletions neureka/hal/neureka_task.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,14 +165,17 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in,
.d2 = h_out_stride};
task->data.cfg.output_stride = output_stride;

task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES;
if (task->kernel_shape == 1) { // 1x1
task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1;
task->data.cfg.weights_stride.d1 =
NEUREKA_WEIGHT_BANDWIDTH_BYTES * num_k_in;
(NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 / 8) * task->qw * num_k_in;
} else if (!task->depthwise) { // 3x3
task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3;
task->data.cfg.weights_stride.d1 =
NEUREKA_WEIGHT_BANDWIDTH_BYTES * task->qw * num_k_in;
NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 * task->qw * num_k_in;

} else { // 3x3 depthwise
task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3;
task->data.cfg.weights_stride.d1 = 0;
}
task->data.cfg.weights_stride.d2 = 0;
Expand Down
34 changes: 25 additions & 9 deletions neureka/hal/neureka_task_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,38 @@
#ifndef __NEUREKA_DEFS_H__
#define __NEUREKA_DEFS_H__

/* ARHITECTURE */

#define NEUREKA_SUBTILE_INPUT_HEIGHT_1x1 (6)
#define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (6)
/* ARCHITECTURE */
// The definitions wrapped in #ifndefs can be overwritten with compiler flags
// for different parametrizations of the Neureka architecture
#ifndef NNX_NEUREKA_PE_H
#define NNX_NEUREKA_PE_H (6)
#endif
#ifndef NNX_NEUREKA_PE_W
#define NNX_NEUREKA_PE_W (6)
#endif
#define NNX_NEUREKA_BANDWIDTH_1x1 (256)

#ifndef NNX_NEUREKA_BANDWIDTH_3x3
#define NNX_NEUREKA_BANDWIDTH_3x3 (256)
#endif

#define NEUREKA_SUBTILE_INPUT_HEIGHT_1x1 (NNX_NEUREKA_PE_H)
#define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (NNX_NEUREKA_PE_W)
#define NEUREKA_SUBTILE_INPUT_CHANNEL_1x1 (32)

#define NEUREKA_SUBTILE_INPUT_HEIGHT_3x3 (8)
#define NEUREKA_SUBTILE_INPUT_WIDTH_3x3 (8)
#define NEUREKA_SUBTILE_INPUT_HEIGHT_3x3 (NNX_NEUREKA_PE_H + 2)
#define NEUREKA_SUBTILE_INPUT_WIDTH_3x3 (NNX_NEUREKA_PE_W + 2)
#ifndef NEUREKA_SUBTILE_INPUT_CHANNEL_3x3
#define NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 (28)
#endif

#define NEUREKA_SUBTILE_OUTPUT_HEIGHT (6)
#define NEUREKA_SUBTILE_OUTPUT_WIDTH (6)
#define NEUREKA_SUBTILE_OUTPUT_HEIGHT (NNX_NEUREKA_PE_H)
#define NEUREKA_SUBTILE_OUTPUT_WIDTH (NNX_NEUREKA_PE_W)
#define NEUREKA_SUBTILE_OUTPUT_CHANNEL (32)

#define NEUREKA_OUTPUT_BANDWIDTH_BYTES (32)
#define NEUREKA_WEIGHT_BANDWIDTH_BYTES (32)
#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 (NNX_NEUREKA_BANDWIDTH_1x1 / 8)
#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 (NNX_NEUREKA_BANDWIDTH_3x3 / 8)

/* TASK REGISTERS */

Expand Down
24 changes: 13 additions & 11 deletions util/hwpe.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,33 +30,34 @@
#define HWPE_SWSYNC 6
#define HWPE_TASK_REG_OFFSET 8

inline void hwpe_reg_write(hwpe_dev_t *dev, int reg, uint32_t value) {
inline void hwpe_reg_write(const hwpe_dev_t *dev, int reg, uint32_t value) {
dev->base_addr[reg] = value;
}

inline uint32_t hwpe_reg_read(hwpe_dev_t *dev, int reg) {
inline uint32_t hwpe_reg_read(const hwpe_dev_t *dev, int reg) {
return dev->base_addr[reg];
}

inline void hwpe_task_reg_write(hwpe_dev_t *dev, int reg, uint32_t value) {
inline void hwpe_task_reg_write(const hwpe_dev_t *dev, int reg,
uint32_t value) {
hwpe_reg_write(dev, HWPE_TASK_REG_OFFSET + reg, value);
}

inline uint32_t hwpe_task_reg_read(hwpe_dev_t *dev, int reg) {
inline uint32_t hwpe_task_reg_read(const hwpe_dev_t *dev, int reg) {
return hwpe_reg_read(dev, HWPE_TASK_REG_OFFSET + reg);
}

void hwpe_soft_clear(hwpe_dev_t *dev) {
void hwpe_soft_clear(const hwpe_dev_t *dev) {
hwpe_reg_write(dev, HWPE_SOFT_CLEAR, 0);
for (volatile int i = 0; i < 10; i++)
;
}

uint32_t hwpe_task_queue_status(hwpe_dev_t *dev) {
uint32_t hwpe_task_queue_status(const hwpe_dev_t *dev) {
return hwpe_reg_read(dev, HWPE_STATUS);
}

int hwpe_task_queue_acquire_task(hwpe_dev_t *dev, uint8_t *id) {
int hwpe_task_queue_acquire_task(const hwpe_dev_t *dev, uint8_t *id) {
uint32_t read_value = (int32_t)hwpe_reg_read(dev, HWPE_ACQUIRE);
if (read_value >= 256) {
return 1;
Expand All @@ -66,20 +67,21 @@ int hwpe_task_queue_acquire_task(hwpe_dev_t *dev, uint8_t *id) {
}
}

void hwpe_task_queue_write_task(hwpe_dev_t *dev, uint32_t *data, int len) {
void hwpe_task_queue_write_task(const hwpe_dev_t *dev, uint32_t *data,
int len) {
for (int i = 0; i < len; i++) {
hwpe_task_reg_write(dev, i, data[i]);
}
}

void hwpe_task_queue_release_and_run(hwpe_dev_t *dev) {
void hwpe_task_queue_release_and_run(const hwpe_dev_t *dev) {
hwpe_reg_write(dev, HWPE_TRIGGER, 0);
}

void hwpe_task_queue_release(hwpe_dev_t *dev) {
void hwpe_task_queue_release(const hwpe_dev_t *dev) {
hwpe_reg_write(dev, HWPE_TRIGGER, 1);
}

uint8_t hwpe_last_task_id(hwpe_dev_t *dev) {
uint8_t hwpe_last_task_id(const hwpe_dev_t *dev) {
return (uint8_t)hwpe_reg_read(dev, HWPE_RUNNING_JOB);
}
22 changes: 11 additions & 11 deletions util/hwpe.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ typedef struct hwpe_dev_t {
volatile uint32_t *base_addr;
} hwpe_dev_t;

void hwpe_reg_write(hwpe_dev_t *dev, int reg, uint32_t value);
uint32_t hwpe_reg_read(hwpe_dev_t *dev, int reg);
void hwpe_task_reg_write(hwpe_dev_t *dev, int reg, uint32_t value);
uint32_t hwpe_task_reg_read(hwpe_dev_t *dev, int reg);
void hwpe_soft_clear(hwpe_dev_t *dev);
uint32_t hwpe_task_queue_status(hwpe_dev_t *dev);
int hwpe_task_queue_acquire_task(hwpe_dev_t *dev, uint8_t *id);
void hwpe_task_queue_write_task(hwpe_dev_t *dev, uint32_t *data, int len);
void hwpe_task_queue_release_and_run(hwpe_dev_t *dev);
void hwpe_task_queue_release(hwpe_dev_t *dev);
uint8_t hwpe_last_task_id(hwpe_dev_t *dev);
void hwpe_reg_write(const hwpe_dev_t *dev, int reg, uint32_t value);
uint32_t hwpe_reg_read(const hwpe_dev_t *dev, int reg);
void hwpe_task_reg_write(const hwpe_dev_t *dev, int reg, uint32_t value);
uint32_t hwpe_task_reg_read(const hwpe_dev_t *dev, int reg);
void hwpe_soft_clear(const hwpe_dev_t *dev);
uint32_t hwpe_task_queue_status(const hwpe_dev_t *dev);
int hwpe_task_queue_acquire_task(const hwpe_dev_t *dev, uint8_t *id);
void hwpe_task_queue_write_task(const hwpe_dev_t *dev, uint32_t *data, int len);
void hwpe_task_queue_release_and_run(const hwpe_dev_t *dev);
void hwpe_task_queue_release(const hwpe_dev_t *dev);
uint8_t hwpe_last_task_id(const hwpe_dev_t *dev);

#endif // !__HWPE_H__