Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,7 @@ extern "C" {

enum ggml_unary_op {
GGML_UNARY_OP_ABS,
GGML_UNARY_OP_FLOOR,
GGML_UNARY_OP_SGN,
GGML_UNARY_OP_NEG,
GGML_UNARY_OP_STEP,
Expand Down Expand Up @@ -1029,6 +1030,15 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_floor(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_floor_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);


GGML_API struct ggml_tensor * ggml_sgn(
struct ggml_context * ctx,
struct ggml_tensor * a);
Expand Down
4 changes: 4 additions & 0 deletions ggml/src/ggml-cpu/ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8933,6 +8933,10 @@ void ggml_compute_forward_unary(
{
ggml_compute_forward_abs(params, dst);
} break;
case GGML_UNARY_OP_FLOOR:
{
ggml_compute_forward_floor(params, dst);
} break;
case GGML_UNARY_OP_SGN:
{
ggml_compute_forward_sgn(params, dst);
Expand Down
8 changes: 8 additions & 0 deletions ggml/src/ggml-cpu/unary-ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ static inline float op_abs(float x) {
return fabsf(x);
}

static inline float op_floor(float x) {
return floorf(x);
}

static inline float op_sgn(float x) {
return (x > 0.f) ? 1.f : ((x < 0.f) ? -1.f : 0.f);
}
Expand Down Expand Up @@ -125,6 +129,10 @@ void ggml_compute_forward_abs(const ggml_compute_params * params, ggml_tensor *
unary_op<op_abs>(params, dst);
}

void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_floor>(params, dst);
}

void ggml_compute_forward_sgn(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_sgn>(params, dst);
}
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-cpu/unary-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ extern "C" {
#endif

void ggml_compute_forward_abs(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_floor(const struct ggml_compute_params * params,struct ggml_tensor * dst);
void ggml_compute_forward_sgn(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_neg(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_step(const struct ggml_compute_params * params, struct ggml_tensor * dst);
Expand Down
30 changes: 30 additions & 0 deletions ggml/src/ggml-sycl/element_wise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ static __dpct_inline__ T op_abs(T x) {
return sycl::fabs(x);
}

template<typename T>
static __dpct_inline__ T op_floor(T x) {
return sycl::floor(x);
}

template<typename T>
static __dpct_inline__ T op_elu(T x) {
return (x > static_cast<T>(0.f)) ? x : sycl::expm1(x);
Expand Down Expand Up @@ -164,6 +169,13 @@ static void unary_op_abs_kernel(const T * x, T * dst, const int k, const sycl::n
}
}

template<typename T>
static void unary_op_floor_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
dst[i] = op_floor(x[i]);
}
}

template<typename T>
static void unary_op_elu_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
Expand Down Expand Up @@ -661,6 +673,19 @@ static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor
});
}

static inline void ggml_sycl_op_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
const int num_blocks = ceil_div(k_elements, 256);
sycl_parallel_for(stream,
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
sycl::range<1>(256)),
[=](sycl::nd_item<1> item_ct1) {
unary_op_floor_kernel(src, dst_ptr, k_elements, item_ct1);
});
});
}

static inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
Expand Down Expand Up @@ -1129,6 +1154,11 @@ void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
ggml_sycl_op_clamp(ctx, dst);
}

void ggml_sycl_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_floor(ctx, dst);
}

void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_sgn(ctx, dst);
Expand Down
2 changes: 2 additions & 0 deletions ggml/src/ggml-sycl/element_wise.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

void ggml_sycl_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
Expand Down
4 changes: 4 additions & 0 deletions ggml/src/ggml-sycl/ggml-sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3636,6 +3636,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
case GGML_UNARY_OP_ABS:
ggml_sycl_abs(ctx, dst);
break;
case GGML_UNARY_OP_FLOOR:
ggml_sycl_floor(ctx, dst);
break;
case GGML_UNARY_OP_ELU:
ggml_sycl_elu(ctx, dst);
break;
Expand Down Expand Up @@ -4193,6 +4196,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_UNARY_OP_SGN:
case GGML_UNARY_OP_ABS:
case GGML_UNARY_OP_ELU:
case GGML_UNARY_OP_FLOOR:
#if defined (GGML_SYCL_F16)
return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type);
#else
Expand Down
17 changes: 16 additions & 1 deletion ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -1129,6 +1129,7 @@ static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
"ABS",
"FLOOR",
"SGN",
"NEG",
"STEP",
Expand All @@ -1145,7 +1146,7 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
"GELU_ERF",
};

static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16");


static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
Expand Down Expand Up @@ -2481,6 +2482,20 @@ struct ggml_tensor * ggml_abs_inplace(
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
}

// ggml_floor

struct ggml_tensor * ggml_floor(
struct ggml_context * ctx,
struct ggml_tensor * a) {
return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
}

struct ggml_tensor * ggml_floor_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
}

// ggml_sgn

struct ggml_tensor * ggml_sgn(
Expand Down
43 changes: 43 additions & 0 deletions tests/test-backend-ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3524,6 +3524,45 @@ struct test_log : public test_case {
}
};

// GGML_OP_FLOOR
struct test_floor : public test_case {
const ggml_type type;
const std::array<int64_t, 4> ne;

std::string vars() override {
return VARS_TO_STR2(type, ne);
}

test_floor(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne = {10, 5, 4, 3})
: type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
ggml_set_param(a);
ggml_set_name(a, "a");

ggml_tensor * out = ggml_floor(ctx, a);
ggml_set_name(out, "out");

return out;
}

void initialize_tensors(ggml_context * ctx) override {
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
init_tensor_uniform(t, -10.0f, 10.0f);
}
}

float grad_eps() override {
return 1.0f;
}

bool grad_precise() override {
return true;
}
};

// GGML_OP_SIN
struct test_sin : public test_case {
const ggml_type type;
Expand Down Expand Up @@ -6329,20 +6368,24 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
}

for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) {

test_cases.emplace_back(new test_sqr (type));
test_cases.emplace_back(new test_sqrt (type));
test_cases.emplace_back(new test_log (type));
test_cases.emplace_back(new test_sin (type));
test_cases.emplace_back(new test_cos (type));
test_cases.emplace_back(new test_clamp (type));
test_cases.emplace_back(new test_leaky_relu(type));
test_cases.emplace_back(new test_floor (type));
test_cases.emplace_back(new test_sqr (type, {7, 1, 5, 3}));
test_cases.emplace_back(new test_sqrt (type, {7, 1, 5, 3}));
test_cases.emplace_back(new test_log (type, {7, 1, 5, 3}));
test_cases.emplace_back(new test_sin (type, {7, 1, 5, 3}));
test_cases.emplace_back(new test_cos (type, {7, 1, 5, 3}));
test_cases.emplace_back(new test_clamp (type, {7, 1, 5, 3}));
test_cases.emplace_back(new test_leaky_relu(type, {7, 1, 5, 3}));
test_cases.emplace_back(new test_floor (type, {7, 1, 5, 3}))

}

test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 1, 1}, 5));
Expand Down
6 changes: 3 additions & 3 deletions vendor/miniaudio/miniaudio.h
Original file line number Diff line number Diff line change
Expand Up @@ -28227,7 +28227,7 @@ static ma_result ma_device_start__alsa(ma_device* pDevice)
}

if (pDevice->type == ma_device_type_playback || pDevice->type == ma_device_type_duplex) {
/*
/*
When data is written to the device we wait for the device to get ready to receive data with poll(). In my testing
I have observed that poll() can sometimes block forever unless the device is started explicitly with snd_pcm_start()
or some data is written with snd_pcm_writei().
Expand Down Expand Up @@ -34520,7 +34520,7 @@ static ma_result ma_device_init_internal__coreaudio(ma_context* pContext, ma_dev
#endif
}


status = ((ma_AudioUnitSetProperty_proc)pContext->coreaudio.AudioUnitSetProperty)(pData->audioUnit, kAudioUnitProperty_StreamFormat, formatScope, formatElement, &bestFormat, sizeof(bestFormat));
if (status != noErr) {
((ma_AudioComponentInstanceDispose_proc)pContext->coreaudio.AudioComponentInstanceDispose)(pData->audioUnit);
Expand Down Expand Up @@ -38526,7 +38526,7 @@ static ma_result ma_device_reinit__aaudio(ma_device* pDevice, ma_device_type dev
ma_device_stop(pDevice); /* Do a full device stop so we set internal state correctly. */
}
}

result = MA_SUCCESS;
}
done:
Expand Down