9 changes: 9 additions & 0 deletions include/infinicore/ops/embedding.hpp
@@ -0,0 +1,9 @@
#pragma once

#include "common/op.hpp"

namespace infinicore::op {

Tensor embedding(Tensor input, Tensor weight);
void embedding_(Tensor out, Tensor input, Tensor weight);
} // namespace infinicore::op
1 change: 1 addition & 0 deletions include/infinicore/ops/linear.hpp
@@ -1,6 +1,7 @@
#pragma once

#include "common/op.hpp"
#include <optional>

namespace infinicore::op {

8 changes: 4 additions & 4 deletions include/infinicore/ops/rope.hpp
@@ -1,21 +1,21 @@
#pragma once

#include "../device.hpp"
#include "../tensor.hpp"
#include "../nn/rope.hpp"
#include "../tensor.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class RoPE {
public:
using schema = void (*)(Tensor, const Tensor &, const Tensor &, const Tensor &, const Tensor &, infinicore::nn::RoPE::Algo);
static void execute(Tensor x_out, const Tensor &x, const Tensor &pos, const Tensor &sin_cache, const Tensor &cos_cache, infinicore::nn::RoPE::Algo algo);
static void execute(Tensor x_out, const Tensor &x, const Tensor &pos, const Tensor &sin_table, const Tensor &cos_table, infinicore::nn::RoPE::Algo algo);
static common::OpDispatcher<schema> &dispatcher();
};

// Internal function
void rope_(Tensor x_out, const Tensor &x, const Tensor &pos, const Tensor &sin_cache, const Tensor &cos_cache, infinicore::nn::RoPE::Algo algo);
void rope_(Tensor x_out, const Tensor &x, const Tensor &pos, const Tensor &sin_table, const Tensor &cos_table, infinicore::nn::RoPE::Algo algo);

// Public API that uses infinicore::nn::RoPE::Algo
Tensor rope(const Tensor &x, const Tensor &pos, const Tensor &sin_cache, const Tensor &cos_cache, infinicore::nn::RoPE::Algo algo);
Tensor rope(const Tensor &x, const Tensor &pos, const Tensor &sin_table, const Tensor &cos_table, infinicore::nn::RoPE::Algo algo);
} // namespace infinicore::op
14 changes: 13 additions & 1 deletion python/infinicore/nn/functional/__init__.py
@@ -1,8 +1,20 @@
from .causal_softmax import causal_softmax
from .embedding import embedding
from .linear import linear
from .random_sample import random_sample
from .rms_norm import rms_norm
from .rope import RopeAlgo, rope
from .silu import silu
from .swiglu import swiglu

__all__ = ["causal_softmax", "random_sample", "rms_norm", "silu", "swiglu", "linear"]
__all__ = [
"causal_softmax",
"random_sample",
"rms_norm",
"silu",
"swiglu",
"linear",
"embedding",
"rope",
"RopeAlgo",
]
35 changes: 35 additions & 0 deletions python/infinicore/nn/functional/embedding.py
@@ -0,0 +1,35 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor

__all__ = ["embedding"]


def embedding(
input: Tensor,
weight: Tensor,
padding_idx=None,
max_norm=None,
norm_type=2.0,
scale_grad_by_freq=False,
sparse=False,
*,
out=None,
) -> Tensor:
r"""Generate a simple lookup table that looks up embeddings in a fixed dictionary and size."""

assert (
(padding_idx is None)
and (max_norm is None)
and (scale_grad_by_freq is False)
and (sparse is False)
), "Unsupported parameters."

assert "cpu" == input.device.type, (
"The device of 'input' variable must be on the CPU."
)

if out is None:
return Tensor(_infinicore.embedding(input._underlying, weight._underlying))

_infinicore.embedding_(out._underlying, input._underlying, weight._underlying)
return out
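
For reference, the wrapper above simply gathers rows of `weight` indexed by the integer ids. A minimal NumPy sketch of that contract (illustration only, not the infinicore API; the array names and shapes are made up):

```python
import numpy as np

ids = np.array([[1, 3], [0, 2]], dtype=np.int64)     # token ids, shape (batch, seq)
weight = np.random.rand(5, 8).astype(np.float32)     # embedding matrix, shape (vocab, dim)

reference = weight[ids]                               # shape (batch, seq, dim): one row per id
```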
44 changes: 44 additions & 0 deletions python/infinicore/nn/functional/rope.py
Collaborator: The file name should be rope, right?

Contributor Author: Fixed.
@@ -0,0 +1,44 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor

__all__ = ["rope", "RopeAlgo"]


class RopeAlgo:
r"""Different types of RoPE algorithms."""

GPT_J = _infinicore.Algo.GPT_J
GPT_NEOX = _infinicore.Algo.GPT_NEOX


def rope(
x: Tensor,
pos_ids: Tensor,
sin_table: Tensor,
cos_table: Tensor,
algo: RopeAlgo = RopeAlgo.GPT_NEOX,
*,
out=None,
) -> Tensor:
r"""Rotary Position Embedding(RoPE)."""

if out is None:
return Tensor(
_infinicore.rope(
x._underlying,
pos_ids._underlying,
sin_table._underlying,
cos_table._underlying,
algo,
)
)

_infinicore.rope_(
out._underlying,
x._underlying,
pos_ids._underlying,
sin_table._underlying,
cos_table._underlying,
algo,
)
return out
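
As a sanity reference for what a GPT-NeoX-style rotation computes, here is a minimal NumPy sketch. The half-split layout and table shapes are assumptions for illustration, not the kernel's actual contract; the GPT_J variant interleaves even/odd dimensions instead:

```python
import numpy as np

def rope_neox_reference(x, pos_ids, sin_table, cos_table):
    # x: (seq, n_heads, head_dim); pos_ids: (seq,); tables: (max_pos, head_dim // 2)
    half = x.shape[-1] // 2
    sin = sin_table[pos_ids][:, None, :]    # (seq, 1, half), broadcast over heads
    cos = cos_table[pos_ids][:, None, :]
    x1, x2 = x[..., :half], x[..., half:]
    return np.concatenate([x1 * cos - x2 * sin,
                           x1 * sin + x2 * cos], axis=-1)
```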
90 changes: 90 additions & 0 deletions src/infinicore/ops/embedding/embedding.cc
@@ -0,0 +1,90 @@
#include "infinicore/ops/embedding.hpp"
#include "infinicore/context/context.hpp"
#include <cstring>

namespace infinicore::op {

Tensor embedding(Tensor input, // LongTensor of arbitrary shape containing the indices to extract
Tensor weight // Embedding matrix of floating-point type with shape (V, embedding_dim), where V = maximum index + 1
) {
auto input_shape = input->shape();
auto weight_shape = weight->shape();
auto vocab_size = weight_shape[0];
auto embedding_dim = weight_shape[1];

// Assign memory to out variables
auto output_shape = input_shape;
output_shape.push_back(embedding_dim);
Tensor inputs_embeds = Tensor::empty(output_shape, weight->dtype(), weight->device());

embedding_(inputs_embeds, input, weight);
return inputs_embeds;
}

void embedding_(Tensor out, Tensor input, Tensor weight) {
Contributor Author (@pengcheng888, Nov 17, 2025): In the Python interface, `input` can only be a CPU tensor. In earlier testing, when `input` was a GPU tensor and we moved it with `to` onto the GPU in C++, a pin_memory-related warning was raised and the program then crashed with a segmentation fault.
assert((infinicore::DataType::I64 == input->dtype()) || (infinicore::DataType::I32 == input->dtype()));
assert(infinicore::Device::Type::CPU == input->device().getType());

auto input_shape = input->shape();
auto weight_shape = weight->shape();
auto vocab_size = weight_shape[0];
auto embedding_dim = weight_shape[1];

// Calculate the number of tokens
Size counts = 1;
for (auto &v : input_shape) {
counts *= v;
}

// Number of bytes in one embedding row (one token)
const Size bytes = dsize(weight->dtype()) * embedding_dim;
auto *weight_ptr = weight->data();
auto *out_ptr = out->data();

// Copy one embedding row per token
if (weight->device().getType() == Device::Type::CPU) {
if (infinicore::DataType::I64 == input->dtype()) {
const int64_t *input_arr = reinterpret_cast<const int64_t *>(input->data());
for (Size i = 0; i < counts; ++i) {
int64_t idx = input_arr[i];
assert((idx >= 0) && (idx < vocab_size));
std::memcpy(out_ptr + i * bytes,
weight_ptr + idx * bytes,
bytes);
}
} else if (infinicore::DataType::I32 == input->dtype()) {
const int32_t *input_arr = reinterpret_cast<const int32_t *>(input->data());

for (Size i = 0; i < counts; ++i) {
int32_t idx = input_arr[i];
assert((idx >= 0) && (idx < vocab_size));
std::memcpy(out_ptr + i * bytes,
weight_ptr + idx * bytes,
bytes);
}
}

} else {
if (infinicore::DataType::I64 == input->dtype()) {
const int64_t *input_arr = reinterpret_cast<const int64_t *>(input->data());
for (Size i = 0; i < counts; ++i) {
int64_t idx = input_arr[i];
assert((idx >= 0) && (idx < vocab_size));
context::memcpyD2D(out_ptr + i * bytes,
weight_ptr + idx * bytes,
bytes);
}
} else if (infinicore::DataType::I32 == input->dtype()) {
const int32_t *input_arr = reinterpret_cast<const int32_t *>(input->data());
for (Size i = 0; i < counts; ++i) {
int32_t idx = input_arr[i];
assert((idx >= 0) && (idx < vocab_size));
context::memcpyD2D(out_ptr + i * bytes,
weight_ptr + idx * bytes,
bytes);
}
}
}
}

} // namespace infinicore::op
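
The copy loops above are equivalent to the following NumPy sketch (CPU path only; same counts and bounds arithmetic, shown purely to document the behaviour, not part of the PR):

```python
import numpy as np

def embedding_reference(input_ids: np.ndarray, weight: np.ndarray) -> np.ndarray:
    vocab_size, embedding_dim = weight.shape
    flat = input_ids.reshape(-1)                          # counts = number of tokens
    out = np.empty((flat.size, embedding_dim), dtype=weight.dtype)
    for i, idx in enumerate(flat):                        # one row copied per token
        assert 0 <= idx < vocab_size                      # same bounds check as the C++ assert
        out[i] = weight[idx]
    return out.reshape(*input_ids.shape, embedding_dim)
```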
12 changes: 6 additions & 6 deletions src/infinicore/ops/rope/rope.cc
@@ -9,25 +9,25 @@ common::OpDispatcher<RoPE::schema> &RoPE::dispatcher() {
return dispatcher_;
};

void RoPE::execute(Tensor x_out, const Tensor &x, const Tensor &pos, const Tensor &sin_cache, const Tensor &cos_cache, infinicore::nn::RoPE::Algo algo) {
void RoPE::execute(Tensor x_out, const Tensor &x, const Tensor &pos, const Tensor &sin_table, const Tensor &cos_table, infinicore::nn::RoPE::Algo algo) {
auto device_type = context::getDevice().getType();
auto func = dispatcher().lookup(device_type);

if (func == nullptr) {
throw std::runtime_error("No RoPE implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
}

func(x_out, x, pos, sin_cache, cos_cache, algo);
func(x_out, x, pos, sin_table, cos_table, algo);
}

void rope_(Tensor x_out, const Tensor &x, const Tensor &pos, const Tensor &sin_cache, const Tensor &cos_cache, infinicore::nn::RoPE::Algo algo) {
RoPE::execute(x_out, x, pos, sin_cache, cos_cache, algo);
void rope_(Tensor x_out, const Tensor &x, const Tensor &pos, const Tensor &sin_table, const Tensor &cos_table, infinicore::nn::RoPE::Algo algo) {
RoPE::execute(x_out, x, pos, sin_table, cos_table, algo);
}

Tensor rope(const Tensor &x, const Tensor &pos, const Tensor &sin_cache, const Tensor &cos_cache, infinicore::nn::RoPE::Algo algo) {
Tensor rope(const Tensor &x, const Tensor &pos, const Tensor &sin_table, const Tensor &cos_table, infinicore::nn::RoPE::Algo algo) {
Shape shape = x->shape();
auto x_out = Tensor::empty(shape, x->dtype(), x->device());
rope_(x_out, x, pos, sin_cache, cos_cache, algo);
rope_(x_out, x, pos, sin_table, cos_table, algo);
return x_out;
}

4 changes: 4 additions & 0 deletions src/infinicore/pybind11/ops.hpp
@@ -5,12 +5,14 @@
#include "ops/add.hpp"
#include "ops/attention.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/embedding.hpp"
#include "ops/linear.hpp"
#include "ops/matmul.hpp"
#include "ops/mul.hpp"
#include "ops/random_sample.hpp"
#include "ops/rearrange.hpp"
#include "ops/rms_norm.hpp"
#include "ops/rope.hpp"
#include "ops/silu.hpp"
#include "ops/swiglu.hpp"

@@ -30,6 +32,8 @@ inline void bind(py::module &m) {
bind_rms_norm(m);
bind_silu(m);
bind_swiglu(m);
bind_rope(m);
bind_embedding(m);
}

} // namespace infinicore::ops
26 changes: 26 additions & 0 deletions src/infinicore/pybind11/ops/embedding.hpp
@@ -0,0 +1,26 @@
#pragma once

#include "infinicore/ops/embedding.hpp"
#include <pybind11/pybind11.h>

namespace py = pybind11;

namespace infinicore::ops {

inline void bind_embedding(py::module &m) {

m.def("embedding",
&op::embedding,
py::arg("input"),
py::arg("weight"),
R"doc(Generate a simple lookup table that looks up embeddings in a fixed dictionary and size..)doc");

m.def("embedding_",
&op::embedding_,
py::arg("out"),
py::arg("input"),
py::arg("weight"),
R"doc(In-place, Generate a simple lookup table that looks up embeddings in a fixed dictionary and size..)doc");
}

} // namespace infinicore::ops
37 changes: 37 additions & 0 deletions src/infinicore/pybind11/ops/rope.hpp
@@ -0,0 +1,37 @@
#pragma once

#include <pybind11/pybind11.h>

#include "infinicore/ops/rope.hpp"

namespace py = pybind11;

namespace infinicore::ops {

inline void bind_rope(py::module &m) {

py::enum_<infinicore::nn::RoPE::Algo>(m, "Algo")
.value("GPT_J", infinicore::nn::RoPE::Algo::GPT_J)
.value("GPT_NEOX", infinicore::nn::RoPE::Algo::GPT_NEOX);

m.def("rope",
&op::rope,
py::arg("x"),
py::arg("pos"),
py::arg("sin_table"),
py::arg("cos_table"),
py::arg("algo"),
R"doc( Rotary Position Embedding(RoPE).)doc");

m.def("rope_",
&op::rope_,
py::arg("x_out"),
py::arg("x"),
py::arg("pos"),
py::arg("sin_table"),
py::arg("cos_table"),
py::arg("algo"),
R"doc(In-place, Rotary Position Embedding(RoPE).)doc");
}

} // namespace infinicore::ops