Skip to content

Commit 4b99fea

Browse files
committed
accomplish flipud scatter multi_margin_loss float_power floor_divide operator
1 parent 726eacf commit 4b99fea

File tree

247 files changed

+18131
-1456
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

247 files changed

+18131
-1456
lines changed

include/infinicore/common/hash.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "../tensor.hpp"
44

5+
#include <optional>
56
#include <type_traits>
67

78
namespace infinicore {
@@ -24,6 +25,15 @@ inline void hash_combine(size_t &seed, Tensor tensor) {
2425
}
2526
}
2627

28+
// Specialization for std::optional<T>.
// Mixes the engagement flag into the seed first, so an empty optional
// hashes differently from an engaged optional whose value happens to
// hash to the same bits; the contained value is folded in only when
// present (via the generic hash_combine overload for T).
template <typename T>
inline void hash_combine(size_t &seed, const std::optional<T> &opt) {
    hash_combine(seed, opt.has_value());
    if (opt) {
        hash_combine(seed, *opt);
    }
}
36+
2737
// Specialization for std::string
2838
inline void hash_combine(size_t &seed, const std::string &str) {
2939
hash_combine(seed, std::hash<std::string>{}(str));

include/infinicore/context/context.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
namespace infinicore {
1212

1313
namespace context {
14-
void setDevice(Device device, bool force_cpu = false);
14+
void setDevice(Device device);
1515
Device getDevice();
1616
size_t getDeviceCount(Device::Type type);
1717

include/infinicore/ops.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
#pragma once
22

33
#include "ops/add.hpp"
4+
#include "ops/add_rms_norm.hpp"
45
#include "ops/attention.hpp"
56
#include "ops/causal_softmax.hpp"
67
#include "ops/matmul.hpp"
78
#include "ops/ones.hpp"
9+
#include "ops/paged_attention.hpp"
10+
#include "ops/paged_attention_prefill.hpp"
11+
#include "ops/paged_caching.hpp"
12+
#include "ops/random_sample.hpp"
813
#include "ops/rearrange.hpp"
914
#include "ops/rms_norm.hpp"
1015
#include "ops/rope.hpp"
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <utility>

namespace infinicore::op {

// Dispatch wrapper for the fused Add + RMSNorm kernel.
class AddRMSNorm {
public:
    // Parameter order: (y, residual_out, a, b, weight, epsilon).
    using schema = void (*)(Tensor, Tensor, Tensor, Tensor, Tensor, float);

    // Runs the implementation registered for the current device.
    static void execute(Tensor y, Tensor residual_out, Tensor a, Tensor b, Tensor weight, float epsilon = 1e-5f);

    // Per-device registration table for `schema` implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Fused Add and RMS Normalization
// Returns: (normalized_result, add_result)
// The add_result can be used as residual for subsequent layers
std::pair<Tensor, Tensor> add_rms_norm(Tensor a, Tensor b, Tensor weight, float epsilon = 1e-5f);

// In-place variant: writes the normalized result into `y` and the
// element-wise a + b sum into `residual_out`.
void add_rms_norm_(Tensor y, Tensor residual_out, Tensor a, Tensor b, Tensor weight, float epsilon = 1e-5f);

} // namespace infinicore::op

include/infinicore/ops/common/cache.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ class OpCache {
3636
return cache_vector[device_index];
3737
}
3838

39+
// Convenience overload: unpacks `device` into its type and index and
// forwards to the (Device::Type, index) overload above.
BaseCache &getCache(Device device) {
    return getCache(device.getType(), device.getIndex());
}
42+
3943
void setCapacity(size_t capacity) {
4044
capacity_ = capacity;
4145
for (auto &vec : caches_) {

include/infinicore/ops/flipud.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Dispatch wrapper for the flipud kernel (presumably reverses the order
// of rows along the first dimension, matching numpy.flipud — confirm
// against the registered implementations).
class Flipud {
public:
    // Schema signature: (Output, Input)
    using schema = void (*)(Tensor, Tensor);

    // Runs the implementation registered for the current device.
    static void execute(Tensor output, Tensor input);

    // Per-device registration table for `schema` implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place: allocates and returns the flipped tensor.
Tensor flipud(Tensor input);

// In-place variant: writes the flipped result into `output`.
void flipud_(Tensor output, Tensor input);

} // namespace infinicore::op
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Dispatch wrapper for element-wise power computed in double precision.
class FloatPower {
public:
    // Dispatcher schema: Output = Input ^ Scalar (scalar must be double!).
    using schema_scalar = void (*)(Tensor output, Tensor input, double exponent);

    // Dispatcher schema: Output = Input ^ Tensor.
    using schema_tensor = void (*)(Tensor output, Tensor input, Tensor exponent);

    // Execute entry points, called by the functional interface below.
    static void execute(Tensor output, Tensor input, double exponent);
    static void execute(Tensor output, Tensor input, Tensor exponent);

    // Per-device registration tables, one per schema.
    static common::OpDispatcher<schema_scalar> &dispatcher_scalar();
    static common::OpDispatcher<schema_tensor> &dispatcher_tensor();
};

// ---------------------------------------------------------------
// Functional interface (Python-visible semantics)
// ---------------------------------------------------------------

// Scalar exponent.
Tensor float_power(Tensor input, double exponent);               // out-of-place: ALWAYS float64
void float_power_(Tensor output, Tensor input, double exponent); // in-place

// Tensor exponent.
Tensor float_power(Tensor input, Tensor exponent);               // out-of-place: ALWAYS float64
void float_power_(Tensor output, Tensor input, Tensor exponent); // in-place

} // namespace infinicore::op
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Dispatch wrapper for element-wise floor division.
class FloorDivide {
public:
    // Parameter order: (c, a, b) — result, dividend, divisor.
    using schema = void (*)(Tensor, Tensor, Tensor);

    // Runs the implementation registered for the current device.
    static void execute(Tensor c, Tensor a, Tensor b);

    // Per-device registration table for `schema` implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place: allocates and returns the result tensor.
Tensor floor_divide(Tensor a, Tensor b);

// In-place variant: writes the result into `c`.
void floor_divide_(Tensor c, Tensor a, Tensor b);

} // namespace infinicore::op
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Dispatch wrapper for the multi-class margin loss kernel.
class MultiMarginLoss {
public:
    // Parameter order: (output, input, target, weight, p, margin, reduction).
    using schema = void (*)(Tensor, Tensor, Tensor, Tensor, int64_t, float, int64_t);

    // Runs the implementation registered for the current device.
    static void execute(Tensor output, Tensor input, Tensor target, Tensor weight, int64_t p, float margin, int64_t reduction);

    // Per-device registration table for `schema` implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place multi-class margin loss.
// `weight` defaults to an empty Tensor (presumably meaning unweighted
// classes — confirm against the implementation); reduction = 1 is
// presumably the framework's "mean" mode — TODO confirm.
Tensor multi_margin_loss(Tensor input, Tensor target, Tensor weight = {}, int64_t p = 1, float margin = 1.0f, int64_t reduction = 1);

// In-place variant: writes the loss into `output`.
void multi_margin_loss_(Tensor output, Tensor input, Tensor target, Tensor weight, int64_t p, float margin, int64_t reduction);

} // namespace infinicore::op
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <optional>

namespace infinicore::op {

// Dispatch wrapper for paged attention: attends queries against K/V
// caches addressed indirectly through `block_tables`, with per-sequence
// lengths in `cache_lens` and optional ALiBi slopes.
class PagedAttention {
public:
    // Parameter order: (out, q, k_cache, v_cache, block_tables,
    //                   cache_lens, alibi_slopes, scale).
    using schema = void (*)(Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, std::optional<Tensor>, float);

    // Runs the implementation registered for the current device.
    // Fix: the trailing float parameter was unnamed; it is named `scale`
    // here to match the functional declarations below.
    static void execute(Tensor out, Tensor q, Tensor k_cache, Tensor v_cache, Tensor block_tables, Tensor cache_lens, std::optional<Tensor> alibi_slopes, float scale);

    // Per-device registration table for `schema` implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place: allocates and returns the attention output.
Tensor paged_attention(Tensor q, Tensor k_cache, Tensor v_cache, Tensor block_tables, Tensor cache_lens, std::optional<Tensor> alibi_slopes, float scale);

// In-place variant: writes the attention output into `out`.
void paged_attention_(Tensor out, Tensor q, Tensor k_cache, Tensor v_cache, Tensor block_tables, Tensor cache_lens, std::optional<Tensor> alibi_slopes, float scale);

} // namespace infinicore::op

0 commit comments

Comments
 (0)