24 commits
883952a
Implement the adaptive_avg_pool3d operator, CPU and NVIDIA versions (to be optimized)
Dec 2, 2025
da1cf2f
Merge branch 'InfiniTensor:main' into 2025-autumn-yudeng-T1-1-8
YuDeng0102 Dec 2, 2025
dcdae73
Merge branch 'InfiniTensor:main' into 2025-autumn-yudeng-T1-1-8
YuDeng0102 Dec 3, 2025
3b1a11e
Implement the Moore version of adaptive_avg_pool3d
Dec 3, 2025
6abd99b
Merge branch 'InfiniTensor:main' into 2025-autumn-yudeng-T1-1-8
YuDeng0102 Dec 5, 2025
9abcf87
adaptive_avg_pool3d in metax finished
Dec 5, 2025
b4fa8db
Merge remote-tracking branch 'upstream/main' into 2025-autumn-yudeng-…
Dec 9, 2025
26fe305
Implement argwhere, CPU and NVIDIA versions
Dec 12, 2025
aa5af15
Implement five operators, CPU and NVIDIA versions
Dec 13, 2025
3bbe1d1
Necessary changes to test files
Dec 13, 2025
a66715e
Revert redundant changes
Dec 13, 2025
373624b
Merge remote-tracking branch 'upstream/main' into 2025-autumn-yudeng-…
Dec 13, 2025
e35f148
Implement the Moore version and fix a bug
Dec 13, 2025
594b7d8
finish metax version
Dec 14, 2025
7568bbc
iluvatar version finished
Dec 14, 2025
43a781c
Merge branch '2025-autumn-yudeng-T1-1-8' of https://github.com/YuDeng…
Dec 14, 2025
6217ee2
format
Dec 14, 2025
16ec5a8
fix a bug in metax
Dec 14, 2025
f1ed3ca
Merge branch 'InfiniTensor:main' into 2025-autumn-YuDeng0102-T1-1-8
YuDeng0102 Dec 20, 2025
c4ed02d
Update the argwhere operator to handle larger tensors
Dec 22, 2025
ec6c04b
Resolve merge conflicts
Dec 22, 2025
743ceba
Merge remote-tracking branch 'origin' into 2025-autumn-YuDeng0102-T1-1-8
Jan 9, 2026
aa761b2
Fix a bug in argwhere on Metax
Jan 10, 2026
c7ffd95
Remove redundant comments from argwhere
Jan 11, 2026
4 changes: 3 additions & 1 deletion include/infinicore/ops.hpp
@@ -1,9 +1,11 @@
#pragma once

#include "ops/add.hpp"
#include "ops/asin.hpp"
#include "ops/add_rms_norm.hpp"
#include "ops/attention.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/fmin.hpp"
#include "ops/matmul.hpp"
#include "ops/ones.hpp"
#include "ops/paged_attention.hpp"
@@ -14,4 +16,4 @@
#include "ops/rms_norm.hpp"
#include "ops/rope.hpp"
#include "ops/silu.hpp"
#include "ops/swiglu.hpp"
#include "ops/swiglu.hpp"
16 changes: 16 additions & 0 deletions include/infinicore/ops/adaptive_avg_pool3d.hpp
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class AdaptiveAvgPool3D {
public:
using schema = void (*)(Tensor, Tensor);
static void execute(Tensor y, Tensor x);
static common::OpDispatcher<schema> &dispatcher();
};

Tensor adaptive_avg_pool3d(Tensor x, std::vector<size_t> output_size);
void adaptive_avg_pool3d_(Tensor y, Tensor x);
} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/addr.hpp
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class Addr {
public:
using schema = void (*)(Tensor, Tensor, Tensor, Tensor, float, float);
static void execute(Tensor out, Tensor input, Tensor vec1, Tensor vec2, float beta = 1.0f, float alpha = 1.0f);
static common::OpDispatcher<schema> &dispatcher();
};

Tensor addr(Tensor input, Tensor vec1, Tensor vec2, float beta = 1.0f, float alpha = 1.0f);
void addr_(Tensor out, Tensor input, Tensor vec1, Tensor vec2, float beta = 1.0f, float alpha = 1.0f);
} // namespace infinicore::op
14 changes: 14 additions & 0 deletions include/infinicore/ops/argwhere.hpp
@@ -0,0 +1,14 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class Argwhere {
public:
using schema = void (*)(void **, size_t *, Tensor);
static void execute(void **, size_t *count, Tensor x);
static common::OpDispatcher<schema> &dispatcher();
};
Tensor argwhere(Tensor x);
} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/asin.hpp
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

class Asin {
public:
using schema = void (*)(Tensor, Tensor);
static void execute(Tensor output, Tensor input);
static common::OpDispatcher<schema> &dispatcher();
};

Tensor asin(Tensor input);
void asin_(Tensor output, Tensor input);

} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/fmin.hpp
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

class Fmin {
public:
using schema = void (*)(Tensor, Tensor, Tensor);
static void execute(Tensor c, Tensor a, Tensor b);
static common::OpDispatcher<schema> &dispatcher();
};

Tensor fmin(Tensor a, Tensor b);
void fmin_(Tensor c, Tensor a, Tensor b);

} // namespace infinicore::op
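For reference, the elementwise semantics of `fmin` are assumed here to match C's `fmin` (and `numpy.fmin`/`torch.fmin`): the smaller of the two operands is returned, and when exactly one operand is NaN the non-NaN operand wins. A minimal NumPy sketch of that behavior, under this assumption:

```python
import numpy as np

# Assumed contract for fmin(c, a, b): elementwise minimum with
# NaN suppression, i.e. the non-NaN operand is preferred.
a = np.array([1.0, np.nan, 3.0], dtype=np.float32)
b = np.array([2.0, 0.5, np.nan], dtype=np.float32)

print(np.fmin(a, b))  # [1.  0.5 3. ]
```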
4 changes: 4 additions & 0 deletions include/infiniop.h
@@ -2,13 +2,17 @@
#define __INFINIOP_API_H__

#include "infiniop/handle.h"
#include "infiniop/ops/adaptive_avg_pool3d.h"
#include "infiniop/ops/add.h"
#include "infiniop/ops/addr.h"
#include "infiniop/ops/add_rms_norm.h"
#include "infiniop/ops/asin.h"
#include "infiniop/ops/attention.h"
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h"
#include "infiniop/ops/conv.h"
#include "infiniop/ops/dequantize_awq.h"
#include "infiniop/ops/fmin.h"
#include "infiniop/ops/gelu.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/layer_norm.h"
30 changes: 30 additions & 0 deletions include/infiniop/ops/adaptive_avg_pool3d.h
@@ -0,0 +1,30 @@
#ifndef INFINIOP_ADAPTIVE_AVG_POOL3D_H_
#define INFINIOP_ADAPTIVE_AVG_POOL3D_H_

#include "../operator_descriptor.h"
#include <stddef.h>

typedef struct InfiniopDescriptor *infiniopAdaptiveAvgPool3DDescriptor_t;

__C __export infiniStatus_t infiniopCreateAdaptiveAvgPool3DDescriptor(
infiniopHandle_t handle,
infiniopAdaptiveAvgPool3DDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
size_t *output_size);

__C __export infiniStatus_t infiniopGetAdaptiveAvgPool3DWorkspaceSize(
infiniopAdaptiveAvgPool3DDescriptor_t desc,
size_t *size);

__C __export infiniStatus_t infiniopAdaptiveAvgPool3D(
infiniopAdaptiveAvgPool3DDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

__C __export infiniStatus_t infiniopDestroyAdaptiveAvgPool3DDescriptor(infiniopAdaptiveAvgPool3DDescriptor_t desc);

#endif
30 changes: 30 additions & 0 deletions include/infiniop/ops/addr.h
@@ -0,0 +1,30 @@
#ifndef __INFINIOP_ADDR_API_H__
#define __INFINIOP_ADDR_API_H__

#include "../operator_descriptor.h"

typedef struct InfiniopDescriptor *infiniopAddrDescriptor_t;

__C __export infiniStatus_t infiniopCreateAddrDescriptor(infiniopHandle_t handle,
infiniopAddrDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out,
infiniopTensorDescriptor_t input,
infiniopTensorDescriptor_t vec1,
infiniopTensorDescriptor_t vec2,
float beta,
float alpha);

__C __export infiniStatus_t infiniopGetAddrWorkspaceSize(infiniopAddrDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopAddr(infiniopAddrDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *out,
const void *input,
const void *vec1,
const void *vec2,
void *stream);

__C __export infiniStatus_t infiniopDestroyAddrDescriptor(infiniopAddrDescriptor_t desc);

#endif
29 changes: 29 additions & 0 deletions include/infiniop/ops/argwhere.h
@@ -0,0 +1,29 @@
#ifndef __INFINIOP_ARGWHERE_API_H__
#define __INFINIOP_ARGWHERE_API_H__

#include "../operator_descriptor.h"

typedef struct InfiniopDescriptor *infiniopArgwhereDescriptor_t;

__C __export infiniStatus_t infiniopCreateArgwhereDescriptor(
infiniopHandle_t handle,
infiniopArgwhereDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t input_desc);

__C __export infiniStatus_t infiniopGetArgwhereWorkspaceSize(
infiniopArgwhereDescriptor_t desc,
size_t *size);

__C __export infiniStatus_t infiniopArgwhere(
infiniopArgwhereDescriptor_t desc,
void *workspace,
size_t workspace_size,
void **output,
size_t *count,
const void *input,
void *stream);

__C __export infiniStatus_t infiniopDestroyArgwhereDescriptor(
infiniopArgwhereDescriptor_t desc);

#endif // __INFINIOP_ARGWHERE_API_H__
24 changes: 24 additions & 0 deletions include/infiniop/ops/asin.h
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_ASIN_API_H__
#define __INFINIOP_ASIN_API_H__

#include "../operator_descriptor.h"

typedef struct InfiniopDescriptor *infiniopAsinDescriptor_t;

__C __export infiniStatus_t infiniopCreateAsinDescriptor(infiniopHandle_t handle,
infiniopAsinDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output,
infiniopTensorDescriptor_t input);

__C __export infiniStatus_t infiniopGetAsinWorkspaceSize(infiniopAsinDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopAsin(infiniopAsinDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *output,
const void *input,
void *stream);

__C __export infiniStatus_t infiniopDestroyAsinDescriptor(infiniopAsinDescriptor_t desc);

#endif
28 changes: 28 additions & 0 deletions include/infiniop/ops/fmin.h
@@ -0,0 +1,28 @@
#ifndef __INFINIOP_FMIN_H__
#define __INFINIOP_FMIN_H__

#include "../operator_descriptor.h"

typedef struct InfiniopDescriptor *infiniopFminDescriptor_t;

__C __export infiniStatus_t infiniopCreateFminDescriptor(infiniopHandle_t handle,
infiniopFminDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c,
infiniopTensorDescriptor_t a,
infiniopTensorDescriptor_t b);

__C __export infiniStatus_t infiniopGetFminWorkspaceSize(infiniopFminDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopFmin(infiniopFminDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *c,
const void *a,
const void *b,
void *stream);

__C __export infiniStatus_t infiniopDestroyFminDescriptor(infiniopFminDescriptor_t desc);

#endif
9 changes: 9 additions & 0 deletions python/infinicore/__init__.py
@@ -40,6 +40,8 @@
uint8,
)
from infinicore.ops.add import add
from infinicore.ops.addr import addr
from infinicore.ops.asin import asin
from infinicore.ops.add_rms_norm import add_rms_norm, add_rms_norm_
from infinicore.ops.attention import attention
from infinicore.ops.matmul import matmul
@@ -49,8 +51,11 @@
from infinicore.ops.paged_attention_prefill import paged_attention_prefill
from infinicore.ops.paged_caching import paged_caching
from infinicore.ops.rearrange import rearrange
from infinicore.ops.argwhere import argwhere
from infinicore.ops.fmin import fmin
from infinicore.ops.squeeze import squeeze
from infinicore.ops.unsqueeze import unsqueeze

from infinicore.tensor import (
Tensor,
empty,
@@ -106,8 +111,11 @@
"uint8",
# Operations.
"add",
"addr",
"add_rms_norm",
"add_rms_norm_",
"argwhere",
"asin",
"attention",
"matmul",
"mul",
@@ -121,6 +129,7 @@
"from_list",
"from_numpy",
"from_torch",
"fmin",
"paged_caching",
"paged_attention",
"paged_attention_prefill",
3 changes: 3 additions & 0 deletions python/infinicore/nn/functional/__init__.py
@@ -6,6 +6,8 @@
from .rope import RopeAlgo, rope
from .silu import silu
from .swiglu import swiglu
from .adaptive_avg_pool3d import adaptive_avg_pool3d


__all__ = [
"causal_softmax",
@@ -17,4 +19,5 @@
"embedding",
"rope",
"RopeAlgo",
"adaptive_avg_pool3d",
]
16 changes: 16 additions & 0 deletions python/infinicore/nn/functional/adaptive_avg_pool3d.py
@@ -0,0 +1,16 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
from typing import List


def adaptive_avg_pool3d(x: Tensor, output_size: List[int] = (1, 1, 1)) -> Tensor:
r"""Applies a 3D adaptive average pooling over an input signal composed of several input planes.

Args:
x (Tensor): The input tensor of shape (N, C, D, H, W)
output_size (List[int]): The target output size of the form (d, h, w)

Returns:
Tensor: The pooled output tensor
"""
return Tensor(_infinicore.adaptive_avg_pool3d(x._underlying, output_size))
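To make the pooling rule concrete, here is a NumPy reference sketch of the index math this operator is assumed to follow (the same start/end rule as PyTorch's adaptive_avg_pool3d): along each spatial dimension, output cell `i` averages the input slice `[floor(i*in/out), ceil((i+1)*in/out))`. The helper below is illustrative, not part of the PR:

```python
import numpy as np


def adaptive_avg_pool3d_ref(x: np.ndarray, output_size) -> np.ndarray:
    """Reference sketch for x of shape (N, C, D, H, W).

    Assumes start = i*in // out and end = ceil((i+1)*in / out),
    mirroring torch.nn.functional.adaptive_avg_pool3d.
    """
    n, c, d, h, w = x.shape
    od, oh, ow = output_size
    y = np.empty((n, c, od, oh, ow), dtype=x.dtype)
    for i in range(od):
        d0, d1 = i * d // od, -(-((i + 1) * d) // od)
        for j in range(oh):
            h0, h1 = j * h // oh, -(-((j + 1) * h) // oh)
            for k in range(ow):
                w0, w1 = k * w // ow, -(-((k + 1) * w) // ow)
                y[:, :, i, j, k] = x[:, :, d0:d1, h0:h1, w0:w1].mean(axis=(2, 3, 4))
    return y


x = np.random.rand(1, 2, 5, 7, 9).astype(np.float32)
print(adaptive_avg_pool3d_ref(x, (1, 1, 1)).shape)  # (1, 2, 1, 1, 1)
```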
28 changes: 28 additions & 0 deletions python/infinicore/ops/addr.py
@@ -0,0 +1,28 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def addr(
input: Tensor,
vec1: Tensor,
vec2: Tensor,
beta: float = 1.0,
alpha: float = 1.0,
out=None,
) -> Tensor:
if out is None:
return Tensor(
_infinicore.addr(
input._underlying, vec1._underlying, vec2._underlying, beta, alpha
)
)

_infinicore.addr_(
out._underlying,
input._underlying,
vec1._underlying,
vec2._underlying,
beta,
alpha,
)
return out
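For context, `addr` is assumed to follow the same contract as `torch.addr`: an outer product of `vec1` and `vec2` scaled by `alpha`, added to `beta * input`, with `input` of shape `(len(vec1), len(vec2))`. A tiny NumPy illustration of that formula (the values are made up for the example):

```python
import numpy as np

inp = np.zeros((3, 2), dtype=np.float32)
vec1 = np.array([1.0, 2.0, 3.0], dtype=np.float32)
vec2 = np.array([10.0, 20.0], dtype=np.float32)

beta, alpha = 1.0, 1.0
out = beta * inp + alpha * np.outer(vec1, vec2)
print(out)
# [[10. 20.]
#  [20. 40.]
#  [30. 60.]]
```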
6 changes: 6 additions & 0 deletions python/infinicore/ops/argwhere.py
@@ -0,0 +1,6 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def argwhere(x: Tensor) -> Tensor:
return Tensor(_infinicore.argwhere(x._underlying))
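The wrapper is expected to behave like `numpy.argwhere`/`torch.argwhere`: it returns an integer tensor of shape `(n_nonzero, x.ndim)` whose rows are the coordinates of the non-zero elements of `x`. A small NumPy illustration of that contract (an assumption about the intended semantics, not output copied from the PR's tests):

```python
import numpy as np

x = np.array([[0.0, 1.5],
              [2.0, 0.0]])

coords = np.argwhere(x)  # coordinates of the non-zero entries
print(coords)            # [[0 1]
                         #  [1 0]]
print(coords.shape)      # (2, 2) -> (n_nonzero, x.ndim)
```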
11 changes: 11 additions & 0 deletions python/infinicore/ops/asin.py
@@ -0,0 +1,11 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def asin(input: Tensor, *, out=None):
"""Arcsin activation function."""
if out is None:
return Tensor(_infinicore.asin(input._underlying))

_infinicore.asin_(out._underlying, input._underlying)
return out
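Usage mirrors the other unary wrappers in this package: `asin(x)` allocates a new tensor, while `asin(x, out=y)` writes into an existing one. The math itself is the elementwise arcsine, defined for inputs in [-1, 1] and returning radians; a quick NumPy sanity check of those values:

```python
import numpy as np

x = np.array([-1.0, 0.0, 0.5, 1.0], dtype=np.float32)
print(np.arcsin(x))  # [-1.5708  0.      0.5236  1.5708]
```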