Skip to content

Commit 6b903fd

Browse files
Merge pull request #476 from InfiniTensor/issue/474
issue/474: rename Dequantize to DequantizeAWQ in nvidia gpu
2 parents d3d982d + 4217976 commit 6b903fd

File tree

12 files changed: 106 additions (+106) and 103 deletions (-103).

include/infiniop.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include "infiniop/ops/causal_softmax.h"
88
#include "infiniop/ops/clip.h"
99
#include "infiniop/ops/conv.h"
10-
#include "infiniop/ops/dequantize.h"
10+
#include "infiniop/ops/dequantize_awq.h"
1111
#include "infiniop/ops/gemm.h"
1212
#include "infiniop/ops/mul.h"
1313
#include "infiniop/ops/random_sample.h"

include/infiniop/ops/dequantize.h

Lines changed: 0 additions & 28 deletions
This file was deleted.
include/infiniop/ops/dequantize_awq.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#ifndef __INFINIOP_DEQUANTIZE_AWQ_API_H__
2+
#define __INFINIOP_DEQUANTIZE_AWQ_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopDequantizeAWQDescriptor_t;
7+
8+
__C __export infiniStatus_t infiniopCreateDequantizeAWQDescriptor(infiniopHandle_t handle,
9+
infiniopDequantizeAWQDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t out_desc,
11+
infiniopTensorDescriptor_t qweight_desc,
12+
infiniopTensorDescriptor_t scales_desc,
13+
infiniopTensorDescriptor_t zeros_desc);
14+
15+
__C __export infiniStatus_t infiniopGetDequantizeAWQWorkspaceSize(infiniopDequantizeAWQDescriptor_t desc, size_t *size);
16+
17+
__C __export infiniStatus_t infiniopDequantizeAWQ(infiniopDequantizeAWQDescriptor_t desc,
18+
void *workspace,
19+
size_t workspace_size,
20+
void *out,
21+
const void *qweight,
22+
const void *scales,
23+
const void *zeros,
24+
void *stream);
25+
26+
__C __export infiniStatus_t infiniopDestroyDequantizeAWQDescriptor(infiniopDequantizeAWQDescriptor_t desc);
27+
28+
#endif

src/infiniop/ops/dequantize/nvidia/dequantize_w42f16_nvidia.cuh

Lines changed: 0 additions & 8 deletions
This file was deleted.

src/infiniop/ops/dequantize/dequantize.h renamed to src/infiniop/ops/dequantize_awq/dequantize_awq.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#ifndef __DEQUANTIZE_H__
2-
#define __DEQUANTIZE_H__
1+
#ifndef __DEQUANTIZE_AWQ_H__
2+
#define __DEQUANTIZE_AWQ_H__
33

44
#include "../../../utils.h"
55
#include "../../operator.h"
@@ -8,17 +8,17 @@
88

99
#define DESCRIPTOR(NAMESPACE) \
1010
\
11-
namespace op::dequantize::NAMESPACE { \
11+
namespace op::dequantize_awq::NAMESPACE { \
1212
class Descriptor final : public InfiniopDescriptor { \
1313
struct Opaque; \
1414
Opaque *_opaque; \
15-
DequantizeInfo _info; \
15+
DequantizeAWQInfo _info; \
1616
size_t _workspace_size; \
1717
\
1818
Descriptor( \
1919
size_t workspace_size_, \
2020
Opaque *opaque, \
21-
DequantizeInfo info, \
21+
DequantizeAWQInfo info, \
2222
infiniDevice_t device_type, \
2323
int device_id) \
2424
: InfiniopDescriptor{device_type, device_id}, \
@@ -49,4 +49,5 @@
4949
void *stream) const; \
5050
}; \
5151
}
52-
#endif
52+
53+
#endif //__DEQUANTIZE_AWQ_H__
Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
#ifndef __DEQUANTIZE_INFO_H__
2-
#define __DEQUANTIZE_INFO_H__
1+
#ifndef __DEQUANTIZE_AWQ_INFO_H__
2+
#define __DEQUANTIZE_AWQ_INFO_H__
33

44
#include "../../../utils.h"
55
#include "../../tensor.h"
66
#include <vector>
77

8-
namespace op::dequantize {
8+
namespace op::dequantize_awq {
99

10-
class DequantizeInfo {
11-
DequantizeInfo() = default;
10+
class DequantizeAWQInfo {
11+
DequantizeAWQInfo() = default;
1212

1313
public:
1414
int _in_features, _out_features, _num_groups;
@@ -17,7 +17,7 @@ class DequantizeInfo {
1717
int out_features() const { return _out_features; }
1818
int num_groups() const { return _num_groups; }
1919

20-
static utils::Result<DequantizeInfo> create(
20+
static utils::Result<DequantizeAWQInfo> create(
2121
infiniopTensorDescriptor_t out_desc,
2222
infiniopTensorDescriptor_t qweight_desc,
2323
infiniopTensorDescriptor_t scales_desc,
@@ -27,13 +27,13 @@ class DequantizeInfo {
2727
int _out_features = qweight_desc->dim(1);
2828
int _num_groups = scales_desc->dim(0);
2929

30-
return utils::Result<DequantizeInfo>(DequantizeInfo{
30+
return utils::Result<DequantizeAWQInfo>(DequantizeAWQInfo{
3131
_in_features,
3232
_out_features,
3333
_num_groups});
3434
}
3535
};
3636

37-
} // namespace op::dequantize
37+
} // namespace op::dequantize_awq
3838

39-
#endif // __DEQUANTIZE_INFO_H__
39+
#endif // __DEQUANTIZE_AWQ_INFO_H__

src/infiniop/ops/dequantize/nvidia/dequantize_w42f16_nvidia.cu renamed to src/infiniop/ops/dequantize_awq/nvidia/dequantize_w42f16_nvidia.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include "dequantize_w42f16_kernel.cuh"
66
#include "dequantize_w42f16_nvidia.cuh"
77

8-
#include "../dequantize.h"
8+
#include "../dequantize_awq.h"
99
#include <cuda_fp16.h>
1010

1111
__global__ void __launch_bounds__(64)
@@ -68,7 +68,7 @@ __global__ void __launch_bounds__(64)
6868
}
6969
}
7070

71-
namespace op::dequantize::nvidia {
71+
namespace op::dequantize_awq::nvidia {
7272

7373
struct Descriptor::Opaque {
7474
std::shared_ptr<device::nvidia::Handle::Internal> internal;
@@ -87,7 +87,7 @@ infiniStatus_t Descriptor::create(
8787
infiniopTensorDescriptor_t zeros_desc) {
8888

8989
auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
90-
auto result = DequantizeInfo::create(out_desc, qweight_desc, scales_desc, zeros_desc);
90+
auto result = DequantizeAWQInfo::create(out_desc, qweight_desc, scales_desc, zeros_desc);
9191

9292
*desc_ptr = new Descriptor(
9393
0,
@@ -133,6 +133,6 @@ Descriptor::calculate(
133133
return INFINI_STATUS_SUCCESS;
134134
}
135135

136-
} // namespace op::dequantize::nvidia
136+
} // namespace op::dequantize_awq::nvidia
137137

138138
#endif
src/infiniop/ops/dequantize_awq/nvidia/dequantize_w42f16_nvidia.cuh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#ifndef __DEQUANTIZE_AWQ_CUDA_CUH__
2+
#define __DEQUANTIZE_AWQ_CUDA_CUH__
3+
4+
#include "../dequantize_awq.h"
5+
6+
DESCRIPTOR(nvidia)
7+
8+
#endif // __DEQUANTIZE_AWQ_CUDA_CUH__

src/infiniop/ops/dequantize/operator.cc renamed to src/infiniop/ops/dequantize_awq/operator.cc

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,27 @@
11
#include "../../operator.h"
22
#include "../../handle.h"
3-
#include "infiniop/ops/dequantize.h"
3+
#include "infiniop/ops/dequantize_awq.h"
44

55
#ifdef ENABLE_NVIDIA_API
66
#include "nvidia/dequantize_w42f16_nvidia.cuh"
77
#endif
88

9-
__C infiniStatus_t infiniopCreateDequantizeDescriptor(
9+
__C infiniStatus_t infiniopCreateDequantizeAWQDescriptor(
1010
infiniopHandle_t handle,
11-
infiniopDequantizeDescriptor_t *desc_ptr,
11+
infiniopDequantizeAWQDescriptor_t *desc_ptr,
1212
infiniopTensorDescriptor_t out_desc,
1313
infiniopTensorDescriptor_t qweight_desc,
1414
infiniopTensorDescriptor_t scales_desc,
1515
infiniopTensorDescriptor_t zeros_desc) {
1616

17-
#define CREATE(CASE, NAMESPACE) \
18-
case CASE: \
19-
return op::dequantize::NAMESPACE::Descriptor::create( \
20-
handle, \
21-
reinterpret_cast<op::dequantize::NAMESPACE::Descriptor **>(desc_ptr), \
22-
out_desc, \
23-
qweight_desc, \
24-
scales_desc, \
17+
#define CREATE(CASE, NAMESPACE) \
18+
case CASE: \
19+
return op::dequantize_awq::NAMESPACE::Descriptor::create( \
20+
handle, \
21+
reinterpret_cast<op::dequantize_awq::NAMESPACE::Descriptor **>(desc_ptr), \
22+
out_desc, \
23+
qweight_desc, \
24+
scales_desc, \
2525
zeros_desc)
2626

2727
switch (handle->device) {
@@ -35,11 +35,11 @@ __C infiniStatus_t infiniopCreateDequantizeDescriptor(
3535
#undef CREATE
3636
}
3737

38-
__C infiniStatus_t infiniopGetDequantizeWorkspaceSize(infiniopDequantizeDescriptor_t desc,
39-
size_t *size) {
40-
#define GET(CASE, NAMESPACE) \
41-
case CASE: \
42-
*size = reinterpret_cast<const op::dequantize::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
38+
__C infiniStatus_t infiniopGetDequantizeAWQWorkspaceSize(infiniopDequantizeAWQDescriptor_t desc,
39+
size_t *size) {
40+
#define GET(CASE, NAMESPACE) \
41+
case CASE: \
42+
*size = reinterpret_cast<const op::dequantize_awq::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
4343
return INFINI_STATUS_SUCCESS
4444

4545
switch (desc->device_type) {
@@ -52,8 +52,8 @@ __C infiniStatus_t infiniopGetDequantizeWorkspaceSize(infiniopDequantizeDescript
5252
#undef GET
5353
}
5454

55-
__C infiniStatus_t infiniopDequantize(
56-
infiniopDequantizeDescriptor_t desc,
55+
__C infiniStatus_t infiniopDequantizeAWQ(
56+
infiniopDequantizeAWQDescriptor_t desc,
5757
void *workspace,
5858
size_t workspace_size,
5959
void *out,
@@ -62,9 +62,9 @@ __C infiniStatus_t infiniopDequantize(
6262
const void *zeros,
6363
void *stream) {
6464

65-
#define CALCULATE(CASE, NAMESPACE) \
66-
case CASE: \
67-
return reinterpret_cast<const op::dequantize::NAMESPACE::Descriptor *>(desc) \
65+
#define CALCULATE(CASE, NAMESPACE) \
66+
case CASE: \
67+
return reinterpret_cast<const op::dequantize_awq::NAMESPACE::Descriptor *>(desc) \
6868
->calculate(workspace, workspace_size, out, qweight, scales, zeros, stream)
6969

7070
switch (desc->device_type) {
@@ -79,11 +79,11 @@ __C infiniStatus_t infiniopDequantize(
7979
}
8080

8181
__C infiniStatus_t
82-
infiniopDestroyDequantizeDescriptor(infiniopDequantizeDescriptor_t desc) {
82+
infiniopDestroyDequantizeAWQDescriptor(infiniopDequantizeAWQDescriptor_t desc) {
8383

84-
#define DELETE(CASE, NAMESPACE) \
85-
case CASE: \
86-
delete reinterpret_cast<const op::dequantize::NAMESPACE::Descriptor *>(desc); \
84+
#define DELETE(CASE, NAMESPACE) \
85+
case CASE: \
86+
delete reinterpret_cast<const op::dequantize_awq::NAMESPACE::Descriptor *>(desc); \
8787
return INFINI_STATUS_SUCCESS;
8888

8989
switch (desc->device_type) {

0 commit comments

Comments
 (0)