|
| 1 | +#include "infinicore/ops/reshape_and_cache.hpp" |
| 2 | + |
| 3 | +#include "../infiniop_impl.hpp" |
| 4 | + |
| 5 | +namespace infinicore::op::reshape_and_cache_impl::infiniop { |
| 6 | + |
| 7 | +INFINIOP_CACHABLE_DESCRIPTOR(Descriptor, ReshapeAndCache, 100); |
| 8 | + |
| 9 | +struct PlannedMeta { |
| 10 | + std::shared_ptr<Descriptor> descriptor; |
| 11 | + graph::GraphTensor workspace; |
| 12 | + graph::GraphTensor key; |
| 13 | + graph::GraphTensor value; |
| 14 | + graph::GraphTensor key_cache; |
| 15 | + graph::GraphTensor value_cache; |
| 16 | + graph::GraphTensor slot_mapping; |
| 17 | + graph::GraphTensor k_scale; |
| 18 | + graph::GraphTensor v_scale; |
| 19 | + std::string kv_cache_dtype; |
| 20 | +}; |
| 21 | + |
| 22 | +void *plan(Tensor &key, |
| 23 | + Tensor &value, |
| 24 | + Tensor &key_cache, |
| 25 | + Tensor &value_cache, |
| 26 | + Tensor &slot_mapping, |
| 27 | + const std::string &kv_cache_dtype, |
| 28 | + Tensor &k_scale, |
| 29 | + Tensor &v_scale) { |
| 30 | + size_t seed = hash_combine(key, value, key_cache, value_cache, slot_mapping); |
| 31 | + |
| 32 | + INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE( |
| 33 | + Descriptor, descriptor, ReshapeAndCache, |
| 34 | + seed, |
| 35 | + key->desc(), value->desc(), key_cache->desc(), value_cache->desc(), |
| 36 | + slot_mapping->desc(), kv_cache_dtype.c_str()); |
| 37 | + |
| 38 | + INFINIOP_WORKSPACE_TENSOR(workspace, ReshapeAndCache, descriptor); |
| 39 | + |
| 40 | + return new PlannedMeta{ |
| 41 | + descriptor, |
| 42 | + graph::GraphTensor(workspace), |
| 43 | + graph::GraphTensor(key), |
| 44 | + graph::GraphTensor(value), |
| 45 | + graph::GraphTensor(key_cache), |
| 46 | + graph::GraphTensor(value_cache), |
| 47 | + graph::GraphTensor(slot_mapping), |
| 48 | + graph::GraphTensor(k_scale), |
| 49 | + graph::GraphTensor(v_scale), |
| 50 | + kv_cache_dtype}; |
| 51 | +} |
| 52 | + |
| 53 | +void run(void *planned_meta) { |
| 54 | + auto *p = reinterpret_cast<PlannedMeta *>(planned_meta); |
| 55 | + |
| 56 | + INFINICORE_CHECK_ERROR( |
| 57 | + infiniopReshapeAndCache( |
| 58 | + p->descriptor->desc, |
| 59 | + p->workspace->data(), |
| 60 | + p->workspace->numel(), |
| 61 | + p->key->data(), |
| 62 | + p->value->data(), |
| 63 | + p->key_cache->data(), |
| 64 | + p->value_cache->data(), |
| 65 | + p->slot_mapping->data(), |
| 66 | + p->kv_cache_dtype.c_str(), |
| 67 | + p->k_scale->data(), |
| 68 | + p->v_scale->data(), |
| 69 | + context::getStream())); |
| 70 | +} |
| 71 | + |
| 72 | +void cleanup(void **planned_meta_ptr) { |
| 73 | + delete *reinterpret_cast<PlannedMeta **>(planned_meta_ptr); |
| 74 | + *planned_meta_ptr = nullptr; |
| 75 | +} |
| 76 | + |
| 77 | +INFINICORE_GRAPH_OP_REGISTER_ALLDEVICE(ReshapeAndCache, &plan, &run, &cleanup); |
| 78 | + |
| 79 | +} // namespace infinicore::op::reshape_and_cache_impl::infiniop |
0 commit comments