|
| 1 | +#include "../infiniop_impl.hpp" |
| 2 | +#include "infinicore/ops/kv_caching.hpp" |
| 3 | + |
| 4 | +namespace infinicore::op::kv_caching_impl::infiniop { |
| 5 | + |
// Declares the `Descriptor` type used below for the KVCaching operator.
// NOTE(review): the trailing 100 is presumably the descriptor-cache capacity — confirm against the macro definition.
| 6 | +INFINIOP_CACHABLE_DESCRIPTOR(Descriptor, KVCaching, 100); |
| 7 | + |
| 8 | +struct PlannedMeta { |
| 9 | + std::shared_ptr<Descriptor> descriptor; |
| 10 | + graph::GraphTensor workspace, k_cache, v_cache, k, v, past_kv_lengths; |
| 11 | +}; |
| 12 | + |
| 13 | +void *plan(Tensor k_cache, |
| 14 | + Tensor v_cache, |
| 15 | + const Tensor &k, |
| 16 | + const Tensor &v, |
| 17 | + const Tensor &past_kv_lengths) { |
| 18 | + size_t seed = hash_combine(k_cache, v_cache, k, v, past_kv_lengths); |
| 19 | + |
| 20 | + INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE( |
| 21 | + Descriptor, descriptor, KVCaching, |
| 22 | + seed, k_cache->desc(), v_cache->desc(), |
| 23 | + k->desc(), v->desc(), past_kv_lengths->desc()); |
| 24 | + |
| 25 | + INFINIOP_WORKSPACE_TENSOR(workspace, KVCaching, descriptor); |
| 26 | + |
| 27 | + auto planned = new PlannedMeta{ |
| 28 | + descriptor, |
| 29 | + graph::GraphTensor(workspace), |
| 30 | + graph::GraphTensor(k_cache), |
| 31 | + graph::GraphTensor(v_cache), |
| 32 | + graph::GraphTensor(k), |
| 33 | + graph::GraphTensor(v), |
| 34 | + graph::GraphTensor(past_kv_lengths)}; |
| 35 | + |
| 36 | + return planned; |
| 37 | +} |
| 38 | + |
| 39 | +void run(void *planned_meta) { |
| 40 | + auto planned = reinterpret_cast<PlannedMeta *>(planned_meta); |
| 41 | + |
| 42 | + INFINICORE_CHECK_ERROR(infiniopKVCaching( |
| 43 | + planned->descriptor->desc, |
| 44 | + nullptr, 0, |
| 45 | + planned->k_cache->data(), |
| 46 | + planned->v_cache->data(), |
| 47 | + planned->k->data(), |
| 48 | + planned->v->data(), |
| 49 | + planned->past_kv_lengths->data(), |
| 50 | + context::getStream())); |
| 51 | +} |
| 52 | + |
| 53 | +void cleanup(void **planned_meta_ptr) { |
| 54 | + delete *reinterpret_cast<PlannedMeta **>(planned_meta_ptr); |
| 55 | + *planned_meta_ptr = nullptr; |
| 56 | +} |
| 57 | + |
// Registers this file's plan/run/cleanup functions for the KVCaching graph op.
// NOTE(review): the macro name suggests registration for every device type — confirm against its definition.
| 58 | +INFINICORE_GRAPH_OP_REGISTER_ALLDEVICE(KVCaching, &plan, &run, cleanup); |
| 59 | + |
| 60 | +} // namespace infinicore::op::kv_caching_impl::infiniop |
0 commit comments