Skip to content

Commit 246c7e3

Browse files
committed
[XNNPACK][Weights Cache] Initial Weights Cache Design with NamedDataMap
XNNWeightsCache Design with NamedDataMap. The intent of the weights cache is for tensors to be loaded (via name) through the named data map. APIs to be used by XNNCompiler: - load_unpacked_data - Takes in a string name (tensor name). The weights cache loads the data for this string from the named data map and returns the pointer. It also creates a mapping of this pointer to the name, which is later used by XNNPACK's internal weight cache implementation - free_unpacked_data - Frees all the unpacked data loaded from NamedDataMap. This is only safe to call after xnn_create_runtime has been called. This is because create_runtime takes unpacked data pointers and packs them into a separate buffer. - a couple of getter methods - get_packed_data_names - get_unpacked_data_names - get_num_packed_data - get() (gets the xnn_weights_cache object) Internal APIs used by the XNNPACK Library - look_up - takes a cache key (weight and bias pointers) and looks up the offset to the packed weight if it exists - look_up_or_insert - takes a cache key and pointer to packed weights and looks up the offset if it exists, or inserts a new packed weight into the cache and returns that offset - offset_to_addr - gets offset and returns address to packed pointer - reserve_space - returns memory address with appropriate size for XNNPACK to populate with packed weights (I want to use the runtime_allocator for this but I don't think we have the right sizes, so for now we are just using a string buffer and resizing it) - is_finalized - since this cache doesn't necessarily need to care about a finalized state we always return true. - delete_cache - deletes cache Differential Revision: [D70885917](https://our.internmc.facebook.com/intern/diff/D70885917/) ghstack-source-id: 271070691 Pull Request resolved: #9154
1 parent 99ad3d6 commit 246c7e3

File tree

6 files changed

+691
-1
lines changed

6 files changed

+691
-1
lines changed
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/backends/xnnpack/runtime/XNNWeightsCache.h>
10+
#include <executorch/runtime/core/memory_allocator.h>
11+
#include <executorch/runtime/core/error.h>
12+
#include <sys/stat.h>
13+
#include <xnnpack.h>
14+
namespace executorch {
15+
namespace backends {
16+
namespace xnnpack {
17+
namespace delegate {
18+
19+
using executorch::runtime::MemoryAllocator;
20+
using executorch::runtime::NamedDataMap;
21+
22+
/**
 * Constructs an empty cache and wires XNNPACK's xnn_weights_cache_provider
 * callbacks to this object's static handlers.
 *
 * The provider callbacks receive their context as `void*`, while the handlers
 * on this class take a typed `XNNWeightsCache*`. Calling a function through a
 * pointer of a different function type is undefined behavior, so each callback
 * is installed as a captureless-lambda trampoline that has the provider's
 * exact signature and casts the context back before forwarding.
 *
 * The pointer members and the finalized flag are given defined values here so
 * that nothing reads indeterminate state before initialize_for_runtime() runs.
 */
XNNWeightsCache::XNNWeightsCache()
    : runtime_allocator_(nullptr),
      named_data_map_(nullptr),
      is_finalized_(false) {
  weights_cache_.context = this;

  weights_cache_.look_up =
      [](void* context,
         const xnn_weights_cache_look_up_key* cache_key) -> size_t {
    return XNNWeightsCache::look_up(
        static_cast<XNNWeightsCache*>(context), cache_key);
  };

  weights_cache_.reserve_space = [](void* context, size_t n) -> void* {
    return XNNWeightsCache::reserve_space(
        static_cast<XNNWeightsCache*>(context), n);
  };

  weights_cache_.look_up_or_insert =
      [](void* context,
         const xnn_weights_cache_look_up_key* cache_key,
         void* ptr,
         size_t size) -> size_t {
    return XNNWeightsCache::look_up_or_insert(
        static_cast<XNNWeightsCache*>(context), cache_key, ptr, size);
  };

  weights_cache_.is_finalized = [](void* context) -> bool {
    return XNNWeightsCache::is_finalized(
        static_cast<XNNWeightsCache*>(context));
  };

  weights_cache_.offset_to_addr = [](void* context, size_t offset) -> void* {
    return XNNWeightsCache::offset_to_addr(
        static_cast<XNNWeightsCache*>(context), offset);
  };

  weights_cache_.delete_cache = [](void* context) -> enum xnn_status {
    return XNNWeightsCache::delete_cache(
        static_cast<XNNWeightsCache*>(context));
  };
}
38+
39+
/**
 * Prepares the cache for the next runtime creation.
 *
 * Stores the allocator and named data map to use for that runtime and marks
 * the cache as not finalized.
 *
 * @param[in] runtime_allocator Allocator recorded for the upcoming runtime.
 * @param[in] named_data_map Map that load_unpacked_data() reads named data
 *     from.
 * @return Always Error::Ok.
 */
Error XNNWeightsCache::initialize_for_runtime(
    MemoryAllocator* runtime_allocator,
    const NamedDataMap* named_data_map) {
  is_finalized_ = false;
  named_data_map_ = named_data_map;
  runtime_allocator_ = runtime_allocator;
  return Error::Ok;
}
50+
51+
/**
 * Finalizes the cache once the runtime has been created.
 *
 * Marks the cache finalized, frees every unpacked buffer that was loaded (the
 * packed copies are what remain in the cache), and bumps the reference count
 * of each packed entry that was inserted or looked up since the last
 * finalize.
 *
 * @return The names of the packed entries used by the runtime just created.
 */
Result<std::vector<std::string>> XNNWeightsCache::finalize_for_runtime() {
  is_finalized_ = true;

  // Release the unpacked buffers and forget the pointer -> name mapping that
  // referred to them.
  for (auto it = unpacked_data_.begin(); it != unpacked_data_.end(); ++it) {
    it->Free();
  }
  unpacked_data_.clear();
  unpacked_data_to_name_.clear();

  // Every entry touched while building this runtime gains one reference and
  // is reported back to the caller.
  std::vector<std::string> used_names;
  for (auto& name_and_meta : name_to_packed_data_metadata_) {
    PackedDataMeta& meta = name_and_meta.second;
    if (!meta.in_current_runtime) {
      continue;
    }
    meta.ref_count++;
    meta.in_current_runtime = false;
    used_names.push_back(name_and_meta.first);
  }

  return used_names;
}
75+
76+
77+
/**
 * Loads the named data from the NamedDataMap and keeps it alive in the cache.
 *
 * The returned pointer is recorded against `name` so that the cache callbacks
 * can later translate a weight/bias pointer back into its name.
 *
 * @param[in] name The key of the data to load.
 * @return Pointer to the loaded bytes; Error::InvalidState if no NamedDataMap
 *     has been provided; Error::InvalidExternalData if the map could not
 *     supply the data.
 */
Result<const uint8_t*> XNNWeightsCache::load_unpacked_data(
    const std::string& name) {
  // Guard against being called with a null map (e.g. one passed to
  // initialize_for_runtime) rather than dereferencing it.
  if (named_data_map_ == nullptr) {
    ET_LOG(
        Error,
        "Failed to load constant data for key %s: no NamedDataMap is set",
        name.c_str());
    return Error::InvalidState;
  }

  Result<FreeableBuffer> named_data = named_data_map_->get_data(name.c_str());
  if (!named_data.ok()) {
    ET_LOG(Error, "Failed to load constant data for key %s", name.c_str());
    return Error::InvalidExternalData;
  }

  // Capture the address before the buffer is moved into the owning vector.
  const uint8_t* data_pointer =
      static_cast<const uint8_t*>(named_data.get().data());
  unpacked_data_.push_back(std::move(named_data.get()));
  unpacked_data_to_name_[data_pointer] = name;

  return data_pointer;
}
89+
90+
Error XNNWeightsCache::delete_packed_data(const std::vector<std::string>& packed_data_names){
91+
if (!is_finalized_){
92+
ET_LOG(Error, "Error, attempted to delete packed data from the cache but the cache is not finalized");
93+
return Error::InvalidArgument;
94+
}
95+
for (const std::string& name : packed_data_names){
96+
auto entry = name_to_packed_data_metadata_.find(name);
97+
if (entry == name_to_packed_data_metadata_.end()){
98+
ET_LOG(Error, "Error, attempted to deleted packed data: %s, from the cache but it wasn't found", name.c_str());
99+
return Error::InvalidArgument;
100+
} else {
101+
entry->second.ref_count--;
102+
if (entry->second.ref_count == 0) {
103+
void* packed_data_ptr = packed_data_ptrs_[entry->second.offset];
104+
// Erase the key/value from the map frees the pointer holding the packed data
105+
packed_pointer_to_container_.erase(packed_data_ptr);
106+
// remove the pointer from the packed_data_ptrs_
107+
packed_data_ptrs_[entry->second.offset] = nullptr;
108+
// Erase the name to packed metadata entry
109+
name_to_packed_data_metadata_.erase(entry->first);
110+
}
111+
}
112+
}
113+
114+
return Error::Ok;
115+
}
116+
117+
118+
/**
 * Looks up the packed data for a weight/bias pointer pair.
 *
 * The packed-entry name is the weight's registered name, followed by the
 * bias's registered name when a bias pointer is given and is registered.
 * A hit also flags the entry as used by the runtime currently being created.
 *
 * @param[in] context The cache to search.
 * @param[in] cache_key Holds the unpacked kernel and bias pointers.
 * @return The entry's offset, or SIZE_MAX if the weight pointer is not
 *     registered or no packed entry exists under the resulting name.
 */
size_t XNNWeightsCache::look_up(
    XNNWeightsCache* context,
    const xnn_weights_cache_look_up_key* cache_key) {
  const auto& names_by_ptr = context->unpacked_data_to_name_;

  // The weight pointer must have been loaded through the cache.
  const auto kernel_it = names_by_ptr.find(cache_key->kernel);
  if (kernel_it == names_by_ptr.end()) {
    return SIZE_MAX;
  }

  // Build the combined weight(+bias) name.
  std::string packed_name = kernel_it->second;
  if (cache_key->bias != nullptr) {
    const auto bias_it = names_by_ptr.find(cache_key->bias);
    if (bias_it != names_by_ptr.end()) {
      packed_name.append(bias_it->second);
    }
  }

  // Has this combination been packed already?
  auto& packed_meta = context->name_to_packed_data_metadata_;
  const auto packed_it = packed_meta.find(packed_name);
  if (packed_it == packed_meta.end()) {
    return SIZE_MAX;
  }

  packed_it->second.in_current_runtime = true;
  return packed_it->second.offset;
}
149+
150+
/**
 * Reserves `n` bytes, aligned to kPackedAllocationAlignment, for packed
 * weights to be written into.
 *
 * The storage is a std::string over-allocated by one alignment unit; the
 * string is then stored in the cache keyed by the aligned address, so the
 * memory lives until that map entry is erased.
 *
 * TODO: the stated intent is to allocate from runtime_allocator_ instead of a
 * string buffer once suitable sizes are available.
 *
 * @param[in] context The cache that takes ownership of the storage.
 * @param[in] n Number of usable bytes required.
 * @return Aligned address with at least `n` bytes available behind it.
 */
void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) {
  constexpr size_t kAlignment = kPackedAllocationAlignment;
  static_assert(
      kAlignment != 0 && (kAlignment & (kAlignment - 1)) == 0,
      "alignment must be a power of two");

  std::string data_container;
  data_container.resize(n + kAlignment);

  // Round up with unsigned arithmetic. A signed intptr_t combined with `%`
  // yields a negative remainder for addresses with the top bit set, which
  // would push the result past the padding and leave it misaligned.
  const uintptr_t raw_address =
      reinterpret_cast<uintptr_t>(data_container.data());
  const uintptr_t aligned_address =
      (raw_address + kAlignment - 1) & ~static_cast<uintptr_t>(kAlignment - 1);
  void* aligned_space = reinterpret_cast<void*>(aligned_address);

  // NOTE(review): this relies on the move handing over the heap buffer so the
  // address stays valid; the buffer is always at least kAlignment bytes, which
  // should be beyond any small-string storage - confirm for the target STL.
  context->packed_pointer_to_container_[aligned_space] =
      std::move(data_container);
  return aligned_space;
}
164+
165+
/**
 * Returns the offset of the packed data for `cache_key`, inserting `ptr` as a
 * new entry when none exists yet.
 *
 * On a hit the bytes at `ptr` are compared against the stored packed data; a
 * mismatch is reported as SIZE_MAX. On a miss `ptr` is appended to the list
 * of packed pointers and, if the weight pointer has a registered name, a
 * metadata entry is created under the combined weight(+bias) name.
 *
 * @param[in] context The cache to query and update.
 * @param[in] cache_key Holds the unpacked kernel and bias pointers.
 * @param[in] ptr Address of the freshly packed data.
 * @param[in] size Number of bytes of packed data at `ptr`.
 * @return Offset of the packed data, or SIZE_MAX if a cached entry exists but
 *     does not match the bytes at `ptr`.
 */
size_t XNNWeightsCache::look_up_or_insert(
    XNNWeightsCache* context,
    const xnn_weights_cache_look_up_key* cache_key,
    void* ptr,
    size_t size) {
  const size_t offset = context->look_up(context, cache_key);

  if (offset != SIZE_MAX) {
    void* saved_ptr = context->offset_to_addr(context, offset);
    // A null saved pointer cannot be compared; treat it like stale data.
    if (saved_ptr != nullptr && 0 == memcmp(ptr, saved_ptr, size)) {
      // The cached copy is the one handed out from now on, so the duplicate
      // buffer behind `ptr` is released instead of being kept until the cache
      // is destroyed. Erasing an address this cache does not own is a no-op.
      // NOTE(review): assumes the caller does not read `ptr` again after a
      // hit - confirm against XNNPACK's weights-cache contract.
      if (ptr != saved_ptr) {
        context->packed_pointer_to_container_.erase(ptr);
      }
      return offset;
    }
    // Failure, cache is out of date
    return SIZE_MAX;
  }

  // Cache miss: the new entry goes at the end of packed_data_ptrs_.
  const size_t next_offset = context->packed_data_ptrs_.size();
  auto entry = context->unpacked_data_to_name_.find(cache_key->kernel);

  if (entry != context->unpacked_data_to_name_.end()) {
    // Same naming scheme as look_up(): weight name, then bias name if known.
    std::string weight_bias_name = entry->second;
    if (cache_key->bias != nullptr) {
      auto bias_entry = context->unpacked_data_to_name_.find(cache_key->bias);
      if (bias_entry != context->unpacked_data_to_name_.end()) {
        weight_bias_name.append(bias_entry->second);
      }
    }

    // Member-wise assignment keeps this valid without designated initializers.
    PackedDataMeta packed_data_metadata;
    packed_data_metadata.offset = next_offset;
    // ref_count is only incremented after finalizing for runtime
    packed_data_metadata.ref_count = 0;
    packed_data_metadata.in_current_runtime = true;
    context->name_to_packed_data_metadata_[weight_bias_name] =
        packed_data_metadata;
  } else {
    ET_LOG(
        Info,
        "Warning: Unpacked weight and bias were not registered with names, "
        "this will add new cache entries for packed data and may affect performance."
    );
  }
  context->packed_data_ptrs_.push_back(ptr);

  return next_offset;
}
211+
212+
/**
 * Reports whether the cache is currently finalized.
 *
 * @param[in] context The cache to inspect.
 * @return The value of the cache's finalized flag.
 */
bool XNNWeightsCache::is_finalized(XNNWeightsCache* context) {
  const bool finalized = context->is_finalized_;
  return finalized;
}
215+
216+
/**
 * Translates an offset returned by look_up()/look_up_or_insert() into the
 * address of the packed data.
 *
 * @param[in] context The cache that issued the offset.
 * @param[in] offset Index into the cache's list of packed pointers.
 * @return The packed-data address stored at `offset` (null if that entry has
 *     been deleted), or nullptr when `offset` is out of range - which includes
 *     the SIZE_MAX "not found" value.
 */
void* XNNWeightsCache::offset_to_addr(XNNWeightsCache* context, size_t offset) {
  // Bounds-check instead of indexing past the end of the vector.
  if (offset >= context->packed_data_ptrs_.size()) {
    return nullptr;
  }
  return context->packed_data_ptrs_[offset];
}
219+
220+
/**
 * Provider callback for deleting the cache. It does not touch the cache and
 * always reports success.
 *
 * @param[in] context Unused.
 * @return xnn_status_success.
 */
enum xnn_status XNNWeightsCache::delete_cache(XNNWeightsCache* context) {
  (void)context;
  return xnn_status_success;
}
223+
224+
} // namespace delegate
225+
} // namespace xnnpack
226+
} // namespace backends
227+
} // namespace executorch
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <xnnpack.h>
12+
13+
#include <executorch/runtime/executor/pte_data_map.h>
14+
#include <executorch/runtime/core/memory_allocator.h>
15+
#include <executorch/runtime/core/error.h>
16+
#include <executorch/runtime/core/result.h>
17+
#include <string>
18+
#include <array>
19+
#include <unordered_map>
20+
21+
namespace executorch {
22+
namespace backends {
23+
namespace xnnpack {
24+
namespace delegate {
25+
26+
using executorch::runtime::MemoryAllocator;
27+
using executorch::runtime::NamedDataMap;
28+
using executorch::runtime::Error;
29+
using executorch::runtime::Result;
30+
using executorch::runtime::FreeableBuffer;
31+
32+
/**
 * Bookkeeping for one packed-data entry in the weights cache.
 *
 * Every field has a default so that a default-constructed object is in a
 * well-defined state; the type remains an aggregate (C++14 and later).
 */
struct PackedDataMeta {
  // Index of the packed data in the cache's list of packed pointers
  size_t offset = 0;
  // Count number of xnn_runtime_t this packed data is used in
  size_t ref_count = 0;
  // true if this packed data was inserted or looked up for the
  // current runtime being created
  bool in_current_runtime = false;
};
40+
41+
class XNNWeightsCache {
42+
public:
43+
XNNWeightsCache();
44+
45+
/**
46+
* Initializes the XNNWeightsCache for the next xnn_create_runtime
47+
*/
48+
Error initialize_for_runtime(
49+
MemoryAllocator* runtime_allocator,
50+
const NamedDataMap* named_data_map);
51+
52+
/**
53+
* Finalizes the weights cache after the weights have been packed
54+
* in xnn_create_runtime.
55+
*
56+
* This should only be called after creating the runtime. Returns
57+
* the name of all the packed weights used by this runtime
58+
*/
59+
Result<std::vector<std::string>> finalize_for_runtime();
60+
61+
// Taken from XNN_ALLOCATION_ALIGNMENT in xnnpack/common.h
62+
static const size_t kPackedAllocationAlignment = 64;
63+
64+
/**
65+
* Returns XNNPACK's underlying weights_cache pointer
66+
*/
67+
inline xnn_weights_cache_t get() {
68+
return (xnn_weights_cache_t)&weights_cache_;
69+
}
70+
71+
/**
72+
* Returns the number of unpacked data
73+
*/
74+
inline size_t get_num_unpacked_data(){
75+
return unpacked_data_.size();
76+
};
77+
78+
/**
79+
* Returns the names of all unpacked data
80+
*/
81+
inline std::vector<std::string> get_unpacked_data_names(){
82+
std::vector<std::string> names;
83+
for (const auto& pair : unpacked_data_to_name_) {
84+
names.push_back(pair.second);
85+
}
86+
return names;
87+
};
88+
89+
/**
90+
* Returns the packed data names
91+
*/
92+
inline std::vector<std::string> get_packed_data_names(){
93+
std::vector<std::string> names;
94+
for (const auto& pair : name_to_packed_data_metadata_) {
95+
names.push_back(pair.first);
96+
}
97+
return names;
98+
};
99+
100+
101+
/**
102+
* Loads unpacked named data from the NamedDataMap into this XNNWeightsCache
103+
* and returns a pointer to the unpacked data. This unpacked data is given
104+
* to XNNPACK's define_tensor APIs, and used as the cache key for look_up_or_insert.
105+
* @param[in] name The name of the data to load
106+
* @param[out] out the pointer to the unpacked data that was loaded
107+
*/
108+
Result<const uint8_t*> load_unpacked_data(const std::string& name);
109+
110+
/**
111+
* Deletes the packed data associated with the names given.
112+
* Decrements the ref_count if the packed data is used by other
113+
* models
114+
*
115+
*/
116+
Error delete_packed_data(const std::vector<std::string>& packed_names);
117+
118+
119+
private:
120+
// Runtime Allocator used to reserve memory for packed weights
121+
MemoryAllocator* runtime_allocator_;
122+
123+
// Named Data Map used to load named data
124+
const NamedDataMap* named_data_map_;
125+
126+
// Map of unpacked pointers to the data name
127+
std::unordered_map<const void*, std::string> unpacked_data_to_name_;
128+
// Map of data names to offset into the packed data
129+
std::unordered_map<std::string, PackedDataMeta> name_to_packed_data_metadata_;
130+
// Vector holding list of pointers to the packed data
131+
std::vector<void*> packed_data_ptrs_;
132+
// vector holding list of strings which are containers for packed_data_ptrs
133+
std::unordered_map<void*, std::string> packed_pointer_to_container_;
134+
// Vector hodling list of unpacked freeable buffers
135+
std::vector<FreeableBuffer> unpacked_data_;
136+
// xnnpack's weight cache provider
137+
xnn_weights_cache_provider weights_cache_;
138+
// whether or not the weight cache is finalized
139+
bool is_finalized_;
140+
141+
// Function pointers to override XNNPACK's default xnn_weights_cache_provider
142+
// functions.
143+
static size_t look_up(
144+
XNNWeightsCache* context,
145+
const xnn_weights_cache_look_up_key* cache_key);
146+
147+
static void* reserve_space(XNNWeightsCache* context, size_t n);
148+
149+
static size_t look_up_or_insert(
150+
XNNWeightsCache* context,
151+
const xnn_weights_cache_look_up_key* cache_key,
152+
void* ptr,
153+
size_t size);
154+
155+
static bool is_finalized(XNNWeightsCache* context);
156+
157+
static void* offset_to_addr(XNNWeightsCache* context, size_t offset);
158+
159+
static enum xnn_status delete_cache(XNNWeightsCache* context);
160+
161+
};
162+
163+
} // namespace delegate
164+
} // namespace xnnpack
165+
} // namespace backends
166+
} // namespace executorch

0 commit comments

Comments
 (0)