@@ -1980,3 +1980,149 @@ ggml_backend_buffer_type_t ggml_backend_cpu_repack_buffer_type(void) {
19801980
19811981    return  &ggml_backend_cpu_buffer_type_repack;
19821982}
1983+ 
1984+ #ifdef  GGML_BUILD_TESTS
// Test repack wrapper buffer type that stores the original data before repacking.
// The motivation for this type is that, when testing repack, calling set_tensor
// repacks the tensor data and the original data is lost.
//
// In test-backend-ops.cpp we want to compare the results of a backend using
// repacked input data against a backend that uses non-repacked data.
// The problem arises in `ggml_backend_compare_graph_backend`, where the graphs
// are copied and ggml_backend_cpu_repack_buffer_type does not implement
// the get_tensor function; even if it did, it would return the repacked data,
// which is not what we want to compare against. This type allows proper
// comparison between repacked and non-repacked data.
1996+ 
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <unordered_map>
#include <vector>
1999+ 
2000+ struct  test_repack_wrapper_context  {
2001+     ggml_backend_buffer_t  cpu_buffer;
2002+ 
2003+     //  This map stores the original (non repacked) data so that when the graph
2004+     //  is copied by ggml_backend_compare_graph_backend we can return the original
2005+     //  data in get_tensor.
2006+     std::unordered_map<struct  ggml_tensor  *, std::vector<uint8_t >> original_data;
2007+ };
2008+ 
2009+ static  void  ggml_backend_cpu_repack_test_buffer_free_buffer (ggml_backend_buffer_t  buffer) {
2010+     test_repack_wrapper_context * ctx = (test_repack_wrapper_context *) buffer->context ;
2011+     if  (ctx->cpu_buffer ) {
2012+         ggml_backend_buffer_free (ctx->cpu_buffer );
2013+     }
2014+     delete  ctx;
2015+ }
2016+ 
2017+ static  enum  ggml_status ggml_backend_cpu_repack_test_buffer_init_tensor (ggml_backend_buffer_t  buffer, struct  ggml_tensor  * tensor) {
2018+     if  (tensor->op  == GGML_OP_MUL_MAT && ggml_n_dims (tensor->src [0 ]) == 2 ) {
2019+         tensor->src [0 ]->extra  = (void  *) const_cast <ggml::cpu::tensor_traits *>(ggml_repack_get_optimal_repack_type (tensor->src [0 ]));
2020+     }
2021+     if  (tensor->op  == GGML_OP_MUL_MAT_ID && ggml_n_dims (tensor->src [0 ]) == 3 ) {
2022+         tensor->src [0 ]->extra  = (void  *) const_cast <ggml::cpu::tensor_traits *>(ggml_repack_get_optimal_repack_type (tensor->src [0 ]));
2023+     }
2024+ 
2025+     //  Not really sure if this is strictly needed as the cpu buffer does not
2026+     //  initialize anything at the moment, but keeping this just in case that changes.
2027+     test_repack_wrapper_context * w_ctx = (test_repack_wrapper_context *) buffer->context ;
2028+     if  (w_ctx->cpu_buffer ->iface .init_tensor ) {
2029+         return  w_ctx->cpu_buffer ->iface .init_tensor (w_ctx->cpu_buffer , tensor);
2030+     }
2031+     return  GGML_STATUS_SUCCESS;
2032+ }
2033+ 
2034+ static  void  ggml_backend_cpu_repack_test_buffer_set_tensor (ggml_backend_buffer_t  buffer, struct  ggml_tensor  * tensor, const  void  * data, size_t  offset, size_t  size) {
2035+     test_repack_wrapper_context * w_ctx = (test_repack_wrapper_context *) buffer->context ;
2036+     GGML_ASSERT (w_ctx != nullptr );
2037+ 
2038+     auto  tensor_traits = (ggml::cpu::repack::tensor_traits_base *) tensor->extra ;
2039+     if  (tensor_traits) {
2040+         w_ctx->original_data [tensor] = std::vector<uint8_t >((const  uint8_t  *)data, (const  uint8_t  *)data + size);
2041+         auto  OK = tensor_traits->repack (tensor, data, size);
2042+         GGML_ASSERT (OK == 0 );
2043+     } else  {
2044+         //  Forward to underlying CPU buffer (no repacking)
2045+         w_ctx->cpu_buffer ->iface .set_tensor (w_ctx->cpu_buffer , tensor, data, offset, size);
2046+     }
2047+ }
2048+ 
2049+ static  void  * ggml_backend_cpu_repack_test_buffer_get_base (ggml_backend_buffer_t  buffer) {
2050+     test_repack_wrapper_context * w_ctx = (test_repack_wrapper_context *) buffer->context ;
2051+     return  ggml_backend_buffer_get_base (w_ctx->cpu_buffer );
2052+ }
2053+ 
2054+ static  void  ggml_backend_cpu_repack_test_buffer_get_tensor (ggml_backend_buffer_t  buffer, const  struct  ggml_tensor  * tensor, void  * data, size_t  offset, size_t  size) {
2055+     test_repack_wrapper_context * w_ctx = (test_repack_wrapper_context *) buffer->context ;
2056+ 
2057+     auto  tensor_traits = (ggml::cpu::repack::tensor_traits_base *) tensor->extra ;
2058+     if  (tensor_traits) {
2059+         //  Return the original data for repacked tensor data. This is here so
2060+         //  that when the graph is copied we can still get the original data which
2061+         //  would otherwise be lost.
2062+         auto  it = w_ctx->original_data .find (const_cast <struct  ggml_tensor  *>(tensor));
2063+         if  (it != w_ctx->original_data .end ()) {
2064+             const  auto & original = it->second ;
2065+             size_t  copy_size = std::min (size, original.size () - offset);
2066+             std::memcpy (data, original.data () + offset, copy_size);
2067+         }
2068+     } else  {
2069+         //  For non-repacked data just forward to the underlying CPU buffer.
2070+         w_ctx->cpu_buffer ->iface .get_tensor (w_ctx->cpu_buffer , tensor, data, offset, size);
2071+     }
2072+ }
2073+ 
2074+ static  const  char  * ggml_backend_cpu_repack_test_buffer_type_get_name (ggml_backend_buffer_type_t  buft) {
2075+     return  " CPU_REPACK_TEST" 
2076+     GGML_UNUSED (buft);
2077+ }
2078+ 
2079+ static  ggml_backend_buffer_t  ggml_backend_cpu_repack_test_buffer_type_alloc_buffer (ggml_backend_buffer_type_t  buft, size_t  size) {
2080+     ggml_backend_buffer_t  cpu_buffer = ggml_backend_buft_alloc_buffer (ggml_backend_cpu_buffer_type (), size);
2081+     if  (!cpu_buffer) {
2082+         return  nullptr ;
2083+     }
2084+ 
2085+     test_repack_wrapper_context * w_ctx = new  test_repack_wrapper_context;
2086+     w_ctx->cpu_buffer  = cpu_buffer;
2087+ 
2088+     static  const  struct  ggml_backend_buffer_i  ggml_backend_cpu_repack_test_buffer_i = {
2089+         /*  .free_buffer   = */ 
2090+         /*  .get_base      = */ 
2091+         /*  .init_tensor   = */ 
2092+         /*  .memset_tensor = */ nullptr ,
2093+         /*  .set_tensor    = */ 
2094+         /*  .get_tensor    = */ 
2095+         /*  .cpy_tensor    = */ nullptr ,
2096+         /*  .clear         = */ nullptr ,
2097+         /*  .reset         = */ nullptr ,
2098+     };
2099+ 
2100+     //  This is intentionally using the repack buffer type because this type is
2101+     //  used in ggml::cpu::repack::get_tensor_traits, and without this the
2102+     //  computation will not be forwarded to repacks compute_forward function.
2103+     auto  repack_buft = ggml_backend_cpu_repack_buffer_type ();
2104+     return  ggml_backend_buffer_init (repack_buft, ggml_backend_cpu_repack_test_buffer_i, w_ctx, size);
2105+     GGML_UNUSED (buft);
2106+ }
2107+ 
2108+ static  size_t  ggml_backend_cpu_repack_test_buffer_type_get_alignment (ggml_backend_buffer_type_t  buft) {
2109+     return  ggml_backend_buft_get_alignment (ggml_backend_cpu_buffer_type ());
2110+     GGML_UNUSED (buft);
2111+ }
2112+ 
2113+ ggml_backend_buffer_type_t  ggml_backend_cpu_repack_test_buffer_type (void ) {
2114+     static  struct  ggml_backend_buffer_type  ggml_backend_cpu_buffer_type_repack_test = {
2115+         /*  .iface    = */ 
2116+                            /*  .get_name         = */ 
2117+                            /*  .alloc_buffer     = */ 
2118+                            /*  .get_alignment    = */ 
2119+                            /*  .get_max_size     = */ nullptr ,  //  defaults to SIZE_MAX
2120+                            /*  .get_alloc_size   = */ nullptr ,  //  defaults to ggml_nbytes
2121+                            /*  .is_host          = */ nullptr ,  //  defaults to true
2122+                            },
2123+         /*  .device  = */ ggml_backend_reg_dev_get (ggml_backend_cpu_reg (), 0 ),
2124+         /*  .context = */ nullptr ,
2125+     };
2126+     return  &ggml_backend_cpu_buffer_type_repack_test;
2127+ }
2128+ #endif  //  GGML_BUILD_TESTS
0 commit comments