@@ -591,7 +591,47 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d(struct ggml_context* ctx,
                                                       int p1 = 0,
                                                       int d0 = 1,
                                                       int d1 = 1) {
+    // if(w->ne[0]==3 && w->ne[1]==3 && p0==1 && p1==1 && s0==1 && s1==1 &&
+    //    d0==1 && d1==1 && w->ne[3]%64 == 0 && w->ne[2]%8 == 0 && x->ne[3] == 1){
+
+    //     printf("x-shape 0: (%zu, %zu, %zu, %zu) %zu, %zu \n", x->ne[0], x->ne[1], x->ne[2], x->ne[3], w->ne[2], w->ne[3]);
+    //     printf(" (%zu, %zu, %zu, %zu) %zu, %zu \n", x->ne[0], x->ne[1], x->ne[2], x->ne[3], w->ne[2], w->ne[3]);
+    //     print_ggml_tensor(x, false, "bef wino");
+    //     x = ggml_conv_2d_3x3(ctx, w, x);
+    //     print_ggml_tensor(x, false, "aft wino");
+    //     printf("x-shape 2: (%zu, %zu, %zu, %zu) %zu, %zu \n", x->ne[0], x->ne[1], x->ne[2], x->ne[3], w->ne[2], w->ne[3]);
+    // }
+    // else{
     x = ggml_conv_2d(ctx, w, x, s0, s1, p0, p1, d0, d1);
+    // if(w->ne[0]==3 && w->ne[1]==3 && p0==1 && p1==1 && s0==1 && s1==1 &&
+    //    d0==1 && d1==1 && w->ne[3]%64 == 0 && w->ne[2]%8 == 0 && x->ne[3] == 1){
+    //     printf("x-shape1: (%zu, %zu, %zu, %zu) %zu, %zu \n", x->ne[0], x->ne[1], x->ne[2], x->ne[3], w->ne[2], w->ne[3]);
+    // }
+    // }
+    if (b != NULL) {
+        b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
+        // b = ggml_repeat(ctx, b, x);
+        x = ggml_add(ctx, x, b);
+    }
+    return x;
+}
+
+// w: [IC, 4, 4, OC]
+// x: [1, IC, IH, IW]
+// b: [OC,]
+// result: [N, OC, OH, OW]
+__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d1x3x3(struct ggml_context* ctx,
+                                                           struct ggml_tensor* x,
+                                                           struct ggml_tensor* w,
+                                                           struct ggml_tensor* b
+                                                           ) {
+    // int64_t *ne = x->ne;
+    // if(!w) printf("w is null\n");
+    // int64_t *ne1 = w->ne;
+    // printf("before: (%ld, %ld, %ld, %ld), (%ld, %ld, %ld, %ld)\n", ne[0], ne[1], ne[2], ne[3], ne1[0], ne1[1], ne1[2], ne1[3]);
+    x = ggml_winograd_stage1(ctx, w, x);
+    // ne = x->ne;
+    // printf("after: (%ld, %ld, %ld, %ld)\n", ne[0], ne[1], ne[2], ne[3]);
     if (b != NULL) {
         b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
         // b = ggml_repeat(ctx, b, x);
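For context: `ggml_winograd_stage0`/`ggml_winograd_stage1` are the custom ops this commit builds on, splitting a Winograd F(2x2, 3x3) convolution into a one-time weight transform (each 3x3 kernel becomes the 4x4 tile reflected in the `w: [IC, 4, 4, OC]` comment above) and a per-inference stage that transforms input tiles, multiplies elementwise, and applies the inverse transform. Below is a minimal, self-contained sketch of the standard F(2x2, 3x3) weight transform U = G g Gᵀ, for orientation only; the actual op's memory layout and types may differ:

```cpp
#include <stdio.h>

// Standard Winograd F(2x2, 3x3) filter-transform matrix (Lavin & Gray).
static const float G[4][3] = {
    {1.0f, 0.0f, 0.0f},
    {0.5f, 0.5f, 0.5f},
    {0.5f, -0.5f, 0.5f},
    {0.0f, 0.0f, 1.0f},
};

// U = G * g * G^T: one 3x3 kernel g becomes one 4x4 tile U.
static void winograd_weight_transform(const float g[3][3], float U[4][4]) {
    float Gg[4][3];  // intermediate product G * g
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 3; j++) {
            Gg[i][j] = 0.0f;
            for (int k = 0; k < 3; k++)
                Gg[i][j] += G[i][k] * g[k][j];
        }
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++) {
            U[i][j] = 0.0f;
            for (int k = 0; k < 3; k++)
                U[i][j] += Gg[i][k] * G[j][k];  // right-multiply by G^T
        }
}

int main(void) {
    float g[3][3] = {{0, 0, 0}, {0, 1, 0}, {0, 0, 0}};  // delta kernel
    float U[4][4];
    winograd_weight_transform(g, U);
    for (int i = 0; i < 4; i++)
        printf("%6.2f %6.2f %6.2f %6.2f\n", U[i][0], U[i][1], U[i][2], U[i][3]);
    return 0;
}
```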
@@ -1001,7 +1041,7 @@ struct GGMLRunner {
 
         // compute the required memory
         size_t compute_buffer_size = ggml_gallocr_get_buffer_size(compute_allocr, 0);
-        LOG_DEBUG("%s compute buffer size: %.2f MB(%s)",
+        LOG_INFO("%s compute buffer size: %.2f MB(%s)",
                   get_desc().c_str(),
                   compute_buffer_size / 1024.0 / 1024.0,
                   ggml_backend_is_cpu(backend) ? "RAM" : "VRAM");
@@ -1019,6 +1059,8 @@ struct GGMLRunner {
         backend_tensor_data_map.clear();
     }
 
+    virtual void transform(int n) {}
+
 public:
     virtual std::string get_desc() = 0;
 
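The new `transform(int n)` virtual on `GGMLRunner` gives subclasses a hook to run the one-time weight transform after weights are in place. A hedged sketch of an override; the member names (`params_ctx`, `backend`, the root block `model`) are assumptions about the concrete runners in this codebase, not part of this hunk:

```cpp
// Illustrative only: a concrete runner forwarding the hook into its block
// tree, so every Conv2d1x3x3 below the root precomputes its 4x4 tiles once.
struct WinogradRunnerSketch : public GGMLRunner {
    UnetModelBlock model;  // assumed root block

    std::string get_desc() { return "unet"; }

    void transform(int n_threads) {
        // params_ctx/backend are assumed GGMLRunner members
        model.transform(params_ctx, n_threads, backend);
    }
};
```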
@@ -1155,14 +1197,29 @@ class GGMLBlock {
         }
     }
 
+    void transform_blocks(struct ggml_context* ctx, int n, ggml_backend_t backend) {
+        for (auto& pair : blocks) {
+            auto& block = pair.second;
+
+            block->transform(ctx, n, backend);
+        }
+    }
+
     virtual void init_params(struct ggml_context* ctx, ggml_type wtype) {}
 
+    virtual void transform_params(struct ggml_context* ctx, int n, ggml_backend_t backend) {}
+
 public:
     void init(struct ggml_context* ctx, ggml_type wtype) {
         init_blocks(ctx, wtype);
         init_params(ctx, wtype);
     }
 
+    void transform(struct ggml_context* ctx, int n, ggml_backend_t backend) {
+        transform_blocks(ctx, n, backend);
+        transform_params(ctx, n, backend);
+    }
+
     size_t get_params_num() {
         size_t num_tensors = params.size();
         for (auto& pair : blocks) {
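`transform()` deliberately mirrors `init()`: recurse into child blocks first, then handle this block's own params, so a single call at the root reaches every leaf. A self-contained toy of that traversal pattern (names are illustrative, not the real classes):

```cpp
#include <cstdio>
#include <map>
#include <memory>
#include <string>

struct Block {
    std::map<std::string, std::shared_ptr<Block>> blocks;

    virtual ~Block() = default;
    virtual void transform_params(int n_threads) {}  // leaf hook, no-op by default

    void transform(int n_threads) {
        for (auto& pair : blocks)        // children first, like transform_blocks()
            pair.second->transform(n_threads);
        transform_params(n_threads);     // then this block's own params
    }
};

struct WinogradConv : Block {
    void transform_params(int n_threads) override {
        std::printf("precomputing tiles with %d threads\n", n_threads);
    }
};

int main() {
    Block root;
    root.blocks["conv"] = std::make_shared<WinogradConv>();
    root.transform(8);  // one call at the root reaches every leaf
    return 0;
}
```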
@@ -1313,16 +1370,77 @@ class Conv2d : public UnaryBlock {
           dilation(dilation),
           bias(bias) {}
 
+    // Conv2d(){}
+
     struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
         struct ggml_tensor* w = params["weight"];
         struct ggml_tensor* b = NULL;
         if (bias) {
             b = params["bias"];
         }
+        // if(kernel_size.first == 3){
+        //     printf(" (%zu, %zu, %zu, %zu) %zu, %zu \n", x->ne[0], x->ne[1], x->ne[2], x->ne[3], in_channels, out_channels);
+        //     // printf(" (%d - %d - %d) \n", stride.first, padding.first, dilation.first);
+        // }
         return ggml_nn_conv_2d(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
     }
 };
 
+class Conv2d1x3x3 : public UnaryBlock {
+protected:
+    int64_t in_channels;
+    int64_t out_channels;
+    bool bias;
+
+    struct ggml_tensor* trans = NULL;
+
+    void init_params(struct ggml_context* ctx, ggml_type wtype) {
+        params["weight"] = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 3, 3, in_channels, out_channels);
+        // params["transform"] = ggml_winograd_stage0(ctx, params["weight"]);
+        trans = ggml_winograd_stage0(ctx, params["weight"]);
+        if (bias) {
+            params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, out_channels);
+        }
+    }
+
+    void transform_params(struct ggml_context* ctx, int n_threads, ggml_backend_t backend) {
+        // struct ggml_tensor* w = params["weight"];
+        // struct ggml_tensor* t = ggml_winograd_stage0(ctx, w);
+        struct ggml_cgraph* gf = ggml_new_graph(ctx);
+        ggml_build_forward_expand(gf, trans);
+        if (ggml_backend_is_cpu(backend)) {
+            ggml_backend_cpu_set_n_threads(backend, n_threads);
+        }
+        ggml_backend_graph_compute(backend, gf);
+        params["transform"] = trans;
+        ggml_graph_clear(gf);
+        trans->src[0] = NULL;  // not elegant!! skip FX during wino_stage1
+    }
+
+public:
+    Conv2d1x3x3(int64_t in_channels,
+                int64_t out_channels,
+                bool bias = true)
+        : in_channels(in_channels),
+          out_channels(out_channels),
+          bias(bias) {}
+
+    struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
+        // struct ggml_tensor* w = params["weight"];
+        struct ggml_tensor* w = params["transform"];
+        struct ggml_tensor* b = NULL;
+        if (bias) {
+            b = params["bias"];
+        }
+        // if(kernel_size.first == 3){
+        //     printf(" (%zu, %zu, %zu, %zu) %zu, %zu \n", x->ne[0], x->ne[1], x->ne[2], x->ne[3], in_channels, out_channels);
+        //     // printf(" (%d - %d - %d) \n", stride.first, padding.first, dilation.first);
+        // }
+        // return ggml_nn_conv_2d1x3x3(ctx, x, w, b);
+        return ggml_nn_conv_2d1x3x3(ctx, x, trans, b);
+    }
+};
+
 class Conv3dnx1x1 : public UnaryBlock {
 protected:
     int64_t in_channels;
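Taken together, the expected lifecycle of the new block appears to be: `init()` allocates the F16 3x3 `weight` and records the stage-0 graph node, `transform()` computes the 4x4 tiles once (then severs `trans->src[0]` so later graphs do not redo the weight transform), and `forward()` runs only stage 1 plus the bias add. A hedged usage sketch, with weight loading and context setup elided:

```cpp
// Sketch only: assumes the GGMLBlock lifecycle from this diff; the channel
// counts and the params_ctx/compute_ctx/backend/n_threads variables are
// hypothetical placeholders.
Conv2d1x3x3 conv(320, 320);
conv.init(params_ctx, GGML_TYPE_F16);   // allocates "weight"/"bias" and
                                        // builds the ggml_winograd_stage0 node
/* ... load the 3x3 weights into params["weight"] ... */
conv.transform(params_ctx, n_threads, backend);        // one-time tile compute
struct ggml_tensor* y = conv.forward(compute_ctx, x);  // stage 1 only
```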