@@ -635,7 +635,10 @@ sd_tiling_calc_tiles(int &num_tiles_dim, float& tile_overlap_factor_dim, int sma
635635}
636636
637637// Tiling
638- __STATIC_INLINE__ void sd_tiling (ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
638+ __STATIC_INLINE__ void sd_tiling_non_square (ggml_tensor* input, ggml_tensor* output, const int scale,
639+ const int p_tile_size_x, const int p_tile_size_y,
640+ const float tile_overlap_factor, on_tile_process on_processing) {
641+
639642 output = ggml_set_f32 (output, 0 );
640643
641644 int input_width = (int )input->ne [0 ];
@@ -658,25 +661,25 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
658661
659662 int num_tiles_x;
660663 float tile_overlap_factor_x;
661- sd_tiling_calc_tiles (num_tiles_x, tile_overlap_factor_x, small_width, tile_size , tile_overlap_factor);
664+ sd_tiling_calc_tiles (num_tiles_x, tile_overlap_factor_x, small_width, p_tile_size_x , tile_overlap_factor);
662665
663666 int num_tiles_y;
664667 float tile_overlap_factor_y;
665- sd_tiling_calc_tiles (num_tiles_y, tile_overlap_factor_y, small_height, tile_size , tile_overlap_factor);
668+ sd_tiling_calc_tiles (num_tiles_y, tile_overlap_factor_y, small_height, p_tile_size_y , tile_overlap_factor);
666669
667670 LOG_DEBUG (" num tiles : %d, %d " , num_tiles_x, num_tiles_y);
668671 LOG_DEBUG (" optimal overlap : %f, %f (targeting %f)" , tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);
669672
670673 GGML_ASSERT (input_width % 2 == 0 && input_height % 2 == 0 && output_width % 2 == 0 && output_height % 2 == 0 ); // should be multiple of 2
671674
672- int tile_overlap_x = (int32_t )(tile_size * tile_overlap_factor_x);
673- int non_tile_overlap_x = tile_size - tile_overlap_x;
675+ int tile_overlap_x = (int32_t )(p_tile_size_x * tile_overlap_factor_x);
676+ int non_tile_overlap_x = p_tile_size_x - tile_overlap_x;
674677
675- int tile_overlap_y = (int32_t )(tile_size * tile_overlap_factor_y);
676- int non_tile_overlap_y = tile_size - tile_overlap_y;
678+ int tile_overlap_y = (int32_t )(p_tile_size_y * tile_overlap_factor_y);
679+ int non_tile_overlap_y = p_tile_size_y - tile_overlap_y;
677680
678- int tile_size_x = tile_size < small_width ? tile_size : small_width;
679- int tile_size_y = tile_size < small_height ? tile_size : small_height;
681+ int tile_size_x = p_tile_size_x < small_width ? p_tile_size_x : small_width;
682+ int tile_size_y = p_tile_size_y < small_height ? p_tile_size_y : small_height;
680683
681684 int input_tile_size_x = tile_size_x;
682685 int input_tile_size_y = tile_size_y;
@@ -765,6 +768,11 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
765768 ggml_free (tiles_ctx);
766769}
767770
771+ __STATIC_INLINE__ void sd_tiling (ggml_tensor* input, ggml_tensor* output, const int scale, // square-tile entry point; thin wrapper around sd_tiling_non_square
772+ const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) { // single tile_size applies to both axes
773+ sd_tiling_non_square (input, output, scale, tile_size, tile_size, tile_overlap_factor, on_processing); // tile_size is passed twice: as p_tile_size_x and as p_tile_size_y; all other arguments are forwarded unchanged
774+ }
775+
768776__STATIC_INLINE__ struct ggml_tensor * ggml_group_norm_32 (struct ggml_context * ctx,
769777 struct ggml_tensor * a) {
770778 const float eps = 1e-6f ; // default eps parameter
0 commit comments