@@ -607,6 +607,33 @@ __STATIC_INLINE__ void ggml_tensor_scale_output(struct ggml_tensor* src) {
607607
608608typedef std::function<void (ggml_tensor*, ggml_tensor*, bool )> on_tile_process;
609609
610+ __STATIC_INLINE__ void
611+ sd_tiling_calc_tiles (int &num_tiles_dim, float & tile_overlap_factor_dim, int small_dim, int tile_size, const float tile_overlap_factor) {
612+
613+ int tile_overlap = (tile_size * tile_overlap_factor);
614+ int non_tile_overlap = tile_size - tile_overlap;
615+
616+ num_tiles_dim = (small_dim - tile_overlap) / non_tile_overlap;
617+ int overshoot_dim = ((num_tiles_dim + 1 ) * non_tile_overlap + tile_overlap) % small_dim;
618+
619+ if ((overshoot_dim != non_tile_overlap) && (overshoot_dim <= num_tiles_dim * (tile_size / 2 - tile_overlap))) {
620+ // if tiles don't fit perfectly using the desired overlap
621+ // and there is enough room to squeeze an extra tile without overlap becoming >0.5
622+ num_tiles_dim++;
623+ }
624+
625+ tile_overlap_factor_dim = (float )(tile_size * num_tiles_dim - small_dim) / (float )(tile_size * (num_tiles_dim - 1 ));
626+ if (num_tiles_dim <= 2 ) {
627+ if (small_dim <= tile_size) {
628+ num_tiles_dim = 1 ;
629+ tile_overlap_factor_dim = 0 ;
630+ } else {
631+ num_tiles_dim = 2 ;
632+ tile_overlap_factor_dim = (2 * tile_size - small_dim) / (float )tile_size;
633+ }
634+ }
635+ }
636+
610637// Tiling
611638__STATIC_INLINE__ void sd_tiling (ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
612639 output = ggml_set_f32 (output, 0 );
@@ -629,48 +656,13 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
629656 small_height = input_height;
630657 }
631658
632- int tile_overlap = (tile_size * tile_overlap_factor);
633- int non_tile_overlap = tile_size - tile_overlap;
634-
635- int num_tiles_x = (small_width - tile_overlap) / non_tile_overlap;
636- int overshoot_x = ((num_tiles_x + 1 ) * non_tile_overlap + tile_overlap) % small_width;
659+ int num_tiles_x;
660+ float tile_overlap_factor_x;
661+ sd_tiling_calc_tiles (num_tiles_x, tile_overlap_factor_x, small_width, tile_size, tile_overlap_factor);
637662
638- if ((overshoot_x != non_tile_overlap) && (overshoot_x <= num_tiles_x * (tile_size / 2 - tile_overlap))) {
639- // if tiles don't fit perfectly using the desired overlap
640- // and there is enough room to squeeze an extra tile without overlap becoming >0.5
641- num_tiles_x++;
642- }
643-
644- float tile_overlap_factor_x = (float )(tile_size * num_tiles_x - small_width) / (float )(tile_size * (num_tiles_x - 1 ));
645- if (num_tiles_x <= 2 ) {
646- if (small_width <= tile_size) {
647- num_tiles_x = 1 ;
648- tile_overlap_factor_x = 0 ;
649- } else {
650- num_tiles_x = 2 ;
651- tile_overlap_factor_x = (2 * tile_size - small_width) / (float )tile_size;
652- }
653- }
654-
655- int num_tiles_y = (small_height - tile_overlap) / non_tile_overlap;
656- int overshoot_y = ((num_tiles_y + 1 ) * non_tile_overlap + tile_overlap) % small_height;
657-
658- if ((overshoot_y != non_tile_overlap) && (overshoot_y <= num_tiles_y * (tile_size / 2 - tile_overlap))) {
659- // if tiles don't fit perfectly using the desired overlap
660- // and there is enough room to squeeze an extra tile without overlap becoming >0.5
661- num_tiles_y++;
662- }
663-
664- float tile_overlap_factor_y = (float )(tile_size * num_tiles_y - small_height) / (float )(tile_size * (num_tiles_y - 1 ));
665- if (num_tiles_y <= 2 ) {
666- if (small_height <= tile_size) {
667- num_tiles_y = 1 ;
668- tile_overlap_factor_y = 0 ;
669- } else {
670- num_tiles_y = 2 ;
671- tile_overlap_factor_y = (2 * tile_size - small_height) / (float )tile_size;
672- }
673- }
663+ int num_tiles_y;
664+ float tile_overlap_factor_y;
665+ sd_tiling_calc_tiles (num_tiles_y, tile_overlap_factor_y, small_height, tile_size, tile_overlap_factor);
674666
675667 LOG_DEBUG (" num tiles : %d, %d " , num_tiles_x, num_tiles_y);
676668 LOG_DEBUG (" optimal overlap : %f, %f (targeting %f)" , tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);
0 commit comments