Skip to content

Commit a27ff2e

Browse files
committed
refactor tile number calculation
1 parent e201588 commit a27ff2e

File tree

1 file changed

+33
-41
lines changed

1 file changed

+33
-41
lines changed

ggml_extend.hpp

Lines changed: 33 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,33 @@ __STATIC_INLINE__ void ggml_tensor_scale_output(struct ggml_tensor* src) {
607607

608608
typedef std::function<void(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;
609609

610+
// Computes, for one image dimension, how many tiles to use and the overlap
// factor that makes that many tiles of `tile_size` span exactly `small_dim`.
//
// Outputs (both are always written):
//   num_tiles_dim           - number of tiles along this dimension (>= 1)
//   tile_overlap_factor_dim - overlap between neighbouring tiles, expressed
//                             as a fraction of tile_size
// Inputs:
//   small_dim           - extent of the dimension being tiled
//   tile_size           - edge length of one tile, in the same unit as small_dim
//   tile_overlap_factor - requested overlap as a fraction of tile_size; the
//                         result is adjusted so the tiles fit small_dim exactly
//
// Degenerate inputs are handled instead of dividing by zero:
//   * tile_size <= 0, or small_dim <= tile_size (including small_dim <= 0),
//     yields a single tile with zero overlap;
//   * an overlap request that leaves no positive stride
//     (tile_overlap_factor >= 1) is clamped to a stride of one unit.
__STATIC_INLINE__ void
sd_tiling_calc_tiles(int &num_tiles_dim, float& tile_overlap_factor_dim, int small_dim, int tile_size, const float tile_overlap_factor) {
    // A dimension that fits inside one tile never needs more than one tile.
    // Returning here also keeps small_dim == 0 away from the modulo below.
    if (tile_size <= 0 || small_dim <= tile_size) {
        num_tiles_dim           = 1;
        tile_overlap_factor_dim = 0;
        return;
    }

    int tile_overlap     = static_cast<int>(tile_size * tile_overlap_factor);
    int non_tile_overlap = tile_size - tile_overlap;
    if (non_tile_overlap < 1) {
        // The stride is used as a divisor below; keep it strictly positive.
        non_tile_overlap = 1;
        tile_overlap     = tile_size - 1;
    }

    num_tiles_dim           = (small_dim - tile_overlap) / non_tile_overlap;
    const int overshoot_dim = ((num_tiles_dim + 1) * non_tile_overlap + tile_overlap) % small_dim;

    if ((overshoot_dim != non_tile_overlap) && (overshoot_dim <= num_tiles_dim * (tile_size / 2 - tile_overlap))) {
        // if tiles don't fit perfectly using the desired overlap
        // and there is enough room to squeeze an extra tile without overlap becoming >0.5
        num_tiles_dim++;
    }

    if (num_tiles_dim <= 2) {
        // small_dim > tile_size here, so at least two tiles are needed.
        // Handled separately so the general formula never divides by
        // (num_tiles_dim - 1) == 0.
        num_tiles_dim           = 2;
        tile_overlap_factor_dim = (2 * tile_size - small_dim) / static_cast<float>(tile_size);
    } else {
        // Spread the total excess (tiles * size - dim) over the (tiles - 1) seams.
        tile_overlap_factor_dim = static_cast<float>(tile_size * num_tiles_dim - small_dim) /
                                  static_cast<float>(tile_size * (num_tiles_dim - 1));
    }
}
636+
610637
// Tiling
611638
__STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
612639
output = ggml_set_f32(output, 0);
@@ -629,48 +656,13 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
629656
small_height = input_height;
630657
}
631658

632-
int tile_overlap = (tile_size * tile_overlap_factor);
633-
int non_tile_overlap = tile_size - tile_overlap;
634-
635-
int num_tiles_x = (small_width - tile_overlap) / non_tile_overlap;
636-
int overshoot_x = ((num_tiles_x + 1) * non_tile_overlap + tile_overlap) % small_width;
659+
int num_tiles_x;
660+
float tile_overlap_factor_x;
661+
sd_tiling_calc_tiles(num_tiles_x, tile_overlap_factor_x, small_width, tile_size, tile_overlap_factor);
637662

638-
if ((overshoot_x != non_tile_overlap) && (overshoot_x <= num_tiles_x * (tile_size / 2 - tile_overlap))) {
639-
// if tiles don't fit perfectly using the desired overlap
640-
// and there is enough room to squeeze an extra tile without overlap becoming >0.5
641-
num_tiles_x++;
642-
}
643-
644-
float tile_overlap_factor_x = (float)(tile_size * num_tiles_x - small_width) / (float)(tile_size * (num_tiles_x - 1));
645-
if (num_tiles_x <= 2) {
646-
if (small_width <= tile_size) {
647-
num_tiles_x = 1;
648-
tile_overlap_factor_x = 0;
649-
} else {
650-
num_tiles_x = 2;
651-
tile_overlap_factor_x = (2 * tile_size - small_width) / (float)tile_size;
652-
}
653-
}
654-
655-
int num_tiles_y = (small_height - tile_overlap) / non_tile_overlap;
656-
int overshoot_y = ((num_tiles_y + 1) * non_tile_overlap + tile_overlap) % small_height;
657-
658-
if ((overshoot_y != non_tile_overlap) && (overshoot_y <= num_tiles_y * (tile_size / 2 - tile_overlap))) {
659-
// if tiles don't fit perfectly using the desired overlap
660-
// and there is enough room to squeeze an extra tile without overlap becoming >0.5
661-
num_tiles_y++;
662-
}
663-
664-
float tile_overlap_factor_y = (float)(tile_size * num_tiles_y - small_height) / (float)(tile_size * (num_tiles_y - 1));
665-
if (num_tiles_y <= 2) {
666-
if (small_height <= tile_size) {
667-
num_tiles_y = 1;
668-
tile_overlap_factor_y = 0;
669-
} else {
670-
num_tiles_y = 2;
671-
tile_overlap_factor_y = (2 * tile_size - small_height) / (float)tile_size;
672-
}
673-
}
663+
int num_tiles_y;
664+
float tile_overlap_factor_y;
665+
sd_tiling_calc_tiles(num_tiles_y, tile_overlap_factor_y, small_height, tile_size, tile_overlap_factor);
674666

675667
LOG_DEBUG("num tiles : %d, %d ", num_tiles_x, num_tiles_y);
676668
LOG_DEBUG("optimal overlap : %f, %f (targeting %f)", tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);

0 commit comments

Comments
 (0)