Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions examples/cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ add_executable(${TARGET}
image_metadata.cpp
main.cpp
)
target_include_directories(${TARGET} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/.."
"${PROJECT_SOURCE_DIR}/src"
)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
if(SD_WEBP)
Expand Down
7 changes: 7 additions & 0 deletions examples/common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,10 @@ ArgOptions SDContextParams::get_options() {
"--high-noise-diffusion-model",
"path to the standalone high noise diffusion model",
&high_noise_diffusion_model_path},
{"",
"--embeddings-connectors",
"path to LTXAV embeddings connectors",
&embeddings_connectors_path},
{"",
"--vae",
"path to standalone vae model",
Expand Down Expand Up @@ -656,6 +660,7 @@ std::string SDContextParams::to_string() const {
<< " llm_vision_path: \"" << llm_vision_path << "\",\n"
<< " diffusion_model_path: \"" << diffusion_model_path << "\",\n"
<< " high_noise_diffusion_model_path: \"" << high_noise_diffusion_model_path << "\",\n"
<< " embeddings_connectors_path: \"" << embeddings_connectors_path << "\",\n"
<< " vae_path: \"" << vae_path << "\",\n"
<< " taesd_path: \"" << taesd_path << "\",\n"
<< " esrgan_path: \"" << esrgan_path << "\",\n"
Expand Down Expand Up @@ -712,6 +717,7 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool vae_decode_only, bool f
llm_vision_path.c_str(),
diffusion_model_path.c_str(),
high_noise_diffusion_model_path.c_str(),
embeddings_connectors_path.c_str(),
vae_path.c_str(),
taesd_path.c_str(),
control_net_path.c_str(),
Expand Down Expand Up @@ -2180,6 +2186,7 @@ sd_vid_gen_params_t SDGenerationParams::to_sd_vid_gen_params_t() {
params.strength = strength;
params.seed = seed;
params.video_frames = video_frames;
params.fps = fps;
params.vace_strength = vace_strength;
params.vae_tiling_params = vae_tiling_params;
params.cache = cache_params;
Expand Down
1 change: 1 addition & 0 deletions examples/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ struct SDContextParams {
std::string llm_vision_path;
std::string diffusion_model_path;
std::string high_noise_diffusion_model_path;
std::string embeddings_connectors_path;
std::string vae_path;
std::string taesd_path;
std::string esrgan_path;
Expand Down
2 changes: 2 additions & 0 deletions include/stable-diffusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ typedef struct {
const char* llm_vision_path;
const char* diffusion_model_path;
const char* high_noise_diffusion_model_path;
const char* embeddings_connectors_path;
const char* vae_path;
const char* taesd_path;
const char* control_net_path;
Expand Down Expand Up @@ -359,6 +360,7 @@ typedef struct {
float strength;
int64_t seed;
int video_frames;
int fps;
float vace_strength;
sd_tiling_params_t vae_tiling_params;
sd_cache_params_t cache;
Expand Down
58 changes: 58 additions & 0 deletions src/common_dit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,64 @@ namespace DiT {
x = ggml_ext_slice(ctx, x, 0, 0, W); // [N, C, H, W]
return x;
}

inline ggml_tensor* patchify(ggml_context* ctx,
ggml_tensor* x,
int pt,
int ph,
int pw,
int64_t N = 1) {
// x: [N*C, T, H, W]
// return: [N, h*w, C*pt*ph*pw]
int64_t C = x->ne[3] / N;
int64_t T = x->ne[2];
int64_t H = x->ne[1];
int64_t W = x->ne[0];
int64_t t_len = T / pt;
int64_t h_len = H / ph;
int64_t w_len = W / pw;

GGML_ASSERT(C * N == x->ne[3]);
GGML_ASSERT(t_len * pt == T && h_len * ph == H && w_len * pw == W);

x = ggml_reshape_4d(ctx, x, pw * w_len, ph * h_len, pt, t_len * C * N); // [N*C*t_len, pt, h_len*ph, w_len*pw]
x = ggml_ext_cont(ctx, ggml_ext_torch_permute(ctx, x, 0, 2, 1, 3)); // [N*C*t_len, h_len*ph, pt, w_len*pw]
x = ggml_reshape_4d(ctx, x, pw * w_len, pt, ph, h_len * t_len * C * N); // [N*C*t_len*h_len, ph, pt, w_len*pw]
x = ggml_ext_cont(ctx, ggml_ext_torch_permute(ctx, x, 0, 2, 1, 3)); // [N*C*t_len*h_len, pt, ph, w_len*pw]
x = ggml_reshape_4d(ctx, x, pw, w_len, ph * pt, h_len * t_len * C * N); // [N*C*t_len*h_len, pt*ph, w_len, pw]
x = ggml_ext_cont(ctx, ggml_ext_torch_permute(ctx, x, 0, 2, 1, 3)); // [N*C*t_len*h_len, w_len, pt*ph, pw]
x = ggml_reshape_4d(ctx, x, pw * ph * pt, w_len * h_len * t_len, C, N); // [N, C, t_len*h_len*w_len, pt*ph*pw]
x = ggml_ext_cont(ctx, ggml_ext_torch_permute(ctx, x, 0, 2, 1, 3)); // [N, t_len*h_len*w_len, C, pt*ph*pw]
x = ggml_reshape_4d(ctx, x, pw * ph * pt * C, w_len * h_len * t_len, N, 1); // [N, t_len*h_len*w_len, C*pt*ph*pw]
return x;
}

inline ggml_tensor* unpatchify(ggml_context* ctx,
ggml_tensor* x,
int64_t t_len,
int64_t h_len,
int64_t w_len,
int pt,
int ph,
int pw) {
// x: [N, t_len*h_len*w_len, pt*ph*pw*C]
// return: [N*C, t_len*pt, h_len*ph, w_len*pw]
int64_t N = x->ne[3];
int64_t C = x->ne[0] / pt / ph / pw;

GGML_ASSERT(C * pt * ph * pw == x->ne[0]);

x = ggml_reshape_4d(ctx, x, C, pw * ph * pt, w_len * h_len * t_len, N); // [N, t_len*h_len*w_len, pt*ph*pw, C]
x = ggml_ext_cont(ctx, ggml_ext_torch_permute(ctx, x, 1, 2, 0, 3)); // [N, C, t_len*h_len*w_len, pt*ph*pw]
x = ggml_reshape_4d(ctx, x, pw, ph * pt, w_len, h_len * t_len * C * N); // [N*C*t_len*h_len, w_len, pt*ph, pw]
x = ggml_ext_cont(ctx, ggml_ext_torch_permute(ctx, x, 0, 2, 1, 3)); // [N*C*t_len*h_len, pt*ph, w_len, pw]
x = ggml_reshape_4d(ctx, x, pw * w_len, ph, pt, h_len * t_len * C * N); // [N*C*t_len*h_len, pt, ph, w_len*pw]
x = ggml_ext_cont(ctx, ggml_ext_torch_permute(ctx, x, 0, 2, 1, 3)); // [N*C*t_len*h_len, ph, pt, w_len*pw]
x = ggml_reshape_4d(ctx, x, pw * w_len, pt, ph * h_len, t_len * C * N); // [N*C*t_len, h_len*ph, pt, w_len*pw]
x = ggml_ext_cont(ctx, ggml_ext_torch_permute(ctx, x, 0, 2, 1, 3)); // [N*C*t_len, pt, h_len*ph, w_len*pw]
x = ggml_reshape_4d(ctx, x, pw * w_len, ph * h_len, pt * t_len, C * N); // [N*C, t_len*pt, h_len*ph, w_len*pw]
return x;
}
} // namespace DiT

#endif // __COMMON_DIT_HPP__
Loading
Loading