Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,8 @@ arguments:
--vae-on-cpu keep vae in cpu (for low vram)
--clip-on-cpu keep clip in cpu (for low vram)
--diffusion-fa use flash attention in the diffusion model (for low vram)
Might lower quality, since it implies converting k and v to f16.
This might crash if it is not supported by the backend.
--diffusion-conv-direct use Conv2D direct in the diffusion model
This might crash if it is not supported by the backend.
--vae-conv-direct use Conv2D direct in the vae model (should improve the performance)
This might crash if it is not supported by the backend.
--control-net-cpu keep controlnet in cpu (for low vram)
Expand Down
27 changes: 17 additions & 10 deletions common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,21 @@ class DownSampleBlock : public GGMLBlock {
int channels;
int out_channels;
bool vae_downsample;
bool direct = false;

public:
DownSampleBlock(int channels,
int out_channels,
bool vae_downsample = false)
bool vae_downsample = false,
bool direct = false)
: channels(channels),
out_channels(out_channels),
vae_downsample(vae_downsample) {
vae_downsample(vae_downsample),
direct(direct) {
if (vae_downsample) {
blocks["conv"] = std::shared_ptr<GGMLBlock>(new Conv2d(channels, out_channels, {3, 3}, {2, 2}, {0, 0}));
blocks["conv"] = std::shared_ptr<GGMLBlock>(new Conv2d(channels, out_channels, {3, 3}, {2, 2}, {0, 0}, {1, 1}, true, direct));
} else {
blocks["op"] = std::shared_ptr<GGMLBlock>(new Conv2d(channels, out_channels, {3, 3}, {2, 2}, {1, 1}));
blocks["op"] = std::shared_ptr<GGMLBlock>(new Conv2d(channels, out_channels, {3, 3}, {2, 2}, {1, 1}, {1, 1}, true, direct));
}
}

Expand All @@ -43,13 +46,16 @@ class UpSampleBlock : public GGMLBlock {
protected:
int channels;
int out_channels;
bool direct = false;

public:
UpSampleBlock(int channels,
int out_channels)
int out_channels,
bool direct = false)
: channels(channels),
out_channels(out_channels) {
blocks["conv"] = std::shared_ptr<GGMLBlock>(new Conv2d(channels, out_channels, {3, 3}, {1, 1}, {1, 1}));
out_channels(out_channels),
direct(direct) {
blocks["conv"] = std::shared_ptr<GGMLBlock>(new Conv2d(channels, out_channels, {3, 3}, {1, 1}, {1, 1}, {1, 1}, true, direct));
}

struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
Expand Down Expand Up @@ -381,7 +387,8 @@ class SpatialTransformer : public GGMLBlock {
int64_t d_head,
int64_t depth,
int64_t context_dim,
bool flash_attn = false)
bool flash_attn = false,
bool direct = false)
: in_channels(in_channels),
n_head(n_head),
d_head(d_head),
Expand All @@ -391,14 +398,14 @@ class SpatialTransformer : public GGMLBlock {
// disable_self_attn is always False
int64_t inner_dim = n_head * d_head; // in_channels
blocks["norm"] = std::shared_ptr<GGMLBlock>(new GroupNorm32(in_channels));
blocks["proj_in"] = std::shared_ptr<GGMLBlock>(new Conv2d(in_channels, inner_dim, {1, 1}));
blocks["proj_in"] = std::shared_ptr<GGMLBlock>(new Conv2d(in_channels, inner_dim, {1, 1}, {1, 1}, {0, 0}, {1, 1}, true, direct));

for (int i = 0; i < depth; i++) {
std::string name = "transformer_blocks." + std::to_string(i);
blocks[name] = std::shared_ptr<GGMLBlock>(new BasicTransformerBlock(inner_dim, n_head, d_head, context_dim, false, flash_attn));
}

blocks["proj_out"] = std::shared_ptr<GGMLBlock>(new Conv2d(inner_dim, in_channels, {1, 1}));
blocks["proj_out"] = std::shared_ptr<GGMLBlock>(new Conv2d(inner_dim, in_channels, {1, 1}, {1, 1}, {0, 0}, {1, 1}, true, direct));
}

virtual struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* context) {
Expand Down
5 changes: 3 additions & 2 deletions diffusion_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ struct UNetModel : public DiffusionModel {
UNetModel(ggml_backend_t backend,
const String2GGMLType& tensor_types = {},
SDVersion version = VERSION_SD1,
bool flash_attn = false)
: unet(backend, tensor_types, "model.diffusion_model", version, flash_attn) {
bool flash_attn = false,
bool direct = false)
: unet(backend, tensor_types, "model.diffusion_model", version, flash_attn, direct) {
}

void alloc_params_buffer() {
Expand Down
12 changes: 12 additions & 0 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ struct SDParams {
bool clip_on_cpu = false;
bool vae_on_cpu = false;
bool diffusion_flash_attn = false;
bool diffusion_conv_direct = false;
bool vae_conv_direct = false;
bool canny_preprocess = false;
bool color = false;
int upscale_repeats = 1;
Expand Down Expand Up @@ -142,6 +144,8 @@ void print_params(SDParams params) {
printf(" controlnet cpu: %s\n", params.control_net_cpu ? "true" : "false");
printf(" vae decoder on cpu:%s\n", params.vae_on_cpu ? "true" : "false");
printf(" diffusion flash attention:%s\n", params.diffusion_flash_attn ? "true" : "false");
printf(" diffusion Conv2D direct:%s\n", params.diffusion_conv_direct ? "true" : "false");
printf(" vae Conv2D direct:%s\n", params.vae_conv_direct ? "true" : "false");
printf(" strength(control): %.2f\n", params.control_strength);
printf(" prompt: %s\n", params.prompt.c_str());
printf(" negative_prompt: %s\n", params.negative_prompt.c_str());
Expand Down Expand Up @@ -232,6 +236,10 @@ void print_usage(int argc, const char* argv[]) {
printf(" --diffusion-fa use flash attention in the diffusion model (for low vram)\n");
printf(" Might lower quality, since it implies converting k and v to f16.\n");
printf(" This might crash if it is not supported by the backend.\n");
printf(" --diffusion-conv-direct use Conv2D direct in the diffusion model\n");
printf(" This might crash if it is not supported by the backend.\n");
printf(" --vae-conv-direct use Conv2D direct in the vae model (should improve the performance)\n");
printf(" This might crash if it is not supported by the backend.\n");
printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
printf(" --canny apply canny preprocessor (edge detection)\n");
printf(" --color colors the logging tags according to level\n");
Expand Down Expand Up @@ -422,6 +430,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
{"", "--clip-on-cpu", "", true, &params.clip_on_cpu},
{"", "--vae-on-cpu", "", true, &params.vae_on_cpu},
{"", "--diffusion-fa", "", true, &params.diffusion_flash_attn},
{"", "--diffusion-conv-direct", "", true, &params.diffusion_conv_direct},
{"", "--vae-conv-direct", "", true, &params.vae_conv_direct},
{"", "--canny", "", true, &params.canny_preprocess},
{"-v", "--verbos", "", true, &params.verbose},
{"", "--color", "", true, &params.color},
Expand Down Expand Up @@ -901,6 +911,8 @@ int main(int argc, const char* argv[]) {
params.control_net_cpu,
params.vae_on_cpu,
params.diffusion_flash_attn,
params.diffusion_conv_direct,
params.vae_conv_direct,
params.chroma_use_dit_mask,
params.chroma_use_t5_mask,
params.chroma_t5_mask_pad,
Expand Down
32 changes: 29 additions & 3 deletions ggml_extend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,25 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d(struct ggml_context* ctx,
return x;
}

// Same contract as ggml_nn_conv_2d, but lowers to ggml_conv_2d_direct,
// letting the backend run its dedicated conv kernel where supported.
// x: input tensor, w: conv weight, b: optional per-channel bias (NULL to skip).
// s0/s1: stride, p0/p1: padding, d0/d1: dilation.
__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d_direct(struct ggml_context* ctx,
                                                             struct ggml_tensor* x,
                                                             struct ggml_tensor* w,
                                                             struct ggml_tensor* b,
                                                             int s0 = 1,
                                                             int s1 = 1,
                                                             int p0 = 0,
                                                             int p1 = 0,
                                                             int d0 = 1,
                                                             int d1 = 1) {
    struct ggml_tensor* out = ggml_conv_2d_direct(ctx, w, x, s0, s1, p0, p1, d0, d1);
    if (b == NULL) {
        return out;
    }
    // Reshape the bias [OC] to [1, 1, OC, 1] so ggml_add broadcasts it
    // across the spatial dimensions and the batch.
    struct ggml_tensor* bias = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
    return ggml_add(ctx, out, bias);
}

// w: [OC,IC, KD, 1 * 1]
// x: [N, IC, IH, IW]
// b: [OC,]
Expand Down Expand Up @@ -1464,6 +1483,7 @@ class Conv2d : public UnaryBlock {
std::pair<int, int> padding;
std::pair<int, int> dilation;
bool bias;
bool direct;

void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F16;
Expand All @@ -1481,22 +1501,28 @@ class Conv2d : public UnaryBlock {
std::pair<int, int> stride = {1, 1},
std::pair<int, int> padding = {0, 0},
std::pair<int, int> dilation = {1, 1},
bool bias = true)
bool bias = true,
bool direct = false)
: in_channels(in_channels),
out_channels(out_channels),
kernel_size(kernel_size),
stride(stride),
padding(padding),
dilation(dilation),
bias(bias) {}
bias(bias),
direct(direct) {}

struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
struct ggml_tensor* w = params["weight"];
struct ggml_tensor* b = NULL;
if (bias) {
b = params["bias"];
}
return ggml_nn_conv_2d(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
if (direct) {
return ggml_nn_conv_2d_direct(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
} else {
return ggml_nn_conv_2d(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
}
}
};

Expand Down
15 changes: 12 additions & 3 deletions stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,12 @@ class StableDiffusionGGML {
LOG_INFO("CLIP: Using CPU backend");
clip_backend = ggml_backend_cpu_init();
}
if (sd_ctx_params->diffusion_conv_direct) {
LOG_INFO("Using Conv2D direct in the diffusion model");
}
if (sd_ctx_params->vae_conv_direct){
LOG_INFO("Using Conv2D direct in the vae model");
}
if (sd_ctx_params->diffusion_flash_attn) {
LOG_INFO("Using flash attention in the diffusion model");
}
Expand Down Expand Up @@ -373,7 +379,8 @@ class StableDiffusionGGML {
diffusion_model = std::make_shared<UNetModel>(backend,
model_loader.tensor_storages_types,
version,
sd_ctx_params->diffusion_flash_attn);
sd_ctx_params->diffusion_flash_attn,
sd_ctx_params->diffusion_conv_direct);
}

cond_stage_model->alloc_params_buffer();
Expand All @@ -394,15 +401,17 @@ class StableDiffusionGGML {
"first_stage_model",
vae_decode_only,
false,
version);
version,
sd_ctx_params->vae_conv_direct);
first_stage_model->alloc_params_buffer();
first_stage_model->get_param_tensors(tensors, "first_stage_model");
} else {
tae_first_stage = std::make_shared<TinyAutoEncoder>(backend,
model_loader.tensor_storages_types,
"decoder.layers",
vae_decode_only,
version);
version,
sd_ctx_params->vae_conv_direct);
}
// first_stage_model->get_param_tensors(tensors, "first_stage_model.");

Expand Down
2 changes: 2 additions & 0 deletions stable-diffusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ typedef struct {
bool keep_control_net_on_cpu;
bool keep_vae_on_cpu;
bool diffusion_flash_attn;
bool diffusion_conv_direct;
bool vae_conv_direct;
bool chroma_use_dit_mask;
bool chroma_use_t5_mask;
int chroma_t5_mask_pad;
Expand Down
Loading
Loading