Skip to content

Commit 4da1ad9

Browse files
committed
Merge branch 'remoteManagement' into crokeso
2 parents 650f129 + b2a9ea9 commit 4da1ad9

File tree

13 files changed

+14999
-10758
lines changed

13 files changed

+14999
-10758
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ ifdef LLAMA_HIPBLAS
316316
ifeq ($(wildcard /opt/rocm),)
317317
ROCM_PATH ?= /usr
318318
ifdef LLAMA_PORTABLE
319-
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx942 gfx1010 gfx1030 gfx1031 gfx1032 gfx1100 gfx1101 gfx1102 $(shell $(shell which amdgpu-arch))
319+
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx942 gfx1010 gfx1030 gfx1031 gfx1032 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201 $(shell $(shell which amdgpu-arch))
320320
else
321321
GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
322322
endif
@@ -325,7 +325,7 @@ endif
325325
else
326326
ROCM_PATH ?= /opt/rocm
327327
ifdef LLAMA_PORTABLE
328-
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx942 gfx1010 gfx1030 gfx1031 gfx1032 gfx1100 gfx1101 gfx1102 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
328+
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx942 gfx1010 gfx1030 gfx1031 gfx1032 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
329329
else
330330
GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
331331
endif

expose.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,9 @@ extern "C"
278278
int get_last_token_count() {
279279
return last_token_count;
280280
}
281+
int get_last_input_count() {
282+
return last_input_count;
283+
}
281284
int get_last_seed()
282285
{
283286
return last_seed;

expose.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ struct sd_load_model_inputs
169169
const int threads = 0;
170170
const int quant = 0;
171171
const bool taesd = false;
172-
const bool notile = false;
172+
const int tiled_vae_threshold = 0;
173173
const char * t5xxl_filename = nullptr;
174174
const char * clipl_filename = nullptr;
175175
const char * clipg_filename = nullptr;
@@ -293,6 +293,7 @@ extern bool generation_finished;
293293
extern float last_eval_time;
294294
extern float last_process_time;
295295
extern int last_token_count;
296+
extern int last_input_count;
296297
extern int last_seed;
297298
extern int total_gens;
298299
extern int total_img_gens;

ggml/include/ggml.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,7 @@ extern "C" {
614614
GGML_OP_UPSCALE, // nearest interpolate
615615
GGML_OP_PAD,
616616
GGML_OP_PAD_REFLECT_1D,
617+
GGML_OP_ROLL,
617618
GGML_OP_ARANGE,
618619
GGML_OP_TIMESTEP_EMBEDDING,
619620
GGML_OP_ARGSORT,
@@ -1999,6 +2000,17 @@ extern "C" {
19992000
int p0,
20002001
int p1);
20012002

2003+
// Move tensor elements by an offset given for each dimension. Elements that
2004+
// are shifted beyond the last position are wrapped around to the beginning.
2005+
GGML_API struct ggml_tensor * ggml_roll(
2006+
struct ggml_context * ctx,
2007+
struct ggml_tensor * a,
2008+
int shift0,
2009+
int shift1,
2010+
int shift2,
2011+
int shift3);
2012+
2013+
20022014
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
20032015
// timesteps: [N,]
20042016
// return: [N, dim]

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2644,6 +2644,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
26442644
{
26452645
ggml_compute_forward_pad_reflect_1d(params, tensor);
26462646
} break;
2647+
case GGML_OP_ROLL:
2648+
{
2649+
ggml_compute_forward_roll(params, tensor);
2650+
} break;
26472651
case GGML_OP_ARANGE:
26482652
{
26492653
ggml_compute_forward_arange(params, tensor);
@@ -2978,6 +2982,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
29782982
case GGML_OP_UPSCALE:
29792983
case GGML_OP_PAD:
29802984
case GGML_OP_PAD_REFLECT_1D:
2985+
case GGML_OP_ROLL:
29812986
case GGML_OP_ARANGE:
29822987
case GGML_OP_TIMESTEP_EMBEDDING:
29832988
case GGML_OP_ARGSORT:

ggml/src/ggml-cpu/ops.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7648,6 +7648,73 @@ void ggml_compute_forward_pad_reflect_1d(
76487648
}
76497649
}
76507650

7651+
// ggml_compute_forward_roll
7652+
7653+
static int64_t ggml_wrap_index(int64_t i, int64_t ne) {
7654+
if (i < 0) {
7655+
return i + ne;
7656+
} else if (i >= ne) {
7657+
return i - ne;
7658+
}
7659+
return i;
7660+
}
7661+
7662+
static void ggml_compute_forward_roll_f32(
7663+
const ggml_compute_params * params,
7664+
ggml_tensor * dst) {
7665+
7666+
const ggml_tensor * src0 = dst->src[0];
7667+
const float * src_data = (const float *) src0->data;
7668+
float * dst_data = (float *) dst->data;
7669+
7670+
GGML_TENSOR_UNARY_OP_LOCALS
7671+
7672+
const int s0 = ggml_get_op_params_i32(dst, 0);
7673+
const int s1 = ggml_get_op_params_i32(dst, 1);
7674+
const int s2 = ggml_get_op_params_i32(dst, 2);
7675+
const int s3 = ggml_get_op_params_i32(dst, 3);
7676+
7677+
const int64_t total = ne1 * ne2 * ne3;
7678+
const int64_t per_thread = (total + params->nth) / params->nth;
7679+
const int64_t start = params->ith * per_thread;
7680+
const int64_t end = std::min(start + per_thread, total);
7681+
7682+
for (int64_t i = start; i < end; ++i) {
7683+
const int64_t i1 = i % ne1;
7684+
const int64_t i2 = (i / ne1) % ne2;
7685+
const int64_t i3 = i / (ne2 * ne1);
7686+
float * dst_row = dst_data + (i3*nb3 + i2*nb2 + i1*nb1) / sizeof(float);
7687+
7688+
const int64_t i01 = ggml_wrap_index(i1 - s1, ne01);
7689+
const int64_t i02 = ggml_wrap_index(i2 - s2, ne02);
7690+
const int64_t i03 = ggml_wrap_index(i3 - s3, ne03);
7691+
const float * src_row = src_data + (i03*nb03 + i02*nb02 + i01*nb01) / sizeof(float);
7692+
7693+
const int64_t s = ggml_wrap_index(-s0, ne00);
7694+
const int64_t n = ne00 - s;
7695+
ggml_vec_cpy_f32(n, dst_row, src_row + s);
7696+
ggml_vec_cpy_f32(s, dst_row + n, src_row);
7697+
}
7698+
}
7699+
7700+
void ggml_compute_forward_roll(
7701+
const ggml_compute_params * params,
7702+
ggml_tensor * dst) {
7703+
7704+
const ggml_tensor * src0 = dst->src[0];
7705+
7706+
switch (src0->type) {
7707+
case GGML_TYPE_F32:
7708+
{
7709+
ggml_compute_forward_roll_f32(params, dst);
7710+
} break;
7711+
default:
7712+
{
7713+
GGML_ABORT("fatal error");
7714+
}
7715+
}
7716+
}
7717+
76517718
// ggml_compute_forward_arange
76527719

76537720
static void ggml_compute_forward_arange_f32(

ggml/src/ggml-cpu/ops.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ void ggml_compute_forward_pool_2d_back(const struct ggml_compute_params * params
7979
void ggml_compute_forward_upscale(const struct ggml_compute_params * params, struct ggml_tensor * dst);
8080
void ggml_compute_forward_pad(const struct ggml_compute_params * params, struct ggml_tensor * dst);
8181
void ggml_compute_forward_pad_reflect_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
82+
void ggml_compute_forward_roll(const struct ggml_compute_params * params, struct ggml_tensor * dst);
8283
void ggml_compute_forward_arange(const struct ggml_compute_params * params, struct ggml_tensor * dst);
8384
void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst);
8485
void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);

ggml/src/ggml.c

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1796,6 +1796,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
17961796
"UPSCALE",
17971797
"PAD",
17981798
"PAD_REFLECT_1D",
1799+
"ROLL",
17991800
"ARANGE",
18001801
"TIMESTEP_EMBEDDING",
18011802
"ARGSORT",
@@ -1828,7 +1829,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
18281829
"OPT_STEP_ADAMW",
18291830
};
18301831

1831-
static_assert(GGML_OP_COUNT == 87, "GGML_OP_COUNT != 87");
1832+
static_assert(GGML_OP_COUNT == 88, "GGML_OP_COUNT != 88");
18321833

18331834
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
18341835
"none",
@@ -1896,6 +1897,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
18961897
"upscale(x)",
18971898
"pad(x)",
18981899
"pad_reflect_1d(x)",
1900+
"roll(x)",
18991901
"arange(start, stop, step)",
19001902
"timestep_embedding(timesteps, dim, max_period)",
19011903
"argsort(x)",
@@ -1928,7 +1930,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
19281930
"adamw(x)",
19291931
};
19301932

1931-
static_assert(GGML_OP_COUNT == 87, "GGML_OP_COUNT != 87");
1933+
static_assert(GGML_OP_COUNT == 88, "GGML_OP_COUNT != 88");
19321934

19331935
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
19341936

@@ -5506,6 +5508,34 @@ struct ggml_tensor * ggml_pad_reflect_1d(
55065508
return result;
55075509
}
55085510

5511+
// ggml_roll
5512+
5513+
struct ggml_tensor * ggml_roll(
5514+
struct ggml_context * ctx,
5515+
struct ggml_tensor * a,
5516+
int shift0,
5517+
int shift1,
5518+
int shift2,
5519+
int shift3) {
5520+
GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
5521+
GGML_ASSERT(abs(shift0) < a->ne[0]);
5522+
GGML_ASSERT(abs(shift1) < a->ne[1]);
5523+
GGML_ASSERT(abs(shift2) < a->ne[2]);
5524+
GGML_ASSERT(abs(shift3) < a->ne[3]);
5525+
5526+
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5527+
5528+
ggml_set_op_params_i32(result, 0, shift0);
5529+
ggml_set_op_params_i32(result, 1, shift1);
5530+
ggml_set_op_params_i32(result, 2, shift2);
5531+
ggml_set_op_params_i32(result, 3, shift3);
5532+
5533+
result->op = GGML_OP_ROLL;
5534+
result->src[0] = a;
5535+
5536+
return result;
5537+
}
5538+
55095539
// ggml_arange
55105540

55115541
struct ggml_tensor * ggml_arange(

gpttype_adapter.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ bool generation_finished;
6161
float last_process_time = 0;
6262
float last_eval_time = 0;
6363
int last_token_count = 0;
64+
int last_input_count = 0;
6465
int last_seed = -1;
6566
int total_gens = 0;
6667
int last_draft_success = 0;
@@ -1684,7 +1685,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
16841685
for (auto reject: llama_grammar_reject_candidates(grammar->rules, grammar->stacks, candidates_grammar)) {
16851686
rejects[reject.index] = true;
16861687
}
1687-
1688+
16881689
auto first = candidates->data;
16891690
auto last = first + candidates->size;
16901691
last = std::remove_if(first, last,
@@ -4673,6 +4674,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
46734674
last_eval_time = pt2;
46744675
last_process_time = pt1;
46754676
last_token_count = realnpredict;
4677+
last_input_count = (finaltokcount<0?0:finaltokcount);
46764678
last_seed = kcpp_data->seed;
46774679
last_draft_failed = draft_failures;
46784680
last_draft_success = draft_successes;

0 commit comments

Comments (0)