Skip to content

Commit bd0708c

Browse files
adlternative authored and gitster committed
ref-filter: add %(raw) atom
Add a new formatting option `%(raw)`, which prints the raw object data without any changes. This will further help migrate all cat-file formatting logic from cat-file to ref-filter. The raw data of blob and tree objects may contain '\0', but most of the logic in `ref-filter` depends on the output of the atom being text (specifically, no embedded NULs in it). E.g. `quote_formatting()` uses `strbuf_addstr()` or `*_quote_buf()` to add the data to the buffer. If the raw data of a tree object is `100644 one\0...`, only the `100644 one` will be added to the buffer, which is incorrect. Therefore, we need to find a way to record the length of the atom_value's member `s`. Although strbuf can already record the string and its length, if we want to replace the type of atom_value's member `s` with strbuf, the many places in ref-filter that fill `v->s` with dynamically allocated memory are not easy to replace. At the same time, we need to check if `v->s == NULL` in populate_value(), and strbuf cannot easily distinguish NULL and empty strings, but a C-style "const char *" can. So add a new member to `struct atom_value`: `s_size`, which records the raw object size; it lets us add raw object data to the buffer or compare two buffers which contain raw object data. Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`, and `--perl` because if the binary raw data is passed to a variable in such languages, these may not support arbitrary binary data in their string variable type. 
Reviewed-by: Jacob Keller <[email protected]> Mentored-by: Christian Couder <[email protected]> Mentored-by: Hariom Verma <[email protected]> Helped-by: Bagas Sanjaya <[email protected]> Helped-by: Ævar Arnfjörð Bjarmason <[email protected]> Helped-by: Felipe Contreras <[email protected]> Helped-by: Phillip Wood <[email protected]> Helped-by: Junio C Hamano <[email protected]> Based-on-patch-by: Olga Telezhnaya <[email protected]> Signed-off-by: ZheNing Hu <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 311d0b8 commit bd0708c

File tree

3 files changed

+340
-28
lines changed

3 files changed

+340
-28
lines changed

Documentation/git-for-each-ref.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,15 @@ and `date` to extract the named component. For email fields (`authoremail`,
235235
without angle brackets, and `:localpart` to get the part before the `@` symbol
236236
out of the trimmed email.
237237

238+
The raw data in an object is `raw`.
239+
240+
raw:size::
241+
The raw data size of the object.
242+
243+
Note that `--format=%(raw)` cannot be used with `--python`, `--shell`, `--tcl`,
244+
or `--perl` because such languages may not support arbitrary binary data in their
245+
string variable types.
246+
238247
The message in a commit or a tag object is `contents`, from which
239248
`contents:<part>` can be used to extract various parts out of:
240249

ref-filter.c

Lines changed: 115 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ enum atom_type {
144144
ATOM_BODY,
145145
ATOM_TRAILERS,
146146
ATOM_CONTENTS,
147+
ATOM_RAW,
147148
ATOM_UPSTREAM,
148149
ATOM_PUSH,
149150
ATOM_SYMREF,
@@ -189,6 +190,9 @@ static struct used_atom {
189190
struct process_trailer_options trailer_opts;
190191
unsigned int nlines;
191192
} contents;
193+
struct {
194+
enum { RAW_BARE, RAW_LENGTH } option;
195+
} raw_data;
192196
struct {
193197
cmp_status cmp_status;
194198
const char *str;
@@ -426,6 +430,18 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
426430
return 0;
427431
}
428432

433+
static int raw_atom_parser(const struct ref_format *format, struct used_atom *atom,
434+
const char *arg, struct strbuf *err)
435+
{
436+
if (!arg)
437+
atom->u.raw_data.option = RAW_BARE;
438+
else if (!strcmp(arg, "size"))
439+
atom->u.raw_data.option = RAW_LENGTH;
440+
else
441+
return strbuf_addf_ret(err, -1, _("unrecognized %%(raw) argument: %s"), arg);
442+
return 0;
443+
}
444+
429445
static int oid_atom_parser(const struct ref_format *format, struct used_atom *atom,
430446
const char *arg, struct strbuf *err)
431447
{
@@ -586,6 +602,7 @@ static struct {
586602
[ATOM_BODY] = { "body", SOURCE_OBJ, FIELD_STR, body_atom_parser },
587603
[ATOM_TRAILERS] = { "trailers", SOURCE_OBJ, FIELD_STR, trailers_atom_parser },
588604
[ATOM_CONTENTS] = { "contents", SOURCE_OBJ, FIELD_STR, contents_atom_parser },
605+
[ATOM_RAW] = { "raw", SOURCE_OBJ, FIELD_STR, raw_atom_parser },
589606
[ATOM_UPSTREAM] = { "upstream", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
590607
[ATOM_PUSH] = { "push", SOURCE_NONE, FIELD_STR, remote_ref_atom_parser },
591608
[ATOM_SYMREF] = { "symref", SOURCE_NONE, FIELD_STR, refname_atom_parser },
@@ -620,12 +637,19 @@ struct ref_formatting_state {
620637

621638
struct atom_value {
622639
const char *s;
640+
ssize_t s_size;
623641
int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state,
624642
struct strbuf *err);
625643
uintmax_t value; /* used for sorting when not FIELD_STR */
626644
struct used_atom *atom;
627645
};
628646

647+
#define ATOM_SIZE_UNSPECIFIED (-1)
648+
649+
#define ATOM_VALUE_INIT { \
650+
.s_size = ATOM_SIZE_UNSPECIFIED \
651+
}
652+
629653
/*
630654
* Used to parse format string and sort specifiers
631655
*/
@@ -644,13 +668,6 @@ static int parse_ref_filter_atom(const struct ref_format *format,
644668
return strbuf_addf_ret(err, -1, _("malformed field name: %.*s"),
645669
(int)(ep-atom), atom);
646670

647-
/* Do we have the atom already used elsewhere? */
648-
for (i = 0; i < used_atom_cnt; i++) {
649-
int len = strlen(used_atom[i].name);
650-
if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
651-
return i;
652-
}
653-
654671
/*
655672
* If the atom name has a colon, strip it and everything after
656673
* it off - it specifies the format for this entry, and
@@ -660,6 +677,13 @@ static int parse_ref_filter_atom(const struct ref_format *format,
660677
arg = memchr(sp, ':', ep - sp);
661678
atom_len = (arg ? arg : ep) - sp;
662679

680+
/* Do we have the atom already used elsewhere? */
681+
for (i = 0; i < used_atom_cnt; i++) {
682+
int len = strlen(used_atom[i].name);
683+
if (len == ep - atom && !memcmp(used_atom[i].name, atom, len))
684+
return i;
685+
}
686+
663687
/* Is the atom a valid one? */
664688
for (i = 0; i < ARRAY_SIZE(valid_atom); i++) {
665689
int len = strlen(valid_atom[i].name);
@@ -709,11 +733,14 @@ static int parse_ref_filter_atom(const struct ref_format *format,
709733
return at;
710734
}
711735

712-
static void quote_formatting(struct strbuf *s, const char *str, int quote_style)
736+
static void quote_formatting(struct strbuf *s, const char *str, ssize_t len, int quote_style)
713737
{
714738
switch (quote_style) {
715739
case QUOTE_NONE:
716-
strbuf_addstr(s, str);
740+
if (len < 0)
741+
strbuf_addstr(s, str);
742+
else
743+
strbuf_add(s, str, len);
717744
break;
718745
case QUOTE_SHELL:
719746
sq_quote_buf(s, str);
@@ -740,9 +767,11 @@ static int append_atom(struct atom_value *v, struct ref_formatting_state *state,
740767
* encountered.
741768
*/
742769
if (!state->stack->prev)
743-
quote_formatting(&state->stack->output, v->s, state->quote_style);
744-
else
770+
quote_formatting(&state->stack->output, v->s, v->s_size, state->quote_style);
771+
else if (v->s_size < 0)
745772
strbuf_addstr(&state->stack->output, v->s);
773+
else
774+
strbuf_add(&state->stack->output, v->s, v->s_size);
746775
return 0;
747776
}
748777

@@ -842,21 +871,23 @@ static int if_atom_handler(struct atom_value *atomv, struct ref_formatting_state
842871
return 0;
843872
}
844873

845-
static int is_empty(const char *s)
874+
static int is_empty(struct strbuf *buf)
846875
{
847-
while (*s != '\0') {
848-
if (!isspace(*s))
849-
return 0;
850-
s++;
851-
}
852-
return 1;
853-
}
876+
const char *cur = buf->buf;
877+
const char *end = buf->buf + buf->len;
878+
879+
while (cur != end && (isspace(*cur)))
880+
cur++;
881+
882+
return cur == end;
883+
}
854884

855885
static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_state *state,
856886
struct strbuf *err)
857887
{
858888
struct ref_formatting_stack *cur = state->stack;
859889
struct if_then_else *if_then_else = NULL;
890+
size_t str_len = 0;
860891

861892
if (cur->at_end == if_then_else_handler)
862893
if_then_else = (struct if_then_else *)cur->at_end_data;
@@ -867,18 +898,22 @@ static int then_atom_handler(struct atom_value *atomv, struct ref_formatting_sta
867898
if (if_then_else->else_atom_seen)
868899
return strbuf_addf_ret(err, -1, _("format: %%(then) atom used after %%(else)"));
869900
if_then_else->then_atom_seen = 1;
901+
if (if_then_else->str)
902+
str_len = strlen(if_then_else->str);
870903
/*
871904
* If the 'equals' or 'notequals' attribute is used then
872905
* perform the required comparison. If not, only non-empty
873906
* strings satisfy the 'if' condition.
874907
*/
875908
if (if_then_else->cmp_status == COMPARE_EQUAL) {
876-
if (!strcmp(if_then_else->str, cur->output.buf))
909+
if (str_len == cur->output.len &&
910+
!memcmp(if_then_else->str, cur->output.buf, cur->output.len))
877911
if_then_else->condition_satisfied = 1;
878912
} else if (if_then_else->cmp_status == COMPARE_UNEQUAL) {
879-
if (strcmp(if_then_else->str, cur->output.buf))
913+
if (str_len != cur->output.len ||
914+
memcmp(if_then_else->str, cur->output.buf, cur->output.len))
880915
if_then_else->condition_satisfied = 1;
881-
} else if (cur->output.len && !is_empty(cur->output.buf))
916+
} else if (cur->output.len && !is_empty(&cur->output))
882917
if_then_else->condition_satisfied = 1;
883918
strbuf_reset(&cur->output);
884919
return 0;
@@ -924,7 +959,7 @@ static int end_atom_handler(struct atom_value *atomv, struct ref_formatting_stat
924959
* only on the topmost supporting atom.
925960
*/
926961
if (!current->prev->prev) {
927-
quote_formatting(&s, current->output.buf, state->quote_style);
962+
quote_formatting(&s, current->output.buf, current->output.len, state->quote_style);
928963
strbuf_swap(&current->output, &s);
929964
}
930965
strbuf_release(&s);
@@ -974,6 +1009,10 @@ int verify_ref_format(struct ref_format *format)
9741009
at = parse_ref_filter_atom(format, sp + 2, ep, &err);
9751010
if (at < 0)
9761011
die("%s", err.buf);
1012+
if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
1013+
used_atom[at].u.raw_data.option == RAW_BARE)
1014+
die(_("--format=%.*s cannot be used with"
1015+
"--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
9771016
cp = ep + 1;
9781017

9791018
if (skip_prefix(used_atom[at].name, "color:", &color))
@@ -1367,12 +1406,25 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp
13671406
struct used_atom *atom = &used_atom[i];
13681407
const char *name = atom->name;
13691408
struct atom_value *v = &val[i];
1409+
enum atom_type atom_type = atom->atom_type;
13701410

13711411
if (!!deref != (*name == '*'))
13721412
continue;
13731413
if (deref)
13741414
name++;
13751415

1416+
if (atom_type == ATOM_RAW) {
1417+
unsigned long buf_size = data->size;
1418+
1419+
if (atom->u.raw_data.option == RAW_BARE) {
1420+
v->s = xmemdupz(buf, buf_size);
1421+
v->s_size = buf_size;
1422+
} else if (atom->u.raw_data.option == RAW_LENGTH) {
1423+
v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size);
1424+
}
1425+
continue;
1426+
}
1427+
13761428
if ((data->type != OBJ_TAG &&
13771429
data->type != OBJ_COMMIT) ||
13781430
(strcmp(name, "body") &&
@@ -1460,9 +1512,11 @@ static void grab_values(struct atom_value *val, int deref, struct object *obj, s
14601512
break;
14611513
case OBJ_TREE:
14621514
/* grab_tree_values(val, deref, obj, buf, sz); */
1515+
grab_sub_body_contents(val, deref, data);
14631516
break;
14641517
case OBJ_BLOB:
14651518
/* grab_blob_values(val, deref, obj, buf, sz); */
1519+
grab_sub_body_contents(val, deref, data);
14661520
break;
14671521
default:
14681522
die("Eh? Object of type %d?", obj->type);
@@ -1766,6 +1820,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
17661820
const char *refname;
17671821
struct branch *branch = NULL;
17681822

1823+
v->s_size = ATOM_SIZE_UNSPECIFIED;
17691824
v->handler = append_atom;
17701825
v->atom = atom;
17711826

@@ -2369,6 +2424,19 @@ static int compare_detached_head(struct ref_array_item *a, struct ref_array_item
23692424
return 0;
23702425
}
23712426

2427+
static int memcasecmp(const void *vs1, const void *vs2, size_t n)
2428+
{
2429+
const char *s1 = vs1, *s2 = vs2;
2430+
const char *end = s1 + n;
2431+
2432+
for (; s1 < end; s1++, s2++) {
2433+
int diff = tolower(*s1) - tolower(*s2);
2434+
if (diff)
2435+
return diff;
2436+
}
2437+
return 0;
2438+
}
2439+
23722440
static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, struct ref_array_item *b)
23732441
{
23742442
struct atom_value *va, *vb;
@@ -2389,10 +2457,29 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
23892457
} else if (s->sort_flags & REF_SORTING_VERSION) {
23902458
cmp = versioncmp(va->s, vb->s);
23912459
} else if (cmp_type == FIELD_STR) {
2392-
int (*cmp_fn)(const char *, const char *);
2393-
cmp_fn = s->sort_flags & REF_SORTING_ICASE
2394-
? strcasecmp : strcmp;
2395-
cmp = cmp_fn(va->s, vb->s);
2460+
if (va->s_size < 0 && vb->s_size < 0) {
2461+
int (*cmp_fn)(const char *, const char *);
2462+
cmp_fn = s->sort_flags & REF_SORTING_ICASE
2463+
? strcasecmp : strcmp;
2464+
cmp = cmp_fn(va->s, vb->s);
2465+
} else {
2466+
size_t a_size = va->s_size < 0 ?
2467+
strlen(va->s) : va->s_size;
2468+
size_t b_size = vb->s_size < 0 ?
2469+
strlen(vb->s) : vb->s_size;
2470+
int (*cmp_fn)(const void *, const void *, size_t);
2471+
cmp_fn = s->sort_flags & REF_SORTING_ICASE
2472+
? memcasecmp : memcmp;
2473+
2474+
cmp = cmp_fn(va->s, vb->s, b_size > a_size ?
2475+
a_size : b_size);
2476+
if (!cmp) {
2477+
if (a_size > b_size)
2478+
cmp = 1;
2479+
else if (a_size < b_size)
2480+
cmp = -1;
2481+
}
2482+
}
23962483
} else {
23972484
if (va->value < vb->value)
23982485
cmp = -1;
@@ -2491,7 +2578,7 @@ int format_ref_array_item(struct ref_array_item *info,
24912578
append_literal(cp, sp, &state);
24922579
}
24932580
if (format->need_color_reset_at_eol) {
2494-
struct atom_value resetv;
2581+
struct atom_value resetv = ATOM_VALUE_INIT;
24952582
resetv.s = GIT_COLOR_RESET;
24962583
if (append_atom(&resetv, &state, error_buf)) {
24972584
pop_stack_element(&state.stack);

0 commit comments

Comments
 (0)