Skip to content

Commit e313162

Browse files
peff authored and gitster committed
http: optionally extract charset parameter from content-type
Since the previous commit, we now give a sanitized, shortened version of the content-type header to any callers who ask for it.

This patch adds back a way for them to cleanly access specific parameters to the type. We could easily extract all parameters and make them available via a string_list, but:

1. That complicates the interface and memory management.

2. In practice, no planned callers care about anything except the charset.

This patch therefore goes with the simplest thing, and we can expand or change the interface later if it becomes necessary.

Signed-off-by: Jeff King <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent bf197fd commit e313162

File tree

2 files changed

+57
-4
lines changed

2 files changed

+57
-4
lines changed

http.c

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,32 @@ static CURLcode curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf)
906906
return ret;
907907
}
908908

909+
/*
 * Check for and extract a content-type parameter. "raw"
 * should be positioned at the start of the potential
 * parameter, with any whitespace already removed.
 *
 * "name" is the name of the parameter; it is matched
 * case-insensitively and must be followed immediately by "=".
 * The value is appended to "out" (which is not reset here) and
 * ends at the first whitespace, ";" or end of string, so that a
 * following parameter (e.g. "charset=utf-8;foo=bar") does not
 * leak into the extracted value.
 *
 * Returns 0 if the parameter was found, and -1 otherwise (in
 * which case nothing is appended to "out").
 */
static int extract_param(const char *raw, const char *name,
			 struct strbuf *out)
{
	size_t len = strlen(name);

	if (strncasecmp(raw, name, len))
		return -1;
	raw += len;

	if (*raw != '=')
		return -1;
	raw++;

	/* ";" separates parameters, so it terminates the value as well. */
	while (*raw && !isspace(*raw) && *raw != ';')
		strbuf_addch(out, *raw++);
	return 0;
}
934+
909935
/*
910936
* Extract a normalized version of the content type, with any
911937
* spaces suppressed, all letters lowercased, and no trailing ";"
@@ -916,11 +942,15 @@ static CURLcode curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf)
916942
* but "text/plain" is the only reasonable output, and this keeps
917943
* our code simple.
918944
*
945+
* If the "charset" argument is not NULL, store the value of any
946+
* charset parameter there.
947+
*
919948
* Example:
920-
* "TEXT/PLAIN; charset=utf-8" -> "text/plain"
949+
* "TEXT/PLAIN; charset=utf-8" -> "text/plain", "utf-8"
921950
* "text / plain" -> "text/plain"
922951
*/
923-
static void extract_content_type(struct strbuf *raw, struct strbuf *type)
952+
static void extract_content_type(struct strbuf *raw, struct strbuf *type,
953+
struct strbuf *charset)
924954
{
925955
const char *p;
926956

@@ -929,10 +959,25 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type)
929959
for (p = raw->buf; *p; p++) {
930960
if (isspace(*p))
931961
continue;
932-
if (*p == ';')
962+
if (*p == ';') {
963+
p++;
933964
break;
965+
}
934966
strbuf_addch(type, tolower(*p));
935967
}
968+
969+
if (!charset)
970+
return;
971+
972+
strbuf_reset(charset);
973+
while (*p) {
974+
while (isspace(*p))
975+
p++;
976+
if (!extract_param(p, "charset", charset))
977+
return;
978+
while (*p && !isspace(*p))
979+
p++;
980+
}
936981
}
937982

938983
/* http_request() targets */
@@ -989,7 +1034,8 @@ static int http_request(const char *url,
9891034
if (options && options->content_type) {
9901035
struct strbuf raw = STRBUF_INIT;
9911036
curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, &raw);
992-
extract_content_type(&raw, options->content_type);
1037+
extract_content_type(&raw, options->content_type,
1038+
options->charset);
9931039
strbuf_release(&raw);
9941040
}
9951041

http.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,13 @@ struct http_get_options {
143143
/* If non-NULL, returns the content-type of the response. */
144144
struct strbuf *content_type;
145145

146+
/*
147+
* If non-NULL, and content_type above is non-NULL, returns
148+
* the charset parameter from the content-type. If none is
149+
* present, returns an empty string.
150+
*/
151+
struct strbuf *charset;
152+
146153
/*
147154
* If non-NULL, returns the URL we ended up at, including any
148155
* redirects we followed.

0 commit comments

Comments
 (0)