Skip to content

Commit 5ceed62

Browse files
server: fix duplicate HTTP headers in multiple models mode (ggml-org#17698)
* llama-server: fix duplicate HTTP headers in multiple models mode (ggml-org#17693) * llama-server: address review feedback from ngxson - restrict scope of header after std::move - simplify header check (remove unordered_set)
1 parent 7ca5991 commit 5ceed62

File tree

2 files changed

+47
-10
lines changed

2 files changed

+47
-10
lines changed

tools/server/server-models.cpp

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <sheredom/subprocess.h>
88

99
#include <functional>
10+
#include <algorithm>
1011
#include <thread>
1112
#include <mutex>
1213
#include <condition_variable>
@@ -889,6 +890,28 @@ struct pipe_t {
889890
}
890891
};
891892

893+
// Returns a copy of `value` with the ASCII letters 'A'-'Z' folded to lower case.
//
// The result is used to compare HTTP header names, which are ASCII tokens
// matched case-insensitively. The fold is done by hand rather than through
// std::tolower so that it does not depend on the process-wide C locale and
// does not require <cctype>; every byte outside 'A'-'Z' is copied unchanged.
static std::string to_lower_copy(const std::string & value) {
    std::string lowered(value);
    for (char & c : lowered) {
        if (c >= 'A' && c <= 'Z') {
            c = static_cast<char>(c - 'A' + 'a');
        }
    }
    return lowered;
}
898+
899+
// Tells whether a header received from a child server must be left out of the
// response that the router forwards.
//
// `header_name` is matched against lower-case literals only, so the caller is
// expected to pass a name that has already been lower-cased.
static bool should_strip_proxy_header(const std::string & header_name) {
    // exact names that end up duplicated when the router forwards the child response
    static const char * const duplicated_names[] = {
        "server",
        "transfer-encoding",
        "keep-alive",
    };
    for (const char * name : duplicated_names) {
        if (header_name == name) {
            return true;
        }
    }

    // the router injects CORS headers and the child sends them too, so any
    // header whose name starts with this prefix would be a duplicate
    const char cors_prefix[] = "access-control-";
    return header_name.compare(0, sizeof(cors_prefix) - 1, cors_prefix) == 0;
}
914+
892915
server_http_proxy::server_http_proxy(
893916
const std::string & method,
894917
const std::string & host,
@@ -925,14 +948,22 @@ server_http_proxy::server_http_proxy(
925948
msg_t msg;
926949
msg.status = response.status;
927950
for (const auto & [key, value] : response.headers) {
951+
const auto lowered = to_lower_copy(key);
952+
if (should_strip_proxy_header(lowered)) {
953+
continue;
954+
}
955+
if (lowered == "content-type") {
956+
msg.content_type = value;
957+
continue;
958+
}
928959
msg.headers[key] = value;
929960
}
930961
return pipe->write(std::move(msg)); // send headers first
931962
};
932963
httplib::ContentReceiverWithProgress content_receiver = [pipe](const char * data, size_t data_length, size_t, size_t) {
933964
// send data chunks
934965
// returns false if pipe is closed / broken (signal to stop receiving)
935-
return pipe->write({{}, 0, std::string(data, data_length)});
966+
return pipe->write({{}, 0, std::string(data, data_length), ""});
936967
};
937968

938969
// prepare the request to destination server
@@ -955,21 +986,26 @@ server_http_proxy::server_http_proxy(
955986
if (result.error() != httplib::Error::Success) {
956987
auto err_str = httplib::to_string(result.error());
957988
SRV_ERR("http client error: %s\n", err_str.c_str());
958-
pipe->write({{}, 500, ""}); // header
959-
pipe->write({{}, 0, "proxy error: " + err_str}); // body
989+
pipe->write({{}, 500, "", ""}); // header
990+
pipe->write({{}, 0, "proxy error: " + err_str, ""}); // body
960991
}
961992
pipe->close_write(); // signal EOF to reader
962993
SRV_DBG("%s", "client request thread ended\n");
963994
});
964995
this->thread.detach();
965996

966997
// wait for the first chunk (headers)
967-
msg_t header;
968-
if (pipe->read(header, should_stop)) {
969-
SRV_DBG("%s", "received response headers\n");
970-
this->status = header.status;
971-
this->headers = header.headers;
972-
} else {
973-
SRV_DBG("%s", "no response headers received (request cancelled?)\n");
998+
{
999+
msg_t header;
1000+
if (pipe->read(header, should_stop)) {
1001+
SRV_DBG("%s", "received response headers\n");
1002+
this->status = header.status;
1003+
this->headers = std::move(header.headers);
1004+
if (!header.content_type.empty()) {
1005+
this->content_type = std::move(header.content_type);
1006+
}
1007+
} else {
1008+
SRV_DBG("%s", "no response headers received (request cancelled?)\n");
1009+
}
9741010
}
9751011
}

tools/server/server-models.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,5 +170,6 @@ struct server_http_proxy : server_http_res {
170170
std::map<std::string, std::string> headers;
171171
int status = 0;
172172
std::string data;
173+
std::string content_type;
173174
};
174175
};

0 commit comments

Comments
 (0)