Skip to content

Commit 7e84856

Browse files
committed
fix(dag@gw): content type and cache headers
This unifies the way we set headers for DAG-* responses, adds the ones that were missing from the HTML response, and adds a bunch of regression tests to ensure the desired behaviors are not changed during future refactors.
1 parent 52711d3 commit 7e84856

File tree

4 files changed

+327
-95
lines changed

4 files changed

+327
-95
lines changed

core/corehttp/gateway_handler_codec.go

Lines changed: 70 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@ import (
1010
"strings"
1111
"time"
1212

13+
cid "github.com/ipfs/go-cid"
1314
ipldlegacy "github.com/ipfs/go-ipld-legacy"
1415
ipath "github.com/ipfs/interface-go-ipfs-core/path"
16+
"github.com/ipfs/kubo/assets"
1517
dih "github.com/ipfs/kubo/assets/dag-index-html"
1618
"github.com/ipfs/kubo/tracing"
1719
"github.com/ipld/go-ipld-prime"
@@ -54,33 +56,48 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
5456
ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType)))
5557
defer span.End()
5658

57-
// If the resolved path still has some remainder, return bad request.
59+
cidCodec := resolvedPath.Cid().Prefix().Codec
60+
responseContentType := requestedContentType
61+
62+
// If the resolved path still has some remainder, return error for now.
63+
// TODO: handle this when we have IPLD Patch (https://ipld.io/specs/patch/) via HTTP PUT
64+
// TODO: (depends on https://github.com/ipfs/kubo/issues/4801 and https://github.com/ipfs/kubo/issues/4782)
5865
if resolvedPath.Remainder() != "" {
5966
path := strings.TrimSuffix(resolvedPath.String(), resolvedPath.Remainder())
6067
err := fmt.Errorf("%q of %q could not be returned: reading IPLD Kinds other than Links (CBOR Tag 42) is not implemented: try reading %q instead", resolvedPath.Remainder(), resolvedPath.String(), path)
6168
webError(w, "unsupported pathing", err, http.StatusNotImplemented)
6269
return
6370
}
6471

72+
// If no explicit content type was requested, the response will have one based on the codec from the CID
73+
if requestedContentType == "" {
74+
cidContentType, ok := codecToContentType[cidCodec]
75+
if !ok {
76+
// Should not happen unless function is called with wrong parameters.
77+
err := fmt.Errorf("content type not found for codec: %v", cidCodec)
78+
webError(w, "internal error", err, http.StatusInternalServerError)
79+
return
80+
}
81+
responseContentType = cidContentType
82+
}
83+
84+
// Set HTTP headers (for caching etc)
85+
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
86+
name := setCodecContentDisposition(w, r, resolvedPath, responseContentType)
87+
w.Header().Set("Content-Type", responseContentType)
88+
w.Header().Set("X-Content-Type-Options", "nosniff")
89+
6590
// No content type is specified by the user (via Accept, or format=). However,
6691
// we support this format. Let's handle it.
6792
if requestedContentType == "" {
68-
cidCodec := resolvedPath.Cid().Prefix().Codec
6993
isDAG := cidCodec == uint64(mc.DagJson) || cidCodec == uint64(mc.DagCbor)
7094
acceptsHTML := strings.Contains(r.Header.Get("Accept"), "text/html")
95+
download := r.URL.Query().Get("download") == "true"
7196

72-
if isDAG && acceptsHTML {
97+
if isDAG && acceptsHTML && !download {
7398
i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath)
7499
} else {
75-
cidContentType, ok := codecToContentType[cidCodec]
76-
if !ok {
77-
// Should not happen unless function is called with wrong parameters.
78-
err := fmt.Errorf("content type not found for codec: %v", cidCodec)
79-
webError(w, "internal error", err, http.StatusInternalServerError)
80-
return
81-
}
82-
83-
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, cidContentType)
100+
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
84101
}
85102

86103
return
@@ -96,10 +113,13 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
96113
return
97114
}
98115

116+
// If we need to convert, use the last codec (strict dag- variant)
117+
toCodec := codecs[len(codecs)-1]
118+
99119
// If the requested content type has "dag-", ALWAYS go through the encoding
100120
// process in order to validate the content.
101121
if strings.Contains(requestedContentType, "dag-") {
102-
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, requestedContentType, codecs[len(codecs)-1])
122+
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
103123
return
104124
}
105125

@@ -108,34 +128,44 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
108128
// used here as it sets different headers.
109129
for _, codec := range codecs {
110130
if resolvedPath.Cid().Prefix().Codec == codec {
111-
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, requestedContentType)
131+
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
112132
return
113133
}
114134
}
115135

116136
// Finally, if nothing of the above is true, we have to actually convert the codec.
117-
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, requestedContentType, codecs[len(codecs)-1])
137+
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
118138
}
119139

120140
func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path) {
121-
// TODO: cache-control/etag like for DirIndex
141+
// An HTML directory index will be presented; be sure to set the correct
142+
// type instead of relying on autodetection (which may fail).
143+
w.Header().Set("Content-Type", "text/html")
144+
145+
// Clear Content-Disposition -- we want HTML to be rendered inline
146+
w.Header().Del("Content-Disposition")
147+
148+
// Generated index requires custom Etag (output may change between Kubo versions)
149+
dagEtag := getDagIndexEtag(resolvedPath.Cid())
150+
w.Header().Set("Etag", dagEtag)
151+
152+
// Remove Cache-Control for now to match UnixFS dir-index-html responses
153+
// (we don't want the browser to cache HTML forever)
154+
// TODO: if we ever change behavior for UnixFS dir listings, the same changes should be applied here
155+
w.Header().Del("Cache-Control")
156+
122157
cidCodec := mc.Code(resolvedPath.Cid().Prefix().Codec)
123158
if err := dih.DagIndexTemplate.Execute(w, dih.DagIndexTemplateData{
124159
Path: contentPath.String(),
125160
CID: resolvedPath.Cid().String(),
126161
CodecName: cidCodec.String(),
127162
CodecHex: fmt.Sprintf("0x%x", uint64(cidCodec)),
128163
}); err != nil {
129-
webError(w, "failed to generate HTML listing for this DAG: retry without 'Accept: text/html'", err, http.StatusInternalServerError)
164+
webError(w, "failed to generate HTML listing for this DAG: try fetching raw block with ?format=raw", err, http.StatusInternalServerError)
130165
}
131166
}
132167

133-
func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, contentType string) {
134-
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
135-
name := setCodecContentDisposition(w, r, resolvedPath, contentType)
136-
w.Header().Set("Content-Type", contentType)
137-
w.Header().Set("X-Content-Type-Options", "nosniff")
138-
168+
func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, name string, modtime time.Time) {
139169
blockCid := resolvedPath.Cid()
140170
blockReader, err := i.api.Block().Get(ctx, resolvedPath)
141171
if err != nil {
@@ -154,7 +184,7 @@ func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWrite
154184
_, _, _ = ServeContent(w, r, name, modtime, content)
155185
}
156186

157-
func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, contentType string, codec uint64) {
187+
func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec uint64, modtime time.Time) {
158188
obj, err := i.api.Dag().Get(ctx, resolvedPath.Cid())
159189
if err != nil {
160190
webError(w, "ipfs dag get "+html.EscapeString(resolvedPath.String()), err, http.StatusInternalServerError)
@@ -169,27 +199,20 @@ func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.Respons
169199
}
170200
finalNode := universal.(ipld.Node)
171201

172-
encoder, err := multicodec.LookupEncoder(codec)
202+
encoder, err := multicodec.LookupEncoder(toCodec)
173203
if err != nil {
174204
webError(w, err.Error(), err, http.StatusInternalServerError)
175205
return
176206
}
177207

178-
// Keep it in memory so we can detect encoding errors in order to conform
179-
// to the specification.
208+
// Ensure IPLD node conforms to the codec specification.
180209
var buf bytes.Buffer
181210
err = encoder(finalNode, &buf)
182211
if err != nil {
183212
webError(w, err.Error(), err, http.StatusInternalServerError)
184213
return
185214
}
186215

187-
// Set Cache-Control and read optional Last-Modified time
188-
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
189-
setCodecContentDisposition(w, r, resolvedPath, contentType)
190-
w.Header().Set("Content-Type", contentType)
191-
w.Header().Set("X-Content-Type-Options", "nosniff")
192-
193216
// Sets correct Last-Modified header. This code is borrowed from the standard
194217
// library (net/http/server.go) as we cannot use serveFile.
195218
if !(modtime.IsZero() || modtime.Equal(unixEpochTime)) {
@@ -214,12 +237,22 @@ func setCodecContentDisposition(w http.ResponseWriter, r *http.Request, resolved
214237
name = resolvedPath.Cid().String() + ext
215238
}
216239

217-
switch ext {
218-
case ".json": // codecs that serialize to JSON can be rendered by browsers
219-
dispType = "inline"
220-
default: // everything else is assumed binary / opaque bytes
240+
// JSON should be inlined, but ?download=true should still override
241+
if r.URL.Query().Get("download") == "true" {
221242
dispType = "attachment"
243+
} else {
244+
switch ext {
245+
case ".json": // codecs that serialize to JSON can be rendered by browsers
246+
dispType = "inline"
247+
default: // everything else is assumed binary / opaque bytes
248+
dispType = "attachment"
249+
}
222250
}
251+
223252
setContentDispositionHeader(w, name, dispType)
224253
return name
225254
}
255+
256+
func getDagIndexEtag(dagCid cid.Cid) string {
257+
return `"DagIndex-` + assets.AssetHash + `_CID-` + dagCid.String() + `"`
258+
}

0 commit comments

Comments
 (0)