Skip to content

Commit 9f69239

Browse files
authored
openapi spec and Makefile for it (#518)
fix XET-741 Creates an OpenAPI specification for all CAS APIs, following the first version of the protocol specification. Adds a Makefile to generate clients for the CAS APIs in different languages.
1 parent a31df60 commit 9f69239

File tree

3 files changed

+370
-0
lines changed

3 files changed

+370
-0
lines changed

openapi/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
openapitools.json
2+
generated/

openapi/Makefile

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Generates API clients from the CAS OpenAPI spec with openapi-generator.

# Generator executable; `?=` lets the environment or command line override it.
CLI      ?= openapi-generator-cli
# Spec to read and directory that receives one sub-directory per client,
# both anchored at the directory make runs in.
SPEC     := $(CURDIR)/cas.openapi.yaml
OUT_ROOT := $(CURDIR)/generated

.DEFAULT_GOAL := all

# None of these names is a file produced by a recipe.
.PHONY: all check-cli clean outdir
.PHONY: rust typescript python java golang

# Default goal: build every language client.
all: rust typescript python java golang
10+
11+
# ensure_cli: recipe fragment that checks $(CLI) is on PATH and, when it is
# not, installs @openapitools/openapi-generator-cli globally through npm.
#
# The whole check is a single `bash -c '...'` script, so:
#   * the script text must not contain single quotes (one would end the
#     quoted string early and hand the command substitutions to the outer
#     shell, which would run them before the install step);
#   * the PATH export only affects the rest of this script, not later
#     recipe lines -- the npm global bin directory still has to be on the
#     caller's PATH for the generator invocations to find $(CLI).
#
# The global bin directory is derived from `npm prefix -g` because
# `npm bin -g` is no longer available in current npm releases.
define ensure_cli
	@bash -c 'set -euo pipefail; \
	if command -v $(CLI) >/dev/null 2>&1; then \
		echo "$(CLI) found: $$(command -v $(CLI))"; \
		exit 0; \
	fi; \
	echo "$(CLI) not found; installing via npm..." >&2; \
	if ! command -v npm >/dev/null 2>&1; then \
		echo "npm is not installed. Please install Node.js/npm and re-run." >&2; \
		exit 1; \
	fi; \
	npm install @openapitools/openapi-generator-cli -g; \
	export PATH="$$PATH:$$(npm prefix -g)/bin"; \
	if ! command -v $(CLI) >/dev/null 2>&1; then \
		echo "$(CLI) still not found after npm installation. Ensure npm global bin is in PATH." >&2; \
		exit 1; \
	fi; \
	echo "$(CLI) installed: $$(command -v $(CLI))"'
endef

# Verify (and if necessary install) the generator CLI before any client build.
check-cli:
	$(ensure_cli)
33+
34+
# Create the directory that receives every generated client.
outdir:
	mkdir -p "$(OUT_ROOT)"

# Literal comma for use inside function arguments, where a bare "," would be
# taken as an argument separator.
gen_comma := ,

# gen: recipe template that regenerates one client from $(SPEC).
#   $(1)  generator name handed to `-g`
#   $(2)  sub-directory of $(OUT_ROOT) to write into; removed before the run
#   $(3)  extra generator option, e.g. --additional-properties=a=1,b=2
#
# $(call ...) splits its arguments at every comma, so a comma-separated
# option written inline at the call site arrives here as $(3), $(4), $(5)...
# rather than as one value. The pieces are glued back together with commas
# so the generator receives the complete option; up to seven pieces
# ($(3)..$(9)) are supported.
define gen
	@echo "Generating $(2) client -> $(OUT_ROOT)/$(2)"
	@rm -rf "$(OUT_ROOT)/$(2)"
	$(CLI) generate -i "$(SPEC)" -g "$(1)" -o "$(OUT_ROOT)/$(2)" $(3)$(if $(4),$(gen_comma)$(4))$(if $(5),$(gen_comma)$(5))$(if $(6),$(gen_comma)$(6))$(if $(7),$(gen_comma)$(7))$(if $(8),$(gen_comma)$(8))$(if $(9),$(gen_comma)$(9))
endef
42+
43+
# Rust client. The property list lives in a variable because a literal comma
# inside $(call ...) starts a new argument; written inline, only the first
# property would end up in the option passed as $(3).
RUST_PROPS := packageName=xet_cas_client,packageVersion=0.1.0,library=reqwest,preferUnsignedInt=true

rust: check-cli outdir
	$(call gen,rust,rust,--additional-properties=$(RUST_PROPS))
45+
46+
# TypeScript (fetch) client. The property list lives in a variable because a
# literal comma inside $(call ...) starts a new argument; written inline,
# only the first property would end up in the option passed as $(3).
TYPESCRIPT_PROPS := npmName=@xet/cas-client,npmVersion=0.1.0,typescriptThreePlus=true

typescript: check-cli outdir
	$(call gen,typescript-fetch,typescript,--additional-properties=$(TYPESCRIPT_PROPS))
48+
49+
# Python client. The property list lives in a variable because a literal
# comma inside $(call ...) starts a new argument; written inline, only the
# first property would end up in the option passed as $(3).
PYTHON_PROPS := packageName=xet_cas_client,projectName=xet_cas_client,packageVersion=0.1.0

python: check-cli outdir
	$(call gen,python,python,--additional-properties=$(PYTHON_PROPS))
51+
52+
# Java client. The property list lives in a variable because a literal comma
# inside $(call ...) starts a new argument; written inline, only the first
# property would end up in the option passed as $(3).
JAVA_PROPS := artifactId=xet-cas-client,groupId=ai.huggingface.xet,artifactVersion=0.1.0,library=webclient,datetimeLibrary=java8

java: check-cli outdir
	$(call gen,java,java,--additional-properties=$(JAVA_PROPS))
54+
55+
# Go client (written to the `golang` output directory). The property list
# lives in a variable because a literal comma inside $(call ...) starts a new
# argument; written inline, only the first property would end up in the
# option passed as $(3).
GOLANG_PROPS := packageName=casclient,enumClassPrefix=true,isGoSubmodule=false,withGoCodegenComment=true

golang: check-cli outdir
	$(call gen,go,golang,--additional-properties=$(GOLANG_PROPS))
57+
58+
# Drop every generated client by deleting the whole output root.
clean:
	rm -r -f "$(OUT_ROOT)"
60+

openapi/cas.openapi.yaml

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
openapi: 3.1.0
2+
info:
3+
title: Xet CAS API
4+
version: 1.0.0
5+
description: |
6+
OpenAPI specification for the Content Addressable Storage (CAS) service.
7+
See the accompanying docs for details on authentication, hashing, and formats.
8+
Reference: https://huggingface.co/docs/xet/api
9+
servers:
10+
- url: /
11+
description: Base URL; paths include the `/v1` prefix
12+
security:
13+
- bearerAuth: []
14+
paths:
15+
/v1/reconstructions/{file_id}:
16+
get:
17+
summary: Get File Reconstruction
18+
description: |
19+
Retrieves reconstruction information for a specific file. Supports byte range via the optional `Range` header.
20+
Minimum token scope: `read`.
21+
x-required-scope: read
22+
operationId: getReconstruction
23+
parameters:
24+
- $ref: '#/components/parameters/FileIdParam'
25+
- $ref: '#/components/parameters/RangeHeader'
26+
responses:
27+
'200':
28+
description: Reconstruction object
29+
content:
30+
application/json:
31+
schema:
32+
$ref: '#/components/schemas/QueryReconstructionResponse'
33+
examples:
34+
example:
35+
value:
36+
offset_into_first_range: 0
37+
terms:
38+
- hash: a1b2c3d4e5f6789012345678901234567890abcdef1234567890abcdef123456
39+
unpacked_length: 263873
40+
range:
41+
start: 0
42+
end: 4
43+
fetch_info:
44+
a1b2c3d4e5f6789012345678901234567890abcdef1234567890abcdef123456:
45+
- range:
46+
start: 0
47+
end: 4
48+
url: https://transfer.xethub.hf.co/xorb/default/a1b2c3d4e5f6789012345678901234567890abcdef1234567890abcdef123456
49+
url_range:
50+
start: 0
51+
end: 131071
52+
'400':
53+
description: Bad Request — Malformed file_id
54+
'401':
55+
description: Unauthorized — Missing/expired token
56+
'404':
57+
description: Not Found — File does not exist
58+
'416':
59+
description: Range Not Satisfiable — Requested byte range start exceeds file length
60+
/v1/chunks/{prefix}/{hash}:
61+
get:
62+
summary: Query Chunk Deduplication (Global Deduplication)
63+
description: |
64+
Checks if a chunk exists in the CAS for deduplication purposes.
65+
Minimum token scope: `read`.
66+
x-required-scope: read
67+
operationId: getChunkDedupInfo
68+
parameters:
69+
- $ref: '#/components/parameters/PrefixGlobalDedupeParam'
70+
- $ref: '#/components/parameters/HashParam'
71+
responses:
72+
'200':
73+
description: Shard format bytes
74+
content:
75+
application/octet-stream:
76+
schema:
77+
type: string
78+
format: binary
79+
'400':
80+
description: Bad Request — Malformed hash
81+
'401':
82+
description: Unauthorized — Missing/expired token
83+
'404':
84+
description: Not Found — Chunk not tracked by global deduplication
85+
/v1/xorbs/{prefix}/{hash}:
86+
post:
87+
summary: Upload Xorb
88+
description: |
89+
Uploads a serialized Xorb to the server.
90+
Minimum token scope: `write`.
91+
x-required-scope: write
92+
operationId: uploadXorb
93+
parameters:
94+
- $ref: '#/components/parameters/PrefixXorbParam'
95+
- $ref: '#/components/parameters/HashParam'
96+
requestBody:
97+
required: true
98+
content:
99+
application/octet-stream:
100+
schema:
101+
type: string
102+
format: binary
103+
examples:
104+
xorbBytes:
105+
summary: Serialized Xorb bytes
106+
value: ''
107+
responses:
108+
'200':
109+
description: Upload result
110+
content:
111+
application/json:
112+
schema:
113+
$ref: '#/components/schemas/UploadXorbResponse'
114+
examples:
115+
inserted:
116+
value:
117+
was_inserted: true
118+
'400':
119+
description: Bad Request — Malformed hash, mismatched body hash, or bad serialization
120+
'401':
121+
description: Unauthorized — Missing/expired token
122+
'403':
123+
description: Forbidden — Token does not have required scope
124+
/v1/shards:
125+
post:
126+
summary: Upload Shard
127+
description: |
128+
Uploads a Shard to the CAS (file reconstructions and new xorb listing).
129+
Minimum token scope: `write`.
130+
x-required-scope: write
131+
operationId: uploadShard
132+
requestBody:
133+
required: true
134+
content:
135+
application/octet-stream:
136+
schema:
137+
type: string
138+
format: binary
139+
examples:
140+
shardBytes:
141+
summary: Serialized Shard bytes
142+
value: ''
143+
responses:
144+
'200':
145+
description: Upload result
146+
content:
147+
application/json:
148+
schema:
149+
$ref: '#/components/schemas/UploadShardResponse'
150+
examples:
151+
resultRegistered:
152+
value:
153+
result: 1
154+
'400':
155+
description: Bad Request — Invalid shard serialization or verification failure
156+
'401':
157+
description: Unauthorized — Missing/expired token
158+
'403':
159+
description: Forbidden — Token does not have required scope
160+
components:
161+
securitySchemes:
162+
bearerAuth:
163+
type: http
164+
scheme: bearer
165+
bearerFormat: JWT
166+
description: |
167+
Use `Authorization: Bearer <token>`. Tokens carry scopes (`read`, `write`).
168+
parameters:
169+
FileIdParam:
170+
name: file_id
171+
in: path
172+
required: true
173+
description: |
174+
File hash in hex format (64 lowercase hexadecimal characters). See hashing docs and string conversion procedure.
175+
schema:
176+
$ref: '#/components/schemas/HexString64Lowercase'
177+
HashParam:
178+
name: hash
179+
in: path
180+
required: true
181+
description: Chunk/Xorb hash in hex format (64 lowercase hexadecimal characters)
182+
schema:
183+
$ref: '#/components/schemas/HexString64Lowercase'
184+
PrefixGlobalDedupeParam:
185+
name: prefix
186+
in: path
187+
required: true
188+
description: The only acceptable prefix for the Global Deduplication API is `default-merkledb`.
189+
schema:
190+
type: string
191+
enum: [default-merkledb]
192+
PrefixXorbParam:
193+
name: prefix
194+
in: path
195+
required: true
196+
description: The only acceptable prefix for the Xorb upload API is `default`.
197+
schema:
198+
type: string
199+
enum: [default]
200+
RangeHeader:
201+
name: Range
202+
in: header
203+
required: false
204+
description: |
205+
Optional byte range header for reconstruction queries. Format `bytes={start}-{end}` with end inclusive.
206+
schema:
207+
type: string
208+
pattern: ^bytes=\d+-\d+$
209+
schemas:
210+
HexString64Lowercase:
211+
type: string
212+
description: 64-character lowercase hexadecimal string
213+
pattern: ^[0-9a-f]{64}$
214+
examples:
215+
- 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
216+
IndexRange:
217+
type: object
218+
description: Chunk index range; end-exclusive `[start, end)`
219+
properties:
220+
start:
221+
type: integer
222+
minimum: 0
223+
end:
224+
type: integer
225+
minimum: 0
226+
required: [start, end]
227+
additionalProperties: false
228+
ByteRange:
229+
type: object
230+
description: Byte range; end-inclusive `[start, end]` for use in HTTP Range headers
231+
properties:
232+
start:
233+
type: integer
234+
minimum: 0
235+
end:
236+
type: integer
237+
minimum: 0
238+
required: [start, end]
239+
additionalProperties: false
240+
CASReconstructionTerm:
241+
type: object
242+
description: Ordered term describing which chunks to download from which xorb
243+
properties:
244+
hash:
245+
$ref: '#/components/schemas/HexString64Lowercase'
246+
range:
247+
$ref: '#/components/schemas/IndexRange'
248+
unpacked_length:
249+
type: integer
250+
minimum: 0
251+
required: [hash, range, unpacked_length]
252+
additionalProperties: false
253+
CASReconstructionFetchInfo:
254+
type: object
255+
description: Fetch information for a range of chunks within a xorb
256+
properties:
257+
url:
258+
type: string
259+
format: uri
260+
url_range:
261+
$ref: '#/components/schemas/ByteRange'
262+
range:
263+
$ref: '#/components/schemas/IndexRange'
264+
required: [url, url_range, range]
265+
additionalProperties: false
266+
QueryReconstructionResponse:
267+
type: object
268+
description: Reconstruction object describing how to download and reconstruct a file
269+
properties:
270+
offset_into_first_range:
271+
type: integer
272+
minimum: 0
273+
description: Byte offset into the first term to start keeping data from
274+
terms:
275+
type: array
276+
items:
277+
$ref: '#/components/schemas/CASReconstructionTerm'
278+
fetch_info:
279+
type: object
280+
description: Map from xorb hash to an array of fetch info entries
281+
propertyNames:
282+
$ref: '#/components/schemas/HexString64Lowercase'
283+
additionalProperties:
284+
type: array
285+
items:
286+
$ref: '#/components/schemas/CASReconstructionFetchInfo'
287+
required: [offset_into_first_range, terms, fetch_info]
288+
additionalProperties: false
289+
UploadXorbResponse:
290+
type: object
291+
properties:
292+
was_inserted:
293+
type: boolean
294+
description: false if the Xorb already exists
295+
required: [was_inserted]
296+
additionalProperties: false
297+
UploadShardResponse:
298+
type: object
299+
properties:
300+
result:
301+
type: integer
302+
enum: [0, 1]
303+
description: |
304+
0 = Shard already exists, 1 = SyncPerformed — the Shard was registered. Any 200 OK means success.
305+
required: [result]
306+
additionalProperties: false
307+
308+

0 commit comments

Comments
 (0)