Skip to content

Commit 9da8f5d

Browse files
authored
feat: lake: record cache service in outputs (#12113)
This PR changes the alters the file format of outputs stored in the local Lake cache to include an identifier indicating the service (if any) the output came from. This will be used to enable lazily downloading artifacts on-demand during builds.
1 parent e5f0d62 commit 9da8f5d

File tree

6 files changed

+82
-23
lines changed

6 files changed

+82
-23
lines changed

src/lake/Lake/CLI/Help.lean

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,10 +377,11 @@ OPTIONS:
377377
Downloads artifacts for packages in the workspace from a remote cache service.
378378
The cache service used can be configured via the environment variables:
379379
380+
LAKE_CACHE_SERVICE identifier recorded in ouptuts
380381
LAKE_CACHE_ARTIFACT_ENDPOINT base URL for artifact downloads
381382
LAKE_CACHE_REVISION_ENDPOINT base URL for the mapping download
382383
383-
If neither of these are set, Lake will use Reservoir.
384+
If neither endpoint is set, Lake will use Reservoir.
384385
385386
If an input-to-outputs mappings file, `--scope`, or `--repo` is provided,
386387
Lake will download artifacts for the root package. Otherwise, it will use

src/lake/Lake/CLI/Main.lean

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ public structure LakeOptions where
6363
offline : Bool := false
6464
outputsFile? : Option FilePath := none
6565
forceDownload : Bool := false
66+
service? : Option String := none
6667
scope? : Option String := none
6768
/-- Was `scope?` set with `--repo` (and not `--scope`)? -/
6869
repoScope : Bool := false
@@ -248,6 +249,9 @@ def lakeLongOption : (opt : String) → CliM PUnit
248249
| "--wfail" => modifyThe LakeOptions ({· with failLv := .warning})
249250
| "--iofail" => modifyThe LakeOptions ({· with failLv := .info})
250251
| "--force-download" => modifyThe LakeOptions ({· with forceDownload := true})
252+
| "--service" => do
253+
let service ← takeOptArg "--service" "service name"
254+
modifyThe LakeOptions ({· with service? := some service})
251255
| "--scope" => do
252256
let scope ← takeOptArg "--scope" "cache scope"
253257
modifyThe LakeOptions ({· with scope? := some scope, repoScope := false})
@@ -396,7 +400,7 @@ protected def get : CliM PUnit := do
396400
| error "to use `cache get` with a mappings file, `--scope` or `--repo` must be set"
397401
let service : CacheService :=
398402
if let some artifactEndpoint := ws.lakeEnv.cacheArtifactEndpoint? then
399-
.downloadArtsService artifactEndpoint
403+
.downloadArtsService artifactEndpoint ws.lakeEnv.cacheService?
400404
else
401405
.reservoirService ws.lakeEnv.reservoirApiUrl
402406
let map ← CacheMap.load file
@@ -407,7 +411,7 @@ protected def get : CliM PUnit := do
407411
let service : CacheService ← id do
408412
match ws.lakeEnv.cacheArtifactEndpoint?, ws.lakeEnv.cacheRevisionEndpoint? with
409413
| some artifactEndpoint, some revisionEndpoint =>
410-
return .downloadService artifactEndpoint revisionEndpoint
414+
return .downloadService artifactEndpoint revisionEndpoint ws.lakeEnv.cacheService?
411415
| none, none =>
412416
return .reservoirService ws.lakeEnv.reservoirApiUrl
413417
| some artifactEndpoint, none =>
@@ -531,7 +535,7 @@ protected def add : CliM PUnit := do
531535
| _ => pure ws.root
532536
let scope := pkg.cacheScope
533537
let map ← CacheMap.load file
534-
ws.lakeCache.writeMap scope map
538+
ws.lakeCache.writeMap scope map opts.service?
535539

536540
protected def help : CliM PUnit := do
537541
IO.println <| helpCache <| ← takeArgD ""

src/lake/Lake/Config/Cache.lean

Lines changed: 62 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,40 @@ public structure Cache where
211211
dir : FilePath
212212
deriving Inhabited
213213

214+
/-- The current version of the output file format. -/
215+
def CacheOutput.schemaVersion : String := "2026-02-10"
216+
217+
structure CacheOutput where
218+
service? : Option String := none
219+
data : Json
220+
deriving ToJson
221+
222+
namespace CacheOutput
223+
224+
protected def toJson (out : CacheOutput) : Json :=
225+
JsonObject.empty
226+
|>.insert "schemaVersion" schemaVersion
227+
|>.insert "service" out.service?
228+
|>.insert "data" out.data
229+
230+
instance : ToJson CacheOutput := ⟨CacheOutput.toJson⟩
231+
232+
protected def fromJson? (json : Json) : Except String CacheOutput := do
233+
if let .obj obj := json then
234+
let obj := JsonObject.mk obj
235+
if obj.contains "schemaVersion" then
236+
-- presumably the new format
237+
-- (the edge case of a custom output with a `schemaVersion` is not worth covering)
238+
let service? ← obj.get? "service"
239+
let data ← obj.get "data"
240+
return {service?, data}
241+
-- old format: just the data
242+
return {data := json}
243+
244+
instance : FromJson CacheOutput := ⟨CacheOutput.fromJson?⟩
245+
246+
end CacheOutput
247+
214248
namespace Cache
215249

216250
/-- Returns the artifact directory for the Lake cache. -/
@@ -260,29 +294,34 @@ public def getArtifactPaths
260294
cache.outputsDir / scope / s!"{inputHash}.json"
261295

262296
/-- Cache the outputs corresponding to the given input for the package. -/
263-
public def writeOutputsCore
297+
def writeOutputsCore
264298
(cache : Cache) (scope : String) (inputHash : Hash) (outputs : Json)
299+
(service? : Option String := none)
265300
: IO Unit := do
266301
let file := cache.outputsFile scope inputHash
267302
createParentDirs file
268-
IO.FS.writeFile file outputs.compress
303+
let out := {service?, data := outputs : CacheOutput}
304+
IO.FS.writeFile file (toJson out).pretty
269305

270306
/-- Cache the outputs corresponding to the given input for the package. -/
271307
@[inline] public def writeOutputs
272308
[ToJson α] (cache : Cache) (scope : String) (inputHash : Hash) (outputs : α)
273-
: IO Unit := cache.writeOutputsCore scope inputHash (toJson outputs)
309+
(service? : Option String := none)
310+
: IO Unit := cache.writeOutputsCore scope inputHash (toJson outputs) service?
274311

275312
/-- Cache the input-to-outputs mappings from a `CacheMap`. -/
276-
public def writeMap (cache : Cache) (scope : String) (map : CacheMap) : IO Unit :=
277-
map.forM fun i o => cache.writeOutputsCore scope i o
313+
public def writeMap
314+
(cache : Cache) (scope : String) (map : CacheMap) (service? : Option String := none)
315+
: IO Unit := map.forM fun i o => cache.writeOutputsCore scope i o service?
278316

279317
/-- Retrieve the cached outputs corresponding to the given input for the package (if any). -/
280318
public def readOutputs? (cache : Cache) (scope : String) (inputHash : Hash) : LogIO (Option Json) := do
281319
let path := cache.outputsFile scope inputHash
282320
match (← IO.FS.readFile path |>.toBaseIO) with
283321
| .ok contents =>
284-
match Json.parse contents with
285-
| .ok outputs => return outputs
322+
match Json.parse contents >>= fromJson? with
323+
| .ok out =>
324+
return CacheOutput.data out
286325
| .error e =>
287326
logWarning s!"{path}: invalid JSON: {e}"
288327
return none
@@ -336,6 +375,7 @@ the desired functions by using `CacheService`'s smart constructors
336375
-/
337376
public structure CacheService where
338377
private mk ::
378+
private name? : Option String := none
339379
/- S3 Bucket -/
340380
private key : String := ""
341381
private artifactEndpoint : String := ""
@@ -353,19 +393,22 @@ namespace CacheService
353393

354394
/-- Constructs a `CacheService` for a Reservoir endpoint. -/
355395
@[inline] public def reservoirService (apiEndpoint : String) (repoScope := false) : CacheService :=
356-
{isReservoir := true, apiEndpoint, repoScope}
396+
{name? := some "reservoir", isReservoir := true, apiEndpoint, repoScope}
357397

358398
/-- Constructs a `CacheService` to upload artifacts and/or outputs to an S3 endpoint. -/
359-
@[inline] public def uploadService (key artifactEndpoint revisionEndpoint : String) : CacheService :=
360-
{key, artifactEndpoint, revisionEndpoint}
399+
@[inline] public def uploadService
400+
(key artifactEndpoint revisionEndpoint : String)
401+
: CacheService := {key, artifactEndpoint, revisionEndpoint}
361402

362-
/-- Constructs a `CacheService` to download artifacts and/or outputs from to an S3 endpoint. -/
363-
@[inline] public def downloadService (artifactEndpoint revisionEndpoint : String) : CacheService :=
364-
{artifactEndpoint, revisionEndpoint}
403+
/-- Constructs a `CacheService` to download artifacts and/or outputs from an S3 endpoint. -/
404+
@[inline] public def downloadService
405+
(artifactEndpoint revisionEndpoint : String) (name? : Option String := none)
406+
: CacheService := {name?, artifactEndpoint, revisionEndpoint}
365407

366-
/-- Constructs a `CacheService` to download just artifacts from to an S3 endpoint. -/
367-
@[inline] public def downloadArtsService (artifactEndpoint : String) : CacheService :=
368-
{artifactEndpoint}
408+
/-- Constructs a `CacheService` to download just artifacts from an S3 endpoint. -/
409+
@[inline] public def downloadArtsService
410+
(artifactEndpoint : String) (name? : Option String := none)
411+
: CacheService := {name?, artifactEndpoint}
369412

370413
/--
371414
Reconfigures the cache service to interpret scopes as repositories (or not if `false`).
@@ -385,10 +428,10 @@ private def appendScope (endpoint : String) (scope : String) : String :=
385428
scope.split '/' |>.fold (init := endpoint) fun s component =>
386429
uriEncode component.copy s |>.push '/'
387430

388-
private def s3ArtifactUrl (contentHash : Hash) (service : CacheService) (scope : String) : String :=
431+
private def s3ArtifactUrl (contentHash : Hash) (service : CacheService) (scope : String) : String :=
389432
appendScope s!"{service.artifactEndpoint}/" scope ++ s!"{contentHash.hex}.art"
390433

391-
public def artifactUrl (contentHash : Hash) (service : CacheService) (scope : String) : String :=
434+
public def artifactUrl (contentHash : Hash) (service : CacheService) (scope : String) : String :=
392435
if service.isReservoir then
393436
let endpoint :=
394437
if service.repoScope then
@@ -435,7 +478,7 @@ public def downloadOutputArtifacts
435478
(map : CacheMap) (cache : Cache) (service : CacheService)
436479
(localScope remoteScope : String) (force := false)
437480
: LoggerIO Unit := do
438-
cache.writeMap localScope map
481+
cache.writeMap localScope map service.name?
439482
let descrs ← map.collectOutputDescrs
440483
service.downloadArtifacts descrs cache remoteScope force
441484

src/lake/Lake/Config/Env.lean

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ public structure Env where
6464
cacheArtifactEndpoint? : Option String
6565
/-- The base URL for revision uploads and downloads from the cache (i.e., `LAKE_CACHE_REVISION_ENDPOINT`). -/
6666
cacheRevisionEndpoint? : Option String
67+
/-- The name of the cache service (i.e., `LAKE_CACHE_SERVICE`). -/
68+
cacheService? : Option String
6769
/-- The initial Lean library search path of the environment (i.e., `LEAN_PATH`). -/
6870
initLeanPath : SearchPath
6971
/-- The initial Lean source search path of the environment (i.e., `LEAN_SRC_PATH`). -/
@@ -163,6 +165,7 @@ public def compute
163165
cacheKey? := (← IO.getEnv "LAKE_CACHE_KEY").map (·.trimAscii.copy)
164166
cacheArtifactEndpoint? := (← IO.getEnv "LAKE_CACHE_ARTIFACT_ENDPOINT").map normalizeUrl
165167
cacheRevisionEndpoint? := (← IO.getEnv "LAKE_CACHE_REVISION_ENDPOINT").map normalizeUrl
168+
cacheService? := (← IO.getEnv "LAKE_CACHE_SERVICE").map (·.trimAscii.copy)
166169
githashOverride := (← IO.getEnv "LEAN_GITHASH").getD ""
167170
toolchain
168171
initLeanPath := ← getSearchPath "LEAN_PATH",
@@ -273,6 +276,7 @@ public def baseVars (env : Env) : Array (String × Option String) :=
273276
("LAKE_CACHE_KEY", env.cacheKey?),
274277
("LAKE_CACHE_ARTIFACT_ENDPOINT", env.cacheArtifactEndpoint?),
275278
("LAKE_CACHE_REVISION_ENDPOINT", env.cacheRevisionEndpoint?),
279+
("LAKE_CACHE_SERVICE", env.cacheService?),
276280
("LEAN", env.lean.lean.toString),
277281
("LEAN_SYSROOT", env.lean.sysroot.toString),
278282
("LEAN_AR", env.lean.ar.toString),

src/lake/Lake/Util/JsonObject.lean

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ namespace JsonObject
2626
@[inline] public def mk (val : Std.TreeMap.Raw String Json) : JsonObject :=
2727
val
2828

29+
@[inline] public def empty : JsonObject :=
30+
Std.TreeMap.Raw.empty
31+
2932
@[inline] public protected def toJson (obj : JsonObject) : Json :=
3033
.obj obj
3134

@@ -37,6 +40,9 @@ public instance : ToJson JsonObject := ⟨JsonObject.toJson⟩
3740

3841
public instance : FromJson JsonObject := ⟨JsonObject.fromJson?⟩
3942

43+
@[inline] public nonrec def contains (obj : JsonObject) (prop : String) : Bool :=
44+
obj.contains prop
45+
4046
public def insertJson (obj : JsonObject) (prop : String) (val : Json) : JsonObject :=
4147
obj.insert prop (toJson val) -- specializes `insert`
4248

tests/lake/tests/env/test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ test_eq "foo" env env ELAN_TOOLCHAIN=foo $LAKE env printenv ELAN_TOOLCHAIN
3838
test_out "foo" env env -u LAKE_CACHE_DIR ELAN_HOME=/ ELAN_TOOLCHAIN=foo \
3939
$LAKE env printenv LAKE_CACHE_DIR
4040
LAKE_CACHE_DIR=foo test_eq "foo" env printenv LAKE_CACHE_DIR
41+
LAKE_CACHE_SERVICE=foo test_eq "foo" env printenv LAKE_CACHE_SERVICE
4142
LEAN_GITHASH=foo test_eq "foo" env printenv LEAN_GITHASH
4243
LEAN_AR=foo test_eq "foo" env printenv LEAN_AR
4344
LEAN_CC=foo test_eq "foo" env printenv LEAN_CC

0 commit comments

Comments
 (0)