diff --git a/.github/build.in.yml b/.github/build.in.yml index 2f91d72210cb0b..4e9a0cdeab6e28 100644 --- a/.github/build.in.yml +++ b/.github/build.in.yml @@ -11,6 +11,9 @@ env: # are valid by-products of the Mathlib build. Build artifacts fetched from Lake's cache do # not necessarily satisfy this property. LAKE_NO_CACHE: true + # Direct Lake to store its artifact cache in `.lake`. + # Prevents Lake from using a directory it may not have permissions to under landrun. + LAKE_CACHE_DIR: .lake/cache concurrency: # label each workflow run; only the latest with each label will run diff --git a/.github/workflows/bors.yml b/.github/workflows/bors.yml index 10ac0fcbc35d31..4af2450378e376 100644 --- a/.github/workflows/bors.yml +++ b/.github/workflows/bors.yml @@ -21,6 +21,9 @@ env: # are valid by-products of the Mathlib build. Build artifacts fetched from Lake's cache do # not necessarily satisfy this property. LAKE_NO_CACHE: true + # Direct Lake to store its artifact cache in `.lake`. + # Prevents Lake from using a directory it may not have permissions to under landrun. + LAKE_CACHE_DIR: .lake/cache concurrency: # label each workflow run; only the latest with each label will run diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 03675f301639f4..2489562d1f687a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,6 +28,9 @@ env: # are valid by-products of the Mathlib build. Build artifacts fetched from Lake's cache do # not necessarily satisfy this property. LAKE_NO_CACHE: true + # Direct Lake to store its artifact cache in `.lake`. + # Prevents Lake from using a directory it may not have permissions to under landrun. + LAKE_CACHE_DIR: .lake/cache concurrency: # label each workflow run; only the latest with each label will run diff --git a/.github/workflows/build_fork.yml b/.github/workflows/build_fork.yml index ff66b0470c4cc9..f48d67b7167276 100644 --- a/.github/workflows/build_fork.yml +++ b/.github/workflows/build_fork.yml @@ -25,6 +25,9 @@ env: # are valid by-products of the Mathlib build. Build artifacts fetched from Lake's cache do # not necessarily satisfy this property. LAKE_NO_CACHE: true + # Direct Lake to store its artifact cache in `.lake`. + # Prevents Lake from using a directory it may not have permissions to under landrun. + LAKE_CACHE_DIR: .lake/cache concurrency: # label each workflow run; only the latest with each label will run diff --git a/Cache/IO.lean b/Cache/IO.lean index a3eb4a8b85e2be..50a82fdff3a1fb 100644 --- a/Cache/IO.lean +++ b/Cache/IO.lean @@ -5,6 +5,7 @@ Authors: Arthur Paulino, Jon Eugster -/ import Cache.Lean +import Lake.Build.Common variable {α : Type} @@ -70,7 +71,7 @@ def CURLBIN := /-- leantar version at https://github.com/digama0/leangz -/ def LEANTARVERSION := - "0.1.15" + "0.1.16-pre" def EXE := if System.Platform.isWindows then ".exe" else "" @@ -111,6 +112,8 @@ structure CacheM.Context where srcSearchPath : SearchPath /-- build directory for proofwidgets -/ proofWidgetsBuildDir : FilePath + /-- directory for the Lake artifact cache -/ + lakeArtifactDir? : Option FilePath @[inherit_doc CacheM.Context] abbrev CacheM := ReaderT CacheM.Context IO @@ -133,13 +136,32 @@ def _root_.Lean.SearchPath.relativize (sp : SearchPath) : IO SearchPath := do let pwd' := pwd.toString ++ System.FilePath.pathSeparator.toString return sp.map fun x => ⟨if x = pwd then "." else x.toString.stripPrefix pwd'⟩ +/-- +Detects the directory Lake uses to cache artifacts. +This is very sensitive to changes in `Lake.Env.computeCache`. +-/ +def getLakeArtifactDir? : IO (Option FilePath) := do + let elan? ← Lake.findElanInstall? + let toolchain ← Lake.Env.computeToolchain + let cache ← Lake.Env.computeCache elan? toolchain + if cache.isDisabled then + return none + let artifactDir := cache.artifactDir + if (← artifactDir.pathExists) then + return artifactDir + else + IO.eprintln <| s!"Warning: Lake's cache directory '{artifactDir}' seems not to exist, \ + most likely `cache` will not work as expected!" + return none + private def CacheM.getContext : IO CacheM.Context := do let sp ← (← getSrcSearchPath).relativize let mathlibSource ← CacheM.mathlibDepPath sp return { mathlibDepPath := mathlibSource, srcSearchPath := sp, - proofWidgetsBuildDir := LAKEPACKAGESDIR / "proofwidgets" / ".lake" / "build"} + proofWidgetsBuildDir := LAKEPACKAGESDIR / "proofwidgets" / ".lake" / "build" + lakeArtifactDir? := ← getLakeArtifactDir?} /-- Run a `CacheM` in `IO` by loading the context from `LEAN_SRC_PATH`. -/ def CacheM.run (f : CacheM α) : IO α := do ReaderT.run f (← getContext) @@ -286,11 +308,33 @@ def hashes (hashMap : ModuleHashMap) : Lean.RBTree UInt64 compare := end ModuleHashMap +structure BuildPaths where + trace : FilePath + artifactDir : FilePath := "." + localPaths : Array FilePath := #[] + cachePaths : Array FilePath := #[] + +/-- +Extracts the Lake module build output hashes from a trace file. +This is sensitive to changes in `Lake.readTraceFile`. +-/ +def readTraceOutputs (path : FilePath) : BaseIO (Option Lake.ModuleOutputHashes) := do + let .ok contents ← IO.FS.readFile path |>.toBaseIO + | return none + let .ok data := Lake.BuildMetadata.parse contents + | return none + let some outs := data.outputs? + | return none + let .ok hashes := fromJson? outs + | return none + return hashes + + /-- Given a module name, concatenates the paths to its build files. Each build file also has a `Bool` indicating whether that file is required for caching to proceed. -/ -def mkBuildPaths (mod : Name) : CacheM <| List (FilePath × Bool) := do +def mkBuildPaths (mod : Name) : CacheM (Option BuildPaths) := do /- TODO: if `srcDir` or other custom lake layout options are set in the `lean_lib`, `packageSrcDir / LIBDIR` might be the wrong path! @@ -307,48 +351,71 @@ def mkBuildPaths (mod : Name) : CacheM <| List (FilePath × Bool) := do if !(← (packageDir / ".lake").isDir) then IO.eprintln <| s!"Warning: {packageDir / ".lake"} seems not to exist, most likely `cache` \ will not work as expected!" - - return [ - -- Note that `packCache` below requires that the `.trace` file is first in this list. - (packageDir / LIBDIR / path.withExtension "trace", true), - (packageDir / LIBDIR / path.withExtension "olean", true), - (packageDir / LIBDIR / path.withExtension "olean.hash", true), - (packageDir / LIBDIR / path.withExtension "ilean", true), - (packageDir / LIBDIR / path.withExtension "ilean.hash", true), - (packageDir / IRDIR / path.withExtension "c", true), - (packageDir / IRDIR / path.withExtension "c.hash", true), - (packageDir / LIBDIR / path.withExtension "extra", false)] - -/-- Check that all required build files exist. -/ -def allExist (paths : List (FilePath × Bool)) : IO Bool := do - for (path, required) in paths do - if required then if !(← path.pathExists) then return false - pure true + let trace := packageDir / LIBDIR / path.withExtension "trace" + if let some artifactDir := (← read).lakeArtifactDir? then + if let some outs ← readTraceOutputs trace then + let cachePaths : Array FilePath := #[ + s!"{outs.olean}.olean", + s!"{outs.ilean}.ilean", + s!"{outs.c}.c" + ] + unless ← allExists cachePaths artifactDir do + return none + return some {trace, artifactDir, cachePaths} + else + let some olean ← Lake.Hash.load? (packageDir / LIBDIR / path.withExtension "olean.hash") + | return none + let some ilean ← Lake.Hash.load? (packageDir / LIBDIR / path.withExtension "ilean.hash") + | return none + let some c ← Lake.Hash.load? (packageDir / IRDIR / path.withExtension "c.hash") + | return none + let cachePaths : Array FilePath := #[ + s!"{olean}.olean", + s!"{ilean}.ilean", + s!"{c}.c" + ] + unless ← allExists cachePaths artifactDir do + return none + return some {trace, artifactDir, cachePaths} + let localPaths := #[ + packageDir / LIBDIR / path.withExtension "olean", + packageDir / LIBDIR / path.withExtension "olean.hash", + packageDir / LIBDIR / path.withExtension "ilean", + packageDir / LIBDIR / path.withExtension "ilean.hash", + packageDir / IRDIR / path.withExtension "c", + packageDir / IRDIR / path.withExtension "c.hash" + ] + unless ← allExists localPaths do + return none + return some {trace, localPaths} +where + allExists paths (root : FilePath := ".") := paths.allM (root / · |>.pathExists) /-- Compresses build files into the local cache and returns an array with the compressed files -/ def packCache (hashMap : ModuleHashMap) (overwrite verbose unpackedOnly : Bool) - (comment : Option String := none) : + (commit? : Option String := none) : CacheM <| Array String := do IO.FS.createDirAll CACHEDIR IO.println "Compressing cache" let sp := (← read).srcSearchPath let mut acc := #[] let mut tasks := #[] + let initComment := if let some c := commit? then s!"git=mathlib4@{c};" else "" for (mod, hash) in hashMap.toList do let sourceFile ← Lean.findLean sp mod let zip := hash.asLTar let zipPath := CACHEDIR / zip - let buildPaths ← mkBuildPaths mod - if ← allExist buildPaths then + if let some buildPaths ← mkBuildPaths mod then if overwrite || !(← zipPath.pathExists) then acc := acc.push (sourceFile, zip) tasks := tasks.push <| ← IO.asTask do - -- Note here we require that the `.trace` file is first - -- in the list generated by `mkBuildPaths`. - let trace :: args := (← buildPaths.filterM (·.1.pathExists)) |>.map (·.1.toString) - | unreachable! - runCmd (← getLeanTar) <| #[zipPath.toString, trace] ++ - (if let some c := comment then #["-c", s!"git=mathlib4@{c}"] else #[]) ++ args + let usesLakeCache := !buildPaths.cachePaths.isEmpty + let comment := s!"{initComment}lakeCache={usesLakeCache}" + let opts := #["-C", "."] ++ if usesLakeCache then #["-C", buildPaths.artifactDir.toString] else #[] + let mut args := opts ++ #[zipPath.toString, buildPaths.trace.toString, "-c", comment] + args := buildPaths.localPaths.foldl (· ++ #["-i", "0", toString ·]) args + args := buildPaths.cachePaths.foldl (· ++ #["-i", "1", toString ·]) args + runCmd (← getLeanTar) args else if !unpackedOnly then acc := acc.push (sourceFile, zip) for task in tasks do @@ -394,13 +461,13 @@ def unpackCache (hashMap : ModuleHashMap) (force : Bool) : CacheM Unit := do -/ let isMathlibRoot ← isMathlibRoot let mathlibDepPath := (← read).mathlibDepPath.toString + let artifactDir? := (← read).lakeArtifactDir? let config : Array Lean.Json := hashMap.fold (init := #[]) fun config mod hash => let pathStr := s!"{CACHEDIR / hash.asLTar}" - if isMathlibRoot || !isFromMathlib mod then - config.push <| .str pathStr - else - -- only mathlib files, when not in the mathlib4 repo, need to be redirected - config.push <| .mkObj [("file", pathStr), ("base", mathlibDepPath)] + -- only mathlib files, when not in the mathlib4 repo, need to be redirected + let localDir := if isMathlibRoot || !isFromMathlib mod then "." else mathlibDepPath + let baseDirs := if let some dir := artifactDir? then #[localDir, dir.toString] else #[localDir] + config.push <| .mkObj [("file", pathStr), ("base", toJson baseDirs)] stdin.putStr <| Lean.Json.compress <| .arr config let exitCode ← child.wait if exitCode != 0 then throw <| IO.userError s!"leantar failed with error code {exitCode}" @@ -485,7 +552,7 @@ def leanModulesFromSpec (sp : SearchPath) (argₛ : String) : (i.e. `Mathlib.Data` when only the folder `Mathlib/Data/` but no \ file `Mathlib/Data.lean` exists) is not supported yet!" else - return .error "Invalid argument: non-existing module {mod}" + return .error s!"Invalid argument: non-existing module {mod}" /-- Parse command line arguments. diff --git a/lakefile.lean b/lakefile.lean index 5e1e702d86735d..b560d56ab6ef0d 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -41,6 +41,7 @@ abbrev mathlibLeanOptions := #[ package mathlib where testDriver := "MathlibTest" + enableArtifactCache := true -- These are additional settings which do not affect the lake hash, -- so they can be enabled in CI and disabled locally or vice versa. -- Warning: Do not put any options here that actually change the olean files,