Skip to content

[CAS] Add a new API in ObjectStore to import a CAS tree #10819

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: stable/20240723
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CAS/ObjectStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,10 @@ class ObjectStore {
/// Validate the whole node tree.
Error validateTree(ObjectRef Ref);

/// Import an object from another CAS. This imports the full tree rooted at
/// \p Other from \p Upstream into this CAS.
Expected<ObjectRef> importObject(ObjectStore &Upstream, ObjectRef Other);

/// Print the ObjectStore internals for debugging purposes.
virtual void print(raw_ostream &) const {}
void dump() const;
Expand Down
82 changes: 82 additions & 0 deletions llvm/lib/CAS/ObjectStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include <deque>

using namespace llvm;
using namespace llvm::cas;
Expand Down Expand Up @@ -217,6 +218,87 @@ Error ObjectStore::validateTree(ObjectRef Root) {
return Error::success();
}

/// Import the object \p Other, together with the objects it references, from
/// \p Upstream into this CAS.
///
/// \param Upstream the CAS that \p Other belongs to.
/// \param Other a reference, valid in \p Upstream, to the root of the tree.
/// \returns a reference in this CAS to the imported root, or \p Other itself
/// when \p Upstream is this CAS. Any error produced while loading from
/// \p Upstream or storing into this CAS is returned to the caller.
Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream,
                                              ObjectRef Other) {
  // There is no work to do if importing from self.
  if (this == &Upstream)
    return Other;

  // FIXME: This replicates the logic in `OnDiskGraphDB::importFullTree`.
  // Copies the tree from upstream depth-first: every child is stored (or found
  // to exist already) before the node that references it, so a process dying
  // mid-import does not leave a stored node whose children were never copied.
  // NOTE(review): an earlier revision of this comment said that nodes missing
  // from upstream are tolerated; here every error from `Upstream.load` is
  // propagated, so confirm how `load` reports a missing node.

  /// Visitation state for the node being copied and all of its parents. Holds
  /// information from the upstream CAS only.
  struct UpstreamCursor {
    ObjectHandle Node; ///< Loaded upstream node.
    size_t RefsCount;  ///< Number of references \c Node has.
    size_t NextRef;    ///< Index of the next reference of \c Node to visit.
  };
  SmallVector<UpstreamCursor, 16> CursorStack;
  /// References valid in *this* CAS, for nodes that were either just stored or
  /// already existed. Once all children of a cursor have been visited, the top
  /// \c RefsCount entries are that node's children, in order.
  SmallVector<ObjectRef, 128> PrimaryRefStack;

  // Start visiting \p Node. References are read on demand through
  // \c UpstreamCursor::NextRef rather than copied into a per-node container.
  auto enqueueNode = [&](ObjectHandle Node) {
    const size_t NumRefs = Upstream.getNumRefs(Node);
    CursorStack.push_back({Node, NumRefs, 0});
  };

  auto UpstreamHandle = Upstream.load(Other);
  if (!UpstreamHandle)
    return UpstreamHandle.takeError();
  enqueueNode(*UpstreamHandle);

  while (!CursorStack.empty()) {
    // Only valid until the next push/pop on CursorStack.
    UpstreamCursor &Cur = CursorStack.back();
    if (Cur.NextRef == Cur.RefsCount) {
      // All children are done: copy the node data into the primary store.
      // The top \c RefsCount entries of \p PrimaryRefStack are the references
      // of the current node.
      assert(PrimaryRefStack.size() >= Cur.RefsCount);
      const size_t FirstChild = PrimaryRefStack.size() - Cur.RefsCount;
      auto Refs = ArrayRef(PrimaryRefStack).slice(FirstChild);
      auto NewNode = store(Refs, Upstream.getData(Cur.Node));
      if (!NewNode)
        return NewNode.takeError();

      // Replace the children on the stack with the node that was just stored.
      PrimaryRefStack.truncate(FirstChild);
      CursorStack.pop_back();

      PrimaryRefStack.push_back(*NewNode);
      continue;
    }

    // Take the next child and check if it exists already in this CAS.
    ObjectRef CurrentID = Upstream.readRef(Cur.Node, Cur.NextRef++);
    auto Ref = getReference(Upstream.getID(CurrentID));
    if (Ref) {
      // If it exists already, just record the primary reference; the child is
      // not traversed.
      PrimaryRefStack.push_back(*Ref);
      continue;
    }

    // Load the child from upstream and descend into it.
    auto ChildHandle = Upstream.load(CurrentID);
    if (LLVM_UNLIKELY(!ChildHandle))
      return ChildHandle.takeError();

    enqueueNode(*ChildHandle);
  }

  // Only the reference of the root remains.
  assert(PrimaryRefStack.size() == 1);
  return PrimaryRefStack.front();
}

std::unique_ptr<MemoryBuffer>
ObjectProxy::getMemoryBuffer(StringRef Name,
bool RequiresNullTerminator) const {
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/tools/llvm-cas/ingest.test
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,7 @@ CHECK-ERROR: llvm-cas: get-cas-id: No such file or directory
RUN: llvm-cas --cas %t/cas --ls-node-refs @%t/cas.id 2>&1 | FileCheck %s --check-prefix=CHECK-NODE-REFS
CHECK-NODE-REFS: llvmcas://
CHECK-NODE-REFS: llvmcas://

// Test importing the entire tree from an upstream CAS.
RUN: llvm-cas --cas %t/new-cas --upstream-cas %t/cas --import-from-upstream @%t/cas.id
RUN: llvm-cas --cas %t/new-cas --ls-tree-recursive @%t/cas.id | FileCheck %s
33 changes: 30 additions & 3 deletions llvm/tools/llvm-cas/llvm-cas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ static int getCASIDForFile(ObjectStore &CAS, const CASID &ID,
ArrayRef<std::string> Path);
static int import(ObjectStore &CAS, ObjectStore &UpstreamCAS,
ArrayRef<std::string> Objects);
static int importFromUpstream(ObjectStore &CAS, ObjectStore &UpstreamCAS,
ArrayRef<std::string> Objects);
static int putCacheKey(ObjectStore &CAS, ActionCache &AC,
ArrayRef<std::string> Objects);
static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID);
Expand Down Expand Up @@ -115,6 +117,7 @@ int main(int Argc, char **Argv) {
MergeTrees,
GetCASIDForFile,
Import,
ImportFromUpstream,
PutCacheKey,
GetCacheResult,
CheckLockFiles,
Expand Down Expand Up @@ -142,6 +145,8 @@ int main(int Argc, char **Argv) {
clEnumValN(MergeTrees, "merge", "merge paths/cas-ids"),
clEnumValN(GetCASIDForFile, "get-cas-id", "get cas id for file"),
clEnumValN(Import, "import", "import objects from another CAS"),
clEnumValN(ImportFromUpstream, "import-from-upstream",
"import object from upstream CAS"),
clEnumValN(PutCacheKey, "put-cache-key",
"set a value for a cache key"),
clEnumValN(GetCacheResult, "get-cache-result",
Expand Down Expand Up @@ -237,11 +242,15 @@ int main(int Argc, char **Argv) {
ExitOnErr(createStringError(inconvertibleErrorCode(),
"missing <object> to operate on"));

if (Command == Import) {
if (Command == Import || Command == ImportFromUpstream) {
if (!UpstreamCAS)
ExitOnErr(createStringError(inconvertibleErrorCode(),
"missing '-upstream-cas'"));
return import(*CAS, *UpstreamCAS, Inputs);

if (Command == Import)
return import(*CAS, *UpstreamCAS, Inputs);

return importFromUpstream(*UpstreamCAS, *CAS, Inputs);
}

if (Command == PutCacheKey || Command == GetCacheResult) {
Expand Down Expand Up @@ -671,6 +680,24 @@ static int import(ObjectStore &CAS, ObjectStore &UpstreamCAS,
return 0;
}

/// Implements the `import-from-upstream` action.
///
/// Each entry of \p Objects is parsed as a CAS ID and looked up in \p FromCAS;
/// the object is then imported into \p ToCAS via ObjectStore::importObject and
/// the ID it has in \p ToCAS is printed on its own line to stdout.
///
/// \returns 0 once every object has been imported. Parse errors, unknown IDs
/// and import failures are reported through ExitOnError.
static int importFromUpstream(ObjectStore &FromCAS, ObjectStore &ToCAS,
                              ArrayRef<std::string> Objects) {
  ExitOnError ExitOnErr("llvm-cas: import-from-upstream: ");
  for (const std::string &Object : Objects) {
    CASID ID = ExitOnErr(FromCAS.parseID(Object));

    // Found in the source CAS: import it and report its ID in the destination.
    if (auto SourceRef = FromCAS.getReference(ID)) {
      ObjectRef Imported = ExitOnErr(ToCAS.importObject(FromCAS, *SourceRef));
      llvm::outs() << ToCAS.getID(Imported).toString() << "\n";
      continue;
    }

    // The ID parsed but the source CAS has no such object.
    ExitOnErr(createStringError(inconvertibleErrorCode(),
                                "input not found: " + ID.toString()));
    return 1;
  }
  return 0;
}

static int putCacheKey(ObjectStore &CAS, ActionCache &AC,
ArrayRef<std::string> Objects) {
ExitOnError ExitOnErr("llvm-cas: put-cache-key: ");
Expand Down Expand Up @@ -790,4 +817,4 @@ static int prune(cas::ObjectStore &CAS) {
ExitOnError ExitOnErr("llvm-cas: prune: ");
ExitOnErr(CAS.pruneStorageData());
return 0;
}
}