diff --git a/core/foundation/inc/ROOT/StringUtils.hxx b/core/foundation/inc/ROOT/StringUtils.hxx index 8439e1423121e..5891a725790aa 100644 --- a/core/foundation/inc/ROOT/StringUtils.hxx +++ b/core/foundation/inc/ROOT/StringUtils.hxx @@ -43,6 +43,16 @@ std::string Join(const std::string &sep, StringCollection_t &&strings) std::string Round(double value, double error, unsigned int cutoff = 1, std::string_view delim = "#pm"); +inline bool StartsWith(std::string_view string, std::string_view prefix) +{ + return string.size() >= prefix.size() && string.substr(0, prefix.size()) == prefix; +} + +inline bool EndsWith(std::string_view string, std::string_view suffix) +{ + return string.size() >= suffix.size() && string.substr(string.size() - suffix.size(), suffix.size()) == suffix; +} + } // namespace ROOT #endif diff --git a/io/io/CMakeLists.txt b/io/io/CMakeLists.txt index 39212c2cc3c03..4b046d55862f2 100644 --- a/io/io/CMakeLists.txt +++ b/io/io/CMakeLists.txt @@ -55,6 +55,7 @@ ROOT_LINKER_LIBRARY(RIO src/TStreamerInfoReadBuffer.cxx src/TStreamerInfoWriteBuffer.cxx src/TZIPFile.cxx + src/RFile.cxx $ LIBRARIES ${CMAKE_DL_LIBS} @@ -73,6 +74,7 @@ if(uring) endif() ROOT_GENERATE_DICTIONARY(G__RIO + ROOT/RFile.hxx ROOT/RRawFile.hxx ROOT/RRawFileTFile.hxx ${rawfile_local_headers} diff --git a/io/io/inc/ROOT/RFile.hxx b/io/io/inc/ROOT/RFile.hxx new file mode 100644 index 0000000000000..821c92fba432f --- /dev/null +++ b/io/io/inc/ROOT/RFile.hxx @@ -0,0 +1,204 @@ +/// \file ROOT/RFile.hxx +/// \ingroup Base ROOT7 +/// \author Giacomo Parolini +/// \date 2025-03-19 +/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback +/// is welcome! 
+ +#ifndef ROOT7_RFile +#define ROOT7_RFile + +#include + +#include +#include +#include + +class TFile; +class TKey; + +namespace ROOT { +namespace Experimental { + +class RFile; +struct RFileKeyInfo; + +namespace Internal { + +ROOT::RLogChannel &RFileLog(); + +} // namespace Internal + +/** +\class ROOT::Experimental::RFile +\ingroup RFile +\brief An interface to read from, or write to, a ROOT file, as well as performing other common operations. + +## When and why should you use RFile + +RFile is a modern and minimalistic interface to ROOT files, both local and remote, that can be used instead of TFile +when the following conditions are met: +- you want a simple interface that makes it easy to do things right and hard to do things wrong; +- you only need basic Put/Get operations and don't need the more advanced TFile/TDirectory functionalities; +- you want more robustness and better error reporting for those operations; +- you want clearer ownership semantics expressed through the type system rather than having objects "automagically" + handled for you via implicit ownership of raw pointers. + +RFile doesn't try to cover the entirety of use cases covered by TFile/TDirectory/TDirectoryFile and is not +a 1:1 replacement for them. It is meant to simplify the most common use cases and make them easier to handle by +minimizing the amount of ROOT-specific quirks and conforming to more standard C++ practices. + +## Ownership model + +RFile handles ownership via smart pointers, typically std::unique_ptr. + +When getting an object from the file (via RFile::Get) you get back a unique copy of the object. Calling `Get` on the +same object twice produces two independent clones of the object. The ownership over that object is solely on the caller +and not shared with the RFile. Therefore, the object will remain valid after closing or destroying the RFile that +generated it. 
This also means that any modifications made to the object are **not** reflected in the file automatically: +to update the object in the file you need to write it again (via RFile::Overwrite). + +RFile::Put and RFile::Overwrite are the way to write objects to the file. Both methods take a const reference to the +object to write and don't change the ownership of the object in any way. Calling Put or Overwrite doesn't guarantee that +the object is immediately written to the underlying storage: to ensure that, you need to call RFile::Flush (or close the +file). + +## Directories + +Unlike TFile, the RFile class itself is not also a "directory". In fact, there is no RDirectory class at all. + +Directories are still an existing concept in RFile (since they are a concept in the ROOT binary format), +but they are usually interacted with indirectly, via the use of filesystem-like string-based paths. If you Put an object +in an RFile under the path "path/to/object", "object" will be stored under directory "to" which is in turn stored under +directory "path". This hierarchy is encoded in the ROOT file itself and it can provide some optimizations and/or +conveniences when querying objects. + +For the most part, it is convenient to think about RFile in terms of a key-value storage where string-based paths are +used to refer to arbitrary objects. However, given the hierarchical nature of ROOT files, certain filesystem-like +properties are applied to paths, for ease of use: the '/' character is treated specially as the directory separator; +multiple '/' in a row are collapsed into one (since RFile doesn't allow directories with empty names). + +At the moment, RFile doesn't allow getting directories via Get, nor writing ones via Put (this may change in the +future). 
+ +## Sample usage +Opening an RFile (for writing) and writing an object to it: +~~~{.cpp} +auto rfile = ROOT::Experimental::RFile::Recreate("my_file.root"); +auto myObj = TH1D("h", "h", 10, 0, 1); +rfile->Put(myObj.GetName(), myObj); +~~~ + +Opening an RFile (for reading) and reading an object from it: +~~~{.cpp} +auto rfile = ROOT::Experimental::RFile::Open("my_file.root"); +auto myObj = rfile->Get("h"); +~~~ +*/ +class RFile final { + /// Bit flags forwarded to PutUntyped() to select the overwrite policy (see Put() and Overwrite()). + enum PutFlags { + kPutAllowOverwrite = 0x1, + kPutOverwriteKeepCycle = 0x2, + }; + + /// The underlying file. It is reset by Close(), after which GetUntyped() and PutUntyped() throw. + std::unique_ptr fFile; + + // Outlined to avoid including TFile.h + explicit RFile(std::unique_ptr file); + + /// Gets object `path` from the file and returns an **owning** pointer to it. + /// The caller should immediately wrap it into a unique_ptr of the type described by `type`. + [[nodiscard]] void *GetUntyped(std::string_view path, const std::type_info &type) const; + + /// Writes `obj` to file, without taking its ownership. + void PutUntyped(std::string_view path, const std::type_info &type, const void *obj, std::uint32_t flags); + + /// \see Put + template + void PutInternal(std::string_view path, const T &obj, std::uint32_t flags) + { + PutUntyped(path, typeid(T), &obj, flags); + } + + /// Given `path`, returns the TKey corresponding to the object at that path (assuming the path is fully split, i.e. + /// "a/b/c" always means "object 'c' inside directory 'b' inside directory 'a'"). + /// IMPORTANT: `path` must have been validated/normalized via ValidateAndNormalizePath() (see RFile.cxx). + TKey *GetTKey(std::string_view path) const; + +public: + // This is arbitrary, but it's useful to avoid pathological cases + static constexpr int kMaxPathNesting = 1000; + + ///// Factory methods ///// + + /// Opens the file for reading. `path` may be a regular file path or a remote URL. + /// \throw ROOT::RException if the file at `path` could not be opened. 
+ static std::unique_ptr Open(std::string_view path); + + /// Opens the file for reading/writing, overwriting it if it already exists. + /// \throw ROOT::RException if a file could not be created at `path` (e.g. if the specified + /// directory tree does not exist). + static std::unique_ptr Recreate(std::string_view path); + + /// Opens the file for updating, creating a new one if it doesn't exist. + /// \throw ROOT::RException if the file at `path` could neither be read nor created + /// (e.g. if the specified directory tree does not exist). + static std::unique_ptr Update(std::string_view path); + + ///// Instance methods ///// + + // Outlined to avoid including TFile.h + ~RFile(); + + /// Retrieves an object from the file. + /// `path` should be a string such that `IsValidPath(path) == true`, otherwise an exception will be thrown. + /// See \ref ValidateAndNormalizePath() for info about valid path names. + /// If the object is not there returns a null pointer. + template + std::unique_ptr Get(std::string_view path) const + { + void *obj = GetUntyped(path, typeid(T)); + return std::unique_ptr(static_cast(obj)); + } + + /// Puts an object into the file. + /// The application retains ownership of the object. + /// `path` should be a string such that `IsValidPath(path) == true`, otherwise an exception will be thrown. + /// See \ref ValidateAndNormalizePath() for info about valid path names. + /// + /// Throws a RException if `path` already identifies a valid object or directory. + /// Throws a RException if the file was opened in read-only mode. + template + void Put(std::string_view path, const T &obj) + { + PutInternal(path, obj, /* flags = */ 0); + } + + /// Puts an object into the file, overwriting any previously-existing object at that path. + /// The application retains ownership of the object. + /// + /// If an object already exists at that path, it is kept as a backup cycle unless `backupPrevious` is false. 
+ /// Note that even if `backupPrevious` is false, any existing cycle except the latest will be preserved. + /// + /// Throws a RException if `path` is already the path of a directory. + /// Throws a RException if the file was opened in read-only mode. + template + void Overwrite(std::string_view path, const T &obj, bool backupPrevious = true) + { + std::uint32_t flags = kPutAllowOverwrite; + flags |= backupPrevious * kPutOverwriteKeepCycle; + PutInternal(path, obj, flags); + } + + /// Writes all objects and the file structure to disk. + /// Returns the number of bytes written. + size_t Flush(); + + /// Flushes the RFile if needed and closes it, disallowing any further reading or writing. + void Close(); +}; + +} // namespace Experimental +} // namespace ROOT + +#endif diff --git a/io/io/src/RFile.cxx b/io/io/src/RFile.cxx new file mode 100644 index 0000000000000..05e6462992f55 --- /dev/null +++ b/io/io/src/RFile.cxx @@ -0,0 +1,381 @@ +/// \file v7/src/RFile.cxx +/// \ingroup Base ROOT7 +/// \author Giacomo Parolini +/// \date 2025-03-19 +/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback +/// is welcome! + +#include "ROOT/RFile.hxx" + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +ROOT::RLogChannel &ROOT::Experimental::Internal::RFileLog() +{ + static ROOT::RLogChannel sLog("ROOT.File"); + return sLog; +} + +using ROOT::Experimental::RFile; +using ROOT::Experimental::Internal::RFileLog; + +static void CheckExtension(std::string_view path) +{ + if (ROOT::EndsWith(path, ".xml")) { + throw ROOT::RException(R__FAIL("ROOT::RFile doesn't support XML files.")); + } + + if (!ROOT::EndsWith(path, ".root")) { + R__LOG_WARNING(RFileLog()) << "ROOT::RFile only supports ROOT files. 
The preferred file extension is \".root\""; + } +} + +namespace { +enum class ENameCycleError { + kNoError, + kAnyCycle, + kInvalidSyntax, + kCycleTooLarge, + kNameEmpty, + kCOUNT +}; + +struct RNameCycleResult { + std::string fName; + std::optional fCycle; + ENameCycleError fError; +}; +} // namespace + +static const char *ToString(ENameCycleError err) +{ + static const char *const kErrorStr[] = {"", "", "invalid syntax", "cycle is too large", "name is empty"}; + static_assert(std::size(kErrorStr) == static_cast(ENameCycleError::kCOUNT)); + return kErrorStr[static_cast(err)]; +} + +static ENameCycleError DecodeNumericCycle(const char *str, std::optional &out) +{ + uint32_t res = 0; + do { + if (!isdigit(*str)) + return ENameCycleError::kInvalidSyntax; + if (res * 10 > std::numeric_limits::max()) + return ENameCycleError::kCycleTooLarge; + res *= 10; + res += *str - '0'; + } while (*++str); + + assert(res < std::numeric_limits::max()); + out = static_cast(res); + + return ENameCycleError::kNoError; +} + +static RNameCycleResult DecodeNameCycle(std::string_view nameCycleRaw) +{ + RNameCycleResult result{}; + + if (nameCycleRaw.empty()) + return result; + + // Scan the string to find the name length and the semicolon + std::size_t semicolonIdx = nameCycleRaw.find_first_of(';'); + + if (semicolonIdx == 0) { + result.fError = ENameCycleError::kNameEmpty; + return result; + } + + // Verify that we have at most one ';' + if (nameCycleRaw.substr(semicolonIdx + 1).find_first_of(';') != std::string_view::npos) { + result.fError = ENameCycleError::kInvalidSyntax; + return result; + } + + result.fName = nameCycleRaw.substr(0, semicolonIdx); + if (semicolonIdx < std::string_view::npos) { + if (semicolonIdx == nameCycleRaw.length() - 1 && nameCycleRaw[semicolonIdx] == '*') + result.fError = ENameCycleError::kAnyCycle; + else + result.fError = DecodeNumericCycle(nameCycleRaw.substr(semicolonIdx + 1).data(), result.fCycle); + } + + return result; +} + +/// This function first 
validates, then normalizes the given path in place. +/// +/// Returns an empty string if `path` is a suitable path to store an object into a RFile, +/// otherwise returns a description of why that is not the case. +/// +/// A valid object path must: +/// - not be empty +/// - not contain the character '.' +/// - not contain ASCII control characters or whitespace characters (including tab or newline). +/// - not contain more than RFile::kMaxPathNesting '/' separators once normalized (i.e. more than +/// RFile::kMaxPathNesting + 1 path fragments) +/// - not end with a '/' +/// +/// In addition, when *writing* an object to RFile, the character ';' is also banned. +/// +/// Passing an invalid path to either Put or Get will cause it to throw an exception. +/// +/// If required, `path` is modified to make its hierarchy-related meaning consistent. This entails: +/// - combining any consecutive '/' into a single one; +/// - stripping any leading '/'. +/// +static std::string ValidateAndNormalizePath(std::string &path) +{ + ////// First, validate path. + + if (path.empty()) + return "path cannot be empty"; + + if (path.back() == '/') + return "path cannot end with a '/'"; + + bool valid = true; + for (char ch : path) { + // Disallow control characters, tabs, newlines, whitespace and dot. + // NOTE: not short-circuiting or early returning to enable loop vectorization. + // NOTE(review): with a signed `char`, `ch < 33` also rejects every byte >= 0x80 (e.g. UTF-8 sequences), while + // with an unsigned `char` those bytes are accepted; DEL (127) is accepted either way. Confirm which behavior is + // intended. + valid &= !(ch < 33 || ch == '.'); + } + if (!valid) + return "path cannot contain control characters, whitespaces or dots"; + + //// Path is valid so far, normalize it. 
+ + // Strip all leading '/'. If there are none, find_first_not_of() returns 0 and erase() is a no-op. + // (A path made only of '/' cannot reach this point, because paths ending with '/' were rejected above.) + path.erase(0, path.find_first_not_of('/')); + + // Remove duplicate consecutive '/' + const auto it = std::unique(path.begin(), path.end(), [](char a, char b) { return (a == '/' && b == '/'); }); + path.erase(it, path.end()); + + //// After the path has been normalized, check the nesting level by counting how many slashes it contains. + const auto nesting = std::count(path.begin(), path.end(), '/'); + if (nesting > RFile::kMaxPathNesting) + return "path contains too many levels of nesting"; + + return ""; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +RFile::RFile(std::unique_ptr file) : fFile(std::move(file)) {} + +RFile::~RFile() = default; + +std::unique_ptr RFile::Open(std::string_view path) +{ + CheckExtension(path); + + TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe? + auto tfile = std::unique_ptr(TFile::Open(std::string(path).c_str(), "READ_WITHOUT_GLOBALREGISTRATION")); + if (!tfile || tfile->IsZombie()) + throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for reading")); + + auto rfile = std::unique_ptr(new RFile(std::move(tfile))); + return rfile; +} + +std::unique_ptr RFile::Update(std::string_view path) +{ + CheckExtension(path); + + TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe? + auto tfile = std::unique_ptr(TFile::Open(std::string(path).c_str(), "UPDATE_WITHOUT_GLOBALREGISTRATION")); + if (!tfile || tfile->IsZombie()) + throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for updating")); + + auto rfile = std::unique_ptr(new RFile(std::move(tfile))); + return rfile; +} + +std::unique_ptr RFile::Recreate(std::string_view path) +{ + CheckExtension(path); + + TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe? 
+ auto tfile = std::unique_ptr(TFile::Open(std::string(path).c_str(), "RECREATE_WITHOUT_GLOBALREGISTRATION")); + if (!tfile || tfile->IsZombie()) + throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for writing")); + + auto rfile = std::unique_ptr(new RFile(std::move(tfile))); + return rfile; +} + +TKey *RFile::GetTKey(std::string_view path) const +{ + // In RFile, differently from TFile, when dealing with a path like "a/b/c", we always consider it to mean + // "object 'c' in subdirectory 'b' of directory 'a'". We don't try to get any other of the possible combinations, + // including the object called "a/b/c". + std::string fullPath = std::string(path); + char *dirName = fullPath.data(); + char *restOfPath = strchr(dirName, '/'); + TDirectory *dir = fFile.get(); + while (restOfPath) { + // Truncate `dirName` to the position of this '/'. + *restOfPath = 0; + ++restOfPath; + // `restOfPath` should always be a non-empty string unless `path` ends with '/' (which it shouldn't, as we are + // supposed to have normalized it before calling this function). + assert(*restOfPath); + + dir = dir->GetDirectory(dirName); + if (!dir) + return nullptr; + + dirName = restOfPath; + restOfPath = strchr(restOfPath, '/'); + } + // NOTE: after this loop `dirName` contains the base name of the object. + + // Get the leaf object from the innermost directory. + TKey *key = dir->FindKey(dirName); + if (key) { + // For some reason, FindKey will not return nullptr if we asked for a specific cycle and that cycle + // doesn't exist. It will instead return any key whose cycle is *at most* the requested one. + // This is very confusing, so in RFile we actually return null if the requested cycle is not there. 
+ RNameCycleResult res = DecodeNameCycle(dirName); + if (res.fError != ENameCycleError::kAnyCycle) { + if (res.fError != ENameCycleError::kNoError) { + R__LOG_ERROR(RFileLog()) << "error decoding namecycle '" << dirName << "': " << ToString(res.fError); + key = nullptr; + } else if (res.fCycle && *res.fCycle != key->GetCycle()) { + key = nullptr; + } + } + } + return key; +} + +void *RFile::GetUntyped(std::string_view pathSV, const std::type_info &type) const +{ + if (!fFile) + throw ROOT::RException(R__FAIL("File has been closed")); + + std::string path{pathSV}; + + const TClass *cls = TClass::GetClass(type); + if (!cls) + throw ROOT::RException(R__FAIL(std::string("Could not determine type of object ") + path)); + + if (auto err = ValidateAndNormalizePath(path); !err.empty()) + throw RException(R__FAIL("Invalid object path '" + path + "': " + err)); + + TKey *key = GetTKey(path); + void *obj = key ? key->ReadObjectAny(cls) : nullptr; + + if (obj) { + // Disavow any ownership on `obj` + if (auto autoAddFunc = cls->GetDirectoryAutoAdd(); autoAddFunc) { + autoAddFunc(obj, nullptr); + } + } else if (key && !GetROOT()->IsBatch()) { + R__LOG_WARNING(RFileLog()) << "Tried to get object '" << path << "' of type " << cls->GetName() + << " but that path contains an object of type " << key->GetClassName(); + } + + return obj; +} + +void RFile::PutUntyped(std::string_view pathSV, const std::type_info &type, const void *obj, std::uint32_t flags) +{ + const TClass *cls = TClass::GetClass(type); + if (!cls) + throw ROOT::RException(R__FAIL(std::string("Could not determine type of object ") + std::string(pathSV))); + + std::string path{pathSV}; + if (auto err = ValidateAndNormalizePath(path); !err.empty()) + throw RException(R__FAIL("Invalid object path '" + path + "': " + err)); + + if (path.find_first_of(';') != std::string_view::npos) { + throw RException( + R__FAIL("Invalid object path '" + path + + "': character ';' is used to specify an object cycle, which only makes sense 
when reading.")); + } + + if (!fFile) + throw ROOT::RException(R__FAIL("File has been closed")); + + if (!fFile->IsWritable()) + throw ROOT::RException(R__FAIL("File is not writable")); + + // If `path` refers to a subdirectory, make sure we always write in an actual TDirectory, + // otherwise we may have a mix of top-level objects called "a/b/c" and actual directory + // structures. + // Sadly, TFile does nothing to prevent this and will happily write "a/b" even if there + // is already a directory "a" containing an object "b". We don't want that ambiguity here, so we take extra steps + // to ensure it doesn't happen. + const auto tokens = ROOT::Split(path, "/"); + const auto FullPathUntil = [&tokens](auto idx) { + return ROOT::Join("/", std::span{tokens.data(), idx + 1}); + }; + TDirectory *dir = fFile.get(); + for (auto tokIdx = 0u; tokIdx < tokens.size() - 1; ++tokIdx) { + // Alas, not only does mkdir not fail if the file already contains an object "a/b" and you try + // to create dir "a", but even when it does fail it doesn't tell you why. + // We obviously don't want to allow the coexistence of regular object named "a/b" and the directory + // named "a", so we manually check if each level of nesting doesn't exist already as a non-directory. 
+ const TKey *existing = dir->GetKey(tokens[tokIdx].c_str()); + if (existing && strcmp(existing->GetClassName(), "TDirectory") != 0 && + strcmp(existing->GetClassName(), "TDirectoryFile") != 0) { + throw ROOT::RException(R__FAIL("error adding object '" + path + "': failed to create directory '" + + FullPathUntil(tokIdx) + "': name already taken by an object of type '" + + existing->GetClassName() + "'")); + } + dir = dir->mkdir(tokens[tokIdx].c_str(), "", true); + if (!dir) { + throw ROOT::RException(R__FAIL(std::string("failed to create directory ") + FullPathUntil(tokIdx))); + } + } + + const bool allowOverwrite = (flags & kPutAllowOverwrite) != 0; + const bool backupCycle = (flags & kPutOverwriteKeepCycle) != 0; + const Option_t *writeOpts = ""; + if (!allowOverwrite) { + const TKey *existing = dir->GetKey(tokens[tokens.size() - 1].c_str()); + if (existing) { + throw ROOT::RException(R__FAIL(std::string("trying to overwrite object ") + path + " of type " + + existing->GetClassName() + " with another object of type " + cls->GetName())); + } + } else if (!backupCycle) { + writeOpts = "WriteDelete"; + } + + int success = dir->WriteObjectAny(obj, cls, tokens[tokens.size() - 1].c_str(), writeOpts); + + if (!success) { + throw ROOT::RException(R__FAIL(std::string("Failed to write ") + path + " to file")); + } +} + +size_t RFile::Flush() +{ + return fFile->Write(); +} + +void RFile::Close() +{ + // NOTE: this also flushes the file internally + fFile.reset(); +} diff --git a/io/io/test/CMakeLists.txt b/io/io/test/CMakeLists.txt index 93b9b91f445a5..06695660ef290 100644 --- a/io/io/test/CMakeLists.txt +++ b/io/io/test/CMakeLists.txt @@ -18,6 +18,8 @@ if(uring AND NOT DEFINED ENV{ROOTTEST_IGNORE_URING}) ROOT_ADD_GTEST(RIoUring RIoUring.cxx LIBRARIES RIO) endif() +ROOT_ADD_GTEST(rfile rfile.cxx LIBRARIES RIO Hist) + # Temporarily disabled. Test routinely fails on MacOS and some Linuxes. 
#if(NOT WIN32 AND (NOT MACOS_VERSION OR NOT MACOSX_VERSION VERSION_LESS 13.00)) # ROOT_EXECUTABLE(TMapFileTest TMapFileTest.cxx LIBRARIES RIO Hist New) diff --git a/io/io/test/rfile.cxx b/io/io/test/rfile.cxx new file mode 100644 index 0000000000000..ed6fabbc8175b --- /dev/null +++ b/io/io/test/rfile.cxx @@ -0,0 +1,500 @@ +#include "gtest/gtest.h" +#include "gmock/gmock.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +using ROOT::Experimental::RFile; + +namespace { + +/** + * An RAII wrapper around an open temporary file on disk. It cleans up the guarded file when the wrapper object + * goes out of scope. + */ +class FileRaii { +private: + std::string fPath; + bool fPreserveFile = false; + +public: + explicit FileRaii(const std::string &path) : fPath(path) {} + FileRaii(FileRaii &&) = default; + FileRaii(const FileRaii &) = delete; + FileRaii &operator=(FileRaii &&) = default; + FileRaii &operator=(const FileRaii &) = delete; + ~FileRaii() + { + if (!fPreserveFile) + std::remove(fPath.c_str()); + } + std::string GetPath() const { return fPath; } + + // Useful if you want to keep a test file after the test has finished running + // for debugging purposes. Should only be used locally and never pushed. + void PreserveFile() { fPreserveFile = true; } +}; + +} // anonymous namespace + +TEST(RFile, Open) +{ + FileRaii fileGuard("test_rfile_read.root"); + + // Create a root file to open + { + auto tfile = std::unique_ptr(TFile::Open(fileGuard.GetPath().c_str(), "RECREATE")); + TH1D hist("hist", "", 100, -10, 10); + hist.FillRandom("gaus", 1000); + tfile->WriteObject(&hist, "hist"); + } + + auto file = RFile::Open(fileGuard.GetPath()); + auto hist = file->Get("hist"); + EXPECT_TRUE(hist); + + EXPECT_FALSE(file->Get("inexistent")); + EXPECT_FALSE(file->Get("hist")); + EXPECT_TRUE(file->Get("hist")); + + // We do NOT want to globally register RFiles ever. 
+ EXPECT_EQ(ROOT::GetROOT()->GetListOfFiles()->GetSize(), 0); + + std::string foo = "foo"; + EXPECT_THROW(file->Put("foo", foo), ROOT::RException); +} + +TEST(RFile, OpenInexistent) +{ + FileRaii fileGuard("does_not_exist.root"); + + // make sure that the file really does not exist, in case a previous test didn't clean it up. + gSystem->Unlink(fileGuard.GetPath().c_str()); + + ROOT::TestSupport::CheckDiagsRAII diags; + diags.optionalDiag(kSysError, "TFile::TFile", "", false); + diags.optionalDiag(kError, "TFile::TFile", "", false); + + try { + auto f = RFile::Open("does_not_exist.root"); + FAIL() << "trying to open an inexistent file should throw"; + } catch (const ROOT::RException &e) { + EXPECT_THAT(e.what(), testing::HasSubstr("failed to open file")); + } + try { + auto f = RFile::Update("/a/random/directory/that/definitely/does_not_exist.root"); + FAIL() << "trying to update a file under an inexistent directory should throw"; + } catch (const ROOT::RException &e) { + EXPECT_THAT(e.what(), testing::HasSubstr("failed to open file")); + } + try { + auto f = RFile::Recreate("/a/random/directory/that/definitely/does_not_exist.root"); + FAIL() << "trying to create a file under an inexistent directory should throw"; + } catch (const ROOT::RException &e) { + EXPECT_THAT(e.what(), testing::HasSubstr("failed to open file")); + } + + // This succeeds because Update creates the file if it doesn't exist. 
+ EXPECT_NO_THROW(RFile::Update("does_not_exist.root")); +} + +TEST(RFile, OpenForWriting) +{ + FileRaii fileGuard("test_rfile_write.root"); + + auto hist = std::make_unique("hist", "", 100, -10, 10); + hist->FillRandom("gaus", 1000); + + auto file = RFile::Recreate(fileGuard.GetPath()); + file->Put("hist", *hist); + EXPECT_TRUE(file->Get("hist")); + + EXPECT_EQ(ROOT::GetROOT()->GetListOfFiles()->GetSize(), 0); +} + +TEST(RFile, CheckNoAutoRegistrationWrite) +{ + FileRaii fileGuard("test_rfile_noautoreg_write.root"); + + auto file = RFile::Recreate(fileGuard.GetPath()); + EXPECT_EQ(gDirectory, gROOT); + auto hist = std::make_unique("hist", "", 100, -10, 10); + file->Put("hist", *hist); + EXPECT_EQ(hist->GetDirectory(), gROOT); + file->Close(); + EXPECT_EQ(hist->GetDirectory(), gROOT); + hist.reset(); + // no double free should happen when ROOT exits +} + +// Checks that an object read back through RFile::Get is not attached to any directory. +TEST(RFile, CheckNoAutoRegistrationRead) +{ + FileRaii fileGuard("test_rfile_noautoreg_read.root"); + + { + auto file = RFile::Recreate(fileGuard.GetPath()); + auto hist = std::make_unique("hist", "", 100, -10, 10); + hist->Fill(4); + file->Put("hist", *hist); + } + + { + auto file = RFile::Open(fileGuard.GetPath()); + EXPECT_EQ(gDirectory, gROOT); + auto hist = file->Get("hist"); + // Assert non-null *before* dereferencing, so a failed Get aborts the test instead of crashing it. + ASSERT_NE(hist, nullptr); + EXPECT_EQ(hist->GetDirectory(), nullptr); + EXPECT_FLOAT_EQ(hist->GetEntries(), 1); + } + // no double free should happen when ROOT exits +} + +TEST(RFile, CheckNoAutoRegistrationUpdate) +{ + FileRaii fileGuard("test_rfile_noautoreg_update.root"); + + { + auto file = RFile::Recreate(fileGuard.GetPath()); + auto hist = std::make_unique("hist", "", 100, -10, 10); + hist->Fill(4); + file->Put("hist", *hist); + } + + { + auto file = RFile::Update(fileGuard.GetPath()); + EXPECT_EQ(gDirectory, gROOT); + auto hist = file->Get("hist"); + ASSERT_NE(hist, nullptr); + EXPECT_EQ(hist->GetDirectory(), nullptr); + EXPECT_FLOAT_EQ(hist->GetEntries(), 1); + } + // no double free should happen when ROOT exits +} + 
// NOTE(review): the three lines below are a collapsed, '+'-prefixed diff hunk holding GoogleTest cases for RFile.
// Template argument lists (on Get, std::make_unique, static_cast) appear to have been lost in extraction, so the
// code is kept verbatim and only summarized here.
//
// Test cases contained in these lines, in order:
//  - WriteInvalidPaths: on a file created with RFile::Recreate, Put() is expected to throw ROOT::RException for
//    the paths "", "..", " a", "a\n", ".", "\0", ".a" and "a..".
//    NOTE(review): "\0" is an ordinary C string literal; presumably Put() receives it as a std::string_view or
//    std::string built from a const char*, in which case it is indistinguishable from "" and no embedded NUL is
//    actually exercised - confirm against Put()'s signature. std::string("\0", 1) would carry the NUL.
//  - OpenForUpdating: writes "hist" through Recreate(), reopens the file with RFile::Update, checks that "hist"
//    can be read, Puts "hist2", reads it back, and expects ROOT::GetROOT()->GetListOfFiles() to have size 0.
//  - PutOverwrite: after Put("hist") with 1000 entries, a second Put("hist") must throw; Overwrite("hist", hist2)
//    makes Get("hist") return the 10-entry object while Get("hist;1") still returns the 1000-entry one;
//    Overwrite("hist", s, false) leaves Get("hist;2") null and Get("hist;1") non-null.
//  - WrongExtension: Recreate() on "test_rfile_wrong.root.1" must emit a kWarning diagnostic from "ROOT.File"
//    containing 'preferred file extension is ".root"'; Recreate() on "test_rfile_wrong.xml" must throw.
//  - WriteReadInDir: Put("a/b/hist") followed by Open() + Get("a/b/hist") yields a non-null object.
//  - WriteReadInTFileDir: the object is written with TFile::mkdir("a/b") + WriteObject, then read back through
//    RFile::Open; Get("a/b/c/d") is expected to yield nothing.
//  - SaneHierarchy: a path cannot be both an object and a directory: after Put("a"), Put("a/b") throws; after
//    Put("b/c") and Put("b/d"), both Put("b/c/d") and Put("b") throw. The Get() checks mirror those outcomes.
//  - RefuseToCreateDirOverLeaf: Put("a/b/c") after Put("a/b") must throw an RException whose what() contains
//    "'a/b'" and "name already taken".
// The TODO comment that ends this span refers to the R__HAS_DAVIX-guarded test that follows it.
+TEST(RFile, WriteInvalidPaths) +{ + FileRaii fileGuard("test_rfile_write_invalid.root"); + + auto file = RFile::Recreate(fileGuard.GetPath()); + std::string a; + EXPECT_THROW(file->Put("", a), ROOT::RException); + EXPECT_THROW(file->Put("..", a), ROOT::RException); + EXPECT_THROW(file->Put(" a", a), ROOT::RException); + EXPECT_THROW(file->Put("a\n", a), ROOT::RException); + EXPECT_THROW(file->Put(".", a), ROOT::RException); + EXPECT_THROW(file->Put("\0", a), ROOT::RException); + EXPECT_THROW(file->Put(".a", a), ROOT::RException); + EXPECT_THROW(file->Put("a..", a), ROOT::RException); +} + +TEST(RFile, OpenForUpdating) +{ + FileRaii fileGuard("test_rfile_update.root"); + + { + TH1D hist("hist", "", 100, -10, 10); + hist.FillRandom("gaus", 1000); + auto file = RFile::Recreate(fileGuard.GetPath()); + file->Put("hist", hist); + } + + auto file = RFile::Update(fileGuard.GetPath()); + EXPECT_TRUE(file->Get("hist")); + { + auto hist2 = std::make_unique("hist2", "a different hist", 10, -1, 1); + file->Put("hist2", *hist2); + } + EXPECT_TRUE(file->Get("hist2")); + + EXPECT_EQ(ROOT::GetROOT()->GetListOfFiles()->GetSize(), 0); +} + +TEST(RFile, PutOverwrite) +{ + FileRaii fileGuard("test_rfile_putoverwrite.root"); + + auto file = RFile::Recreate(fileGuard.GetPath()); + + { + TH1D hist("hist", "", 100, -10, 10); + hist.FillRandom("gaus", 1000); + file->Put("hist", hist); + } + + { + auto hist = file->Get("hist"); + ASSERT_TRUE(hist); + EXPECT_EQ(static_cast(hist->GetEntries()), 1000); + } + + // Try putting another object at the same path, should fail + TH1D hist2("hist2", "a different hist", 10, -1, 1); + hist2.FillRandom("gaus", 10); + EXPECT_THROW(file->Put("hist", hist2), ROOT::RException); + + // Try with Overwrite, should work (and preserve the old object) + file->Overwrite("hist", hist2); + { + auto hist = file->Get("hist"); + ASSERT_TRUE(hist); + EXPECT_EQ(static_cast(hist->GetEntries()), 10); + + hist = file->Get("hist;1"); + ASSERT_TRUE(hist); + 
EXPECT_EQ(static_cast(hist->GetEntries()), 1000); + } + + // Now try overwriting without preserving the existing object + std::string s; + file->Overwrite("hist", s, false); + { + // the previous cycle should be gone... + auto hist = file->Get("hist;2"); + EXPECT_EQ(hist, nullptr); + // ...but any cycle before the latest should still be there! + hist = file->Get("hist;1"); + EXPECT_NE(hist, nullptr); + } +} + +TEST(RFile, WrongExtension) +{ + { + FileRaii fileGuard("test_rfile_wrong.root.1"); + ROOT::TestSupport::CheckDiagsRAII diagsRaii; + diagsRaii.requiredDiag(kWarning, "ROOT.File", "preferred file extension is \".root\"", false); + RFile::Recreate(fileGuard.GetPath()); + } + { + FileRaii fileGuard("test_rfile_wrong.xml"); + ROOT::TestSupport::CheckDiagsRAII diagsRaii; + EXPECT_THROW(RFile::Recreate(fileGuard.GetPath()), ROOT::RException); + } +} + +TEST(RFile, WriteReadInDir) +{ + FileRaii fileGuard("test_rfile_dir.root"); + + { + auto hist = std::make_unique("hist", "", 100, -10, 10); + hist->FillRandom("gaus", 1000); + auto file = RFile::Recreate(fileGuard.GetPath()); + file->Put("a/b/hist", *hist); + } + + { + auto file = RFile::Open(fileGuard.GetPath()); + EXPECT_TRUE(file->Get("a/b/hist")); + } +} + +TEST(RFile, WriteReadInTFileDir) +{ + FileRaii fileGuard("test_rfile_tfile_dir.root"); + + { + auto hist = std::make_unique("hist", "", 100, -10, 10); + hist->FillRandom("gaus", 1000); + TFile file(fileGuard.GetPath().c_str(), "RECREATE"); + auto *d = file.mkdir("a/b"); + d->WriteObject(hist.get(), "hist"); + } + + { + auto file = RFile::Open(fileGuard.GetPath()); + EXPECT_TRUE(file->Get("a/b/hist")); + // We won't find any object with a '/' in its name through RFile. + EXPECT_FALSE(file->Get("a/b/c/d")); + } +} + +TEST(RFile, SaneHierarchy) +{ + // verify that we can't create weird hierarchies like: + // + // (root) + // `--- "a/b": object + // | + // `--- "a": dir + // | + // `--- "b": object + // + // (who should "a/b" be in this case??) 
+ // + + FileRaii fileGuard("test_rfile_sane_hierarchy.root"); + + { + auto file = RFile::Recreate(fileGuard.GetPath()); + std::string s; + file->Put("a", s); + EXPECT_THROW(file->Put("a/b", s), ROOT::RException); + file->Put("b/c", s); + file->Put("b/d", s); + EXPECT_THROW(file->Put("b/c/d", s), ROOT::RException); + EXPECT_THROW(file->Put("b", s), ROOT::RException); + + EXPECT_NE(file->Get("a"), nullptr); + EXPECT_EQ(file->Get("a/b"), nullptr); + EXPECT_NE(file->Get("b/c"), nullptr); + EXPECT_NE(file->Get("b/d"), nullptr); + EXPECT_EQ(file->Get("b/c/d"), nullptr); + EXPECT_EQ(file->Get("b"), nullptr); + } +} + +TEST(RFile, RefuseToCreateDirOverLeaf) +{ + FileRaii fileGuard("test_rfile_dir_over_leaf.root"); + auto file = RFile::Recreate(fileGuard.GetPath()); + std::string s; + file->Put("a/b", s); + try { + file->Put("a/b/c", s); + FAIL() << "creating a directory over a leaf path should fail."; + } catch (const ROOT::RException &ex) { + EXPECT_THAT(ex.what(), testing::HasSubstr("'a/b'")); + EXPECT_THAT(ex.what(), testing::HasSubstr("name already taken")); + } +} + +// TODO: this test could in principle also run without davix: need to figure out a way to detect if we have +// remote access capabilities. 
// NOTE(review): the three lines below are a collapsed, '+'-prefixed diff hunk holding GoogleTest cases for RFile.
// Template argument lists (e.g. on Get) appear to have been lost in extraction, so the code is kept verbatim and
// only summarized here.
//
// Test cases contained in these lines, in order:
//  - RemoteRead (compiled only when R__HAS_DAVIX is defined): opens
//    "https://root.cern/files/rootcode.root" with RFile::Open and asserts that Get("root") is non-null.
//  - ComplexExample: for each of 10 top-level directory names, 5 runs and 10 histograms per run, Puts a TH1D at
//    "<dir>/run<k>/hists/h<i>". It contains no assertions; it only exercises Put().
//    NOTE(review): expressions such as dir + "/run" + (runIdx + 1) add an int to a std::string expression;
//    std::string itself has no such operator, so this presumably relies on an operator+ overload declared
//    elsewhere (possibly via TString) - confirm.
//  - Closing: after an explicit Close(), both Put() and Get() are expected to throw ROOT::RException.
//  - InvalidPaths: the long keys kKeyLong, kKeyFragmentLong and kKeyFragmentOk are accepted by Put(); paths with
//    spaces, control characters, "." / ".." / dot-prefixed fragments, the empty path and a path containing ';'
//    are rejected with ROOT::RException; a path containing backslashes is accepted.
//  - NormalizedPaths: leading and repeated slashes do not create distinct paths ("/s", "s" and "////s" collide
//    on Put(); "s" and "//s" read the same object; "a/b//c" collides with "a/b/c"), while Get("a/b/c/") with a
//    trailing slash throws.
+#ifdef R__HAS_DAVIX +TEST(RFile, RemoteRead) +{ + constexpr const char *kFileName = "https://root.cern/files/rootcode.root"; + + auto file = RFile::Open(kFileName); + auto content = file->Get("root"); + ASSERT_NE(content, nullptr); +} +#endif + +TEST(RFile, ComplexExample) +{ + FileRaii fileGuard("test_rfile_complex.root"); + + auto file = RFile::Recreate(fileGuard.GetPath()); + + const std::string topLevelDirs[] = {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}; + for (const auto &dir : topLevelDirs) { + const auto kNRuns = 5; + for (int runIdx = 0; runIdx < kNRuns; ++runIdx) { + const auto runDir = dir + "/run" + (runIdx + 1); + + const auto kNHist = 10; + for (int i = 0; i < kNHist; ++i) { + const auto histName = std::string("h") + (i + 1); + const auto histPath = runDir + "/hists/" + histName; + const auto histTitle = std::string("Histogram #") + (i + 1); + TH1D hist(histName, histTitle, 100, -10 * (i + 1), 10 * (i + 1)); + file->Put(histPath, hist); + } + } + } +} + +TEST(RFile, Closing) +{ + FileRaii fileGuard("test_rfile_closing.root"); + + { + auto file = RFile::Recreate(fileGuard.GetPath()); + std::string s; + file->Put("s", s); + // Explicitly close the file + file->Close(); + EXPECT_THROW(file->Put("ss", s), ROOT::RException); + } + + { + auto file = RFile::Open(fileGuard.GetPath()); + EXPECT_NE(file->Get("s"), nullptr); + file->Close(); + EXPECT_THROW(file->Get("s"), ROOT::RException); + } +} + +TEST(RFile, InvalidPaths) +{ + FileRaii fileGuard("test_rfile_invalidpaths.root"); + + auto file = RFile::Recreate(fileGuard.GetPath()); + + static const char *const kKeyLong = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; + std::string obj = "obj"; + EXPECT_NO_THROW(file->Put(kKeyLong, 
obj)); + + static const char *const kKeyFragmentLong = + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; + EXPECT_NO_THROW(file->Put(kKeyFragmentLong, obj)); + + static const char *const kKeyFragmentOk = + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAA"; + EXPECT_NO_THROW(file->Put(kKeyFragmentOk, obj)); + + static const char *const kKeyWhitespaces = "my path with spaces/foo"; + EXPECT_THROW(file->Put(kKeyWhitespaces, obj), ROOT::RException); + + static const char *const kKeyCtrlChars = "my\tpath\nwith\bcontrolcharacters"; + EXPECT_THROW(file->Put(kKeyCtrlChars, obj), ROOT::RException); + + static const char *const kKeyDot = "my/./path"; + EXPECT_THROW(file->Put(kKeyDot, obj), ROOT::RException); + static const char *const kKeyDot2 = "my/.path"; + EXPECT_THROW(file->Put(kKeyDot2, obj), ROOT::RException); + static const char *const kKeyDot3 = "../my/path"; + EXPECT_THROW(file->Put(kKeyDot3, obj), ROOT::RException); + + EXPECT_THROW(file->Put("", obj), ROOT::RException); + + // ';' is banned while writing + EXPECT_THROW(file->Put("myobj;2", obj), ROOT::RException); + + static const char *const kKeyBackslash = "this\\actually\\works!"; + EXPECT_NO_THROW(file->Put(kKeyBackslash, obj)); +} + +TEST(RFile, NormalizedPaths) 
+{ + FileRaii fileGuard("test_rfile_normalizedpaths.root"); + + auto file = RFile::Recreate(fileGuard.GetPath()); + std::string obj = "obj"; + file->Put("/s", obj); + // "s" and "/s" are equivalent paths, so we cannot overwrite it using Put()... + EXPECT_THROW(file->Put("s", obj), ROOT::RException); + // ...and this is true no matter how many leading slashes we have. + EXPECT_THROW(file->Put("////s", obj), ROOT::RException); + EXPECT_EQ(*file->Get("s"), obj); + EXPECT_EQ(*file->Get("//s"), obj); + + TH1D h("h", "h", 10, -10, 10); + // Cannot write directory 's': already taken by `obj`. + EXPECT_THROW(file->Put("s/b//c", h), ROOT::RException); + file->Put("a/b//c", h); + EXPECT_THROW(file->Put("a/b/c", h), ROOT::RException); + EXPECT_NE(file->Get("a/b/c"), nullptr); + EXPECT_NE(file->Get("//a////b/c"), nullptr); + EXPECT_THROW(file->Get("a/b/c/"), ROOT::RException); +}