Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tree/dataframe/inc/ROOT/RDF/RDefineReader.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class RDefinesWithReaders {
// (see BookDefineJit). it is never null.
std::shared_ptr<ROOT::Detail::RDF::RDefineBase> fDefine;
// Column readers per variation (in the map) per slot (in the vector).
std::vector<std::unordered_map<std::string_view, std::unique_ptr<RDefineReader>>> fReadersPerVariation;
std::vector<std::unordered_map<std::string_view, std::shared_ptr<RDefineReader>>> fReadersPerVariation;

// Strings that were already used to represent column names in this RDataFrame instance.
ROOT::Internal::RDF::RStringCache &fCachedColNames;
Expand Down
69 changes: 34 additions & 35 deletions tree/dataframe/inc/ROOT/RDF/RInterface.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,7 @@ public:
/// return an RVec of varied values, one for each variation tag, in the same order as the tags.
/// \param[in] inputColumns the names of the columns to be passed to the callable.
/// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
/// `"1"`, etc.
/// `"1"`, etc.
/// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
/// colName is used if none is provided.
///
Expand Down Expand Up @@ -988,7 +988,7 @@ public:
/// return an RVec of varied values, one for each variation tag, in the same order as the tags.
/// \param[in] inputColumns the names of the columns to be passed to the callable.
/// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
/// `"1"`, etc.
/// `"1"`, etc.
/// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
/// colName is used if none is provided.
///
Expand Down Expand Up @@ -1036,7 +1036,7 @@ public:
/// \param[in] inputColumns the names of the columns to be passed to the callable.
/// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
/// `"1"`, etc.
/// `"1"`, etc.
/// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
/// colName is used if none is provided.
///
Expand Down Expand Up @@ -1091,7 +1091,7 @@ public:
/// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
/// values for the specified column.
/// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
/// `"1"`, etc.
/// `"1"`, etc.
/// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
/// colName is used if none is provided.
///
Expand Down Expand Up @@ -1126,7 +1126,7 @@ public:
/// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
/// values for the specified columns.
/// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
/// `"1"`, etc.
/// `"1"`, etc.
/// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
///
/// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
Expand Down Expand Up @@ -1163,7 +1163,7 @@ public:
/// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
/// values for the specified column.
/// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
/// `"1"`, etc.
/// `"1"`, etc.
/// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
/// colName is used if none is provided.
///
Expand Down Expand Up @@ -1251,6 +1251,27 @@ public:
/// \param[in] options RSnapshotOptions struct with extra options to pass to the output TFile and TTree/RNTuple.
/// \return a `RDataFrame` that wraps the snapshotted dataset.
///
template <typename... ColumnTypes>
R__DEPRECATED(
6, 40, "Snapshot does not need template arguments anymore, you can safely remove them from this function call.")
RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
const ColumnNames_t &columnList,
const RSnapshotOptions &options = RSnapshotOptions())
{
// Backward-compatibility shim: ColumnTypes is never used. The call below passes no explicit
// template arguments, so overload resolution picks the non-template Snapshot overload that has
// the same parameter list, and all four arguments are forwarded to it unchanged.
return Snapshot(treename, filename, columnList, options);
}

////////////////////////////////////////////////////////////////////////////
/// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`.
/// \param[in] treename The name of the output TTree or RNTuple.
/// \param[in] filename The name of the output TFile.
/// \param[in] columnList The list of names of the columns/branches/fields to be written.
/// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple.
/// \return a `RDataFrame` that wraps the snapshotted dataset.
///
/// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source.
/// The types of the columns are automatically inferred and do not need to be specified.
///
/// Support for writing of nested branches/fields is limited (although RDataFrame is able to read them) and dot ('.')
/// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
/// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
Expand Down Expand Up @@ -1306,28 +1327,6 @@ public:
/// opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
/// df.Snapshot("outputNTuple", "outputFile.root", {"x"}, opts);
/// ~~~
template <typename... ColumnTypes>
R__DEPRECATED(
6, 40, "Snapshot does not need template arguments anymore, you can safely remove them from this function call.")
RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
const ColumnNames_t &columnList,
const RSnapshotOptions &options = RSnapshotOptions())
{
// Backward-compatibility shim: ColumnTypes is never used. The call below passes no explicit
// template arguments, so overload resolution picks the non-template Snapshot overload that has
// the same parameter list, and all four arguments are forwarded to it unchanged.
return Snapshot(treename, filename, columnList, options);
}

////////////////////////////////////////////////////////////////////////////
/// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`.
/// \param[in] treename The name of the output TTree or RNTuple.
/// \param[in] filename The name of the output TFile.
/// \param[in] columnList The list of names of the columns/branches/fields to be written.
/// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple.
/// \return a `RDataFrame` that wraps the snapshotted dataset.
///
/// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source.
/// The types of the columns are automatically inferred and do not need to be specified.
///
/// See above for a more complete description and example usages.
RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
const ColumnNames_t &columnList,
const RSnapshotOptions &options = RSnapshotOptions())
Expand Down Expand Up @@ -1464,7 +1463,7 @@ public:
/// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source.
/// The types of the columns are automatically inferred and do not need to be specified.
///
/// See above for a more complete description and example usages.
/// See Snapshot(std::string_view, std::string_view, const ColumnNames_t&, const RSnapshotOptions &) for a more complete description and example usages.
RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
std::string_view columnNameRegexp = "",
const RSnapshotOptions &options = RSnapshotOptions())
Expand Down Expand Up @@ -1507,7 +1506,7 @@ public:
/// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source.
/// The types of the columns are automatically inferred and do not need to be specified.
///
/// See above for a more complete description and example usages.
/// See Snapshot(std::string_view, std::string_view, const ColumnNames_t&, const RSnapshotOptions &) for a more complete description and example usages.
RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
std::initializer_list<std::string> columnList,
const RSnapshotOptions &options = RSnapshotOptions())
Expand Down Expand Up @@ -2362,13 +2361,13 @@ public:
/// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
/// ~~~
///
/// `GraphAsymmErrors` should also be used for the cases in which only the values associated with
/// one of the axes have errors. For example, only `ey` exists and `ex` is equal to zero.
/// In such cases, the user should do the following:
/// `GraphAsymmErrors` should also be used for the cases in which only the values associated with
/// one of the axes have errors. For example, only `ey` exists and `ex` is equal to zero.
/// In such cases, the user should do the following:
/// ~~~{.cpp}
/// // Create a column of zeros in RDataFrame
/// auto rdf_withzeros = rdf.Define("zero", "0");
/// // or alternatively:
/// auto rdf_withzeros = rdf.Define("zero", "0");
/// // or alternatively:
/// auto rdf_withzeros = rdf.Define("zero", []() -> double { return 0.;});
/// // Create the graph with y errors only
/// auto rdf_errorsOnYOnly = rdf_withzeros.GraphAsymmErrors("xValues", "yValues", "zero", "zero", "eyl", "eyh");
Expand Down
61 changes: 30 additions & 31 deletions tree/dataframe/inc/ROOT/RDF/SnapshotHelpers.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,6 @@ class TBufferMergerFile;

namespace ROOT::Internal::RDF {

// Container of TBranch pointers that are inserted and looked up by name.
// Only the declarations are visible here; the member functions are defined elsewhere.
class RBranchSet {
// NOTE(review): these three vectors look like parallel arrays with one entry per inserted
// branch (pointer, name, C-array flag) -- confirm against the implementation.
std::vector<TBranch *> fBranches;
std::vector<std::string> fNames;
std::vector<bool> fIsCArray;

public:
// Look up a branch by name. NOTE(review): behaviour for an unknown name is not visible here -- confirm.
TBranch *Get(const std::string &name) const;
// Query the C-array flag for the branch with the given name.
bool IsCArray(const std::string &name) const;
// Register a branch pointer under `name`; `isCArray` defaults to false.
void Insert(const std::string &name, TBranch *address, bool isCArray = false);
// NOTE(review): presumably empties all stored entries -- confirm against the implementation.
void Clear();
// NOTE(review): judging by the name, checks that no stored branch has a null address; how a
// failure is reported is not visible here -- confirm.
void AssertNoNullBranchAddresses();
};

class R__CLING_PTRCHECK(off) UntypedSnapshotRNTupleHelper final : public RActionImpl<UntypedSnapshotRNTupleHelper> {
std::string fFileName;
std::string fDirName;
Expand Down Expand Up @@ -102,6 +89,34 @@ public:
UntypedSnapshotRNTupleHelper MakeNew(void *newName);
};

/// Bookkeeping record for a single output branch written by a Snapshot.
struct RBranchData {
   std::string fInputBranchName;                ///< Input branch name; aliases are already resolved
   std::string fOutputBranchName;               ///< Name of the corresponding output branch
   const std::type_info *fInputTypeID{nullptr}; ///< Type information of the input column
   TBranch *fOutputBranch{nullptr};             ///< Non-owning pointer to the output branch
   void *fBranchAddressForCArrays{nullptr};     ///< Used to detect if branch addresses need to be updated

   /// Type-erased owning pointer whose deleter is supplied at runtime.
   std::unique_ptr<void, std::function<void(void *)>> fEmptyInstance;
   bool fIsCArray{false};
   bool fIsDefine{false};

   /// \param[in] inputBranchName Name of the input branch (moved into the record).
   /// \param[in] outputBranchName Name of the output branch (moved into the record).
   /// \param[in] isDefine Stored in fIsDefine.
   /// \param[in] typeID Type information of the input column.
   /// \param[in] outputBranch Optional output branch pointer, null by default.
   RBranchData(std::string inputBranchName, std::string outputBranchName, bool isDefine, const std::type_info *typeID,
               TBranch *outputBranch = nullptr)
      : fInputBranchName(std::move(inputBranchName)),
        fOutputBranchName(std::move(outputBranchName)),
        fInputTypeID(typeID),
        fOutputBranch(outputBranch),
        fIsDefine(isDefine)
   {
   }

   /// Reset both branch-related pointers to null; all other members are left untouched.
   void ClearBranchPointers()
   {
      fBranchAddressForCArrays = nullptr;
      fOutputBranch = nullptr;
   }
};

class R__CLING_PTRCHECK(off) UntypedSnapshotTTreeHelper final : public RActionImpl<UntypedSnapshotTTreeHelper> {
std::string fFileName;
std::string fDirName;
Expand All @@ -110,17 +125,10 @@ class R__CLING_PTRCHECK(off) UntypedSnapshotTTreeHelper final : public RActionIm
std::unique_ptr<TFile> fOutputFile;
std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
bool fBranchAddressesNeedReset{true};
ColumnNames_t fInputBranchNames; // This contains the resolved aliases
ColumnNames_t fOutputBranchNames;
TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
// TODO we might be able to unify fBranches, fBranchAddresses and fOutputBranches
std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
std::vector<void *> fBranchAddresses; // Addresses of objects associated to output branches
RBranchSet fOutputBranches;
std::vector<bool> fIsDefine;
std::vector<RBranchData> fBranchData; // Information for all output branches
ROOT::Detail::RDF::RLoopManager *fOutputLoopManager;
ROOT::Detail::RDF::RLoopManager *fInputLoopManager;
std::vector<const std::type_info *> fInputColumnTypeIDs; // Types for the input columns

public:
UntypedSnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
Expand Down Expand Up @@ -169,11 +177,7 @@ class R__CLING_PTRCHECK(off) UntypedSnapshotTTreeHelperMT final : public RAction
std::vector<std::unique_ptr<TTree>> fOutputTrees;
std::vector<int> fBranchAddressesNeedReset; // vector<bool> does not allow concurrent writing of different elements
std::vector<TTree *> fInputTrees; // Current input trees, one per slot. Set at initialization time (`InitTask`)
// Addresses of branches in output per slot, non-null only for the ones holding C arrays
std::vector<std::vector<TBranch *>> fBranches;
// Addresses of objects associated to output branches per slot, non-null only for the ones holding C arrays
std::vector<std::vector<void *>> fBranchAddresses;
std::vector<RBranchSet> fOutputBranches; // Unique set of output branches, one per slot.
std::vector<std::vector<RBranchData>> fBranchData; // Information for all output branches of each slot

// Attributes of the output TTree

Expand All @@ -182,16 +186,11 @@ class R__CLING_PTRCHECK(off) UntypedSnapshotTTreeHelperMT final : public RAction
std::string fTreeName;
TFile *fOutputFile; // Non-owning view on the output file
RSnapshotOptions fOptions;
std::vector<std::string> fOutputBranchNames;

// Attributes related to the computation graph

ROOT::Detail::RDF::RLoopManager *fOutputLoopManager;
ROOT::Detail::RDF::RLoopManager *fInputLoopManager;
std::vector<std::string> fInputBranchNames; // This contains the resolved aliases
std::vector<const std::type_info *> fInputColumnTypeIDs; // Types for the input columns

std::vector<bool> fIsDefine;

public:
UntypedSnapshotTTreeHelperMT(unsigned int nSlots, std::string_view filename, std::string_view dirname,
Expand Down
7 changes: 4 additions & 3 deletions tree/dataframe/inc/ROOT/RDFHelpers.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,10 @@ namespace Experimental {
template <typename T>
RResultMap<T> VariationsFor(RResultPtr<T> resPtr)
{
using SnapshotResult_t = ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager, void>;
static_assert(!std::is_same_v<T, SnapshotResult_t>,
"Snapshot with variations only can be enabled via RSnapshotOptions.");

R__ASSERT(resPtr != nullptr && "Calling VariationsFor on an empty RResultPtr");

// populate parts of the computation graph for which we only have "empty shells", e.g. RJittedActions and
Expand Down Expand Up @@ -270,9 +274,6 @@ RResultMap<T> VariationsFor(RResultPtr<T> resPtr)
*resPtr.fLoopManager, std::move(nominalAction), std::move(variedAction));
}

using SnapshotPtr_t = ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager, void>>;
SnapshotPtr_t VariationsFor(SnapshotPtr_t resPtr);

/// \brief Add ProgressBar to a ROOT::RDF::RNode
/// \param[in] df RDataFrame node at which ProgressBar is called.
///
Expand Down
15 changes: 3 additions & 12 deletions tree/dataframe/src/RDFHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,7 @@ unsigned int ROOT::RDF::RunGraphs(std::vector<RResultHandle> handles)
return uniqueLoops.size();
}

// Non-template VariationsFor overload taking a SnapshotPtr_t. The (unnamed) argument is ignored:
// the function never returns normally and always throws std::logic_error.
ROOT::RDF::Experimental::SnapshotPtr_t ROOT::RDF::Experimental::VariationsFor(ROOT::RDF::Experimental::SnapshotPtr_t)
{
throw std::logic_error("Varying a Snapshot result is not implemented yet.");
}

namespace ROOT {
namespace RDF {

namespace Experimental {
namespace ROOT::RDF::Experimental {

void ThreadsPerTH3(unsigned int N)
{
Expand Down Expand Up @@ -398,6 +390,5 @@ void AddProgressBar(ROOT::RDataFrame dataframe)
auto node = ROOT::RDF::AsRNode(dataframe);
ROOT::RDF::Experimental::AddProgressBar(node);
}
} // namespace Experimental
} // namespace RDF
} // namespace ROOT

} // namespace ROOT::RDF::Experimental
Loading