From f134dcbafa2e10a4557135a3936a5e0d2172c76e Mon Sep 17 00:00:00 2001 From: Enrico Guiraud <enrico.guiraud@cern.ch> Date: Wed, 8 Aug 2018 12:54:55 +0200 Subject: [PATCH] [DF] Simplify logic now that lifetimes are simpler - RResultPtr now stores a raw pointer intead of a weak_ptr - weak_ptr validity checks have been removed since RLoopManager cannot go out of scope before these other entities --- tree/dataframe/inc/ROOT/RDFGraphUtils.hxx | 4 +- tree/dataframe/inc/ROOT/RDFInterface.hxx | 259 ++++++++++------------ tree/dataframe/inc/ROOT/RDFNodes.hxx | 4 +- tree/dataframe/inc/ROOT/RResultPtr.hxx | 43 ++-- tree/dataframe/src/RDFGraphUtils.cxx | 2 +- tree/dataframe/src/RDataFrame.cxx | 8 +- 6 files changed, 139 insertions(+), 181 deletions(-) diff --git a/tree/dataframe/inc/ROOT/RDFGraphUtils.hxx b/tree/dataframe/inc/ROOT/RDFGraphUtils.hxx index 1a0363d15b2..a82e9c952c6 100644 --- a/tree/dataframe/inc/ROOT/RDFGraphUtils.hxx +++ b/tree/dataframe/inc/ROOT/RDFGraphUtils.hxx @@ -113,7 +113,7 @@ private: //////////////////////////////////////////////////////////////////////////// /// \brief Starting from the root node, prints the entire graph. - std::string RepresentGraph(std::shared_ptr<RLoopManager> rLoopManager); + std::string RepresentGraph(RLoopManager *rLoopManager); //////////////////////////////////////////////////////////////////////////// /// \brief Starting from a Filter or Range, prints the branch it belongs to @@ -132,7 +132,7 @@ private: template <typename T> std::string RepresentGraph(const RResultPtr<T> &resultPtr) { - auto loopManager = resultPtr.fImplWeakPtr.lock(); + auto loopManager = resultPtr.fLoopManager; if (!loopManager) throw std::runtime_error("Something went wrong"); diff --git a/tree/dataframe/inc/ROOT/RDFInterface.hxx b/tree/dataframe/inc/ROOT/RDFInterface.hxx index 88961d3cb15..ff7bc3e45d2 100644 --- a/tree/dataframe/inc/ROOT/RDFInterface.hxx +++ b/tree/dataframe/inc/ROOT/RDFInterface.hxx @@ -106,9 +106,9 @@ class RInterface { template <typename T, typename W> friend class RInterface; - std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface. - std::weak_ptr<RLoopManager> fImplWeakPtr; ///< Weak pointer to the RLoopManager at the root of the graph. - + std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface. + ///< The RLoopManager at the root of this computation graph. Never null. + RLoopManager *fLoopManager; /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object. RDataSource *fDataSource = nullptr; std::shared_ptr<const ColumnNames_t> fBranchNames; ///< Cache of the chain columns names @@ -133,15 +133,15 @@ public: /// \brief Only enabled when building a RInterface<RLoopManager> template <typename T = Proxied, typename std::enable_if<std::is_same<T, RLoopManager>::value, int>::type = 0> RInterface(const std::shared_ptr<Proxied> &proxied) - : fProxiedPtr(proxied), fImplWeakPtr(proxied), fDataSource(proxied->GetDataSource()) + : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource()) { AddDefaultColumns(); } operator RNode() const { - return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), fImplWeakPtr, - fCustomColumns, fBranchNames, fDataSource); + return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fCustomColumns, + fBranchNames, fDataSource); } //////////////////////////////////////////////////////////////////////////// @@ -168,18 +168,17 @@ public: Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "") { RDFInternal::CheckFilter(f); - auto loopManager = GetLoopManager(); using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types; constexpr auto nColumns = ColTypes_t::list_size; const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); - - auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t()); + const auto newColumns = + CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t()); using F_t = RDFDetail::RFilter<F, Proxied>; auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, newColumns, name); - loopManager->Book(filterPtr.get()); - return RInterface<F_t, DS_t>(std::move(filterPtr), fImplWeakPtr, newColumns, fBranchNames, fDataSource); + fLoopManager->Book(filterPtr.get()); + return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, newColumns, fBranchNames, fDataSource); } //////////////////////////////////////////////////////////////////////////// @@ -222,22 +221,21 @@ public: /// Refer to the first overload of this method for the full documentation. RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "") { - auto df = GetLoopManager(); - const auto &aliasMap = df->GetAliasMap(); - auto *const tree = df->GetTree(); + const auto &aliasMap = fLoopManager->GetAliasMap(); + auto *const tree = fLoopManager->GetTree(); const auto branches = tree ? RDFInternal::GetBranchNames(*tree) : ColumnNames_t(); auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type; - RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, fImplWeakPtr, fCustomColumns, fBranchNames, + RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fBranchNames, fDataSource); - const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(df.get(), name); + const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(fLoopManager, name); RDFInternal::BookFilterJit(jittedFilter.get(), upcastNodeOnHeap, name, expression, aliasMap, branches, - fCustomColumns, tree, fDataSource, df->GetID()); + fCustomColumns, tree, fDataSource, fLoopManager->GetID()); - df->Book(jittedFilter.get()); - return RInterface<RDFDetail::RJittedFilter, DS_t>(std::move(jittedFilter), fImplWeakPtr, fCustomColumns, + fLoopManager->Book(jittedFilter.get()); + return RInterface<RDFDetail::RJittedFilter, DS_t>(std::move(jittedFilter), *fLoopManager, fCustomColumns, fBranchNames, fDataSource); } @@ -337,21 +335,22 @@ public: /// Refer to the first overload of this method for the full documentation. RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression) { - auto loopManager = GetLoopManager(); - RDFInternal::CheckCustomColumn(name, loopManager->GetTree(), fCustomColumns.GetNames(), + // this check must be done before jitting lest we throw exceptions in jitted code + RDFInternal::CheckCustomColumn(name, fLoopManager->GetTree(), fCustomColumns.GetNames(), fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{}); - auto jittedCustomColumn = std::make_shared<RDFDetail::RJittedCustomColumn>(loopManager.get(), name, loopManager->GetNSlots()); + auto jittedCustomColumn = + std::make_shared<RDFDetail::RJittedCustomColumn>(fLoopManager, name, fLoopManager->GetNSlots()); - RDFInternal::BookDefineJit(name, expression, *loopManager, fDataSource, jittedCustomColumn, fCustomColumns); + RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, jittedCustomColumn, fCustomColumns); RDFInternal::RBookedCustomColumns newCols(fCustomColumns); newCols.AddName(name); newCols.AddColumn(jittedCustomColumn, name); - loopManager->RegisterCustomColumn(jittedCustomColumn.get()); + fLoopManager->RegisterCustomColumn(jittedCustomColumn.get()); - RInterface<Proxied, DS_t> newInterface(fProxiedPtr, fImplWeakPtr, std::move(newCols), fBranchNames, fDataSource); + RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fBranchNames, fDataSource); return newInterface; } @@ -366,23 +365,21 @@ public: // The symmetry with Define is clear. We want to: // - Create globally the alias and return this very node, unchanged // - Make aliases accessible based on chains and not globally - auto loopManager = GetLoopManager(); // Helper to find out if a name is a column auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{}; // If the alias name is a column name, there is a problem - RDFInternal::CheckCustomColumn(alias, loopManager->GetTree(), fCustomColumns.GetNames(), - dsColumnNames); + RDFInternal::CheckCustomColumn(alias, fLoopManager->GetTree(), fCustomColumns.GetNames(), dsColumnNames); const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0]; - loopManager->AddColumnAlias(std::string(alias), validColumnName); + fLoopManager->AddColumnAlias(std::string(alias), validColumnName); RDFInternal::RBookedCustomColumns newCols(fCustomColumns); newCols.AddName(alias); - RInterface<Proxied, DS_t> newInterface(fProxiedPtr, fImplWeakPtr, std::move(newCols), fBranchNames, fDataSource); + RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fBranchNames, fDataSource); return newInterface; } @@ -417,22 +414,19 @@ public: const ColumnNames_t &columnList, const RSnapshotOptions &options = RSnapshotOptions()) { - auto df = GetLoopManager(); - // Early return: if the list of columns is empty, just return an empty RDF // If we proceed, the jitted call will not compile! if (columnList.empty()) { auto nEntries = *this->Count(); - auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>( - std::make_shared<RLoopManager>(nEntries)); - return MakeResultPtr(snapshotRDF, df, nullptr); + auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(nEntries)); + return MakeResultPtr(snapshotRDF, *fLoopManager, nullptr); } - auto tree = df->GetTree(); - const auto nsID = df->GetID(); + auto tree = fLoopManager->GetTree(); + const auto nsID = fLoopManager->GetID(); std::stringstream snapCall; auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr); RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface( - fProxiedPtr, fImplWeakPtr, fCustomColumns, fBranchNames, fDataSource); + fProxiedPtr, *fLoopManager, fCustomColumns, fBranchNames, fDataSource); // build a string equivalent to // "resPtr = (RInterface<nodetype*>*)(this)->Snapshot<Ts...>(args...)" @@ -539,13 +533,12 @@ public: return emptyRDF; } - auto df = GetLoopManager(); - auto tree = df->GetTree(); - const auto nsID = df->GetID(); + auto tree = fLoopManager->GetTree(); + const auto nsID = fLoopManager->GetID(); std::stringstream cacheCall; auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr); - RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, fImplWeakPtr, - fCustomColumns, fBranchNames, fDataSource); + RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface( + fProxiedPtr, *fLoopManager, fCustomColumns, fBranchNames, fDataSource); // build a string equivalent to // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))" RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0)); @@ -613,11 +606,10 @@ public: throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin."); CheckIMTDisabled("Range"); - auto df = GetLoopManager(); using Range_t = RDFDetail::RRange<Proxied>; auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr); - df->Book(rangePtr.get()); - RInterface<RDFDetail::RRange<Proxied>> tdf_r(std::move(rangePtr), fImplWeakPtr, fCustomColumns, fBranchNames, + fLoopManager->Book(rangePtr.get()); + RInterface<RDFDetail::RRange<Proxied>> tdf_r(std::move(rangePtr), *fLoopManager, fCustomColumns, fBranchNames, fDataSource); return tdf_r; } @@ -671,7 +663,6 @@ public: template <typename F> void ForeachSlot(F f, const ColumnNames_t &columns = {}) { - auto loopManager = GetLoopManager(); using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>; constexpr auto nColumns = ColTypes_t::list_size; @@ -683,9 +674,9 @@ public: using Action_t = RDFInternal::RAction<Helper_t, Proxied>; auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, newColumns); - loopManager->Book(action.get()); + fLoopManager->Book(action.get()); - loopManager->Run(); + fLoopManager->Run(); } // clang-format off @@ -744,14 +735,13 @@ public: /// booked but not executed. See RResultPtr documentation. RResultPtr<ULong64_t> Count() { - auto df = GetLoopManager(); - const auto nSlots = df->GetNSlots(); + const auto nSlots = fLoopManager->GetNSlots(); auto cSPtr = std::make_shared<ULong64_t>(0); using Helper_t = RDFInternal::CountHelper; using Action_t = RDFInternal::RAction<Helper_t, Proxied>; auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr, fCustomColumns); - df->Book(action.get()); - return MakeResultPtr(cSPtr, df, std::move(action)); + fLoopManager->Book(action.get()); + return MakeResultPtr(cSPtr, *fLoopManager, std::move(action)); } //////////////////////////////////////////////////////////////////////////// @@ -766,7 +756,6 @@ public: template <typename T, typename COLL = std::vector<T>> RResultPtr<COLL> Take(std::string_view column = "") { - auto loopManager = GetLoopManager(); const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)}); const auto validColumnNames = GetValidatedColumnNames(1, columns); @@ -777,11 +766,11 @@ public: using Helper_t = RDFInternal::TakeHelper<T, T, COLL>; using Action_t = RDFInternal::RAction<Helper_t, Proxied>; auto valuesPtr = std::make_shared<COLL>(); - const auto nSlots = loopManager->GetNSlots(); + const auto nSlots = fLoopManager->GetNSlots(); auto action = std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, newColumns); - loopManager->Book(action.get()); - return MakeResultPtr(valuesPtr, loopManager, std::move(action)); + fLoopManager->Book(action.get()); + return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action)); } //////////////////////////////////////////////////////////////////////////// @@ -1362,7 +1351,6 @@ public: if (std::is_same<Proxied, RLoopManager>::value && fCustomColumns.GetNames().size() > 2) returnEmptyReport = true; - auto lm = GetLoopManager(); auto rep = std::make_shared<RCutFlowReport>(); using Helper_t = RDFInternal::ReportHelper<Proxied>; using Action_t = RDFInternal::RAction<Helper_t, Proxied>; @@ -1370,8 +1358,8 @@ public: auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}), fProxiedPtr, fCustomColumns); - lm->Book(action.get()); - return MakeResultPtr(rep, lm, std::move(action)); + fLoopManager->Book(action.get()); + return MakeResultPtr(rep, *fLoopManager, std::move(action)); } ///////////////////////////////////////////////////////////////////////////// @@ -1392,8 +1380,7 @@ public: std::for_each(columnNames.begin(), columnNames.end(), addIfNotInternal); - auto df = GetLoopManager(); - auto tree = df->GetTree(); + auto tree = fLoopManager->GetTree(); if (tree) { auto branchNames = RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false); allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end()); @@ -1413,16 +1400,16 @@ public: /// This is not an action nor a transformation, just a query to the RDataFrame object. std::string GetColumnType(std::string_view column) { - auto lm = GetLoopManager(); const auto &customCols = fCustomColumns.GetNames(); const bool convertVector2RVec = true; const auto isCustom = std::find(customCols.begin(), customCols.end(), column) != customCols.end(); if (!isCustom) { - return RDFInternal::ColumnName2ColumnTypeName(std::string(column), lm->GetID(), lm->GetTree(), - lm->GetDataSource(), isCustom, convertVector2RVec); + return RDFInternal::ColumnName2ColumnTypeName(std::string(column), fLoopManager->GetID(), + fLoopManager->GetTree(), fLoopManager->GetDataSource(), isCustom, + convertVector2RVec); } else { // must convert the alias "__tdf::column_type" to a readable type - const auto call = "ROOT::Internal::RDF::TypeID2TypeName(typeid(__tdf" + std::to_string(lm->GetID()) + + const auto call = "ROOT::Internal::RDF::TypeID2TypeName(typeid(__tdf" + std::to_string(fLoopManager->GetID()) + "::" + std::string(column) + "_type))"; const auto callRes = gInterpreter->Calc(call.c_str()); return *reinterpret_cast<std::string *>(callRes); // copy result to stack @@ -1489,7 +1476,6 @@ public: RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity) { RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay()); - auto loopManager = GetLoopManager(); const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); constexpr auto nColumns = ArgTypes::list_size; @@ -1501,10 +1487,10 @@ public: using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>; using Action_t = typename RDFInternal::RAction<Helper_t, Proxied>; auto action = std::make_unique<Action_t>( - Helper_t(std::move(aggregator), std::move(merger), accObjPtr, loopManager->GetNSlots()), validColumnNames, + Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames, fProxiedPtr, newColumns); - loopManager->Book(action.get()); - return MakeResultPtr(accObjPtr, loopManager, std::move(action)); + fLoopManager->Book(action.get()); + return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action)); } // clang-format off @@ -1570,13 +1556,12 @@ public: using AH = RDFDetail::RActionImpl<Helper>; static_assert(std::is_base_of<AH, Helper>::value && std::is_convertible<Helper *, AH *>::value, "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>"); - auto lm = GetLoopManager(); using Action_t = typename RDFInternal::RAction<Helper, Proxied, TTraits::TypeList<ColumnTypes...>>; auto resPtr = h.GetResultPtr(); auto action = std::make_unique<Action_t>(Helper(std::forward<Helper>(h)), columns, fProxiedPtr, fCustomColumns); - lm->Book(action.get()); - return MakeResultPtr(resPtr, lm, std::move(action)); + fLoopManager->Book(action.get()); + return MakeResultPtr(resPtr, *fLoopManager, std::move(action)); } //////////////////////////////////////////////////////////////////////////// @@ -1646,8 +1631,6 @@ public: private: void AddDefaultColumns() { - auto loopManager = GetLoopManager(); - RDFInternal::RBookedCustomColumns newCols; // Entry number column @@ -1656,39 +1639,37 @@ private: using NewColEntry_t = RDFDetail::RCustomColumn<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>; - auto entryColumn = std::make_shared<NewColEntry_t>(loopManager.get(), entryColName, std::move(entryColGen), newCols.GetNames(), loopManager->GetNSlots(), - newCols); + auto entryColumn = std::make_shared<NewColEntry_t>(fLoopManager, entryColName, std::move(entryColGen), + newCols.GetNames(), fLoopManager->GetNSlots(), newCols); newCols.AddName(entryColName); newCols.AddColumn(entryColumn, entryColName); - loopManager->RegisterCustomColumn(entryColumn.get()); + fLoopManager->RegisterCustomColumn(entryColumn.get()); // Declare return type to the interpreter, for future use by jitted actions - auto retTypeDeclaration = "namespace __tdf" + std::to_string(loopManager->GetID()) + " { using " + - entryColName + "_type = ULong64_t; }"; + auto retTypeDeclaration = + "namespace __tdf" + std::to_string(fLoopManager->GetID()) + " { using " + entryColName + "_type = ULong64_t; }"; gInterpreter->Declare(retTypeDeclaration.c_str()); - // Slot number column const auto slotColName = "tdfslot_"; auto slotColGen = [](unsigned int slot) { return slot; }; using NewColSlot_t = RDFDetail::RCustomColumn<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>; - auto slotColumn = std::make_shared<NewColSlot_t>(loopManager.get(), slotColName, std::move(slotColGen), newCols.GetNames(), loopManager->GetNSlots(), - newCols); + auto slotColumn = std::make_shared<NewColSlot_t>(fLoopManager, slotColName, std::move(slotColGen), + newCols.GetNames(), fLoopManager->GetNSlots(), newCols); newCols.AddName(slotColName); newCols.AddColumn(slotColumn, slotColName); - loopManager->RegisterCustomColumn(slotColumn.get()); + fLoopManager->RegisterCustomColumn(slotColumn.get()); fCustomColumns = std::move(newCols); - // Declare return type to the interpreter, for future use by jitted actions - retTypeDeclaration = "namespace __tdf" + std::to_string(loopManager->GetID()) + " { using " + - slotColName + "_type = unsigned int; }"; + // Declare return type to the interpreter, for future use by jitted actions + retTypeDeclaration = "namespace __tdf" + std::to_string(fLoopManager->GetID()) + " { using " + slotColName + + "_type = unsigned int; }"; gInterpreter->Declare(retTypeDeclaration.c_str()); - } ColumnNames_t ConvertRegexToColumns(std::string_view columnNameRegexp, std::string_view callerName) @@ -1717,8 +1698,7 @@ private: } } - auto df = GetLoopManager(); - auto tree = df->GetTree(); + auto tree = fLoopManager->GetTree(); if (tree) { auto branchNames = RDFInternal::GetTopLevelBranchNames(*tree); for (auto &branchName : branchNames) { @@ -1773,7 +1753,6 @@ private: typename std::enable_if<!RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0> RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r) { - auto lm = GetLoopManager(); constexpr auto nColumns = sizeof...(BranchTypes); const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); @@ -1781,12 +1760,12 @@ private: auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), RDFInternal::TypeList<BranchTypes...>()); - const auto nSlots = lm->GetNSlots(); + const auto nSlots = fLoopManager->GetNSlots(); auto action = RDFInternal::BuildAction<BranchTypes...>(validColumnNames, r, nSlots, fProxiedPtr, ActionTag{}, newColumns); - lm->Book(action.get()); - return MakeResultPtr(r, lm, std::move(action)); + fLoopManager->Book(action.get()); + return MakeResultPtr(r, *fLoopManager, std::move(action)); } // User did not specify type, do type inference @@ -1797,37 +1776,35 @@ private: RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1) { - auto lm = GetLoopManager(); - auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes)); const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns); - const unsigned int nSlots = lm->GetNSlots(); + const unsigned int nSlots = fLoopManager->GetNSlots(); - auto tree = lm->GetTree(); + auto tree = fLoopManager->GetTree(); auto rOnHeap = RDFInternal::MakeSharedOnHeap(r); auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type; - RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, fImplWeakPtr, fCustomColumns, fBranchNames, + RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fBranchNames, fDataSource); - auto jittedActionOnHeap = RDFInternal::MakeSharedOnHeap(std::make_shared<RDFInternal::RJittedAction>(*lm)); + auto jittedActionOnHeap = + RDFInternal::MakeSharedOnHeap(std::make_shared<RDFInternal::RJittedAction>(*fLoopManager)); auto toJit = RDFInternal::JitBuildAction( validColumnNames, upcastNodeOnHeap, typeid(std::shared_ptr<ActionResultType>), typeid(ActionTag), rOnHeap, - tree, nSlots, fCustomColumns, fDataSource, jittedActionOnHeap, lm->GetID()); - lm->Book(jittedActionOnHeap->get()); - lm->ToJit(toJit); - return MakeResultPtr(r, lm, *jittedActionOnHeap); + tree, nSlots, fCustomColumns, fDataSource, jittedActionOnHeap, fLoopManager->GetID()); + fLoopManager->Book(jittedActionOnHeap->get()); + fLoopManager->ToJit(toJit); + return MakeResultPtr(r, *fLoopManager, *jittedActionOnHeap); } template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type> typename std::enable_if<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns) { - auto loopManager = GetLoopManager(); - RDFInternal::CheckCustomColumn(name, loopManager->GetTree(), fCustomColumns.GetNames(), + RDFInternal::CheckCustomColumn(name, fLoopManager->GetTree(), fCustomColumns.GetNames(), fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{}); using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types; @@ -1858,21 +1835,21 @@ private: retTypeName = TClassEdit::DemangleTypeIdName(typeid(RetType), errCode); retTypeNameFwdDecl = "class " + retTypeName + ";/* Did you forget to declare type " + retTypeName + " in the interpreter?*/"; } - const auto retTypeDeclaration = "namespace __tdf" + std::to_string(loopManager->GetID()) + " { " + retTypeNameFwdDecl + " using " + - std::string(name) + "_type = " + retTypeName + "; }"; + const auto retTypeDeclaration = "namespace __tdf" + std::to_string(fLoopManager->GetID()) + " { " + + retTypeNameFwdDecl + " using " + std::string(name) + "_type = " + retTypeName + + "; }"; gInterpreter->Declare(retTypeDeclaration.c_str()); - RDFInternal::RBookedCustomColumns newCols(newColumns); - auto newColumn = std::make_shared<NewCol_t>(loopManager.get(), name, std::move(expression), validColumnNames, loopManager->GetNSlots(), - newCols); - loopManager->RegisterCustomColumn(newColumn.get()); + auto newColumn = std::make_shared<NewCol_t>(fLoopManager, name, std::move(expression), validColumnNames, + fLoopManager->GetNSlots(), newCols); + fLoopManager->RegisterCustomColumn(newColumn.get()); newCols.AddName(name); newCols.AddColumn(newColumn, name); - RInterface<Proxied> newInterface(fProxiedPtr, fImplWeakPtr, std::move(newCols), fBranchNames, fDataSource); + RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fBranchNames, fDataSource); return newInterface; } @@ -1907,11 +1884,10 @@ private: { RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnList.size()); - auto lm = GetLoopManager(); const auto validCols = GetValidatedColumnNames(columnList.size(), columnList); - auto newColumns = - CheckAndFillDSColumns(validCols, std::index_sequence_for<ColumnTypes...>(), TTraits::TypeList<ColumnTypes...>()); + auto newColumns = CheckAndFillDSColumns(validCols, std::index_sequence_for<ColumnTypes...>(), + TTraits::TypeList<ColumnTypes...>()); const std::string fullTreename(treename); // split name into directory and treename if needed @@ -1934,12 +1910,12 @@ private: // multi-thread snapshot using Helper_t = RDFInternal::SnapshotHelperMT<ColumnTypes...>; using Action_t = RDFInternal::RAction<Helper_t, Proxied>; - actionPtr.reset( - new Action_t(Helper_t(lm->GetNSlots(), filename, dirname, treename, validCols, columnList, options), - validCols, fProxiedPtr, newColumns)); + actionPtr.reset(new Action_t( + Helper_t(fLoopManager->GetNSlots(), filename, dirname, treename, validCols, columnList, options), validCols, + fProxiedPtr, newColumns)); } - lm->Book(actionPtr.get()); + fLoopManager->Book(actionPtr.get()); // create new RDF ::TDirectory::TContext ctxt; @@ -1955,7 +1931,8 @@ private: auto chain = std::make_shared<TChain>(fullTreename.c_str()); chain->Add(std::string(filename).c_str()); snapshotRDF->fProxiedPtr->SetTree(chain); - auto snapshotRDFResPtr = MakeResultPtr(snapshotRDF, lm, std::move(actionPtr)); + auto snapshotRDFResPtr = MakeResultPtr(snapshotRDF, *fLoopManager, std::move(actionPtr)); + if (!options.fLazy) { *snapshotRDFResPtr; } @@ -1989,22 +1966,13 @@ private: } protected: - /// Get the RLoopManager if reachable. If not, throw. - std::shared_ptr<RLoopManager> GetLoopManager() + RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm, RDFInternal::RBookedCustomColumns columns, + const std::shared_ptr<const ColumnNames_t> &datasetColumns, RDataSource *ds) + : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fBranchNames(datasetColumns), fCustomColumns(columns) { - auto df = fImplWeakPtr.lock(); - if (!df) { - throw std::runtime_error("The main RDataFrame is not reachable: did it go out of scope?"); - } - return df; } - RInterface(const std::shared_ptr<Proxied> &proxied, const std::weak_ptr<RLoopManager> &impl, - RDFInternal::RBookedCustomColumns columns, const std::shared_ptr<const ColumnNames_t> &datasetColumns, - RDataSource *ds) - : fProxiedPtr(proxied), fImplWeakPtr(impl), fDataSource(ds), fBranchNames(datasetColumns), fCustomColumns(columns) - { - } + RLoopManager *GetLoopManager() const { return fLoopManager; } const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; } @@ -2012,23 +1980,24 @@ protected: /// which is expensive in terms of runtime, is called at most once. ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns) { - auto loopManager = GetLoopManager(); - auto tree = loopManager->GetTree(); + auto tree = fLoopManager->GetTree(); if (tree && !fBranchNames) { fBranchNames = std::make_shared<ColumnNames_t>(RDFInternal::GetBranchNames(*tree)); } - return RDFInternal::GetValidatedColumnNames(*loopManager, nColumns, columns, - (tree ? *fBranchNames : ColumnNames_t{}), - fCustomColumns.GetNames(), fDataSource); + return RDFInternal::GetValidatedColumnNames(*fLoopManager, nColumns, columns, + (tree ? *fBranchNames : ColumnNames_t{}), fCustomColumns.GetNames(), + fDataSource); } template <typename... ColumnTypes, std::size_t... S> - RDFInternal::RBookedCustomColumns CheckAndFillDSColumns(ColumnNames_t validCols, std::index_sequence<S...>, TTraits::TypeList<ColumnTypes...>){ - auto lm = GetLoopManager(); - return fDataSource - ? RDFInternal::AddDSColumns(*lm, validCols, fCustomColumns, *fDataSource, lm->GetNSlots(), - std::index_sequence_for<ColumnTypes...>(), TTraits::TypeList<ColumnTypes...>()) - : fCustomColumns; + RDFInternal::RBookedCustomColumns + CheckAndFillDSColumns(ColumnNames_t validCols, std::index_sequence<S...>, TTraits::TypeList<ColumnTypes...>) + { + return fDataSource + ? RDFInternal::AddDSColumns(*fLoopManager, validCols, fCustomColumns, *fDataSource, + fLoopManager->GetNSlots(), std::index_sequence_for<ColumnTypes...>(), + TTraits::TypeList<ColumnTypes...>()) + : fCustomColumns; } }; diff --git a/tree/dataframe/inc/ROOT/RDFNodes.hxx b/tree/dataframe/inc/ROOT/RDFNodes.hxx index ebb05c4eb0b..bfcfe43d3d1 100644 --- a/tree/dataframe/inc/ROOT/RDFNodes.hxx +++ b/tree/dataframe/inc/ROOT/RDFNodes.hxx @@ -666,8 +666,8 @@ class RCustomColumn final : public RCustomColumnBase { public: RCustomColumn(RLoopManager *lm, std::string_view name, F &&expression, const ColumnNames_t &bl, unsigned int nSlots, const RDFInternal::RBookedCustomColumns &customColumns, bool isDSColumn = false) - : RCustomColumnBase(lm, name, nSlots, isDSColumn, customColumns), fExpression(std::move(expression)), fBranches(bl), - fLastResults(fNSlots), fValues(fNSlots) + : RCustomColumnBase(lm, name, nSlots, isDSColumn, customColumns), fExpression(std::move(expression)), + fBranches(bl), fLastResults(fNSlots), fValues(fNSlots) { } diff --git a/tree/dataframe/inc/ROOT/RResultPtr.hxx b/tree/dataframe/inc/ROOT/RResultPtr.hxx index 1a32e94a3a7..cdd753aa0bf 100644 --- a/tree/dataframe/inc/ROOT/RResultPtr.hxx +++ b/tree/dataframe/inc/ROOT/RResultPtr.hxx @@ -39,7 +39,7 @@ namespace RDF { using ROOT::RDF::RResultPtr; // Fwd decl for RResultPtr template <typename T> -RResultPtr<T> MakeResultPtr(const std::shared_ptr<T> &r, const std::shared_ptr<RLoopManager> &df, +RResultPtr<T> MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &df, std::shared_ptr<ROOT::Internal::RDF::RActionBase> actionPtr); } // ns RDF } // ns Detail @@ -71,13 +71,11 @@ template <typename T> class RResultPtr { // private using declarations using SPT_t = std::shared_ptr<T>; - using SPTLM_t = std::shared_ptr<RDFDetail::RLoopManager>; - using WPTLM_t = std::weak_ptr<RDFDetail::RLoopManager>; // friend declarations template <typename T1> - friend RResultPtr<T1> - RDFDetail::MakeResultPtr(const std::shared_ptr<T1> &, const SPTLM_t &, std::shared_ptr<RDFInternal::RActionBase>); + friend RResultPtr<T1> RDFDetail::MakeResultPtr(const std::shared_ptr<T1> &, ::ROOT::Detail::RDF::RLoopManager &, + std::shared_ptr<RDFInternal::RActionBase>); template <class T1, class T2> friend bool operator==(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs); template <class T1, class T2> @@ -109,13 +107,15 @@ class RResultPtr { }; /// \endcond - WPTLM_t fImplWeakPtr; ///< Points to the RLoopManager at the root of the functional graph + /// Non-owning pointer to the RLoopManager at the root of this computation graph. + /// The RLoopManager is guaranteed to be always in scope if fLoopManager is not a nullptr. + RDFDetail::RLoopManager *fLoopManager = nullptr; SPT_t fObjPtr; ///< Shared pointer encapsulating the wrapped result /// Owning pointer to the action that will produce this result. /// Ownership is shared with other copies of this ResultPtr. std::shared_ptr<RDFInternal::RActionBase> fActionPtr; - /// Triggers the event loop in the RLoopManager instance to which it's associated via the fImplWeakPtr + /// Triggers the event loop in the RLoopManager void TriggerRun(); /// Get the pointer to the encapsulated result. @@ -128,9 +128,9 @@ class RResultPtr { return fObjPtr.get(); } - RResultPtr(std::shared_ptr<T> objPtr, std::shared_ptr<RDFDetail::RLoopManager> lm, + RResultPtr(std::shared_ptr<T> objPtr, RDFDetail::RLoopManager *lm, std::shared_ptr<RDFInternal::RActionBase> actionPtr) - : fImplWeakPtr(std::move(lm)), fObjPtr(std::move(objPtr)), fActionPtr(std::move(actionPtr)) + : fLoopManager(lm), fObjPtr(std::move(objPtr)), fActionPtr(std::move(actionPtr)) { } @@ -224,10 +224,7 @@ public: // clang-format on RResultPtr<T> &OnPartialResult(ULong64_t everyNEvents, std::function<void(T &)> callback) { - auto lm = fImplWeakPtr.lock(); - if (!lm) - throw std::runtime_error("The main RDataFrame is not reachable: did it go out of scope?"); - const auto nSlots = lm->GetNSlots(); + const auto nSlots = fLoopManager->GetNSlots(); auto actionPtr = fActionPtr; auto c = [nSlots, actionPtr, callback](unsigned int slot) { if (slot != nSlots - 1) @@ -235,7 +232,7 @@ public: auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot)); callback(*partialResult); }; - lm->RegisterCallback(everyNEvents, std::move(c)); + fLoopManager->RegisterCallback(everyNEvents, std::move(c)); return *this; } @@ -272,15 +269,12 @@ public: // clang-format on RResultPtr<T> &OnPartialResultSlot(ULong64_t everyNEvents, std::function<void(unsigned int, T &)> callback) { - auto lm = fImplWeakPtr.lock(); - if (!lm) - throw std::runtime_error("The main RDataFrame is not reachable: did it go out of scope?"); auto actionPtr = fActionPtr; auto c = [actionPtr, callback](unsigned int slot) { auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot)); callback(slot, *partialResult); }; - lm->RegisterCallback(everyNEvents, std::move(c)); + fLoopManager->RegisterCallback(everyNEvents, std::move(c)); return *this; } }; @@ -288,11 +282,7 @@ public: template <typename T> void RResultPtr<T>::TriggerRun() { - auto df = fImplWeakPtr.lock(); - if (!df) { - throw std::runtime_error("The main RDataFrame is not reachable: did it go out of scope?"); - } - df->Run(); + fLoopManager->Run(); } template <class T1, class T2> @@ -339,11 +329,10 @@ namespace RDF { /// Create a RResultPtr and set its pointer to the corresponding RAction /// This overload is invoked by non-jitted actions, as they have access to RAction before constructing RResultPtr. template <typename T> -RResultPtr<T> MakeResultPtr(const std::shared_ptr<T> &r, const std::shared_ptr<RLoopManager> &df, - std::shared_ptr<RDFInternal::RActionBase> actionPtr) +RResultPtr<T> +MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &lm, std::shared_ptr<RDFInternal::RActionBase> actionPtr) { - auto resPtr = RResultPtr<T>(r, df, std::move(actionPtr)); - return resPtr; + return RResultPtr<T>(r, &lm, std::move(actionPtr)); } } // end NS RDF } // end NS Detail diff --git a/tree/dataframe/src/RDFGraphUtils.cxx b/tree/dataframe/src/RDFGraphUtils.cxx index 7396d3ab98b..cfd60887ac2 100644 --- a/tree/dataframe/src/RDFGraphUtils.cxx +++ b/tree/dataframe/src/RDFGraphUtils.cxx @@ -64,7 +64,7 @@ std::string GraphCreatorHelper::RepresentGraph(ROOT::RDataFrame &rDataFrame) return RepresentGraph(loopManager); } -std::string GraphCreatorHelper::RepresentGraph(std::shared_ptr<RLoopManager> loopManager) +std::string GraphCreatorHelper::RepresentGraph(RLoopManager *loopManager) { auto actions = loopManager->GetAllActions(); diff --git a/tree/dataframe/src/RDataFrame.cxx b/tree/dataframe/src/RDataFrame.cxx index ffca50d0109..d4306faacce 100644 --- a/tree/dataframe/src/RDataFrame.cxx +++ b/tree/dataframe/src/RDataFrame.cxx @@ -863,9 +863,9 @@ namespace cling { /// Print a RDataFrame at the prompt std::string printValue(ROOT::RDataFrame *tdf) { - auto df = tdf->GetLoopManager(); - auto *tree = df->GetTree(); - auto defBranches = df->GetDefaultColumnNames(); + auto &df = *tdf->GetLoopManager(); + auto *tree = df.GetTree(); + auto defBranches = df.GetDefaultColumnNames(); std::ostringstream ret; if (tree) { @@ -884,7 +884,7 @@ std::string printValue(ROOT::RDataFrame *tdf) ret << "A data frame associated to the data source \"" << cling::printValue(ds) << "\""; } else { - ret << "An empty data frame that will create " << df->GetNEmptyEntries() << " entries\n"; + ret << "An empty data frame that will create " << df.GetNEmptyEntries() << " entries\n"; } return ret.str(); -- GitLab