From f323fffada6340c776dfc8785ae0f64b4eade31b Mon Sep 17 00:00:00 2001 From: massimo <imaxoi@hotmail.it> Date: Fri, 6 Jul 2018 17:09:38 +0200 Subject: [PATCH] [DF] Helper to draw the operation graph In order for this PR to work, RFilter and Datasources expose their names. --- tree/dataframe/inc/ROOT/GraphNode.hxx | 150 ++++++++++++ tree/dataframe/inc/ROOT/RArrowDS.hxx | 1 + tree/dataframe/inc/ROOT/RCsvDS.hxx | 1 + tree/dataframe/inc/ROOT/RDFActionHelpers.hxx | 82 +++++++ .../inc/ROOT/RDFBookedCustomColumns.hxx | 4 +- tree/dataframe/inc/ROOT/RDFGraphUtils.hxx | 173 ++++++++++++++ tree/dataframe/inc/ROOT/RDFHelpers.hxx | 42 +++- tree/dataframe/inc/ROOT/RDFInterface.hxx | 7 + tree/dataframe/inc/ROOT/RDFInterfaceUtils.hxx | 2 +- tree/dataframe/inc/ROOT/RDFNodes.hxx | 145 ++++++++++- tree/dataframe/inc/ROOT/RDataSource.hxx | 3 + tree/dataframe/inc/ROOT/RLazyDSImpl.hxx | 5 + tree/dataframe/inc/ROOT/RResultPtr.hxx | 13 +- tree/dataframe/inc/ROOT/RRootDS.hxx | 1 + tree/dataframe/inc/ROOT/RSqliteDS.hxx | 1 + tree/dataframe/inc/ROOT/RTrivialDS.hxx | 1 + tree/dataframe/src/RArrowDS.cxx | 5 + tree/dataframe/src/RCsvDS.cxx | 5 + tree/dataframe/src/RDFGraphUtils.cxx | 136 +++++++++++ tree/dataframe/src/RDFInterfaceUtils.cxx | 4 +- tree/dataframe/src/RDFNodes.cxx | 23 ++ tree/dataframe/src/RDataFrame.cxx | 47 +++- tree/dataframe/src/RRootDS.cxx | 5 + tree/dataframe/src/RSqliteDS.cxx | 5 + tree/dataframe/src/RTrivialDS.cxx | 5 + tree/dataframe/test/RNonCopiableColumnDS.hxx | 3 + tree/dataframe/test/RStreamingDS.hxx | 5 + tree/dataframe/test/dataframe_helpers.cxx | 226 ++++++++++++++++++ tree/dataframe/test/dataframe_simple.cxx | 2 + tutorials/dataframe/df018_customActions.C | 4 + tutorials/dataframe/df022_useKahan.C | 5 + 31 files changed, 1100 insertions(+), 11 deletions(-) create mode 100644 tree/dataframe/inc/ROOT/GraphNode.hxx create mode 100644 tree/dataframe/inc/ROOT/RDFGraphUtils.hxx create mode 100644 tree/dataframe/src/RDFGraphUtils.cxx diff --git 
a/tree/dataframe/inc/ROOT/GraphNode.hxx b/tree/dataframe/inc/ROOT/GraphNode.hxx new file mode 100644 index 00000000000..1d58ff29cbd --- /dev/null +++ b/tree/dataframe/inc/ROOT/GraphNode.hxx @@ -0,0 +1,150 @@ +// Author: Enrico Guiraud, Danilo Piparo, CERN, Massimo Tumolo Politecnico di Torino 08/2018 + +/************************************************************************* + * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. * + * All rights reserved. * + * * + * For the licensing terms see $ROOTSYS/LICENSE. * + * For the list of contributors see $ROOTSYS/README/CREDITS. * + *************************************************************************/ + +#ifndef ROOT_RDF_GRAPHNODE +#define ROOT_RDF_GRAPHNODE + +#include <string> +#include <memory> +#include <vector> +#include "TString.h" + +#include <iostream> + +namespace ROOT { +namespace Internal { +namespace RDF { +namespace GraphDrawing { + +class GraphCreatorHelper; + +// clang-format off +/** +\class ROOT::Internal::RDF::GraphDrawing::GraphNode +\ingroup dataframe +\brief Class used to create the operation graph to be printed in the dot representation + + This represents a single node of the overall graph. Each node maps the real RNode keeping just + the name and the columns defined up to that point. +*/ +// clang-format on +class GraphNode { + friend class GraphCreatorHelper; + +private: + unsigned int fCounter; ///< Nodes may share the same name (e.g. Filter). To manage this situation in dot, each node + ///< is represented by a unique id. + std::string fName, fColor, fShape; + std::vector<std::string> + fDefinedColumns; ///< Columns defined up to this node. By checking the defined columns between two consecutive + ///< nodes, it is possible to know if there was some Define in between. 
+ std::shared_ptr<GraphNode> fPrevNode; + + bool fIsExplored = false; ///< When the graph is reconstructed, the first time this node has been explored this flag + ///< is set and it won't be explored anymore + bool fIsNew = true; ///< A just created node. This means that in no other exploration the node was already created + ///< (this is needed because branches may share some common node). + + //////////////////////////////////////////////////////////////////////////// + /// \brief Returns a static variable to allow each node to retrieve its counter + static unsigned int &GetStaticGlobalCounter() + { + static unsigned int sGlobalCounter = 1; + return sGlobalCounter; + } + +public: + //////////////////////////////////////////////////////////////////////////// + /// \brief Creates a node with a name and a counter + GraphNode(const std::string_view &name) : fName(name) { fCounter = GetStaticGlobalCounter()++; } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Resets the counter. + /// This is not strictly needed but guarantees that two consecutive requests to the graph return the same result. 
+ static void ClearCounter() { GraphNode::GetStaticGlobalCounter() = 1; } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Appends a node on the head of the current node + void SetPrevNode(const std::shared_ptr<GraphNode> &node) { fPrevNode = node; } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Sets the columns defined up to the node, replacing any previously stored list + void AddDefinedColumns(const std::vector<std::string> &columns) { fDefinedColumns = columns; } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Gets a copy of the columns defined up to the node + std::vector<std::string> GetDefinedColumns() { return fDefinedColumns; } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Manually sets the counter to a node. + /// It is used by the root node to set its counter to zero. + void SetCounter(unsigned int counter) { fCounter = counter; } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Allows to stop the graph traversal when an explored node is encountered + void SetIsExplored(bool isExplored) { fIsExplored = isExplored; } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Marks whether the node is considered just created + void SetIsNew(bool isNew) { fIsNew = isNew; } + + bool GetIsNew() { return fIsNew; } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Sets the color and shape used to draw the root node + void SetRoot() + { + fColor = "#e8f8fc"; + fShape = "oval"; + } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Sets the color and shape used to draw a Filter node + void SetFilter() + { + fColor = "#c4cfd4"; + fShape = "diamond"; + } + + //////////////////////////////////////////////////////////////////////////// + /// 
\brief Sets the color and shape used to draw a Define 
node + void SetDefine() + { + fColor = "#60aef3"; + fShape = "oval"; + } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Sets the color and shape used to draw a Range node + void SetRange() + { + fColor = "#6F4D8F"; + fShape = "diamond"; + } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Sets the box shape used to draw an action node; the color depends on whether the action has run + void SetAction(bool hasRun) + { + if (hasRun) { + fColor = "#baf1e5"; + } else { + fColor = "#9cbbe5"; + } + fShape = "box"; + } +}; + +} // namespace GraphDrawing +} // namespace RDF +} // namespace Internal +} // namespace ROOT + +#endif diff --git a/tree/dataframe/inc/ROOT/RArrowDS.hxx b/tree/dataframe/inc/ROOT/RArrowDS.hxx index ebbc184e9f9..43d6c112350 100644 --- a/tree/dataframe/inc/ROOT/RArrowDS.hxx +++ b/tree/dataframe/inc/ROOT/RArrowDS.hxx @@ -42,6 +42,7 @@ public: void InitSlot(unsigned int slot, ULong64_t firstEntry) override; void SetNSlots(unsigned int nSlots) override; void Initialise() override; + std::string GetDataSourceType() override; }; //////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tree/dataframe/inc/ROOT/RCsvDS.hxx b/tree/dataframe/inc/ROOT/RCsvDS.hxx index 737eb9edb35..52d4b5fcbab 100644 --- a/tree/dataframe/inc/ROOT/RCsvDS.hxx +++ b/tree/dataframe/inc/ROOT/RCsvDS.hxx @@ -78,6 +78,7 @@ public: bool HasColumn(std::string_view colName) const; bool SetEntry(unsigned int slot, ULong64_t entry); void SetNSlots(unsigned int nSlots); + std::string GetDataSourceType(); }; //////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tree/dataframe/inc/ROOT/RDFActionHelpers.hxx b/tree/dataframe/inc/ROOT/RDFActionHelpers.hxx index 3fd4e69a04e..641dcc083a3 100644 --- a/tree/dataframe/inc/ROOT/RDFActionHelpers.hxx +++ b/tree/dataframe/inc/ROOT/RDFActionHelpers.hxx @@ -64,6 +64,7 @@ public: 
template <typename... 
Args> void CallFinalizeTask(unsigned int, Args...) {} + }; } // namespace RDF @@ -109,6 +110,10 @@ public: void Initialize() { /* noop */} void Finalize() { /* noop */} + + std::string GetActionName(){ /* name given to this action's node in the operation graph */ + return "ForeachSlot"; + } }; class CountHelper : public RActionImpl<CountHelper> { @@ -125,6 +130,11 @@ public: void Initialize() { /* noop */} void Finalize(); ULong64_t &PartialUpdate(unsigned int slot); + + std::string GetActionName(){ + return "Count"; + } + }; template <typename ProxiedVal_t> @@ -152,6 +162,11 @@ public: if (!fReturnEmptyReport && !fProxiedWPtr.expired()) fProxiedWPtr.lock()->Report(*fReport); } + + std::string GetActionName(){ + return "Report"; + } + }; class FillHelper : public RActionImpl<FillHelper> { @@ -211,6 +226,11 @@ public: void Initialize() { /* noop */} void Finalize(); + + std::string GetActionName(){ + return "Fill"; + } + }; extern template void FillHelper::Exec(unsigned int, const std::vector<float> &); @@ -332,6 +352,10 @@ public: void Finalize() { fTo->Merge(); } HIST &PartialUpdate(unsigned int slot) { return *fTo->GetAtSlotRaw(slot); } + + std::string GetActionName(){ + return "FillTO"; + } }; class FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> { @@ -391,6 +415,10 @@ public: *fResultGraph = *graph; } + std::string GetActionName(){ + return "Graph"; + } + ::TGraph &PartialUpdate(unsigned int slot) { return *fTo->GetAtSlotRaw(slot); } }; @@ -435,6 +463,10 @@ public: } COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); } + + std::string GetActionName(){ + return "Take"; + } }; // Case 2.: The column is not an RVec, the collection is a vector @@ -478,6 +510,10 @@ public: } std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; } + + std::string GetActionName(){ + return "Take"; + } }; // Case 3.: The column is a RVec, the collection is not a vector @@ -513,6 +549,11 @@ public: } } } + + std::string 
GetActionName(){ + return "Take"; + } + }; // Case 4.: The column 
is an RVec, the collection is a vector @@ -556,6 +597,11 @@ public: rColl->insert(rColl->end(), coll->begin(), coll->end()); } } + +std::string GetActionName(){ + return "Take"; +} + }; template <typename ResultType> @@ -591,6 +637,10 @@ public: } ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; } + + std::string GetActionName(){ + return "Min"; + } }; // TODO @@ -634,6 +684,10 @@ public: } ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; } + + std::string GetActionName(){ + return "Max"; + } }; // TODO @@ -690,6 +744,11 @@ public: } ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; } + + std::string GetActionName(){ + return "Sum"; + } + }; class MeanHelper : public RActionImpl<MeanHelper> { @@ -719,6 +778,10 @@ public: void Finalize(); double &PartialUpdate(unsigned int slot); + + std::string GetActionName(){ + return "Mean"; + } }; extern template void MeanHelper::Exec(unsigned int, const std::vector<float> &); @@ -756,6 +819,11 @@ public: void Initialize() { /* noop */} void Finalize(); + + std::string GetActionName(){ + return "StdDev"; + } + }; extern template void StdDevHelper::Exec(unsigned int, const std::vector<float> &); @@ -925,6 +993,11 @@ public: } } + + std::string GetActionName(){ + return "Snapshot"; + } + }; @@ -1049,6 +1122,10 @@ public: fMerger.reset(); } + std::string GetActionName(){ + return "Snapshot"; + } + }; template <typename Acc, typename Merge, typename R, typename T, typename U, @@ -1101,6 +1178,11 @@ public: } U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; } + + std::string GetActionName(){ + return "Aggregate"; + } + }; } // end of NS RDF diff --git a/tree/dataframe/inc/ROOT/RDFBookedCustomColumns.hxx b/tree/dataframe/inc/ROOT/RDFBookedCustomColumns.hxx index c36b29d2065..353eaac3f11 100644 --- a/tree/dataframe/inc/ROOT/RDFBookedCustomColumns.hxx +++ b/tree/dataframe/inc/ROOT/RDFBookedCustomColumns.hxx @@ -88,11 +88,11 @@ public: 
//////////////////////////////////////////////////////////////////////////// /// \brief Internally it recreates the map with the new column, and swaps with the old one. - void AddColumn(const std::shared_ptr<RDFDetail::RCustomColumnBase>& column, const std::string_view& name); + void AddColumn(const std::shared_ptr<RDFDetail::RCustomColumnBase> &column, const std::string_view &name); //////////////////////////////////////////////////////////////////////////// /// \brief Internally it recreates the map with the new column name, and swaps with the old one. - void AddName(const std::string_view& name); + void AddName(const std::string_view &name); }; diff --git a/tree/dataframe/inc/ROOT/RDFGraphUtils.hxx b/tree/dataframe/inc/ROOT/RDFGraphUtils.hxx new file mode 100644 index 00000000000..1a0363d15b2 --- /dev/null +++ b/tree/dataframe/inc/ROOT/RDFGraphUtils.hxx @@ -0,0 +1,173 @@ +// Author: Enrico Guiraud, Danilo Piparo, CERN, Massimo Tumolo Politecnico di Torino 08/2018 + +/************************************************************************* + * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. * + * All rights reserved. * + * * + * For the licensing terms see $ROOTSYS/LICENSE. * + * For the list of contributors see $ROOTSYS/README/CREDITS. 
* + *************************************************************************/ + +#ifndef ROOT_GRAPHUTILS +#define ROOT_GRAPHUTILS + +#include <string> +#include <sstream> +#include <vector> +#include <map> +#include <memory> +#include <type_traits> +#include <ROOT/RDataFrame.hxx> +#include <ROOT/RDFInterface.hxx> +#include <ROOT/GraphNode.hxx> + +#include <iostream> + +namespace ROOT { +namespace Detail { +namespace RDF { +class RCustomColumnBase; +class RFilterBase; +class RRangeBase; +} // namespace RDF +} // namespace Detail + +namespace Internal { +namespace RDF { +namespace GraphDrawing { +std::shared_ptr<GraphNode> +CreateDefineNode(const std::string &columnName, const ROOT::Detail::RDF::RCustomColumnBase *columnPtr); + +std::shared_ptr<GraphNode> CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr); + +std::shared_ptr<GraphNode> CreateRangeNode(const ROOT::Detail::RDF::RRangeBase *rangePtr); + +bool CheckIfDefaultOrDSColumn(const std::string &name, + const std::shared_ptr<ROOT::Detail::RDF::RCustomColumnBase> &column); + +// clang-format off +/** +\class ROOT::Internal::RDF::GraphDrawing::GraphCreatorHelper +\ingroup dataframe +\brief Helper class that provides the operation graph nodes. + + This class is the single point from which graph nodes can be retrieved. Every time the functor is invoked, + it clears the static members and starts again. + By asking this class to create a node, it will return an existing node if already created, otherwise a new one. 
+*/ +// clang-format on +class GraphCreatorHelper { +private: + using ColumnsNodesMap_t = std::map<const ROOT::Detail::RDF::RCustomColumnBase *, std::weak_ptr<GraphNode>>; + using FiltersNodesMap_t = std::map<const ROOT::Detail::RDF::RFilterBase *, std::weak_ptr<GraphNode>>; + using RangesNodesMap_t = std::map<const ROOT::Detail::RDF::RRangeBase *, std::weak_ptr<GraphNode>>; + + //////////////////////////////////////////////////////////////////////////// + /// \brief Stores the columns defined and which node in the graph defined them. + static ColumnsNodesMap_t &GetStaticColumnsMap() + { + static ColumnsNodesMap_t sMap; + return sMap; + }; + + //////////////////////////////////////////////////////////////////////////// + /// \brief Stores the filters defined and which node in the graph defined them. + static FiltersNodesMap_t &GetStaticFiltersMap() + { + static FiltersNodesMap_t sMap; + return sMap; + } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Stores the ranges defined and which node in the graph defined them. + static RangesNodesMap_t &GetStaticRangesMap() + { + static RangesNodesMap_t sMap; + return sMap; + } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Invoked by the RNodes to create a define graph node. + friend std::shared_ptr<GraphNode> + CreateDefineNode(const std::string &columnName, const ROOT::Detail::RDF::RCustomColumnBase *columnPtr); + + //////////////////////////////////////////////////////////////////////////// + /// \brief Invoked by the RNodes to create a Filter graph node. + friend std::shared_ptr<GraphNode> CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr); + + //////////////////////////////////////////////////////////////////////////// + /// \brief Invoked by the RNodes to create a Range graph node. 
+ friend std::shared_ptr<GraphNode> CreateRangeNode(const ROOT::Detail::RDF::RRangeBase *rangePtr); + + //////////////////////////////////////////////////////////////////////////// + /// \brief Starting from any leaf (Action, Filter, Range) it draws the dot representation of the branch. + std::string FromGraphLeafToDot(std::shared_ptr<GraphNode> leaf); + + //////////////////////////////////////////////////////////////////////////// + /// \brief Starting from a vector of leaves, it draws the entire graph. + std::string FromGraphActionsToDot(std::vector<std::shared_ptr<GraphNode>> leaves); + + //////////////////////////////////////////////////////////////////////////// + /// \brief Starting from the root node, prints the entire graph. + std::string RepresentGraph(ROOT::RDataFrame &rDataFrame); + + //////////////////////////////////////////////////////////////////////////// + /// \brief Starting from the root node, prints the entire graph. + std::string RepresentGraph(std::shared_ptr<RLoopManager> rLoopManager); + + //////////////////////////////////////////////////////////////////////////// + /// \brief Starting from a Filter or Range, prints the branch it belongs to + template <typename Proxied, typename DataSource> + std::string RepresentGraph(RInterface<Proxied, DataSource> &rInterface) + { + auto loopManager = rInterface.GetLoopManager(); + if (!loopManager->fToJit.empty()) + loopManager->BuildJittedNodes(); + + return FromGraphLeafToDot(rInterface.GetProxiedPtr()->GetGraph()); + } + + //////////////////////////////////////////////////////////////////////////// + /// \brief Starting from an action, prints the branch it belongs to + template <typename T> + std::string RepresentGraph(const RResultPtr<T> &resultPtr) + { + auto loopManager = resultPtr.fImplWeakPtr.lock(); + if (!loopManager) + throw std::runtime_error("Something went wrong"); + + if (std::is_same<T, RInterface<RLoopManager, void>>::value) { + return RepresentGraph(loopManager); + } + + if 
(!loopManager->fToJit.empty()) + loopManager->BuildJittedNodes(); + + auto actionPtr = resultPtr.fActionPtr; + return FromGraphLeafToDot(actionPtr->GetGraph()); + } + +public: + //////////////////////////////////////////////////////////////////////////// + /// \brief Functor. Initializes the static members and delegates the work to the right overload. + /// \tparam NodeType the RNode from which the graph has to be drawn + template <typename NodeType> + std::string operator()(NodeType &node) + { + // First all static data structures are cleaned, to avoid undefined behaviours if more than one Represent is + // called + GetStaticFiltersMap() = FiltersNodesMap_t(); + GetStaticColumnsMap() = ColumnsNodesMap_t(); + GetStaticRangesMap() = RangesNodesMap_t(); + GraphNode::ClearCounter(); + // The Represent can now start on a clean environment + return RepresentGraph(node); + } +}; + +} // namespace GraphDrawing +} // namespace RDF +} // namespace Internal +} // namespace ROOT + +#endif diff --git a/tree/dataframe/inc/ROOT/RDFHelpers.hxx b/tree/dataframe/inc/ROOT/RDFHelpers.hxx index 12f72f67166..51aa7a65cad 100644 --- a/tree/dataframe/inc/ROOT/RDFHelpers.hxx +++ b/tree/dataframe/inc/ROOT/RDFHelpers.hxx @@ -13,11 +13,20 @@ #ifndef ROOT_RDF_HELPERS #define ROOT_RDF_HELPERS -#include <ROOT/TypeTraits.hxx> +#include <ROOT/RDataFrame.hxx> +#include <ROOT/RDFGraphUtils.hxx> #include <ROOT/RIntegerSequence.hxx> +#include <ROOT/TypeTraits.hxx> +#include <algorithm> // std::transform #include <functional> #include <type_traits> +#include <vector> +#include <memory> +#include <fstream> +#include <iostream> +#include "TString.h" + namespace ROOT { namespace Internal { @@ -59,6 +68,8 @@ auto PassAsVec(F &&f) -> PassAsVecHelper<std::make_index_sequence<N>, T, F> namespace RDF { namespace RDFInternal = ROOT::Internal::RDF; + + // clag-format off /// Given a callable with signature bool(T1, T2, ...) 
return a callable with same signature that returns the negated result /// @@ -92,6 +103,35 @@ auto PassAsVec(F &&f) -> RDFInternal::PassAsVecHelper<std::make_index_sequence<N { return RDFInternal::PassAsVecHelper<std::make_index_sequence<N>, T, F>(std::forward<F>(f)); } +template <typename Proxied, typename DataSource> +class RInterface; + +// clang-format off +/// Creates the dot representation of the graph. +/// Won't work if the event loop has been executed +/// \param[in] node any node of the graph. If the node is a LoopManager, it prints the entire graph. Otherwise, only the branch the node belongs to. +/// \param[in] dotFilePath where to save the representation. If empty (the default), the graph is printed on standard output. A std::runtime_error is thrown if the file cannot be opened. +// clang-format on +template <typename NodeType> +void RepresentGraph(NodeType &node, const std::string &dotFilePath="") +{ + ROOT::Internal::RDF::GraphDrawing::GraphCreatorHelper helper; + std::string dotGraph = helper(node); + + if(dotFilePath==""){ + // No file specified, print on standard output + std::cout << dotGraph <<std::endl; + return; + } + + std::ofstream out(dotFilePath); + if (!out.is_open()) { + throw std::runtime_error("File path not valid"); + } + + out << dotGraph; + out.close(); +} } // namespace RDF } // namespace ROOT diff --git a/tree/dataframe/inc/ROOT/RDFInterface.hxx b/tree/dataframe/inc/ROOT/RDFInterface.hxx index 1ae815c17f0..61439e2d5af 100644 --- a/tree/dataframe/inc/ROOT/RDFInterface.hxx +++ b/tree/dataframe/inc/ROOT/RDFInterface.hxx @@ -65,6 +65,11 @@ class TProfile; // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface namespace ROOT { class RDataFrame; +namespace Internal{ +namespace RDF{ + class GraphCreatorHelper; +} +} } namespace cling { std::string printValue(ROOT::RDataFrame *tdf); @@ -90,6 +95,8 @@ class RInterface { using RRangeBase = RDFDetail::RRangeBase; using RLoopManager = 
RDFDetail::RLoopManager; friend std::string cling::printValue(::ROOT::RDataFrame 
*tdf); // For a nice printing at the prompt + friend class RDFInternal::GraphDrawing::GraphCreatorHelper; + template <typename T, typename W> friend class RInterface; diff --git a/tree/dataframe/inc/ROOT/RDFInterfaceUtils.hxx b/tree/dataframe/inc/ROOT/RDFInterfaceUtils.hxx index 6d11d7605be..59c1d2a4213 100644 --- a/tree/dataframe/inc/ROOT/RDFInterfaceUtils.hxx +++ b/tree/dataframe/inc/ROOT/RDFInterfaceUtils.hxx @@ -371,7 +371,7 @@ template <typename ActionTag, typename... BranchTypes, typename PrevNodeType, ty void CallBuildAction(std::shared_ptr<PrevNodeType> *prevNodeOnHeap, const ColumnNames_t &bl, const unsigned int nSlots, const std::shared_ptr<ActionResultType> *rOnHeap, std::shared_ptr<RJittedAction> *jittedActionOnHeap, - RDFInternal::RBookedCustomColumns *customColumns) + RDFInternal::RBookedCustomColumns *customColumns) { // if we are here it means we are jitting, if we are jitting the loop manager must be alive auto &prevNodePtr = *prevNodeOnHeap; diff --git a/tree/dataframe/inc/ROOT/RDFNodes.hxx b/tree/dataframe/inc/ROOT/RDFNodes.hxx index fa7a8fb404a..b191a499259 100644 --- a/tree/dataframe/inc/ROOT/RDFNodes.hxx +++ b/tree/dataframe/inc/ROOT/RDFNodes.hxx @@ -16,6 +16,7 @@ #include "ROOT/RDFNodesUtils.hxx" #include "ROOT/RDFBookedCustomColumns.hxx" #include "ROOT/RDFUtils.hxx" +#include "ROOT/GraphNode.hxx" #include "ROOT/RIntegerSequence.hxx" #include "ROOT/RMakeUnique.hxx" #include "ROOT/RVec.hxx" @@ -38,9 +39,18 @@ #include <vector> namespace ROOT { +namespace RDF{ +template <typename Proxied, typename DataSource> +class RInterface; +template <typename T> +class RResultPtr; + +}// Namespace RDF + namespace Internal { namespace RDF { class RActionBase; +class GraphCreatorHelper; // This is an helper class to allow to pick a slot resorting to a map // indexed by thread ids. 
@@ -65,6 +75,8 @@ public: } // namespace RDF } // namespace Internal + + namespace Detail { namespace RDF { class RCustomColumnBase; @@ -78,6 +90,8 @@ class RFilterBase; class RRangeBase; class RLoopManager { + friend class ROOT::Internal::RDF::GraphDrawing::GraphCreatorHelper; + using RDataSource = ROOT::RDF::RDataSource; enum class ELoopType { kROOTFiles, kROOTFilesMT, kNoFiles, kNoFilesMT, kDataSource, kDataSourceMT }; using Callback_t = std::function<void(unsigned int)>; @@ -212,8 +226,36 @@ public: fCustomColumns.erase(std::remove(fCustomColumns.begin(), fCustomColumns.end(), column), fCustomColumns.end()); } + std::vector<RDFInternal::RActionBase *> GetBookedActions(){ /* returned by value: callers get a copy of fBookedActions */ + return fBookedActions; + } + std::shared_ptr<ROOT::Internal::RDF::GraphDrawing::GraphNode> GetGraph(); + }; +} //namespace RDF +} //namespace Detail + +namespace Internal { +namespace RDF { +namespace GraphDrawing { +// Forward declarations for all nodes. Putting them here because RFilter, RRange, and RCustomColumn have been already +// declared. +std::shared_ptr<GraphNode> +CreateDefineNode(const std::string &columnName, const ROOT::Detail::RDF::RCustomColumnBase *columnPtr); + +std::shared_ptr<GraphNode> CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr); + +std::shared_ptr<GraphNode> CreateRangeNode(const ROOT::Detail::RDF::RRangeBase *rangePtr); + +bool CheckIfDefaultOrDSColumn(const std::string &name, + const std::shared_ptr<ROOT::Detail::RDF::RCustomColumnBase> &column); +} // namespace GraphDrawing +} // namespace RDF +} // namespace Internal + +namespace Detail { +namespace RDF { class RCustomColumnBase { protected: RLoopManager *fLoopManager; ///< A raw pointer to the RLoopManager at the root of this functional graph. 
It is only @@ -281,6 +323,7 @@ struct SlotAndEntry{}; namespace Internal { namespace RDF { using namespace ROOT::Detail::RDF; +namespace RDFGraphDrawing = ROOT::Internal::RDF::GraphDrawing; /** \class ROOT::Internal::RDF::TColumnValue @@ -451,6 +494,7 @@ public: virtual void *PartialUpdate(unsigned int slot) = 0; virtual bool HasRun() const = 0; + virtual std::shared_ptr< ROOT::Internal::RDF::GraphDrawing::GraphNode> GetGraph() = 0; }; class RJittedAction : public RActionBase { @@ -471,6 +515,8 @@ public: void *PartialUpdate(unsigned int slot) final; bool HasRun() const final; void ClearValueReaders(unsigned int slot) final; + + std::shared_ptr< ROOT::Internal::RDF::GraphDrawing::GraphNode> GetGraph(); }; template <typename Helper, typename PrevDataFrame, typename ColumnTypes_t = typename Helper::ColumnTypes_t> @@ -540,6 +586,33 @@ public: fHasRun = true; } + std::shared_ptr<RDFGraphDrawing::GraphNode> GetGraph() + { + auto prevNode = fPrevData.GetGraph(); + auto prevColumns = prevNode->GetDefinedColumns(); + + // Action nodes do not need to ask a helper to create the graph nodes. They are never common nodes between + // multiple branches + auto thisNode = std::make_shared< RDFGraphDrawing::GraphNode>(fHelper.GetActionName()); + auto evaluatedNode = thisNode; + for (auto &column : fCustomColumns.GetColumns()) { + /* Each column that this node has but the previous hadn't has been defined in between, + * so it has to be built and appended. 
*/ + if (RDFGraphDrawing::CheckIfDefaultOrDSColumn(column.first, column.second)) + continue; + if (std::find(prevColumns.begin(), prevColumns.end(), column.first) == prevColumns.end()) { + auto defineNode = RDFGraphDrawing::CreateDefineNode(column.first, column.second.get()); + evaluatedNode->SetPrevNode(defineNode); + evaluatedNode = defineNode; + } + } + + thisNode->AddDefinedColumns(fCustomColumns.GetNames()); + thisNode->SetAction(HasRun()); + evaluatedNode->SetPrevNode(prevNode); + return thisNode; + } + /// This method is invoked to update a partial result during the event loop, right before passing the result to a /// user-defined callback registered via RResultPtr::RegisterCallback void *PartialUpdate(unsigned int slot) final { return PartialUpdateImpl(slot); } @@ -555,7 +628,7 @@ private: return &fHelper.PartialUpdate(slot); } // this one is always available but has lower precedence thanks to `...` - void *PartialUpdateImpl(...) { throw std::runtime_error("This action does not support callbacks!"); } + void *PartialUpdateImpl(...) 
{ throw std::runtime_error("This action does not support callbacks yet!"); } }; } // namespace RDF @@ -563,6 +636,7 @@ private: namespace Detail { namespace RDF { +namespace RDFGraphDrawing = ROOT::Internal::RDF::GraphDrawing; template <typename F, typename ExtraArgsTag = CustomColExtraArgs::None> class RCustomColumn final : public RCustomColumnBase { @@ -705,6 +779,7 @@ public: virtual void ClearTask(unsigned int slot) = 0; virtual void InitNode(); virtual void AddFilterName(std::vector<std::string> &filters) = 0; + virtual std::shared_ptr<RDFGraphDrawing::GraphNode> GetGraph() = 0; }; /// A wrapper around a concrete RFilter, which forwards all calls to it @@ -735,6 +810,15 @@ public: void InitNode() final; void AddFilterName(std::vector<std::string> &filters) final; void ClearTask(unsigned int slot) final; + + std::shared_ptr<RDFGraphDrawing::GraphNode> GetGraph(){ + if(fConcreteFilter != nullptr ){ + //Here the filter exists, so it can be served + return fConcreteFilter->GetGraph(); + } + throw std::runtime_error("The Jitting should have been invoked before this method."); + } + }; template <typename FilterF, typename PrevDataFrame> @@ -842,6 +926,42 @@ public: ClearValueReaders(slot); } + + std::shared_ptr<RDFGraphDrawing::GraphNode> GetGraph(){ + // Recursively call for the previous node. + auto prevNode = fPrevData.GetGraph(); + auto prevColumns = prevNode->GetDefinedColumns(); + + auto thisNode = RDFGraphDrawing::CreateFilterNode(this); + + /* If the returned node is not new, there is no need to perform any other operation. + * This is a likely scenario when building the entire graph in which branches share + * some nodes. */ + if(!thisNode->GetIsNew()){ + return thisNode; + } + + auto evaluatedNode = thisNode; + /* Each column that this node has but the previous hadn't has been defined in between, + * so it has to be built and appended. 
*/ + + for (auto &column: fCustomColumns.GetColumns()){ + // Even if treated as custom columns by the Dataframe, datasource columns must not be in the graph. + if(RDFGraphDrawing::CheckIfDefaultOrDSColumn(column.first, column.second)) + continue; + if(std::find(prevColumns.begin(), prevColumns.end(), column.first) == prevColumns.end()){ + auto defineNode = RDFGraphDrawing::CreateDefineNode(column.first, column.second.get()); + evaluatedNode->SetPrevNode(defineNode); + evaluatedNode = defineNode; + } + } + + // Keep track of the columns defined up to this point. + thisNode->AddDefinedColumns(fCustomColumns.GetNames()); + + evaluatedNode->SetPrevNode(prevNode); + return thisNode; + } }; class RRangeBase { @@ -881,6 +1001,7 @@ public: fNStopsReceived = 0; } void InitNode() { ResetCounters(); } + virtual std::shared_ptr<RDFGraphDrawing::GraphNode> GetGraph() = 0; }; template <typename PrevData> @@ -948,6 +1069,28 @@ public: /// This function must be defined by all nodes, but only the filters will add their name void AddFilterName(std::vector<std::string> &filters) { fPrevData.AddFilterName(filters); } + std::shared_ptr<RDFGraphDrawing::GraphNode> GetGraph() + { + // TODO: Range nodes have no information about custom columns, hence it is currently not possible to know + // if Defines have been used before them. + auto prevNode = fPrevData.GetGraph(); + auto prevColumns = prevNode->GetDefinedColumns(); + + auto thisNode = RDFGraphDrawing::CreateRangeNode(this); + + /* If the returned node is not new, there is no need to perform any other operation. + * This is a likely scenario when building the entire graph in which branches share + * some nodes. */ + if (!thisNode->GetIsNew()) { + return thisNode; + } + thisNode->SetPrevNode(prevNode); + + // If there have been some defines before it, this node won't detect them. 
+ thisNode->AddDefinedColumns(prevColumns); + + return thisNode; + } }; } // namespace RDF diff --git a/tree/dataframe/inc/ROOT/RDataSource.hxx b/tree/dataframe/inc/ROOT/RDataSource.hxx index 3d80731439d..8e80a7becaf 100644 --- a/tree/dataframe/inc/ROOT/RDataSource.hxx +++ b/tree/dataframe/inc/ROOT/RDataSource.hxx @@ -17,6 +17,7 @@ #include <string> #include <vector> #include <typeinfo> +#include "TString.h" namespace ROOT { namespace RDF { @@ -199,6 +200,8 @@ public: // clang-format on virtual void Finalise() {} + virtual std::string GetDataSourceType() = 0; + protected: /// type-erased vector of pointers to pointers to column values - one per slot virtual Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) = 0; diff --git a/tree/dataframe/inc/ROOT/RLazyDSImpl.hxx b/tree/dataframe/inc/ROOT/RLazyDSImpl.hxx index d73d5501a0c..a3df7dc9526 100644 --- a/tree/dataframe/inc/ROOT/RLazyDSImpl.hxx +++ b/tree/dataframe/inc/ROOT/RLazyDSImpl.hxx @@ -199,6 +199,11 @@ public: init = end; } } + + std::string GetDataSourceType(){ + return "LazyDS"; + } + }; } // ns RDF diff --git a/tree/dataframe/inc/ROOT/RResultPtr.hxx b/tree/dataframe/inc/ROOT/RResultPtr.hxx index e07d3b1431c..1a32e94a3a7 100644 --- a/tree/dataframe/inc/ROOT/RResultPtr.hxx +++ b/tree/dataframe/inc/ROOT/RResultPtr.hxx @@ -19,12 +19,19 @@ #include <functional> namespace ROOT { +namespace Internal{ +namespace RDF{ + class GraphCreatorHelper; +} +} +} - +namespace ROOT { namespace RDF { // Fwd decl for MakeResultPtr template <typename T> class RResultPtr; + } // ns RDF namespace Detail { @@ -36,8 +43,6 @@ RResultPtr<T> MakeResultPtr(const std::shared_ptr<T> &r, const std::shared_ptr<R std::shared_ptr<ROOT::Internal::RDF::RActionBase> actionPtr); } // ns RDF } // ns Detail - - namespace RDF { namespace RDFInternal = ROOT::Internal::RDF; namespace RDFDetail = ROOT::Detail::RDF; @@ -86,6 +91,8 @@ class RResultPtr { template <class T1> friend bool operator!=(std::nullptr_t lhs, const 
RResultPtr<T1> &rhs); + friend class ROOT::Internal::RDF::GraphDrawing::GraphCreatorHelper; + /// \cond HIDDEN_SYMBOLS template <typename V, bool hasBeginEnd = TTraits::HasBeginAndEnd<V>::value> struct TIterationHelper { diff --git a/tree/dataframe/inc/ROOT/RRootDS.hxx b/tree/dataframe/inc/ROOT/RRootDS.hxx index bed237d2ce2..cefbf28082f 100644 --- a/tree/dataframe/inc/ROOT/RRootDS.hxx +++ b/tree/dataframe/inc/ROOT/RRootDS.hxx @@ -50,6 +50,7 @@ public: bool SetEntry(unsigned int slot, ULong64_t entry); void SetNSlots(unsigned int nSlots); void Initialise(); + std::string GetDataSourceType(); }; RDataFrame MakeRootDataFrame(std::string_view treeName, std::string_view fileNameGlob); diff --git a/tree/dataframe/inc/ROOT/RSqliteDS.hxx b/tree/dataframe/inc/ROOT/RSqliteDS.hxx index ad5214ad315..f792c9553d7 100644 --- a/tree/dataframe/inc/ROOT/RSqliteDS.hxx +++ b/tree/dataframe/inc/ROOT/RSqliteDS.hxx @@ -106,6 +106,7 @@ public: std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final; bool SetEntry(unsigned int slot, ULong64_t entry) final; void Initialise() final; + std::string GetDataSourceType() final; protected: Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final; diff --git a/tree/dataframe/inc/ROOT/RTrivialDS.hxx b/tree/dataframe/inc/ROOT/RTrivialDS.hxx index eb3dacc2c65..d524f63d92b 100644 --- a/tree/dataframe/inc/ROOT/RTrivialDS.hxx +++ b/tree/dataframe/inc/ROOT/RTrivialDS.hxx @@ -42,6 +42,7 @@ public: bool SetEntry(unsigned int slot, ULong64_t entry); void SetNSlots(unsigned int nSlots); void Initialise(); + std::string GetDataSourceType(); }; RDataFrame MakeTrivialDataFrame(ULong64_t size, bool skipEvenEntries = false); diff --git a/tree/dataframe/src/RArrowDS.cxx b/tree/dataframe/src/RArrowDS.cxx index eec430266db..f7ce1e4589d 100644 --- a/tree/dataframe/src/RArrowDS.cxx +++ b/tree/dataframe/src/RArrowDS.cxx @@ -493,6 +493,11 @@ void RArrowDS::Initialise() { } +std::string RArrowDS::GetDataSourceType() +{ + return 
"ArrowDS"; +} + /// Creates a RDataFrame using an arrow::Table as input. /// \param[in] table the arrow Table to observe. /// \param[in] columnNames the name of the columns to use diff --git a/tree/dataframe/src/RCsvDS.cxx b/tree/dataframe/src/RCsvDS.cxx index 4f29154f1c8..938b378df82 100644 --- a/tree/dataframe/src/RCsvDS.cxx +++ b/tree/dataframe/src/RCsvDS.cxx @@ -448,6 +448,11 @@ void RCsvDS::SetNSlots(unsigned int nSlots) fBoolEvtValues.resize(nColumns, std::deque<bool>(fNSlots)); } +std::string RCsvDS::GetDataSourceType() +{ + return "RCsv"; +} + RDataFrame MakeCsvDataFrame(std::string_view fileName, bool readHeaders, char delimiter, Long64_t linesChunkSize) { ROOT::RDataFrame tdf(std::make_unique<RCsvDS>(fileName, readHeaders, delimiter, linesChunkSize)); diff --git a/tree/dataframe/src/RDFGraphUtils.cxx b/tree/dataframe/src/RDFGraphUtils.cxx new file mode 100644 index 00000000000..7396d3ab98b --- /dev/null +++ b/tree/dataframe/src/RDFGraphUtils.cxx @@ -0,0 +1,136 @@ +#include "ROOT/RDFGraphUtils.hxx" + +namespace ROOT { +namespace Internal { +namespace RDF { +namespace GraphDrawing { + +std::string GraphCreatorHelper::FromGraphLeafToDot(std::shared_ptr<GraphNode> leaf) +{ + // Only the mapping between node id and node label (i.e. name) + std::stringstream dotStringLabels; + // Representation of the relationships between nodes + std::stringstream dotStringGraph; + + // Explore the graph bottom-up and store its dot representation. 
+ while (leaf) { + dotStringLabels << "\t" << leaf->fCounter << " [label=\"" << leaf->fName << "\", style=\"filled\", fillcolor=\"" + << leaf->fColor << "\", shape=\"" << leaf->fShape << "\"];\n"; + if (leaf->fPrevNode) { + dotStringGraph << "\t" << leaf->fPrevNode->fCounter << " -> " << leaf->fCounter << ";\n"; + } + leaf = leaf->fPrevNode; + } + + return "digraph {\n" + dotStringLabels.str() + dotStringGraph.str() + "}"; +} + +std::string GraphCreatorHelper::FromGraphActionsToDot(std::vector<std::shared_ptr<GraphNode>> leaves) +{ + // Only the mapping between node id and node label (i.e. name) + std::stringstream dotStringLabels; + // Representation of the relationships between nodes + std::stringstream dotStringGraph; + + for (auto leaf : leaves) { + while (leaf && !leaf->fIsExplored) { + dotStringLabels << "\t" << leaf->fCounter << " [label=\"" << leaf->fName + << "\", style=\"filled\", fillcolor=\"" << leaf->fColor << "\", shape=\"" << leaf->fShape + << "\"];\n"; + if (leaf->fPrevNode) { + dotStringGraph << "\t" << leaf->fPrevNode->fCounter << " -> " << leaf->fCounter << ";\n"; + } + // Multiple branches may share the same nodes. It is wrong to explore them more than once. + leaf->fIsExplored = true; + leaf = leaf->fPrevNode; + } + } + return "digraph {\n" + dotStringLabels.str() + dotStringGraph.str() + "}"; +} + +bool CheckIfDefaultOrDSColumn(const std::string &name, + const std::shared_ptr<ROOT::Detail::RDF::RCustomColumnBase> &column) +{ + return (name == "tdfentry_" || name == "tdfslot_" || column->IsDataSourceColumn()); +} + +std::string GraphCreatorHelper::RepresentGraph(ROOT::RDataFrame &rDataFrame) +{ + auto loopManager = rDataFrame.GetLoopManager(); + // Jitting is triggered because nodes must not be empty at the time of the calling in order to draw the graph. 
+ if (!loopManager->fToJit.empty()) + loopManager->BuildJittedNodes(); + + return RepresentGraph(loopManager); +} + +std::string GraphCreatorHelper::RepresentGraph(std::shared_ptr<RLoopManager> loopManager) +{ + + auto actions = loopManager->GetAllActions(); + std::vector<std::shared_ptr<GraphNode>> leaves; + for (auto action : actions) { + // Triggers the graph construction. When action->GetGraph() will return, the node will be linked to all the branch + leaves.push_back(action->GetGraph()); + } + + return FromGraphActionsToDot(leaves); +} + +std::shared_ptr<GraphNode> +CreateDefineNode(const std::string &columnName, const ROOT::Detail::RDF::RCustomColumnBase *columnPtr) +{ + // If there is already a node for this define (recognized by the custom column it is defining) return it. If there is + // not, return a new one. + auto &sColumnsMap = GraphCreatorHelper::GetStaticColumnsMap(); + auto duplicateDefineIt = sColumnsMap.find(columnPtr); + if (duplicateDefineIt != sColumnsMap.end()) { + auto duplicateDefine = duplicateDefineIt->second.lock(); + return duplicateDefine; + } + + auto node = std::make_shared<GraphNode>("Define\n" + columnName); + node->SetDefine(); + + sColumnsMap[columnPtr] = node; + return node; +} + +std::shared_ptr<GraphNode> CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr) +{ + // If there is already a node for this filter return it. If there is not, return a new one. + auto &sFiltersMap = GraphCreatorHelper::GetStaticFiltersMap(); + auto duplicateFilterIt = sFiltersMap.find(filterPtr); + if (duplicateFilterIt != sFiltersMap.end()) { + auto duplicateFilter = duplicateFilterIt->second.lock(); + duplicateFilter->SetIsNew(false); + return duplicateFilter; + } + auto filterName = (filterPtr->HasName() ? 
filterPtr->GetName() : "Filter"); + auto node = std::make_shared<GraphNode>(filterName); + + sFiltersMap[filterPtr] = node; + node->SetFilter(); + return node; +} + +std::shared_ptr<GraphNode> CreateRangeNode(const ROOT::Detail::RDF::RRangeBase *rangePtr) +{ + // If there is already a node for this range return it. If there is not, return a new one. + auto &sRangesMap = GraphCreatorHelper::GetStaticRangesMap(); + auto duplicateRangeIt = sRangesMap.find(rangePtr); + if (duplicateRangeIt != sRangesMap.end()) { + auto duplicateRange = duplicateRangeIt->second.lock(); + duplicateRange->SetIsNew(false); + return duplicateRange; + } + auto node = std::make_shared<GraphNode>("Range"); + node->SetRange(); + + sRangesMap[rangePtr] = node; + return node; +} +} // namespace GraphDrawing +} // namespace RDF +} // namespace Internal +} // namespace ROOT diff --git a/tree/dataframe/src/RDFInterfaceUtils.cxx b/tree/dataframe/src/RDFInterfaceUtils.cxx index dda4ba7e110..b468db6be22 100644 --- a/tree/dataframe/src/RDFInterfaceUtils.cxx +++ b/tree/dataframe/src/RDFInterfaceUtils.cxx @@ -639,8 +639,8 @@ void BookDefineJit(std::string_view name, std::string_view expression, RLoopMana TryToJitExpression(dotlessExpr, varNames, usedColTypes, hasReturnStmt); const auto definelambda = BuildLambdaString(dotlessExpr, varNames, usedColTypes, hasReturnStmt); - const auto lambdaName = "eval_" + std::string(name); -const auto ns = "__tdf" + std::to_string(namespaceID); + const auto lambdaName = "eval_" + std::string(name); + const auto ns = "__tdf" + std::to_string(namespaceID); auto customColumnsCopy = new RDFInternal::RBookedCustomColumns(customCols); auto customColumnsAddr = PrettyPrintAddr(customColumnsCopy); diff --git a/tree/dataframe/src/RDFNodes.cxx b/tree/dataframe/src/RDFNodes.cxx index 276ea16d931..170ed5a49df 100644 --- a/tree/dataframe/src/RDFNodes.cxx +++ b/tree/dataframe/src/RDFNodes.cxx @@ -112,6 +112,12 @@ void RJittedAction::ClearValueReaders(unsigned int slot) return 
fConcreteAction->ClearValueReaders(slot); } +std::shared_ptr<ROOT::Internal::RDF::GraphDrawing::GraphNode> RJittedAction::GetGraph() +{ + R__ASSERT(fConcreteAction != nullptr); + return fConcreteAction->GetGraph(); +} + // Some extern instaniations to speed-up compilation/interpretation time // These are not active if c++17 is enabled because of a bug in our clang // See ROOT-9499. @@ -783,6 +789,23 @@ RLoopManager *RRangeBase::GetLoopManagerUnchecked() const return fLoopManager; } +std::shared_ptr<ROOT::Internal::RDF::GraphDrawing::GraphNode> RLoopManager::GetGraph() +{ + std::string name; + if(fDataSource){ + name= fDataSource->GetDataSourceType(); + }else if (fTree){ + name = fTree->GetName(); + }else{ + name = std::to_string(fNEmptyEntries); + } + + auto thisNode = std::make_shared<ROOT::Internal::RDF::GraphDrawing::GraphNode>(name); + thisNode->SetRoot(); + thisNode->SetCounter(0); + return thisNode; +} + void RRangeBase::ResetCounters() { fLastCheckedEntry = -1; diff --git a/tree/dataframe/src/RDataFrame.cxx b/tree/dataframe/src/RDataFrame.cxx index ab8e3c76cd3..ffca50d0109 100644 --- a/tree/dataframe/src/RDataFrame.cxx +++ b/tree/dataframe/src/RDataFrame.cxx @@ -580,6 +580,52 @@ opts.fLazy = true; df.Snapshot("outputTree", "outputFile.root", {"x"}, opts); ~~~ +### <a name="representgraph"></a>Printing the computation graph +It is possible to print the computation graph from any node to obtain a dot representation either on the standard output +or in a file. + +By invoking the method **RepresentGraph** using any node that is not the root node, the computation graph of the branch the node +belongs to is printed. By using the root node, the entire computation graph is printed. 
+ +<b>This method does not work if the event loop has been executed.</b> + +The following is an example of usage: +~~~{.cpp} +// First, a sample computational graph is built +std::unique_ptr<RDataSource> tds(new RTrivialDS(32)); +ROOT::RDataFrame rd1(std::move(tds)); + +auto root = rd1.Define("Root_def1", []() { return 1; }) + .Filter("col0 % 1 == col0") + .Filter([](int b1) { return b1 <2; }, {"Root_def1"}) + .Define("Root_def2", []() { return 1; }); + +auto branch1 = root.Define("Branch_1_def", []() { return 1; }); +auto branch2 = root.Define("Branch_2_def", []() { return 1; }); + +auto branch1_1 = branch1.Filter([](int b1) { return b1 <2; }, {"Branch_1_def"}) + .Define("Branch_1_1_def", []() { return 1; }) + .Filter("1 == Branch_1_1_def % 2") + .Mean("Branch_1_1_def"); // complete + +auto branch1_2 = branch1.Define("Branch_1_2_def", []() { return 1; }) + .Filter([](int b1) { return b1 <2; }, {"Branch_1_2_def"}) + .Count(); // complete + +auto branch2_1 = branch2.Filter([](int b1) { return b1 <2; }, {"Branch_2_def"}) + .Define("Branch_2_1_def", []() { return 1; }) + .Define("Branch_2_2_def", []() { return 1; }) + .Filter("1 == Branch_2_1_def % 2") + .Max("Branch_2_1_def"); + +auto branch2_2 = branch2.Count(); + +// Prints the graph to the ./mydot.dot file in the current directory +ROOT::RDF::Show(rd1, "./mydot.dot"); +// Prints the graph to the standard output +ROOT::RDF::Show(rd1); +~~~ + ## <a name="transformations"></a>Transformations ### <a name="Filters"></a> Filters A filter is defined through a call to `Filter(f, columnList)`. `f` can be a function, a lambda expression, a functor @@ -686,7 +732,6 @@ thread-safety, see [here](#generic-actions).
// clang-format on namespace ROOT { - namespace Detail { namespace RDF { class RCustomColumnBase; diff --git a/tree/dataframe/src/RRootDS.cxx b/tree/dataframe/src/RRootDS.cxx index f2c063b2b91..268e902b521 100644 --- a/tree/dataframe/src/RRootDS.cxx +++ b/tree/dataframe/src/RRootDS.cxx @@ -155,6 +155,11 @@ void RRootDS::Initialise() fEntryRanges.back().second += reminder; } +std::string RRootDS::GetDataSourceType() +{ + return "Root"; +} + RDataFrame MakeRootDataFrame(std::string_view treeName, std::string_view fileNameGlob) { ROOT::RDataFrame tdf(std::make_unique<RRootDS>(treeName, fileNameGlob)); diff --git a/tree/dataframe/src/RSqliteDS.cxx b/tree/dataframe/src/RSqliteDS.cxx index ccc8dd6937c..43712aa2cc1 100644 --- a/tree/dataframe/src/RSqliteDS.cxx +++ b/tree/dataframe/src/RSqliteDS.cxx @@ -221,6 +221,11 @@ void RSqliteDS::Initialise() throw std::runtime_error("SQlite error, reset"); } +std::string RSqliteDS::GetDataSourceType() +{ + return "RSqliteDS"; +} + //////////////////////////////////////////////////////////////////////////////////////////////// /// \brief Factory method to create a SQlite RDataFrame. /// \param[in] fileName Path of the sqlite file. 
diff --git a/tree/dataframe/src/RTrivialDS.cxx b/tree/dataframe/src/RTrivialDS.cxx index abe1e41e3fd..b2b309df173 100644 --- a/tree/dataframe/src/RTrivialDS.cxx +++ b/tree/dataframe/src/RTrivialDS.cxx @@ -84,6 +84,11 @@ void RTrivialDS::Initialise() fEntryRanges.back().second += fSize % fNSlots; } +std::string RTrivialDS::GetDataSourceType() +{ + return "TrivialDS"; +} + RDataFrame MakeTrivialDataFrame(ULong64_t size, bool skipEvenEntries) { ROOT::RDataFrame tdf(std::make_unique<RTrivialDS>(size, skipEvenEntries)); diff --git a/tree/dataframe/test/RNonCopiableColumnDS.hxx b/tree/dataframe/test/RNonCopiableColumnDS.hxx index e15a73f43ea..8de789c1a0c 100644 --- a/tree/dataframe/test/RNonCopiableColumnDS.hxx +++ b/tree/dataframe/test/RNonCopiableColumnDS.hxx @@ -41,6 +41,9 @@ public: }; bool SetEntry(unsigned int, ULong64_t){ return true;}; void SetNSlots(unsigned int){}; + std::string GetDataSourceType(){ + return "NonCopiableColumnDS"; + } }; #endif diff --git a/tree/dataframe/test/RStreamingDS.hxx b/tree/dataframe/test/RStreamingDS.hxx index b274335b0ac..154a949989c 100644 --- a/tree/dataframe/test/RStreamingDS.hxx +++ b/tree/dataframe/test/RStreamingDS.hxx @@ -33,6 +33,11 @@ public: } bool SetEntry(unsigned int, ULong64_t) {return true;} void Initialise() { fCounter = 0; } + + std::string GetDataSourceType() override { + return "Streaming"; + } + protected: std::vector<void *> GetColumnReadersImpl(std::string_view name, const std::type_info &t) { if (t != typeid(int) && std::string(name) != "ans") diff --git a/tree/dataframe/test/dataframe_helpers.cxx b/tree/dataframe/test/dataframe_helpers.cxx index e4af7372c89..e6de9086835 100644 --- a/tree/dataframe/test/dataframe_helpers.cxx +++ b/tree/dataframe/test/dataframe_helpers.cxx @@ -49,3 +49,229 @@ TEST(RDFHelpers, PassAsVec) auto TwoOnesDeque = [](const std::deque<int> &v) { return v.size() == 2 && v[0] == 1 && v[1] == 1; }; EXPECT_EQ(1u, *df.Filter(PassAsVec<2, int>(TwoOnesDeque), {"one", "_1"}).Count()); } + 
+class RepresentGraphTestHelper { +private: + RDataFrame rd1; + + bool hasLoopRun = false; + + RResultPtr<double> branch1_1; + RResultPtr<unsigned long long> branch1_2; + RResultPtr<double> branch2_1; + RResultPtr<unsigned long long> branch2_2; + +public: + RepresentGraphTestHelper() : rd1(8) + { + auto root = rd1.Define("Root_def1", []() { return 1; }) + .Filter([](int b1) { return b1 < 2; }, {"Root_def1"}) + .Define("Root_def2", []() { return 1; }); + + auto branch1 = root.Define("Branch_1_def", []() { return 1; }); // hanging + auto branch2 = root.Define("Branch_2_def", []() { return 1; }); // hanging + + branch1_1 = branch1.Filter([](int b1) { return b1 < 2; }, {"Branch_1_def"}) + .Define("Branch_1_1_def", []() { return 1; }) + .Filter("1 == Branch_1_1_def % 2") + .Mean("Branch_1_1_def"); // complete + + branch1_2 = branch1.Define("Branch_1_2_def", []() { return 1; }) + .Filter([](int b1) { return b1 < 2; }, {"Branch_1_2_def"}) + .Count(); // complete + + branch2_1 = branch2.Filter([](int b1) { return b1 < 2; }, {"Branch_2_def"}) + .Define("Branch_2_1_def", []() { return 1; }) + .Define("Branch_2_2_def", []() { return 1; }) + .Filter("1 == Branch_2_1_def % 2") + .Max("Branch_2_1_def"); + + branch2_2 = branch2.Count(); + } + + void RunLoop() + { + hasLoopRun = true; + *branch2_2; + } + + std::string GetRepresentationFromRoot() + { + std::cout << std::flush; + // Redirect cout. + std::streambuf *oldCoutStreamBuf = std::cout.rdbuf(); + std::ostringstream strCout; + std::cout.rdbuf(strCout.rdbuf()); + + RepresentGraph(rd1); + + // Restore old cout. + std::cout.rdbuf(oldCoutStreamBuf); + + return strCout.str(); + } + + std::string GetRealRepresentationFromRoot() + { + return std::string("digraph {\n" + "\t9 [label=\"Mean\", style=\"filled\", fillcolor=\"") + + (hasLoopRun ? 
"#baf1e5" : "#9cbbe5") + + "\", shape=\"box\"];\n" + "\t7 [label=\"Filter\", style=\"filled\", fillcolor=\"#c4cfd4\", shape=\"diamond\"];\n" + "\t8 [label=\"Define\n" + "Branch_1_1_def\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t4 [label=\"Filter\", style=\"filled\", fillcolor=\"#c4cfd4\", shape=\"diamond\"];\n" + "\t5 [label=\"Define\n" + "Branch_1_def\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t6 [label=\"Define\n" + "Root_def2\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t2 [label=\"Filter\", style=\"filled\", fillcolor=\"#c4cfd4\", shape=\"diamond\"];\n" + "\t3 [label=\"Define\n" + "Root_def1\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t0 [label=\"8\", style=\"filled\", fillcolor=\"#e8f8fc\", shape=\"oval\"];\n" + "\t13 [label=\"Count\", style=\"filled\", fillcolor=\"" + + (hasLoopRun ? "#baf1e5" : "#9cbbe5") + + "\", shape=\"box\"];\n" + "\t11 [label=\"Filter\", style=\"filled\", fillcolor=\"#c4cfd4\", shape=\"diamond\"];\n" + "\t12 [label=\"Define\n" + "Branch_1_2_def\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t20 [label=\"Max\", style=\"filled\", fillcolor=\"" + + (hasLoopRun ? "#baf1e5" : "#9cbbe5") + + "\", shape=\"box\"];\n" + "\t17 [label=\"Filter\", style=\"filled\", fillcolor=\"#c4cfd4\", shape=\"diamond\"];\n" + "\t18 [label=\"Define\n" + "Branch_2_1_def\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t19 [label=\"Define\n" + "Branch_2_2_def\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t15 [label=\"Filter\", style=\"filled\", fillcolor=\"#c4cfd4\", shape=\"diamond\"];\n" + "\t16 [label=\"Define\n" + "Branch_2_def\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t22 [label=\"Count\", style=\"filled\", fillcolor=\"" + + (hasLoopRun ? 
"#baf1e5" : "#9cbbe5") + + "\", shape=\"box\"];\n" + "\t7 -> 9;\n" + "\t8 -> 7;\n" + "\t4 -> 8;\n" + "\t5 -> 4;\n" + "\t6 -> 5;\n" + "\t2 -> 6;\n" + "\t3 -> 2;\n" + "\t0 -> 3;\n" + "\t11 -> 13;\n" + "\t12 -> 11;\n" + "\t5 -> 12;\n" + "\t17 -> 20;\n" + "\t18 -> 17;\n" + "\t19 -> 18;\n" + "\t15 -> 19;\n" + "\t16 -> 15;\n" + "\t6 -> 16;\n" + "\t16 -> 22;\n" + "}\n"; + } + + std::string GetRepresentationFromAction() + { + std::cout << std::flush; + // Redirect cout. + std::streambuf *oldCoutStreamBuf = std::cout.rdbuf(); + std::ostringstream strCout; + std::cout.rdbuf(strCout.rdbuf()); + + RepresentGraph(branch1_1); + + // Restore old cout. + std::cout.rdbuf(oldCoutStreamBuf); + + return strCout.str(); + } + + std::string GetRealRepresentationFromAction() + { + return std::string("digraph {\n" + "\t9 [label=\"Mean\", style=\"filled\", fillcolor=\"") + + (hasLoopRun ? "#baf1e5" : "#9cbbe5") + + "\", shape=\"box\"];\n" + "\t7 [label=\"Filter\", style=\"filled\", fillcolor=\"#c4cfd4\", shape=\"diamond\"];\n" + "\t8 [label=\"Define\n" + "Branch_1_1_def\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t4 [label=\"Filter\", style=\"filled\", fillcolor=\"#c4cfd4\", shape=\"diamond\"];\n" + "\t5 [label=\"Define\n" + "Branch_1_def\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t6 [label=\"Define\n" + "Root_def2\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t2 [label=\"Filter\", style=\"filled\", fillcolor=\"#c4cfd4\", shape=\"diamond\"];\n" + "\t3 [label=\"Define\n" + "Root_def1\", style=\"filled\", fillcolor=\"#60aef3\", shape=\"oval\"];\n" + "\t0 [label=\"8\", style=\"filled\", fillcolor=\"#e8f8fc\", shape=\"oval\"];\n" + "\t7 -> 9;\n" + "\t8 -> 7;\n" + "\t4 -> 8;\n" + "\t5 -> 4;\n" + "\t6 -> 5;\n" + "\t2 -> 6;\n" + "\t3 -> 2;\n" + "\t0 -> 3;\n" + "}\n"; + } +}; + +TEST(RDFHelpers, RepresentGraphFromRoot) +{ + RepresentGraphTestHelper helper; + EXPECT_EQ(helper.GetRepresentationFromRoot(), 
helper.GetRealRepresentationFromRoot()); +} + +TEST(RDFHelpers, RepresentGraphFromAction) +{ + RepresentGraphTestHelper helper; + EXPECT_EQ(helper.GetRepresentationFromAction(), helper.GetRealRepresentationFromAction()); +} + +TEST(RDFHelpers, RepresentGraphMultipleTimes) +{ + RepresentGraphTestHelper helper; + EXPECT_EQ(helper.GetRepresentationFromRoot(), helper.GetRealRepresentationFromRoot()); + EXPECT_EQ(helper.GetRepresentationFromAction(), helper.GetRealRepresentationFromAction()); + EXPECT_EQ(helper.GetRepresentationFromRoot(), helper.GetRealRepresentationFromRoot()); + EXPECT_EQ(helper.GetRepresentationFromAction(), helper.GetRealRepresentationFromAction()); +} + +TEST(RDFHelpers, RepresentGraphAfterEventLoop) +{ + RepresentGraphTestHelper helper; + helper.RunLoop(); + EXPECT_EQ(helper.GetRepresentationFromRoot(), helper.GetRealRepresentationFromRoot()); + EXPECT_EQ(helper.GetRepresentationFromAction(), helper.GetRealRepresentationFromAction()); +} + +TEST(RDFHelpers, RepresentGraphRootFromTree) +{ + TFile f("f.root", "recreate"); + TTree t("t", "t"); + int a; + t.Branch("a", &a); + a = 42; // The answer to life the universe and everything + t.Fill(); + t.Write(); + f.Close(); + + static const std::string expectedGraph( + "digraph {\n\t2 [label=\"Count\", style=\"filled\", fillcolor=\"#9cbbe5\", shape=\"box\"];\n\t0 [label=\"t\", " + "style=\"filled\", fillcolor=\"#e8f8fc\", shape=\"oval\"];\n\t0 -> 2;\n}\n"); + + ROOT::RDataFrame df("t", "f.root"); + auto c = df.Count(); + + std::cout << std::flush; + // Redirect cout. + std::streambuf *oldCoutStreamBuf = std::cout.rdbuf(); + std::ostringstream strCout; + std::cout.rdbuf(strCout.rdbuf()); + + RepresentGraph(c); + + // Restore old cout. 
+ std::cout.rdbuf(oldCoutStreamBuf); + EXPECT_EQ(expectedGraph, strCout.str()); +} diff --git a/tree/dataframe/test/dataframe_simple.cxx b/tree/dataframe/test/dataframe_simple.cxx index e621321df9d..96f1f7242b7 100644 --- a/tree/dataframe/test/dataframe_simple.cxx +++ b/tree/dataframe/test/dataframe_simple.cxx @@ -581,6 +581,8 @@ public: void InitTask(TTreeReader *, unsigned int) {} void Exec(unsigned int slot, unsigned int /*slot2*/) { fMaxSlots[slot] = std::max(fMaxSlots[slot], slot); } void Finalize() { *fMaxSlot = *std::max_element(fMaxSlots.begin(), fMaxSlots.end()); } + + std::string GetActionName() { return "MaxSlot"; } }; TEST_P(RDFSimpleTests, BookCustomAction) diff --git a/tutorials/dataframe/df018_customActions.C b/tutorials/dataframe/df018_customActions.C index 0f11fec8982..a816129d704 100644 --- a/tutorials/dataframe/df018_customActions.C +++ b/tutorials/dataframe/df018_customActions.C @@ -61,6 +61,10 @@ public: res->Add(fHistos[slot].get()); } } + + std::string GetActionName(){ + return "THnHelper"; + } }; void df018_customActions() diff --git a/tutorials/dataframe/df022_useKahan.C b/tutorials/dataframe/df022_useKahan.C index 270da338915..5607f86baa9 100644 --- a/tutorials/dataframe/df022_useKahan.C +++ b/tutorials/dataframe/df022_useKahan.C @@ -68,6 +68,11 @@ public: } *fResultSum = sum; } + + std::string GetActionName(){ + return "THnHelper"; + } + }; void df022_useKahan() -- GitLab