From 57a1ace39729cd584aed8d039e0cf83226ec686b Mon Sep 17 00:00:00 2001 From: Danilo Piparo <danilo.piparo@cern.ch> Date: Thu, 6 Apr 2017 17:12:29 +0200 Subject: [PATCH] [TDF] Move AddColumn to Define --- .../inc/ROOT/TActionResultProxy.hxx | 17 ++++++------- tree/treeplayer/inc/ROOT/TDFUtils.hxx | 2 +- .../inc/ROOT/TDataFrameInterface.hxx | 10 ++++---- tree/treeplayer/src/TDFUtils.cxx | 2 +- tree/treeplayer/src/TDataFrame.cxx | 24 +++++++++---------- tree/treeplayer/src/TDataFrameInterface.cxx | 4 ++-- tutorials/dataframe/tdf001_introduction.C | 4 ++-- tutorials/dataframe/tdf002_dataModel.C | 6 ++--- tutorials/dataframe/tdf004_cutFlowReport.C | 2 +- tutorials/dataframe/tdf101_h1Analysis.C | 2 +- 10 files changed, 37 insertions(+), 36 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TActionResultProxy.hxx b/tree/treeplayer/inc/ROOT/TActionResultProxy.hxx index d9c497863e0..902b8709221 100644 --- a/tree/treeplayer/inc/ROOT/TActionResultProxy.hxx +++ b/tree/treeplayer/inc/ROOT/TActionResultProxy.hxx @@ -26,7 +26,7 @@ class TActionResultProxy; namespace Detail { template <typename T> -ROOT::Experimental::TActionResultProxy<T> MakeActionResultProxy(const std::shared_ptr<T> & r, +ROOT::Experimental::TActionResultProxy<T> MakeActionResultProxy(const std::shared_ptr<T> &r, const std::shared_ptr<TDataFrameImpl> &df); } @@ -68,17 +68,18 @@ class TActionResultProxy { static Iterator_t GetEnd(const V &v) { return std::end(v); }; }; /// \endcond - using SPT_t = std::shared_ptr<T>; - using SPTDFI_t = std::shared_ptr<ROOT::Detail::TDataFrameImpl>; - using WPTDFI_t = std::weak_ptr<ROOT::Detail::TDataFrameImpl>; + using SPT_t = std::shared_ptr<T>; + using SPTDFI_t = std::shared_ptr<ROOT::Detail::TDataFrameImpl>; + using WPTDFI_t = std::weak_ptr<ROOT::Detail::TDataFrameImpl>; using ShrdPtrBool_t = std::shared_ptr<bool>; - template<typename W> friend TActionResultProxy<W> ROOT::Detail::MakeActionResultProxy( + template <typename W> + friend TActionResultProxy<W> ROOT::Detail::MakeActionResultProxy( const std::shared_ptr<W> &, const std::shared_ptr<ROOT::Detail::TDataFrameImpl> &); ShrdPtrBool_t fReadiness = std::make_shared<bool>(false); ///< State registered also in the TDataFrameImpl until the event loop is executed WPTDFI_t fImplWeakPtr; ///< Points to the TDataFrameImpl at the root of the functional graph - SPT_t fObjPtr; ///< Shared pointer encapsulating the wrapped result + SPT_t fObjPtr; ///< Shared pointer encapsulating the wrapped result /// Triggers the event loop in the TDataFrameImpl instance to which it's associated via the fImplWeakPtr void TriggerRun(); @@ -139,11 +140,11 @@ void TActionResultProxy<T>::TriggerRun() namespace Detail { template <typename T> -ROOT::Experimental::TActionResultProxy<T> MakeActionResultProxy(const std::shared_ptr<T> & r, +ROOT::Experimental::TActionResultProxy<T> MakeActionResultProxy(const std::shared_ptr<T> &r, const std::shared_ptr<TDataFrameImpl> &df) { auto readiness = std::make_shared<bool>(false); - auto resPtr = ROOT::Experimental::TActionResultProxy<T>(r, readiness, df); + auto resPtr = ROOT::Experimental::TActionResultProxy<T>(r, readiness, df); df->Book(readiness); return resPtr; } diff --git a/tree/treeplayer/inc/ROOT/TDFUtils.hxx b/tree/treeplayer/inc/ROOT/TDFUtils.hxx index b335df42ef1..4f27ab18c0f 100644 --- a/tree/treeplayer/inc/ROOT/TDFUtils.hxx +++ b/tree/treeplayer/inc/ROOT/TDFUtils.hxx @@ -196,7 +196,7 @@ void InitTDFValues(unsigned int slot, TDFValueTuple &valueTuple, TTreeReader &r, ROOT::Internal::TDFTraitsUtils::TStaticSeq<S...>) { // isTmpBranch has length bn.size(). Elements are true if the corresponding - // branch is a temporary branch created with AddColumn, false if they are + // branch is a temporary branch created with Define, false if they are // actual branches present in the TTree. std::array<bool, sizeof...(S)> isTmpColumn; for (auto i = 0u; i < isTmpColumn.size(); ++i) diff --git a/tree/treeplayer/inc/ROOT/TDataFrameInterface.hxx b/tree/treeplayer/inc/ROOT/TDataFrameInterface.hxx index b39b326b5eb..48a9e6c1e59 100644 --- a/tree/treeplayer/inc/ROOT/TDataFrameInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDataFrameInterface.hxx @@ -192,8 +192,8 @@ public: /// An exception is thrown if the name of the new branch is already in use /// for another branch in the TTree. template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0> - TDataFrameInterface<ROOT::Detail::TDataFrameBranchBase> AddColumn(const std::string &name, F expression, - const BranchNames_t &bl = {}) + TDataFrameInterface<ROOT::Detail::TDataFrameBranchBase> Define(const std::string &name, F expression, + const BranchNames_t &bl = {}) { auto df = GetDataFrameChecked(); ROOT::Internal::CheckTmpBranch(name, df->GetTree()); @@ -217,15 +217,15 @@ public: /// variable names to be used inside are the names of the branches. Only /// valid C++ is accepted. /// Refer to the first overload of this method for the full documentation. - TDataFrameInterface<ROOT::Detail::TDataFrameBranchBase> AddColumn(const std::string &name, - const std::string &expression) + TDataFrameInterface<ROOT::Detail::TDataFrameBranchBase> Define(const std::string &name, + const std::string &expression) { auto df = GetDataFrameChecked(); auto tree = df->GetTree(); auto branches = tree->GetListOfBranches(); auto tmpBranches = fProxiedPtr->GetTmpBranches(); auto tmpBookedBranches = df->GetBookedBranches(); - auto retVal = ROOT::Internal::InterpretCall(this, "AddColumn", GetNodeTypeName(), name, expression, branches, + auto retVal = ROOT::Internal::InterpretCall(this, "Define", GetNodeTypeName(), name, expression, branches, tmpBranches, tmpBookedBranches, tree); return *(TDataFrameInterface<ROOT::Detail::TDataFrameBranchBase> *)retVal; } diff --git a/tree/treeplayer/src/TDFUtils.cxx b/tree/treeplayer/src/TDFUtils.cxx index 7b9aa616c26..5688e1eadc7 100644 --- a/tree/treeplayer/src/TDFUtils.cxx +++ b/tree/treeplayer/src/TDFUtils.cxx @@ -24,7 +24,7 @@ namespace ROOT { namespace Internal { /// Return a string containing the type of the given branch. Works both with real TTree branches and with temporary -/// column created by AddColumn. +/// column created by Define. std::string ColumnName2ColumnTypeName(const std::string &colName, TTree &tree, ROOT::Detail::TDataFrameBranchBase *tmpBranch) { diff --git a/tree/treeplayer/src/TDataFrame.cxx b/tree/treeplayer/src/TDataFrame.cxx index d0f575864f0..0c72a9d0bf3 100644 --- a/tree/treeplayer/src/TDataFrame.cxx +++ b/tree/treeplayer/src/TDataFrame.cxx @@ -158,20 +158,20 @@ calls can be chained one after another. ### Creating a temporary column Let's now consider the case in which "myTree" contains two quantities "x" and "y", but our analysis relies on a derived quantity `z = sqrt(x*x + y*y)`. -Using the `AddColumn` transformation, we can create a new column in the data-set containing the variable "z": +Using the `Define` transformation, we can create a new column in the data-set containing the variable "z": ~~~{.cpp} auto sqrtSum = [](double x, double y) { return sqrt(x*x + y*y); }; auto zCut = [](double z) { return z > 0.; } ROOT::Experimental::TDataFrame d(treeName, filePtr); -auto zMean = d.AddColumn("z", sqrtSum, {"x","y"}) +auto zMean = d.Define("z", sqrtSum, {"x","y"}) .Filter(zCut, {"z"}) .Mean("z"); std::cout << *zMean << std::endl; ~~~ -`AddColumn` creates the variable "z" by applying `sqrtSum` to "x" and "y". Later in the chain of calls we refer to -variables created with `AddColumn` as if they were actual tree branches, but they are evaluated on the fly, once per -event. As with filters, `AddColumn` calls can be chained with other transformations to create multiple temporary +`Define` creates the variable "z" by applying `sqrtSum` to "x" and "y". Later in the chain of calls we refer to +variables created with `Define` as if they were actual tree branches, but they are evaluated on the fly, once per +event. As with filters, `Define` calls can be chained with other transformations to create multiple temporary columns. ### Executing multiple actions @@ -213,7 +213,7 @@ When constructing a `TDataFrame` object, it is possible to specify a **default b usual form of a list of strings representing branch names. The default branch list will be used as fallback whenever one specific to the transformation/action is not present. ~~~{.cpp} -// use "b1" and "b2" as default branches for `Filter`, `AddColumn` and actions +// use "b1" and "b2" as default branches for `Filter`, `Define` and actions ROOT::Experimental::TDataFrame d1(treeName, &file, {"b1","b2"}); // filter acts on default branch list, no need to specify it auto h = d1.Filter([](int b1, int b2) { return b1 > b2; }).Histo1D("otherVar"); @@ -314,7 +314,7 @@ auto h1 = filtered.Histo1D("var1"); // create a new branch "vec" with a vector extracted from a complex object (only for filtered entries) // and save the state of the chain -auto newBranchFiltered = filtered.AddColumn("vec", [](const Obj& o) { return o.getVector(); }, {"obj"}); +auto newBranchFiltered = filtered.Define("vec", [](const Obj& o) { return o.getVector(); }, {"obj"}); // apply a cut and fill a histogram with "vec" auto h2 = newBranchFiltered.Filter(cut1).Histo1D("vec"); @@ -381,7 +381,7 @@ Ranges allow "early quitting": if all branches of execution of a functional grap processed entries, the event-loop is immediately interrupted. This is useful for debugging and initial explorations. ### Temporary columns -Temporary columns are created by invoking `AddColumn(name, f, branchList)`. As usual, `f` can be any callable object +Temporary columns are created by invoking `Define(name, f, branchList)`. As usual, `f` can be any callable object (function, lambda expression, functor class...); it takes the values of the branches listed in `branchList` (a list of strings) as parameters, in the same order as they are listed in `branchList`. `f` must return the value that will be assigned to the temporary column. @@ -397,10 +397,10 @@ Use cases include: An exception is thrown if the `name` of the new branch is already in use for another branch in the `TTree`. It is also possible to specify the quantity to be stored in the new temporary column as a C++ expression with the method -`AddColumn(name, expression)`. For example this invocation +`Define(name, expression)`. For example this invocation ~~~{.cpp} -tdf.AddColumn("pt", "sqrt(px*px + py*py)"); +tdf.Define("pt", "sqrt(px*px + py*py)"); ~~~ will create a new column called "pt" the value of which is calculated starting from the branches px and py. The system @@ -447,9 +447,9 @@ object to indicate that it should take advantage of a pool of worker threads. ** subset of entries**, and their partial results are merged before returning the final values to the user. ### Thread safety -`Filter` and `AddColumn` transformations should be inherently thread-safe: they have no side-effects and are not +`Filter` and `Define` transformations should be inherently thread-safe: they have no side-effects and are not dependent on global state. -Most `Filter`/`AddColumn` functions will in fact be pure in the functional programming sense. +Most `Filter`/`Define` functions will in fact be pure in the functional programming sense. All actions are built to be thread-safe with the exception of `Foreach`, in which case users are responsible of thread-safety, see [here](#generic-actions). diff --git a/tree/treeplayer/src/TDataFrameInterface.cxx b/tree/treeplayer/src/TDataFrameInterface.cxx index 613a2414461..b5ed1975186 100644 --- a/tree/treeplayer/src/TDataFrameInterface.cxx +++ b/tree/treeplayer/src/TDataFrameInterface.cxx @@ -48,7 +48,7 @@ std::vector<std::string> GetUsedBranchesNames(const std::string expression, TObj return usedBranches; } -// Jit a string filter or a string temporary column, call this->AddColumn or this->Filter as needed +// Jit a string filter or a string temporary column, call this->Define or this->Filter as needed // Return pointer to the new functional chain node returned by the call, cast to Long_t Long_t InterpretCall(void *thisPtr, const std::string &methodName, const std::string &nodeTypeName, const std::string &name, const std::string &expression, TObjArray *branches, @@ -122,7 +122,7 @@ Long_t InterpretCall(void *thisPtr, const std::string &methodName, const std::st // Here we have two cases: filter and column ss.str(""); ss << "((" << nodeTypeName << "*)" << thisPtr << ")->" << methodName << "("; - if (methodName == "AddColumn") { + if (methodName == "Define") { ss << "\"" << name << "\", "; } ss << filterLambda << ", {"; diff --git a/tutorials/dataframe/tdf001_introduction.C b/tutorials/dataframe/tdf001_introduction.C index f60d0298ff7..b360ab93e6f 100644 --- a/tutorials/dataframe/tdf001_introduction.C +++ b/tutorials/dataframe/tdf001_introduction.C @@ -155,7 +155,7 @@ int tdf001_introduction() // this is like having a general container at disposal able to accommodate // any value of any type. // Let's dive in an example: - auto entries_sum = d.AddColumn("sum", [](double b1, int b2) { return b2 + b1; }, {"b1", "b2"}) + auto entries_sum = d.Define("sum", [](double b1, int b2) { return b2 + b1; }, {"b1", "b2"}) .Filter([](double sum) { return sum > 4.2; }, {"sum"}) .Count(); std::cout << *entries_sum << std::endl; @@ -163,7 +163,7 @@ int tdf001_introduction() // Additional columns can be expressed as strings. The content must be C++ // code. The name of the variables must be the name of the branches. The code // is just in time compiled. - auto entries_sum2 = d.AddColumn("sum", "b1 + b2").Filter("sum > 4.2").Count(); + auto entries_sum2 = d.Define("sum", "b1 + b2").Filter("sum > 4.2").Count(); std::cout << *entries_sum2 << std::endl; return 0; diff --git a/tutorials/dataframe/tdf002_dataModel.C b/tutorials/dataframe/tdf002_dataModel.C index b6d14a640b9..13c9bff5c73 100644 --- a/tutorials/dataframe/tdf002_dataModel.C +++ b/tutorials/dataframe/tdf002_dataModel.C @@ -104,10 +104,10 @@ int tdf002_dataModel() return ptsw; }; - auto augmented_d = d.AddColumn("tracks_n", [](const FourVectors &tracks) { return (int)tracks.size(); }) + auto augmented_d = d.Define("tracks_n", [](const FourVectors &tracks) { return (int)tracks.size(); }) .Filter([](int tracks_n) { return tracks_n > 2; }, {"tracks_n"}) - .AddColumn("tracks_pts", getPt) - .AddColumn("tracks_pts_weights", getPtWeights); + .Define("tracks_pts", getPt) + .Define("tracks_pts_weights", getPtWeights); auto trN = augmented_d.Histo1D("tracks_n", 40, -.5, 39.5); auto trPts = augmented_d.Histo1D("tracks_pts"); diff --git a/tutorials/dataframe/tdf004_cutFlowReport.C b/tutorials/dataframe/tdf004_cutFlowReport.C index 6ec9f737000..628841c99c5 100644 --- a/tutorials/dataframe/tdf004_cutFlowReport.C +++ b/tutorials/dataframe/tdf004_cutFlowReport.C @@ -64,7 +64,7 @@ void tdf004_cutFlowReport() auto filtered1 = d.Filter(cut1, {"b1"}, "Cut1"); auto filtered2 = d.Filter(cut2, {"b2"}, "Cut2"); - auto augmented1 = filtered2.AddColumn("b3", [](double b1, int b2) { return b1 / b2; }); + auto augmented1 = filtered2.Define("b3", [](double b1, int b2) { return b1 / b2; }); auto cut3 = [](double x) { return x < .5; }; auto filtered3 = augmented1.Filter(cut3, {"b3"}, "Cut3"); diff --git a/tutorials/dataframe/tdf101_h1Analysis.C b/tutorials/dataframe/tdf101_h1Analysis.C index a432742fb37..90360205b5b 100644 --- a/tutorials/dataframe/tdf101_h1Analysis.C +++ b/tutorials/dataframe/tdf101_h1Analysis.C @@ -121,7 +121,7 @@ void tdf101_h1Analysis() auto selected = Select(dataFrame); auto hdmdARP = selected.Histo1D(TH1F("hdmd", "Dm_d", 40, 0.13, 0.17), "dm_d"); - auto selectedAddedBranch = selected.AddColumn( + auto selectedAddedBranch = selected.Define( "h2_y", [](float rpd0_t, float ptd0_d) { return rpd0_t / 0.029979f * 1.8646f / ptd0_d; }, {"rpd0_t", "ptd0_d"}); auto h2ARP = selectedAddedBranch.Histo2D<float, float>(TH2F("h2", "ptD0 vs Dm_d", 30, 0.135, 0.165, 30, -3, 6), "dm_d", "h2_y"); -- GitLab