From 06cd6dce8fde1599f15acad7e0829dbca2408a1f Mon Sep 17 00:00:00 2001
From: Guilherme Amadio <amadio@cern.ch>
Date: Tue, 3 Oct 2017 13:34:27 +0200
Subject: [PATCH] [TDF] Add tutorials to show how to use jitted defines and
 filters

---
 tutorials/dataframe/tdf012_using_jit.C  | 63 +++++++++++++++++++++++++
 tutorials/dataframe/tdf012_using_jit.py | 51 ++++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 tutorials/dataframe/tdf012_using_jit.C
 create mode 100644 tutorials/dataframe/tdf012_using_jit.py

diff --git a/tutorials/dataframe/tdf012_using_jit.C b/tutorials/dataframe/tdf012_using_jit.C
new file mode 100644
index 00000000000..feafd831bc9
--- /dev/null
+++ b/tutorials/dataframe/tdf012_using_jit.C
@@ -0,0 +1,63 @@
+/// \file
+/// \ingroup tutorial_tdataframe
+/// \notebook -nodraw
+///
+/// This tutorial illustrates how to save some typing when using TDataFrame
+/// by invoking functions that perform jit-compiling at runtime.
+///
+/// \macro_code
+///
+/// \date October 2017
+/// \author Guilherme Amadio
+
+#include "TRandom.h"
+#include "ROOT/TDataFrame.hxx"
+
+void tdf012_using_jit()
+{
+   // We will inefficiently calculate an approximation of pi by generating
+   // some data and doing very simple filtering and analysis on it
+
+   // We start by creating an empty dataframe where we will insert 10 million
+   // random points in a square of side 2.0 (that is, with an inscribed circle
+   // of radius 1.0)
+
+   size_t npoints = 10000000;
+   ROOT::Experimental::TDataFrame tdf(npoints);
+
+   // Make generators known to interpreter
+   gInterpreter->ProcessLine("TRandom rx, ry;");
+
+   // Use different seeds for independent streams
+   gInterpreter->ProcessLine("rx.SetSeed(1);");
+   gInterpreter->ProcessLine("ry.SetSeed(2);");
+
+   // Define what we want inside the dataframe. We do not need to define p as an array,
+   // but we do it here to demonstrate how to use jitting with TDataFrame
+
+   // NOTE: The loop "for (auto&& x : p)" below reuses the name of the data
+   // column "x" for its local variable. Such shadowing is possible, but may
+   // cause compilation failures if the local variable and the data column are of
+   // different types or the local x is declared in the lambda's global scope
+
+   auto pidf = tdf.Define("x", "rx.Uniform(-1.0, 1.0)")
+                  .Define("y", "ry.Uniform(-1.0, 1.0)")
+                  .Define("p", "std::array<double, 2> v{x, y}; return v;")
+                  .Define("r", "double r2 = 0.0; for (auto&& x : p) r2 += x*x; return sqrt(r2);");
+
+   // Now we have a dataframe with columns x, y, p (which is a point based on x
+   // and y), and the radius r = sqrt(x*x + y*y). In order to approximate pi, we
+   // need to know how many of our data points fall inside the unit circle compared
+   // with the total number of points. The ratio of the areas is
+   //
+   //     A_circle / A_square = (pi * r*r) / (l*l) = pi / 4, where r = 1.0 and l = 2.0
+   //
+   // Therefore, we can approximate pi with 4 times the number of points inside the
+   // unit circle over the total number of points in our dataframe:
+
+   auto incircle = *(pidf.Filter("r <= 1.0").Count());
+
+   double pi_approx = 4.0 * incircle / npoints;
+
+   std::cout << "pi is approximately equal to " << pi_approx << std::endl;
+}
diff --git a/tutorials/dataframe/tdf012_using_jit.py b/tutorials/dataframe/tdf012_using_jit.py
new file mode 100644
index 00000000000..cdadb7bbb5e
--- /dev/null
+++ b/tutorials/dataframe/tdf012_using_jit.py
@@ -0,0 +1,51 @@
+## \file
+## \ingroup tutorial_tdataframe
+## \notebook -nodraw
+##
+## This tutorial illustrates how to use jit-compiling features of TDataFrame
+## to define data using C++ code in a Python script
+##
+## \macro_code
+##
+## \date October 2017
+## \author Guilherme Amadio
+
+import ROOT
+
+## We will inefficiently calculate an approximation of pi by generating
+## some data and doing very simple filtering and analysis on it.
+
+## We start by creating an empty dataframe where we will insert 10 million
+## random points in a square of side 2.0 (that is, with an inscribed unit
+## circle).
+
+npoints = 10000000
+tdf = ROOT.ROOT.Experimental.TDataFrame(npoints)
+
+ROOT.gInterpreter.ProcessLine("TRandom rx, ry;")
+ROOT.gInterpreter.ProcessLine("rx.SetSeed(1);")
+ROOT.gInterpreter.ProcessLine("ry.SetSeed(2);")
+
+## Define what data we want inside the dataframe. We do not need to define p
+## as an array, but we do it here to demonstrate how to use jitting with TDataFrame
+
+pidf = tdf.Define("x", "rx.Uniform(-1.0, 1.0)") \
+          .Define("y", "ry.Uniform(-1.0, 1.0)") \
+          .Define("p", "std::array<double, 2> v{x, y}; return v;") \
+          .Define("r", "double r2 = 0.0; for (auto&& w : p) r2 += w*w; return sqrt(r2);")
+
+## Now we have a dataframe with columns x, y, p (which is a point based on x
+## and y), and the radius r = sqrt(x*x + y*y). In order to approximate pi, we
+## need to know how many of our data points fall inside the circle of radius
+## one compared with the total number of points. The ratio of the areas is
+##
+##     A_circle / A_square = (pi * r*r) / (l*l) = pi / 4, where r = 1.0 and l = 2.0
+##
+## Therefore, we can approximate pi with 4 times the number of points inside
+## the unit circle over the total number of points:
+
+incircle = pidf.Filter("r <= 1.0").Count().GetValue()
+
+pi_approx = 4.0 * incircle / npoints
+
+print("pi is approximately equal to %g" % (pi_approx))
-- 
GitLab