From 989619c9a105e8199cce76b7bf27f8fe164b6439 Mon Sep 17 00:00:00 2001
From: Stefan Wunsch <stefan.wunsch@cern.ch>
Date: Mon, 8 Apr 2019 14:17:57 +0200
Subject: [PATCH] [PyROOT exp] Introduce MakeNumpyDataFrame to read numpy
 arrays with RDF

---
 .../pyroot_experimental/PyROOT/CMakeLists.txt |   2 +
 .../PyROOT/inc/ROOT/RNumpyDS.hxx              | 236 ++++++++++++++++++
 .../python/ROOT/pythonization/_rdataframe.py  |   5 +
 .../PyROOT/src/PyROOTModule.cxx               |   2 +
 .../PyROOT/src/PyROOTPythonize.h              |   2 +
 .../PyROOT/src/RDataFramePyz.cxx              | 111 ++++++++
 .../PyROOT/test/CMakeLists.txt                |   1 +
 .../PyROOT/test/rdataframe_makenumpy.py       | 138 ++++++++++
 8 files changed, 497 insertions(+)
 create mode 100644 bindings/pyroot_experimental/PyROOT/inc/ROOT/RNumpyDS.hxx
 create mode 100644 bindings/pyroot_experimental/PyROOT/src/RDataFramePyz.cxx
 create mode 100644 bindings/pyroot_experimental/PyROOT/test/rdataframe_makenumpy.py

diff --git a/bindings/pyroot_experimental/PyROOT/CMakeLists.txt b/bindings/pyroot_experimental/PyROOT/CMakeLists.txt
index a696d356f47..33df4980e61 100644
--- a/bindings/pyroot_experimental/PyROOT/CMakeLists.txt
+++ b/bindings/pyroot_experimental/PyROOT/CMakeLists.txt
@@ -36,6 +36,7 @@ set(sources
   src/PyROOTStrings.cxx
   src/PyROOTWrapper.cxx
   src/GenericPyz.cxx
+  src/RDataFramePyz.cxx
   src/RVecPyz.cxx
   src/TClassPyz.cxx
   src/TClonesArrayPyz.cxx
@@ -56,5 +57,6 @@ foreach(py_source ${py_sources})
 endforeach()
 
 ROOT_LINKER_LIBRARY(ROOTPython ${sources} LIBRARIES Core Tree cppyy)
+ROOT_INSTALL_HEADERS(inc)
 
 ROOT_ADD_TEST_SUBDIRECTORY(test)
diff --git a/bindings/pyroot_experimental/PyROOT/inc/ROOT/RNumpyDS.hxx b/bindings/pyroot_experimental/PyROOT/inc/ROOT/RNumpyDS.hxx
new file mode 100644
index 00000000000..2013b29c998
--- /dev/null
+++ b/bindings/pyroot_experimental/PyROOT/inc/ROOT/RNumpyDS.hxx
@@ -0,0 +1,236 @@
+// Author: Stefan Wunsch CERN  04/2019
+
+/*************************************************************************
+ * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers.               *
+ * All rights reserved.                                                  *
+ *                                                                       *
+ * For the licensing terms see $ROOTSYS/LICENSE.                         *
+ * For the list of contributors see $ROOTSYS/README/CREDITS.             *
+ *************************************************************************/
+
+#include "ROOT/RIntegerSequence.hxx"
+#include "ROOT/RMakeUnique.hxx"
+#include "ROOT/RDataSource.hxx"
+#include "ROOT/TSeq.hxx"
+#include "ROOT/RVec.hxx"
+
+#include <algorithm>
+#include <map>
+#include <tuple>
+#include <string>
+#include <typeinfo>
+#include <vector>
+
+#include "Python.h"
+
+#ifndef ROOT_RNUMPYDS
+#define ROOT_RNUMPYDS
+
+namespace ROOT {
+
+namespace Internal {
+
+namespace RDF {
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+/// \brief A RDataSource implementation which takes a collection of RVecs, which
+/// are able to adopt data from Numpy arrays
+///
+/// This component allows to create a data source on a set of columns with data
+/// coming from RVecs. The adoption of externally provided data, e.g., via Numpy
+/// arrays, with RVecs allows to read arbitrary data from memory.
+/// In addition, the data source has to keep a reference on the Python owned data
+/// so that the lifetime of the data is tied to the datasource.
+template <typename... ColumnTypes>
+class RNumpyDS final : public ROOT::RDF::RDataSource {
+   using PointerHolderPtrs_t = std::vector<ROOT::Internal::TDS::TPointerHolder *>;
+
+   std::tuple<ROOT::RVec<ColumnTypes>*...> fColumns;
+   const std::vector<std::string> fColNames;
+   const std::map<std::string, std::string> fColTypesMap;
+   // The role of the fPointerHoldersModels is to be initialised with the pack
+   // of arguments in the constrcutor signature at construction time
+   // Once the number of slots is known, the fPointerHolders are initialised
+   // according to the models.
+   const PointerHolderPtrs_t fPointerHoldersModels;
+   std::vector<PointerHolderPtrs_t> fPointerHolders;
+   std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
+   unsigned int fNSlots{0};
+   // Pointer to PyObject holding RVecs
+   // The RVecs itself hold a reference to the associated Numpy arrays so that
+   // the data cannot go out of scope as long as the datasource survives.
+   PyObject* fPyRVecs;
+
+   Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id)
+   {
+      auto colNameStr = std::string(colName);
+      // This could be optimised and done statically
+      const auto idName = ROOT::Internal::RDF::TypeID2TypeName(id);
+      auto it = fColTypesMap.find(colNameStr);
+      if (fColTypesMap.end() == it) {
+         std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
+         throw std::runtime_error(err);
+      }
+
+      const auto colIdName = it->second;
+      if (colIdName != idName) {
+         std::string err = "Column " + colNameStr + " has type " + colIdName +
+                           " while the id specified is associated to type " + idName;
+         throw std::runtime_error(err);
+      }
+
+      const auto colBegin = fColNames.begin();
+      const auto colEnd = fColNames.end();
+      const auto namesIt = std::find(colBegin, colEnd, colName);
+      const auto index = std::distance(colBegin, namesIt);
+
+      Record_t ret(fNSlots);
+      for (auto slot : ROOT::TSeqU(fNSlots)) {
+         ret[slot] = fPointerHolders[index][slot]->GetPointerAddr();
+      }
+      return ret;
+   }
+
+   size_t GetEntriesNumber() { return std::get<0>(fColumns)->size(); }
+   template <std::size_t... S>
+   void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence<S...>)
+   {
+      std::initializer_list<int> expander{
+         (*static_cast<ColumnTypes *>(fPointerHolders[S][slot]->GetPointer()) = (*std::get<S>(fColumns))[entry], 0)...};
+      (void)expander; // avoid unused variable warnings
+   }
+
+   template <std::size_t... S>
+   void ColLenghtChecker(std::index_sequence<S...>)
+   {
+      if (sizeof...(S) < 2)
+         return;
+
+      const std::vector<size_t> colLengths{std::get<S>(fColumns)->size()...};
+      const auto expectedLen = colLengths[0];
+      std::string err;
+      for (auto i : TSeqI(1, colLengths.size())) {
+         if (expectedLen != colLengths[i]) {
+            err += "Column \"" + fColNames[i] + "\" and column \"" + fColNames[0] +
+                   "\" have different lengths: " + std::to_string(expectedLen) + " and " +
+                   std::to_string(colLengths[i]);
+         }
+      }
+      if (!err.empty()) {
+         throw std::runtime_error(err);
+      }
+   }
+
+protected:
+   std::string AsString() { return "Numpy data source"; };
+
+public:
+   RNumpyDS(PyObject* pyRVecs,
+                  std::pair<std::string, ROOT::RVec<ColumnTypes>*>... colsNameVals)
+      : fColumns(std::tuple<ROOT::RVec<ColumnTypes>*...>(colsNameVals.second...)),
+        fColNames({colsNameVals.first...}),
+        fColTypesMap({{colsNameVals.first, ROOT::Internal::RDF::TypeID2TypeName(typeid(ColumnTypes))}...}),
+        fPointerHoldersModels({new ROOT::Internal::TDS::TTypedPointerHolder<ColumnTypes>(new ColumnTypes())...}),
+        fPyRVecs(pyRVecs)
+   {
+      // Take a reference to the data associated with this data source
+      Py_INCREF(fPyRVecs);
+   }
+
+   ~RNumpyDS()
+   {
+      for (auto &&ptrHolderv : fPointerHolders) {
+         for (auto &&ptrHolder : ptrHolderv) {
+            delete ptrHolder;
+         }
+      }
+      // Release the data associated to this data source
+      Py_DECREF(fPyRVecs);
+   }
+
+   const std::vector<std::string> &GetColumnNames() const { return fColNames; }
+
+   std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges()
+   {
+      auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
+      return entryRanges;
+   }
+
+   std::string GetTypeName(std::string_view colName) const
+   {
+      const auto key = std::string(colName);
+      return fColTypesMap.at(key);
+   }
+
+   bool HasColumn(std::string_view colName) const
+   {
+      const auto key = std::string(colName);
+      const auto endIt = fColTypesMap.end();
+      return endIt != fColTypesMap.find(key);
+   }
+
+   bool SetEntry(unsigned int slot, ULong64_t entry)
+   {
+      SetEntryHelper(slot, entry, std::index_sequence_for<ColumnTypes...>());
+      return true;
+   }
+
+   void SetNSlots(unsigned int nSlots)
+   {
+      fNSlots = nSlots;
+      const auto nCols = fColNames.size();
+      fPointerHolders.resize(nCols); // now we need to fill it with the slots, all of the same type
+      auto colIndex = 0U;
+      for (auto &&ptrHolderv : fPointerHolders) {
+         for (auto slot : ROOT::TSeqI(fNSlots)) {
+            auto ptrHolder = fPointerHoldersModels[colIndex]->GetDeepCopy();
+            ptrHolderv.emplace_back(ptrHolder);
+            (void)slot;
+         }
+         colIndex++;
+      }
+      for (auto &&ptrHolder : fPointerHoldersModels)
+         delete ptrHolder;
+   }
+
+   void Initialise()
+   {
+      ColLenghtChecker(std::index_sequence_for<ColumnTypes...>());
+      const auto nEntries = GetEntriesNumber();
+      const auto nEntriesInRange = nEntries / fNSlots; // between integers. Should make smaller?
+      auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
+      fEntryRanges.resize(fNSlots);
+      auto init = 0ULL;
+      auto end = 0ULL;
+      for (auto &&range : fEntryRanges) {
+         end = init + nEntriesInRange;
+         if (0 != reminder) { // Distribute the reminder among the first chunks
+            reminder--;
+            end += 1;
+         }
+         range.first = init;
+         range.second = end;
+         init = end;
+      }
+   }
+
+   std::string GetLabel() { return "RNumpyDS"; }
+};
+
+// Factory to create datasource able to read Numpy arrays through RVecs
+// Note that we have to return the object on the heap so that the interpreter
+// does not clean it up during shutdown and causes a double delete.
+template <typename... ColumnTypes>
+RDataFrame* MakeNumpyDataFrame(PyObject* pyRVecs,
+                              std::pair<std::string, ROOT::RVec<ColumnTypes>*> &&... colNameProxyPairs)
+{
+   return new RDataFrame(std::make_unique<RNumpyDS<ColumnTypes...>>(
+      std::forward<PyObject*>(pyRVecs),
+      std::forward<std::pair<std::string, ROOT::RVec<ColumnTypes>*>>(colNameProxyPairs)...));
+}
+
+} // namespace RDF
+} // namespace Internal
+} // namespace ROOT
+
+#endif // ROOT_RNUMPYDS
diff --git a/bindings/pyroot_experimental/PyROOT/python/ROOT/pythonization/_rdataframe.py b/bindings/pyroot_experimental/PyROOT/python/ROOT/pythonization/_rdataframe.py
index 05a421ee6da..9b303bc13c1 100644
--- a/bindings/pyroot_experimental/PyROOT/python/ROOT/pythonization/_rdataframe.py
+++ b/bindings/pyroot_experimental/PyROOT/python/ROOT/pythonization/_rdataframe.py
@@ -9,6 +9,7 @@
 ################################################################################
 
 from ROOT import pythonization
+from libROOTPython import MakeNumpyDataFrame
 
 
 def RDataFrameAsNumpy(df, columns=None, exclude=None):
@@ -96,3 +97,7 @@ def pythonize_rdataframe(klass, name):
         klass.AsNumpy = RDataFrameAsNumpy
 
     return True
+
+# Add MakeNumpyDataFrame feature as free function to the ROOT module
+import cppyy
+cppyy.gbl.ROOT.RDF.MakeNumpyDataFrame = MakeNumpyDataFrame
diff --git a/bindings/pyroot_experimental/PyROOT/src/PyROOTModule.cxx b/bindings/pyroot_experimental/PyROOT/src/PyROOTModule.cxx
index 63d560af6d0..228cd873e11 100644
--- a/bindings/pyroot_experimental/PyROOT/src/PyROOTModule.cxx
+++ b/bindings/pyroot_experimental/PyROOT/src/PyROOTModule.cxx
@@ -67,6 +67,8 @@ static PyMethodDef gPyROOTMethods[] = {{(char *)"AddDirectoryWritePyz", (PyCFunc
                                         (char *)"Get class to wrap Python callable as C++ callable"},
                                        {(char *)"AsRVec", (PyCFunction)PyROOT::AsRVec, METH_O,
                                         (char *)"Get object with array interface as RVec"},
+                                       {(char *)"MakeNumpyDataFrame", (PyCFunction)PyROOT::MakeNumpyDataFrame, METH_O,
+                                        (char *)"Make RDataFrame from dictionary of numpy arrays"},
                                        {NULL, NULL, 0, NULL}};
 
 #if PY_VERSION_HEX >= 0x03000000
diff --git a/bindings/pyroot_experimental/PyROOT/src/PyROOTPythonize.h b/bindings/pyroot_experimental/PyROOT/src/PyROOTPythonize.h
index 682245338ea..d5291698f55 100644
--- a/bindings/pyroot_experimental/PyROOT/src/PyROOTPythonize.h
+++ b/bindings/pyroot_experimental/PyROOT/src/PyROOTPythonize.h
@@ -39,6 +39,8 @@ PyObject *GetEndianess(PyObject *self, PyObject *args);
 PyObject *GetVectorDataPointer(PyObject *self, PyObject *args);
 PyObject *GetSizeOfType(PyObject *self, PyObject *args);
 
+PyObject *MakeNumpyDataFrame(PyObject *self, PyObject *obj);
+
 } // namespace PyROOT
 
 #endif // !PYROOT_PYTHONIZE_H
diff --git a/bindings/pyroot_experimental/PyROOT/src/RDataFramePyz.cxx b/bindings/pyroot_experimental/PyROOT/src/RDataFramePyz.cxx
new file mode 100644
index 00000000000..fccce3c01b4
--- /dev/null
+++ b/bindings/pyroot_experimental/PyROOT/src/RDataFramePyz.cxx
@@ -0,0 +1,111 @@
+// Author: Stefan Wunsch CERN  04/2019
+// Original PyROOT code by Wim Lavrijsen, LBL
+
+/*************************************************************************
+ * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers.               *
+ * All rights reserved.                                                  *
+ *                                                                       *
+ * For the licensing terms see $ROOTSYS/LICENSE.                         *
+ * For the list of contributors see $ROOTSYS/README/CREDITS.             *
+ *************************************************************************/
+
+#include "CPyCppyy.h"
+#include "CPPInstance.h"
+#include "ProxyWrappers.h"
+#include "PyROOTPythonize.h"
+#include "RConfig.h"
+#include "TInterpreter.h"
+#include "TPython.h"
+
+#include <utility> // std::pair
+#include <sstream> // std::stringstream
+
+////////////////////////////////////////////////////////////////////////////
+/// \brief Make an RDataFrame from a dictionary of numpy arrays
+/// \param[in] pydata Dictionary with numpy arrays
+///
+/// This function takes a dictionary of numpy arrays and creates an RDataFrame
+/// using the keys as column names and the numpy arrays as data.
+PyObject *PyROOT::MakeNumpyDataFrame(PyObject * /*self*/, PyObject * pydata)
+{
+   if (!pydata) {
+      PyErr_SetString(PyExc_RuntimeError, "Object not convertible: Invalid Python object.");
+      return NULL;
+   }
+
+   if (!PyDict_Check(pydata)) {
+      PyErr_SetString(PyExc_RuntimeError, "Object not convertible: Python object is not a dictionary.");
+      return NULL;
+   }
+
+   if (PyDict_Size(pydata) == 0) {
+      PyErr_SetString(PyExc_RuntimeError, "Object not convertible: Dictionary is empty.");
+      return NULL;
+   }
+
+
+   // Add PyObject (dictionary) holding RVecs to data source
+   std::stringstream code;
+   code << "ROOT::Internal::RDF::MakeNumpyDataFrame(";
+   std::stringstream pyaddress;
+   auto pyvecs = PyDict_New();
+   pyaddress << pyvecs;
+   code << "reinterpret_cast<PyObject*>(" << pyaddress.str() << "), ";
+
+   // Iterate over dictionary, convert numpy arrays to RVecs and put together interpreter code
+   PyObject *key, *value;
+   Py_ssize_t pos = 0;
+   const auto size = PyObject_Size(pydata);
+   auto counter = 0u;
+   while (PyDict_Next(pydata, &pos, &key, &value)) {
+      // Get name of key
+      if (!CPyCppyy_PyUnicode_Check(key)) {
+         PyErr_SetString(PyExc_RuntimeError, "Object not convertible: Dictionary key is not convertible to a string.");
+         return NULL;
+      }
+      std::string keystr = CPyCppyy_PyUnicode_AsString(key);
+
+      // Convert value to RVec and attach to dictionary
+      auto pyvec = PyROOT::AsRVec(NULL, value);
+      if (pyvec == NULL) {
+         PyErr_SetString(PyExc_RuntimeError,
+                         ("Object not convertible: Dictionary entry " + keystr + " is not convertible with AsRVec.").c_str());
+         return NULL;
+      }
+      PyDict_SetItem(pyvecs, key, pyvec);
+      Py_DECREF(pyvec);
+
+      // Add pairs of column name and associated RVec to signature
+      std::string vectype = Cppyy::GetScopedFinalName(((CPyCppyy::CPPInstance*)pyvec)->ObjectIsA());
+      std::stringstream vecaddress;
+      vecaddress << ((CPyCppyy::CPPInstance*)pyvec)->GetObject();
+      code << "std::pair<std::string, " << vectype <<  "*>(\"" + keystr
+           << "\", reinterpret_cast<" << vectype+ "*>(" << vecaddress.str() << "))";
+      if (counter != size - 1) {
+         code << ",";
+      } else {
+         code << ");";
+      }
+      counter++;
+   }
+
+   // Create RDataFrame and build Python proxy
+   const auto err = gInterpreter->Declare("#include \"ROOT/RNumpyDS.hxx\"");
+   if (!err) {
+      PyErr_SetString(PyExc_RuntimeError, "Failed to find \"ROOT/RNumpyDS.hxx\".");
+      return NULL;
+   }
+   const auto codeStr = code.str();
+   auto address = (void*) gInterpreter->Calc(codeStr.c_str());
+   const auto pythonOwns = true;
+   auto pyobj = TPython::CPPInstance_FromVoidPtr(address, "ROOT::RDataFrame", pythonOwns);
+
+   // Bind pyobject holding adopted memory to the RVec
+   if (PyObject_SetAttrString(pyobj, "__data__", pyvecs)) {
+      PyErr_SetString(PyExc_RuntimeError, "Object not convertible: Failed to set dictionary as attribute __data__.");
+      return NULL;
+   }
+   Py_DECREF(pyvecs);
+
+   return pyobj;
+}
diff --git a/bindings/pyroot_experimental/PyROOT/test/CMakeLists.txt b/bindings/pyroot_experimental/PyROOT/test/CMakeLists.txt
index 6d5f3c6459a..1dcde5356e5 100644
--- a/bindings/pyroot_experimental/PyROOT/test/CMakeLists.txt
+++ b/bindings/pyroot_experimental/PyROOT/test/CMakeLists.txt
@@ -76,6 +76,7 @@ ROOT_ADD_PYUNITTEST(pyroot_pyz_rvec_asrvec rvec_asrvec.py)
 
 # RDataFrame and subclasses pythonizations
 ROOT_ADD_PYUNITTEST(pyroot_pyz_rdataframe_asnumpy rdataframe_asnumpy.py)
+ROOT_ADD_PYUNITTEST(pyroot_pyz_rdataframe_makenumpy rdataframe_makenumpy.py)
 
 # Passing Python callables to ROOT.TF
 ROOT_ADD_PYUNITTEST(pyroot_pyz_tf_pycallables tf_pycallables.py)
diff --git a/bindings/pyroot_experimental/PyROOT/test/rdataframe_makenumpy.py b/bindings/pyroot_experimental/PyROOT/test/rdataframe_makenumpy.py
new file mode 100644
index 00000000000..43173b8fa97
--- /dev/null
+++ b/bindings/pyroot_experimental/PyROOT/test/rdataframe_makenumpy.py
@@ -0,0 +1,138 @@
+import unittest
+import ROOT
+import numpy as np
+import sys
+
+
+class MakeNumpyDataFrame(unittest.TestCase):
+    """
+    Tests for the MakeNumpyDataFrame feature enabling to read numpy arrays
+    with RDataFrame.
+    """
+
+    dtypes = [
+        "int32", "int64", "uint32", "uint64", "float32", "float64"
+    ]
+
+    def test_dtypes(self):
+        """
+        Test reading different datatypes
+        """
+        for dtype in self.dtypes:
+            data = {"x": np.array([1, 2, 3], dtype=dtype)}
+            df = ROOT.ROOT.RDF.MakeNumpyDataFrame(data)
+            self.assertEqual(df.Mean("x").GetValue(), 2)
+
+    def test_multiple_columns(self):
+        """
+        Test reading multiple columns
+        """
+        data = {}
+        for dtype in self.dtypes:
+            data[dtype] = np.array([1, 2, 3], dtype=dtype)
+        df = ROOT.ROOT.RDF.MakeNumpyDataFrame(data)
+        colnames = df.GetColumnNames()
+        # Test column names
+        for dtype in colnames:
+            self.assertIn(dtype, self.dtypes)
+        # Test mean
+        for dtype in self.dtypes:
+            self.assertEqual(df.Mean(dtype).GetValue(), 2)
+
+    def test_refcount(self):
+        """
+        Check refcounts of associated PyObjects
+        """
+        data = {"x": np.array([1, 2, 3], dtype="float32")}
+        self.assertEqual(sys.getrefcount(data), 2)
+        self.assertEqual(sys.getrefcount(data["x"]), 2)
+
+        df = ROOT.ROOT.RDF.MakeNumpyDataFrame(data)
+        self.assertTrue(hasattr(df, "__data__"))
+        self.assertEqual(sys.getrefcount(df), 2)
+
+        self.assertEqual(sys.getrefcount(data["x"]), 3)
+
+    def test_transformations(self):
+        """
+        Test the use of transformations
+        """
+        data = {"x": np.array([1, 2, 3], dtype="float32")}
+        df = ROOT.ROOT.RDF.MakeNumpyDataFrame(data)
+        df2 = df.Filter("x>1").Define("y", "2*x")
+        self.assertEqual(df2.Mean("x").GetValue(), 2.5)
+        self.assertEqual(df2.Mean("y").GetValue(), 5)
+
+    def test_delete_dict(self):
+        """
+        Test behaviour with data dictionary going out of scope
+        """
+        data = {"x": np.array([1, 2, 3], dtype="float32")}
+        df = ROOT.ROOT.RDF.MakeNumpyDataFrame(data)
+        del data
+        self.assertEqual(df.Mean("x").GetValue(), 2)
+
+    def test_delete_numpy_array(self):
+        """
+        Test behaviour with numpy array going out of scope
+        """
+        x = np.array([1, 2, 3], dtype="float32")
+        data = {"x": x}
+        df = ROOT.ROOT.RDF.MakeNumpyDataFrame(data)
+        del x
+        self.assertEqual(df.Mean("x").GetValue(), 2)
+
+    def test_inplace_dict(self):
+        """
+        Test behaviour with inplace dictionary
+        """
+        df = ROOT.ROOT.RDF.MakeNumpyDataFrame({"x": np.array([1, 2, 3], dtype="float32")})
+        self.assertEqual(df.Mean("x").GetValue(), 2)
+
+    def test_lifetime_numpy_array(self):
+        """
+        Test lifetime of numpy array
+        """
+        x = np.array([1, 2, 3], dtype="float32")
+        ref1 = sys.getrefcount(x)
+
+        df = ROOT.ROOT.RDF.MakeNumpyDataFrame({"x": x})
+        ref2 = sys.getrefcount(x)
+        self.assertEqual(ref2, ref1 + 1)
+
+        del df
+        ref3 = sys.getrefcount(x)
+        self.assertEqual(ref1, ref3)
+
+    def test_lifetime_datasource(self):
+        """
+        Test lifetime of datasource
+
+        Datasource survives until last node of the graph goes out of scope
+        """
+        x = np.array([1, 2, 3], dtype="float32")
+        ref1 = sys.getrefcount(x)
+
+        # Data source has dictionary with RVecs attached, which take a reference
+        # to the numpy array
+        df = ROOT.ROOT.RDF.MakeNumpyDataFrame({"x": x})
+        m = df.Mean("x")
+        ref2 = sys.getrefcount(x)
+        self.assertEqual(ref1 + 1, ref2)
+
+        # Deleting the root node does not change anything since the datasource
+        # owns the RVecs
+        del df
+        self.assertEqual(m.GetValue(), 2)
+        ref3 = sys.getrefcount(x)
+        self.assertEqual(ref1 + 1, ref3)
+
+        # Deleting the last node releases the RVecs and releases the reference
+        # to the numpy array
+        del m
+        ref4 = sys.getrefcount(x)
+        self.assertEqual(ref1, ref4)
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab