From d75c56ad66e457f1e3234ec43147d8f8f4f1289a Mon Sep 17 00:00:00 2001 From: Enrico Guiraud <enrico.guiraud@cern.ch> Date: Wed, 30 May 2018 13:17:07 +0200 Subject: [PATCH] [TREEPROCMT] Let MakeClusters take the list of files as argument The new signature will come in handy to let different tasks generate clusters for different files or sets of files in the future. --- tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx | 4 +- tree/treeplayer/src/TTreeProcessorMT.cxx | 57 +++++++++---------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx b/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx index d1519b699c9..bed164adfa1 100644 --- a/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx +++ b/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx @@ -54,6 +54,9 @@ namespace ROOT { Long64_t end; }; + std::vector<ROOT::Internal::EntryCluster> + MakeClusters(const std::string &treename, const std::vector<std::string> &filenames); + class TTreeView { private: using TreeReaderEntryListPair = std::pair<std::unique_ptr<TTreeReader>, std::unique_ptr<TEntryList>>; @@ -321,7 +324,6 @@ namespace ROOT { private: ROOT::TThreadedObject<ROOT::Internal::TTreeView> treeView; ///<! Thread-local TreeViews - std::vector<ROOT::Internal::EntryCluster> MakeClusters(); public: TTreeProcessorMT(std::string_view filename, std::string_view treename = ""); TTreeProcessorMT(const std::vector<std::string_view>& filenames, std::string_view treename = ""); diff --git a/tree/treeplayer/src/TTreeProcessorMT.cxx b/tree/treeplayer/src/TTreeProcessorMT.cxx index fc6bb7315a3..af70a03145f 100644 --- a/tree/treeplayer/src/TTreeProcessorMT.cxx +++ b/tree/treeplayer/src/TTreeProcessorMT.cxx @@ -30,6 +30,33 @@ objects. using namespace ROOT; +//////////////////////////////////////////////////////////////////////// +/// Return a vector of cluster boundaries for the given tree and files. +std::vector<ROOT::Internal::EntryCluster> +ROOT::Internal::MakeClusters(const std::string &treeName, const std::vector<std::string> &fileNames) +{ + TDirectory::TContext c; + std::vector<ROOT::Internal::EntryCluster> clusters; + const auto nFileNames = fileNames.size(); + Long64_t offset = 0; + for (auto i = 0u; i < nFileNames; ++i) { // EntryCluster requires the index of the file the cluster belongs to + std::unique_ptr<TFile> f(TFile::Open(fileNames[i].c_str())); // need TFile::Open to load plugins if need be + TTree *t = nullptr; // not a leak, t will be deleted by f + f->GetObject(treeName.c_str(), t); + auto clusterIter = t->GetClusterIterator(0); + Long64_t start = 0, end = 0; + const Long64_t entries = t->GetEntries(); + // Iterate over the clusters in the current file + while ((start = clusterIter()) < entries) { + end = clusterIter.GetNextEntry(); + // Add the current file's offset to start and end to make them (chain) global + clusters.emplace_back(ROOT::Internal::EntryCluster{start + offset, end + offset}); + } + offset += entries; + } + return clusters; +} + //////////////////////////////////////////////////////////////////////// /// Constructor based on a file name. /// \param[in] filename Name of the file containing the tree to process. @@ -57,34 +84,6 @@ TTreeProcessorMT::TTreeProcessorMT(TTree &tree) : treeView(tree) {} /// \param[in] entries List of entry numbers to process. TTreeProcessorMT::TTreeProcessorMT(TTree &tree, TEntryList &entries) : treeView(tree, entries) {} -//////////////////////////////////////////////////////////////////////// -/// Divide input data in clusters, i.e. the workloads to distribute to tasks -std::vector<ROOT::Internal::EntryCluster> TTreeProcessorMT::MakeClusters() -{ - TDirectory::TContext c; - std::vector<ROOT::Internal::EntryCluster> clusters; - const auto &fileNames = treeView->GetFileNames(); - const auto nFileNames = fileNames.size(); - const auto &treeName = treeView->GetTreeName(); - Long64_t offset = 0; - for (auto i = 0u; i < nFileNames; ++i) { // EntryCluster requires the index of the file the cluster belongs to - std::unique_ptr<TFile> f(TFile::Open(fileNames[i].c_str())); // need TFile::Open to load plugins if need be - TTree *t = nullptr; // not a leak, t will be deleted by f - f->GetObject(treeName.c_str(), t); - auto clusterIter = t->GetClusterIterator(0); - Long64_t start = 0, end = 0; - const Long64_t entries = t->GetEntries(); - // Iterate over the clusters in the current file and generate a task for each of them - while ((start = clusterIter()) < entries) { - end = clusterIter.GetNextEntry(); - // Add the current file's offset to start and end to make them (chain) global - clusters.emplace_back(ROOT::Internal::EntryCluster{start + offset, end + offset}); - } - offset += entries; - } - return clusters; -} - ////////////////////////////////////////////////////////////////////////////// /// Process the entries of a TTree in parallel. The user-provided function /// receives a TTreeReader which can be used to iterate on a subrange of @@ -107,7 +106,7 @@ void TTreeProcessorMT::Process(std::function<void(TTreeReader &)> func) // Enable this IMT use case (activate its locks) Internal::TParTreeProcessingRAII ptpRAII; - auto clusters = MakeClusters(); + const auto clusters = ROOT::Internal::MakeClusters(treeView->GetTreeName(), treeView->GetFileNames()); auto mapFunction = [this, &func](const ROOT::Internal::EntryCluster &c) { // This task will operate with the tree that contains start -- GitLab