diff --git a/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx b/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx index 426f63f31f3654d44119ac04b503888ee8ff1f8f..6f4ade603a8f107d1a4aa0228685b064de01e8c2 100644 --- a/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx +++ b/tree/treeplayer/inc/ROOT/TTreeProcessorMT.hxx @@ -46,33 +46,39 @@ the threaded object. namespace ROOT { namespace Internal { + /// A cluster of entries as seen by TTreeView struct TreeViewCluster { Long64_t startEntry; Long64_t endEntry; std::size_t filenameIdx; }; + /// Input data as seen by TTreeView. + /// + /// Each thread will contain a TTreeView that will perform bookkeping of a vector of (few) TreeViewInputs. + /// This vector will contain a TreeViewInput for each task currently working on a different input file. + struct TreeViewInput { + std::unique_ptr<TFile> file; + TTree *tree; // needs to be a raw pointer because file destructs this tree when deleted + std::size_t filenameIdx; ///< The filename index of this file in the list of filenames contained in TTreeView + unsigned int useCount; ///< Number of tasks that are currently using this input + }; + class TTreeView { private: - std::vector<std::string> fFileNames; ///< Names of the files - std::string fTreeName; ///< Name of the tree - std::unique_ptr<TFile> fCurrentFile; ///<! Current file object of this view. - TTree *fCurrentTree; ///<! Current tree object of this view. - std::size_t fCurrentIdx; ///<! Index of the current file. - std::vector<TEntryList> fEntryLists; ///< Entry numbers to be processed per tree/file - TEntryList fCurrentEntryList; ///< Entry numbers for the current range being processed + std::vector<std::string> fFileNames; ///< Names of the files + std::vector<TEntryList> fEntryLists; ///< Entry numbers to be processed per tree/file. 1:1 with fFileNames + std::string fTreeName; ///< Name of the tree + std::vector<TreeViewInput> fOpenInputs; ///< Input files currently open. //////////////////////////////////////////////////////////////////////////////// /// Initialize TTreeView. void Init() { - // Set the current index to an "impossible" value - fCurrentIdx = std::numeric_limits<decltype(fCurrentIdx)>::max(); - // If the tree name is empty, look for a tree in the file if (fTreeName.empty()) { ::TDirectory::TContext ctxt(gDirectory); - std::unique_ptr<TFile> f(TFile::Open(fFileNames[fCurrentIdx].data())); + std::unique_ptr<TFile> f(TFile::Open(fFileNames[0].c_str())); TIter next(f->GetListOfKeys()); while (TKey *key = (TKey*)next()) { const char *className = key->GetClassName(); @@ -82,7 +88,7 @@ namespace ROOT { } } if (fTreeName.empty()) { - auto msg = "Cannot find any tree in file " + fFileNames[fCurrentIdx]; + auto msg = "Cannot find any tree in file " + fFileNames[0]; throw std::runtime_error(msg); } } @@ -95,7 +101,7 @@ namespace ROOT { /// \param[in] tn Name of the tree to process. If not provided, /// the implementation will automatically search for a /// tree in the file. - TTreeView(std::string_view fn, std::string_view tn) : fTreeName(tn), fCurrentIdx(0) + TTreeView(std::string_view fn, std::string_view tn) : fTreeName(tn) { fFileNames.emplace_back(fn); Init(); @@ -107,7 +113,7 @@ namespace ROOT { /// \param[in] tn Name of the tree to process. If not provided, /// the implementation will automatically search for a /// tree in the collection of files. - TTreeView(const std::vector<std::string_view>& fns, std::string_view tn) : fTreeName(tn), fCurrentIdx(0) + TTreeView(const std::vector<std::string_view>& fns, std::string_view tn) : fTreeName(tn) { if (fns.size() > 0) { for (auto& fn : fns) @@ -123,7 +129,7 @@ namespace ROOT { ////////////////////////////////////////////////////////////////////////// /// Constructor based on a TTree. /// \param[in] tree Tree or chain of files containing the tree to process. - TTreeView(TTree& tree) : fTreeName(tree.GetName()), fCurrentIdx(0) + TTreeView(TTree& tree) : fTreeName(tree.GetName()) { static const TClassRef clRefTChain("TChain"); if (clRefTChain == tree.IsA()) { @@ -190,50 +196,43 @@ namespace ROOT { ////////////////////////////////////////////////////////////////////////// /// Copy constructor. /// \param[in] view Object to copy. - TTreeView(const TTreeView& view) : fTreeName(view.fTreeName), fCurrentIdx(view.fCurrentIdx) + TTreeView(const TTreeView& view) : fTreeName(view.fTreeName) { for (auto& fn : view.fFileNames) fFileNames.emplace_back(fn); for (auto& el : view.fEntryLists) fEntryLists.emplace_back(el); - - Init(); } ////////////////////////////////////////////////////////////////////////// /// Get a TTreeReader for the current tree of this view. - std::unique_ptr<TTreeReader> GetTreeReader(Long64_t start, Long64_t end) + using TreeReaderEntryListPair = std::pair<std::unique_ptr<TTreeReader>, std::unique_ptr<TEntryList>>; + TreeReaderEntryListPair GetTreeReader(std::size_t dataIdx, Long64_t start, Long64_t end) { - TTreeReader *reader; + std::unique_ptr<TTreeReader> reader; + std::unique_ptr<TEntryList> elist; if (fEntryLists.size() > 0) { // TEntryList and SetEntriesRange do not work together (the former has precedence). // We need to construct a TEntryList that contains only those entry numbers // in our desired range. - fCurrentEntryList.Reset(); - if (fEntryLists[fCurrentIdx].GetN() > 0) { - Long64_t entry = fEntryLists[fCurrentIdx].GetEntry(0); + const auto filenameIdx = fOpenInputs[dataIdx].filenameIdx; + elist.reset(new TEntryList); + if (fEntryLists[filenameIdx].GetN() > 0) { + Long64_t entry = fEntryLists[filenameIdx].GetEntry(0); do { - if (entry >= start && entry < end) fCurrentEntryList.Enter(entry); - } while ((entry = fEntryLists[fCurrentIdx].Next()) >= 0); + if (entry >= start && entry < end) // TODO can quit this loop early when entry >= end + elist->Enter(entry); + } while ((entry = fEntryLists[filenameIdx].Next()) >= 0); } - - reader = new TTreeReader(fCurrentTree, &fCurrentEntryList); - } - else { + reader.reset(new TTreeReader(fOpenInputs[dataIdx].tree, elist.get())); + } else { // If no TEntryList is involved we can safely set the range in the reader - reader = new TTreeReader(fCurrentTree); + reader.reset(new TTreeReader(fOpenInputs[dataIdx].tree)); reader->SetEntriesRange(start, end); } - return std::unique_ptr<TTreeReader>(reader); - } - - ////////////////////////////////////////////////////////////////////////// - /// Get the number of entries of the current tree of this view. - Long64_t GetEntries() const - { - return fCurrentTree->GetEntries(); + return std::make_pair(std::move(reader), std::move(elist)); } ////////////////////////////////////////////////////////////////////////// @@ -251,26 +250,44 @@ namespace ROOT { } ////////////////////////////////////////////////////////////////////////// - /// Set the current file and tree of this view. - void SetCurrent(unsigned int i) + /// Search the open files for the filename with index i. If found, increment its "user counter", otherwise + /// open it and add it to the vector of open files. Return the file's index in the vector of open files. + std::size_t FindOrOpenFile(std::size_t filenameIdx) { - if (i != fCurrentIdx) { - fCurrentIdx = i; - // Here we need to restore the directory after opening the file. - TDirectory::TContext ctxt(gDirectory); - TFile *f = TFile::Open(fFileNames[fCurrentIdx].data()); - fCurrentTree = (TTree*)f->Get(fTreeName.data()); - fCurrentTree->ResetBit(TObject::kMustCleanup); - fCurrentFile.reset(f); + const auto inputIt = + std::find_if(fOpenInputs.begin(), fOpenInputs.end(), + [filenameIdx](const TreeViewInput &i) { return i.filenameIdx == filenameIdx; }); + if (inputIt != fOpenInputs.end()) { + // requested file is already open + inputIt->useCount++; + return std::distance(fOpenInputs.begin(), inputIt); // return input's index in fOpenInputs + } else { + // requested file needs to be added to fOpenInputs + TDirectory::TContext ctxt(gDirectory); // needed to restore the directory after opening the file + std::unique_ptr<TFile> f(TFile::Open(fFileNames[filenameIdx].data())); + TTree *t = nullptr; + f->GetObject(fTreeName.c_str(), t); + t->ResetBit(TObject::kMustCleanup); + fOpenInputs.emplace_back(TreeViewInput{std::move(f), t, filenameIdx, /*useCount=*/1}); + return fOpenInputs.size() - 1; } } + + ////////////////////////////////////////////////////////////////////////// + /// Decrease "use count" of the file at filenameIdx, delete corresponding TreeViewInput if use count is zero. + void Cleanup(std::size_t dataIdx) + { + fOpenInputs[dataIdx].useCount--; + if(fOpenInputs[dataIdx].useCount == 0) + fOpenInputs.erase(fOpenInputs.begin() + dataIdx); + } }; } // End of namespace Internal class TTreeProcessorMT { private: - ROOT::TThreadedObject<ROOT::Internal::TTreeView> treeView; ///<! Threaded object with <file,tree> per thread + ROOT::TThreadedObject<ROOT::Internal::TTreeView> treeView; ///<! Thread-local TreeViews std::vector<ROOT::Internal::TreeViewCluster> MakeClusters(); public: diff --git a/tree/treeplayer/src/TTreeProcessorMT.cxx b/tree/treeplayer/src/TTreeProcessorMT.cxx index 1c3b9b5f86044d0c7ee8c735e7deb69f80035310..8ee4bff8a9f3b174bd8a873edadc433e23ddfc0b 100644 --- a/tree/treeplayer/src/TTreeProcessorMT.cxx +++ b/tree/treeplayer/src/TTreeProcessorMT.cxx @@ -104,9 +104,12 @@ void TTreeProcessorMT::Process(std::function<void(TTreeReader &)> func) auto clusters = MakeClusters(); auto mapFunction = [this, &func](const ROOT::Internal::TreeViewCluster &c) { - treeView->SetCurrent(c.filenameIdx); - auto tr = treeView->GetTreeReader(c.startEntry, c.endEntry); - func(*tr); + // get the idx to the TreeViewInput for this task in the current thread + const auto dataIdx = treeView->FindOrOpenFile(c.filenameIdx); + auto readerAndEntryList = treeView->GetTreeReader(dataIdx, c.startEntry, c.endEntry); + auto &reader = std::get<0>(readerAndEntryList); + func(*reader); + treeView->Cleanup(dataIdx); }; // Assume number of threads has been initialized via ROOT::EnableImplicitMT