Skip to content
Snippets Groups Projects
Commit 157d83e7 authored by Enrico Guiraud
Browse files

[TREEPROCMT] Pass per-tree number of entries to thread-local TChains

This ensures that each thread-local TChain knows which tree contains
which global entry number without having to open all intermediate
files to check how many entries they contain, resulting in much
less contention in TTreeProcessorMT when multiple threads are loading
the right file to process.
parent d75c56ad
No related branches found
No related tags found
No related merge requests found
...@@ -54,8 +54,8 @@ namespace ROOT { ...@@ -54,8 +54,8 @@ namespace ROOT {
Long64_t end; Long64_t end;
}; };
std::vector<ROOT::Internal::EntryCluster> using ClustersAndEntries = std::pair<std::vector<EntryCluster>, std::vector<Long64_t>>;
MakeClusters(const std::string &treename, const std::vector<std::string> &filenames); ClustersAndEntries MakeClusters(const std::string &treename, const std::vector<std::string> &filenames);
class TTreeView { class TTreeView {
private: private:
...@@ -74,8 +74,8 @@ namespace ROOT { ...@@ -74,8 +74,8 @@ namespace ROOT {
std::vector<std::vector<std::string>> fFriendFileNames; ///< Names of the files where friends are stored std::vector<std::vector<std::string>> fFriendFileNames; ///< Names of the files where friends are stored
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// Construct fChain, also adding friends if needed /// Construct fChain, also adding friends if needed and injecting knowledge of offsets if available.
void MakeChain() void MakeChain(const std::vector<Long64_t> &nEntries)
{ {
// If the tree name is empty, look for a tree in the file // If the tree name is empty, look for a tree in the file
if (fTreeName.empty()) { if (fTreeName.empty()) {
...@@ -96,8 +96,9 @@ namespace ROOT { ...@@ -96,8 +96,9 @@ namespace ROOT {
} }
fChain.reset(new TChain(fTreeName.c_str())); fChain.reset(new TChain(fTreeName.c_str()));
for (auto &fn : fFileNames) { const auto nFiles = fFileNames.size();
fChain->Add(fn.c_str()); for (auto i = 0u; i < nFiles; ++i) {
fChain->Add(fFileNames[i].c_str(), nEntries[i]);
} }
fChain->ResetBit(TObject::kMustCleanup); fChain->ResetBit(TObject::kMustCleanup);
...@@ -273,9 +274,9 @@ namespace ROOT { ...@@ -273,9 +274,9 @@ namespace ROOT {
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
/// Get a TTreeReader for the current tree of this view. /// Get a TTreeReader for the current tree of this view.
TreeReaderEntryListPair GetTreeReader(Long64_t start, Long64_t end) TreeReaderEntryListPair GetTreeReader(Long64_t start, Long64_t end, const std::vector<Long64_t> &nEntries)
{ {
MakeChain(); MakeChain(nEntries);
std::unique_ptr<TTreeReader> reader; std::unique_ptr<TTreeReader> reader;
std::unique_ptr<TEntryList> elist; std::unique_ptr<TEntryList> elist;
......
...@@ -30,31 +30,40 @@ objects. ...@@ -30,31 +30,40 @@ objects.
using namespace ROOT; using namespace ROOT;
namespace ROOT {
namespace Internal {
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
/// Return a vector of cluster boundaries for the given tree and files. /// Return a vector of cluster boundaries for the given tree and files.
std::vector<ROOT::Internal::EntryCluster> ClustersAndEntries
ROOT::Internal::MakeClusters(const std::string &treeName, const std::vector<std::string> &fileNames) MakeClusters(const std::string &treeName, const std::vector<std::string> &fileNames)
{ {
// Note that as a side-effect of opening all files that are going to be used in the
// analysis once, all necessary streamers will be loaded into memory.
TDirectory::TContext c; TDirectory::TContext c;
std::vector<ROOT::Internal::EntryCluster> clusters; std::vector<EntryCluster> clusters;
std::vector<Long64_t> nEntries;
const auto nFileNames = fileNames.size(); const auto nFileNames = fileNames.size();
Long64_t offset = 0; Long64_t offset = 0ll;
for (auto i = 0u; i < nFileNames; ++i) { // EntryCluster requires the index of the file the cluster belongs to for (auto i = 0u; i < nFileNames; ++i) {
std::unique_ptr<TFile> f(TFile::Open(fileNames[i].c_str())); // need TFile::Open to load plugins if need be std::unique_ptr<TFile> f(TFile::Open(fileNames[i].c_str())); // need TFile::Open to load plugins if need be
TTree *t = nullptr; // not a leak, t will be deleted by f TTree *t = nullptr; // not a leak, t will be deleted by f
f->GetObject(treeName.c_str(), t); f->GetObject(treeName.c_str(), t);
auto clusterIter = t->GetClusterIterator(0); auto clusterIter = t->GetClusterIterator(0);
Long64_t start = 0, end = 0; Long64_t start = 0ll, end = 0ll;
const Long64_t entries = t->GetEntries(); const Long64_t entries = t->GetEntries();
nEntries.emplace_back(entries);
// Iterate over the clusters in the current file // Iterate over the clusters in the current file
while ((start = clusterIter()) < entries) { while ((start = clusterIter()) < entries) {
end = clusterIter.GetNextEntry(); end = clusterIter.GetNextEntry();
// Add the current file's offset to start and end to make them (chain) global // Add the current file's offset to start and end to make them (chain) global
clusters.emplace_back(ROOT::Internal::EntryCluster{start + offset, end + offset}); clusters.emplace_back(EntryCluster{start + offset, end + offset});
} }
offset += entries; offset += entries;
} }
return clusters;
return std::make_pair(std::move(clusters), std::move(nEntries));
}
}
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
...@@ -106,15 +115,17 @@ void TTreeProcessorMT::Process(std::function<void(TTreeReader &)> func) ...@@ -106,15 +115,17 @@ void TTreeProcessorMT::Process(std::function<void(TTreeReader &)> func)
// Enable this IMT use case (activate its locks) // Enable this IMT use case (activate its locks)
Internal::TParTreeProcessingRAII ptpRAII; Internal::TParTreeProcessingRAII ptpRAII;
const auto clusters = ROOT::Internal::MakeClusters(treeView->GetTreeName(), treeView->GetFileNames()); const auto clustersAndEntries = ROOT::Internal::MakeClusters(treeView->GetTreeName(), treeView->GetFileNames());
const auto &clusters = clustersAndEntries.first;
const auto &entries = clustersAndEntries.second;
auto mapFunction = [this, &func](const ROOT::Internal::EntryCluster &c) { auto mapFunction = [this, &func, &entries](const ROOT::Internal::EntryCluster &c) {
// This task will operate with the tree that contains start // This task will operate with the tree that contains start
treeView->PushTaskFirstEntry(c.start); treeView->PushTaskFirstEntry(c.start);
std::unique_ptr<TTreeReader> reader; std::unique_ptr<TTreeReader> reader;
std::unique_ptr<TEntryList> elist; std::unique_ptr<TEntryList> elist;
std::tie(reader, elist) = treeView->GetTreeReader(c.start, c.end); std::tie(reader, elist) = treeView->GetTreeReader(c.start, c.end, entries);
func(*reader); func(*reader);
// In case of task interleaving, we need to load here the tree of the parent task // In case of task interleaving, we need to load here the tree of the parent task
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment