From 2aa8e78a16eaf8b0cbc1fa7cb09296fe307f1a10 Mon Sep 17 00:00:00 2001
From: Philippe Canal <pcanal@fnal.gov>
Date: Mon, 20 Nov 2017 15:13:46 -0600
Subject: [PATCH] Improve tear down in case of interpreter static object
 holding pointer to object in TFiles.

We need to first Close the file before deleting the interpreter object,
otherwise the script:

   auto f = TFile::Open(filename,"RECREATE");
   TH1F h(...);
   h.Fill();
   // etc...
   .q

would result in an 'empty' file.

However, when closing the file, we can not delete the object its own and some interpreter
static object (for example a MakeClass skeleton) might hold pointer to those objects.

To resolve this, we introduce a new flag to TDirectory/TDirectoryFile/TFile Close ("nodelete")
that will write the data in to the physical file but will not remove the object from memory.

valgrind --suppressions=/opt/build/root_builds/rootcling.cmake/etc/valgrind-root.supp root.exe -b -l -q execLHEF.C
==6337== Memcheck, a memory error detector
==6337== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==6337== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==6337== Command: root.exe -b -l -q execLHEF.C
==6337==

Processing execLHEF.C...
direct
Warning in <TClass::Init>: no dictionary for class TRootLHEFEvent is available
Warning in <TClass::Init>: no dictionary for class TRootWeight is available
Warning in <TClass::Init>: no dictionary for class TRootLHEFParticle is available
Warning in <TClass::Init>: no dictionary for class TSortableObject is available
all=79618 zero=60000 low=9599 high=10019
reader
all=79618 zero=60000 low=9599 high=10019
legacySelector
Info in <TTreePlayer::MakeClass>: Files: lhef_leg_sel_gen.h and lhef_leg_sel_gen.C generated from TTree: LHEF
all=79618 zero=60000 low=9599 high=10019
selector
all=79618 zero=60000 low=9599 high=10019
makeClass
Info in <TTreePlayer::MakeClass>: Files: lhef_mc_gen.h and lhef_mc_gen.C generated from TTree: LHEF
all=79618 zero=60000 low=9599 high=10019
(int) 0
==6337== Invalid read of size 8
==6337==    at 0x1636165F: TTree::GetCurrentFile() const (TTree.cxx:5247)
==6337==    by 0x405F343: ???
==6337==    by 0x405E6DB: ???
==6337==    by 0x405E6AB: ???
==6337==    by 0x911919A: cling::IncrementalExecutor::runAndRemoveStaticDestructors(cling::Transaction*) (in /mnt/vdb/lsf/workspace/root-pullrequests-build/build/lib/libCling.so)
==6337==    by 0x90C7A3D: cling::Interpreter::runAndRemoveStaticDestructors() (in /mnt/vdb/lsf/workspace/root-pullrequests-build/build/lib/libCling.so)
==6337==    by 0x8FB13E2: TCling::ResetGlobals() (TCling.cxx:3237)
==6337==    by 0x52C4915: TROOT::EndOfProcessCleanups() (TROOT.cxx:1193)
==6337==    by 0x54CC89F: TUnixSystem::Exit(int, bool) (TUnixSystem.cxx:2153)
==6337==    by 0x5361A5B: TApplication::Terminate(int) (TApplication.cxx:1281)
==6337==    by 0x4E52AE2: TRint::Terminate(int) (TRint.cxx:686)
==6337==    by 0x4E5183E: TRint::Run(bool) (TRint.cxx:437)
==6337==  Address 0x11387c40 is 336 bytes inside a block of size 696 free'd
==6337==    at 0x4C2C171: operator delete(void*) (vg_replace_malloc.c:575)
==6337==    by 0x53BC849: TStorage::ObjectDealloc(void*) (TStorage.cxx:362)
==6337==    by 0x53A2A9A: TObject::operator delete(void*) (TObject.cxx:988)
==6337==    by 0x16353FA7: TTree::~TTree() (TTree.cxx:958)
==6337==    by 0x5416C48: TCollection::GarbageCollect(TObject*) (TCollection.cxx:734)
==6337==    by 0x541F3B6: TList::Delete(char const*) (TList.cxx:534)
==6337==    by 0x5419B66: THashList::Delete(char const*) (THashList.cxx:215)
==6337==    by 0x82D03C9: TDirectoryFile::Close(char const*) (TDirectoryFile.cxx:556)
==6337==    by 0x82E1049: TFile::Close(char const*) (TFile.cxx:942)
==6337==    by 0x52C411F: (anonymous namespace)::R__ListSlowClose(TList*) (TROOT.cxx:1092)
==6337==    by 0x52C4220: TROOT::CloseFiles() (TROOT.cxx:1113)
==6337==    by 0x52C48EF: TROOT::EndOfProcessCleanups() (TROOT.cxx:1190)
==6337==  Block was alloc'd at
==6337==    at 0x4C2B145: operator new(unsigned long) (vg_replace_malloc.c:333)
==6337==    by 0x53BC7DD: TStorage::ObjectAlloc(unsigned long) (TStorage.cxx:330)
==6337==    by 0x4E4F165: TObject::operator new(unsigned long) (TObject.h:152)
==6337==    by 0x162D5267: ROOT::new_TTree(void*) (G__Tree.cxx:3940)
==6337==    by 0x54669D2: TClass::New(TClass::ENewType, bool) const (TClass.cxx:4777)
==6337==    by 0x8329330: TKey::ReadObjectAny(TClass const*) (TKey.cxx:1058)
==6337==    by 0x82D1FB0: TDirectoryFile::GetObjectChecked(char const*, TClass const*) (TDirectoryFile.cxx:1044)
==6337==    by 0x16340AC4: void TDirectoryFile::GetObject<TTree>(char const*, TTree*&) (TDirectoryFile.h:80)
==6337==    by 0x405F27F: ???
==6337==    by 0x405E5F3: ???
==6337==    by 0x405E5BB: ???
==6337==    by 0x405E01D: ???
==6337==
==6337==
---
 core/base/src/TDirectory.cxx | 43 +++++++++++++++++++++---------------
 core/base/src/TROOT.cxx      |  3 ++-
 core/meta/src/TClass.cxx     |  1 +
 io/io/src/TDirectoryFile.cxx | 34 +++++++++++++++-------------
 io/io/src/TFile.cxx          | 11 +++++++++
 5 files changed, 58 insertions(+), 34 deletions(-)

diff --git a/core/base/src/TDirectory.cxx b/core/base/src/TDirectory.cxx
index b9aa5d9b91a..393b1724163 100644
--- a/core/base/src/TDirectory.cxx
+++ b/core/base/src/TDirectory.cxx
@@ -543,7 +543,10 @@ void TDirectory::Clear(Option_t *)
 
 ////////////////////////////////////////////////////////////////////////////////
 /// Delete all objects from memory and directory structure itself.
-
+/// if option is "slow", iterate through the containers in a way to can handle
+///    'external' modification (induced by recursions)
+/// if option is "nodelete", write the TDirectory but do not delete the contained
+///    objects.
 void TDirectory::Close(Option_t *option)
 {
    if (!fList) {
@@ -553,26 +556,30 @@ void TDirectory::Close(Option_t *option)
    // Save the directory key list and header
    Save();
 
-   Bool_t slow = option ? (!strcmp(option, "slow") ? kTRUE : kFALSE) : kFALSE;
-   if (!slow) {
-      // Check if it is wise to use the fast deletion path.
-      TObjLink *lnk = fList->FirstLink();
-      while (lnk) {
-         if (lnk->GetObject()->IsA() == TDirectory::Class()) {
-            slow = kTRUE;
-            break;
+   Bool_t nodelete = option ? (!strcmp(option, "nodelete") ? kTRUE : kFALSE) : kFALSE;
+
+   if (!nodelete) {
+      Bool_t slow = option ? (!strcmp(option, "slow") ? kTRUE : kFALSE) : kFALSE;
+      if (!slow) {
+         // Check if it is wise to use the fast deletion path.
+         TObjLink *lnk = fList->FirstLink();
+         while (lnk) {
+            if (lnk->GetObject()->IsA() == TDirectory::Class()) {
+               slow = kTRUE;
+               break;
+            }
+            lnk = lnk->Next();
          }
-         lnk = lnk->Next();
       }
-   }
 
-   // Delete objects from directory list, this in turn, recursively closes all
-   // sub-directories (that were allocated on the heap)
-   // if this dir contains subdirs, we must use the slow option for Delete!
-   // we must avoid "slow" as much as possible, in particular Delete("slow")
-   // with a large number of objects (eg >10^5) would take for ever.
-   if (slow) fList->Delete("slow");
-   else      fList->Delete();
+      // Delete objects from directory list, this in turn, recursively closes all
+      // sub-directories (that were allocated on the heap)
+      // if this dir contains subdirs, we must use the slow option for Delete!
+      // we must avoid "slow" as much as possible, in particular Delete("slow")
+      // with a large number of objects (eg >10^5) would take for ever.
+      if (slow) fList->Delete("slow");
+      else      fList->Delete();
+   }
 
    CleanTargets();
 }
diff --git a/core/base/src/TROOT.cxx b/core/base/src/TROOT.cxx
index 34401511878..63578f63ad9 100644
--- a/core/base/src/TROOT.cxx
+++ b/core/base/src/TROOT.cxx
@@ -1089,7 +1089,8 @@ namespace {
             // is not seen as part of the list.
             // We will later, remove all the object (see files->Clear()
             cursor->SetObject(&harmless); // this must not be zero otherwise things go wrong.
-            dir->Close();
+            // See related comment at the files->Clear("nodelete");
+            dir->Close("nodelete");
             // Put it back
             cursor->SetObject(dir);
          }
diff --git a/core/meta/src/TClass.cxx b/core/meta/src/TClass.cxx
index ae95138a9f9..7b02c050f19 100644
--- a/core/meta/src/TClass.cxx
+++ b/core/meta/src/TClass.cxx
@@ -2732,6 +2732,7 @@ Int_t TClass::GetBaseClassOffset(const TClass *toBase, void *address, bool isDer
    ClassInfo_t* derived = GetClassInfo();
    ClassInfo_t* base = toBase->GetClassInfo();
    if(derived && base) {
+      // TClingClassInfo::GetBaseOffset takes the lock.
       return gCling->ClassInfo_GetBaseOffset(derived, base, address, isDerivedObject);
    }
    else {
diff --git a/io/io/src/TDirectoryFile.cxx b/io/io/src/TDirectoryFile.cxx
index 627440ed38f..7a7ecfc1a1c 100644
--- a/io/io/src/TDirectoryFile.cxx
+++ b/io/io/src/TDirectoryFile.cxx
@@ -532,7 +532,7 @@ TDirectory *TDirectoryFile::GetDirectory(const char *apath,
 ////////////////////////////////////////////////////////////////////////////////
 /// Delete all objects from memory and directory structure itself.
 
-void TDirectoryFile::Close(Option_t *)
+void TDirectoryFile::Close(Option_t *option)
 {
    if (!fList || !fSeekDir) {
       return;
@@ -541,20 +541,24 @@ void TDirectoryFile::Close(Option_t *)
    // Save the directory key list and header
    Save();
 
-   Bool_t fast = kTRUE;
-   TObjLink *lnk = fList->FirstLink();
-   while (lnk) {
-      if (lnk->GetObject()->IsA() == TDirectoryFile::Class()) {fast = kFALSE;break;}
-      lnk = lnk->Next();
-   }
-   // Delete objects from directory list, this in turn, recursively closes all
-   // sub-directories (that were allocated on the heap)
-   // if this dir contains subdirs, we must use the slow option for Delete!
-   // we must avoid "slow" as much as possible, in particular Delete("slow")
-   // with a large number of objects (eg >10^5) would take for ever.
-   {
-      if (fast) fList->Delete();
-      else      fList->Delete("slow");
+   Bool_t nodelete = option ? (!strcmp(option, "nodelete") ? kTRUE : kFALSE) : kFALSE;
+
+   if (!nodelete) {
+      Bool_t fast = kTRUE;
+      TObjLink *lnk = fList->FirstLink();
+      while (lnk) {
+         if (lnk->GetObject()->IsA() == TDirectoryFile::Class()) {fast = kFALSE;break;}
+         lnk = lnk->Next();
+      }
+      // Delete objects from directory list, this in turn, recursively closes all
+      // sub-directories (that were allocated on the heap)
+      // if this dir contains subdirs, we must use the slow option for Delete!
+      // we must avoid "slow" as much as possible, in particular Delete("slow")
+      // with a large number of objects (eg >10^5) would take for ever.
+      {
+         if (fast) fList->Delete();
+         else      fList->Delete("slow");
+      }
    }
 
    // Delete keys from key list (but don't delete the list header)
diff --git a/io/io/src/TFile.cxx b/io/io/src/TFile.cxx
index 01730b99f6c..52089f63979 100644
--- a/io/io/src/TFile.cxx
+++ b/io/io/src/TFile.cxx
@@ -546,6 +546,17 @@ TFile::~TFile()
 {
    Close();
 
+   // In case where the TFile is still open at 'tear-down' time the order of operation will be
+   // call Close("nodelete")
+   // then later call delete TFile
+   // which means that at this point we might still have object held and those
+   // might requires a 'valid' TFile object in their desctructor (for example,
+   // TTree call's GetReadCache which expects a non-null fCacheReadMap).
+   // So delete the objects (if any) now.
+
+   if (fList)
+      fList->Delete("slow");
+
    SafeDelete(fAsyncHandle);
    SafeDelete(fCacheRead);
    SafeDelete(fCacheReadMap);
-- 
GitLab