From 2da6ec40ba8965b75e009a1709779e0407609b9a Mon Sep 17 00:00:00 2001
From: Joerg Stelzer <joerg.stelzer@cern.ch>
Date: Mon, 27 Jun 2011 13:19:22 +0000
Subject: [PATCH] fix event requests for Category classifier

git-svn-id: http://root.cern.ch/svn/root/trunk@39989 27541ba8-7e3a-0410-8455-c3a389f83636
---
 tmva/doc/LICENSE                         |   2 +-
 tmva/doc/README                          |  70 +--
 tmva/inc/TMVA/DataSetFactory.h           | 107 ++--
 tmva/inc/TMVA/DataSetInfo.h              |   4 +-
 tmva/inc/TMVA/MethodBase.h               |  51 +-
 tmva/inc/TMVA/SVKernelFunction.h         |   0
 tmva/inc/TMVA/SVKernelMatrix.h           |   0
 tmva/src/CrossEntropy.cxx                |   1 -
 tmva/src/DataSetFactory.cxx              | 720 ++++++++++++-----------
 tmva/src/DataSetInfo.cxx                 |  84 +--
 tmva/src/DataSetManager.cxx              |  10 +-
 tmva/src/Factory.cxx                     |   4 +-
 tmva/src/MethodBoost.cxx                 |   2 +
 tmva/src/MethodCategory.cxx              |  46 +-
 tmva/src/MethodCompositeBase.cxx         |  90 +--
 tmva/src/PDEFoamVect.cxx                 |   8 +-
 tmva/src/RegressionVariance.cxx          |   2 +-
 tmva/src/VariableNormalizeTransform.cxx  |   1 +
 tmva/test/TMVAClassificationCategory.cxx |  34 +-
 tmva/test/setup.sh                       |   3 +
 20 files changed, 660 insertions(+), 579 deletions(-)
 mode change 100644 => 100755 tmva/inc/TMVA/SVKernelFunction.h
 mode change 100644 => 100755 tmva/inc/TMVA/SVKernelMatrix.h

diff --git a/tmva/doc/LICENSE b/tmva/doc/LICENSE
index e555484b511..40a9b19fd4e 100644
--- a/tmva/doc/LICENSE
+++ b/tmva/doc/LICENSE
@@ -1,6 +1,6 @@
 TMVA -- Toolkit for Multivariate Data Analysis
 
-Copyright (c) 2005-2009, Regents of CERN (Switzerland), the 
+Copyright (c) 2005-2010, Regents of CERN (Switzerland), the 
 University of Victoria (Canada), the MPI fuer Kernphysik Heidelberg
 (Germany), LAPP (France), the University of Bonn (Germany).
 
diff --git a/tmva/doc/README b/tmva/doc/README
index 0df2207c9d3..68c92e81a10 100644
--- a/tmva/doc/README
+++ b/tmva/doc/README
@@ -4,7 +4,7 @@
 
 TMVA Users Guide    : http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf
 TMVA home page      : http://tmva.sourceforge.net/
-TMVA developer page : http://sourceforge.net/projects/tmva
+TMVA download page  : http://sourceforge.net/projects/tmva
 TMVA mailing list   : http://sourceforge.net/mailarchive/forum.php?forum_name=tmva-users
 TMVA license (BSD)  : http://tmva.sourceforge.net/LICENSE
 
@@ -15,7 +15,7 @@ System requirements:
 
   TMVA has been tested to run on Linux, MAC/OSX and Windows platforms.
 
-  Running TMVA requires the availability of ROOT shared libraries with ROOT_VERSION >= 5.08
+  Running TMVA requires the availability of ROOT shared libraries with ROOT_VERSION >= 5.14
   (type "root-config --version" to see the version installed)
 
 ========================================================================================
@@ -26,64 +26,58 @@ Getting Started:
   How to compile the code:
   ------------------------
   /home> cd TMVA
-  /home/TMVA> source setup.[c]sh     // includes TMVA/lib in your lib path
-  /home/TMVA> cd src                              
-  /home/TMVA/src> make               // compile and build the library ../libTMVA.1.so
+  /home/TMVA> make                   # compile and build the library lib/libTMVA.1.so
 
-  How to run the code as ROOT macro: // training/testing of an academic example
+  How to run the code as ROOT macro: # training/testing of an academic example
   ----------------------------------
-  /home/TMVA> cd macros
+  /home/TMVA> cd test
+  /home/TMVA/test> source setup.sh   # setup script must be executed before running macros (use setup.csh for c-shell)
 
   --- For classification:
-  /home/TMVA/macros> root -l TMVAClassification.C                       // run all standard classifiers
-  /home/TMVA/macros> root -l TMVAClassification.C\(\"LD,Likelihood\"\)  // run LD and Likelihood classifiers
+  /home/TMVA/test> root -l TMVAClassification.C                       # run all standard classifiers (takes a while)
+  /home/TMVA/test> root -l TMVAClassification.C\(\"LD,Likelihood\"\)  # run LD and Likelihood classifiers
 
   --- For regression:
-  /home/TMVA/macros> root -l TMVARegression.C                           // run all regression algorithms
-  /home/TMVA/macros> root -l TMVARegression.C\(\"LD,KNN\"\)             // run LD and k-NN regression algorithms
+  /home/TMVA/test> root -l TMVARegression.C                           # run all regression algorithms (takes a while)
+  /home/TMVA/test> root -l TMVARegression.C\(\"LD,KNN\"\)             # run LD and k-NN regression algorithms
 
   --> at the end of the jobs, a GUI will pop up: try to click through all the buttons;
       some of the lower buttons are method-specific, and will only work when the
       corresponding classifiers/regression algorithms have been trained/tested before
       (unless they are greyed out)
 
-  How to run the code as an executable: // training/testing of an academic example
+  How to run the code as an executable: 
   -------------------------------------
-  /home/TMVA> cd execs
-  /home/TMVA/execs> make
-  /home/TMVA/execs> ./TMVAClassification                                // run all standard classifiers 
-  /home/TMVA/execs> ./TMVAClassification LD Likelihood                  // run LD and Likelihood classifiers 
+  /home/TMVA/test> make
+  /home/TMVA/test> ./TMVAClassification                              # run all standard classifiers 
+  /home/TMVA/test> ./TMVAClassification LD Likelihood                # run LD and Likelihood classifiers 
 
-  ... and similar for regression
+  ... and similarly for regression
 
-  /home/TMVA/examples> root -l ../macros/TMVAGui.C // start the GUI
+  /home/TMVA/test> root -l TMVAGui.C   # start the GUI
+
+  How to run the code as a Python script using PyROOT: 
+  -------------------------------------
+  /home/TMVA/test> make
+  /home/TMVA/test> python ./TMVAClassification.py --method LD,Likelihood
 
   How to apply the TMVA methods:
   -------------------------------------
-  /home/TMVA> cd macros
+  /home/TMVA> cd test
 
   --- For classification:
-  /home/TMVA/macros> root -l TMVAClassificationApplication.C                
-  /home/TMVA/macros> root -l TMVAClassificationApplication.C\(\"LD,Likelihood\"\) 
+  /home/TMVA/test> root -l TMVAClassificationApplication.C                
+  /home/TMVA/test> root -l TMVAClassificationApplication.C\(\"LD,Likelihood\"\) 
 
-  ... and similar for regression
+  ... and similarly for regression.
+  ... and similarly for executables.
 
   The directory structure:
   ------------------------
-  src/          : the TMVA source code
+  inc/          : the TMVA class headers
+  src/          : the TMVA class source
   lib/          : here you'll find the TMVA library (libTMVA.1.so) after compilation
-                  (copy it to you preferred library directory or include
-                   this directory in your LD_LIBRARY_PATH as it is done
-                   by: source setup.[c]sh
-  macros/       : example code of how to use the TMVA library with a ROOT macro
-                  uses input data from a Toy Monte Carlo;
-                  also: handy root macros which read and display the 
-                  results produced by TMVAClassification and TMVARegression
-  execs/        : same example code as in 'macros', but for using the TMVA library in an executable
-  execs/data    : the Toy Monte Carlo data 
-  python/       : example code of how to use the TMVA library with a python script;
-                  requires availability of PyROOT 
-  development/  : for use by developers only
+  test/         : example code for analysis macros and executables, the GUI and analysis scripts.
 
 ========================================================================================
 
@@ -158,11 +152,11 @@ Please report any problems and/or suggestions for improvements to the authors.
 
 ========================================================================================
 
-Copyright © (2005-2009):
+Copyright © (2005-2010):
 ------------------------
 
-   Andreas Hoecker, Peter Speckmayer, Jörg Stelzer (all: CERN, Switzerland),
-   Jan Therhaag, Eckhard von Toerne (both: U. Bonn, Germany), 
+   Andreas Hoecker, Peter Speckmayer, Jörg Stelzer (CERN, Switzerland),
+   Jan Therhaag, Eckhard von Toerne (U. Bonn, Germany), 
    Helge Voss (MPI-KP Heidelberg, Germany), 
 
 For the full list of contributors to TMVA, please see: http://tmva.sourceforge.net/#authors
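
The per-class event requests that this patch reworks enter TMVA through the
"PrepareTrainingAndTestTree" options string ("nTrain_<class>"/"nTest_<class>",
declared in InitOptions below). A minimal user-side sketch, assuming the
academic example file and tree names shipped with TMVA; names and numbers
here are illustrative, not part of the patch:

   // Sketch only: file name, tree names and event numbers are assumptions.
   #include "TFile.h"
   #include "TTree.h"
   #include "TMVA/Factory.h"
   #include "TMVA/Types.h"

   void sketch_EventRequests()   // hypothetical macro name
   {
      TFile* input  = TFile::Open( "tmva_class_example.root" );
      TFile* output = TFile::Open( "TMVA.root", "RECREATE" );

      TMVA::Factory factory( "TMVAClassification", output, "AnalysisType=Classification" );
      factory.AddVariable( "var1", 'F' );
      factory.AddSignalTree    ( (TTree*)input->Get("TreeS"), 1.0 );
      factory.AddBackgroundTree( (TTree*)input->Get("TreeB"), 1.0 );

      // the per-class event requests; 0 means "use all remaining events"
      factory.PrepareTrainingAndTestTree( "",
         "nTrain_Signal=1000:nTest_Signal=1000:SplitMode=Random:NormMode=NumEvents:!V" );

      factory.BookMethod( TMVA::Types::kLD, "LD" );
      factory.TrainAllMethods();
      factory.TestAllMethods();
      factory.EvaluateAllMethods();
      output->Close();
   }
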
diff --git a/tmva/inc/TMVA/DataSetFactory.h b/tmva/inc/TMVA/DataSetFactory.h
index 04856343b55..bc92730269f 100644
--- a/tmva/inc/TMVA/DataSetFactory.h
+++ b/tmva/inc/TMVA/DataSetFactory.h
@@ -73,24 +73,13 @@
 #endif
 
 namespace TMVA {
-   
+
    class DataSet;
    class DataSetInfo;
    class DataInputHandler;
    class TreeInfo;
    class MsgLogger;
 
-   typedef std::vector< Event* >                              EventVector;
-   typedef std::vector< EventVector >                         EventVectorOfClasses;
-   typedef std::map<Types::ETreeType,  EventVectorOfClasses > EventVectorOfClassesOfTreeType;
-   typedef std::map<Types::ETreeType,  EventVector >          EventVectorOfTreeType;
-
-   typedef std::vector< Double_t >                            ValuePerClass;
-   typedef std::map<Types::ETreeType,  ValuePerClass >        ValuePerClassOfTreeType;
-
-   typedef std::vector< Int_t >                               NumberPerClass;
-   typedef std::map<Types::ETreeType,  NumberPerClass >       NumberPerClassOfTreeType;
-
   // =============== maybe move these elsewhere (e.g. into the tools)
 
    // =============== functors =======================
@@ -139,7 +128,7 @@ namespace TMVA {
 
 
    template <typename F>
-   class null_t 
+   class null_t
    {
    private:
       // returns argF
@@ -157,7 +146,7 @@ namespace TMVA {
    }
 
 
-   
+
    template <typename F, typename G, typename H>
    class compose_binary_t : public std::binary_function<typename G::argument_type,
                                                         typename H::argument_type,
@@ -171,7 +160,7 @@ namespace TMVA {
       compose_binary_t(const F& _f, const G& _g, const H& _h) : f(_f), g(_g), h(_h) 
       {
       }
-      
+
       typename F::result_type operator()(const typename G::argument_type& argG,
                                          const typename H::argument_type& argH) const 
       {
@@ -218,12 +207,49 @@ namespace TMVA {
 
    class DataSetFactory {
 
+      typedef std::vector< Event* >                             EventVector;
+      typedef std::vector< EventVector >                        EventVectorOfClasses;
+      typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType;
+      typedef std::map<Types::ETreeType, EventVector >          EventVectorOfTreeType;
+
+      typedef std::vector< Double_t >                    ValuePerClass;
+      typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType;
+
+      class EventStats {
+      public:
+         Int_t    nTrainingEventsRequested;
+         Int_t    nTestingEventsRequested;
+         Int_t    nInitialEvents;
+         Int_t    nEvBeforeCut;
+         Int_t    nEvAfterCut;
+         Float_t  nWeEvBeforeCut;
+         Float_t  nWeEvAfterCut;
+         Double_t nNegWeights;
+         Float_t* varAvLength;
+         EventStats():
+            nTrainingEventsRequested(0),
+            nTestingEventsRequested(0),
+            nInitialEvents(0),
+            nEvBeforeCut(0),
+            nEvAfterCut(0),
+            nWeEvBeforeCut(0),
+            nWeEvAfterCut(0),
+            nNegWeights(0),
+            varAvLength(0)
+         {}
+         ~EventStats() { delete[] varAvLength; }
+         Float_t cutScaling() const { return Float_t(nEvAfterCut)/nEvBeforeCut; }
+      };
+
+      typedef std::vector< int >                            NumberPerClass;
+      typedef std::vector< EventStats >                     EvtStatsPerClass;
+
    public:
 
 
 
       // singleton class
-      static DataSetFactory& Instance() { if (!fgInstance) fgInstance = new DataSetFactory(); return *fgInstance; } 
+      static DataSetFactory& Instance() { if (!fgInstance) fgInstance = new DataSetFactory(); return *fgInstance; }
       static void destroyInstance() { if (fgInstance) { delete fgInstance; fgInstance=0; } }
 
       DataSet* CreateDataSet( DataSetInfo &, DataInputHandler& );
@@ -231,34 +257,37 @@ namespace TMVA {
    protected:
 
       ~DataSetFactory();
-      
+
       DataSetFactory();
       static DataSetFactory *fgInstance;
 
       DataSet*  BuildInitialDataSet( DataSetInfo&, TMVA::DataInputHandler& );
       DataSet*  BuildDynamicDataSet( DataSetInfo& );
-     
+
       // ---------- new versions
-      void      BuildEventVector    ( DataSetInfo& dsi, 
-                                      DataInputHandler& dataInput, 
-                                      EventVectorOfClassesOfTreeType& tmpEventVector);
-      
-      DataSet*  MixEvents           ( DataSetInfo& dsi, 
-                                      EventVectorOfClassesOfTreeType& tmpEventVector, 
-                                      NumberPerClassOfTreeType& nTrainTestEvents,
-                                      const TString& splitMode,
-                                      const TString& mixMode, 
-                                      const TString& normMode, 
-                                      UInt_t splitSeed);
-      
-      void      RenormEvents        ( DataSetInfo& dsi, 
-                                      EventVectorOfClassesOfTreeType& tmpEventVector,
-                                      const TString& normMode );
-
-      void      InitOptions         ( DataSetInfo& dsi, 
-                                      NumberPerClassOfTreeType& nTrainTestEvents, 
-                                      TString& normMode, UInt_t& splitSeed, TString& splitMode, TString& mixMode );
-      
+      void      BuildEventVector ( DataSetInfo& dsi,
+                                   DataInputHandler& dataInput,
+                                   EventVectorOfClassesOfTreeType& eventsmap,
+                                   EvtStatsPerClass& eventCounts);
+
+      DataSet*  MixEvents        ( DataSetInfo& dsi,
+                                   EventVectorOfClassesOfTreeType& eventsmap,
+                                   EvtStatsPerClass& eventCounts,
+                                   const TString& splitMode,
+                                   const TString& mixMode,
+                                   const TString& normMode,
+                                   UInt_t splitSeed);
+
+      void      RenormEvents     ( DataSetInfo& dsi,
+                                   EventVectorOfClassesOfTreeType& eventsmap,
+                                   const EvtStatsPerClass& eventCounts,
+                                   const TString& normMode );
+
+      void      InitOptions      ( DataSetInfo& dsi,
+                                   EvtStatsPerClass& eventsmap,
+                                   TString& normMode, UInt_t& splitSeed,
+                                   TString& splitMode, TString& mixMode );
+
 
       // ------------------------
 
@@ -282,7 +311,7 @@ namespace TMVA {
       Bool_t                     fVerbose;           //! Verbosity
       TString                    fVerboseLevel;      //! VerboseLevel
 
-      // the event 
+      // the event
       mutable TTree*             fCurrentTree;       //! the tree, events are currently read from
       mutable UInt_t             fCurrentEvtIdx;     //! the current event (to avoid reading of the same event)
 
diff --git a/tmva/inc/TMVA/DataSetInfo.h b/tmva/inc/TMVA/DataSetInfo.h
index 776d8993565..2d4038c3480 100644
--- a/tmva/inc/TMVA/DataSetInfo.h
+++ b/tmva/inc/TMVA/DataSetInfo.h
@@ -14,7 +14,7 @@
  *      Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland              *
  *      Joerg Stelzer   <Joerg.Stelzer@cern.ch>  - DESY, Germany                  *
  *                                                                                *
- * Copyright (c) 2008:                                                            *
+ * Copyright (c) 2008-2011:                                                       *
  *      CERN, Switzerland                                                         *
  *      MPI-K Heidelberg, Germany                                                 *
  *      DESY Hamburg, Germany                                                     *
@@ -157,7 +157,7 @@ namespace TMVA {
 
       std::vector<TString> GetListOfVariables() const;
 
-      // correlation matrix 
+      // correlation matrix
       const TMatrixD*    CorrelationMatrix     ( const TString& className ) const;
       void               SetCorrelationMatrix  ( const TString& className, TMatrixD* matrix );
       void               PrintCorrelationMatrix( const TString& className );
diff --git a/tmva/inc/TMVA/MethodBase.h b/tmva/inc/TMVA/MethodBase.h
index 58eb36cf9de..5c43369b066 100644
--- a/tmva/inc/TMVA/MethodBase.h
+++ b/tmva/inc/TMVA/MethodBase.h
@@ -341,14 +341,14 @@ namespace TMVA {
       TString          GetTrainingTMVAVersionString() const;
       TString          GetTrainingROOTVersionString() const;
 
-      TransformationHandler&        GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) 
-          { 
-	     if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation; 
-	  }
-      const TransformationHandler&  GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const 
-          { 
-	     if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation; 
-	  }
+      TransformationHandler&        GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
+      {
+         if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation;
+      }
+      const TransformationHandler&  GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const
+      {
+         if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation;
+      }
 
       void             RerouteTransformationHandler (TransformationHandler* fTargetTransformation) { fTransformationPointer=fTargetTransformation; }
 
@@ -413,7 +413,7 @@ namespace TMVA {
 
       // ---------- protected event and tree accessors -----------------------------
 
-      // names of input variables (if the original names are expressions, they are 
+      // names of input variables (if the original names are expressions, they are
       // transformed into regexps)
       const TString&   GetInternalVarName( Int_t ivar ) const { return (*fInputVars)[ivar]; }
       const TString&   GetOriginalVarName( Int_t ivar ) const { return DataInfo().GetVariableInfo(ivar).GetExpression(); }
@@ -435,18 +435,18 @@ namespace TMVA {
 
       // some basic statistical analysis
       void Statistics( Types::ETreeType treeType, const TString& theVarName,
-                       Double_t&, Double_t&, Double_t&, 
+                       Double_t&, Double_t&, Double_t&,
                        Double_t&, Double_t&, Double_t& );
 
-      // if TRUE, write weights only to text files 
+      // if TRUE, write weights only to text files
       Bool_t           TxtWeightsOnly() const { return kTRUE; }
 
    protected:
-      
+
       // access to event information that needs method-specific information
-      
-      Float_t GetTWeight( const Event* ev ) const { 
-         return (fIgnoreNegWeightsInTraining && (ev->GetWeight() < 0)) ? 0. : ev->GetWeight(); 
+
+      Float_t GetTWeight( const Event* ev ) const {
+         return (fIgnoreNegWeightsInTraining && (ev->GetWeight() < 0)) ? 0. : ev->GetWeight();
       }
 
       Bool_t           IsConstructedFromWeightFile() const { return fConstructedFromWeightFile; }
@@ -479,13 +479,13 @@ namespace TMVA {
       // PDFs for classifier response (required to compute signal probability and Rarity)
       void             CreateMVAPdfs();
 
-      // for root finder 
+      // for root finder
       static Double_t  IGetEffForRoot( Double_t );  // interface
       Double_t         GetEffForRoot ( Double_t );  // implementation
 
       // used for file parsing
       Bool_t           GetLine( std::istream& fin, char * buf );
-      
+
       // fill test tree with classification or regression results
       virtual void     AddClassifierOutput    ( Types::ETreeType type );
       virtual void     AddClassifierOutputProb( Types::ETreeType type );
@@ -494,12 +494,13 @@ namespace TMVA {
 
    private:
 
-      void             AddInfoItem( void* gi, const TString& name, const TString& value) const;
+      void             AddInfoItem( void* gi, const TString& name,
+                                    const TString& value) const;
 
-      static void      CreateVariableTransforms(const TString& trafoDefinition, 
-						TMVA::DataSetInfo& dataInfo,
-						TMVA::TransformationHandler& transformationHandler,
-						TMVA::MsgLogger& log );
+      static void      CreateVariableTransforms(const TString& trafoDefinition,
+                                                TMVA::DataSetInfo& dataInfo,
+                                                TMVA::TransformationHandler& transformationHandler,
+                                                TMVA::MsgLogger& log );
 
 
       // ========== class members ==================================================
@@ -523,7 +524,7 @@ namespace TMVA {
    private:
 
       // MethodCuts redefines some of the evaluation variables and histograms -> must access private members
-      friend class MethodCuts; 
+      friend class MethodCuts;
 
       Bool_t           fDisableWriting;       //! set to true in order to suppress writing to XML
 
@@ -537,11 +538,11 @@ namespace TMVA {
       // naming and versioning
       TString          fJobName;             // name of job -> user defined, appears in weight files
       TString          fMethodName;          // name of the method (set in derived class)
-      Types::EMVA      fMethodType;          // type of method (set in derived class)      
+      Types::EMVA      fMethodType;          // type of method (set in derived class)
       TString          fTestvar;             // variable used in evaluation, etc (mostly the MVA)
       UInt_t           fTMVATrainingVersion; // TMVA version used for training
       UInt_t           fROOTTrainingVersion; // ROOT version used for training
-      Bool_t           fConstructedFromWeightFile; // is it obtained from weight file? 
+      Bool_t           fConstructedFromWeightFile; // is it obtained from weight file?
 
       // Directory structure: fMethodBaseDir/fBaseDir
       // where the first directory name is defined by the method type
diff --git a/tmva/inc/TMVA/SVKernelFunction.h b/tmva/inc/TMVA/SVKernelFunction.h
old mode 100644
new mode 100755
diff --git a/tmva/inc/TMVA/SVKernelMatrix.h b/tmva/inc/TMVA/SVKernelMatrix.h
old mode 100644
new mode 100755
diff --git a/tmva/src/CrossEntropy.cxx b/tmva/src/CrossEntropy.cxx
index e7885cdc684..e886ff381a3 100644
--- a/tmva/src/CrossEntropy.cxx
+++ b/tmva/src/CrossEntropy.cxx
@@ -32,7 +32,6 @@
 //             -p log (p) - (1-p)log(1-p);     p=purity                        
 //_______________________________________________________________________
 
-//#include <math.h>
 #include "TMath.h"
 #include "TMVA/CrossEntropy.h"
 
diff --git a/tmva/src/DataSetFactory.cxx b/tmva/src/DataSetFactory.cxx
index 74b629c8d7a..68cdc6f5f8f 100644
--- a/tmva/src/DataSetFactory.cxx
+++ b/tmva/src/DataSetFactory.cxx
@@ -1,30 +1,30 @@
 // @(#)root/tmva $Id$
 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss
 
-/**********************************************************************************
- * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
- * Package: TMVA                                                                  *
- * Class  : DataSetFactory                                                        *
- * Web    : http://tmva.sourceforge.net                                           *
- *                                                                                *
- * Description:                                                                   *
- *      Implementation (see header for description)                               *
- *                                                                                *
- * Authors (alphabetical):                                                        *
- *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
- *      Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland           *
- *      Joerg Stelzer   <Joerg.Stelzer@cern.ch>  - CERN, Switzerland              *
- *      Eckhard von Toerne <evt@physik.uni-bonn.de>  - U. of Bonn, Germany        *
- *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
- *                                                                                *
- * Copyright (c) 2009:                                                            *
- *      CERN, Switzerland                                                         *
- *      MPI-K Heidelberg, Germany                                                 *
- *      U. of Bonn, Germany                                                       *
- * Redistribution and use in source and binary forms, with or without             *
- * modification, are permitted according to the terms listed in LICENSE           *
- * (http://tmva.sourceforge.net/LICENSE)                                          *
- **********************************************************************************/
+/*****************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis  *
+ * Package: TMVA                                                             *
+ * Class  : DataSetFactory                                                   *
+ * Web    : http://tmva.sourceforge.net                                      *
+ *                                                                           *
+ * Description:                                                              *
+ *      Implementation (see header for description)                          *
+ *                                                                           *
+ * Authors (alphabetical):                                                   *
+ *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland         *
+ *      Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland      *
+ *      Joerg Stelzer   <Joerg.Stelzer@cern.ch>  - MSU, USA                  *
+ *      Eckhard von Toerne <evt@physik.uni-bonn.de>  - U. of Bonn, Germany   *
+ *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany *
+ *                                                                           *
+ * Copyright (c) 2009:                                                       *
+ *      CERN, Switzerland                                                    *
+ *      MPI-K Heidelberg, Germany                                            *
+ *      U. of Bonn, Germany                                                  *
+ * Redistribution and use in source and binary forms, with or without        *
+ * modification, are permitted according to the terms listed in LICENSE      *
+ * (http://tmva.sourceforge.net/LICENSE)                                     *
+ *****************************************************************************/
 
 #include <assert.h>
 
@@ -85,7 +85,7 @@ TMVA::DataSetFactory* TMVA::DataSetFactory::fgInstance = 0;
 namespace TMVA {
    // calculate the largest common divider
    // this function is not happy if numbers are negative!
-   Int_t LargestCommonDivider(Int_t a, Int_t b) 
+   Int_t LargestCommonDivider(Int_t a, Int_t b)
    {
       if (a<b) {Int_t tmp = a; a=b; b=tmp; } // achieve a>=b
       if (b==0) return a;
@@ -107,7 +107,7 @@ TMVA::DataSetFactory::DataSetFactory() :
 }
 
 //_______________________________________________________________________
-TMVA::DataSetFactory::~DataSetFactory() 
+TMVA::DataSetFactory::~DataSetFactory()
 {
    // destructor
    std::vector<TTreeFormula*>::const_iterator formIt;
@@ -122,7 +122,8 @@ TMVA::DataSetFactory::~DataSetFactory()
 }
 
 //_______________________________________________________________________
-TMVA::DataSet* TMVA::DataSetFactory::CreateDataSet( TMVA::DataSetInfo& dsi, TMVA::DataInputHandler& dataInput ) 
+TMVA::DataSet* TMVA::DataSetFactory::CreateDataSet( TMVA::DataSetInfo& dsi,
+                                                    TMVA::DataInputHandler& dataInput )
 {
    // steering the creation of a new dataset
 
@@ -131,7 +132,7 @@ TMVA::DataSet* TMVA::DataSetFactory::CreateDataSet( TMVA::DataSetInfo& dsi, TMVA
 
    if (ds->GetNEvents() > 1) {
       CalcMinMax(ds,dsi);
-      
+
       // from the final dataset build the correlation matrix
       for (UInt_t cl = 0; cl< dsi.GetNClasses(); cl++) {
          const TString className = dsi.GetClassInfo(cl)->GetName();
@@ -144,17 +145,18 @@ TMVA::DataSet* TMVA::DataSetFactory::CreateDataSet( TMVA::DataSetInfo& dsi, TMVA
 }
 
 //_______________________________________________________________________
-TMVA::DataSet* TMVA::DataSetFactory::BuildDynamicDataSet( TMVA::DataSetInfo& dsi ) 
+TMVA::DataSet* TMVA::DataSetFactory::BuildDynamicDataSet( TMVA::DataSetInfo& dsi )
 {
    Log() << kDEBUG << "Build DataSet consisting of one Event with dynamically changing variables" << Endl;
    DataSet* ds = new DataSet(dsi);
 
-   // create a DataSet with one Event which uses dynamic variables (pointers to variables)
+   // create a DataSet with one Event which uses dynamic variables
+   // (pointers to variables)
    if(dsi.GetNClasses()==0){
       dsi.AddClass( "data" );
       dsi.GetClassInfo( "data" )->SetNumber(0);
    }
-   
+
    std::vector<Float_t*>* evdyn = new std::vector<Float_t*>(0);
 
    std::vector<VariableInfo>& varinfos = dsi.GetVariableInfos();
@@ -166,7 +168,7 @@ TMVA::DataSet* TMVA::DataSetFactory::BuildDynamicDataSet( TMVA::DataSetInfo& dsi
    for (;it!=itEnd;++it) {
       Float_t* external=(Float_t*)(*it).GetExternalLink();
       if (external==0)
-	 Log() << kDEBUG << "The link to the external variable is NULL while I am trying to build a dynamic data set. In this case fTmpEvent from MethodBase HAS TO BE USED in the method to get useful values in variables." << Endl;
+         Log() << kDEBUG << "The link to the external variable is NULL while I am trying to build a dynamic data set. In this case fTmpEvent from MethodBase HAS TO BE USED in the method to get useful values in variables." << Endl;
       evdyn->push_back (external);
    }
 
@@ -186,11 +188,14 @@ TMVA::DataSet* TMVA::DataSetFactory::BuildDynamicDataSet( TMVA::DataSetInfo& dsi
 }
 
 //_______________________________________________________________________
-TMVA::DataSet* TMVA::DataSetFactory::BuildInitialDataSet( DataSetInfo& dsi, DataInputHandler& dataInput ) 
+TMVA::DataSet*
+TMVA::DataSetFactory::BuildInitialDataSet( DataSetInfo& dsi,
+                                           DataInputHandler& dataInput )
 {
-   // if no entries, than create a DataSet with one Event which uses dynamic variables (pointers to variables)
+   // if no entries, then create a DataSet with one Event which uses
+   // dynamic variables (pointers to variables)
    if (dataInput.GetEntries()==0) return BuildDynamicDataSet( dsi );
-   // ------------------------------------------------------------------------------------
+   // -------------------------------------------------------------------------
 
    // register the classes in the datasetinfo-object
    // information comes from the trees in the dataInputHandler-object
@@ -200,32 +205,31 @@ TMVA::DataSet* TMVA::DataSetFactory::BuildInitialDataSet( DataSetInfo& dsi, Data
    }
    delete classList;
 
+   EvtStatsPerClass eventCounts(dsi.GetNClasses());
    TString normMode;
    TString splitMode;
    TString mixMode;
-   UInt_t splitSeed;
+   UInt_t  splitSeed;
+   InitOptions( dsi, eventCounts, normMode, splitSeed, splitMode , mixMode );
 
-   // ======= build event-vector tentative new ordering =================================
-   
-   TMVA::EventVectorOfClassesOfTreeType tmpEventVector;
-   TMVA::NumberPerClassOfTreeType       nTrainTestEvents;
+   // ======= build event-vector from input, apply preselection ===============
+   EventVectorOfClassesOfTreeType tmpEventVector;
+   BuildEventVector( dsi, dataInput, tmpEventVector, eventCounts );
 
-   InitOptions     ( dsi, nTrainTestEvents, normMode, splitSeed, splitMode , mixMode );
-   BuildEventVector( dsi, dataInput, tmpEventVector );
-      
-   DataSet* ds = MixEvents( dsi, tmpEventVector, nTrainTestEvents, splitMode, mixMode, normMode, splitSeed);
+   DataSet* ds = MixEvents( dsi, tmpEventVector, eventCounts,
+                            splitMode, mixMode, normMode, splitSeed);
 
    const Bool_t showCollectedOutput = kFALSE;
    if (showCollectedOutput) {
       Int_t maxL = dsi.GetClassNameMaxLength();
       Log() << kINFO << "Collected:" << Endl;
       for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) {
-         Log() << kINFO << "    " 
-               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() 
+         Log() << kINFO << "    "
+               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
                << " training entries: " << ds->GetNClassEvents( 0, cl ) << Endl;
-         Log() << kINFO << "    " 
-               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() 
-               << " testing  entries: " << ds->GetNClassEvents( 1, cl ) << Endl;      
+         Log() << kINFO << "    "
+               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
+               << " testing  entries: " << ds->GetNClassEvents( 1, cl ) << Endl;
       }
       Log() << kINFO << " " << Endl;
    }
@@ -234,19 +238,18 @@ TMVA::DataSet* TMVA::DataSetFactory::BuildInitialDataSet( DataSetInfo& dsi, Data
 }
 
 //_______________________________________________________________________
-Bool_t TMVA::DataSetFactory::CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar )
-{ 
+Bool_t TMVA::DataSetFactory::CheckTTreeFormula( TTreeFormula* ttf,
+                                                const TString& expression,
+                                                Bool_t& hasDollar )
+{
    // checks a TTreeFormula for problems
    Bool_t worked = kTRUE;
-      
+
    if( ttf->GetNdim() <= 0 )
-      Log() << kFATAL << "Expression " << expression.Data() << " could not be resolved to a valid formula. " << Endl;
-   //    if( ttf->GetNcodes() == 0 ){
-   //       Log() << kWARNING << "Expression: " << expression.Data() << " does not appear to depend on any TTree variable --> please check spelling" << Endl;
-   //       worked = kFALSE;
-   //    }
+      Log() << kFATAL << "Expression " << expression.Data()
+            << " could not be resolved to a valid formula. " << Endl;
    if( ttf->GetNdata() == 0 ){
-      Log() << kWARNING << "Expression: " << expression.Data() 
+      Log() << kWARNING << "Expression: " << expression.Data()
             << " does not provide data for this event. "
             << "This event is not taken into account. --> please check if you use as a variable "
             << "an entry of an array which is not filled for some events "
@@ -262,7 +265,7 @@ Bool_t TMVA::DataSetFactory::CheckTTreeFormula( TTreeFormula* ttf, const TString
 
 //_______________________________________________________________________
 void TMVA::DataSetFactory::ChangeToNewTree( TreeInfo& tinfo, const DataSetInfo & dsi )
-{ 
+{
    // While the data gets copied into the local training and testing
   // trees, the input tree can change (for instance when changing from
    // signal to background tree, or using TChains as input) The
@@ -347,7 +350,7 @@ void TMVA::DataSetFactory::ChangeToNewTree( TreeInfo& tinfo, const DataSetInfo &
 
       if (dsi.GetClassInfo(clIdx)->GetName() != tinfo.GetClassName() ) { // if the tree is of another class
          fWeightFormula.push_back( 0 );
-         continue; 
+         continue;
       }
 
       ttf = 0;
@@ -482,7 +485,7 @@ void TMVA::DataSetFactory::CalcMinMax( DataSet* ds, TMVA::DataSetInfo& dsi )
 TMatrixD* TMVA::DataSetFactory::CalcCorrelationMatrix( DataSet* ds, const UInt_t classNumber )
 {
    // computes correlation matrix for variables "theVars" in tree;
-   // "theType" defines the required event "type" 
+   // "theType" defines the required event "type"
    // ("type" variable must be present in tree)
 
    // first compute variance-covariance
@@ -498,7 +501,7 @@ TMatrixD* TMVA::DataSetFactory::CalcCorrelationMatrix( DataSet* ds, const UInt_t
             if (d > 0) (*mat)(ivar, jvar) /= sqrt(d);
             else {
                Log() << kWARNING << "<GetCorrelationMatrix> Zero variances for variables "
-                     << "(" << ivar << ", " << jvar << ") = " << d                   
+                     << "(" << ivar << ", " << jvar << ") = " << d
                      << Endl;
                (*mat)(ivar, jvar) = 0;
             }
@@ -523,7 +526,7 @@ TMatrixD* TMVA::DataSetFactory::CalcCovarianceMatrix( DataSet * ds, const UInt_t
 
    // init matrices
    TVectorD vec(nvar);
-   TMatrixD mat2(nvar, nvar);      
+   TMatrixD mat2(nvar, nvar);
    for (ivar=0; ivar<nvar; ivar++) {
       vec(ivar) = 0;
       for (jvar=0; jvar<nvar; jvar++) mat2(ivar, jvar) = 0;
@@ -538,13 +541,13 @@ TMatrixD* TMVA::DataSetFactory::CalcCovarianceMatrix( DataSet * ds, const UInt_t
 
       Double_t weight = ev->GetWeight();
       ic += weight; // count used events
-      
+
       for (ivar=0; ivar<nvar; ivar++) {
-         
+
          Double_t xi = ev->GetValue(ivar);
          vec(ivar) += xi*weight;
          mat2(ivar, ivar) += (xi*xi*weight);
-         
+
          for (jvar=ivar+1; jvar<nvar; jvar++) {
             Double_t xj =  ev->GetValue(jvar);
             mat2(ivar, jvar) += (xi*xj*weight);
@@ -570,13 +573,16 @@ TMatrixD* TMVA::DataSetFactory::CalcCovarianceMatrix( DataSet * ds, const UInt_t
 // --------------------------------------- new versions
 
 //_______________________________________________________________________
-void TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi, 
-                                        TMVA::NumberPerClassOfTreeType& nTrainTestEvents, 
-                                        TString& normMode, UInt_t& splitSeed, 
-                                        TString& splitMode,
-                                        TString& mixMode  ) 
+void
+TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi,
+                                   EvtStatsPerClass& nEventRequests,
+                                   TString& normMode,
+                                   UInt_t&  splitSeed,
+                                   TString& splitMode,
+                                   TString& mixMode  )
 {
    // the dataset splitting
+
    Configurable splitSpecs( dsi.GetSplitOptions() );
    splitSpecs.SetConfigName("DataSetFactory");
    splitSpecs.SetConfigDescription( "Configuration options given in the \"PrepareForTrainingAndTesting\" call; these options define the creation of the data sets used for training and expert validation by TMVA" );
@@ -595,10 +601,10 @@ void TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi,
    splitSpecs.AddPreDefVal(TString("Random"));
    splitSpecs.AddPreDefVal(TString("Alternate"));
    splitSpecs.AddPreDefVal(TString("Block"));
-   
+
    splitSeed = 100;
    splitSpecs.DeclareOptionRef( splitSeed, "SplitSeed",
-                                "Seed for random event shuffling" );   
+                                "Seed for random event shuffling" );
 
    normMode = "NumEvents";  // the weight normalisation modes
    splitSpecs.DeclareOptionRef( normMode, "NormMode",
@@ -609,21 +615,14 @@ void TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi,
 
    // the number of events
 
-   // initialization
-   nTrainTestEvents.insert( TMVA::NumberPerClassOfTreeType::value_type( Types::kTraining, TMVA::NumberPerClass( dsi.GetNClasses() ) ) );
-   nTrainTestEvents.insert( TMVA::NumberPerClassOfTreeType::value_type( Types::kTesting,  TMVA::NumberPerClass( dsi.GetNClasses() ) ) );
-
    // fill in the numbers
    for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) {
-      nTrainTestEvents[Types::kTraining].at(cl)  = 0;
-      nTrainTestEvents[Types::kTesting].at(cl)   = 0;
-
       TString clName = dsi.GetClassInfo(cl)->GetName();
       TString titleTrain =  TString().Format("Number of training events of class %s (default: 0 = all)",clName.Data()).Data();
       TString titleTest  =  TString().Format("Number of test events of class %s (default: 0 = all)",clName.Data()).Data();
 
-      splitSpecs.DeclareOptionRef( nTrainTestEvents[Types::kTraining].at(cl) , TString("nTrain_")+clName, titleTrain );
-      splitSpecs.DeclareOptionRef( nTrainTestEvents[Types::kTesting].at(cl)  , TString("nTest_")+clName , titleTest  );
+      splitSpecs.DeclareOptionRef( nEventRequests.at(cl).nTrainingEventsRequested, TString("nTrain_")+clName, titleTrain );
+      splitSpecs.DeclareOptionRef( nEventRequests.at(cl).nTestingEventsRequested , TString("nTest_")+clName , titleTest  );
    }
 
    splitSpecs.DeclareOptionRef( fVerbose, "V", "Verbosity (default: true)" );
@@ -637,7 +636,7 @@ void TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi,
    splitSpecs.CheckForUnusedOptions();
 
    // output logging verbosity
-   if (Verbose()) fLogger->SetMinType( kVERBOSE );   
+   if (Verbose()) fLogger->SetMinType( kVERBOSE );
    if (fVerboseLevel.CompareTo("Debug")   ==0) fLogger->SetMinType( kDEBUG );
    if (fVerboseLevel.CompareTo("Verbose") ==0) fLogger->SetMinType( kVERBOSE );
    if (fVerboseLevel.CompareTo("Info")    ==0) fLogger->SetMinType( kINFO );
@@ -647,72 +646,58 @@ void TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi,
    // adjust mixmode if same as splitmode option has been set
    Log() << kINFO << "Splitmode is: \"" << splitMode << "\" the mixmode is: \"" << mixMode << "\"" << Endl;
    if (mixMode=="SAMEASSPLITMODE") mixMode = splitMode;
-   else if (mixMode!=splitMode) 
+   else if (mixMode!=splitMode)
       Log() << kINFO << "DataSet splitmode="<<splitMode
             <<" differs from mixmode="<<mixMode<<Endl;
 }
 
 
 //_______________________________________________________________________
-void  TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi, 
-                                              TMVA::DataInputHandler& dataInput, 
-                                              TMVA::EventVectorOfClassesOfTreeType& tmpEventVector )
+void
+TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi,
+                                        TMVA::DataInputHandler& dataInput,
+                                        EventVectorOfClassesOfTreeType& eventsmap,
+                                        EvtStatsPerClass& eventCounts)
 {
    // build empty event vectors
    // distributes events between kTraining/kTesting/kMaxTreeType
-   
-   tmpEventVector.insert( std::make_pair(Types::kTraining   ,TMVA::EventVectorOfClasses(dsi.GetNClasses() ) ) );
-   tmpEventVector.insert( std::make_pair(Types::kTesting    ,TMVA::EventVectorOfClasses(dsi.GetNClasses() ) ) );
-   tmpEventVector.insert( std::make_pair(Types::kMaxTreeType,TMVA::EventVectorOfClasses(dsi.GetNClasses() ) ) );
 
+   const UInt_t nclasses = dsi.GetNClasses();
+
+   eventsmap[ Types::kTraining ]    = EventVectorOfClasses(nclasses);
+   eventsmap[ Types::kTesting ]     = EventVectorOfClasses(nclasses);
+   eventsmap[ Types::kMaxTreeType ] = EventVectorOfClasses(nclasses);
 
    // create the type, weight and boostweight branches
-   const UInt_t nvars    = dsi.GetNVariables();
-   const UInt_t ntgts    = dsi.GetNTargets();
-   const UInt_t nvis     = dsi.GetNSpectators();
-   //   std::vector<Float_t> fmlEval(nvars+ntgts+1+1+nvis);     // +1+1 for results of evaluation of cut and weight ttreeformula  
-
-   // number of signal and background events passing cuts
-   std::vector< Int_t >    nInitialEvents( dsi.GetNClasses() );
-   std::vector< Int_t >    nEvBeforeCut(   dsi.GetNClasses() );
-   std::vector< Int_t >    nEvAfterCut(    dsi.GetNClasses() );
-   std::vector< Float_t >  nWeEvBeforeCut( dsi.GetNClasses() );
-   std::vector< Float_t >  nWeEvAfterCut(  dsi.GetNClasses() );
-   std::vector< Double_t > nNegWeights(    dsi.GetNClasses() );
-   std::vector< Float_t* > varAvLength(    dsi.GetNClasses() );
+   const UInt_t nvars = dsi.GetNVariables();
+   const UInt_t ntgts = dsi.GetNTargets();
+   const UInt_t nvis  = dsi.GetNSpectators();
+
+   for (size_t i=0; i<nclasses; i++) {
+      eventCounts[i].varAvLength = new Float_t[nvars];
+      for (UInt_t ivar=0; ivar<nvars; ivar++)
+         eventCounts[i].varAvLength[ivar] = 0;
+   }
 
    Bool_t haveArrayVariable = kFALSE;
    Bool_t *varIsArray = new Bool_t[nvars];
 
-   for (size_t i=0; i<varAvLength.size(); i++) {
-      varAvLength[i] = new Float_t[nvars];
-      for (UInt_t ivar=0; ivar<nvars; ivar++) {
-         //varIsArray[ivar] = kFALSE;
-         varAvLength[i][ivar] = 0;
-      }
-   }
+   // if we work with chains we need to remember the current tree if
+   // the chain jumps to a new tree we have to reset the formulas
+   for (UInt_t cl=0; cl<nclasses; cl++) {
 
-   // if we work with chains we need to remember the current tree
-   // if the chain jumps to a new tree we have to reset the formulas
-   for (UInt_t cl=0; cl<dsi.GetNClasses(); cl++) {
-
-      Log() << kINFO << "Create training and testing trees -- looping over class \"" 
+      Log() << kINFO << "Create training and testing trees -- looping over class \""
             << dsi.GetClassInfo(cl)->GetName() << "\" ..." << Endl;
 
+      EventStats& classEventCounts = eventCounts[cl];
+
       // info output for weights
-      const TString tmpWeight = dsi.GetClassInfo(cl)->GetWeight();
-      if (tmpWeight!="") {
-         Log() << kINFO << "Weight expression for class \"" << dsi.GetClassInfo(cl)->GetName() << "\": \""
-               << tmpWeight << "\"" << Endl; 
-      }
-      else {
-         Log() << kINFO << "No weight expression defined for class \"" << dsi.GetClassInfo(cl)->GetName() 
-               << "\"" << Endl; 
-      }
-      
+      Log() << kINFO << "Weight expression for class \'" << dsi.GetClassInfo(cl)->GetName() << "\': \""
+            << dsi.GetClassInfo(cl)->GetWeight() << "\"" << Endl;
+
       // used for chains only
       TString currentFileName("");
-      
+
       std::vector<TreeInfo>::const_iterator treeIt(dataInput.begin(dsi.GetClassInfo(cl)->GetName()));
       for (;treeIt!=dataInput.end(dsi.GetClassInfo(cl)->GetName()); treeIt++) {
 
@@ -721,18 +706,23 @@ void  TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi,
          std::vector<Float_t> tgts(ntgts);
          std::vector<Float_t> vis(nvis);
          TreeInfo currentInfo = *treeIt;
-         
+
+         Log() << kDEBUG << "Building event vectors " << currentInfo.GetTreeType() << Endl;
+
+         EventVector& event_v = eventsmap[currentInfo.GetTreeType()].at(cl);
+
          Bool_t isChain = (TString("TChain") == currentInfo.GetTree()->ClassName());
          currentInfo.GetTree()->LoadTree(0);
          ChangeToNewTree( currentInfo, dsi );
 
          // count number of events in tree before cut
-         nInitialEvents.at(cl) += currentInfo.GetTree()->GetEntries();
-         
+         classEventCounts.nInitialEvents += currentInfo.GetTree()->GetEntries();
+
          // loop over events in ntuple
-         for (Long64_t evtIdx = 0; evtIdx < currentInfo.GetTree()->GetEntries(); evtIdx++) {
+         const UInt_t nEvts = currentInfo.GetTree()->GetEntries();
+         for (Long64_t evtIdx = 0; evtIdx < nEvts; evtIdx++) {
             currentInfo.GetTree()->LoadTree(evtIdx);
-            
+
             // may need to reload tree in case of chains
             if (isChain) {
                if (currentInfo.GetTree()->GetTree()->GetDirectory()->GetFile()->GetName() != currentFileName) {
@@ -743,27 +733,27 @@ void  TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi,
             currentInfo.GetTree()->GetEntry(evtIdx);
             Int_t sizeOfArrays = 1;
             Int_t prevArrExpr = 0;
-            
+
             // ======= evaluate all formulas =================
 
             // first we check if some of the formulas are arrays
             for (UInt_t ivar=0; ivar<nvars; ivar++) {
                Int_t ndata = fInputFormulas[ivar]->GetNdata();
-               varAvLength[cl][ivar] += ndata;
+               classEventCounts.varAvLength[ivar] += ndata;
                if (ndata == 1) continue;
                haveArrayVariable = kTRUE;
                varIsArray[ivar] = kTRUE;
                if (sizeOfArrays == 1) {
                   sizeOfArrays = ndata;
                   prevArrExpr = ivar;
-               } 
+               }
                else if (sizeOfArrays!=ndata) {
                   Log() << kERROR << "ERROR while preparing training and testing trees:" << Endl;
                   Log() << "   multiple array-type expressions of different length were encountered" << Endl;
-                  Log() << "   location of error: event " << evtIdx 
+                  Log() << "   location of error: event " << evtIdx
                         << " in tree " << currentInfo.GetTree()->GetName()
                         << " of file " << currentInfo.GetTree()->GetCurrentFile()->GetName() << Endl;
-                  Log() << "   expression " << fInputFormulas[ivar]->GetTitle() << " has " 
+                  Log() << "   expression " << fInputFormulas[ivar]->GetTitle() << " has "
                         << ndata << " entries, while" << Endl;
                   Log() << "   expression " << fInputFormulas[prevArrExpr]->GetTitle() << " has "
                         << fInputFormulas[prevArrExpr]->GetNdata() << " entries" << Endl;
@@ -782,26 +772,26 @@ void  TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi,
                formula = fCutFormulas[cl];
                if (formula) {
                   Int_t ndata = formula->GetNdata();
-                  cutVal = (ndata==1 ? 
+                  cutVal = (ndata==1 ?
                             formula->EvalInstance(0) :
                             formula->EvalInstance(idata));
                   if (TMath::IsNaN(cutVal)) {
                      containsNaN = kTRUE;
-                     Log() << kWARNING << "Cut expression resolves to infinite value (NaN): " 
+                     Log() << kWARNING << "Cut expression resolves to infinite value (NaN): "
                            << formula->GetTitle() << Endl;
                   }
                }
-               
+
                // the input variable
                for (UInt_t ivar=0; ivar<nvars; ivar++) {
                   formula = fInputFormulas[ivar];
-                  Int_t ndata = formula->GetNdata();               
-                  vars[ivar] = (ndata == 1 ? 
-                                formula->EvalInstance(0) : 
+                  Int_t ndata = formula->GetNdata();
+                  vars[ivar] = (ndata == 1 ?
+                                formula->EvalInstance(0) :
                                 formula->EvalInstance(idata));
                   if (TMath::IsNaN(vars[ivar])) {
                      containsNaN = kTRUE;
-                     Log() << kWARNING << "Input expression resolves to infinite value (NaN): " 
+                     Log() << kWARNING << "Input expression resolves to infinite value (NaN): "
                            << formula->GetTitle() << Endl;
                   }
                }
@@ -809,13 +799,13 @@ void  TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi,
                // the targets
                for (UInt_t itrgt=0; itrgt<ntgts; itrgt++) {
                   formula = fTargetFormulas[itrgt];
-                  Int_t ndata = formula->GetNdata();               
-                  tgts[itrgt] = (ndata == 1 ? 
-                                 formula->EvalInstance(0) : 
+                  Int_t ndata = formula->GetNdata();
+                  tgts[itrgt] = (ndata == 1 ?
+                                 formula->EvalInstance(0) :
                                  formula->EvalInstance(idata));
                   if (TMath::IsNaN(tgts[itrgt])) {
                      containsNaN = kTRUE;
-                     Log() << kWARNING << "Target expression resolves to infinite value (NaN): " 
+                     Log() << kWARNING << "Target expression resolves to infinite value (NaN): "
                            << formula->GetTitle() << Endl;
                   }
                }
@@ -823,13 +813,13 @@ void  TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi,
                // the spectators
                for (UInt_t itVis=0; itVis<nvis; itVis++) {
                   formula = fSpectatorFormulas[itVis];
-                  Int_t ndata = formula->GetNdata();               
-                  vis[itVis] = (ndata == 1 ? 
-                                formula->EvalInstance(0) : 
+                  Int_t ndata = formula->GetNdata();
+                  vis[itVis] = (ndata == 1 ?
+                                formula->EvalInstance(0) :
                                 formula->EvalInstance(idata));
                   if (TMath::IsNaN(vis[itVis])) {
                      containsNaN = kTRUE;
-                     Log() << kWARNING << "Spectator expression resolves to infinite value (NaN): " 
+                     Log() << kWARNING << "Spectator expression resolves to infinite value (NaN): "
                            << formula->GetTitle() << Endl;
                   }
                }
@@ -845,24 +835,24 @@ void  TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi,
                              formula->EvalInstance(idata));
                   if (TMath::IsNaN(weight)) {
                      containsNaN = kTRUE;
-                     Log() << kWARNING << "Weight expression resolves to infinite value (NaN): " 
+                     Log() << kWARNING << "Weight expression resolves to infinite value (NaN): "
                            << formula->GetTitle() << Endl;
                   }
                }
-            
-               // Count the events before rejection due to cut or NaN value
-               // (weighted and unweighted)
-               nEvBeforeCut.at(cl) ++;
+
+               // Count the events before rejection due to cut or NaN
+               // value (weighted and unweighted)
+               classEventCounts.nEvBeforeCut++;
                if (!TMath::IsNaN(weight))
-                  nWeEvBeforeCut.at(cl) += weight;
+                  classEventCounts.nWeEvBeforeCut += weight;
 
-               // apply the cut
-               // skip rest if cut is not fulfilled
+               // apply the cut, skip rest if cut is not fulfilled
                if (cutVal<0.5) continue;
 
-               // global flag if negative weights exist -> can be used by classifiers who may 
-               // require special data treatment (also print warning)
-               if (weight < 0) nNegWeights.at(cl)++;
+               // global flag if negative weights exist -> can be used
+               // by classifiers who may require special data
+               // treatment (also print warning)
+               if (weight < 0) classEventCounts.nNegWeights++;
 
                // now read the event-values (variables and regression targets)
 
@@ -874,90 +864,93 @@ void  TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi,
 
                // Count the events after rejection due to cut or NaN value
                // (weighted and unweighted)
-               nEvAfterCut.at(cl) ++;
-               nWeEvAfterCut.at(cl) += weight;
+               classEventCounts.nEvAfterCut++;
+               classEventCounts.nWeEvAfterCut += weight;
 
                // event accepted, fill temporary ntuple
-               tmpEventVector.find(currentInfo.GetTreeType())->second.at(cl).push_back(new Event(vars, tgts , vis, cl , weight));
-
+               event_v.push_back(new Event(vars, tgts , vis, cl , weight));
             }
          }
-         
          currentInfo.GetTree()->ResetBranchAddresses();
       }
    }
 
-   // for output, check maximum class name length
+   // for output format, get the maximum class name length
    Int_t maxL = dsi.GetClassNameMaxLength();
-   
+
    Log() << kINFO << "Number of events in input trees (after possible flattening of arrays):" << Endl;
    for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) {
-      Log() << kINFO << "    " 
-            << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() 
+      Log() << kINFO << "    "
+            << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
             << "      -- number of events       : "
-            << std::setw(5) << nEvBeforeCut.at(cl) 
-            << "  / sum of weights: " << std::setw(5) << nWeEvBeforeCut.at(cl) << Endl;
+            << std::setw(5) << eventCounts[cl].nEvBeforeCut
+            << "  / sum of weights: " << std::setw(5) << eventCounts[cl].nWeEvBeforeCut << Endl;
    }
 
    for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) {
-      Log() << kINFO << "    " << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() 
-            <<" tree -- total number of entries: " 
+      Log() << kINFO << "    " << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
+            <<" tree -- total number of entries: "
             << std::setw(5) << dataInput.GetEntries(dsi.GetClassInfo(cl)->GetName()) << Endl;
    }
 
-   Log() << kINFO << "Preselection:" << Endl;
+   Log() << kINFO << "Preselection: (will effect number of requested training and testing events)" << Endl;
    if (dsi.HasCuts()) {
       for (UInt_t cl = 0; cl< dsi.GetNClasses(); cl++) {
-         Log() << kINFO << "    " << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() 
+         Log() << kINFO << "    " << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
                << " requirement: \"" << dsi.GetClassInfo(cl)->GetCut() << "\"" << Endl;
-         Log() << kINFO << "    " 
-               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() 
+         Log() << kINFO << "    "
+               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
                << "      -- number of events passed: "
-               << std::setw(5) << nEvAfterCut.at(cl)
-               << "  / sum of weights: " << std::setw(5) << nWeEvAfterCut.at(cl) << Endl;
-         Log() << kINFO << "    " 
-               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() 
+               << std::setw(5) << eventCounts[cl].nEvAfterCut
+               << "  / sum of weights: " << std::setw(5) << eventCounts[cl].nWeEvAfterCut << Endl;
+         Log() << kINFO << "    "
+               << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName()
                << "      -- efficiency             : "
-               << std::setw(6) << nWeEvAfterCut.at(cl)/nWeEvBeforeCut.at(cl) << Endl;
+               << std::setw(6) << eventCounts[cl].nWeEvAfterCut/eventCounts[cl].nWeEvBeforeCut << Endl;
       }
    }
    else Log() << kINFO << "    No preselection cuts applied on event classes" << Endl;
 
    delete[] varIsArray;
-   for (size_t i=0; i<varAvLength.size(); i++)
-      delete[] varAvLength[i];
 
 }
 
 //_______________________________________________________________________
-TMVA::DataSet*  TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi, 
-                                                 TMVA::EventVectorOfClassesOfTreeType& tmpEventVector, 
-                                                 TMVA::NumberPerClassOfTreeType& nTrainTestEvents,
-                                                 const TString& splitMode,
-                                                 const TString& mixMode, 
-                                                 const TString& normMode, 
-                                                 UInt_t splitSeed)
+TMVA::DataSet*
+TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi,
+                                 EventVectorOfClassesOfTreeType& tmpEventVector,
+                                 EvtStatsPerClass& eventCounts,
+                                 const TString& splitMode,
+                                 const TString& mixMode,
+                                 const TString& normMode,
+                                 UInt_t splitSeed)
 {
    // Select and distribute unassigned events to kTraining and kTesting
-   Bool_t emptyUndefined  = kTRUE;
+   //Bool_t emptyUndefined  = kTRUE;
 
-//    // check if the vectors of all classes are empty
-   for( Int_t cls = 0, clsEnd = dsi.GetNClasses(); cls < clsEnd; ++cls ){
-      emptyUndefined &= tmpEventVector[Types::kMaxTreeType].at(cls).empty();
-   }
+   // check if the vectors of all classes are empty
+   //for( Int_t cls = 0, clsEnd = dsi.GetNClasses(); cls < clsEnd; ++cls ){
+   //   emptyUndefined &= tmpEventVector[Types::kMaxTreeType].at(cls).empty();
+   //}
 
    TMVA::RandomGenerator rndm( splitSeed );
-   
+
    // ==== splitting of undefined events to kTraining and kTesting
 
    // if splitMode contains "RANDOM", then shuffle the undefined events
-   if (splitMode.Contains( "RANDOM" ) && !emptyUndefined ) {
-      Log() << kDEBUG << "randomly shuffling events which are not yet associated to testing or training"<<Endl;
+   if (splitMode.Contains( "RANDOM" ) /*&& !emptyUndefined*/ ) {
       // random shuffle the undefined events of each class
       for( UInt_t cls = 0; cls < dsi.GetNClasses(); ++cls ){
-         std::random_shuffle(tmpEventVector[Types::kMaxTreeType].at(cls).begin(), 
-                             tmpEventVector[Types::kMaxTreeType].at(cls).end(),
-                             rndm );
+         EventVector& unspecifiedEvents = tmpEventVector[Types::kMaxTreeType].at(cls);
+         if( ! unspecifiedEvents.empty() ) {
+            Log() << kDEBUG << "randomly shuffling "
+                  << unspecifiedEvents.size()
+                  << " events of class " << cls
+                  << " which are not yet associated to testing or training" << Endl;
+            std::random_shuffle( unspecifiedEvents.begin(),
+                                 unspecifiedEvents.end(),
+                                 rndm );
+         }
       }
    }
 
@@ -968,59 +961,72 @@ TMVA::DataSet*  TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi,
       Log() << kDEBUG << "check number of training/testing events, requested and available number of events and for class " << cls << Endl;
 
       // check if enough or too many events are already in the training/testing eventvectors of the class cls
-      EventVector& eventVectorTraining = tmpEventVector.find( Types::kTraining    )->second.at(cls);
-      EventVector& eventVectorTesting  = tmpEventVector.find( Types::kTesting     )->second.at(cls);
-      EventVector& eventVectorUndefined= tmpEventVector.find( Types::kMaxTreeType )->second.at(cls);
-      
-      Int_t alreadyAvailableTraining   = eventVectorTraining.size();
-      Int_t alreadyAvailableTesting    = eventVectorTesting.size();
-      Int_t availableUndefined         = eventVectorUndefined.size();
-
-      Int_t requestedTraining          = nTrainTestEvents.find( Types::kTraining )->second.at(cls);
-      Int_t requestedTesting           = nTrainTestEvents.find( Types::kTesting  )->second.at(cls);
-      
-      Log() << kDEBUG << "availableTraining  " << alreadyAvailableTraining << Endl;
-      Log() << kDEBUG << "availableTesting   " << alreadyAvailableTesting << Endl;
-      Log() << kDEBUG << "availableUndefined " << availableUndefined << Endl;
-      Log() << kDEBUG << "requestedTraining  " << requestedTraining << Endl;
-      Log() << kDEBUG << "requestedTesting  " << requestedTesting << Endl;
-      //
-      // nomenclature r=available training
-      //              s=available testing 
-      //              u=available undefined
-      //              R= requested training
-      //              S= requested testing
-      //              nR = used for selection of training events
-      //              nS = used for selection of test events
-      //              we have: nR + nS = r+s+u
+      EventVector& eventVectorTraining  = tmpEventVector[ Types::kTraining    ].at(cls);
+      EventVector& eventVectorTesting   = tmpEventVector[ Types::kTesting     ].at(cls);
+      EventVector& eventVectorUndefined = tmpEventVector[ Types::kMaxTreeType ].at(cls);
+
+      Int_t availableTraining  = eventVectorTraining.size();
+      Int_t availableTesting   = eventVectorTesting.size();
+      Int_t availableUndefined = eventVectorUndefined.size();
+
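+      // the requested numbers of training and testing events refer to the
+      // sample before any preselection cut, so they are scaled down by the
+      // preselection efficiency; cutScaling() presumably returns
+      // nEvAfterCut/nEvBeforeCut, i.e. 1 if no cut is applied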
+      Float_t presel_scale = eventCounts[cls].cutScaling();
+
+      Int_t requestedTraining = Int_t(eventCounts[cls].nTrainingEventsRequested * presel_scale);
+      Int_t requestedTesting  = Int_t(eventCounts[cls].nTestingEventsRequested  * presel_scale);
+
+      Log() << kDEBUG << "events in training trees    : " << availableTraining  << Endl;
+      Log() << kDEBUG << "events in testing trees     : " << availableTesting   << Endl;
+      Log() << kDEBUG << "events in unspecified trees : " << availableUndefined << Endl;
+      Log() << kDEBUG << "requested for training      : " << requestedTraining;
+      if(presel_scale<1)
+         Log() << " ( " << eventCounts[cls].nTrainingEventsRequested
+               << " * " << presel_scale << " preselection efficiency)" << Endl;
+      else
+         Log() << Endl;
+      Log() << kDEBUG << "requested for testing       : " << requestedTesting;
+      if(presel_scale<1)
+         Log() << " ( " << eventCounts[cls].nTestingEventsRequested
+               << " * " << presel_scale << " preselection efficiency)" << Endl;
+      else
+         Log() << Endl;
+
+      // nomenclature r = available training
+      //              s = available testing
+      //              u = available undefined
+      //              R = requested training
+      //              S = requested testing
+      //              nR = to be used to select training events
+      //              nS = to be used to select test events
+      //              we have the constraint: nR + nS <= r+s+u,
+      //                 since we cannot use more events than we have
       //              free events: Nfree = u-Thet(R-r)-Thet(S-s)
-      //              nomenclature: Thet(x) = x,  if x>0 else 0;
+      //              nomenclature: Thet(x) = x,  if x>0 else 0
       //              nR = max(R,r) + 0.5 * Nfree
       //              nS = max(S,s) + 0.5 * Nfree
       //              nR + nS = R+S + u-R+r-S+s = u+r+s   ok! (for R>r, S>s)
       //              nR + nS = r+S + u-S+s = u+r+s       ok! (for r>R, S>s)
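+      //
+      //              a worked example with illustrative numbers:
+      //              r=1000, s=500, u=300, R=1200, S=400
+      //              Nfree = 300 - Thet(1200-1000) - Thet(400-500) = 300-200-0 = 100
+      //              nR = max(1200,1000) + 0.5*100 = 1250
+      //              nS = max(400,500)   + 0.5*100 = 550
+      //              nR + nS = 1800 = r+s+u, i.e. every event is used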
 
-      //EVT three different cases might occur here
+      // three different cases might occur here
       //
       // Case a
-      // requestedTraining and requestedTesting >0 
+      // requestedTraining and requestedTesting >0
       // free events: Nfree = u-Thet(R-r)-Thet(S-s)
       //              nR = Max(R,r) + 0.5 * Nfree
       //              nS = Max(S,s) + 0.5 * Nfree
-      // 
+      //
       // Case b
       // exactly one of requestedTraining or requestedTesting >0
       // assume training R >0
-      //    nR  = max(R,r) 
+      //    nR  = max(R,r)
       //    nS  = s+u+r-nR
       //    and  s=nS
       //
-      //Case c: 
-      // requestedTraining=0, requestedTesting=0 
+      // Case c
+      // requestedTraining=0, requestedTesting=0
       // Nfree = u-|r-s|
       // if NFree >=0
       //    R = Max(r,s) + 0.5 * Nfree = S
-      // else if r>s 
+      // else if r>s
       //    R = r; S=s+u
       // else
       //    R = r+u; S=s
@@ -1030,90 +1036,119 @@ TMVA::DataSet*  TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi,
       // distribute undefined events according to nR, nS
       // finally determine actual sub samples from nR and nS to be used in training / testing
       //
-      // implementation of case C)
-      int useForTesting,useForTraining;
-      if( (requestedTraining == 0) && (requestedTesting == 0)){ 
-         // 0 means automatic distribution of events
-         Log() << kDEBUG << "requested 0" << Endl;         
-         // try to get the same number of events in training and testing for this class (balance)
-         Int_t NFree = availableUndefined - TMath::Abs(alreadyAvailableTraining - alreadyAvailableTesting);
-         if (NFree >=0){
-            requestedTraining = TMath::Max(alreadyAvailableTraining,alreadyAvailableTesting) + NFree/2;
-            requestedTesting  = availableUndefined+alreadyAvailableTraining+alreadyAvailableTesting - requestedTraining; // the rest
-         } else if (alreadyAvailableTraining > alreadyAvailableTesting){ //r>s
-            requestedTraining = alreadyAvailableTraining;
-            requestedTesting  = alreadyAvailableTesting +availableUndefined;
-         }
-         else {
-            requestedTraining = alreadyAvailableTraining+availableUndefined;
-            requestedTesting  = alreadyAvailableTesting;            
+
+      Int_t useForTesting(0), useForTraining(0);
+      Int_t allAvailable(availableUndefined + availableTraining + availableTesting);
+
+      if( (requestedTraining == 0) && (requestedTesting == 0)){
+
+         // Case C: balance the number of training and testing events
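+         // (e.g. r=100, s=200, u=500: u >= |r-s|, so training and testing
+         //  each get (100+200+500)/2 = 400 events; with u=50 < |r-s|=100
+         //  instead, training gets r+u=150 and testing keeps s=200)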
+
+         if ( availableUndefined >= TMath::Abs(availableTraining - availableTesting) ) {
+            // enough unspecified events are available to balance training and testing
+            useForTraining = allAvailable/2;
+            useForTesting  = allAvailable - useForTraining; // the rest, so no event is lost when allAvailable is odd
+         } else {
+            // all unspecified are assigned to the smaller of training / testing
+            useForTraining = availableTraining;
+            useForTesting  = availableTesting;
+            if (availableTraining < availableTesting)
+               useForTraining += availableUndefined;
+            else
+               useForTesting += availableUndefined;
          }
-         useForTraining = requestedTraining; 
-         useForTesting  = requestedTesting; 
+         requestedTraining = useForTraining;
+         requestedTesting  = useForTesting;
       }
-      else if (requestedTesting == 0){ // case B)
-         useForTraining = TMath::Max(requestedTraining,alreadyAvailableTraining);
-         useForTesting= availableUndefined+alreadyAvailableTraining+alreadyAvailableTesting - useForTraining; // the rest
+
+      else if (requestedTesting == 0){
+         // case B
+         useForTraining = TMath::Max(requestedTraining,availableTraining);
+         if (allAvailable <  useForTraining) {
+            Log() << kFATAL << "More events requested for training ("
+                  << requestedTraining << ") than available ("
+                  << allAvailable << ")!" << Endl;
+         }
+         useForTesting  = allAvailable - useForTraining; // the rest
          requestedTesting = useForTesting;
       }
+
       else if (requestedTraining == 0){ // case B)
-         useForTesting = TMath::Max(requestedTesting,alreadyAvailableTesting);
-         useForTraining= availableUndefined+alreadyAvailableTraining+alreadyAvailableTesting - useForTesting; // the rest
+         useForTesting = TMath::Max(requestedTesting,availableTesting);
+         if (allAvailable <  useForTesting) {
+            Log() << kFATAL << "More events requested for testing ("
+                  << requestedTesting << ") than available ("
+                  << allAvailable << ")!" << Endl;
+         }
+         useForTraining = allAvailable - useForTesting; // the rest
          requestedTraining = useForTraining;
       }
-      else{ // case A
-         int NFree = availableUndefined-TMath::Max(requestedTraining-alreadyAvailableTraining,0)-TMath::Max(requestedTesting-alreadyAvailableTesting,0);
+
+      else {
+         // Case A
+         // requestedTraining R and requestedTesting S >0
+         // free events: Nfree = u-Thet(R-r)-Thet(S-s)
+         //              nR = Max(R,r) + 0.5 * Nfree
+         //              nS = Max(S,s) + 0.5 * Nfree
+         Int_t stillNeedForTraining = TMath::Max(requestedTraining-availableTraining,0);
+         Int_t stillNeedForTesting = TMath::Max(requestedTesting-availableTesting,0);
+
+         int NFree = availableUndefined - stillNeedForTraining - stillNeedForTesting;
          if (NFree <0) NFree = 0;
-         useForTraining = TMath::Max(requestedTraining,alreadyAvailableTraining) + NFree/2;
-         useForTesting= availableUndefined+alreadyAvailableTraining+alreadyAvailableTesting - useForTraining; // the rest
+         useForTraining = TMath::Max(requestedTraining,availableTraining) + NFree/2;
+      useForTesting = allAvailable - useForTraining; // the rest
       }
+
       Log() << kDEBUG << "determined event sample size to select training sample from="<<useForTraining<<Endl;
       Log() << kDEBUG << "determined event sample size to select test sample from="<<useForTesting<<Endl;
-      
 
-      // associate undefined events 
+
+      // associate undefined events
       if( splitMode == "ALTERNATE" ){
          Log() << kDEBUG << "split 'ALTERNATE'" << Endl;
-	 Int_t nTraining = alreadyAvailableTraining;
-	 Int_t nTesting  = alreadyAvailableTesting;
+         Int_t nTraining = availableTraining;
+         Int_t nTesting  = availableTesting;
          for( EventVector::iterator it = eventVectorUndefined.begin(), itEnd = eventVectorUndefined.end(); it != itEnd; ){
-	    ++nTraining;
-	    if( nTraining <= requestedTraining ){
-	       eventVectorTraining.insert( eventVectorTraining.end(), (*it) );
-	       ++it;
-	    }
+            ++nTraining;
+            if( nTraining <= requestedTraining ){
+               eventVectorTraining.insert( eventVectorTraining.end(), (*it) );
+               ++it;
+            }
             if( it != itEnd ){
-	       ++nTesting;
+               ++nTesting;
                eventVectorTesting.insert( eventVectorTesting.end(), (*it) );
                ++it;
             }
          }
-      }else{
+      } else {
          Log() << kDEBUG << "split '" << splitMode << "'" << Endl;
 
-	 // test if enough events are available
-	 Log() << kDEBUG << "availableundefined : " << availableUndefined << Endl;
-	 Log() << kDEBUG << "useForTraining     : " << useForTraining << Endl;
-	 Log() << kDEBUG << "useForTesting      : " << useForTesting  << Endl;
-	 Log() << kDEBUG << "alreadyAvailableTraining      : " << alreadyAvailableTraining  << Endl;
-	 Log() << kDEBUG << "alreadyAvailableTesting       : " << alreadyAvailableTesting  << Endl;
-
-	 if( availableUndefined<(useForTraining-alreadyAvailableTraining) ||
-	     availableUndefined<(useForTesting -alreadyAvailableTesting ) || 
-	     availableUndefined<(useForTraining+useForTesting-alreadyAvailableTraining-alreadyAvailableTesting ) ){
-	    Log() << kFATAL << "More events requested than available!" << Endl;
-	 }
-
-	 // select the events
-         if (useForTraining>alreadyAvailableTraining){
-            eventVectorTraining.insert(  eventVectorTraining.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTraining- alreadyAvailableTraining );
-            eventVectorUndefined.erase( eventVectorUndefined.begin(), eventVectorUndefined.begin() + useForTraining- alreadyAvailableTraining);
+         // test if enough events are available
+         Log() << kDEBUG << "availableundefined : " << availableUndefined << Endl;
+         Log() << kDEBUG << "useForTraining     : " << useForTraining << Endl;
+         Log() << kDEBUG << "useForTesting      : " << useForTesting  << Endl;
+         Log() << kDEBUG << "availableTraining      : " << availableTraining  << Endl;
+         Log() << kDEBUG << "availableTesting       : " << availableTesting  << Endl;
+
+         if( availableUndefined<(useForTraining-availableTraining) ||
+             availableUndefined<(useForTesting -availableTesting ) ||
+             availableUndefined<(useForTraining+useForTesting-availableTraining-availableTesting ) ){
+            Log() << kFATAL << "More events requested than available!" << Endl;
+         }
+
+         // select the events
+         if (useForTraining>availableTraining){
+            eventVectorTraining.insert(  eventVectorTraining.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTraining- availableTraining );
+            eventVectorUndefined.erase( eventVectorUndefined.begin(), eventVectorUndefined.begin() + useForTraining- availableTraining);
          }
-         if (useForTesting>alreadyAvailableTesting){
-            eventVectorTesting.insert(  eventVectorTesting.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTesting- alreadyAvailableTesting );
+         if (useForTesting>availableTesting){
+            eventVectorTesting.insert(  eventVectorTesting.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTesting- availableTesting );
          }
       }
-      eventVectorUndefined.clear();      
+      eventVectorUndefined.clear();
       // finally shorten the event vectors to the requested size by removing random events
       if (splitMode.Contains( "RANDOM" )){
          UInt_t sizeTraining  = eventVectorTraining.size();
@@ -1153,21 +1188,21 @@ TMVA::DataSet*  TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi,
          }
       }
       else { // erase at end
-	 if( eventVectorTraining.size() < UInt_t(requestedTraining) )
-	    Log() << kWARNING << "DataSetFactory/requested number of training samples larger than size of eventVectorTraining.\n"
-		  << "There is probably an issue. Please contact the TMVA developers." << Endl;
+         if( eventVectorTraining.size() < UInt_t(requestedTraining) )
+            Log() << kWARNING << "DataSetFactory/requested number of training samples larger than size of eventVectorTraining.\n"
+                  << "There is probably an issue. Please contact the TMVA developers." << Endl;
          std::for_each( eventVectorTraining.begin()+requestedTraining, eventVectorTraining.end(), DeleteFunctor<Event>() );
          eventVectorTraining.erase(eventVectorTraining.begin()+requestedTraining,eventVectorTraining.end());
 
-	 if( eventVectorTesting.size() < UInt_t(requestedTesting) )
-	    Log() << kWARNING << "DataSetFactory/requested number of testing samples larger than size of eventVectorTesting.\n"
-		  << "There is probably an issue. Please contact the TMVA developers." << Endl;
+         if( eventVectorTesting.size() < UInt_t(requestedTesting) )
+            Log() << kWARNING << "DataSetFactory/requested number of testing samples larger than size of eventVectorTesting.\n"
+                  << "There is probably an issue. Please contact the TMVA developers." << Endl;
          std::for_each( eventVectorTesting.begin()+requestedTesting, eventVectorTesting.end(), DeleteFunctor<Event>() );
          eventVectorTesting.erase(eventVectorTesting.begin()+requestedTesting,eventVectorTesting.end());
       }
    }
 
-   TMVA::DataSetFactory::RenormEvents( dsi, tmpEventVector, normMode );
+   TMVA::DataSetFactory::RenormEvents( dsi, tmpEventVector, eventCounts, normMode );
 
    Int_t trainingSize = 0;
    Int_t testingSize  = 0;
@@ -1211,35 +1246,35 @@ TMVA::DataSet*  TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi,
       Log() << kDEBUG << "insert class 0 into training and test vector" << Endl;
       trainingEventVector->insert( trainingEventVector->end(), tmpEventVector[Types::kTraining].at(0).begin(), tmpEventVector[Types::kTraining].at(0).end() );
       testingEventVector->insert( testingEventVector->end(),   tmpEventVector[Types::kTesting].at(0).begin(),  tmpEventVector[Types::kTesting].at(0).end() );
-      
+
       // insert other classes
       EvtVecIt itTarget;
       for( UInt_t cls = 1; cls < dsi.GetNClasses(); ++cls ){
          Log() << kDEBUG << "insert class " << cls << Endl;
          // training vector
          itTarget = trainingEventVector->begin() - 1; // start one before begin
-         // loop over source 
+         // loop over source
          for( itEvent = tmpEventVector[Types::kTraining].at(cls).begin(), itEventEnd = tmpEventVector[Types::kTraining].at(cls).end(); itEvent != itEventEnd; ++itEvent ){
 //            if( std::distance( itTarget, trainingEventVector->end()) < Int_t(cls+1) ) {
             if( (trainingEventVector->end() - itTarget) < Int_t(cls+1) ) {
                itTarget = trainingEventVector->end();
                trainingEventVector->insert( itTarget, itEvent, itEventEnd ); // fill in the rest without mixing
                break;
-            }else{ 
+            }else{
                itTarget += cls+1;
                trainingEventVector->insert( itTarget, (*itEvent) ); // fill event
             }
          }
          // testing vector
          itTarget = testingEventVector->begin() - 1;
-         // loop over source 
+         // loop over source
          for( itEvent = tmpEventVector[Types::kTesting].at(cls).begin(), itEventEnd = tmpEventVector[Types::kTesting].at(cls).end(); itEvent != itEventEnd; ++itEvent ){
 //             if( std::distance( itTarget, testingEventVector->end()) < Int_t(cls+1) ) {
             if( ( testingEventVector->end() - itTarget ) < Int_t(cls+1) ) {
                itTarget = testingEventVector->end();
                testingEventVector->insert( itTarget, itEvent, itEventEnd ); // fill in the rest without mixing
                break;
-            }else{ 
+            }else{
                itTarget += cls+1;
                testingEventVector->insert( itTarget, (*itEvent) ); // fill event
             }
@@ -1250,13 +1285,13 @@ TMVA::DataSet*  TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi,
       //       std::cout << std::endl;
       //       std::cout << "TRAINING VECTOR" << std::endl;
       //       std::transform( trainingEventVector->begin(), trainingEventVector->end(), ostream_iterator<Int_t>(std::cout, "|"), std::mem_fun(&TMVA::Event::GetClass) );
-      
+
       //       std::cout << std::endl;
       //       std::cout << "TESTING VECTOR" << std::endl;
       //       std::transform( testingEventVector->begin(), testingEventVector->end(), ostream_iterator<Int_t>(std::cout, "|"), std::mem_fun(&TMVA::Event::GetClass) );
       //       std::cout << std::endl;
 
-   }else{ 
+   }else{
       for( UInt_t cls = 0; cls < dsi.GetNClasses(); ++cls ){
          trainingEventVector->insert( trainingEventVector->end(), tmpEventVector[Types::kTraining].at(cls).begin(), tmpEventVector[Types::kTraining].at(cls).end() );
          testingEventVector->insert ( testingEventVector->end(),  tmpEventVector[Types::kTesting].at(cls).begin(),  tmpEventVector[Types::kTesting].at(cls).end()  );
@@ -1288,20 +1323,22 @@ TMVA::DataSet*  TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi,
    // create dataset
    DataSet* ds = new DataSet(dsi);
 
-   Log() << kINFO << "Create internal training tree" << Endl;        
-   ds->SetEventCollection(trainingEventVector, Types::kTraining ); 
-   Log() << kINFO << "Create internal testing tree" << Endl;        
-   ds->SetEventCollection(testingEventVector,  Types::kTesting  ); 
+   Log() << kINFO << "Create internal training tree" << Endl;
+   ds->SetEventCollection(trainingEventVector, Types::kTraining );
+   Log() << kINFO << "Create internal testing tree" << Endl;
+   ds->SetEventCollection(testingEventVector,  Types::kTesting  );
 
 
    return ds;
-   
+
 }
 
 //_______________________________________________________________________
-void  TMVA::DataSetFactory::RenormEvents( TMVA::DataSetInfo& dsi, 
-                                          TMVA::EventVectorOfClassesOfTreeType& tmpEventVector, 
-                                          const TString&        normMode )
+void
+TMVA::DataSetFactory::RenormEvents( TMVA::DataSetInfo& dsi,
+                                    EventVectorOfClassesOfTreeType& tmpEventVector,
+                                    const EvtStatsPerClass& eventCounts,
+                                    const TString& normMode )
 {
    // ============================================================
    // renormalisation
@@ -1406,20 +1443,18 @@ void  TMVA::DataSetFactory::RenormEvents( TMVA::DataSetInfo& dsi,
    // ---------------------------------
    // now apply the normalization factors
    Int_t maxL = dsi.GetClassNameMaxLength();
-   for (UInt_t cls = 0, clsEnd = dsi.GetNClasses(); cls<clsEnd; ++cls) { 
-      Log() << kINFO << "--> Rescale " << setiosflags(ios::left) << std::setw(maxL) 
+   for (UInt_t cls = 0, clsEnd = dsi.GetNClasses(); cls<clsEnd; ++cls) {
+      Log() << kINFO << "--> Rescale " << setiosflags(ios::left) << std::setw(maxL)
             << dsi.GetClassInfo(cls)->GetName() << " event weights by factor: " << renormFactor.at(cls) << Endl;
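+      // rescale the weight of every training and testing event of this
+      // class; std::bind2nd fixes the factor as the second argument, so
+      // for_each applies Event::ScaleWeight(renormFactor) to each event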
-      std::for_each( tmpEventVector[Types::kTraining].at(cls).begin(), 
+      std::for_each( tmpEventVector[Types::kTraining].at(cls).begin(),
                      tmpEventVector[Types::kTraining].at(cls).end(),
                      std::bind2nd(std::mem_fun(&TMVA::Event::ScaleWeight),renormFactor.at(cls)) );
-      std::for_each( tmpEventVector[Types::kTesting].at(cls).begin(), 
+      std::for_each( tmpEventVector[Types::kTesting].at(cls).begin(),
                      tmpEventVector[Types::kTesting].at(cls).end(),
                      std::bind2nd(std::mem_fun(&TMVA::Event::ScaleWeight),renormFactor.at(cls)) );
    }
 
 
-
-      
    // ---------------------------------
    // for information purposes
    dsi.SetNormalization( normMode );
@@ -1454,20 +1489,29 @@ void  TMVA::DataSetFactory::RenormEvents( TMVA::DataSetInfo& dsi,
       testingSumWeights  += testingSumWeightsPerClass.at(cls);
 
       // output statistics
-      Log() << kINFO << setiosflags(ios::left) << std::setw(maxL) 
-            << dsi.GetClassInfo(cls)->GetName() << " -- " 
-            << "training entries            : " << trainingSizePerClass.at(cls) 
-            <<  " (" << "sum of weights: " << trainingSumWeightsPerClass.at(cls) << ")" << Endl;
-      Log() << kINFO << setiosflags(ios::left) << std::setw(maxL) 
-            << dsi.GetClassInfo(cls)->GetName() << " -- " 
-            << "testing entries             : " << testingSizePerClass.at(cls) 
-            <<  " (" << "sum of weights: " << testingSumWeightsPerClass.at(cls) << ")" << Endl;
-      Log() << kINFO << setiosflags(ios::left) << std::setw(maxL) 
-            << dsi.GetClassInfo(cls)->GetName() << " -- " 
-            << "training and testing entries: " 
-            << (trainingSizePerClass.at(cls)+testingSizePerClass.at(cls)) 
-            << " (" << "sum of weights: " 
+
+      Log() << kINFO << setiosflags(ios::left) << std::setw(maxL)
+            << dsi.GetClassInfo(cls)->GetName() << " -- "
+            << "training events            : " << trainingSizePerClass.at(cls)
+            <<  " (sum of weights: " << trainingSumWeightsPerClass.at(cls) << ")"
+            <<  " - requested were " << eventCounts[cls].nTrainingEventsRequested << " events" << Endl;
+      Log() << kINFO << setiosflags(ios::left) << std::setw(maxL)
+            << dsi.GetClassInfo(cls)->GetName() << " -- "
+            << "testing events             : " << testingSizePerClass.at(cls)
+            <<  " (sum of weights: " << testingSumWeightsPerClass.at(cls) << ")"
+            <<  " - requested were " << eventCounts[cls].nTestingEventsRequested << " events" << Endl;
+      Log() << kINFO << setiosflags(ios::left) << std::setw(maxL)
+            << dsi.GetClassInfo(cls)->GetName() << " -- "
+            << "training and testing events: "
+            << (trainingSizePerClass.at(cls)+testingSizePerClass.at(cls))
+            << " (sum of weights: "
             << (trainingSumWeightsPerClass.at(cls)+testingSumWeightsPerClass.at(cls)) << ")" << Endl;
+      if(eventCounts[cls].nEvAfterCut<eventCounts[cls].nEvBeforeCut) {
+         Log() << kINFO << setiosflags(ios::left) << std::setw(maxL)
+               << dsi.GetClassInfo(cls)->GetName() << " -- "
+               << "due to the preselection a scaling factor has been applied to the numbers of requested events: "
+               << eventCounts[cls].cutScaling() << Endl;
+      }
    }
 
 }
diff --git a/tmva/src/DataSetInfo.cxx b/tmva/src/DataSetInfo.cxx
index e59f7c47eb0..1d773b0e851 100644
--- a/tmva/src/DataSetInfo.cxx
+++ b/tmva/src/DataSetInfo.cxx
@@ -58,7 +58,7 @@
 #endif
 
 //_______________________________________________________________________
-TMVA::DataSetInfo::DataSetInfo(const TString& name) 
+TMVA::DataSetInfo::DataSetInfo(const TString& name)
    : TObject(),
      fDataSetManager(NULL),
      fName(name),
@@ -81,7 +81,7 @@ TMVA::DataSetInfo::DataSetInfo(const TString& name)
 }
 
 //_______________________________________________________________________
-TMVA::DataSetInfo::~DataSetInfo() 
+TMVA::DataSetInfo::~DataSetInfo()
 {
    // destructor
    ClearDataSet();
@@ -101,6 +101,12 @@ void TMVA::DataSetInfo::ClearDataSet() const
    if(fDataSet!=0) { delete fDataSet; fDataSet=0; }
 }
 
+//_______________________________________________________________________
+void
+TMVA::DataSetInfo::SetMsgType( EMsgType t ) const
+{
+   // set the minimum severity level of the DataSetInfo message logger
+   fLogger->SetMinType(t);
+}
+
 //_______________________________________________________________________
 TMVA::ClassInfo* TMVA::DataSetInfo::AddClass( const TString& className ) 
 {
@@ -119,12 +125,6 @@ TMVA::ClassInfo* TMVA::DataSetInfo::AddClass( const TString& className )
    return fClasses.back();
 }
 
-//_______________________________________________________________________
-void TMVA::DataSetInfo::SetMsgType( EMsgType t ) const 
-{  
-    fLogger->SetMinType(t);  
-} 
-
 //_______________________________________________________________________
 TMVA::ClassInfo* TMVA::DataSetInfo::GetClassInfo( const TString& name ) const 
 {
@@ -166,12 +166,12 @@ std::vector<Float_t>*  TMVA::DataSetInfo::GetTargetsForMulticlass( const TMVA::E
 //   fTargetsForMulticlass->resize( GetNClasses() );
    fTargetsForMulticlass->assign( GetNClasses(), 0.0 );
    fTargetsForMulticlass->at( ev->GetClass() ) = 1.0;
-   return fTargetsForMulticlass; 
+   return fTargetsForMulticlass;
 }
 
 
 //_______________________________________________________________________
-Bool_t TMVA::DataSetInfo::HasCuts() const 
+Bool_t TMVA::DataSetInfo::HasCuts() const
 {
    Bool_t hasCuts = kFALSE;
    for (std::vector<ClassInfo*>::iterator it = fClasses.begin(); it < fClasses.end(); it++) {
@@ -181,22 +181,26 @@ Bool_t TMVA::DataSetInfo::HasCuts() const
 }
 
 //_______________________________________________________________________
-const TMatrixD* TMVA::DataSetInfo::CorrelationMatrix( const TString& className ) const 
-{ 
+const TMatrixD* TMVA::DataSetInfo::CorrelationMatrix( const TString& className ) const
+{
    ClassInfo* ptr = GetClassInfo(className);
    return ptr?ptr->GetCorrelationMatrix():0;
 }
 
 //_______________________________________________________________________
-TMVA::VariableInfo& TMVA::DataSetInfo::AddVariable( const TString& expression, const TString& title, const TString& unit, 
-                                                    Double_t min, Double_t max, char varType,
-                                                    Bool_t normalized, void* external )
+TMVA::VariableInfo& TMVA::DataSetInfo::AddVariable( const TString& expression,
+                                                    const TString& title,
+                                                    const TString& unit,
+                                                    Double_t min, Double_t max,
+                                                    char varType,
+                                                    Bool_t normalized,
+                                                    void* external )
 {
-   // add a variable (can be a complex expression) to the set of variables used in
-   // the MV analysis
+   // add a variable (can be a complex expression) to the set of
+   // variables used in the MV analysis
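+   // (the expression may combine tree branches, e.g. "var1+var2"; any
+   // blanks are stripped before it is stored)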
    TString regexpr = expression; // remove possible blanks
    regexpr.ReplaceAll(" ", "" );
-   fVariables.push_back(VariableInfo( regexpr, title, unit, 
+   fVariables.push_back(VariableInfo( regexpr, title, unit,
                                       fVariables.size()+1, varType, external, min, max, normalized ));
    fNeedsRebuilding = kTRUE;
    return fVariables.back();
@@ -211,17 +215,21 @@ TMVA::VariableInfo& TMVA::DataSetInfo::AddVariable( const VariableInfo& varInfo)
 }
 
 //_______________________________________________________________________
-TMVA::VariableInfo& TMVA::DataSetInfo::AddTarget( const TString& expression, const TString& title, const TString& unit, 
-                                                  Double_t min, Double_t max, 
-                                                  Bool_t normalized, void* external )
+TMVA::VariableInfo& TMVA::DataSetInfo::AddTarget( const TString& expression,
+                                                  const TString& title,
+                                                  const TString& unit,
+                                                  Double_t min, Double_t max,
+                                                  Bool_t normalized,
+                                                  void* external )
 {
-   // add a variable (can be a complex expression) to the set of variables used in
-   // the MV analysis
+   // add a target (can be a complex expression) to the set of
+   // targets used in the MV analysis
    TString regexpr = expression; // remove possible blanks
    regexpr.ReplaceAll(" ", "" );
    char type='F';
-   fTargets.push_back(VariableInfo( regexpr, title, unit, 
-                                    fTargets.size()+1, type, external, min, max, normalized ));
+   fTargets.push_back(VariableInfo( regexpr, title, unit,
+                                    fTargets.size()+1, type, external, min,
+                                    max, normalized ));
    fNeedsRebuilding = kTRUE;
    return fTargets.back();
 }
@@ -235,7 +243,9 @@ TMVA::VariableInfo& TMVA::DataSetInfo::AddTarget( const VariableInfo& varInfo){
 }
 
 //_______________________________________________________________________
-TMVA::VariableInfo& TMVA::DataSetInfo::AddSpectator( const TString& expression, const TString& title, const TString& unit, 
+TMVA::VariableInfo& TMVA::DataSetInfo::AddSpectator( const TString& expression,
+                                                     const TString& title,
+                                                     const TString& unit,
                                                      Double_t min, Double_t max, char type,
                                                      Bool_t normalized, void* external )
 {
@@ -243,7 +253,7 @@ TMVA::VariableInfo& TMVA::DataSetInfo::AddSpectator( const TString& expression,
    // the MV analysis
    TString regexpr = expression; // remove possible blanks
    regexpr.ReplaceAll(" ", "" );
-   fSpectators.push_back(VariableInfo( regexpr, title, unit, 
+   fSpectators.push_back(VariableInfo( regexpr, title, unit,
                                        fSpectators.size()+1, type, external, min, max, normalized ));
    fNeedsRebuilding = kTRUE;
    return fSpectators.back();
@@ -282,7 +292,7 @@ void TMVA::DataSetInfo::SetWeightExpression( const TString& expr, const TString&
    if (className != "") {
       TMVA::ClassInfo* ci = AddClass(className);
       ci->SetWeight( expr );
-   } 
+   }
    else {
       // no class name specified, set weight for all classes
       if (fClasses.size()==0) {
@@ -321,7 +331,7 @@ void TMVA::DataSetInfo::AddCut( const TCut& cut, const TString& className )
    // set the cut for the classes
    if (className == "") {  // if no className has been given set the cut for all the classes
       for (std::vector<ClassInfo*>::iterator it = fClasses.begin(); it < fClasses.end(); it++) {
-         const TCut& oldCut = (*it)->GetCut(); 
+         const TCut& oldCut = (*it)->GetCut();
          (*it)->SetCut( oldCut+cut );
       }
    }
@@ -344,7 +354,7 @@ std::vector<TString> TMVA::DataSetInfo::GetListOfVariables() const
 
 //_______________________________________________________________________
 void TMVA::DataSetInfo::PrintCorrelationMatrix( const TString& className )
-{ 
+{
    // calculates the correlation matrices for signal and background, 
    // prints them to standard output, and fills 2D histograms
    Log() << kINFO << "Correlation matrix (" << className << "):" << Endl;
@@ -367,7 +377,7 @@ TH2* TMVA::DataSetInfo::CreateCorrelationMatrixHist( const TMatrixD* m,
       for (UInt_t jvar=0; jvar<nvar; jvar++) {
          (*tm)(ivar, jvar) = (*m)(ivar,jvar);
       }
-   }  
+   }
 
    TH2F* h2 = new TH2F( *tm );
    h2->SetNameTitle( hName, hTitle );
@@ -379,7 +389,7 @@ TH2* TMVA::DataSetInfo::CreateCorrelationMatrixHist( const TMatrixD* m,
    
    // present in percent, and round off digits
    // also, use absolute value of correlation coefficient (ignore sign)
-   h2->Scale( 100.0  ); 
+   h2->Scale( 100.0  );
    for (UInt_t ibin=1; ibin<=nvar; ibin++) {
       for (UInt_t jbin=1; jbin<=nvar; jbin++) {
          h2->SetBinContent( ibin, jbin, Int_t(h2->GetBinContent( ibin, jbin )) );
@@ -404,24 +414,24 @@ TH2* TMVA::DataSetInfo::CreateCorrelationMatrixHist( const TMatrixD* m,
    //     gROOT->SetStyle("Plain");
    //     TStyle* gStyle = gROOT->GetStyle( "Plain" );
    //     gStyle->SetPalette( 1, 0 );
-   //     TPaletteAxis* paletteAxis 
+   //     TPaletteAxis* paletteAxis
    //                   = (TPaletteAxis*)h2->GetListOfFunctions()->FindObject( "palette" );
    // -------------------------------------------------------------------------------------
-   
+
    Log() << kDEBUG << "Created correlation matrix as 2D histogram: " << h2->GetName() << Endl;
-   
+
    return h2;
 }
 
 //_______________________________________________________________________
-TMVA::DataSet* TMVA::DataSetInfo::GetDataSet() const 
+TMVA::DataSet* TMVA::DataSetInfo::GetDataSet() const
 {
    // returns data set
    if (fDataSet==0 || fNeedsRebuilding) {
       if(fDataSet!=0) ClearDataSet();
 //      fDataSet = DataSetManager::Instance().CreateDataSet(GetName()); //DSMTEST replaced by following lines
       if( !fDataSetManager )
-	 Log() << kFATAL << "DataSetManager has not been set in DataSetInfo (GetDataSet() )." << Endl;
+         Log() << kFATAL << "DataSetManager has not been set in DataSetInfo (GetDataSet() )." << Endl;
       fDataSet = fDataSetManager->CreateDataSet(GetName());
 
       fNeedsRebuilding = kFALSE;
diff --git a/tmva/src/DataSetManager.cxx b/tmva/src/DataSetManager.cxx
index caa8119d719..63c81e0ea14 100644
--- a/tmva/src/DataSetManager.cxx
+++ b/tmva/src/DataSetManager.cxx
@@ -51,7 +51,7 @@ using std::endl;
 // void TMVA::DataSetManager::DestroyInstance() { if (fgDSManager) { delete fgDSManager; fgDSManager=0; } } // DSMTEST removed
 
 //_______________________________________________________________________
-TMVA::DataSetManager::DataSetManager( DataInputHandler& dataInput ) 
+TMVA::DataSetManager::DataSetManager( DataInputHandler& dataInput )
    : fDataInput(dataInput),
      fDataSetInfoCollection(),
      fLogger( new MsgLogger("DataSetManager", kINFO) )
@@ -60,20 +60,20 @@ TMVA::DataSetManager::DataSetManager( DataInputHandler& dataInput )
 }
 
 //_______________________________________________________________________
-TMVA::DataSetManager::~DataSetManager() 
+TMVA::DataSetManager::~DataSetManager()
 {
    // destructor
 //   fDataSetInfoCollection.SetOwner(); // DSMTEST --> created a segfault because the DataSetInfo-objects got deleted twice
 
-   TMVA::DataSetFactory::destroyInstance(); 
+   TMVA::DataSetFactory::destroyInstance();
    
    delete fLogger;
 }
 
 //_______________________________________________________________________
-TMVA::DataSet* TMVA::DataSetManager::CreateDataSet( const TString& dsiName ) 
+TMVA::DataSet* TMVA::DataSetManager::CreateDataSet( const TString& dsiName )
 {
-   // Creates the singleton dataset 
+   // Creates the singleton dataset
    DataSetInfo* dsi = GetDataSetInfo( dsiName );
    if (!dsi) Log() << kFATAL << "DataSetInfo object '" << dsiName << "' not found" << Endl;
 
diff --git a/tmva/src/Factory.cxx b/tmva/src/Factory.cxx
index 1dee0b59184..c5d839953f2 100644
--- a/tmva/src/Factory.cxx
+++ b/tmva/src/Factory.cxx
@@ -621,8 +621,8 @@ void TMVA::Factory::PrepareTrainingAndTestTree( const TCut& cut, Int_t Ntrain, I
 
 //_______________________________________________________________________
 void TMVA::Factory::PrepareTrainingAndTestTree( const TCut& cut, const TString& opt )
-{ 
-   // prepare the training and test trees 
+{
+   // prepare the training and test trees
    // -> same cuts for signal and background
    SetInputTreesFromEventAssignTrees();
 
diff --git a/tmva/src/MethodBoost.cxx b/tmva/src/MethodBoost.cxx
index 1041ad144b0..018d8690442 100644
--- a/tmva/src/MethodBoost.cxx
+++ b/tmva/src/MethodBoost.cxx
@@ -83,6 +83,7 @@ TMVA::MethodBoost::MethodBoost( const TString& jobName,
    , fMethodError(0)
    , fOrigMethodError(0)
    , fBoostWeight(0)
+   , fDetailedMonitoring(kFALSE)
    , fADABoostBeta(0)
    , fRandomSeed(0)
    , fBoostedMethodTitle(methodTitle)
@@ -109,6 +110,7 @@ TMVA::MethodBoost::MethodBoost( DataSetInfo& dsi,
    , fMethodError(0)
    , fOrigMethodError(0)
    , fBoostWeight(0)
+   , fDetailedMonitoring(kFALSE)
    , fADABoostBeta(0)
    , fRandomSeed(0)
    , fBoostedMethodTitle("")
diff --git a/tmva/src/MethodCategory.cxx b/tmva/src/MethodCategory.cxx
index f1354d07d45..cfc7ef73dcb 100644
--- a/tmva/src/MethodCategory.cxx
+++ b/tmva/src/MethodCategory.cxx
@@ -1,5 +1,5 @@
 // @(#)root/tmva $Id$
-// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Eckhard von Toerne
+// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard von Toerne
 
 /**********************************************************************************
  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
@@ -11,17 +11,17 @@
  *      Virtual base class for all MVA methods                                    *
  *                                                                                *
  * Authors (alphabetical):                                                        *
- *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
- *      Nadim Sah       <Nadim.Sah@cern.ch>      - Berlin, Germany                *
- *      Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland           *
- *      Joerg Stelzer   <Joerg.Stelzer@cern.ch>  - CERN, Switzerland              *
- *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
- *      Jan Therhaag  <Jan.Therhaag@cern.ch>        - U of Bonn, Germany          *
- *      Eckhard v. Toerne  <evt@uni-bonn.de>        - U of Bonn, Germany          *
+ *      Andreas Hoecker   <Andreas.Hocker@cern.ch>   - CERN, Switzerland          *
+ *      Nadim Sah         <Nadim.Sah@cern.ch>        - Berlin, Germany            *
+ *      Peter Speckmayer  <Peter.Speckmazer@cern.ch> - CERN, Switzerland          *
+ *      Joerg Stelzer     <Joerg.Stelzer@cern.ch>    - MSU East Lansing, USA      *
+ *      Helge Voss        <Helge.Voss@cern.ch>       - MPI-K Heidelberg, Germany  *
+ *      Jan Therhaag      <Jan.Therhaag@cern.ch>     - U of Bonn, Germany         *
+ *      Eckhard v. Toerne <evt@uni-bonn.de>          - U of Bonn, Germany         *
  *                                                                                *
  * Copyright (c) 2005-2011:                                                       *
  *      CERN, Switzerland                                                         *
- *      U. of Victoria, Canada                                                    *
+ *      MSU East Lansing, USA                                                     *
  *      MPI-K Heidelberg, Germany                                                 *
  *      U. of Bonn, Germany                                                       *
  *                                                                                *
@@ -151,12 +151,15 @@ TMVA::IMethod* TMVA::MethodCategory::AddMethod( const TCut& theCut,
 //   std::cout << "set input done "  << std::endl;
 
    rearrangeTransformation->SetEnabled(kFALSE);
-   IMethod* addedMethod = ClassifierFactory::Instance().Create(addedMethodName,GetJobName(),theTitle,dsi,theOptions);
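+   // the sub-method is booked through the classifier factory, just like a
+   // standalone method, and then configured below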
+   IMethod* addedMethod = ClassifierFactory::Instance().Create(addedMethodName,
+                                                               GetJobName(),
+                                                               theTitle,
+                                                               dsi,
+                                                               theOptions);
 
    MethodBase *method = (dynamic_cast<MethodBase*>(addedMethod));
-
    if(method==0) return 0;
-   
+
    method->SetupMethod();
    method->ParseOptions();
    method->GetTransformationHandler().AddTransformation( rearrangeTransformation, -1 );
@@ -186,7 +189,7 @@ TMVA::IMethod* TMVA::MethodCategory::AddMethod( const TCut& theCut,
 
    UInt_t newSpectatorIndex = primaryDSI.GetSpectatorInfos().size();
    fCategorySpecIdx.push_back(newSpectatorIndex);
-   
+
    primaryDSI.AddSpectator( Form("%s_cat%i:=%s", GetName(),(int)fMethods.size(),theCut.GetTitle()),
                             Form("%s:%s",GetName(),method->GetName()),
                             "pass", 0, 0, 'C' );
@@ -358,15 +361,10 @@ void TMVA::MethodCategory::Train()
    // specify the minimum # of training events
    const Int_t  MinNoTrainingEvents = 10;
 
-   // THIS NEEDS TO BE CHANGED:
-//    TString what("Classification");
-//    what.ToLower();
-//    Types::EAnalysisType analysisType = ( what.CompareTo("regression")==0 ? Types::kRegression : Types::kClassification );
-
    Types::EAnalysisType analysisType = GetAnalysisType();
 
    // start the training
-   Log() << kINFO << "Train all sub-classifiers for " 
+   Log() << kINFO << "Train all sub-classifiers for "
          << (analysisType == Types::kRegression ? "Regression" : "Classification") << " ..." << Endl;
 
    // don't do anything if no sub-classifier booked
@@ -374,7 +372,7 @@ void TMVA::MethodCategory::Train()
       Log() << kINFO << "...nothing found to train" << Endl;
       return;
    }
-   
+
    std::vector<IMethod*>::iterator itrMethod;
 
    // iterate over all booked sub-classifiers  and train them
@@ -382,9 +380,9 @@ void TMVA::MethodCategory::Train()
 
       MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
       if(!mva) continue;
-      mva->SetAnalysisType(GetAnalysisType());
-      if (!mva->HasAnalysisType( analysisType, 
-                                 mva->DataInfo().GetNClasses(), 
+      mva->SetAnalysisType( analysisType );
+      if (!mva->HasAnalysisType( analysisType,
+                                 mva->DataInfo().GetNClasses(),
                                  mva->DataInfo().GetNTargets() ) ) {
          Log() << kWARNING << "Method " << mva->GetMethodTypeName() << " is not capable of handling " ;
          if (analysisType == Types::kRegression)
@@ -394,8 +392,6 @@ void TMVA::MethodCategory::Train()
          itrMethod = fMethods.erase( itrMethod );
          continue;
       }
-
-      mva->SetAnalysisType( analysisType );
       if (mva->Data()->GetNTrainingEvents() >= MinNoTrainingEvents) {
 
          Log() << kINFO << "Train method: " << mva->GetMethodName() << " for "
diff --git a/tmva/src/MethodCompositeBase.cxx b/tmva/src/MethodCompositeBase.cxx
index 11e255187fe..313edc22c79 100644
--- a/tmva/src/MethodCompositeBase.cxx
+++ b/tmva/src/MethodCompositeBase.cxx
@@ -1,32 +1,32 @@
 // @(#)root/tmva $Id$
 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Or Cohen
 
-/**********************************************************************************
- * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
- * Package: TMVA                                                                  *
- * Class  : MethodCompositeBase                                                   *
- * Web    : http://tmva.sourceforge.net                                           *
- *                                                                                *
- * Description:                                                                   *
- *      Virtual base class for all MVA method                                     *
- *                                                                                *
- * Authors (alphabetical):                                                        *
- *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
- *      Joerg Stelzer   <Joerg.Stelzer@cern.ch>  - CERN, Switzerland              *
- *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
- *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada         *
- *      Or Cohen        <orcohenor@gmail.com>    - Weizmann Inst., Israel         *
- *                                                                                *
- * Copyright (c) 2005:                                                            *
- *      CERN, Switzerland                                                         *
- *      U. of Victoria, Canada                                                    *
- *      MPI-K Heidelberg, Germany                                                 *
- *      LAPP, Annecy, France                                                      *
- *                                                                                *
- * Redistribution and use in source and binary forms, with or without             *
- * modification, are permitted according to the terms listed in LICENSE           *
- * (http://tmva.sourceforge.net/LICENSE)                                          *
- **********************************************************************************/
+/*****************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis  *
+ * Package: TMVA                                                             *
+ * Class  : MethodCompositeBase                                              *
+ * Web    : http://tmva.sourceforge.net                                      *
+ *                                                                           *
+ * Description:                                                              *
+ *      Virtual base class for all MVA methods                               *
+ *                                                                           *
+ * Authors (alphabetical):                                                   *
+ *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland         *
+ *      Joerg Stelzer   <Joerg.Stelzer@cern.ch>  - MSU, USA                  *
+ *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany *
+ *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada    *
+ *      Or Cohen        <orcohenor@gmail.com>    - Weizmann Inst., Israel    *
+ *                                                                           *
+ * Copyright (c) 2005:                                                       *
+ *      CERN, Switzerland                                                    *
+ *      U. of Victoria, Canada                                               *
+ *      MPI-K Heidelberg, Germany                                            *
+ *      LAPP, Annecy, France                                                 *
+ *                                                                           *
+ * Redistribution and use in source and binary forms, with or without        *
+ * modification, are permitted according to the terms listed in LICENSE      *
+ * (http://tmva.sourceforge.net/LICENSE)                                     *
+ *****************************************************************************/
 
 //_______________________________________________________________________
 //
@@ -58,7 +58,7 @@ using std::vector;
 ClassImp(TMVA::MethodCompositeBase)
 
 //_______________________________________________________________________
-TMVA::MethodCompositeBase::MethodCompositeBase( const TString& jobName, 
+TMVA::MethodCompositeBase::MethodCompositeBase( const TString& jobName,
                                                 Types::EMVA methodType,
                                                 const TString& methodTitle,
                                                 DataSetInfo& theData,
@@ -71,7 +71,7 @@ TMVA::MethodCompositeBase::MethodCompositeBase( const TString& jobName,
 //_______________________________________________________________________
 TMVA::MethodCompositeBase::MethodCompositeBase( Types::EMVA methodType,
                                                 DataSetInfo& dsi,
-                                                const TString& weightFile, 
+                                                const TString& weightFile,
                                                 TDirectory* theTargetDir )
    : TMVA::MethodBase( methodType, dsi, weightFile, theTargetDir ),
      fMethodIndex(0)
@@ -85,7 +85,7 @@ TMVA::IMethod* TMVA::MethodCompositeBase::GetMethod( const TString &methodTitle
    vector<IMethod*>::const_iterator itrMethodEnd = fMethods.end();
 
    for (; itrMethod != itrMethodEnd; itrMethod++) {
-      MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);    
+      MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
       if ( (mva->GetMethodName())==methodTitle ) return mva;
    }
    return 0;
@@ -102,26 +102,26 @@ TMVA::IMethod* TMVA::MethodCompositeBase::GetMethod( const Int_t index ) const
 
 
 //_______________________________________________________________________
-void TMVA::MethodCompositeBase::AddWeightsXMLTo( void* parent ) const 
+void TMVA::MethodCompositeBase::AddWeightsXMLTo( void* parent ) const
 {
    void* wght = gTools().AddChild(parent, "Weights");
    gTools().AddAttr( wght, "NMethods",   fMethods.size()   );
-   for (UInt_t i=0; i< fMethods.size(); i++) 
+   for (UInt_t i=0; i< fMethods.size(); i++)
    {
       void* methxml = gTools().AddChild( wght, "Method" );
       MethodBase* method = dynamic_cast<MethodBase*>(fMethods[i]);
-      gTools().AddAttr(methxml,"Index",          i ); 
-      gTools().AddAttr(methxml,"Weight",         fMethodWeight[i]); 
+      gTools().AddAttr(methxml,"Index",          i );
+      gTools().AddAttr(methxml,"Weight",         fMethodWeight[i]);
       gTools().AddAttr(methxml,"MethodSigCut",   method->GetSignalReferenceCut());
       gTools().AddAttr(methxml,"MethodSigCutOrientation", method->GetSignalReferenceCutOrientation());
       gTools().AddAttr(methxml,"MethodTypeName", method->GetMethodTypeName());
-      gTools().AddAttr(methxml,"MethodName",     method->GetMethodName()   ); 
+      gTools().AddAttr(methxml,"MethodName",     method->GetMethodName()   );
       gTools().AddAttr(methxml,"JobName",        method->GetJobName());
-      gTools().AddAttr(methxml,"Options",        method->GetOptions()); 
+      gTools().AddAttr(methxml,"Options",        method->GetOptions());
       if (method->fTransformationPointer)
-	 gTools().AddAttr(methxml,"UseMainMethodTransformation",  TString("true")); 
+         gTools().AddAttr(methxml,"UseMainMethodTransformation", TString("true"));
       else
-	 gTools().AddAttr(methxml,"UseMainMethodTransformation",  TString("false")); 
+         gTools().AddAttr(methxml,"UseMainMethodTransformation", TString("false"));
       method->AddWeightsXMLTo(methxml);
    }
 }
@@ -132,14 +132,14 @@ TMVA::MethodCompositeBase::~MethodCompositeBase( void )
    // delete methods
    vector<IMethod*>::iterator itrMethod = fMethods.begin();
    for (; itrMethod != fMethods.end(); itrMethod++) {
-      Log() << kVERBOSE << "Delete method: " << (*itrMethod)->GetName() << Endl;    
+      Log() << kVERBOSE << "Delete method: " << (*itrMethod)->GetName() << Endl;
       delete (*itrMethod);
    }
    fMethods.clear();
 }
 
 //_______________________________________________________________________
-void TMVA::MethodCompositeBase::ReadWeightsFromXML( void* wghtnode ) 
+void TMVA::MethodCompositeBase::ReadWeightsFromXML( void* wghtnode )
 {
    // XML streamer
    UInt_t nMethods;
@@ -162,13 +162,13 @@ void TMVA::MethodCompositeBase::ReadWeightsFromXML( void* wghtnode )
 
       Bool_t rerouteTransformation = kFALSE;
       if (gTools().HasAttr( ch, "UseMainMethodTransformation")) {
-	 TString rerouteString("");
-	 gTools().ReadAttr( ch, "UseMainMethodTransformation",  rerouteString );
-	 rerouteString.ToLower();
-	 if (rerouteString=="true")
-	    rerouteTransformation=kTRUE;
+         TString rerouteString("");
+         gTools().ReadAttr( ch, "UseMainMethodTransformation", rerouteString );
+         rerouteString.ToLower();
+         if (rerouteString=="true")
+            rerouteTransformation=kTRUE;
       }
-      
+
       //remove trailing "~" to signal that options have to be reused
       optionString.ReplaceAll("~","");
       //ignore meta-options for method Boost
diff --git a/tmva/src/PDEFoamVect.cxx b/tmva/src/PDEFoamVect.cxx
index d26b012f68f..94dfb64f78d 100644
--- a/tmva/src/PDEFoamVect.cxx
+++ b/tmva/src/PDEFoamVect.cxx
@@ -35,7 +35,7 @@
 
 using namespace std;
 
-//#define SW2 std::setw(12)
+#define SW2 std::setprecision(7) << std::setw(12)
 
 ClassImp(TMVA::PDEFoamVect)
 
@@ -200,13 +200,11 @@ TMVA::PDEFoamVect& TMVA::PDEFoamVect::operator =(Double_t x)
 //_____________________________________________________________________
 void TMVA::PDEFoamVect::Print(Option_t *option) const
 {
-   streamsize wid = cout.width(); // saving current field width
    // Printout of all vector components
    if(!option) Error( "Print ", "No option set \n");
    cout << "(";
    for(Int_t i=0; i<fDim-1; i++) 
-      cout << std::setw(12) << *(fCoords+i) << ",";
-   cout << std::setw(12) << *(fCoords+fDim-1);
+      cout << SW2 << *(fCoords+i) << ",";
+   cout << SW2 << *(fCoords+fDim-1);
    cout << ")";
-   cout.width(wid);
 }
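
One caveat on the SW2 macro introduced above: std::setw applies only to the
next insertion, so dropping the saved field width is harmless, but
std::setprecision is sticky and leaves cout at precision 7 after Print()
returns. A hedged sketch of how a caller could preserve the stream state,
should that matter (PrintWithGuardedPrecision is illustrative, not TMVA API):

   #include <iostream>
   #include <iomanip>

   // Save and restore the sticky precision around formatted printing;
   // the width needs no restore because setw resets after each insertion.
   void PrintWithGuardedPrecision(double x)
   {
      const std::streamsize oldPrec = std::cout.precision();
      std::cout << std::setprecision(7) << std::setw(12) << x;
      std::cout.precision(oldPrec);
   }
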
diff --git a/tmva/src/RegressionVariance.cxx b/tmva/src/RegressionVariance.cxx
index a4bbe0cc458..d0ab388a66d 100644
--- a/tmva/src/RegressionVariance.cxx
+++ b/tmva/src/RegressionVariance.cxx
@@ -61,7 +61,7 @@ Double_t TMVA::RegressionVariance::GetSeparationGain(const Double_t &nLeft,
    Double_t parentIndex = nTot * this->GetSeparationIndex(nTot,targetTot,target2Tot);
    Double_t leftIndex   = ( (nTot - nLeft) * this->GetSeparationIndex(nTot-nLeft,targetTot-targetLeft,target2Tot-target2Left) );
    Double_t rightIndex  =    nLeft * this->GetSeparationIndex(nLeft,targetLeft,target2Left);
-    
+
    //  return 1/ (leftIndex + rightIndex);   
    return (parentIndex - leftIndex - rightIndex)/(parentIndex);   
 }
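
The value returned above is the fraction of the parent node's weighted target
variance that the split removes. A self-contained sketch of the same
computation from raw moments (illustrative only, not the TMVA interface;
"index" here means event count times target variance, as in the code above):

   // Weighted separation index of a node, from the event count and the
   // first two moments of the regression target.
   double NodeIndex(double n, double sumT, double sumT2)
   {
      const double mean = sumT / n;
      return n * (sumT2 / n - mean * mean);
   }

   // Normalized gain, mirroring (parent - left - right) / parent above.
   double SeparationGain(double nL, double sTL, double sT2L,
                         double nR, double sTR, double sT2R)
   {
      const double parent = NodeIndex(nL + nR, sTL + sTR, sT2L + sT2R);
      const double kids   = NodeIndex(nL, sTL, sT2L)
                          + NodeIndex(nR, sTR, sT2R);
      return (parent - kids) / parent;
   }
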
diff --git a/tmva/src/VariableNormalizeTransform.cxx b/tmva/src/VariableNormalizeTransform.cxx
index eebe6bced5b..ea6fc57ca74 100644
--- a/tmva/src/VariableNormalizeTransform.cxx
+++ b/tmva/src/VariableNormalizeTransform.cxx
@@ -401,6 +401,7 @@ void TMVA::VariableNormalizeTransform::ReadFromXML( void* trfnode )
    gTools().ReadAttr(trfnode, "NVariables", nvars);
    // coverity[tainted_data_argument]
    gTools().ReadAttr(trfnode, "NTargets",   ntgts);
+   // coverity[tainted_data_argument]
 
    for( UInt_t ivar = 0; ivar < nvars; ++ivar ){
       fGet.push_back(std::make_pair<Char_t,UInt_t>('v',ivar));
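
Coverity inline suppressions annotate the line immediately following them,
which is why each comment sits directly before the ReadAttr call it covers.
An alternative to suppressing the tainted-data warning would be to bound the
counts read from the weight file before they drive the loops below; a hedged
sketch (kMaxEntries is a hypothetical sanity limit, not a TMVA constant):

   // Reject implausible counts from the XML weight file instead of
   // suppressing the tainted-data warning.
   const UInt_t kMaxEntries = 10000;
   if (nvars > kMaxEntries || ntgts > kMaxEntries)
      Log() << kFATAL << "Implausible NVariables/NTargets in weight file"
            << Endl;
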
diff --git a/tmva/test/TMVAClassificationCategory.cxx b/tmva/test/TMVAClassificationCategory.cxx
index 60433d839ca..f2768c7a60b 100644
--- a/tmva/test/TMVAClassificationCategory.cxx
+++ b/tmva/test/TMVAClassificationCategory.cxx
@@ -21,11 +21,11 @@
  * macros (simply say: root -l <../macros/macro.C>), which can be conveniently    *
  * invoked through a GUI launched by the command                                  *
  *                                                                                *
- *    root -l TMVAGui.C                                                        *
+ *    root -l TMVAGui.C                                                           *
  **********************************************************************************/
 
 #include <cstdlib>
-#include <iostream> 
+#include <iostream>
 #include <map>
 #include <string>
 
@@ -44,12 +44,13 @@
 // two types of category methods are implemented
 Bool_t UseOffsetMethod = kTRUE;
 
-int main( int argc, char** argv ) 
+int main( int argc, char** argv )
 {
    //---------------------------------------------------------------
-   // Example for usage of different event categories with classifiers 
+   // Example for usage of different event categories with classifiers
 
-   std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl;
+   std::cout << std::endl
+             << "==> Start TMVAClassificationCategory" << std::endl;
 
    bool batchMode = false;
 
@@ -62,7 +63,9 @@ int main( int argc, char** argv )
   std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" );
   if (batchMode) factoryOptions += ":!Color:!DrawProgressBar";
 
-   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions );
+  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory",
+                                              outputFile,
+                                              factoryOptions );
 
    // Define the input variables used for the MVA training
    factory->AddVariable( "var1", 'F' );
@@ -70,16 +73,17 @@ int main( int argc, char** argv )
    factory->AddVariable( "var3", 'F' );
    factory->AddVariable( "var4", 'F' );
 
-   // You can add so-called "Spectator variables", which are not used in the MVA training,
-   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
-   // input variables, the response values of all trained MVAs, and the spectator variables
+   // You can add so-called "Spectator variables", which are not used
+   // in the MVA training, but will appear in the final "TestTree"
+   // produced by TMVA. This TestTree will contain the input
+   // variables, the response values of all trained MVAs, and the
+   // spectator variables
    factory->AddSpectator( "eta" );
 
    // Load the signal and background event samples from ROOT trees
    TFile *input(0);
-   TString fname( "" );
-   if (UseOffsetMethod) fname = "data/toy_sigbkg_categ_offset.root";
-   else                 fname = "data/toy_sigbkg_categ_varoff.root";
+   TString fname = UseOffsetMethod ? "data/toy_sigbkg_categ_offset.root" : "data/toy_sigbkg_categ_varoff.root";
+
    if (!gSystem->AccessPathName( fname )) {
       // first we try to find tmva_example.root in the local directory
       std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl;
@@ -108,7 +112,7 @@ int main( int argc, char** argv )
 
    // Tell the factory how to use the training and testing events
    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
-                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
+                                        "nTest_Signal=5500:nTrain_Background=3400:SplitMode=Random:NormMode=NumEvents" );
 
    // ---- Book MVA methods
 
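
The nTrain_*/nTest_* requests may also be combined; asking for explicit event
counts for both samples in both roles would look roughly like this (the
numbers are illustrative, not taken from this commit):

   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
      "nTrain_Signal=4000:nTest_Signal=5500:"
      "nTrain_Background=3400:nTest_Background=3400:"
      "SplitMode=Random:NormMode=NumEvents:!V" );
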
@@ -117,14 +121,14 @@ int main( int argc, char** argv )
 
    // Likelihood
    factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
-                        "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); 
+                        "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
 
    // --- Categorised classifier
    TMVA::MethodCategory* mcat = 0;
 
    // The variable sets
    TString theCat1Vars = "var1:var2:var3:var4";
-   TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3");
+   TString theCat2Vars = UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3";
 
    // Fisher with categories
    TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
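
In the shipped category example the booking then continues by registering one
sub-classifier per phase-space region, roughly along these lines (the eta cut
values are illustrative, following the standard TMVAClassificationCategory
macro rather than anything changed in this commit):

   mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat);
   mcat->AddMethod( "abs(eta)<=1.3", theCat1Vars, TMVA::Types::kFisher,
                    "Category_Fisher_1", "!H:!V:Fisher" );
   mcat->AddMethod( "abs(eta)>1.3",  theCat2Vars, TMVA::Types::kFisher,
                    "Category_Fisher_2", "!H:!V:Fisher" );
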
diff --git a/tmva/test/setup.sh b/tmva/test/setup.sh
index 17fd0edfe7d..8edab3e449b 100755
--- a/tmva/test/setup.sh
+++ b/tmva/test/setup.sh
@@ -23,6 +23,9 @@ if [ ! $ROOTSYS ]; then
     return 1
 fi
 
+export TMVASYS=$PWD
+
+
 # On MacOS X $DYLD_LIBRARY_PATH has to be modified, so:
 if [[ `root-config --platform` == "macosx" ]]; then
 
-- 
GitLab