From 2da6ec40ba8965b75e009a1709779e0407609b9a Mon Sep 17 00:00:00 2001
From: Joerg Stelzer <joerg.stelzer@cern.ch>
Date: Mon, 27 Jun 2011 13:19:22 +0000
Subject: [PATCH] fix event requests for Category classifier

git-svn-id: http://root.cern.ch/svn/root/trunk@39989 27541ba8-7e3a-0410-8455-c3a389f83636
---
 tmva/doc/LICENSE                          |   2 +-
 tmva/doc/README                           |  70 +--
 tmva/inc/TMVA/DataSetFactory.h            | 107 ++--
 tmva/inc/TMVA/DataSetInfo.h               |   4 +-
 tmva/inc/TMVA/MethodBase.h                |  51 +-
 tmva/inc/TMVA/SVKernelFunction.h          |   0
 tmva/inc/TMVA/SVKernelMatrix.h            |   0
 tmva/src/CrossEntropy.cxx                 |   1 -
 tmva/src/DataSetFactory.cxx               | 720 ++++++++++++-----------
 tmva/src/DataSetInfo.cxx                  |  84 +--
 tmva/src/DataSetManager.cxx               |  10 +-
 tmva/src/Factory.cxx                      |   4 +-
 tmva/src/MethodBoost.cxx                  |   2 +
 tmva/src/MethodCategory.cxx               |  46 +-
 tmva/src/MethodCompositeBase.cxx          |  90 +--
 tmva/src/PDEFoamVect.cxx                  |   8 +-
 tmva/src/RegressionVariance.cxx           |   2 +-
 tmva/src/VariableNormalizeTransform.cxx   |   1 +
 tmva/test/TMVAClassificationCategory.cxx  |  34 +-
 tmva/test/setup.sh                        |   3 +
 20 files changed, 660 insertions(+), 579 deletions(-)
 mode change 100644 => 100755 tmva/inc/TMVA/SVKernelFunction.h
 mode change 100644 => 100755 tmva/inc/TMVA/SVKernelMatrix.h

diff --git a/tmva/doc/LICENSE b/tmva/doc/LICENSE
index e555484b511..40a9b19fd4e 100644
--- a/tmva/doc/LICENSE
+++ b/tmva/doc/LICENSE
@@ -1,6 +1,6 @@
 TMVA -- Toolkit for Multivariate Data Analysis
 
-Copyright (c) 2005-2009, Regents of CERN (Switzerland), the
+Copyright (c) 2005-2010, Regents of CERN (Switzerland), the
 University of Victoria (Canada), the MPI fuer Kernphysik Heidelberg
 (Germany), LAPP (France), the University of Bonn (Germany).
 
diff --git a/tmva/doc/README b/tmva/doc/README
index 0df2207c9d3..68c92e81a10 100644
--- a/tmva/doc/README
+++ b/tmva/doc/README
@@ -4,7 +4,7 @@ TMVA Users Guide    : http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf
 
 TMVA home page      : http://tmva.sourceforge.net/
-TMVA developer page : http://sourceforge.net/projects/tmva
+TMVA download page  : http://sourceforge.net/projects/tmva
 TMVA mailing list   : http://sourceforge.net/mailarchive/forum.php?forum_name=tmva-users
 TMVA license (BSD)  : http://tmva.sourceforge.net/LICENSE
 
@@ -15,7 +15,7 @@ System requirements:
 
   TMVA has been tested to run on Linux, MAC/OSX and Windows platforms.
-  Running TMVA requires the availability of ROOT shared libraries with ROOT_VERSION >= 5.08
+  Running TMVA requires the availability of ROOT shared libraries with ROOT_VERSION >= 5.14
   (type "root-config --version" to see the version installed)
 
========================================================================================
 
@@ -26,64 +26,58 @@ Getting Started:
 
   How to compile the code:
   ------------------------
   /home> cd TMVA
-  /home/TMVA> source setup.[c]sh     // includes TMVA/lib in your lib path
-  /home/TMVA> cd src
-  /home/TMVA/src> make               // compile and build the library ../libTMVA.1.so
+  /home/TMVA> make                   # compile and build the library lib/libTMVA.1.so
 
-  How to run the code as ROOT macro:    // training/testing of an academic example
+  How to run the code as a ROOT macro:  # training/testing of an academic example
   ----------------------------------
-  /home/TMVA> cd macros
+  /home/TMVA> cd test
+  /home/TMVA/test> source setup.sh   # setup script must be executed before running macros (use setup.csh for the C shell)
 
   --- For classification:
 
-  /home/TMVA/macros> root -l TMVAClassification.C                      // run all standard classifiers
-  /home/TMVA/macros> root -l TMVAClassification.C\(\"LD,Likelihood\"\) // run LD and Likelihood classifiers
+  /home/TMVA/test> root -l TMVAClassification.C                      # run all standard classifiers (takes a while)
+  /home/TMVA/test> root -l TMVAClassification.C\(\"LD,Likelihood\"\) # run LD and Likelihood classifiers
 
   --- For regression:
 
-  /home/TMVA/macros> root -l TMVARegression.C               // run all regression algorithms
-  /home/TMVA/macros> root -l TMVARegression.C\(\"LD,KNN\"\) // run LD and k-NN regression algorithms
+  /home/TMVA/test> root -l TMVARegression.C               # run all regression algorithms (takes a while)
+  /home/TMVA/test> root -l TMVARegression.C\(\"LD,KNN\"\) # run LD and k-NN regression algorithms
 
   --> at the end of the jobs, a GUI will pop up: try to click through all the buttons;
       some of the lower buttons are method-specific, and will only work when the
      corresponding classifiers/regression algorithms have been trained/tested
      before (unless they are greyed out)
 
-  How to run the code as an executable:   // training/testing of an academic example
+  How to run the code as an executable:
   -------------------------------------
-  /home/TMVA> cd execs
-  /home/TMVA/execs> make
-  /home/TMVA/execs> ./TMVAClassification                // run all standard classifiers
-  /home/TMVA/execs> ./TMVAClassification LD Likelihood  // run LD and Likelihood classifiers
+  /home/TMVA/test> make
+  /home/TMVA/test> ./TMVAClassification                 # run all standard classifiers
+  /home/TMVA/test> ./TMVAClassification LD Likelihood   # run LD and Likelihood classifiers
 
-  ... and similar for regression
+  ... and similarly for regression
 
-  /home/TMVA/examples> root -l ../macros/TMVAGui.C   // start the GUI
+  /home/TMVA/test> root -l TMVAGui.C                  # start the GUI
+
+  How to run the code as a Python script using PyROOT:
+  -----------------------------------------------------
+  /home/TMVA/test> make
+  /home/TMVA/test> python ./TMVAClassification.py --method LD,Likelihood
 
   How to apply the TMVA methods:
   -------------------------------------
-  /home/TMVA> cd macros
+  /home/TMVA> cd test
 
   --- For classification:
 
-  /home/TMVA/macros> root -l TMVAClassificationApplication.C
-  /home/TMVA/macros> root -l TMVAClassificationApplication.C\(\"LD,Likelihood\"\)
+  /home/TMVA/test> root -l TMVAClassificationApplication.C
+  /home/TMVA/test> root -l TMVAClassificationApplication.C\(\"LD,Likelihood\"\)
 
-  ... and similar for regression
+  ... and similarly for regression.
+  ... and similarly for executables.
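
  For illustration, the application macros above boil down to a loop around
  the TMVA::Reader class. A minimal sketch of that pattern, assuming two
  training variables named var1 and var2 and the default weight-file location
  written by TMVAClassification (names and values are illustrative):

     #include "TMVA/Reader.h"

     void ReaderSketch()
     {
        Float_t var1, var2;
        TMVA::Reader reader( "!Color:!Silent" );
        reader.AddVariable( "var1", &var1 );   // same names and order as in training
        reader.AddVariable( "var2", &var2 );
        reader.BookMVA( "LD method", "weights/TMVAClassification_LD.weights.xml" );
        var1 = 0.5; var2 = -1.2;               // normally filled from the input tree
        Double_t mvaValue = reader.EvaluateMVA( "LD method" );
        (void) mvaValue;                       // e.g. fill a histogram with it
     }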
 
  The directory structure:
  ------------------------
+  inc/     : the TMVA class headers
-  src/     : the TMVA source code
+  src/     : the TMVA class source
   lib/     : here you'll find the TMVA library (libTMVA.1.so) after compilation
-             (copy it to you preferred library directory or include
-             this directory in your LD_LIBRARY_PATH as it is done
-             by: source setup.[c]sh
-  macros/  : example code of how to use the TMVA library with a ROOT macro
-             uses input data from a Toy Monte Carlo;
-             also: handy root macros which read and display the
-             results produced by TMVAClassification and TMVARegression
-  execs/   : same example code as in 'macros', but for using the TMVA library in an executable
-  execs/data : the Toy Monte Carlo data
-  python/  : example code of how to use the TMVA library with a python script;
-             requires availability of PyROOT
-  development/ : for use by developers only
+  test/    : example code for analysis macros and executables, the GUI and analysis scripts.
 
========================================================================================
 
@@ -158,11 +152,11 @@ Please report any problems and/or suggestions for improvements to the authors.
 
========================================================================================
 
-Copyright © (2005-2009):
+Copyright © (2005-2010):
------------------------
 
-  Andreas Hoecker, Peter Speckmayer, Jörg Stelzer (all: CERN, Switzerland),
-  Jan Therhaag, Eckhard von Toerne (both: U. Bonn, Germany),
+  Andreas Hoecker, Peter Speckmayer, Jörg Stelzer (CERN, Switzerland),
+  Jan Therhaag, Eckhard von Toerne (U. Bonn, Germany),
   Helge Voss (MPI-KP Heidelberg, Germany),
 
 Contributors to TMVA: please see http://tmva.sourceforge.net/#authors
 
diff --git a/tmva/inc/TMVA/DataSetFactory.h b/tmva/inc/TMVA/DataSetFactory.h
index 04856343b55..bc92730269f 100644
--- a/tmva/inc/TMVA/DataSetFactory.h
+++ b/tmva/inc/TMVA/DataSetFactory.h
@@ -73,24 +73,13 @@
 #endif
 
 namespace TMVA {
-   
+
    class DataSet;
    class DataSetInfo;
    class DataInputHandler;
    class TreeInfo;
    class MsgLogger;
 
-   typedef std::vector< Event* >                             EventVector;
-   typedef std::vector< EventVector >                        EventVectorOfClasses;
-   typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType;
-   typedef std::map<Types::ETreeType, EventVector >          EventVectorOfTreeType;
-
-   typedef std::vector< Double_t >                    ValuePerClass;
-   typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType;
-
-   typedef std::vector< Int_t >                        NumberPerClass;
-   typedef std::map<Types::ETreeType, NumberPerClass > NumberPerClassOfTreeType;
-
   // =============== maybe move these elsewhere (e.g.
into the tools ) // =============== functors ======================= @@ -139,7 +128,7 @@ namespace TMVA { template <typename F> - class null_t + class null_t { private: // returns argF @@ -157,7 +146,7 @@ namespace TMVA { } - + template <typename F, typename G, typename H> class compose_binary_t : public std::binary_function<typename G::argument_type, typename H::argument_type, @@ -171,7 +160,7 @@ namespace TMVA { compose_binary_t(const F& _f, const G& _g, const H& _h) : f(_f), g(_g), h(_h) { } - + typename F::result_type operator()(const typename G::argument_type& argG, const typename H::argument_type& argH) const { @@ -218,12 +207,49 @@ namespace TMVA { class DataSetFactory { + typedef std::vector< Event* > EventVector; + typedef std::vector< EventVector > EventVectorOfClasses; + typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType; + typedef std::map<Types::ETreeType, EventVector > EventVectorOfTreeType; + + typedef std::vector< Double_t > ValuePerClass; + typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType; + + class EventStats { + public: + Int_t nTrainingEventsRequested; + Int_t nTestingEventsRequested; + Int_t nInitialEvents; + Int_t nEvBeforeCut; + Int_t nEvAfterCut; + Float_t nWeEvBeforeCut; + Float_t nWeEvAfterCut; + Double_t nNegWeights; + Float_t* varAvLength; + EventStats(): + nTrainingEventsRequested(0), + nTestingEventsRequested(0), + nInitialEvents(0), + nEvBeforeCut(0), + nEvAfterCut(0), + nWeEvBeforeCut(0), + nWeEvAfterCut(0), + nNegWeights(0), + varAvLength(0) + {} + ~EventStats() { delete[] varAvLength; } + Float_t cutScaling() const { return Float_t(nEvAfterCut)/nEvBeforeCut; } + }; + + typedef std::vector< int > NumberPerClass; + typedef std::vector< EventStats > EvtStatsPerClass; + public: // singleton class - static DataSetFactory& Instance() { if (!fgInstance) fgInstance = new DataSetFactory(); return *fgInstance; } + static DataSetFactory& Instance() { if (!fgInstance) fgInstance = new DataSetFactory(); return *fgInstance; } static void destroyInstance() { if (fgInstance) { delete fgInstance; fgInstance=0; } } DataSet* CreateDataSet( DataSetInfo &, DataInputHandler& ); @@ -231,34 +257,37 @@ namespace TMVA { protected: ~DataSetFactory(); - + DataSetFactory(); static DataSetFactory *fgInstance; DataSet* BuildInitialDataSet( DataSetInfo&, TMVA::DataInputHandler& ); DataSet* BuildDynamicDataSet( DataSetInfo& ); - + // ---------- new versions - void BuildEventVector ( DataSetInfo& dsi, - DataInputHandler& dataInput, - EventVectorOfClassesOfTreeType& tmpEventVector); - - DataSet* MixEvents ( DataSetInfo& dsi, - EventVectorOfClassesOfTreeType& tmpEventVector, - NumberPerClassOfTreeType& nTrainTestEvents, - const TString& splitMode, - const TString& mixMode, - const TString& normMode, - UInt_t splitSeed); - - void RenormEvents ( DataSetInfo& dsi, - EventVectorOfClassesOfTreeType& tmpEventVector, - const TString& normMode ); - - void InitOptions ( DataSetInfo& dsi, - NumberPerClassOfTreeType& nTrainTestEvents, - TString& normMode, UInt_t& splitSeed, TString& splitMode, TString& mixMode ); - + void BuildEventVector ( DataSetInfo& dsi, + DataInputHandler& dataInput, + EventVectorOfClassesOfTreeType& eventsmap, + EvtStatsPerClass& eventCounts); + + DataSet* MixEvents ( DataSetInfo& dsi, + EventVectorOfClassesOfTreeType& eventsmap, + EvtStatsPerClass& eventCounts, + const TString& splitMode, + const TString& mixMode, + const TString& normMode, + UInt_t splitSeed); + + void RenormEvents ( DataSetInfo& dsi, + 
EventVectorOfClassesOfTreeType& eventsmap, + const EvtStatsPerClass& eventCounts, + const TString& normMode ); + + void InitOptions ( DataSetInfo& dsi, + EvtStatsPerClass& eventsmap, + TString& normMode, UInt_t& splitSeed, + TString& splitMode, TString& mixMode ); + // ------------------------ @@ -282,7 +311,7 @@ namespace TMVA { Bool_t fVerbose; //! Verbosity TString fVerboseLevel; //! VerboseLevel - // the event + // the event mutable TTree* fCurrentTree; //! the tree, events are currently read from mutable UInt_t fCurrentEvtIdx; //! the current event (to avoid reading of the same event) diff --git a/tmva/inc/TMVA/DataSetInfo.h b/tmva/inc/TMVA/DataSetInfo.h index 776d8993565..2d4038c3480 100644 --- a/tmva/inc/TMVA/DataSetInfo.h +++ b/tmva/inc/TMVA/DataSetInfo.h @@ -14,7 +14,7 @@ * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland * * Joerg Stelzer <Joerg.Stelzer@cern.ch> - DESY, Germany * * * - * Copyright (c) 2008: * + * Copyright (c) 2008-2011: * * CERN, Switzerland * * MPI-K Heidelberg, Germany * * DESY Hamburg, Germany * @@ -157,7 +157,7 @@ namespace TMVA { std::vector<TString> GetListOfVariables() const; - // correlation matrix + // correlation matrix const TMatrixD* CorrelationMatrix ( const TString& className ) const; void SetCorrelationMatrix ( const TString& className, TMatrixD* matrix ); void PrintCorrelationMatrix( const TString& className ); diff --git a/tmva/inc/TMVA/MethodBase.h b/tmva/inc/TMVA/MethodBase.h index 58eb36cf9de..5c43369b066 100644 --- a/tmva/inc/TMVA/MethodBase.h +++ b/tmva/inc/TMVA/MethodBase.h @@ -341,14 +341,14 @@ namespace TMVA { TString GetTrainingTMVAVersionString() const; TString GetTrainingROOTVersionString() const; - TransformationHandler& GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) - { - if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation; - } - const TransformationHandler& GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const - { - if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation; - } + TransformationHandler& GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) + { + if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation; + } + const TransformationHandler& GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const + { + if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation; + } void RerouteTransformationHandler (TransformationHandler* fTargetTransformation) { fTransformationPointer=fTargetTransformation; } @@ -413,7 +413,7 @@ namespace TMVA { // ---------- protected event and tree accessors ----------------------------- - // names of input variables (if the original names are expressions, they are + // names of input variables (if the original names are expressions, they are // transformed into regexps) const TString& GetInternalVarName( Int_t ivar ) const { return (*fInputVars)[ivar]; } const TString& GetOriginalVarName( Int_t ivar ) const { return DataInfo().GetVariableInfo(ivar).GetExpression(); } @@ -435,18 +435,18 @@ namespace TMVA { // some basic statistical analysis void Statistics( Types::ETreeType treeType, const TString& theVarName, - Double_t&, Double_t&, Double_t&, + Double_t&, Double_t&, Double_t&, Double_t&, Double_t&, Double_t& ); - // if TRUE, write weights only to text files + // if TRUE, write weights only to text files Bool_t 
TxtWeightsOnly() const { return kTRUE; } protected: - + // access to event information that needs method-specific information - - Float_t GetTWeight( const Event* ev ) const { - return (fIgnoreNegWeightsInTraining && (ev->GetWeight() < 0)) ? 0. : ev->GetWeight(); + + Float_t GetTWeight( const Event* ev ) const { + return (fIgnoreNegWeightsInTraining && (ev->GetWeight() < 0)) ? 0. : ev->GetWeight(); } Bool_t IsConstructedFromWeightFile() const { return fConstructedFromWeightFile; } @@ -479,13 +479,13 @@ namespace TMVA { // PDFs for classifier response (required to compute signal probability and Rarity) void CreateMVAPdfs(); - // for root finder + // for root finder static Double_t IGetEffForRoot( Double_t ); // interface Double_t GetEffForRoot ( Double_t ); // implementation // used for file parsing Bool_t GetLine( std::istream& fin, char * buf ); - + // fill test tree with classification or regression results virtual void AddClassifierOutput ( Types::ETreeType type ); virtual void AddClassifierOutputProb( Types::ETreeType type ); @@ -494,12 +494,13 @@ namespace TMVA { private: - void AddInfoItem( void* gi, const TString& name, const TString& value) const; + void AddInfoItem( void* gi, const TString& name, + const TString& value) const; - static void CreateVariableTransforms(const TString& trafoDefinition, - TMVA::DataSetInfo& dataInfo, - TMVA::TransformationHandler& transformationHandler, - TMVA::MsgLogger& log ); + static void CreateVariableTransforms(const TString& trafoDefinition, + TMVA::DataSetInfo& dataInfo, + TMVA::TransformationHandler& transformationHandler, + TMVA::MsgLogger& log ); // ========== class members ================================================== @@ -523,7 +524,7 @@ namespace TMVA { private: // MethodCuts redefines some of the evaluation variables and histograms -> must access private members - friend class MethodCuts; + friend class MethodCuts; Bool_t fDisableWriting; //! set to true in order to suppress writing to XML @@ -537,11 +538,11 @@ namespace TMVA { // naming and versioning TString fJobName; // name of job -> user defined, appears in weight files TString fMethodName; // name of the method (set in derived class) - Types::EMVA fMethodType; // type of method (set in derived class) + Types::EMVA fMethodType; // type of method (set in derived class) TString fTestvar; // variable used in evaluation, etc (mostly the MVA) UInt_t fTMVATrainingVersion; // TMVA version used for training UInt_t fROOTTrainingVersion; // ROOT version used for training - Bool_t fConstructedFromWeightFile; // is it obtained from weight file? + Bool_t fConstructedFromWeightFile; // is it obtained from weight file? 
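
For illustration, the GetTWeight() accessor in the hunk above applies the
fIgnoreNegWeightsInTraining flag by clamping negative event weights to zero at
training time; a minimal standalone sketch of that rule (the helper name is
illustrative, not TMVA API):

   #include "Rtypes.h"

   // negative weights are ignored (set to 0) only when the flag is set;
   // otherwise the event weight is passed through unchanged
   static Float_t TrainingWeight( Float_t evWeight, Bool_t ignoreNegWeights )
   {
      return ( ignoreNegWeights && evWeight < 0 ) ? 0.f : evWeight;
   }
   // TrainingWeight( -0.5, kTRUE ) == 0.f ;  TrainingWeight( -0.5, kFALSE ) == -0.5f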
// Directory structure: fMethodBaseDir/fBaseDir // where the first directory name is defined by the method type diff --git a/tmva/inc/TMVA/SVKernelFunction.h b/tmva/inc/TMVA/SVKernelFunction.h old mode 100644 new mode 100755 diff --git a/tmva/inc/TMVA/SVKernelMatrix.h b/tmva/inc/TMVA/SVKernelMatrix.h old mode 100644 new mode 100755 diff --git a/tmva/src/CrossEntropy.cxx b/tmva/src/CrossEntropy.cxx index e7885cdc684..e886ff381a3 100644 --- a/tmva/src/CrossEntropy.cxx +++ b/tmva/src/CrossEntropy.cxx @@ -32,7 +32,6 @@ // -p log (p) - (1-p)log(1-p); p=purity //_______________________________________________________________________ -//#include <math.h> #include "TMath.h" #include "TMVA/CrossEntropy.h" diff --git a/tmva/src/DataSetFactory.cxx b/tmva/src/DataSetFactory.cxx index 74b629c8d7a..68cdc6f5f8f 100644 --- a/tmva/src/DataSetFactory.cxx +++ b/tmva/src/DataSetFactory.cxx @@ -1,30 +1,30 @@ // @(#)root/tmva $Id$ // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss -/********************************************************************************** - * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * - * Package: TMVA * - * Class : DataSetFactory * - * Web : http://tmva.sourceforge.net * - * * - * Description: * - * Implementation (see header for description) * - * * - * Authors (alphabetical): * - * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * - * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * - * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * - * Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany * - * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * - * * - * Copyright (c) 2009: * - * CERN, Switzerland * - * MPI-K Heidelberg, Germany * - * U. of Bonn, Germany * - * Redistribution and use in source and binary forms, with or without * - * modification, are permitted according to the terms listed in LICENSE * - * (http://tmva.sourceforge.net/LICENSE) * - **********************************************************************************/ +/***************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : DataSetFactory * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Implementation (see header for description) * + * * + * Authors (alphabetical): * + * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * + * Joerg Stelzer <Joerg.Stelzer@cern.ch> - MSU, USA * + * Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany * + * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * + * * + * Copyright (c) 2009: * + * CERN, Switzerland * + * MPI-K Heidelberg, Germany * + * U. of Bonn, Germany * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + *****************************************************************************/ #include <assert.h> @@ -85,7 +85,7 @@ TMVA::DataSetFactory* TMVA::DataSetFactory::fgInstance = 0; namespace TMVA { // calculate the largest common divider // this function is not happy if numbers are negative! 
- Int_t LargestCommonDivider(Int_t a, Int_t b) + Int_t LargestCommonDivider(Int_t a, Int_t b) { if (a<b) {Int_t tmp = a; a=b; b=tmp; } // achieve a>=b if (b==0) return a; @@ -107,7 +107,7 @@ TMVA::DataSetFactory::DataSetFactory() : } //_______________________________________________________________________ -TMVA::DataSetFactory::~DataSetFactory() +TMVA::DataSetFactory::~DataSetFactory() { // destructor std::vector<TTreeFormula*>::const_iterator formIt; @@ -122,7 +122,8 @@ TMVA::DataSetFactory::~DataSetFactory() } //_______________________________________________________________________ -TMVA::DataSet* TMVA::DataSetFactory::CreateDataSet( TMVA::DataSetInfo& dsi, TMVA::DataInputHandler& dataInput ) +TMVA::DataSet* TMVA::DataSetFactory::CreateDataSet( TMVA::DataSetInfo& dsi, + TMVA::DataInputHandler& dataInput ) { // steering the creation of a new dataset @@ -131,7 +132,7 @@ TMVA::DataSet* TMVA::DataSetFactory::CreateDataSet( TMVA::DataSetInfo& dsi, TMVA if (ds->GetNEvents() > 1) { CalcMinMax(ds,dsi); - + // from the the final dataset build the correlation matrix for (UInt_t cl = 0; cl< dsi.GetNClasses(); cl++) { const TString className = dsi.GetClassInfo(cl)->GetName(); @@ -144,17 +145,18 @@ TMVA::DataSet* TMVA::DataSetFactory::CreateDataSet( TMVA::DataSetInfo& dsi, TMVA } //_______________________________________________________________________ -TMVA::DataSet* TMVA::DataSetFactory::BuildDynamicDataSet( TMVA::DataSetInfo& dsi ) +TMVA::DataSet* TMVA::DataSetFactory::BuildDynamicDataSet( TMVA::DataSetInfo& dsi ) { Log() << kDEBUG << "Build DataSet consisting of one Event with dynamically changing variables" << Endl; DataSet* ds = new DataSet(dsi); - // create a DataSet with one Event which uses dynamic variables (pointers to variables) + // create a DataSet with one Event which uses dynamic variables + // (pointers to variables) if(dsi.GetNClasses()==0){ dsi.AddClass( "data" ); dsi.GetClassInfo( "data" )->SetNumber(0); } - + std::vector<Float_t*>* evdyn = new std::vector<Float_t*>(0); std::vector<VariableInfo>& varinfos = dsi.GetVariableInfos(); @@ -166,7 +168,7 @@ TMVA::DataSet* TMVA::DataSetFactory::BuildDynamicDataSet( TMVA::DataSetInfo& dsi for (;it!=itEnd;++it) { Float_t* external=(Float_t*)(*it).GetExternalLink(); if (external==0) - Log() << kDEBUG << "The link to the external variable is NULL while I am trying to build a dynamic data set. In this case fTmpEvent from MethodBase HAS TO BE USED in the method to get useful values in variables." << Endl; + Log() << kDEBUG << "The link to the external variable is NULL while I am trying to build a dynamic data set. In this case fTmpEvent from MethodBase HAS TO BE USED in the method to get useful values in variables." 
<< Endl; evdyn->push_back (external); } @@ -186,11 +188,14 @@ TMVA::DataSet* TMVA::DataSetFactory::BuildDynamicDataSet( TMVA::DataSetInfo& dsi } //_______________________________________________________________________ -TMVA::DataSet* TMVA::DataSetFactory::BuildInitialDataSet( DataSetInfo& dsi, DataInputHandler& dataInput ) +TMVA::DataSet* +TMVA::DataSetFactory::BuildInitialDataSet( DataSetInfo& dsi, + DataInputHandler& dataInput ) { - // if no entries, than create a DataSet with one Event which uses dynamic variables (pointers to variables) + // if no entries, than create a DataSet with one Event which uses + // dynamic variables (pointers to variables) if (dataInput.GetEntries()==0) return BuildDynamicDataSet( dsi ); - // ------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------- // register the classes in the datasetinfo-object // information comes from the trees in the dataInputHandler-object @@ -200,32 +205,31 @@ TMVA::DataSet* TMVA::DataSetFactory::BuildInitialDataSet( DataSetInfo& dsi, Data } delete classList; + EvtStatsPerClass eventCounts(dsi.GetNClasses()); TString normMode; TString splitMode; TString mixMode; - UInt_t splitSeed; + UInt_t splitSeed; + InitOptions( dsi, eventCounts, normMode, splitSeed, splitMode , mixMode ); - // ======= build event-vector tentative new ordering ================================= - - TMVA::EventVectorOfClassesOfTreeType tmpEventVector; - TMVA::NumberPerClassOfTreeType nTrainTestEvents; + // ======= build event-vector from input, apply preselection =============== + EventVectorOfClassesOfTreeType tmpEventVector; + BuildEventVector( dsi, dataInput, tmpEventVector, eventCounts ); - InitOptions ( dsi, nTrainTestEvents, normMode, splitSeed, splitMode , mixMode ); - BuildEventVector( dsi, dataInput, tmpEventVector ); - - DataSet* ds = MixEvents( dsi, tmpEventVector, nTrainTestEvents, splitMode, mixMode, normMode, splitSeed); + DataSet* ds = MixEvents( dsi, tmpEventVector, eventCounts, + splitMode, mixMode, normMode, splitSeed); const Bool_t showCollectedOutput = kFALSE; if (showCollectedOutput) { Int_t maxL = dsi.GetClassNameMaxLength(); Log() << kINFO << "Collected:" << Endl; for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) { - Log() << kINFO << " " - << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() + Log() << kINFO << " " + << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() << " training entries: " << ds->GetNClassEvents( 0, cl ) << Endl; - Log() << kINFO << " " - << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() - << " testing entries: " << ds->GetNClassEvents( 1, cl ) << Endl; + Log() << kINFO << " " + << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() + << " testing entries: " << ds->GetNClassEvents( 1, cl ) << Endl; } Log() << kINFO << " " << Endl; } @@ -234,19 +238,18 @@ TMVA::DataSet* TMVA::DataSetFactory::BuildInitialDataSet( DataSetInfo& dsi, Data } //_______________________________________________________________________ -Bool_t TMVA::DataSetFactory::CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar ) -{ +Bool_t TMVA::DataSetFactory::CheckTTreeFormula( TTreeFormula* ttf, + const TString& expression, + Bool_t& hasDollar ) +{ // checks a TTreeFormula for problems Bool_t worked = kTRUE; - + if( ttf->GetNdim() <= 0 ) - Log() << kFATAL << "Expression " << expression.Data() << " could not be 
resolved to a valid formula. " << Endl; - // if( ttf->GetNcodes() == 0 ){ - // Log() << kWARNING << "Expression: " << expression.Data() << " does not appear to depend on any TTree variable --> please check spelling" << Endl; - // worked = kFALSE; - // } + Log() << kFATAL << "Expression " << expression.Data() + << " could not be resolved to a valid formula. " << Endl; if( ttf->GetNdata() == 0 ){ - Log() << kWARNING << "Expression: " << expression.Data() + Log() << kWARNING << "Expression: " << expression.Data() << " does not provide data for this event. " << "This event is not taken into account. --> please check if you use as a variable " << "an entry of an array which is not filled for some events " @@ -262,7 +265,7 @@ Bool_t TMVA::DataSetFactory::CheckTTreeFormula( TTreeFormula* ttf, const TString //_______________________________________________________________________ void TMVA::DataSetFactory::ChangeToNewTree( TreeInfo& tinfo, const DataSetInfo & dsi ) -{ +{ // While the data gets copied into the local training and testing // trees, the input tree can change (for intance when changing from // signal to background tree, or using TChains as input) The @@ -347,7 +350,7 @@ void TMVA::DataSetFactory::ChangeToNewTree( TreeInfo& tinfo, const DataSetInfo & if (dsi.GetClassInfo(clIdx)->GetName() != tinfo.GetClassName() ) { // if the tree is of another class fWeightFormula.push_back( 0 ); - continue; + continue; } ttf = 0; @@ -482,7 +485,7 @@ void TMVA::DataSetFactory::CalcMinMax( DataSet* ds, TMVA::DataSetInfo& dsi ) TMatrixD* TMVA::DataSetFactory::CalcCorrelationMatrix( DataSet* ds, const UInt_t classNumber ) { // computes correlation matrix for variables "theVars" in tree; - // "theType" defines the required event "type" + // "theType" defines the required event "type" // ("type" variable must be present in tree) // first compute variance-covariance @@ -498,7 +501,7 @@ TMatrixD* TMVA::DataSetFactory::CalcCorrelationMatrix( DataSet* ds, const UInt_t if (d > 0) (*mat)(ivar, jvar) /= sqrt(d); else { Log() << kWARNING << "<GetCorrelationMatrix> Zero variances for variables " - << "(" << ivar << ", " << jvar << ") = " << d + << "(" << ivar << ", " << jvar << ") = " << d << Endl; (*mat)(ivar, jvar) = 0; } @@ -523,7 +526,7 @@ TMatrixD* TMVA::DataSetFactory::CalcCovarianceMatrix( DataSet * ds, const UInt_t // init matrices TVectorD vec(nvar); - TMatrixD mat2(nvar, nvar); + TMatrixD mat2(nvar, nvar); for (ivar=0; ivar<nvar; ivar++) { vec(ivar) = 0; for (jvar=0; jvar<nvar; jvar++) mat2(ivar, jvar) = 0; @@ -538,13 +541,13 @@ TMatrixD* TMVA::DataSetFactory::CalcCovarianceMatrix( DataSet * ds, const UInt_t Double_t weight = ev->GetWeight(); ic += weight; // count used events - + for (ivar=0; ivar<nvar; ivar++) { - + Double_t xi = ev->GetValue(ivar); vec(ivar) += xi*weight; mat2(ivar, ivar) += (xi*xi*weight); - + for (jvar=ivar+1; jvar<nvar; jvar++) { Double_t xj = ev->GetValue(jvar); mat2(ivar, jvar) += (xi*xj*weight); @@ -570,13 +573,16 @@ TMatrixD* TMVA::DataSetFactory::CalcCovarianceMatrix( DataSet * ds, const UInt_t // --------------------------------------- new versions //_______________________________________________________________________ -void TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi, - TMVA::NumberPerClassOfTreeType& nTrainTestEvents, - TString& normMode, UInt_t& splitSeed, - TString& splitMode, - TString& mixMode ) +void +TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi, + EvtStatsPerClass& nEventRequests, + TString& normMode, + UInt_t& splitSeed, + TString& 
splitMode, + TString& mixMode ) { // the dataset splitting + Configurable splitSpecs( dsi.GetSplitOptions() ); splitSpecs.SetConfigName("DataSetFactory"); splitSpecs.SetConfigDescription( "Configuration options given in the \"PrepareForTrainingAndTesting\" call; these options define the creation of the data sets used for training and expert validation by TMVA" ); @@ -595,10 +601,10 @@ void TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi, splitSpecs.AddPreDefVal(TString("Random")); splitSpecs.AddPreDefVal(TString("Alternate")); splitSpecs.AddPreDefVal(TString("Block")); - + splitSeed = 100; splitSpecs.DeclareOptionRef( splitSeed, "SplitSeed", - "Seed for random event shuffling" ); + "Seed for random event shuffling" ); normMode = "NumEvents"; // the weight normalisation modes splitSpecs.DeclareOptionRef( normMode, "NormMode", @@ -609,21 +615,14 @@ void TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi, // the number of events - // initialization - nTrainTestEvents.insert( TMVA::NumberPerClassOfTreeType::value_type( Types::kTraining, TMVA::NumberPerClass( dsi.GetNClasses() ) ) ); - nTrainTestEvents.insert( TMVA::NumberPerClassOfTreeType::value_type( Types::kTesting, TMVA::NumberPerClass( dsi.GetNClasses() ) ) ); - // fill in the numbers for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) { - nTrainTestEvents[Types::kTraining].at(cl) = 0; - nTrainTestEvents[Types::kTesting].at(cl) = 0; - TString clName = dsi.GetClassInfo(cl)->GetName(); TString titleTrain = TString().Format("Number of training events of class %s (default: 0 = all)",clName.Data()).Data(); TString titleTest = TString().Format("Number of test events of class %s (default: 0 = all)",clName.Data()).Data(); - splitSpecs.DeclareOptionRef( nTrainTestEvents[Types::kTraining].at(cl) , TString("nTrain_")+clName, titleTrain ); - splitSpecs.DeclareOptionRef( nTrainTestEvents[Types::kTesting].at(cl) , TString("nTest_")+clName , titleTest ); + splitSpecs.DeclareOptionRef( nEventRequests.at(cl).nTrainingEventsRequested, TString("nTrain_")+clName, titleTrain ); + splitSpecs.DeclareOptionRef( nEventRequests.at(cl).nTestingEventsRequested , TString("nTest_")+clName , titleTest ); } splitSpecs.DeclareOptionRef( fVerbose, "V", "Verbosity (default: true)" ); @@ -637,7 +636,7 @@ void TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi, splitSpecs.CheckForUnusedOptions(); // output logging verbosity - if (Verbose()) fLogger->SetMinType( kVERBOSE ); + if (Verbose()) fLogger->SetMinType( kVERBOSE ); if (fVerboseLevel.CompareTo("Debug") ==0) fLogger->SetMinType( kDEBUG ); if (fVerboseLevel.CompareTo("Verbose") ==0) fLogger->SetMinType( kVERBOSE ); if (fVerboseLevel.CompareTo("Info") ==0) fLogger->SetMinType( kINFO ); @@ -647,72 +646,58 @@ void TMVA::DataSetFactory::InitOptions( TMVA::DataSetInfo& dsi, // adjust mixmode if same as splitmode option has been set Log() << kINFO << "Splitmode is: \"" << splitMode << "\" the mixmode is: \"" << mixMode << "\"" << Endl; if (mixMode=="SAMEASSPLITMODE") mixMode = splitMode; - else if (mixMode!=splitMode) + else if (mixMode!=splitMode) Log() << kINFO << "DataSet splitmode="<<splitMode <<" differs from mixmode="<<mixMode<<Endl; } //_______________________________________________________________________ -void TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi, - TMVA::DataInputHandler& dataInput, - TMVA::EventVectorOfClassesOfTreeType& tmpEventVector ) +void +TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi, + TMVA::DataInputHandler& dataInput, + 
EventVectorOfClassesOfTreeType& eventsmap, + EvtStatsPerClass& eventCounts) { // build empty event vectors // distributes events between kTraining/kTesting/kMaxTreeType - - tmpEventVector.insert( std::make_pair(Types::kTraining ,TMVA::EventVectorOfClasses(dsi.GetNClasses() ) ) ); - tmpEventVector.insert( std::make_pair(Types::kTesting ,TMVA::EventVectorOfClasses(dsi.GetNClasses() ) ) ); - tmpEventVector.insert( std::make_pair(Types::kMaxTreeType,TMVA::EventVectorOfClasses(dsi.GetNClasses() ) ) ); + const UInt_t nclasses = dsi.GetNClasses(); + + eventsmap[ Types::kTraining ] = EventVectorOfClasses(nclasses); + eventsmap[ Types::kTesting ] = EventVectorOfClasses(nclasses); + eventsmap[ Types::kMaxTreeType ] = EventVectorOfClasses(nclasses); // create the type, weight and boostweight branches - const UInt_t nvars = dsi.GetNVariables(); - const UInt_t ntgts = dsi.GetNTargets(); - const UInt_t nvis = dsi.GetNSpectators(); - // std::vector<Float_t> fmlEval(nvars+ntgts+1+1+nvis); // +1+1 for results of evaluation of cut and weight ttreeformula - - // number of signal and background events passing cuts - std::vector< Int_t > nInitialEvents( dsi.GetNClasses() ); - std::vector< Int_t > nEvBeforeCut( dsi.GetNClasses() ); - std::vector< Int_t > nEvAfterCut( dsi.GetNClasses() ); - std::vector< Float_t > nWeEvBeforeCut( dsi.GetNClasses() ); - std::vector< Float_t > nWeEvAfterCut( dsi.GetNClasses() ); - std::vector< Double_t > nNegWeights( dsi.GetNClasses() ); - std::vector< Float_t* > varAvLength( dsi.GetNClasses() ); + const UInt_t nvars = dsi.GetNVariables(); + const UInt_t ntgts = dsi.GetNTargets(); + const UInt_t nvis = dsi.GetNSpectators(); + + for (size_t i=0; i<nclasses; i++) { + eventCounts[i].varAvLength = new Float_t[nvars]; + for (UInt_t ivar=0; ivar<nvars; ivar++) + eventCounts[i].varAvLength[ivar] = 0; + } Bool_t haveArrayVariable = kFALSE; Bool_t *varIsArray = new Bool_t[nvars]; - for (size_t i=0; i<varAvLength.size(); i++) { - varAvLength[i] = new Float_t[nvars]; - for (UInt_t ivar=0; ivar<nvars; ivar++) { - //varIsArray[ivar] = kFALSE; - varAvLength[i][ivar] = 0; - } - } + // if we work with chains we need to remember the current tree if + // the chain jumps to a new tree we have to reset the formulas + for (UInt_t cl=0; cl<nclasses; cl++) { - // if we work with chains we need to remember the current tree - // if the chain jumps to a new tree we have to reset the formulas - for (UInt_t cl=0; cl<dsi.GetNClasses(); cl++) { - - Log() << kINFO << "Create training and testing trees -- looping over class \"" + Log() << kINFO << "Create training and testing trees -- looping over class \"" << dsi.GetClassInfo(cl)->GetName() << "\" ..." 
<< Endl; + EventStats& classEventCounts = eventCounts[cl]; + // info output for weights - const TString tmpWeight = dsi.GetClassInfo(cl)->GetWeight(); - if (tmpWeight!="") { - Log() << kINFO << "Weight expression for class \"" << dsi.GetClassInfo(cl)->GetName() << "\": \"" - << tmpWeight << "\"" << Endl; - } - else { - Log() << kINFO << "No weight expression defined for class \"" << dsi.GetClassInfo(cl)->GetName() - << "\"" << Endl; - } - + Log() << kINFO << "Weight expression for class \'" << dsi.GetClassInfo(cl)->GetName() << "\': \"" + << dsi.GetClassInfo(cl)->GetWeight() << "\"" << Endl; + // used for chains only TString currentFileName(""); - + std::vector<TreeInfo>::const_iterator treeIt(dataInput.begin(dsi.GetClassInfo(cl)->GetName())); for (;treeIt!=dataInput.end(dsi.GetClassInfo(cl)->GetName()); treeIt++) { @@ -721,18 +706,23 @@ void TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi, std::vector<Float_t> tgts(ntgts); std::vector<Float_t> vis(nvis); TreeInfo currentInfo = *treeIt; - + + Log() << kDEBUG << "Building event vectors " << currentInfo.GetTreeType() << Endl; + + EventVector& event_v = eventsmap[currentInfo.GetTreeType()].at(cl); + Bool_t isChain = (TString("TChain") == currentInfo.GetTree()->ClassName()); currentInfo.GetTree()->LoadTree(0); ChangeToNewTree( currentInfo, dsi ); // count number of events in tree before cut - nInitialEvents.at(cl) += currentInfo.GetTree()->GetEntries(); - + classEventCounts.nInitialEvents += currentInfo.GetTree()->GetEntries(); + // loop over events in ntuple - for (Long64_t evtIdx = 0; evtIdx < currentInfo.GetTree()->GetEntries(); evtIdx++) { + const UInt_t nEvts = currentInfo.GetTree()->GetEntries(); + for (Long64_t evtIdx = 0; evtIdx < nEvts; evtIdx++) { currentInfo.GetTree()->LoadTree(evtIdx); - + // may need to reload tree in case of chains if (isChain) { if (currentInfo.GetTree()->GetTree()->GetDirectory()->GetFile()->GetName() != currentFileName) { @@ -743,27 +733,27 @@ void TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi, currentInfo.GetTree()->GetEntry(evtIdx); Int_t sizeOfArrays = 1; Int_t prevArrExpr = 0; - + // ======= evaluate all formulas ================= // first we check if some of the formulas are arrays for (UInt_t ivar=0; ivar<nvars; ivar++) { Int_t ndata = fInputFormulas[ivar]->GetNdata(); - varAvLength[cl][ivar] += ndata; + classEventCounts.varAvLength[ivar] += ndata; if (ndata == 1) continue; haveArrayVariable = kTRUE; varIsArray[ivar] = kTRUE; if (sizeOfArrays == 1) { sizeOfArrays = ndata; prevArrExpr = ivar; - } + } else if (sizeOfArrays!=ndata) { Log() << kERROR << "ERROR while preparing training and testing trees:" << Endl; Log() << " multiple array-type expressions of different length were encountered" << Endl; - Log() << " location of error: event " << evtIdx + Log() << " location of error: event " << evtIdx << " in tree " << currentInfo.GetTree()->GetName() << " of file " << currentInfo.GetTree()->GetCurrentFile()->GetName() << Endl; - Log() << " expression " << fInputFormulas[ivar]->GetTitle() << " has " + Log() << " expression " << fInputFormulas[ivar]->GetTitle() << " has " << ndata << " entries, while" << Endl; Log() << " expression " << fInputFormulas[prevArrExpr]->GetTitle() << " has " << fInputFormulas[prevArrExpr]->GetNdata() << " entries" << Endl; @@ -782,26 +772,26 @@ void TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi, formula = fCutFormulas[cl]; if (formula) { Int_t ndata = formula->GetNdata(); - cutVal = (ndata==1 ? + cutVal = (ndata==1 ? 
formula->EvalInstance(0) : formula->EvalInstance(idata)); if (TMath::IsNaN(cutVal)) { containsNaN = kTRUE; - Log() << kWARNING << "Cut expression resolves to infinite value (NaN): " + Log() << kWARNING << "Cut expression resolves to infinite value (NaN): " << formula->GetTitle() << Endl; } } - + // the input variable for (UInt_t ivar=0; ivar<nvars; ivar++) { formula = fInputFormulas[ivar]; - Int_t ndata = formula->GetNdata(); - vars[ivar] = (ndata == 1 ? - formula->EvalInstance(0) : + Int_t ndata = formula->GetNdata(); + vars[ivar] = (ndata == 1 ? + formula->EvalInstance(0) : formula->EvalInstance(idata)); if (TMath::IsNaN(vars[ivar])) { containsNaN = kTRUE; - Log() << kWARNING << "Input expression resolves to infinite value (NaN): " + Log() << kWARNING << "Input expression resolves to infinite value (NaN): " << formula->GetTitle() << Endl; } } @@ -809,13 +799,13 @@ void TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi, // the targets for (UInt_t itrgt=0; itrgt<ntgts; itrgt++) { formula = fTargetFormulas[itrgt]; - Int_t ndata = formula->GetNdata(); - tgts[itrgt] = (ndata == 1 ? - formula->EvalInstance(0) : + Int_t ndata = formula->GetNdata(); + tgts[itrgt] = (ndata == 1 ? + formula->EvalInstance(0) : formula->EvalInstance(idata)); if (TMath::IsNaN(tgts[itrgt])) { containsNaN = kTRUE; - Log() << kWARNING << "Target expression resolves to infinite value (NaN): " + Log() << kWARNING << "Target expression resolves to infinite value (NaN): " << formula->GetTitle() << Endl; } } @@ -823,13 +813,13 @@ void TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi, // the spectators for (UInt_t itVis=0; itVis<nvis; itVis++) { formula = fSpectatorFormulas[itVis]; - Int_t ndata = formula->GetNdata(); - vis[itVis] = (ndata == 1 ? - formula->EvalInstance(0) : + Int_t ndata = formula->GetNdata(); + vis[itVis] = (ndata == 1 ? 
+ formula->EvalInstance(0) : formula->EvalInstance(idata)); if (TMath::IsNaN(vis[itVis])) { containsNaN = kTRUE; - Log() << kWARNING << "Spectator expression resolves to infinite value (NaN): " + Log() << kWARNING << "Spectator expression resolves to infinite value (NaN): " << formula->GetTitle() << Endl; } } @@ -845,24 +835,24 @@ void TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi, formula->EvalInstance(idata)); if (TMath::IsNaN(weight)) { containsNaN = kTRUE; - Log() << kWARNING << "Weight expression resolves to infinite value (NaN): " + Log() << kWARNING << "Weight expression resolves to infinite value (NaN): " << formula->GetTitle() << Endl; } } - - // Count the events before rejection due to cut or NaN value - // (weighted and unweighted) - nEvBeforeCut.at(cl) ++; + + // Count the events before rejection due to cut or NaN + // value (weighted and unweighted) + classEventCounts.nEvBeforeCut++; if (!TMath::IsNaN(weight)) - nWeEvBeforeCut.at(cl) += weight; + classEventCounts.nWeEvBeforeCut += weight; - // apply the cut - // skip rest if cut is not fulfilled + // apply the cut, skip rest if cut is not fulfilled if (cutVal<0.5) continue; - // global flag if negative weights exist -> can be used by classifiers who may - // require special data treatment (also print warning) - if (weight < 0) nNegWeights.at(cl)++; + // global flag if negative weights exist -> can be used + // by classifiers who may require special data + // treatment (also print warning) + if (weight < 0) classEventCounts.nNegWeights++; // now read the event-values (variables and regression targets) @@ -874,90 +864,93 @@ void TMVA::DataSetFactory::BuildEventVector( TMVA::DataSetInfo& dsi, // Count the events after rejection due to cut or NaN value // (weighted and unweighted) - nEvAfterCut.at(cl) ++; - nWeEvAfterCut.at(cl) += weight; + classEventCounts.nEvAfterCut++; + classEventCounts.nWeEvAfterCut += weight; // event accepted, fill temporary ntuple - tmpEventVector.find(currentInfo.GetTreeType())->second.at(cl).push_back(new Event(vars, tgts , vis, cl , weight)); - + event_v.push_back(new Event(vars, tgts , vis, cl , weight)); } } - currentInfo.GetTree()->ResetBranchAddresses(); } } - // for output, check maximum class name length + // for output format, get the maximum class name length Int_t maxL = dsi.GetClassNameMaxLength(); - + Log() << kINFO << "Number of events in input trees (after possible flattening of arrays):" << Endl; for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) { - Log() << kINFO << " " - << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() + Log() << kINFO << " " + << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() << " -- number of events : " - << std::setw(5) << nEvBeforeCut.at(cl) - << " / sum of weights: " << std::setw(5) << nWeEvBeforeCut.at(cl) << Endl; + << std::setw(5) << eventCounts[cl].nEvBeforeCut + << " / sum of weights: " << std::setw(5) << eventCounts[cl].nWeEvBeforeCut << Endl; } for (UInt_t cl = 0; cl < dsi.GetNClasses(); cl++) { - Log() << kINFO << " " << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() - <<" tree -- total number of entries: " + Log() << kINFO << " " << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() + <<" tree -- total number of entries: " << std::setw(5) << dataInput.GetEntries(dsi.GetClassInfo(cl)->GetName()) << Endl; } - Log() << kINFO << "Preselection:" << Endl; + Log() << kINFO << "Preselection: (will effect number of requested training and testing events)" << Endl; if (dsi.HasCuts()) { 
for (UInt_t cl = 0; cl< dsi.GetNClasses(); cl++) { - Log() << kINFO << " " << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() + Log() << kINFO << " " << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() << " requirement: \"" << dsi.GetClassInfo(cl)->GetCut() << "\"" << Endl; - Log() << kINFO << " " - << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() + Log() << kINFO << " " + << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() << " -- number of events passed: " - << std::setw(5) << nEvAfterCut.at(cl) - << " / sum of weights: " << std::setw(5) << nWeEvAfterCut.at(cl) << Endl; - Log() << kINFO << " " - << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() + << std::setw(5) << eventCounts[cl].nEvAfterCut + << " / sum of weights: " << std::setw(5) << eventCounts[cl].nWeEvAfterCut << Endl; + Log() << kINFO << " " + << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cl)->GetName() << " -- efficiency : " - << std::setw(6) << nWeEvAfterCut.at(cl)/nWeEvBeforeCut.at(cl) << Endl; + << std::setw(6) << eventCounts[cl].nWeEvAfterCut/eventCounts[cl].nWeEvBeforeCut << Endl; } } else Log() << kINFO << " No preselection cuts applied on event classes" << Endl; delete[] varIsArray; - for (size_t i=0; i<varAvLength.size(); i++) - delete[] varAvLength[i]; } //_______________________________________________________________________ -TMVA::DataSet* TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi, - TMVA::EventVectorOfClassesOfTreeType& tmpEventVector, - TMVA::NumberPerClassOfTreeType& nTrainTestEvents, - const TString& splitMode, - const TString& mixMode, - const TString& normMode, - UInt_t splitSeed) +TMVA::DataSet* +TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi, + EventVectorOfClassesOfTreeType& tmpEventVector, + EvtStatsPerClass& eventCounts, + const TString& splitMode, + const TString& mixMode, + const TString& normMode, + UInt_t splitSeed) { // Select and distribute unassigned events to kTraining and kTesting - Bool_t emptyUndefined = kTRUE; + //Bool_t emptyUndefined = kTRUE; -// // check if the vectors of all classes are empty - for( Int_t cls = 0, clsEnd = dsi.GetNClasses(); cls < clsEnd; ++cls ){ - emptyUndefined &= tmpEventVector[Types::kMaxTreeType].at(cls).empty(); - } + // check if the vectors of all classes are empty + //for( Int_t cls = 0, clsEnd = dsi.GetNClasses(); cls < clsEnd; ++cls ){ + // emptyUndefined &= tmpEventVector[Types::kMaxTreeType].at(cls).empty(); + //} TMVA::RandomGenerator rndm( splitSeed ); - + // ==== splitting of undefined events to kTraining and kTesting // if splitMode contains "RANDOM", then shuffle the undefined events - if (splitMode.Contains( "RANDOM" ) && !emptyUndefined ) { - Log() << kDEBUG << "randomly shuffling events which are not yet associated to testing or training"<<Endl; + if (splitMode.Contains( "RANDOM" ) /*&& !emptyUndefined*/ ) { // random shuffle the undefined events of each class for( UInt_t cls = 0; cls < dsi.GetNClasses(); ++cls ){ - std::random_shuffle(tmpEventVector[Types::kMaxTreeType].at(cls).begin(), - tmpEventVector[Types::kMaxTreeType].at(cls).end(), - rndm ); + EventVector& unspecifiedEvents = tmpEventVector[Types::kMaxTreeType].at(cls); + if( ! 
unspecifiedEvents.empty() ) { + Log() << kDEBUG << "randomly shuffling " + << unspecifiedEvents.size() + << " events of class " << cls + << " which are not yet associated to testing or training" << Endl; + std::random_shuffle( unspecifiedEvents.begin(), + unspecifiedEvents.end(), + rndm ); + } } } @@ -968,59 +961,72 @@ TMVA::DataSet* TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi, Log() << kDEBUG << "check number of training/testing events, requested and available number of events and for class " << cls << Endl; // check if enough or too many events are already in the training/testing eventvectors of the class cls - EventVector& eventVectorTraining = tmpEventVector.find( Types::kTraining )->second.at(cls); - EventVector& eventVectorTesting = tmpEventVector.find( Types::kTesting )->second.at(cls); - EventVector& eventVectorUndefined= tmpEventVector.find( Types::kMaxTreeType )->second.at(cls); - - Int_t alreadyAvailableTraining = eventVectorTraining.size(); - Int_t alreadyAvailableTesting = eventVectorTesting.size(); - Int_t availableUndefined = eventVectorUndefined.size(); - - Int_t requestedTraining = nTrainTestEvents.find( Types::kTraining )->second.at(cls); - Int_t requestedTesting = nTrainTestEvents.find( Types::kTesting )->second.at(cls); - - Log() << kDEBUG << "availableTraining " << alreadyAvailableTraining << Endl; - Log() << kDEBUG << "availableTesting " << alreadyAvailableTesting << Endl; - Log() << kDEBUG << "availableUndefined " << availableUndefined << Endl; - Log() << kDEBUG << "requestedTraining " << requestedTraining << Endl; - Log() << kDEBUG << "requestedTesting " << requestedTesting << Endl; - // - // nomenclature r=available training - // s=available testing - // u=available undefined - // R= requested training - // S= requested testing - // nR = used for selection of training events - // nS = used for selection of test events - // we have: nR + nS = r+s+u + EventVector& eventVectorTraining = tmpEventVector[ Types::kTraining ].at(cls); + EventVector& eventVectorTesting = tmpEventVector[ Types::kTesting ].at(cls); + EventVector& eventVectorUndefined = tmpEventVector[ Types::kMaxTreeType ].at(cls); + + Int_t availableTraining = eventVectorTraining.size(); + Int_t availableTesting = eventVectorTesting.size(); + Int_t availableUndefined = eventVectorUndefined.size(); + + Float_t presel_scale = eventCounts[cls].cutScaling(); + + Int_t requestedTraining = Int_t(eventCounts[cls].nTrainingEventsRequested * presel_scale); + Int_t requestedTesting = Int_t(eventCounts[cls].nTestingEventsRequested * presel_scale); + + Log() << kDEBUG << "events in training trees : " << availableTraining << Endl; + Log() << kDEBUG << "events in testing trees : " << availableTesting << Endl; + Log() << kDEBUG << "events in unspecified trees : " << availableUndefined << Endl; + Log() << kDEBUG << "requested for training : " << requestedTraining; + if(presel_scale<1) + Log() << " ( " << eventCounts[cls].nTrainingEventsRequested + << " * " << presel_scale << " preselection efficiency)" << Endl; + else + Log() << Endl; + Log() << kDEBUG << "requested for testing : " << requestedTesting; + if(presel_scale<1) + Log() << " ( " << eventCounts[cls].nTestingEventsRequested + << " * " << presel_scale << " preselection efficiency)" << Endl; + else + Log() << Endl; + + // nomenclature r = available training + // s = available testing + // u = available undefined + // R = requested training + // S = requested testing + // nR = to be used to select training events + // nS = to be used to select test events + // 
we have the constraint: nR + nS < r+s+u, + // since we can not use more events than we have // free events: Nfree = u-Thet(R-r)-Thet(S-s) - // nomenclature: Thet(x) = x, if x>0 else 0; + // nomenclature: Thet(x) = x, if x>0 else 0 // nR = max(R,r) + 0.5 * Nfree // nS = max(S,s) + 0.5 * Nfree // nR +nS = R+S + u-R+r-S+s = u+r+s= ok! for R>r // nR +nS = r+S + u-S+s = u+r+s= ok! for r>R - //EVT three different cases might occur here + // three different cases might occur here // // Case a - // requestedTraining and requestedTesting >0 + // requestedTraining and requestedTesting >0 // free events: Nfree = u-Thet(R-r)-Thet(S-s) // nR = Max(R,r) + 0.5 * Nfree // nS = Max(S,s) + 0.5 * Nfree - // + // // Case b // exactly one of requestedTraining or requestedTesting >0 // assume training R >0 - // nR = max(R,r) + // nR = max(R,r) // nS = s+u+r-nR // and s=nS // - //Case c: - // requestedTraining=0, requestedTesting=0 + // Case c + // requestedTraining=0, requestedTesting=0 // Nfree = u-|r-s| // if NFree >=0 // R = Max(r,s) + 0.5 * Nfree = S - // else if r>s + // else if r>s // R = r; S=s+u // else // R = r+u; S=s @@ -1030,90 +1036,119 @@ TMVA::DataSet* TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi, // distribute undefined events according to nR, nS // finally determine actual sub samples from nR and nS to be used in training / testing // - // implementation of case C) - int useForTesting,useForTraining; - if( (requestedTraining == 0) && (requestedTesting == 0)){ - // 0 means automatic distribution of events - Log() << kDEBUG << "requested 0" << Endl; - // try to get the same number of events in training and testing for this class (balance) - Int_t NFree = availableUndefined - TMath::Abs(alreadyAvailableTraining - alreadyAvailableTesting); - if (NFree >=0){ - requestedTraining = TMath::Max(alreadyAvailableTraining,alreadyAvailableTesting) + NFree/2; - requestedTesting = availableUndefined+alreadyAvailableTraining+alreadyAvailableTesting - requestedTraining; // the rest - } else if (alreadyAvailableTraining > alreadyAvailableTesting){ //r>s - requestedTraining = alreadyAvailableTraining; - requestedTesting = alreadyAvailableTesting +availableUndefined; - } - else { - requestedTraining = alreadyAvailableTraining+availableUndefined; - requestedTesting = alreadyAvailableTesting; + + Int_t useForTesting(0),useForTraining(0); + Int_t allAvailable(availableUndefined + availableTraining + availableTesting); + + if( (requestedTraining == 0) && (requestedTesting == 0)){ + + // Case C: balance the number of training and testing events + + if ( availableUndefined >= TMath::Abs(availableTraining - availableTesting) ) { + // enough unspecified are available to equal training and testing + useForTraining = useForTesting = allAvailable/2; + } else { + // all unspecified are assigned to the smaller of training / testing + useForTraining = availableTraining; + useForTesting = availableTesting; + if (availableTraining < availableTesting) + useForTraining += availableUndefined; + else + useForTesting += availableUndefined; } - useForTraining = requestedTraining; - useForTesting = requestedTesting; + requestedTraining = useForTraining; + requestedTesting = useForTesting; } - else if (requestedTesting == 0){ // case B) - useForTraining = TMath::Max(requestedTraining,alreadyAvailableTraining); - useForTesting= availableUndefined+alreadyAvailableTraining+alreadyAvailableTesting - useForTraining; // the rest + + else if (requestedTesting == 0){ + // case B + useForTraining = 
TMath::Max(requestedTraining,availableTraining); + if (allAvailable < useForTraining) { + Log() << kFATAL << "More events requested for training (" + << requestedTraining << ") than available (" + << allAvailable << ")!" << Endl; + } + useForTesting = allAvailable - useForTraining; // the rest requestedTesting = useForTesting; } + else if (requestedTraining == 0){ // case B) - useForTesting = TMath::Max(requestedTesting,alreadyAvailableTesting); - useForTraining= availableUndefined+alreadyAvailableTraining+alreadyAvailableTesting - useForTesting; // the rest + useForTesting = TMath::Max(requestedTesting,availableTesting); + if (allAvailable < useForTesting) { + Log() << kFATAL << "More events requested for testing (" + << requestedTesting << ") than available (" + << allAvailable << ")!" << Endl; + } + useForTraining= allAvailable - useForTesting; // the rest requestedTraining = useForTraining; } - else{ // case A - int NFree = availableUndefined-TMath::Max(requestedTraining-alreadyAvailableTraining,0)-TMath::Max(requestedTesting-alreadyAvailableTesting,0); + + else { + // Case A + // requestedTraining R and requestedTesting S >0 + // free events: Nfree = u-Thet(R-r)-Thet(S-s) + // nR = Max(R,r) + 0.5 * Nfree + // nS = Max(S,s) + 0.5 * Nfree + Int_t stillNeedForTraining = TMath::Max(requestedTraining-availableTraining,0); + Int_t stillNeedForTesting = TMath::Max(requestedTesting-availableTesting,0); + + int NFree = availableUndefined - stillNeedForTraining - stillNeedForTesting; if (NFree <0) NFree = 0; - useForTraining = TMath::Max(requestedTraining,alreadyAvailableTraining) + NFree/2; - useForTesting= availableUndefined+alreadyAvailableTraining+alreadyAvailableTesting - useForTraining; // the rest + useForTraining = TMath::Max(requestedTraining,availableTraining) + NFree/2; + useForTesting= allAvailable - useForTraining; // the rest } + Log() << kDEBUG << "determined event sample size to select training sample from="<<useForTraining<<Endl; Log() << kDEBUG << "determined event sample size to select test sample from="<<useForTesting<<Endl; - - // associate undefined events + + + + + + // associate undefined events if( splitMode == "ALTERNATE" ){ Log() << kDEBUG << "split 'ALTERNATE'" << Endl; - Int_t nTraining = alreadyAvailableTraining; - Int_t nTesting = alreadyAvailableTesting; + Int_t nTraining = availableTraining; + Int_t nTesting = availableTesting; for( EventVector::iterator it = eventVectorUndefined.begin(), itEnd = eventVectorUndefined.end(); it != itEnd; ){ - ++nTraining; - if( nTraining <= requestedTraining ){ - eventVectorTraining.insert( eventVectorTraining.end(), (*it) ); - ++it; - } + ++nTraining; + if( nTraining <= requestedTraining ){ + eventVectorTraining.insert( eventVectorTraining.end(), (*it) ); + ++it; + } if( it != itEnd ){ - ++nTesting; + ++nTesting; eventVectorTesting.insert( eventVectorTesting.end(), (*it) ); ++it; } } - }else{ + } else { Log() << kDEBUG << "split '" << splitMode << "'" << Endl; - // test if enough events are available - Log() << kDEBUG << "availableundefined : " << availableUndefined << Endl; - Log() << kDEBUG << "useForTraining : " << useForTraining << Endl; - Log() << kDEBUG << "useForTesting : " << useForTesting << Endl; - Log() << kDEBUG << "alreadyAvailableTraining : " << alreadyAvailableTraining << Endl; - Log() << kDEBUG << "alreadyAvailableTesting : " << alreadyAvailableTesting << Endl; - - if( availableUndefined<(useForTraining-alreadyAvailableTraining) || - availableUndefined<(useForTesting -alreadyAvailableTesting ) || - 
availableUndefined<(useForTraining+useForTesting-alreadyAvailableTraining-alreadyAvailableTesting ) ){ - Log() << kFATAL << "More events requested than available!" << Endl; - } - - // select the events - if (useForTraining>alreadyAvailableTraining){ - eventVectorTraining.insert( eventVectorTraining.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTraining- alreadyAvailableTraining ); - eventVectorUndefined.erase( eventVectorUndefined.begin(), eventVectorUndefined.begin() + useForTraining- alreadyAvailableTraining); + // test if enough events are available + Log() << kDEBUG << "availableundefined : " << availableUndefined << Endl; + Log() << kDEBUG << "useForTraining : " << useForTraining << Endl; + Log() << kDEBUG << "useForTesting : " << useForTesting << Endl; + Log() << kDEBUG << "availableTraining : " << availableTraining << Endl; + Log() << kDEBUG << "availableTesting : " << availableTesting << Endl; + + if( availableUndefined<(useForTraining-availableTraining) || + availableUndefined<(useForTesting -availableTesting ) || + availableUndefined<(useForTraining+useForTesting-availableTraining-availableTesting ) ){ + Log() << kFATAL << "More events requested than available!" << Endl; + } + + // select the events + if (useForTraining>availableTraining){ + eventVectorTraining.insert( eventVectorTraining.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTraining- availableTraining ); + eventVectorUndefined.erase( eventVectorUndefined.begin(), eventVectorUndefined.begin() + useForTraining- availableTraining); } - if (useForTesting>alreadyAvailableTesting){ - eventVectorTesting.insert( eventVectorTesting.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTesting- alreadyAvailableTesting ); + if (useForTesting>availableTesting){ + eventVectorTesting.insert( eventVectorTesting.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTesting- availableTesting ); } } - eventVectorUndefined.clear(); + eventVectorUndefined.clear(); // finally shorten the event vectors to the requested size by removing random events if (splitMode.Contains( "RANDOM" )){ UInt_t sizeTraining = eventVectorTraining.size(); @@ -1153,21 +1188,21 @@ TMVA::DataSet* TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi, } } else { // erase at end - if( eventVectorTraining.size() < UInt_t(requestedTraining) ) - Log() << kWARNING << "DataSetFactory/requested number of training samples larger than size of eventVectorTraining.\n" - << "There is probably an issue. Please contact the TMVA developers." << Endl; + if( eventVectorTraining.size() < UInt_t(requestedTraining) ) + Log() << kWARNING << "DataSetFactory/requested number of training samples larger than size of eventVectorTraining.\n" + << "There is probably an issue. Please contact the TMVA developers." << Endl; std::for_each( eventVectorTraining.begin()+requestedTraining, eventVectorTraining.end(), DeleteFunctor<Event>() ); eventVectorTraining.erase(eventVectorTraining.begin()+requestedTraining,eventVectorTraining.end()); - if( eventVectorTesting.size() < UInt_t(requestedTesting) ) - Log() << kWARNING << "DataSetFactory/requested number of testing samples larger than size of eventVectorTesting.\n" - << "There is probably an issue. Please contact the TMVA developers." << Endl; + if( eventVectorTesting.size() < UInt_t(requestedTesting) ) + Log() << kWARNING << "DataSetFactory/requested number of testing samples larger than size of eventVectorTesting.\n" + << "There is probably an issue. 
Please contact the TMVA developers." << Endl; std::for_each( eventVectorTesting.begin()+requestedTesting, eventVectorTesting.end(), DeleteFunctor<Event>() ); eventVectorTesting.erase(eventVectorTesting.begin()+requestedTesting,eventVectorTesting.end()); } } - TMVA::DataSetFactory::RenormEvents( dsi, tmpEventVector, normMode ); + TMVA::DataSetFactory::RenormEvents( dsi, tmpEventVector, eventCounts, normMode ); Int_t trainingSize = 0; Int_t testingSize = 0; @@ -1211,35 +1246,35 @@ TMVA::DataSet* TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi, Log() << kDEBUG << "insert class 0 into training and test vector" << Endl; trainingEventVector->insert( trainingEventVector->end(), tmpEventVector[Types::kTraining].at(0).begin(), tmpEventVector[Types::kTraining].at(0).end() ); testingEventVector->insert( testingEventVector->end(), tmpEventVector[Types::kTesting].at(0).begin(), tmpEventVector[Types::kTesting].at(0).end() ); - + // insert other classes EvtVecIt itTarget; for( UInt_t cls = 1; cls < dsi.GetNClasses(); ++cls ){ Log() << kDEBUG << "insert class " << cls << Endl; // training vector itTarget = trainingEventVector->begin() - 1; // start one before begin - // loop over source + // loop over source for( itEvent = tmpEventVector[Types::kTraining].at(cls).begin(), itEventEnd = tmpEventVector[Types::kTraining].at(cls).end(); itEvent != itEventEnd; ++itEvent ){ // if( std::distance( itTarget, trainingEventVector->end()) < Int_t(cls+1) ) { if( (trainingEventVector->end() - itTarget) < Int_t(cls+1) ) { itTarget = trainingEventVector->end(); trainingEventVector->insert( itTarget, itEvent, itEventEnd ); // fill in the rest without mixing break; - }else{ + }else{ itTarget += cls+1; trainingEventVector->insert( itTarget, (*itEvent) ); // fill event } } // testing vector itTarget = testingEventVector->begin() - 1; - // loop over source + // loop over source for( itEvent = tmpEventVector[Types::kTesting].at(cls).begin(), itEventEnd = tmpEventVector[Types::kTesting].at(cls).end(); itEvent != itEventEnd; ++itEvent ){ // if( std::distance( itTarget, testingEventVector->end()) < Int_t(cls+1) ) { if( ( testingEventVector->end() - itTarget ) < Int_t(cls+1) ) { itTarget = testingEventVector->end(); testingEventVector->insert( itTarget, itEvent, itEventEnd ); // fill in the rest without mixing break; - }else{ + }else{ itTarget += cls+1; testingEventVector->insert( itTarget, (*itEvent) ); // fill event } @@ -1250,13 +1285,13 @@ TMVA::DataSet* TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi, // std::cout << std::endl; // std::cout << "TRAINING VECTOR" << std::endl; // std::transform( trainingEventVector->begin(), trainingEventVector->end(), ostream_iterator<Int_t>(std::cout, "|"), std::mem_fun(&TMVA::Event::GetClass) ); - + // std::cout << std::endl; // std::cout << "TESTING VECTOR" << std::endl; // std::transform( testingEventVector->begin(), testingEventVector->end(), ostream_iterator<Int_t>(std::cout, "|"), std::mem_fun(&TMVA::Event::GetClass) ); // std::cout << std::endl; - }else{ + }else{ for( UInt_t cls = 0; cls < dsi.GetNClasses(); ++cls ){ trainingEventVector->insert( trainingEventVector->end(), tmpEventVector[Types::kTraining].at(cls).begin(), tmpEventVector[Types::kTraining].at(cls).end() ); testingEventVector->insert ( testingEventVector->end(), tmpEventVector[Types::kTesting].at(cls).begin(), tmpEventVector[Types::kTesting].at(cls).end() ); @@ -1288,20 +1323,22 @@ TMVA::DataSet* TMVA::DataSetFactory::MixEvents( DataSetInfo& dsi, // create dataset DataSet* ds = new DataSet(dsi); - Log() << kINFO << 
"Create internal training tree" << Endl; - ds->SetEventCollection(trainingEventVector, Types::kTraining ); - Log() << kINFO << "Create internal testing tree" << Endl; - ds->SetEventCollection(testingEventVector, Types::kTesting ); + Log() << kINFO << "Create internal training tree" << Endl; + ds->SetEventCollection(trainingEventVector, Types::kTraining ); + Log() << kINFO << "Create internal testing tree" << Endl; + ds->SetEventCollection(testingEventVector, Types::kTesting ); return ds; - + } //_______________________________________________________________________ -void TMVA::DataSetFactory::RenormEvents( TMVA::DataSetInfo& dsi, - TMVA::EventVectorOfClassesOfTreeType& tmpEventVector, - const TString& normMode ) +void +TMVA::DataSetFactory::RenormEvents( TMVA::DataSetInfo& dsi, + EventVectorOfClassesOfTreeType& tmpEventVector, + const EvtStatsPerClass& eventCounts, + const TString& normMode ) { // ============================================================ // renormalisation @@ -1406,20 +1443,18 @@ void TMVA::DataSetFactory::RenormEvents( TMVA::DataSetInfo& dsi, // --------------------------------- // now apply the normalization factors Int_t maxL = dsi.GetClassNameMaxLength(); - for (UInt_t cls = 0, clsEnd = dsi.GetNClasses(); cls<clsEnd; ++cls) { - Log() << kINFO << "--> Rescale " << setiosflags(ios::left) << std::setw(maxL) + for (UInt_t cls = 0, clsEnd = dsi.GetNClasses(); cls<clsEnd; ++cls) { + Log() << kINFO << "--> Rescale " << setiosflags(ios::left) << std::setw(maxL) << dsi.GetClassInfo(cls)->GetName() << " event weights by factor: " << renormFactor.at(cls) << Endl; - std::for_each( tmpEventVector[Types::kTraining].at(cls).begin(), + std::for_each( tmpEventVector[Types::kTraining].at(cls).begin(), tmpEventVector[Types::kTraining].at(cls).end(), std::bind2nd(std::mem_fun(&TMVA::Event::ScaleWeight),renormFactor.at(cls)) ); - std::for_each( tmpEventVector[Types::kTesting].at(cls).begin(), + std::for_each( tmpEventVector[Types::kTesting].at(cls).begin(), tmpEventVector[Types::kTesting].at(cls).end(), std::bind2nd(std::mem_fun(&TMVA::Event::ScaleWeight),renormFactor.at(cls)) ); } - - // --------------------------------- // for information purposes dsi.SetNormalization( normMode ); @@ -1454,20 +1489,29 @@ void TMVA::DataSetFactory::RenormEvents( TMVA::DataSetInfo& dsi, testingSumWeights += testingSumWeightsPerClass.at(cls); // output statistics - Log() << kINFO << setiosflags(ios::left) << std::setw(maxL) - << dsi.GetClassInfo(cls)->GetName() << " -- " - << "training entries : " << trainingSizePerClass.at(cls) - << " (" << "sum of weights: " << trainingSumWeightsPerClass.at(cls) << ")" << Endl; - Log() << kINFO << setiosflags(ios::left) << std::setw(maxL) - << dsi.GetClassInfo(cls)->GetName() << " -- " - << "testing entries : " << testingSizePerClass.at(cls) - << " (" << "sum of weights: " << testingSumWeightsPerClass.at(cls) << ")" << Endl; - Log() << kINFO << setiosflags(ios::left) << std::setw(maxL) - << dsi.GetClassInfo(cls)->GetName() << " -- " - << "training and testing entries: " - << (trainingSizePerClass.at(cls)+testingSizePerClass.at(cls)) - << " (" << "sum of weights: " + + Log() << kINFO << setiosflags(ios::left) << std::setw(maxL) + << dsi.GetClassInfo(cls)->GetName() << " -- " + << "training events : " << trainingSizePerClass.at(cls) + << " (sum of weights: " << trainingSumWeightsPerClass.at(cls) << ")" + << " - requested were " << eventCounts[cls].nTrainingEventsRequested << " events" << Endl; + Log() << kINFO << setiosflags(ios::left) << std::setw(maxL) + << 
dsi.GetClassInfo(cls)->GetName() << " -- " + << "testing events : " << testingSizePerClass.at(cls) + << " (sum of weights: " << testingSumWeightsPerClass.at(cls) << ")" + << " - requested were " << eventCounts[cls].nTestingEventsRequested << " events" << Endl; + Log() << kINFO << setiosflags(ios::left) << std::setw(maxL) + << dsi.GetClassInfo(cls)->GetName() << " -- " + << "training and testing events: " + << (trainingSizePerClass.at(cls)+testingSizePerClass.at(cls)) + << " (sum of weights: " << (trainingSumWeightsPerClass.at(cls)+testingSumWeightsPerClass.at(cls)) << ")" << Endl; + if(eventCounts[cls].nEvAfterCut<eventCounts[cls].nEvBeforeCut) { + Log() << kINFO << setiosflags(ios::left) << std::setw(maxL) + << dsi.GetClassInfo(cls)->GetName() << " -- " + << "due to the preselection a scaling factor has been applied to the numbers of requested events: " + << eventCounts[cls].cutScaling() << Endl; + } } } diff --git a/tmva/src/DataSetInfo.cxx b/tmva/src/DataSetInfo.cxx index e59f7c47eb0..1d773b0e851 100644 --- a/tmva/src/DataSetInfo.cxx +++ b/tmva/src/DataSetInfo.cxx @@ -58,7 +58,7 @@ #endif //_______________________________________________________________________ -TMVA::DataSetInfo::DataSetInfo(const TString& name) +TMVA::DataSetInfo::DataSetInfo(const TString& name) : TObject(), fDataSetManager(NULL), fName(name), @@ -81,7 +81,7 @@ TMVA::DataSetInfo::DataSetInfo(const TString& name) } //_______________________________________________________________________ -TMVA::DataSetInfo::~DataSetInfo() +TMVA::DataSetInfo::~DataSetInfo() { // destructor ClearDataSet(); @@ -101,6 +101,12 @@ void TMVA::DataSetInfo::ClearDataSet() const if(fDataSet!=0) { delete fDataSet; fDataSet=0; } } +void +TMVA::DataSetInfo::SetMsgType( EMsgType t ) const +{ + fLogger->SetMinType(t); +} + //_______________________________________________________________________ TMVA::ClassInfo* TMVA::DataSetInfo::AddClass( const TString& className ) { @@ -119,12 +125,6 @@ TMVA::ClassInfo* TMVA::DataSetInfo::AddClass( const TString& className ) return fClasses.back(); } -//_______________________________________________________________________ -void TMVA::DataSetInfo::SetMsgType( EMsgType t ) const -{ - fLogger->SetMinType(t); -} - //_______________________________________________________________________ TMVA::ClassInfo* TMVA::DataSetInfo::GetClassInfo( const TString& name ) const { @@ -166,12 +166,12 @@ std::vector<Float_t>* TMVA::DataSetInfo::GetTargetsForMulticlass( const TMVA::E // fTargetsForMulticlass->resize( GetNClasses() ); fTargetsForMulticlass->assign( GetNClasses(), 0.0 ); fTargetsForMulticlass->at( ev->GetClass() ) = 1.0; - return fTargetsForMulticlass; + return fTargetsForMulticlass; } //_______________________________________________________________________ -Bool_t TMVA::DataSetInfo::HasCuts() const +Bool_t TMVA::DataSetInfo::HasCuts() const { Bool_t hasCuts = kFALSE; for (std::vector<ClassInfo*>::iterator it = fClasses.begin(); it < fClasses.end(); it++) { @@ -181,22 +181,26 @@ Bool_t TMVA::DataSetInfo::HasCuts() const } //_______________________________________________________________________ -const TMatrixD* TMVA::DataSetInfo::CorrelationMatrix( const TString& className ) const -{ +const TMatrixD* TMVA::DataSetInfo::CorrelationMatrix( const TString& className ) const +{ ClassInfo* ptr = GetClassInfo(className); return ptr?ptr->GetCorrelationMatrix():0; } //_______________________________________________________________________ -TMVA::VariableInfo& TMVA::DataSetInfo::AddVariable( const TString& expression, const 
TString& title, const TString& unit, - Double_t min, Double_t max, char varType, - Bool_t normalized, void* external ) +TMVA::VariableInfo& TMVA::DataSetInfo::AddVariable( const TString& expression, + const TString& title, + const TString& unit, + Double_t min, Double_t max, + char varType, + Bool_t normalized, + void* external ) { - // add a variable (can be a complex expression) to the set of variables used in - // the MV analysis + // add a variable (can be a complex expression) to the set of + // variables used in the MV analysis TString regexpr = expression; // remove possible blanks regexpr.ReplaceAll(" ", "" ); - fVariables.push_back(VariableInfo( regexpr, title, unit, + fVariables.push_back(VariableInfo( regexpr, title, unit, fVariables.size()+1, varType, external, min, max, normalized )); fNeedsRebuilding = kTRUE; return fVariables.back(); @@ -211,17 +215,21 @@ TMVA::VariableInfo& TMVA::DataSetInfo::AddVariable( const VariableInfo& varInfo) } //_______________________________________________________________________ -TMVA::VariableInfo& TMVA::DataSetInfo::AddTarget( const TString& expression, const TString& title, const TString& unit, - Double_t min, Double_t max, - Bool_t normalized, void* external ) +TMVA::VariableInfo& TMVA::DataSetInfo::AddTarget( const TString& expression, + const TString& title, + const TString& unit, + Double_t min, Double_t max, + Bool_t normalized, + void* external ) { - // add a variable (can be a complex expression) to the set of variables used in - // the MV analysis + // add a variable (can be a complex expression) to the set of + // variables used in the MV analysis TString regexpr = expression; // remove possible blanks regexpr.ReplaceAll(" ", "" ); char type='F'; - fTargets.push_back(VariableInfo( regexpr, title, unit, - fTargets.size()+1, type, external, min, max, normalized )); + fTargets.push_back(VariableInfo( regexpr, title, unit, + fTargets.size()+1, type, external, min, + max, normalized )); fNeedsRebuilding = kTRUE; return fTargets.back(); } @@ -235,7 +243,9 @@ TMVA::VariableInfo& TMVA::DataSetInfo::AddTarget( const VariableInfo& varInfo){ } //_______________________________________________________________________ -TMVA::VariableInfo& TMVA::DataSetInfo::AddSpectator( const TString& expression, const TString& title, const TString& unit, +TMVA::VariableInfo& TMVA::DataSetInfo::AddSpectator( const TString& expression, + const TString& title, + const TString& unit, Double_t min, Double_t max, char type, Bool_t normalized, void* external ) { @@ -243,7 +253,7 @@ TMVA::VariableInfo& TMVA::DataSetInfo::AddSpectator( const TString& expression, // the MV analysis TString regexpr = expression; // remove possible blanks regexpr.ReplaceAll(" ", "" ); - fSpectators.push_back(VariableInfo( regexpr, title, unit, + fSpectators.push_back(VariableInfo( regexpr, title, unit, fSpectators.size()+1, type, external, min, max, normalized )); fNeedsRebuilding = kTRUE; return fSpectators.back(); @@ -282,7 +292,7 @@ void TMVA::DataSetInfo::SetWeightExpression( const TString& expr, const TString& if (className != "") { TMVA::ClassInfo* ci = AddClass(className); ci->SetWeight( expr ); - } + } else { // no class name specified, set weight for all classes if (fClasses.size()==0) { @@ -321,7 +331,7 @@ void TMVA::DataSetInfo::AddCut( const TCut& cut, const TString& className ) // set the cut for the classes if (className == "") { // if no className has been given set the cut for all the classes for (std::vector<ClassInfo*>::iterator it = fClasses.begin(); it < 
fClasses.end(); it++) { - const TCut& oldCut = (*it)->GetCut(); + const TCut& oldCut = (*it)->GetCut(); (*it)->SetCut( oldCut+cut ); } } @@ -344,7 +354,7 @@ std::vector<TString> TMVA::DataSetInfo::GetListOfVariables() const //_______________________________________________________________________ void TMVA::DataSetInfo::PrintCorrelationMatrix( const TString& className ) -{ +{ // calculates the correlation matrices for signal and background, // prints them to standard output, and fills 2D histograms Log() << kINFO << "Correlation matrix (" << className << "):" << Endl; @@ -367,7 +377,7 @@ TH2* TMVA::DataSetInfo::CreateCorrelationMatrixHist( const TMatrixD* m, for (UInt_t jvar=0; jvar<nvar; jvar++) { (*tm)(ivar, jvar) = (*m)(ivar,jvar); } - } + } TH2F* h2 = new TH2F( *tm ); h2->SetNameTitle( hName, hTitle ); @@ -379,7 +389,7 @@ TH2* TMVA::DataSetInfo::CreateCorrelationMatrixHist( const TMatrixD* m, // present in percent, and round off digits // also, use absolute value of correlation coefficient (ignore sign) - h2->Scale( 100.0 ); + h2->Scale( 100.0 ); for (UInt_t ibin=1; ibin<=nvar; ibin++) { for (UInt_t jbin=1; jbin<=nvar; jbin++) { h2->SetBinContent( ibin, jbin, Int_t(h2->GetBinContent( ibin, jbin )) ); @@ -404,24 +414,24 @@ TH2* TMVA::DataSetInfo::CreateCorrelationMatrixHist( const TMatrixD* m, // gROOT->SetStyle("Plain"); // TStyle* gStyle = gROOT->GetStyle( "Plain" ); // gStyle->SetPalette( 1, 0 ); - // TPaletteAxis* paletteAxis + // TPaletteAxis* paletteAxis // = (TPaletteAxis*)h2->GetListOfFunctions()->FindObject( "palette" ); // ------------------------------------------------------------------------------------- - + Log() << kDEBUG << "Created correlation matrix as 2D histogram: " << h2->GetName() << Endl; - + return h2; } //_______________________________________________________________________ -TMVA::DataSet* TMVA::DataSetInfo::GetDataSet() const +TMVA::DataSet* TMVA::DataSetInfo::GetDataSet() const { // returns data set if (fDataSet==0 || fNeedsRebuilding) { if(fDataSet!=0) ClearDataSet(); // fDataSet = DataSetManager::Instance().CreateDataSet(GetName()); //DSMTEST replaced by following lines if( !fDataSetManager ) - Log() << kFATAL << "DataSetManager has not been set in DataSetInfo (GetDataSet() )." << Endl; + Log() << kFATAL << "DataSetManager has not been set in DataSetInfo (GetDataSet() )." 
<< Endl; fDataSet = fDataSetManager->CreateDataSet(GetName()); fNeedsRebuilding = kFALSE; diff --git a/tmva/src/DataSetManager.cxx b/tmva/src/DataSetManager.cxx index caa8119d719..63c81e0ea14 100644 --- a/tmva/src/DataSetManager.cxx +++ b/tmva/src/DataSetManager.cxx @@ -51,7 +51,7 @@ using std::endl; // void TMVA::DataSetManager::DestroyInstance() { if (fgDSManager) { delete fgDSManager; fgDSManager=0; } } // DSMTEST removed //_______________________________________________________________________ -TMVA::DataSetManager::DataSetManager( DataInputHandler& dataInput ) +TMVA::DataSetManager::DataSetManager( DataInputHandler& dataInput ) : fDataInput(dataInput), fDataSetInfoCollection(), fLogger( new MsgLogger("DataSetManager", kINFO) ) @@ -60,20 +60,20 @@ TMVA::DataSetManager::DataSetManager( DataInputHandler& dataInput ) } //_______________________________________________________________________ -TMVA::DataSetManager::~DataSetManager() +TMVA::DataSetManager::~DataSetManager() { // destructor // fDataSetInfoCollection.SetOwner(); // DSMTEST --> created a segfault because the DataSetInfo-objects got deleted twice - TMVA::DataSetFactory::destroyInstance(); + TMVA::DataSetFactory::destroyInstance(); delete fLogger; } //_______________________________________________________________________ -TMVA::DataSet* TMVA::DataSetManager::CreateDataSet( const TString& dsiName ) +TMVA::DataSet* TMVA::DataSetManager::CreateDataSet( const TString& dsiName ) { - // Creates the singleton dataset + // Creates the singleton dataset DataSetInfo* dsi = GetDataSetInfo( dsiName ); if (!dsi) Log() << kFATAL << "DataSetInfo object '" << dsiName << "' not found" << Endl; diff --git a/tmva/src/Factory.cxx b/tmva/src/Factory.cxx index 1dee0b59184..c5d839953f2 100644 --- a/tmva/src/Factory.cxx +++ b/tmva/src/Factory.cxx @@ -621,8 +621,8 @@ void TMVA::Factory::PrepareTrainingAndTestTree( const TCut& cut, Int_t Ntrain, I //_______________________________________________________________________ void TMVA::Factory::PrepareTrainingAndTestTree( const TCut& cut, const TString& opt ) -{ - // prepare the training and test trees +{ + // prepare the training and test trees // -> same cuts for signal and background SetInputTreesFromEventAssignTrees(); diff --git a/tmva/src/MethodBoost.cxx b/tmva/src/MethodBoost.cxx index 1041ad144b0..018d8690442 100644 --- a/tmva/src/MethodBoost.cxx +++ b/tmva/src/MethodBoost.cxx @@ -83,6 +83,7 @@ TMVA::MethodBoost::MethodBoost( const TString& jobName, , fMethodError(0) , fOrigMethodError(0) , fBoostWeight(0) + , fDetailedMonitoring(kFALSE) , fADABoostBeta(0) , fRandomSeed(0) , fBoostedMethodTitle(methodTitle) @@ -109,6 +110,7 @@ TMVA::MethodBoost::MethodBoost( DataSetInfo& dsi, , fMethodError(0) , fOrigMethodError(0) , fBoostWeight(0) + , fDetailedMonitoring(kFALSE) , fADABoostBeta(0) , fRandomSeed(0) , fBoostedMethodTitle("") diff --git a/tmva/src/MethodCategory.cxx b/tmva/src/MethodCategory.cxx index f1354d07d45..cfc7ef73dcb 100644 --- a/tmva/src/MethodCategory.cxx +++ b/tmva/src/MethodCategory.cxx @@ -1,5 +1,5 @@ // @(#)root/tmva $Id$ -// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Eckhard von Toerne +// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard von Toerne /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * @@ -11,17 +11,17 @@ * Virtual base class for all MVA method * * * * Authors (alphabetical): * - * Andreas Hoecker <Andreas.Hocker@cern.ch> - 
CERN, Switzerland * - * Nadim Sah <Nadim.Sah@cern.ch> - Berlin, Germany * - * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland * - * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * - * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * - * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany * - * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany * + * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Nadim Sah <Nadim.Sah@cern.ch> - Berlin, Germany * + * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland * + * Joerg Stelzer <Joerg.Stelzer@cern.ch> - MSU East Lansing, USA * + * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * + * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany * + * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany * * * * Copyright (c) 2005-2011: * * CERN, Switzerland * - * U. of Victoria, Canada * + * MSU East Lansing, USA * * MPI-K Heidelberg, Germany * * U. of Bonn, Germany * * * @@ -151,12 +151,15 @@ TMVA::IMethod* TMVA::MethodCategory::AddMethod( const TCut& theCut, // std::cout << "set input done " << std::endl; rearrangeTransformation->SetEnabled(kFALSE); - IMethod* addedMethod = ClassifierFactory::Instance().Create(addedMethodName,GetJobName(),theTitle,dsi,theOptions); + IMethod* addedMethod = ClassifierFactory::Instance().Create(addedMethodName, + GetJobName(), + theTitle, + dsi, + theOptions); MethodBase *method = (dynamic_cast<MethodBase*>(addedMethod)); - if(method==0) return 0; - + method->SetupMethod(); method->ParseOptions(); method->GetTransformationHandler().AddTransformation( rearrangeTransformation, -1 ); @@ -186,7 +189,7 @@ TMVA::IMethod* TMVA::MethodCategory::AddMethod( const TCut& theCut, UInt_t newSpectatorIndex = primaryDSI.GetSpectatorInfos().size(); fCategorySpecIdx.push_back(newSpectatorIndex); - + primaryDSI.AddSpectator( Form("%s_cat%i:=%s", GetName(),(int)fMethods.size(),theCut.GetTitle()), Form("%s:%s",GetName(),method->GetName()), "pass", 0, 0, 'C' ); @@ -358,15 +361,10 @@ void TMVA::MethodCategory::Train() // specify the minimum # of training events and set 'classification' const Int_t MinNoTrainingEvents = 10; - // THIS NEEDS TO BE CHANGED: -// TString what("Classification"); -// what.ToLower(); -// Types::EAnalysisType analysisType = ( what.CompareTo("regression")==0 ? Types::kRegression : Types::kClassification ); - Types::EAnalysisType analysisType = GetAnalysisType(); // start the training - Log() << kINFO << "Train all sub-classifiers for " + Log() << kINFO << "Train all sub-classifiers for " << (analysisType == Types::kRegression ? "Regression" : "Classification") << " ..." 
<< Endl; // don't do anything if no sub-classifier booked @@ -374,7 +372,7 @@ void TMVA::MethodCategory::Train() Log() << kINFO << "...nothing found to train" << Endl; return; } - + std::vector<IMethod*>::iterator itrMethod; // iterate over all booked sub-classifiers and train them @@ -382,9 +380,9 @@ void TMVA::MethodCategory::Train() MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod); if(!mva) continue; - mva->SetAnalysisType(GetAnalysisType()); - if (!mva->HasAnalysisType( analysisType, - mva->DataInfo().GetNClasses(), + mva->SetAnalysisType( analysisType ); + if (!mva->HasAnalysisType( analysisType, + mva->DataInfo().GetNClasses(), mva->DataInfo().GetNTargets() ) ) { Log() << kWARNING << "Method " << mva->GetMethodTypeName() << " is not capable of handling " ; if (analysisType == Types::kRegression) @@ -394,8 +392,6 @@ void TMVA::MethodCategory::Train() itrMethod = fMethods.erase( itrMethod ); continue; } - - mva->SetAnalysisType( analysisType ); if (mva->Data()->GetNTrainingEvents() >= MinNoTrainingEvents) { Log() << kINFO << "Train method: " << mva->GetMethodName() << " for " diff --git a/tmva/src/MethodCompositeBase.cxx b/tmva/src/MethodCompositeBase.cxx index 11e255187fe..313edc22c79 100644 --- a/tmva/src/MethodCompositeBase.cxx +++ b/tmva/src/MethodCompositeBase.cxx @@ -1,32 +1,32 @@ // @(#)root/tmva $Id$ // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen -/********************************************************************************** - * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * - * Package: TMVA * - * Class : MethodCompositeBase * - * Web : http://tmva.sourceforge.net * - * * - * Description: * - * Virtual base class for all MVA method * - * * - * Authors (alphabetical): * - * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * - * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * - * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * - * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada * - * Or Cohen <orcohenor@gmail.com> - Weizmann Inst., Israel * - * * - * Copyright (c) 2005: * - * CERN, Switzerland * - * U. of Victoria, Canada * - * MPI-K Heidelberg, Germany * - * LAPP, Annecy, France * - * * - * Redistribution and use in source and binary forms, with or without * - * modification, are permitted according to the terms listed in LICENSE * - * (http://tmva.sourceforge.net/LICENSE) * - **********************************************************************************/ +/***************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : MethodCompositeBase * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Virtual base class for all MVA method * + * * + * Authors (alphabetical): * + * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Joerg Stelzer <Joerg.Stelzer@cern.ch> - MSU, USA * + * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * + * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada * + * Or Cohen <orcohenor@gmail.com> - Weizmann Inst., Israel * + * * + * Copyright (c) 2005: * + * CERN, Switzerland * + * U. 
of Victoria, Canada * + * MPI-K Heidelberg, Germany * + * LAPP, Annecy, France * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + *****************************************************************************/ //_______________________________________________________________________ // @@ -58,7 +58,7 @@ using std::vector; ClassImp(TMVA::MethodCompositeBase) //_______________________________________________________________________ -TMVA::MethodCompositeBase::MethodCompositeBase( const TString& jobName, +TMVA::MethodCompositeBase::MethodCompositeBase( const TString& jobName, Types::EMVA methodType, const TString& methodTitle, DataSetInfo& theData, @@ -71,7 +71,7 @@ TMVA::MethodCompositeBase::MethodCompositeBase( const TString& jobName, //_______________________________________________________________________ TMVA::MethodCompositeBase::MethodCompositeBase( Types::EMVA methodType, DataSetInfo& dsi, - const TString& weightFile, + const TString& weightFile, TDirectory* theTargetDir ) : TMVA::MethodBase( methodType, dsi, weightFile, theTargetDir ), fMethodIndex(0) @@ -85,7 +85,7 @@ TMVA::IMethod* TMVA::MethodCompositeBase::GetMethod( const TString &methodTitle vector<IMethod*>::const_iterator itrMethodEnd = fMethods.end(); for (; itrMethod != itrMethodEnd; itrMethod++) { - MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod); + MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod); if ( (mva->GetMethodName())==methodTitle ) return mva; } return 0; @@ -102,26 +102,26 @@ TMVA::IMethod* TMVA::MethodCompositeBase::GetMethod( const Int_t index ) const //_______________________________________________________________________ -void TMVA::MethodCompositeBase::AddWeightsXMLTo( void* parent ) const +void TMVA::MethodCompositeBase::AddWeightsXMLTo( void* parent ) const { void* wght = gTools().AddChild(parent, "Weights"); gTools().AddAttr( wght, "NMethods", fMethods.size() ); - for (UInt_t i=0; i< fMethods.size(); i++) + for (UInt_t i=0; i< fMethods.size(); i++) { void* methxml = gTools().AddChild( wght, "Method" ); MethodBase* method = dynamic_cast<MethodBase*>(fMethods[i]); - gTools().AddAttr(methxml,"Index", i ); - gTools().AddAttr(methxml,"Weight", fMethodWeight[i]); + gTools().AddAttr(methxml,"Index", i ); + gTools().AddAttr(methxml,"Weight", fMethodWeight[i]); gTools().AddAttr(methxml,"MethodSigCut", method->GetSignalReferenceCut()); gTools().AddAttr(methxml,"MethodSigCutOrientation", method->GetSignalReferenceCutOrientation()); gTools().AddAttr(methxml,"MethodTypeName", method->GetMethodTypeName()); - gTools().AddAttr(methxml,"MethodName", method->GetMethodName() ); + gTools().AddAttr(methxml,"MethodName", method->GetMethodName() ); gTools().AddAttr(methxml,"JobName", method->GetJobName()); - gTools().AddAttr(methxml,"Options", method->GetOptions()); + gTools().AddAttr(methxml,"Options", method->GetOptions()); if (method->fTransformationPointer) - gTools().AddAttr(methxml,"UseMainMethodTransformation", TString("true")); + gTools().AddAttr(methxml,"UseMainMethodTransformation", TString("true")); else - gTools().AddAttr(methxml,"UseMainMethodTransformation", TString("false")); + gTools().AddAttr(methxml,"UseMainMethodTransformation", TString("false")); method->AddWeightsXMLTo(methxml); } } @@ -132,14 +132,14 @@ TMVA::MethodCompositeBase::~MethodCompositeBase( void ) // delete methods vector<IMethod*>::iterator itrMethod = fMethods.begin(); for (; 
itrMethod != fMethods.end(); itrMethod++) { - Log() << kVERBOSE << "Delete method: " << (*itrMethod)->GetName() << Endl; + Log() << kVERBOSE << "Delete method: " << (*itrMethod)->GetName() << Endl; delete (*itrMethod); } fMethods.clear(); } //_______________________________________________________________________ -void TMVA::MethodCompositeBase::ReadWeightsFromXML( void* wghtnode ) +void TMVA::MethodCompositeBase::ReadWeightsFromXML( void* wghtnode ) { // XML streamer UInt_t nMethods; @@ -162,13 +162,13 @@ void TMVA::MethodCompositeBase::ReadWeightsFromXML( void* wghtnode ) Bool_t rerouteTransformation = kFALSE; if (gTools().HasAttr( ch, "UseMainMethodTransformation")) { - TString rerouteString(""); - gTools().ReadAttr( ch, "UseMainMethodTransformation", rerouteString ); - rerouteString.ToLower(); - if (rerouteString=="true") - rerouteTransformation=kTRUE; + TString rerouteString(""); + gTools().ReadAttr( ch, "UseMainMethodTransformation", rerouteString ); + rerouteString.ToLower(); + if (rerouteString=="true") + rerouteTransformation=kTRUE; } - + //remove trailing "~" to signal that options have to be reused optionString.ReplaceAll("~",""); //ignore meta-options for method Boost diff --git a/tmva/src/PDEFoamVect.cxx b/tmva/src/PDEFoamVect.cxx index d26b012f68f..94dfb64f78d 100644 --- a/tmva/src/PDEFoamVect.cxx +++ b/tmva/src/PDEFoamVect.cxx @@ -35,7 +35,7 @@ using namespace std; -//#define SW2 std::setw(12) +#define SW2 std::setprecision(7) << std::setw(12) ClassImp(TMVA::PDEFoamVect) @@ -200,13 +200,11 @@ TMVA::PDEFoamVect& TMVA::PDEFoamVect::operator =(Double_t x) //_____________________________________________________________________ void TMVA::PDEFoamVect::Print(Option_t *option) const { - streamsize wid = cout.width(); // saving current field width // Printout of all vector components if(!option) Error( "Print ", "No option set \n"); cout << "("; for(Int_t i=0; i<fDim-1; i++) - cout << std::setw(12) << *(fCoords+i) << ","; - cout << std::setw(12) << *(fCoords+fDim-1); + cout << SW2 << *(fCoords+i) << ","; + cout << SW2 << *(fCoords+fDim-1); cout << ")"; - cout.width(wid); } diff --git a/tmva/src/RegressionVariance.cxx b/tmva/src/RegressionVariance.cxx index a4bbe0cc458..d0ab388a66d 100644 --- a/tmva/src/RegressionVariance.cxx +++ b/tmva/src/RegressionVariance.cxx @@ -61,7 +61,7 @@ Double_t TMVA::RegressionVariance::GetSeparationGain(const Double_t &nLeft, Double_t parentIndex = nTot * this->GetSeparationIndex(nTot,targetTot,target2Tot); Double_t leftIndex = ( (nTot - nLeft) * this->GetSeparationIndex(nTot-nLeft,targetTot-targetLeft,target2Tot-target2Left) ); Double_t rightIndex = nLeft * this->GetSeparationIndex(nLeft,targetLeft,target2Left); - + // return 1/ (leftIndex + rightIndex); return (parentIndex - leftIndex - rightIndex)/(parentIndex); } diff --git a/tmva/src/VariableNormalizeTransform.cxx b/tmva/src/VariableNormalizeTransform.cxx index eebe6bced5b..ea6fc57ca74 100644 --- a/tmva/src/VariableNormalizeTransform.cxx +++ b/tmva/src/VariableNormalizeTransform.cxx @@ -401,6 +401,7 @@ void TMVA::VariableNormalizeTransform::ReadFromXML( void* trfnode ) gTools().ReadAttr(trfnode, "NVariables", nvars); // coverity[tainted_data_argument] gTools().ReadAttr(trfnode, "NTargets", ntgts); + // coverity[tainted_data_argument] for( UInt_t ivar = 0; ivar < nvars; ++ivar ){ fGet.push_back(std::make_pair<Char_t,UInt_t>('v',ivar)); diff --git a/tmva/test/TMVAClassificationCategory.cxx b/tmva/test/TMVAClassificationCategory.cxx index 60433d839ca..f2768c7a60b 100644 --- 
a/tmva/test/TMVAClassificationCategory.cxx +++ b/tmva/test/TMVAClassificationCategory.cxx @@ -21,11 +21,11 @@ * macros (simply say: root -l <../macros/macro.C>), which can be conveniently * * invoked through a GUI launched by the command * * * - * root -l TMVAGui.C * + * root -l TMVAGui.C * **********************************************************************************/ #include <cstdlib> -#include <iostream> +#include <iostream> #include <map> #include <string> @@ -44,12 +44,13 @@ // two types of category methods are implemented Bool_t UseOffsetMethod = kTRUE; -int main( int argc, char** argv ) +int main( int argc, char** argv ) { //--------------------------------------------------------------- - // Example for usage of different event categories with classifiers + // Example for usage of different event categories with classifiers - std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl; + std::cout << std::endl + << "==> Start TMVAClassificationCategory" << std::endl; bool batchMode = false; @@ -62,7 +63,9 @@ int main( int argc, char** argv ) std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" ); if (batchMode) factoryOptions += ":!Color:!DrawProgressBar"; - TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions ); + TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", + outputFile, + factoryOptions ); // Define the input variables used for the MVA training factory->AddVariable( "var1", 'F' ); @@ -70,16 +73,17 @@ int main( int argc, char** argv ) factory->AddVariable( "var3", 'F' ); factory->AddVariable( "var4", 'F' ); - // You can add so-called "Spectator variables", which are not used in the MVA training, - // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the - // input variables, the response values of all trained MVAs, and the spectator variables + // You can add so-called "Spectator variables", which are not used + // in the MVA training, but will appear in the final "TestTree" + // produced by TMVA. This TestTree will contain the input + // variables, the response values of all trained MVAs, and the + // spectator variables factory->AddSpectator( "eta" ); // Load the signal and background event samples from ROOT trees TFile *input(0); - TString fname( "" ); - if (UseOffsetMethod) fname = "data/toy_sigbkg_categ_offset.root"; - else fname = "data/toy_sigbkg_categ_varoff.root"; + TString fname = UseOffsetMethod ? 
"data/toy_sigbkg_categ_offset.root" : "data/toy_sigbkg_categ_varoff.root"; + if (!gSystem->AccessPathName( fname )) { // first we try to find tmva_example.root in the local directory std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl; @@ -108,7 +112,7 @@ int main( int argc, char** argv ) // Tell the factory how to use the training and testing events factory->PrepareTrainingAndTestTree( mycuts, mycutb, - "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); + "nTest_Signal=5500:nTrain_Background=3400:SplitMode=Random:NormMode=NumEvents" ); // ---- Book MVA methods @@ -117,14 +121,14 @@ int main( int argc, char** argv ) // Likelihood factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", - "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); + "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // --- Categorised classifier TMVA::MethodCategory* mcat = 0; // The variable sets TString theCat1Vars = "var1:var2:var3:var4"; - TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3"); + TString theCat2Vars = UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3"; // Fisher with categories TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" ); diff --git a/tmva/test/setup.sh b/tmva/test/setup.sh index 17fd0edfe7d..8edab3e449b 100755 --- a/tmva/test/setup.sh +++ b/tmva/test/setup.sh @@ -23,6 +23,9 @@ if [ ! $ROOTSYS ]; then return 1 fi +export TMVASYS=$PWD + + # On MacOS X $DYLD_LIBRARY_PATH has to be modified, so: if [[ `root-config --platform` == "macosx" ]]; then -- GitLab