diff --git a/tmva/Makefile b/tmva/Makefile index c80d1f6d9d8ed57ce643b92d27f6a7c9e41eac60..dafce3e9d625f97983112b8d2f927f94c99807f0 100644 --- a/tmva/Makefile +++ b/tmva/Makefile @@ -46,13 +46,17 @@ DICTH2 := TSpline2.h TSpline1.h PDF.h BinaryTree.h BinarySearchTreeNode.h GiniIndexWithLaplace.h SimulatedAnnealing.h DICTH3 := Config.h KDEKernel.h Interval.h FitterBase.h MCFitter.h GeneticFitter.h SimulatedAnnealingFitter.h \ MinuitFitter.h MinuitWrapper.h IFitterTarget.h \ - PDEFoam.h PDEFoamDistr.h PDEFoamVect.h PDEFoamCell.h BDTEventWrapper.h CCTreeWrapper.h \ + PDEFoam.h PDEFoamDecisionTree.h PDEFoamDensityBase.h PDEFoamDiscriminantDensity.h \ + PDEFoamEventDensity.h PDEFoamTargetDensity.h PDEFoamDecisionTreeDensity.h PDEFoamMultiTarget.h \ + PDEFoamVect.h PDEFoamCell.h PDEFoamDiscriminant.h PDEFoamEvent.h PDEFoamTarget.h \ + PDEFoamKernelBase.h PDEFoamKernelTrivial.h PDEFoamKernelLinN.h PDEFoamKernelGauss.h \ + BDTEventWrapper.h CCTreeWrapper.h \ CCPruner.h CostComplexityPruneTool.h SVEvent.h OptimizeConfigParameters.h DICTH4 := TNeuron.h TSynapse.h TActivationChooser.h TActivation.h TActivationSigmoid.h TActivationIdentity.h \ TActivationTanh.h TActivationRadial.h TNeuronInputChooser.h TNeuronInput.h TNeuronInputSum.h \ TNeuronInputSqSum.h TNeuronInputAbs.h Types.h Ranking.h RuleFit.h RuleFitAPI.h IMethod.h MsgLogger.h \ VariableTransformBase.h VariableIdentityTransform.h VariableDecorrTransform.h VariablePCATransform.h \ - VariableGaussTransform.h VariableNormalizeTransform.h + VariableGaussTransform.h VariableNormalizeTransform.h VariableRearrangeTransform.h DICTH1 := $(patsubst %,inc/%,$(DICTH1)) DICTH2 := $(patsubst %,inc/%,$(DICTH2)) DICTH3 := $(patsubst %,inc/%,$(DICTH3)) diff --git a/tmva/Module.mk b/tmva/Module.mk index c19b011c14ed8b197528f58ff7be1f66fa6df96c..778a830874e3003417d63f243fa71948167f8a49 100644 --- a/tmva/Module.mk +++ b/tmva/Module.mk @@ -44,7 +44,11 @@ TMVAH2 := TSpline2.h TSpline1.h PDF.h BinaryTree.h BinarySearchTreeNode.h 
GiniIndexWithLaplace.h SimulatedAnnealing.h TMVAH3 := Config.h KDEKernel.h Interval.h FitterBase.h MCFitter.h GeneticFitter.h SimulatedAnnealingFitter.h \ MinuitFitter.h MinuitWrapper.h IFitterTarget.h \ - PDEFoam.h PDEFoamDistr.h PDEFoamVect.h PDEFoamCell.h BDTEventWrapper.h CCTreeWrapper.h \ + PDEFoam.h PDEFoamDecisionTree.h PDEFoamDensityBase.h PDEFoamDiscriminantDensity.h \ + PDEFoamEventDensity.h PDEFoamTargetDensity.h PDEFoamDecisionTreeDensity.h PDEFoamMultiTarget.h \ + PDEFoamVect.h PDEFoamCell.h PDEFoamDiscriminant.h PDEFoamEvent.h PDEFoamTarget.h \ + PDEFoamKernelBase.h PDEFoamKernelTrivial.h PDEFoamKernelLinN.h PDEFoamKernelGauss.h \ + BDTEventWrapper.h CCTreeWrapper.h \ CCPruner.h CostComplexityPruneTool.h SVEvent.h OptimizeConfigParameters.h TMVAH4 := TNeuron.h TSynapse.h TActivationChooser.h TActivation.h TActivationSigmoid.h TActivationIdentity.h \ TActivationTanh.h TActivationRadial.h TNeuronInputChooser.h TNeuronInput.h TNeuronInputSum.h \ diff --git a/tmva/inc/BinarySearchTreeNode.h b/tmva/inc/BinarySearchTreeNode.h index c6f92d2dbceb00f64bd439aeae2175e990271045..04dfae4ce4ea5d5ec4931b37160582c83958b227 100644 --- a/tmva/inc/BinarySearchTreeNode.h +++ b/tmva/inc/BinarySearchTreeNode.h @@ -89,6 +89,7 @@ namespace TMVA { const std::vector<Float_t> & GetEventV() const { return fEventV; } Float_t GetWeight() const { return fWeight; } + UInt_t GetClass() const { return fClass; } Bool_t IsSignal() const { return (fClass == 0); } const std::vector<Float_t> & GetTargets() const { return fTargets; } diff --git a/tmva/inc/DataInputHandler.h b/tmva/inc/DataInputHandler.h index 5af89c2eb76e2125a7311c35c6ccc8851dcf063a..28b9e02f34c044811b8418e51c463ba035d4ce46 100644 --- a/tmva/inc/DataInputHandler.h +++ b/tmva/inc/DataInputHandler.h @@ -66,7 +66,7 @@ namespace TMVA { TTree* GetTree() const { return fTree; } Double_t GetWeight() const { return fWeight; } - UInt_t GetEntries() const { return fTree->GetEntries(); } + UInt_t GetEntries() const { if( !fTree ) 
return 0; else return fTree->GetEntries(); } Types::ETreeType GetTreeType() const { return fTreeType; } const TString& GetClassName() const { return fClassName; } diff --git a/tmva/inc/DataSet.h b/tmva/inc/DataSet.h index 3637684a838dcda9d68d6a5beac6ebb7b344d092..534a642e892ee20369a306f74051c425b1d10f9b 100644 --- a/tmva/inc/DataSet.h +++ b/tmva/inc/DataSet.h @@ -89,8 +89,8 @@ namespace TMVA { Long64_t GetNEvents( Types::ETreeType type = Types::kMaxTreeType ) const; Long64_t GetNTrainingEvents() const { return GetNEvents(Types::kTraining); } Long64_t GetNTestEvents() const { return GetNEvents(Types::kTesting); } - Event* GetEvent() const; - Event* GetEvent ( Long64_t ievt ) const { fCurrentEventIdx = ievt; return GetEvent(); } + Event* GetEvent() const; // returns event without transformations + Event* GetEvent ( Long64_t ievt ) const { fCurrentEventIdx = ievt; return GetEvent(); } // returns event without transformations Event* GetTrainingEvent( Long64_t ievt ) const { return GetEvent(ievt, Types::kTraining); } Event* GetTestEvent ( Long64_t ievt ) const { return GetEvent(ievt, Types::kTesting); } Event* GetEvent ( Long64_t ievt, Types::ETreeType type ) const { diff --git a/tmva/inc/DecisionTreeNode.h b/tmva/inc/DecisionTreeNode.h index d9c938650ecb24d17ada8885cf03bafba545198c..1eed1b99da8dd71d7a56bf8f6062430e687fb86b 100644 --- a/tmva/inc/DecisionTreeNode.h +++ b/tmva/inc/DecisionTreeNode.h @@ -332,7 +332,7 @@ namespace TMVA { static MsgLogger* fgLogger; // static because there is a huge number of nodes... 
- std::vector<Double_t> fFisherCoeff; // the other fisher coeff (offset at the last element + std::vector<Double_t> fFisherCoeff; // the fisher coeff (offset at the last element) Float_t fCutValue; // cut value appplied on this node to discriminate bkg against sig Bool_t fCutType; // true: if event variable > cutValue ==> signal , false otherwise diff --git a/tmva/inc/Event.h b/tmva/inc/Event.h index 8623310422a63b0adcf699a0609e9a971cffee91..5ad1ced3ee73b431413459fe630598f28d97224c 100644 --- a/tmva/inc/Event.h +++ b/tmva/inc/Event.h @@ -82,8 +82,6 @@ namespace TMVA { UInt_t GetNTargets() const; UInt_t GetNSpectators() const; - const std::vector<UInt_t>* GetVariableArrangement() const { return fVariableArrangement; } - Float_t GetValue( UInt_t ivar) const; const std::vector<Float_t>& GetValues() const; @@ -101,9 +99,8 @@ namespace TMVA { void SetVal ( UInt_t ivar, Float_t val ); void SetTarget ( UInt_t itgt, Float_t value ); void SetSpectator ( UInt_t ivar, Float_t value ); - void SetVariableArrangement( std::vector<UInt_t>* const m ) const; - static void ClearDynamicVariables(); + static void ClearDynamicVariables() {} void CopyVarValues( const Event& other ); void Print ( std::ostream & o ) const; @@ -114,7 +111,6 @@ namespace TMVA { mutable std::vector<Float_t*>* fValuesDynamic; // the event values mutable std::vector<Float_t> fTargets; // target values for regression mutable std::vector<Float_t> fSpectators; // "visisting" variables which are never used for any calculation - mutable std::vector<UInt_t>* fVariableArrangement; // needed for MethodCategories, where we can train on other than the main variables UInt_t fClass; // signal or background type: signal=1, background=0 Double_t fWeight; // event weight (product of global and individual weights) diff --git a/tmva/inc/Interval.h b/tmva/inc/Interval.h index 1f0b3a82b4e675608e522186a952581812c57a07..92f1c446774f00da92d44c407f017ed889f031f2 100644 --- a/tmva/inc/Interval.h +++ b/tmva/inc/Interval.h @@ -84,7 
+84,7 @@ namespace TMVA { private: Double_t fMin, fMax; // the constraints of the Interval - Int_t fNbins; // when >0 : number of bins (discrete interval); when =0 continuous interval + Int_t fNbins; // when >0 : number of bins (discrete interval); when ==0 continuous interval static MsgLogger* fgLogger; // message logger MsgLogger& Log() const { return *fgLogger; } diff --git a/tmva/inc/LinkDef3.h b/tmva/inc/LinkDef3.h index 69c7e8e4690682efa0b16ffedf596a25a8946f2c..91b6162e5ca6b5914be4c93b4a825759ade4a077 100644 --- a/tmva/inc/LinkDef3.h +++ b/tmva/inc/LinkDef3.h @@ -23,9 +23,22 @@ #pragma link C++ class TMVA::MinuitWrapper+; #pragma link C++ class TMVA::IFitterTarget+; #pragma link C++ class TMVA::PDEFoam+; -#pragma link C++ class TMVA::PDEFoamDistr+; +#pragma link C++ class TMVA::PDEFoamEvent+; +#pragma link C++ class TMVA::PDEFoamDiscriminant+; +#pragma link C++ class TMVA::PDEFoamTarget+; +#pragma link C++ class TMVA::PDEFoamMultiTarget+; +#pragma link C++ class TMVA::PDEFoamDecisionTree+; +#pragma link C++ class TMVA::PDEFoamDensityBase+; +#pragma link C++ class TMVA::PDEFoamDiscriminantDensity+; +#pragma link C++ class TMVA::PDEFoamEventDensity+; +#pragma link C++ class TMVA::PDEFoamTargetDensity+; +#pragma link C++ class TMVA::PDEFoamDecisionTreeDensity+; #pragma link C++ class TMVA::PDEFoamVect+; #pragma link C++ class TMVA::PDEFoamCell+; +#pragma link C++ class TMVA::PDEFoamKernelBase+; +#pragma link C++ class TMVA::PDEFoamKernelTrivial+; +#pragma link C++ class TMVA::PDEFoamKernelLinN+; +#pragma link C++ class TMVA::PDEFoamKernelGauss+; #pragma link C++ class TMVA::BDTEventWrapper+; #pragma link C++ class TMVA::CCTreeWrapper+; #pragma link C++ class TMVA::CCPruner+; diff --git a/tmva/inc/LinkDef4.h b/tmva/inc/LinkDef4.h index 99dca352ad5c6a7af191850b8a38380287e7fb0b..1411b5f2d5c241cbb36378f5abf8f2f4fe99172e 100644 --- a/tmva/inc/LinkDef4.h +++ b/tmva/inc/LinkDef4.h @@ -35,5 +35,6 @@ #pragma link C++ class TMVA::VariablePCATransform+; #pragma link C++ 
class TMVA::VariableGaussTransform+; #pragma link C++ class TMVA::VariableNormalizeTransform+; +#pragma link C++ class TMVA::VariableRearrangeTransform+; #endif diff --git a/tmva/inc/MethodBDT.h b/tmva/inc/MethodBDT.h index 9040d9cfc4e5d2bff8ef7e9657c9f1a88ff9277e..edf4f8e9644e21806c107e795ec864e4b5b39eb7 100644 --- a/tmva/inc/MethodBDT.h +++ b/tmva/inc/MethodBDT.h @@ -110,6 +110,8 @@ namespace TMVA { private: Double_t GetMvaValue( Double_t* err, Double_t* errUpper, UInt_t useNTrees ); + Double_t PrivateGetMvaValue( TMVA::Event& ev, Double_t* err=0, Double_t* errUpper=0, UInt_t useNTrees=0 ); + void BoostMonitor(Int_t iTree); public: const std::vector<Float_t>& GetMulticlassValues(); @@ -158,7 +160,6 @@ namespace TMVA { void GetHelpMessage() const; - virtual Bool_t IsSignalLike() { return GetMvaValue() > 0;} protected: void DeclareCompatibilityOptions(); @@ -236,7 +237,7 @@ namespace TMVA { Double_t fSampleSizeFraction; // relative size of bagged event sample to original sample size Bool_t fNoNegWeightsInTraining; // ignore negative event weights in the training - + Bool_t fDoBoostMonitor; //create control plot with ROC integral vs tree number //some histograms for monitoring diff --git a/tmva/inc/MethodBase.h b/tmva/inc/MethodBase.h index 293f8f5bb5e4061fe4fddf4e74eed516aaaba733..b9311af031705d1cf5190e8015acea92422767fd 100644 --- a/tmva/inc/MethodBase.h +++ b/tmva/inc/MethodBase.h @@ -74,6 +74,8 @@ class TGraph; class TTree; class TDirectory; class TSpline; +class TH1F; +class TH1D; namespace TMVA { @@ -86,6 +88,8 @@ namespace TMVA { class MethodBase : virtual public IMethod, public Configurable { + friend class Factory; + public: enum EWeightFileType { kROOT=0, kTEXT }; @@ -270,6 +274,8 @@ namespace TMVA { virtual std::vector<Float_t> GetMulticlassEfficiency( std::vector<std::vector<Float_t> >& purity ); virtual std::vector<Float_t> GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity ); virtual Double_t GetSignificance() const; + virtual 
Double_t GetROCIntegral(TH1F *histS, TH1F *histB) const; + // virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const; virtual Double_t GetROCIntegral(PDF *pdfS=0, PDF *pdfB=0) const; virtual Double_t GetMaximumSignificance( Double_t SignalEvents, Double_t BackgroundEvents, Double_t& optimal_significance_value ) const; @@ -311,9 +317,11 @@ namespace TMVA { // sets the minimum requirement on the MVA output to declare an event signal-like Double_t GetSignalReferenceCut() const { return fSignalReferenceCut; } + Double_t GetSignalReferenceCutOrientation() const { return fSignalReferenceCutOrientation; } // sets the minimum requirement on the MVA output to declare an event signal-like void SetSignalReferenceCut( Double_t cut ) { fSignalReferenceCut = cut; } + void SetSignalReferenceCutOrientation( Double_t cutOrientation ) { fSignalReferenceCutOrientation = cutOrientation; } // pointers to ROOT directories TDirectory* BaseDir() const; @@ -331,8 +339,16 @@ namespace TMVA { TString GetTrainingTMVAVersionString() const; TString GetTrainingROOTVersionString() const; - TransformationHandler& GetTransformationHandler() { return fTransformation; } - const TransformationHandler& GetTransformationHandler() const { return fTransformation; } + TransformationHandler& GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) + { + if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation; + } + const TransformationHandler& GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const + { + if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation; + } + + void RerouteTransformationHandler (TransformationHandler* fTargetTransformation) { fTransformationPointer=fTargetTransformation; } // ---------- event accessors ------------------------------------------------ @@ -356,7 +372,8 @@ namespace TMVA { // this method is used to decide whether an event is 
signal- or background-like // the reference cut "xC" is taken to be where // Int_[-oo,xC] { PDF_S(x) dx } = Int_[xC,+oo] { PDF_B(x) dx } - virtual Bool_t IsSignalLike() { return GetMvaValue() > GetSignalReferenceCut() ? kTRUE : kFALSE; } + virtual Bool_t IsSignalLike(); + virtual Bool_t IsSignalLike(Double_t mvaVal); DataSet* Data() const { return DataInfo().GetDataSet(); } @@ -476,7 +493,11 @@ namespace TMVA { private: void AddInfoItem( void* gi, const TString& name, const TString& value) const; - void CreateVariableTransforms(const TString& trafoDefinition ); + + static void CreateVariableTransforms(const TString& trafoDefinition, + TMVA::DataSetInfo& dataInfo, + TMVA::TransformationHandler& transformationHandler, + TMVA::MsgLogger& log ); // ========== class members ================================================== @@ -507,6 +528,7 @@ namespace TMVA { DataSetInfo& fDataSetInfo; //! the data set information (sometimes needed) Double_t fSignalReferenceCut; // minimum requirement on the MVA output to declare an event signal-like + Double_t fSignalReferenceCutOrientation; // minimum requirement on the MVA output to declare an event signal-like Types::ESBType fVariableTransformType; // this is the event type (sig or bgd) assumed for variable transform // naming and versioning @@ -537,6 +559,8 @@ namespace TMVA { PDF* fMVAPdfS; // signal MVA PDF PDF* fMVAPdfB; // background MVA PDF + TH1F* fmvaS; // PDFs of MVA distribution (signal) + TH1F* fmvaB; // PDFs of MVA distribution (background) PDF* fSplS; // PDFs of MVA distribution (signal) PDF* fSplB; // PDFs of MVA distribution (background) TSpline* fSpleffBvsS; // splines for signal eff. versus background eff. 
@@ -558,7 +582,8 @@ namespace TMVA { // variable preprocessing TString fVarTransformString; // labels variable transform method - TransformationHandler fTransformation; // the list of transformations + TransformationHandler* fTransformationPointer; // pointer to the rest of transformations + TransformationHandler fTransformation; // the list of transformations // help and verbosity diff --git a/tmva/inc/MethodBoost.h b/tmva/inc/MethodBoost.h index ae8ce5912c436c007ff92519718f2f69e1ed55fe..aad72e563e3f2591719b7276de78a3623d928e7e 100644 --- a/tmva/inc/MethodBoost.h +++ b/tmva/inc/MethodBoost.h @@ -11,12 +11,12 @@ * Virtual base class for all MVA method * * * * Authors (alphabetical): * - * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * - * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * - * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * - * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada * - * Or Cohen <orcohenor@gmail.com> - Weizmann Inst., Israel * - * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany * + * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland * + * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * + * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * + * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany * + * Eckhard v. 
Toerne <evt@uni-bonn.de> - U of Bonn, Germany * * * * Copyright (c) 2005: * * CERN, Switzerland * @@ -118,10 +118,10 @@ namespace TMVA { MethodBoost* SetStage( Types::EBoostStage stage ) { fBoostStage = stage; return this; } - //training a single classifier + // training a single classifier void SingleTrain(); - //calculating a boosting weight from the classifier, storing it in the next one + // calculating a boosting weight from the classifier, storing it in the next one void SingleBoost(); // calculate weight of single method @@ -130,7 +130,7 @@ namespace TMVA { // return ROC integral on training/testing sample Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE); - //writing the monitoring histograms and tree to a file + // writing the monitoring histograms and tree to a file void WriteMonitoringHistosToFile( void ) const; // write evaluation histograms into target file @@ -139,94 +139,65 @@ namespace TMVA { // performs the MethodBase testing + testing of each boosted classifier virtual void TestClassification(); - //finding the MVA to cut between sig and bgd according to fMVACutPerc,fMVACutType + // finding the MVA to cut between sig and bgd according to fMVACutPerc,fMVACutType void FindMVACut(); - //setting all the boost weights to 1 + // setting all the boost weights to 1 void ResetBoostWeights(); - //creating the vectors of histogram for monitoring MVA response of each classifier + // creating the vectors of histogram for monitoring MVA response of each classifier void CreateMVAHistorgrams(); // calculate MVA values of current trained method on training // sample void CalcMVAValues(); - - //Number of times the classifier is boosted (set by the user) - Int_t fBoostNum; - // string specifying the boost type (AdaBoost / Bagging ) - TString fBoostType; - - // string specifying the boost type ( ByError,Average,LastMethod ) - TString fMethodWeightType; - - //estimation of the level error of the classifier analysing the train 
dataset - Double_t fMethodError; - //estimation of the level error of the classifier analysing the train dataset (with unboosted weights) - Double_t fOrigMethodError; - - //the weight used to boost the next classifier - Double_t fBoostWeight; - - // min and max values for the classifier response - TString fTransformString; - - //ADA boost parameter, default is 1 - Double_t fADABoostBeta; - - // seed for random number generator used for bagging - UInt_t fRandomSeed; - - // details of the boosted classifier - TString fBoostedMethodName; - TString fBoostedMethodTitle; - TString fBoostedMethodOptions; - - // histograms to monitor values during the boosting - std::vector<TH1*>* fMonitorHist; - - //whether to monitor the MVA response of every classifier using the - Bool_t fMonitorBoostedMethod; - - //MVA output from each classifier over the training hist, using orignal events weights + + Int_t fBoostNum; // Number of times the classifier is boosted + TString fBoostType; // string specifying the boost type + TString fMethodWeightType; // string specifying the boost type + Double_t fMethodError; // estimation of the level error of the classifier + // analysing the train dataset + Double_t fOrigMethodError; // estimation of the level error of the classifier + // analysing the train dataset (with unboosted weights) + Double_t fBoostWeight; // the weight used to boost the next classifier + TString fTransformString; // min and max values for the classifier response + Bool_t fDetailedMonitoring; // produce detailed monitoring histograms (boost-wise) + + Double_t fADABoostBeta; // ADA boost parameter, default is 1 + UInt_t fRandomSeed; // seed for random number generator used for bagging + + TString fBoostedMethodName; // details of the boosted classifier + TString fBoostedMethodTitle; // title + TString fBoostedMethodOptions; // options + + std::vector<TH1*>* fMonitorHist; // histograms to monitor values during the boosting + Bool_t fMonitorBoostedMethod; // monitor the MVA 
response of every classifier + + // MVA output from each classifier over the training hist, using orignal events weights std::vector< TH1* > fTrainSigMVAHist; std::vector< TH1* > fTrainBgdMVAHist; - //MVA output from each classifier over the training hist, using boosted events weights + // MVA output from each classifier over the training hist, using boosted events weights std::vector< TH1* > fBTrainSigMVAHist; std::vector< TH1* > fBTrainBgdMVAHist; - //MVA output from each classifier over the testing hist + // MVA output from each classifier over the testing hist std::vector< TH1* > fTestSigMVAHist; std::vector< TH1* > fTestBgdMVAHist; - - // tree to monitor values during the boosting - TTree* fMonitorTree; - - // the stage of the boosting - Types::EBoostStage fBoostStage; - - //the number of histogram filled for every type of boosted classifier - Int_t fDefaultHistNum; - - //whether to recalculate the MVA cut at every boosting step - Bool_t fRecalculateMVACut; - - // roc integral of last trained method (on training sample) - Double_t fROC_training; + + TTree* fMonitorTree; // tree to monitor values during the boosting + Types::EBoostStage fBoostStage; // stage of the boosting + Int_t fDefaultHistNum; // number of histogram filled for every type of boosted classifier + Bool_t fRecalculateMVACut; // whether to recalculate the MVA cut at every boosting step + Double_t fROC_training; // roc integral of last trained method (on training sample) // overlap integral of mva distributions for signal and // background (training sample) - Double_t fOverlap_integral; - - // mva values for the last trained method (on training sample) - std::vector<Float_t> *fMVAvalues; - - DataSetManager* fDataSetManager; // DSMTEST - friend class Factory; // DSMTEST - friend class Reader; // DSMTEST - - - + Double_t fOverlap_integral; + + std::vector<Float_t> *fMVAvalues; // mva values for the last trained method + DataSetManager* fDataSetManager; // DSMTEST + friend class Factory; // DSMTEST 
+ friend class Reader; // DSMTEST protected: diff --git a/tmva/inc/MethodCategory.h b/tmva/inc/MethodCategory.h index 8b5663453aed97a4d0fbcd6bb8aec05feb181719..4f4e74f360d4480932eadf05647bc7b2014b2b67 100644 --- a/tmva/inc/MethodCategory.h +++ b/tmva/inc/MethodCategory.h @@ -93,6 +93,9 @@ namespace TMVA { Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper = 0 ); + // regression response + virtual const std::vector<Float_t>& GetRegressionValues(); + virtual void MakeClass( const TString& = TString("") ) const {}; private : diff --git a/tmva/inc/MethodHMatrix.h b/tmva/inc/MethodHMatrix.h index 87e5b038b3545c181ab4f3f754c57579c1debc71..7f25284236b8412aa98df3e69f8bca64eee61b83 100644 --- a/tmva/inc/MethodHMatrix.h +++ b/tmva/inc/MethodHMatrix.h @@ -109,8 +109,7 @@ namespace TMVA { void ProcessOptions(); // returns chi2 estimator for given type (signal or background) - Double_t GetChi2( Event* e, Types::ESBType ) const; - Double_t GetChi2( Types::ESBType ) const; + Double_t GetChi2( Types::ESBType ); // compute correlation matrices void ComputeCovariance( Bool_t, TMatrixD* ); diff --git a/tmva/inc/MethodLikelihood.h b/tmva/inc/MethodLikelihood.h index 2ac2d768ba37083fcbaf6fe0489436e67d36f2e4..d25bf25e21f02cf9a6caf9a80c78f64d498cf490 100644 --- a/tmva/inc/MethodLikelihood.h +++ b/tmva/inc/MethodLikelihood.h @@ -150,7 +150,7 @@ namespace TMVA { TString fKDEiterString; // Number of iterations (string) TString fKDEtypeString; // Kernel type to use for KDE (string) TString* fInterpolateString; // which interpolation method used for reference histograms (individual for each variable) - + ClassDef(MethodLikelihood,0) // Likelihood analysis ("non-parametric approach") }; diff --git a/tmva/inc/MethodMLP.h b/tmva/inc/MethodMLP.h index bbfbdd56e633cec1f826c660f512f1e7bf71fe9f..7ce853a2a5e2a0bfa0e6f2fb1504c80193976c24 100644 --- a/tmva/inc/MethodMLP.h +++ b/tmva/inc/MethodMLP.h @@ -112,7 +112,7 @@ namespace TMVA { enum EBPTrainingMode { kSequential=0, kBatch }; bool 
HasInverseHessian() { return fCalculateErrors; } - Double_t GetMvaValueAsymError( Double_t* errUpper, Double_t* errLower ); + Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper=0 ); protected: @@ -220,6 +220,11 @@ namespace TMVA { Int_t fGA_SC_rate; // GA settings: SC_rate Double_t fGA_SC_factor; // GA settings: SC_factor + // regression, storage of deviations + std::vector<std::pair<Float_t,Float_t> >* fDeviationsFromTargets; // deviation from the targets, event weight + + Float_t fWeightRange; // suppress outliers for the estimator calculation + #ifdef MethodMLP_UseMinuit__ // minuit variables -- commented out because they rely on a static pointer Int_t fNumberOfWeights; // Minuit: number of weights diff --git a/tmva/inc/MethodPDEFoam.h b/tmva/inc/MethodPDEFoam.h index 42f6232ad9a3daebe30cf8bd5e64f5ea526e20ce..ff3e6371b32eecaec013a7a4bcfe9b9a691f1a7e 100644 --- a/tmva/inc/MethodPDEFoam.h +++ b/tmva/inc/MethodPDEFoam.h @@ -18,12 +18,12 @@ * Tancredi Carli - CERN, Switzerland * * Dominik Dannheim - CERN, Switzerland * * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland * - * Alexander Voigt - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * * * * Original author of the TFoam implementation: * * S. 
Jadach - Institute of Nuclear Physics, Cracow, Poland * * * - * Copyright (c) 2008: * + * Copyright (c) 2008, 2010: * * CERN, Switzerland * * MPI-K Heidelberg, Germany * * * @@ -71,8 +71,56 @@ #include "TMVA/PDEFoam.h" #endif +#ifndef ROOT_TMVA_PDEFoamDecisionTree +#include "TMVA/PDEFoamDecisionTree.h" +#endif +#ifndef ROOT_TMVA_PDEFoamEvent +#include "TMVA/PDEFoamEvent.h" +#endif +#ifndef ROOT_TMVA_PDEFoamDiscriminant +#include "TMVA/PDEFoamDiscriminant.h" +#endif +#ifndef ROOT_TMVA_PDEFoamTarget +#include "TMVA/PDEFoamTarget.h" +#endif +#ifndef ROOT_TMVA_PDEFoamMultiTarget +#include "TMVA/PDEFoamMultiTarget.h" +#endif + +#ifndef ROOT_TMVA_PDEFoamDensityBase +#include "TMVA/PDEFoamDensityBase.h" +#endif +#ifndef ROOT_TMVA_PDEFoamTargetDensity +#include "TMVA/PDEFoamTargetDensity.h" +#endif +#ifndef ROOT_TMVA_PDEFoamEventDensity +#include "TMVA/PDEFoamEventDensity.h" +#endif +#ifndef ROOT_TMVA_PDEFoamDiscriminantDensity +#include "TMVA/PDEFoamDiscriminantDensity.h" +#endif +#ifndef ROOT_TMVA_PDEFoamDecisionTreeDensity +#include "TMVA/PDEFoamDecisionTreeDensity.h" +#endif + +#ifndef ROOT_TMVA_PDEFoamKernelBase +#include "TMVA/PDEFoamKernelBase.h" +#endif +#ifndef ROOT_TMVA_PDEFoamKernelTrivial +#include "TMVA/PDEFoamKernelTrivial.h" +#endif +#ifndef ROOT_TMVA_PDEFoamKernelLinN +#include "TMVA/PDEFoamKernelLinN.h" +#endif +#ifndef ROOT_TMVA_PDEFoamKernelGauss +#include "TMVA/PDEFoamKernelGauss.h" +#endif + namespace TMVA { + // kernel types + enum EKernel { kNone=0, kGaus=1, kLinN=2 }; + class MethodPDEFoam : public MethodBase { public: @@ -97,6 +145,7 @@ namespace TMVA { void TrainMultiTargetRegression( void ); // Regression output: any number of values void TrainSeparatedClassification( void ); // Classification: one foam for Sig, one for Bg void TrainUnifiedClassification( void ); // Classification: one foam for Signal and Bg + void TrainMultiClassification(); // Classification: one foam for every class using MethodBase::ReadWeightsFromStream; @@ -114,11 +163,20 
@@ namespace TMVA { // calculate the MVA value Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 ); + // calculate multiclass MVA values + const std::vector<Float_t>& GetMulticlassValues(); + // regression procedure virtual const std::vector<Float_t>& GetRegressionValues(); + // reset the method + virtual void Reset(); + // ranking of input variables - const Ranking* CreateRanking() { return 0; } + const Ranking* CreateRanking(); + + // get number of cuts in every dimension, starting at cell + void GetNCuts(PDEFoamCell *cell, std::vector<UInt_t> &nCuts); // helper functions to convert enum types to UInt_t and back EKernel GetKernel( void ) { return fKernel; } @@ -141,8 +199,14 @@ namespace TMVA { // Set Xmin, Xmax in foam with index 'foam_index' void SetXminXmax(TMVA::PDEFoam*); - // Set foam options - void InitFoam(TMVA::PDEFoam*, EFoamType); + // create foam and set foam options + PDEFoam* InitFoam(TString, EFoamType, UInt_t cls=0); + + // create pdefoam kernel + PDEFoamKernelBase* CreatePDEFoamKernel(); + + // delete all trained foams + void DeleteFoams(); // fill variable names into foam void FillVariableNamesToFoam() const; @@ -162,10 +226,9 @@ namespace TMVA { // options to be used Bool_t fSigBgSeparated; // Separate Sig and Bg, or not - Double_t fFrac; // Fraction used for calc of Xmin, Xmax - Double_t fDiscrErrCut; // cut on discrimant error - Float_t fVolFrac; // inverse volume fraction (used for density calculation during buildup) - Float_t fVolFracInv; // volume fraction (used for density calculation during buildup) + Float_t fFrac; // Fraction used for calc of Xmin, Xmax + Float_t fDiscrErrCut; // cut on discrimant error + Float_t fVolFrac; // volume fraction (used for density calculation during buildup) Int_t fnCells; // Number of Cells (1000) Int_t fnActiveCells; // Number of active cells Int_t fnSampl; // Number of MC events per cell in build-up (1000) @@ -180,15 +243,16 @@ namespace TMVA { TString fKernelStr; // Kernel for 
GetMvaValue() (option string) EKernel fKernel; // Kernel for GetMvaValue() + PDEFoamKernelBase *fKernelEstimator;// Kernel estimator TString fTargetSelectionStr; // method of selecting the target (only mulit target regr.) ETargetSelection fTargetSelection; // method of selecting the target (only mulit target regr.) Bool_t fFillFoamWithOrigWeights; // fill the foam with boost weights Bool_t fUseYesNoCell; // return -1 or 1 for bg or signal like event TString fDTLogic; // use DT algorithm to split cells EDTSeparation fDTSeparation; // enum which specifies the separation to use for the DT logic - Bool_t fPeekMax; // peek up cell with max. driver integral for split + Bool_t fPeekMax; // BACKWARDS COMPATIBILITY: peek up cell with max. driver integral for split - std::vector<Double_t> fXmin, fXmax; // range for histograms and foams + std::vector<Float_t> fXmin, fXmax; // range for histograms and foams // foams and densities // foam[0]=signal, if Sig and BG are Seperated; else foam[0]=signal/bg @@ -198,7 +262,7 @@ namespace TMVA { // default initialisation called by all constructors void Init( void ); - ClassDef(MethodPDEFoam,0) // Analysis of PDEFoam discriminant (PDEFoam or Mahalanobis approach) + ClassDef(MethodPDEFoam,0) // Multi-dimensional probability density estimator using TFoam (PDE-Foam) }; } // namespace TMVA diff --git a/tmva/inc/PDEFoam.h b/tmva/inc/PDEFoam.h index 5f7d3a686ff51942febe33b813f1ec1b3a61ec20..435afce668d0a6f05358520b8369717acf3236ec 100644 --- a/tmva/inc/PDEFoam.h +++ b/tmva/inc/PDEFoam.h @@ -1,3 +1,5 @@ +// @(#)root/tmva $Id$ +// Author: S. Jadach, Tancredi Carli, Dominik Dannheim, Alexander Voigt /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * @@ -12,9 +14,9 @@ * S. 
Jadach - Institute of Nuclear Physics, Cracow, Poland * * Tancredi Carli - CERN, Switzerland * * Dominik Dannheim - CERN, Switzerland * - * Alexander Voigt - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * * * - * Copyright (c) 2008: * + * Copyright (c) 2008, 2010: * * CERN, Switzerland * * MPI-K Heidelberg, Germany * * * @@ -60,14 +62,31 @@ namespace TMVA { class PDEFoamCell; class PDEFoamVect; - class PDEFoamDistr; + class PDEFoamDensityBase; + class PDEFoamKernelBase; class PDEFoam; - enum EFoamType { kSeparate, kDiscr, kMonoTarget, kMultiTarget }; + // separation types + enum EDTSeparation { kFoam, kGiniIndex, kMisClassificationError, + kCrossEntropy, kGiniIndexWithLaplace, kSdivSqrtSplusB }; + + // foam types + enum EFoamType { kSeparate, kDiscr, kMonoTarget, kMultiTarget, kMultiClass }; + + // enum type for possible foam cell values + // kValue : cell value who's rms is minimized + // kValueError : error on kValue + // kValueDensity : volume density of kValue + // kMeanValue : mean sampling value (saved in fIntegral) + // kRms : rms of sampling distribution (saved in fDriver) + // kRmsOvMean : rms/mean of sampling distribution (saved in + // fDriver and fIntegral) + enum ECellValue { kValue, kValueError, kValueDensity, kMeanValue, + kRms, kRmsOvMean, kCellVolume }; } -#ifndef ROOT_TMVA_PDEFoamDistr -#include "TMVA/PDEFoamDistr.h" +#ifndef ROOT_TMVA_PDEFoamDensityBase +#include "TMVA/PDEFoamDensityBase.h" #endif #ifndef ROOT_TMVA_PDEFoamVect #include "TMVA/PDEFoamVect.h" @@ -75,34 +94,11 @@ namespace TMVA { #ifndef ROOT_TMVA_PDEFoamCell #include "TMVA/PDEFoamCell.h" #endif +#ifndef ROOT_TMVA_PDEFoamKernelBase +#include "TMVA/PDEFoamKernelBase.h" +#endif namespace TMVA { - enum EKernel { kNone=0, kGaus=1, kLinN=2 }; - enum ETargetSelection { kMean=0, kMpv=1 }; - enum ECellType { kAll, kActive, kInActive }; - - // enum type for possible foam cell values - // kNev : number of events (saved in cell element 0) - // kDiscriminator : discriminator 
(saved in cell element 0) - // kDiscriminatorError : error on discriminator (saved in cell element 1) - // kTarget0 : target 0 (saved in cell element 0) - // kTargetError : error on target 0 (saved in cell element 1) - // kMeanValue : mean sampling value (saved in fIntegral) - // kRms : rms of sampling distribution (saved in fDriver) - // kRmsOvMean : rms/mean of sampling distribution (saved in - // fDriver and fIntegral) - // kDensity : number of events/cell volume - enum ECellValue { kNev, kDiscriminator, kDiscriminatorError, kTarget0, - kTarget0Error, kMeanValue, kRms, kRmsOvMean, kDensity }; - // separation quantity to use (kFoam: use PDEFoam algorithm) - enum EDTSeparation { kFoam, kGiniIndex, kMisClassificationError, - kCrossEntropy }; -} - -namespace TMVA { - - std::ostream& operator<< ( std::ostream& os, const PDEFoam& pdefoam ); - std::istream& operator>> ( std::istream& istr, PDEFoam& pdefoam ); class PDEFoam : public TObject { protected: @@ -130,17 +126,17 @@ namespace TMVA { //---------- working space for CELL exploration ------------- Double_t *fAlpha; // [fDim] Internal parameters of the hyperrectangle // --------- PDE-Foam specific variables - EFoamType fFoamType; // type of foam + EFoamType fFoamType; // BACKWARDS COMPATIBILITY: type of foam Double_t *fXmin; // [fDim] minimum for variable transform Double_t *fXmax; // [fDim] maximum for variable transform - UInt_t fNElements; // number of variables in every cell + UInt_t fNElements; // BACKWARDS COMPATIBILITY: number of variables in every cell UInt_t fNmin; // minimal number of events in cell to split cell UInt_t fMaxDepth; // maximum depth of cell tree - Float_t fVolFrac; // volume fraction (with respect to total phase space - Bool_t fFillFoamWithOrigWeights; // fill the foam with boost or orig. weights - EDTSeparation fDTSeparation; // split cells according to decision tree logic - Bool_t fPeekMax; // peek up cell with max. driver integral for split - PDEFoamDistr *fDistr; //! 
distribution of training events + Float_t fVolFrac; // BACKWARDS COMPATIBILITY: volume fraction (with respect to total phase space + Bool_t fFillFoamWithOrigWeights; // BACKWARDS COMPATIBILITY: fill the foam with boost or orig. weights + EDTSeparation fDTSeparation; // BACKWARDS COMPATIBILITY: split cells according to decision tree logic + Bool_t fPeekMax; // BACKWARDS COMPATIBILITY: peek up cell with max. driver integral for split + PDEFoamDensityBase *fDistr; //! distribution of training events Timer *fTimer; // timer for graphical output TObjArray *fVariableNames;// collection of all variable names mutable MsgLogger* fLogger; //! message logger @@ -148,39 +144,24 @@ namespace TMVA { ///////////////////////////////////////////////////////////////// // METHODS // ///////////////////////////////////////////////////////////////// - private: - // Square function (fastest implementation) - template<typename T> T Sqr(T x) const { return x*x; } - PDEFoamDistr* GetDistr() const { assert(fDistr); return fDistr; } protected: // ---------- TMVA console output void OutputGrow(Bool_t finished = false ); // nice TMVA console output - // ---------- Weighting functions for kernels - - Float_t WeightGaus(PDEFoamCell*, std::vector<Float_t>&, UInt_t dim=0); - - Double_t WeightLinNeighbors( std::vector<Float_t> &txvec, ECellValue cv, - Int_t dim1=-1, Int_t dim2=-1, - Bool_t TreatEmptyCells=kFALSE ); - // ---------- Foam build-up functions // Internal foam initialization functions void InitCells(); // Initialisation of all foam cells Int_t CellFill(Int_t, PDEFoamCell*);// Allocates new empty cell and return its index - void Explore(PDEFoamCell *Cell); // Exploration of the new cell, determine <wt>, wtMax etc. - void DTExplore(PDEFoamCell *Cell); // Exploration of the new cell according to decision tree logic + virtual void Explore(PDEFoamCell *Cell); // Exploration of the new cell, determine <wt>, wtMax etc. 
void Varedu(Double_t [], Int_t&, Double_t&,Double_t&); // Determines the best edge, variace reduction void MakeAlpha(); // Provides random point inside hyperrectangle void Grow(); // build up foam Long_t PeekMax(); // peek cell with max. driver integral - Long_t PeekLast(); // peek last created cell Int_t Divide(PDEFoamCell *); // Divide iCell into two daughters; iCell retained, taged as inactive Double_t Eval(Double_t *xRand, Double_t &event_density); // evaluate distribution on point 'xRand' - Float_t GetSeparation(Float_t s, Float_t b); // calculate separation // ---------- Cell value access functions @@ -188,24 +169,20 @@ namespace TMVA { Double_t GetCellElement(PDEFoamCell *cell, UInt_t i); // get Element 'i' in cell 'cell' void SetCellElement(PDEFoamCell *cell, UInt_t i, Double_t value); // set Element 'i' in cell 'cell' to value 'value' - // helper functions to access cell data - Double_t GetCellValue(PDEFoamCell*, ECellValue); - // specific function used during evaluation; determines, whether a cell value is undefined - Bool_t CellValueIsUndefined( PDEFoamCell* ); + virtual Bool_t CellValueIsUndefined( PDEFoamCell* ); // finds cell according to given event variables - PDEFoamCell* FindCell(std::vector<Float_t>&); //! - std::vector<TMVA::PDEFoamCell*> FindCells(std::vector<Float_t>&); //! 
+ PDEFoamCell* FindCell(std::vector<Float_t>&); + std::vector<TMVA::PDEFoamCell*> FindCells(std::vector<Float_t>&); + std::vector<TMVA::PDEFoamCell*> FindCells(std::map<Int_t,Float_t>&); + void FindCells(std::map<Int_t, Float_t>&, PDEFoamCell*, std::vector<PDEFoamCell*> &); - // find cells, which fit a given event vector - void FindCellsRecursive(std::vector<Float_t>&, PDEFoamCell*, - std::vector<PDEFoamCell*> &); - - // calculates the mean/ mpv target values for a given event 'tvals' - std::vector<Float_t> GetCellTargets( std::vector<Float_t> &tvals, ETargetSelection ts ); - // get number of events in cell during foam build-up - Double_t GetBuildUpCellEvents(PDEFoamCell* cell); + // get internal density + PDEFoamDensityBase* GetDistr() const { assert(fDistr); return fDistr; } + + // Square function (fastest implementation) + template<typename T> T Sqr(T x) const { return x*x; } PDEFoam(const PDEFoam&); // Copy Constructor NOT USED @@ -217,20 +194,18 @@ namespace TMVA { // ---------- Foam creation functions - void Init(); // initialize PDEFoamDistr - void FillBinarySearchTree( const Event* ev, Bool_t NoNegWeights=kFALSE ); + void Initialize(){}; // initialize the PDEFoam + void FillBinarySearchTree( const Event* ev ); // fill event into BST void Create(); // build-up foam // function to fill created cell with given value - void FillFoamCells(const Event* ev, Bool_t NoNegWeights=kFALSE); + virtual void FillFoamCells(const Event* ev, Float_t wt); - // functions to calc discriminators/ mean targets for every cell - // using filled cell values - void CalcCellDiscr(); - void CalcCellTarget(); + // remove all cell elements + void ResetCellElements(); - // init TObject pointer on cells - void ResetCellElements(Bool_t allcells = false); + // function to call after foam is grown + virtual void Finalize(){}; // ---------- Getters and Setters @@ -240,19 +215,11 @@ namespace TMVA { void SetnBin(Int_t nBin){fNBin = nBin;} // Sets no of bins in histogs in cell exploration void 
SetEvPerBin(Int_t EvPerBin){fEvPerBin =EvPerBin;} // Sets max. no. of effective events per bin void SetInhiDiv(Int_t, Int_t ); // Set inhibition of cell division along certain edge - void SetNElements(UInt_t numb){fNElements = numb;} // init every cell element (TVectorD*) - void SetVolumeFraction(Float_t vfr){fVolFrac = vfr;} // set VolFrac - void SetFoamType(EFoamType ft); // set foam type - void SetFillFoamWithOrigWeights(Bool_t new_val){fFillFoamWithOrigWeights=new_val;} - void SetDTSeparation(EDTSeparation new_val){fDTSeparation=new_val;} - void SetPeekMax(Bool_t new_val){ fPeekMax = new_val; } + void SetDensity(PDEFoamDensityBase *dens) { fDistr = dens; } // coverity[ -tainted_data_return ] Int_t GetTotDim() const {return fDim; } // Get total dimension TString GetFoamName() const {return fName; } // Get name of foam - UInt_t GetNElements() const {return fNElements; } // returns number of elements, saved on every cell - Float_t GetVolumeFraction() const {return fVolFrac;} // get VolFrac from PDEFoam - EFoamType GetFoamType() const {return fFoamType;}; // get foam type UInt_t GetNActiveCells() const {return fNoAct;}; // returns number of active cells UInt_t GetNInActiveCells() const {return GetNCells()-GetNActiveCells();}; // returns number of not active cells UInt_t GetNCells() const {return fNCells;}; // returns number of cells @@ -261,7 +228,6 @@ namespace TMVA { // Getters and Setters for user cut options void SetNmin(UInt_t val) { fNmin=val; } UInt_t GetNmin() { return fNmin; } - Bool_t GetFillFoamWithOrigWeights() const { return fFillFoamWithOrigWeights; } void SetMaxDepth(UInt_t maxdepth) { fMaxDepth = maxdepth; } UInt_t GetMaxDepth() const { return fMaxDepth; } @@ -293,61 +259,42 @@ namespace TMVA { void CheckAll(Int_t); // Checks correctness of the entire data structure in the FOAM object void PrintCell(Long_t iCell=0); // Print content of cell void PrintCells(); // Prints content of all cells - void CheckCells(Bool_t remove_empty_cells=false); // check 
all cells with respect to critical values - void RemoveEmptyCell(Int_t iCell); // removes iCell if its volume is zero - void PrintCellElements(); // print all cells with its elements // Message logger MsgLogger& Log() const { return *fLogger; } - // ---------- Foam output - - friend std::ostream& operator<< ( std::ostream& os, const PDEFoam& pdefoam ); - friend std::istream& operator>> ( std::istream& istr, PDEFoam& pdefoam ); - - void ReadStream(istream &); // read foam from stream - void PrintStream(ostream &) const; // write foam from stream - void ReadXML( void* parent ); // read foam variables from xml - void AddXMLTo( void* parent ); // write foam variables to xml - // ---------- Foam projection methods // project foam to two-dimensional histogram - TH2D* Project2(Int_t idim1, Int_t idim2, const char *opt="cell_value", - const char *ker="kNone", UInt_t maxbins=50); - - // helper function for Project2() - Double_t GetProjectionCellValue( PDEFoamCell* cell, - Int_t idim1, Int_t idim2, ECellValue cv ); + virtual TH2D* Project2(Int_t idim1, Int_t idim2, ECellValue cell_value=kValue, + PDEFoamKernelBase *kernel=NULL, UInt_t maxbins=50); // Project one-dimensional foam to a 1-dim histogram - TH1D* Draw1Dim(const char *opt, Int_t nbin); + TH1D* Draw1Dim(ECellValue cell_value, Int_t nbin, PDEFoamKernelBase *kernel=NULL); // Generates C++ code (root macro) for drawing foam with boxes (only 2-dim!) 
void RootPlot2dim( const TString& filename, TString opt, - Bool_t CreateCanvas = kTRUE, Bool_t colors = kTRUE, - Bool_t log_colors = kFALSE ); + Bool_t CreateCanvas = kTRUE, Bool_t colors = kTRUE ); // ---------- Foam evaluation functions // get cell value for a given event - Double_t GetCellValue(std::vector<Float_t>&, ECellValue); - - // helper functions to access cell data with kernel - Double_t GetCellDiscr(std::vector<Float_t> &xvec, EKernel kernel=kNone); - Double_t GetCellDensity(std::vector<Float_t> &xvec, EKernel kernel=kNone); + virtual Float_t GetCellValue( std::vector<Float_t>& xvec, ECellValue cv, PDEFoamKernelBase* ); - // calc mean cell value of neighbor cells - Double_t GetAverageNeighborsValue(std::vector<Float_t> &txvec, ECellValue cv); + // get cell values for a given (incomplete) event vector + virtual std::vector<Float_t> GetCellValue( std::map<Int_t,Float_t>& xvec, ECellValue cv ); - // returns regression value (mono target regression) - Double_t GetCellRegValue0(std::vector<Float_t>&, EKernel kernel=kNone); + // get cell value stored in a foam cell + virtual Float_t GetCellValue( PDEFoamCell* cell, ECellValue cv ); - // returns regression value i, given all variables (multi target regression) - std::vector<Float_t> GetProjectedRegValue(std::vector<Float_t> &vals, EKernel kernel=kNone, ETargetSelection ts=kMean); + // ---------- friend classes + friend class PDEFoamKernelBase; + friend class PDEFoamKernelTrivial; + friend class PDEFoamKernelLinN; + friend class PDEFoamKernelGauss; // ---------- ROOT class definition - ClassDef(PDEFoam,5) // Tree of PDEFoamCells + ClassDef(PDEFoam,6) // Tree of PDEFoamCells }; // end of PDEFoam } // namespace TMVA diff --git a/tmva/inc/PDEFoamCell.h b/tmva/inc/PDEFoamCell.h index 3c081134716217d9eeee8ffe86d44d1cb466f195..73e4778985ba4b1932db720b8f5818345bb4067c 100644 --- a/tmva/inc/PDEFoamCell.h +++ b/tmva/inc/PDEFoamCell.h @@ -1,3 +1,5 @@ +// @(#)root/tmva $Id$ +// Author: S. 
Jadach, Tancredi Carli, Dominik Dannheim, Alexander Voigt /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * @@ -15,7 +17,7 @@ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland * * Tancredi Carli - CERN, Switzerland * * Dominik Dannheim - CERN, Switzerland * - * Alexander Voigt - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * * * * Copyright (c) 2008: * * CERN, Switzerland * @@ -103,13 +105,14 @@ namespace TMVA { void SetSerial(Int_t Serial){ fSerial=Serial;} // Set serial number Int_t GetSerial() const { return fSerial;} // Get serial number UInt_t GetDepth(); // Get depth in binary tree + UInt_t GetTreeDepth(UInt_t depth=0); // Get depth of binary tree //--- other --- void Print(Option_t *option) const ; // Prints cell content //--- getter and setter for user variable --- void SetElement(TObject* fobj){ fElement = fobj; } // Set user variable TObject* GetElement() const { return fElement; } // Get pointer to user varibale //////////////////////////////////////////////////////////////////////////// - ClassDef(PDEFoamCell,1) //Single cell of FOAM + ClassDef(PDEFoamCell,2) //Single cell of FOAM }; // end of PDEFoamCell } // namespace TMVA diff --git a/tmva/inc/PDEFoamDecisionTree.h b/tmva/inc/PDEFoamDecisionTree.h new file mode 100644 index 0000000000000000000000000000000000000000..8f8bb6cea3520a904f09ca6f154f2693bad52fa7 --- /dev/null +++ b/tmva/inc/PDEFoamDecisionTree.h @@ -0,0 +1,66 @@ +// @(#)root/tmva $Id$ +// Author: Alexander Voigt + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Classes: PDEFoamDecisionTree * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Class for decision tree like PDEFoam. 
It overrides * + * PDEFoam::Explore() to use the decision tree like cell split * + * algorithm, given a specific separation type. * + * * + * Authors (alphabetical): * + * S. Jadach - Institute of Nuclear Physics, Cracow, Poland * + * Tancredi Carli - CERN, Switzerland * + * Dominik Dannheim - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * + * * + * Copyright (c) 2010: * + * CERN, Switzerland * + * MPI-K Heidelberg, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#ifndef ROOT_TMVA_PDEFoamDecisionTree +#define ROOT_TMVA_PDEFoamDecisionTree + +#ifndef ROOT_TMVA_PDEFoamDiscriminant +#include "TMVA/PDEFoamDiscriminant.h" +#endif +#ifndef ROOT_TMVA_SeparationBase +#include "TMVA/SeparationBase.h" +#endif + +namespace TMVA +{ + + class PDEFoamDecisionTree : public PDEFoamDiscriminant + { + + private: + SeparationBase *fSepType; // separation type + + protected: + + virtual void Explore(PDEFoamCell *Cell); // Exploration of the cell + + PDEFoamDecisionTree(const PDEFoamDecisionTree&); // Copy Constructor NOT USED + + public: + PDEFoamDecisionTree(); // Default constructor (used only by ROOT streamer) + PDEFoamDecisionTree(const TString&, SeparationBase *sepType, UInt_t cls); // Principal user-defined constructor + virtual ~PDEFoamDecisionTree(); // Default destructor + + // ---------- ROOT class definition + ClassDef(PDEFoamDecisionTree, 1) // Decision tree like PDEFoam + }; // end of PDEFoamDecisionTree + +} // namespace TMVA + +#endif diff --git a/tmva/inc/PDEFoamDecisionTreeDensity.h b/tmva/inc/PDEFoamDecisionTreeDensity.h new file mode 100644 index 0000000000000000000000000000000000000000..07b4d473dffb6ebdd35292d9c1693dcd8907aa61 --- /dev/null +++ b/tmva/inc/PDEFoamDecisionTreeDensity.h @@ -0,0 +1,75 @@ +// 
@(#)root/tmva $Id$ +// Author: Alexander Voigt + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Classes: PDEFoamDecisionTreeDensity * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Class PDEFoamDecisionTreeDensity is a class representing * + * n-dimensional real positive integrand function * + * The main function is Density() which provides the event density at a * + * given point during the foam build-up (sampling). * + * * + * Authors (alphabetical): * + * Tancredi Carli - CERN, Switzerland * + * Dominik Dannheim - CERN, Switzerland * + * S. Jadach - Institute of Nuclear Physics, Cracow, Poland * + * Alexander Voigt - TU Dresden, Germany * + * Peter Speckmayer - CERN, Switzerland * + * * + * Copyright (c) 2010: * + * CERN, Switzerland * + * MPI-K Heidelberg, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#ifndef ROOT_TMVA_PDEFoamDecisionTreeDensity +#define ROOT_TMVA_PDEFoamDecisionTreeDensity + +#include <vector> + +#ifndef ROOT_TH2 +#include "TH2.h" +#endif + +#ifndef ROOT_TMVA_PDEFoamDensityBase +#include "TMVA/PDEFoamDensityBase.h" +#endif +#ifndef ROOT_TMVA_Volume +#include "TMVA/Volume.h" +#endif + +namespace TMVA +{ + + // class definition of underlying event density + class PDEFoamDecisionTreeDensity : public PDEFoamDensityBase + { + + protected: + UInt_t fClass; // signal class + + public: + PDEFoamDecisionTreeDensity(); + PDEFoamDecisionTreeDensity(std::vector<Double_t> box, UInt_t cls); + PDEFoamDecisionTreeDensity(const PDEFoamDecisionTreeDensity&); + virtual ~PDEFoamDecisionTreeDensity() {}; + + // returns always 0 + virtual Double_t 
Density(std::vector<Double_t> &Xarg, Double_t &event_density); + + // fill histograms with events found in volume + virtual void FillHistograms(TMVA::Volume&, std::vector<TH1D*>&, std::vector<TH1D*>&, std::vector<TH1D*>&, std::vector<TH1D*>&); + + ClassDef(PDEFoamDecisionTreeDensity, 1) // Class for decision tree like PDEFoam density + }; //end of PDEFoamDecisionTreeDensity + +} // namespace TMVA + +#endif diff --git a/tmva/inc/PDEFoamDistr.h b/tmva/inc/PDEFoamDensityBase.h similarity index 51% rename from tmva/inc/PDEFoamDistr.h rename to tmva/inc/PDEFoamDensityBase.h index e9a84a2dec4b336b7d4f13e5c46cdb563dbc165c..c382e9c91ab13e41b0594a26b4f147e724e92c01 100644 --- a/tmva/inc/PDEFoamDistr.h +++ b/tmva/inc/PDEFoamDensityBase.h @@ -1,12 +1,14 @@ +// @(#)root/tmva $Id$ +// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * * Package: TMVA * - * Classes: PDEFoamDistr * + * Classes: PDEFoamDensityBase * * Web : http://tmva.sourceforge.net * * * * Description: * - * Class PDEFoamDistr is an Abstract class representing * + * Class PDEFoamDensityBase is an Abstract class representing * * n-dimensional real positive integrand function * * The main function is Density() which provides the event density at a * * given point during the foam build-up (sampling). * @@ -15,10 +17,10 @@ * Tancredi Carli - CERN, Switzerland * * Dominik Dannheim - CERN, Switzerland * * S. 
Jadach - Institute of Nuclear Physics, Cracow, Poland * - * Alexander Voigt - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * * Peter Speckmayer - CERN, Switzerland * * * - * Copyright (c) 2008: * + * Copyright (c) 2008, 2010: * * CERN, Switzerland * * MPI-K Heidelberg, Germany * * * @@ -27,18 +29,12 @@ * (http://tmva.sourceforge.net/LICENSE) * **********************************************************************************/ -#ifndef ROOT_TMVA_PDEFoamDistr -#define ROOT_TMVA_PDEFoamDistr +#ifndef ROOT_TMVA_PDEFoamDensityBase +#define ROOT_TMVA_PDEFoamDensityBase #ifndef ROOT_TObject #include "TObject.h" #endif -#ifndef ROOT_TH1D -#include "TH1D.h" -#endif -#ifndef ROOT_TH2D -#include "TH2D.h" -#endif #ifndef ROOT_TMVA_BinarySearchTree #include "TMVA/BinarySearchTree.h" @@ -46,67 +42,51 @@ #ifndef ROOT_TMVA_Event #include "TMVA/Event.h" #endif -#ifndef ROOT_TMVA_PDEFoam -#include "TMVA/PDEFoam.h" -#endif -#ifndef ROOT_TMVA_PDEFoamCell -#include "TMVA/PDEFoamCell.h" -#endif #ifndef ROOT_TMVA_MsgLogger #include "TMVA/MsgLogger.h" #endif -namespace TMVA { - // options for filling density (used in Density() to build up foam) - // kEVENT_DENSITY : use event density for foam buildup - // kDISCRIMINATOR : use N_sig/(N_sig + N_bg) for foam buildup - // kTARGET : use GetTarget(0) for foam build up - enum TDensityCalc { kEVENT_DENSITY, kDISCRIMINATOR, kTARGET }; -} - -namespace TMVA { +namespace TMVA +{ // class definition of underlying density - class PDEFoamDistr : public ::TObject { - + class PDEFoamDensityBase : public ::TObject + { private: - const PDEFoam *fPDEFoam; // PDEFoam to refer to - BinarySearchTree *fBst; // Binary tree to find events within a volume - TDensityCalc fDensityCalc;// method of density calculation + std::vector<Double_t> fBox; // range-searching box + Double_t fBoxVolume; // volume of range searching box + Bool_t fBoxHasChanged; // range searching box has changed protected: - mutable MsgLogger* fLogger; //! 
message logger + BinarySearchTree *fBst; // Binary tree to find events within a volume + mutable MsgLogger *fLogger; //! message logger + MsgLogger& Log() const { return *fLogger; } - public: - PDEFoamDistr(); - PDEFoamDistr(const PDEFoamDistr&); - virtual ~PDEFoamDistr(); + // calculate volume of fBox + Double_t GetBoxVolume(); - // density build-up functions - void Initialize(); // create and initialize binary search tree - void FillBinarySearchTree( const Event* ev, EFoamType ft, Bool_t NoNegWeights=kFALSE ); + public: + PDEFoamDensityBase(); + PDEFoamDensityBase(std::vector<Double_t> box); + PDEFoamDensityBase(const PDEFoamDensityBase&); + virtual ~PDEFoamDensityBase(); - // main function used by PDEFoam - // returns density at a given point by range searching in BST - Double_t Density(Double_t *Xarg, Double_t &event_density); + // fill event into binary search tree + void FillBinarySearchTree(const Event* ev); - // Return fDim histograms with signal and bg events - void FillHist(PDEFoamCell* cell, std::vector<TH1F*>&, std::vector<TH1F*>&, - std::vector<TH1F*>&, std::vector<TH1F*>&); + // set the range-searching box + void SetBox(std::vector<Double_t> box) { fBox = box; fBoxHasChanged = kTRUE; } - // Getter and setter for the fPDEFoam pointer - void SetPDEFoam(const PDEFoam *foam){ fPDEFoam = foam; } - const PDEFoam* GetPDEFoam() const { return fPDEFoam; }; + // get the range-searching box + const std::vector<Double_t>& GetBox() const { return fBox; } - // Getters and setters for foam filling method - void SetDensityCalc( TDensityCalc dc ){ fDensityCalc = dc; }; - Bool_t FillDiscriminator(){ return fDensityCalc == kDISCRIMINATOR; } - Bool_t FillTarget0() { return fDensityCalc == kTARGET; } - Bool_t FillEventDensity() { return fDensityCalc == kEVENT_DENSITY; } + // main function used by PDEFoam + // returns density at a given point by range searching in BST + virtual Double_t Density(std::vector<Double_t> &Xarg, Double_t &event_density) = 0; - 
ClassDef(PDEFoamDistr,3) //Class for Event density - }; //end of PDEFoamDistr + ClassDef(PDEFoamDensityBase, 1) //Class for density + }; //end of PDEFoamDensityBase } // namespace TMVA diff --git a/tmva/inc/PDEFoamDiscriminant.h b/tmva/inc/PDEFoamDiscriminant.h new file mode 100644 index 0000000000000000000000000000000000000000..613cb82bd0100e235a83cbdc83cbe2762eb4fcc6 --- /dev/null +++ b/tmva/inc/PDEFoamDiscriminant.h @@ -0,0 +1,71 @@ +// @(#)root/tmva $Id$ +// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Classes: PDEFoamDiscriminant * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Concrete PDEFoam sub-class. This foam stores the discriminant D * + * = N_sig / (N_bg + N_sig) with every cell, as well as the * + * statistical error on the discriminant. * + * * + * Authors (alphabetical): * + * S. 
Jadach - Institute of Nuclear Physics, Cracow, Poland * + * Tancredi Carli - CERN, Switzerland * + * Dominik Dannheim - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * + * * + * Copyright (c) 2008, 2010: * + * CERN, Switzerland * + * MPI-K Heidelberg, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#ifndef ROOT_TMVA_PDEFoamDiscriminant +#define ROOT_TMVA_PDEFoamDiscriminant + +#ifndef ROOT_TMVA_PDEFoam +#include "TMVA/PDEFoam.h" +#endif + +namespace TMVA +{ + + class PDEFoamDiscriminant : public PDEFoam + { + + protected: + UInt_t fClass; // signal class + + PDEFoamDiscriminant(const PDEFoamDiscriminant&); // Copy Constructor NOT USED + + // ---------- Public functions ---------------------------------- + public: + PDEFoamDiscriminant(); // Default constructor (used only by ROOT streamer) + PDEFoamDiscriminant(const TString&, UInt_t); // Principal user-defined constructor + virtual ~PDEFoamDiscriminant() {}; // Default destructor + + // function to fill created cell with given value + virtual void FillFoamCells(const Event* ev, Float_t wt); + + // function to call after foam is grown + virtual void Finalize(); + + // 2-dimensional projection + virtual TH2D* Project2(Int_t, Int_t, ECellValue, PDEFoamKernelBase*, UInt_t); + + // ---------- ROOT class definition + ClassDef(PDEFoamDiscriminant, 1) // Tree of PDEFoamCells + }; // end of PDEFoamDiscriminant + +} // namespace TMVA + +// ---------- Inline functions + +#endif diff --git a/tmva/inc/PDEFoamDiscriminantDensity.h b/tmva/inc/PDEFoamDiscriminantDensity.h new file mode 100644 index 0000000000000000000000000000000000000000..4551475663d7b34b0c2c75849f549fea71f17484 --- /dev/null +++ b/tmva/inc/PDEFoamDiscriminantDensity.h @@ -0,0 +1,65 @@ +// @(#)root/tmva 
$Id$ +// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Classes: PDEFoamDiscriminantDensity * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Class PDEFoamDiscriminantDensity is a class representing * + * n-dimensional real positive integrand function * + * The main function is Density() which provides the event density at a * + * given point during the foam build-up (sampling). * + * * + * Authors (alphabetical): * + * Tancredi Carli - CERN, Switzerland * + * Dominik Dannheim - CERN, Switzerland * + * S. Jadach - Institute of Nuclear Physics, Cracow, Poland * + * Alexander Voigt - TU Dresden, Germany * + * Peter Speckmayer - CERN, Switzerland * + * * + * Copyright (c) 2008, 2010: * + * CERN, Switzerland * + * MPI-K Heidelberg, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#ifndef ROOT_TMVA_PDEFoamDiscriminantDensity +#define ROOT_TMVA_PDEFoamDiscriminantDensity + +#ifndef ROOT_TMVA_PDEFoamDensityBase +#include "TMVA/PDEFoamDensityBase.h" +#endif + +namespace TMVA +{ + + // class definition of underlying event density + class PDEFoamDiscriminantDensity : public PDEFoamDensityBase + { + + protected: + UInt_t fClass; // signal class + + public: + PDEFoamDiscriminantDensity(); + PDEFoamDiscriminantDensity(std::vector<Double_t> box, UInt_t cls); + PDEFoamDiscriminantDensity(const PDEFoamDiscriminantDensity&); + virtual ~PDEFoamDiscriminantDensity() {}; + + // main function used by PDEFoam + // returns discriminant density N_class/N_total at a given point + // by range searching in BST + virtual Double_t 
Density(std::vector<Double_t> &Xarg, Double_t &event_density); + + ClassDef(PDEFoamDiscriminantDensity, 1) //Class for Discriminant density + }; //end of PDEFoamDiscriminantDensity + +} // namespace TMVA + +#endif diff --git a/tmva/inc/PDEFoamEvent.h b/tmva/inc/PDEFoamEvent.h new file mode 100644 index 0000000000000000000000000000000000000000..c5f3f9110e7b2ca2f78db81e5e748cb988d3bd1a --- /dev/null +++ b/tmva/inc/PDEFoamEvent.h @@ -0,0 +1,64 @@ +// @(#)root/tmva $Id$ +// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Classes: PDEFoamEvent * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Concrete PDEFoam sub-class. This foam stores the number of * + * events with every cell, as well as the statistical error on * + * the event number. * + * * + * Authors (alphabetical): * + * S. 
Jadach - Institute of Nuclear Physics, Cracow, Poland * + * Tancredi Carli - CERN, Switzerland * + * Dominik Dannheim - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * + * * + * Copyright (c) 2008, 2010: * + * CERN, Switzerland * + * MPI-K Heidelberg, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#ifndef ROOT_TMVA_PDEFoamEvent +#define ROOT_TMVA_PDEFoamEvent + +#ifndef ROOT_TMVA_PDEFoam +#include "TMVA/PDEFoam.h" +#endif + +namespace TMVA +{ + + class PDEFoamEvent : public PDEFoam + { + + protected: + + PDEFoamEvent(const PDEFoamEvent&); // Copy Constructor NOT USED + + // ---------- Public functions ---------------------------------- + public: + PDEFoamEvent(); // Default constructor (used only by ROOT streamer) + PDEFoamEvent(const TString&); // Principal user-defined constructor + virtual ~PDEFoamEvent() {}; // Default destructor + + // function to fill created cell with given value + virtual void FillFoamCells(const Event* ev, Float_t wt); + + // ---------- ROOT class definition + ClassDef(PDEFoamEvent, 1) // Tree of PDEFoamCells + }; // end of PDEFoamEvent + +} // namespace TMVA + +// ---------- Inline functions + +#endif diff --git a/tmva/inc/PDEFoamEventDensity.h b/tmva/inc/PDEFoamEventDensity.h new file mode 100644 index 0000000000000000000000000000000000000000..5edb6e8dc68b8442420f854f885201e377ac4438 --- /dev/null +++ b/tmva/inc/PDEFoamEventDensity.h @@ -0,0 +1,61 @@ +// @(#)root/tmva $Id$ +// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Classes: PDEFoamEventDensity * + * Web : 
http://tmva.sourceforge.net * + * * + * Description: * + * Class PDEFoamEventDensity is a class representing * + * n-dimensional real positive integrand function * + * The main function is Density() which provides the event density at a * + * given point during the foam build-up (sampling). * + * * + * Authors (alphabetical): * + * Tancredi Carli - CERN, Switzerland * + * Dominik Dannheim - CERN, Switzerland * + * S. Jadach - Institute of Nuclear Physics, Cracow, Poland * + * Alexander Voigt - TU Dresden, Germany * + * Peter Speckmayer - CERN, Switzerland * + * * + * Copyright (c) 2008, 2010: * + * CERN, Switzerland * + * MPI-K Heidelberg, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#ifndef ROOT_TMVA_PDEFoamEventDensity +#define ROOT_TMVA_PDEFoamEventDensity + +#ifndef ROOT_TMVA_PDEFoamDensityBase +#include "TMVA/PDEFoamDensityBase.h" +#endif + +namespace TMVA +{ + + // class definition of underlying event density + class PDEFoamEventDensity : public PDEFoamDensityBase + { + + public: + PDEFoamEventDensity(); + PDEFoamEventDensity(std::vector<Double_t> box); + PDEFoamEventDensity(const PDEFoamEventDensity&); + virtual ~PDEFoamEventDensity() {}; + + // main function used by PDEFoam + // returns event density at a given point by range searching in BST + virtual Double_t Density(std::vector<Double_t> &Xarg, Double_t &event_density); + + ClassDef(PDEFoamEventDensity, 1) //Class for Event density + }; //end of PDEFoamEventDensity + +} // namespace TMVA + +#endif diff --git a/tmva/inc/PDEFoamKernelBase.h b/tmva/inc/PDEFoamKernelBase.h new file mode 100644 index 0000000000000000000000000000000000000000..b9ae4f17cb585f4e992df4a718f90050a92d0306 --- /dev/null +++ b/tmva/inc/PDEFoamKernelBase.h @@ -0,0 +1,63 @@ +// 
@(#)root/tmva $Id$
+// Author: Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamKernelBase *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * PDEFoam kernel interface *
+ * *
+ * Authors (alphabetical): *
+ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * *
+ * Copyright (c) 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+#ifndef ROOT_TMVA_PDEFoamKernelBase
+#define ROOT_TMVA_PDEFoamKernelBase
+
+#ifndef ROOT_TObject
+#include "TObject.h"
+#endif
+
+#ifndef ROOT_TMVA_PDEFoam
+#include "TMVA/PDEFoam.h"
+#endif
+
+namespace TMVA
+{
+
+   class PDEFoamKernelBase : public TObject
+   {
+
+   protected:
+      mutable MsgLogger* fLogger; //! message logger
+
+   public:
+      PDEFoamKernelBase(); // Constructor
+      PDEFoamKernelBase(const PDEFoamKernelBase&); // Copy constructor
+      virtual ~PDEFoamKernelBase(); // Destructor
+
+      // kernel estimator (pure virtual: a derived kernel must implement it to be instantiable)
+      virtual Float_t Estimate(PDEFoam*, std::vector<Float_t>&, ECellValue) = 0;
+
+      // Message logger accessor (dereferences fLogger, so fLogger must not be null)
+      MsgLogger& Log() const { return *fLogger; }
+
+      ClassDef(PDEFoamKernelBase, 1) // PDEFoam kernel
+   }; // end of PDEFoamKernelBase
+} // namespace TMVA
+
+#endif
diff --git a/tmva/inc/PDEFoamKernelGauss.h b/tmva/inc/PDEFoamKernelGauss.h
new file mode 100644
index 0000000000000000000000000000000000000000..3d14fa2a4e850cafbac8402eef096ca889ee8ec5
--- /dev/null
+++ b/tmva/inc/PDEFoamKernelGauss.h
@@ -0,0 +1,68 @@
+// @(#)root/tmva $Id$
+// Author: Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamKernelGauss *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * PDEFoam kernel, which weights all cell values by a gauss function. *
+ * *
+ * Authors (alphabetical): *
+ * S.
Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+#ifndef ROOT_TMVA_PDEFoamKernelGauss
+#define ROOT_TMVA_PDEFoamKernelGauss
+
+#ifndef ROOT_TMVA_PDEFoam
+#include "TMVA/PDEFoam.h"
+#endif
+#ifndef ROOT_TMVA_PDEFoamKernelBase
+#include "TMVA/PDEFoamKernelBase.h"
+#endif
+
+namespace TMVA
+{
+
+   class PDEFoamKernelGauss : public PDEFoamKernelBase
+   {
+
+   protected:
+      Float_t fSigma; // width of gauss curve
+
+      // Square function (fastest implementation): returns x * x for any type T
+      template<typename T> T Sqr(T x) const { return x * x; }
+
+      // calculate gaussian weight
+      Float_t WeightGaus(PDEFoam*, PDEFoamCell*, std::vector<Float_t>&);
+
+      // estimate the cell value by its neighbors
+      Float_t GetAverageNeighborsValue(PDEFoam*, std::vector<Float_t>&, ECellValue);
+
+   public:
+      PDEFoamKernelGauss(Float_t sigma); // Constructor (not explicit: a Float_t converts implicitly)
+      PDEFoamKernelGauss(const PDEFoamKernelGauss&); // Copy Constructor
+      virtual ~PDEFoamKernelGauss() {}; // Destructor (virtual, empty inline body)
+
+      // kernel estimator (same signature as the pure virtual PDEFoamKernelBase::Estimate)
+      virtual Float_t Estimate(PDEFoam*, std::vector<Float_t>&, ECellValue);
+
+      ClassDef(PDEFoamKernelGauss, 1) // PDEFoam kernel
+   }; // end of PDEFoamKernelGauss
+} // namespace TMVA
+
+#endif
diff --git a/tmva/inc/PDEFoamKernelLinN.h b/tmva/inc/PDEFoamKernelLinN.h
new file mode 100644
index 0000000000000000000000000000000000000000..f8bb9ac19268d26ffe716f54cd00b06ddf4495c8
--- /dev/null
+++ b/tmva/inc/PDEFoamKernelLinN.h
@@ -0,0 +1,63 @@
+// @(#)root/tmva $Id$
+// Author: Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamKernelLinN *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * PDEFoam kernel, which linearly weights with the neighbor cells. *
+ * *
+ * Authors (alphabetical): *
+ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * *
+ * Copyright (c) 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+#ifndef ROOT_TMVA_PDEFoamKernelLinN
+#define ROOT_TMVA_PDEFoamKernelLinN
+
+#ifndef ROOT_TMVA_PDEFoam
+#include "TMVA/PDEFoam.h"
+#endif
+#ifndef ROOT_TMVA_PDEFoamKernelBase
+#include "TMVA/PDEFoamKernelBase.h"
+#endif
+
+namespace TMVA
+{
+
+   class PDEFoamKernelLinN : public PDEFoamKernelBase
+   {
+
+   protected:
+      // helper function, which linearly weights with the neighbor cells
+      Float_t WeightLinNeighbors(PDEFoam*, std::vector<Float_t>&, ECellValue, Bool_t);
+
+      // helper function for WeightLinNeighbors()
+      Float_t GetAverageNeighborsValue(PDEFoam*, std::vector<Float_t>&, ECellValue);
+
+   public:
+      PDEFoamKernelLinN(); // Constructor
+      PDEFoamKernelLinN(const PDEFoamKernelLinN&); // Copy Constructor
+      virtual ~PDEFoamKernelLinN() {}; // Destructor (virtual, empty inline body)
+
+      // kernel estimator (same signature as the pure virtual PDEFoamKernelBase::Estimate)
+      virtual Float_t Estimate(PDEFoam*, std::vector<Float_t>&, ECellValue);
+
+      ClassDef(PDEFoamKernelLinN, 1) // PDEFoam kernel
+   }; // end of PDEFoamKernelLinN
+} // namespace TMVA
+
+#endif
diff --git a/tmva/inc/PDEFoamKernelTrivial.h
b/tmva/inc/PDEFoamKernelTrivial.h
new file mode 100644
index 0000000000000000000000000000000000000000..b999f0132e8e7af5e1da16ce2c02150067ee1ef0
--- /dev/null
+++ b/tmva/inc/PDEFoamKernelTrivial.h
@@ -0,0 +1,56 @@
+// @(#)root/tmva $Id$
+// Author: Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamKernelTrivial *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * Trivial PDEFoam kernel *
+ * *
+ * Authors (alphabetical): *
+ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * *
+ * Copyright (c) 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+#ifndef ROOT_TMVA_PDEFoamKernelTrivial
+#define ROOT_TMVA_PDEFoamKernelTrivial
+
+#ifndef ROOT_TMVA_PDEFoam
+#include "TMVA/PDEFoam.h"
+#endif
+#ifndef ROOT_TMVA_PDEFoamKernelBase
+#include "TMVA/PDEFoamKernelBase.h"
+#endif
+
+namespace TMVA
+{
+
+   class PDEFoamKernelTrivial : public PDEFoamKernelBase
+   {
+
+   public:
+      PDEFoamKernelTrivial(); // Constructor
+      PDEFoamKernelTrivial(const PDEFoamKernelTrivial&); // Copy Constructor
+      virtual ~PDEFoamKernelTrivial() {}; // Destructor (virtual, empty inline body)
+
+      // kernel estimator (same signature as the pure virtual PDEFoamKernelBase::Estimate)
+      virtual Float_t Estimate(PDEFoam*, std::vector<Float_t>&, ECellValue);
+
+      ClassDef(PDEFoamKernelTrivial, 1) // trivial PDEFoam kernel
+   }; // end of PDEFoamKernelTrivial
+} // namespace TMVA
+
+#endif
diff --git a/tmva/inc/PDEFoamMultiTarget.h b/tmva/inc/PDEFoamMultiTarget.h
new file
mode 100644
index 0000000000000000000000000000000000000000..bd2dbdad99686c6a6fe32a67f41108fe5c2cf61f
--- /dev/null
+++ b/tmva/inc/PDEFoamMultiTarget.h
@@ -0,0 +1,71 @@
+// @(#)root/tmva $Id$
+// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamMultiTarget *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * Concrete PDEFoamEvent sub-class. This foam stores the number *
+ * of events with every cell, as well as the statistical error on *
+ * the event number. It overrides GetCellValue() for projecting *
+ * the target values given an incomplete event map with *
+ * N_variables < dimension of foam. *
+ * *
+ * Authors (alphabetical): *
+ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+#ifndef ROOT_TMVA_PDEFoamMultiTarget
+#define ROOT_TMVA_PDEFoamMultiTarget
+
+#ifndef ROOT_TMVA_PDEFoamEvent
+#include "TMVA/PDEFoamEvent.h"
+#endif
+
+namespace TMVA
+{
+
+   // target selection method (the two enumerators are kMean = 0 and kMpv = 1)
+   enum ETargetSelection { kMean = 0, kMpv = 1 };
+
+   class PDEFoamMultiTarget : public PDEFoamEvent
+   {
+
+   protected:
+      ETargetSelection fTargetSelection; // the target selection method
+
+      PDEFoamMultiTarget(const PDEFoamMultiTarget&); // Copy constructor (protected; NOT USED)
+
+      // ---------- Public functions ----------------------------------
+   public:
+      PDEFoamMultiTarget(); // Default constructor (used only by ROOT streamer)
+      PDEFoamMultiTarget(const TString&, ETargetSelection); // Principal user-defined constructor
+      virtual ~PDEFoamMultiTarget() {}; // Destructor (virtual, empty inline body)
+
+      // overridden from PDEFoam: extract the targets from the foam
+      virtual std::vector<Float_t> GetCellValue(std::map<Int_t, Float_t>&, ECellValue);
+      using PDEFoam::GetCellValue; // keeps the GetCellValue overloads declared in PDEFoam visible next to the one above
+
+      // ---------- ROOT class definition
+      ClassDef(PDEFoamMultiTarget, 1) // Tree of PDEFoamCells
+   }; // end of PDEFoamMultiTarget
+
+} // namespace TMVA
+
+// ---------- Inline functions (none are defined in this header)
+
+#endif
diff --git a/tmva/inc/PDEFoamTarget.h b/tmva/inc/PDEFoamTarget.h
new file mode 100644
index 0000000000000000000000000000000000000000..331612fefc3b77a51e68784ed858b2f82af5e26b
--- /dev/null
+++ b/tmva/inc/PDEFoamTarget.h
@@ -0,0 +1,78 @@
+// @(#)root/tmva $Id$
+// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamTarget *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * Concrete PDEFoam sub-class. This foam stores the first target *
+ * (index 0) with every cell, as well as the statistical error on *
+ * the target. *
+ * *
+ * Authors (alphabetical): *
+ * S.
Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+#ifndef ROOT_TMVA_PDEFoamTarget
+#define ROOT_TMVA_PDEFoamTarget
+
+#ifndef ROOT_TMVA_PDEFoam
+#include "TMVA/PDEFoam.h"
+#endif
+
+namespace TMVA
+{
+
+   class PDEFoamTarget : public PDEFoam
+   {
+
+   protected:
+      UInt_t fTarget; // the target to fill the cells with
+
+      // specific function used during evaluation; determines whether
+      // a cell value is undefined
+      Bool_t CellValueIsUndefined(PDEFoamCell* cell);
+
+      // calculate the average of the neighbor cell values
+      Float_t GetAverageNeighborsValue(std::vector<Float_t>&, ECellValue);
+
+      PDEFoamTarget(const PDEFoamTarget&); // Copy constructor (protected; NOT USED)
+
+      // ---------- Public functions ----------------------------------
+   public:
+      PDEFoamTarget(); // Default constructor (used only by ROOT streamer)
+      PDEFoamTarget(const TString&, UInt_t); // Principal user-defined constructor
+      virtual ~PDEFoamTarget() {}; // Destructor (virtual, empty inline body)
+
+      // function to fill created cell with given value
+      virtual void FillFoamCells(const Event* ev, Float_t wt);
+
+      // function to call after foam is grown
+      virtual void Finalize();
+
+      Float_t GetCellValue(std::vector<Float_t> &xvec, ECellValue cv, PDEFoamKernelBase*); // overload taking a kernel pointer (declared non-virtual here)
+      using PDEFoam::GetCellValue; // keeps the GetCellValue overloads declared in PDEFoam visible next to the one above
+
+      // ---------- ROOT class definition
+      ClassDef(PDEFoamTarget, 1) // Tree of PDEFoamCells
+   }; // end of PDEFoamTarget
+
+} // namespace TMVA
+
+// ---------- Inline functions (none are defined in this header)
+
+#endif
diff --git a/tmva/inc/PDEFoamTargetDensity.h
b/tmva/inc/PDEFoamTargetDensity.h
new file mode 100644
index 0000000000000000000000000000000000000000..6a985cf87865f6e685ea4de86d24541f95510f27
--- /dev/null
+++ b/tmva/inc/PDEFoamTargetDensity.h
@@ -0,0 +1,64 @@
+// @(#)root/tmva $Id$
+// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamTargetDensity *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * Class PDEFoamTargetDensity is a class representing an *
+ * n-dimensional real positive integrand function. *
+ * The main function is Density() which provides the event density at a *
+ * given point during the foam build-up (sampling). *
+ * *
+ * Authors (alphabetical): *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * Peter Speckmayer - CERN, Switzerland *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+#ifndef ROOT_TMVA_PDEFoamTargetDensity
+#define ROOT_TMVA_PDEFoamTargetDensity
+
+#ifndef ROOT_TMVA_PDEFoamDensityBase
+#include "TMVA/PDEFoamDensityBase.h"
+#endif
+
+namespace TMVA
+{
+
+   // class definition of underlying target density
+   class PDEFoamTargetDensity : public PDEFoamDensityBase
+   {
+
+   protected:
+      UInt_t fTarget; // the target to calculate the density for
+
+   public:
+      PDEFoamTargetDensity(); // default constructor
+      PDEFoamTargetDensity(std::vector<Double_t> box, UInt_t target); // constructor taking a box vector (by value) and a target index
+      PDEFoamTargetDensity(const PDEFoamTargetDensity&); // copy constructor
+      virtual ~PDEFoamTargetDensity() {}; // destructor (virtual, empty inline body)
+
+      // main function used by PDEFoam
+      // returns event density at a given point by range searching in BST
+      virtual Double_t Density(std::vector<Double_t> &Xarg, Double_t &event_density);
+
+      ClassDef(PDEFoamTargetDensity, 1) //Class for Target density
+   }; //end of PDEFoamTargetDensity
+
+} // namespace TMVA
+
+#endif
diff --git a/tmva/inc/PDEFoamVect.h b/tmva/inc/PDEFoamVect.h index bd63c314a4804b67f1330a9b36d83aa8a677fa39..867e3f04901b3c3cf8cc4731de7ba2daeb1aaf35 100644 --- a/tmva/inc/PDEFoamVect.h +++ b/tmva/inc/PDEFoamVect.h @@ -1,3 +1,5 @@ +// @(#)root/tmva $Id$ +// Author: S. Jadach, Tancredi Carli, Dominik Dannheim, Alexander Voigt /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * @@ -13,7 +15,7 @@ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland * * Tancredi Carli - CERN, Switzerland * * Dominik Dannheim - CERN, Switzerland * - * Alexander Voigt - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * * * * Copyright (c) 2008: * * CERN, Switzerland * @@ -63,7 +65,7 @@ namespace TMVA { Int_t GetDim() const { return fDim; } // Returns dimension Double_t GetCoord(Int_t i) const { return fCoords[i]; } // Returns coordinate - ClassDef(PDEFoamVect,1) //n-dimensional vector with dynamical allocation + ClassDef(PDEFoamVect,2) //n-dimensional vector with dynamical allocation }; // end of PDEFoamVect } // namespace TMVA diff --git a/tmva/inc/Results.h b/tmva/inc/Results.h index 11f4c17b113b19cc294ab5dd89f7e325a126ccf5..07e189142b2596c18457ed86b0e1766702b34d4e 100644 --- a/tmva/inc/Results.h +++ b/tmva/inc/Results.h @@ -52,6 +52,7 @@ #endif class TH1; +class TGraph; namespace TMVA { @@ -76,6 +77,7 @@ namespace TMVA { TList* GetStorage() const { return fStorage; } TObject* GetObject(const TString & alias) const; TH1* GetHist(const TString & alias) const; + TGraph* GetGraph(const
TString & alias) const; virtual Types::EAnalysisType GetAnalysisType() { return Types::kNoAnalysisType; } // delete all stored data diff --git a/tmva/inc/Tools.h b/tmva/inc/Tools.h index 6828f0651e1219957e8fbd756fdd597003460424..1f7bbcd31ec02cb2406a9d00d5f8d3fc198d833c 100644 --- a/tmva/inc/Tools.h +++ b/tmva/inc/Tools.h @@ -125,7 +125,7 @@ namespace TMVA { // returns the covariance matrix of of the different classes (and the sum) // given the event sample - std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<Event*>& events, Int_t maxCls ); + std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<Event*>& events, Int_t maxCls, Int_t maxNumberVar = -1 ); // turns covariance into correlation matrix diff --git a/tmva/inc/TransformationHandler.h b/tmva/inc/TransformationHandler.h index 5f65ff796a5a992044319d00818f80f3676106c5..4df8d18d3570d87438f2aeed6d35ffc0d1356042 100644 --- a/tmva/inc/TransformationHandler.h +++ b/tmva/inc/TransformationHandler.h @@ -72,7 +72,7 @@ namespace TMVA { TString GetVariableAxisTitle( const VariableInfo& info ) const; const Event* Transform(const Event*) const; - const Event* InverseTransform(const Event*) const; + const Event* InverseTransform(const Event*, Bool_t suppressIfNoTargets=true ) const; // overrides the reference classes of all added transformations. Handle with care!!! 
void SetTransformationReferenceClass( Int_t cls ); diff --git a/tmva/inc/Types.h b/tmva/inc/Types.h index 37e154a02078ff3019081f7003deebec486a194d..6c6aa348bb8a3d0c1474ebc9231b90b711f07379 100644 --- a/tmva/inc/Types.h +++ b/tmva/inc/Types.h @@ -102,7 +102,7 @@ namespace TMVA { kDecorrelated, kNormalized, kPCA, - kGaussDecorr, + kRearranged, kGauss, kUniform, kMaxVariableTransform diff --git a/tmva/inc/VariableDecorrTransform.h b/tmva/inc/VariableDecorrTransform.h index 00776449921f8d00b57df74f634141987ed50707..c7bebe3b684102300c6d1f6b88775f93eba8353e 100644 --- a/tmva/inc/VariableDecorrTransform.h +++ b/tmva/inc/VariableDecorrTransform.h @@ -84,6 +84,7 @@ namespace TMVA { std::vector<TMatrixD*> fDecorrMatrices; //! Decorrelation matrix [class0/class1/.../all classes] void CalcSQRMats( const std::vector<Event*>&, Int_t maxCls ); + std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<Event*>& events, Int_t maxCls ); ClassDef(VariableDecorrTransform,0) // Variable transformation: decorrelation }; diff --git a/tmva/inc/VariableNormalizeTransform.h b/tmva/inc/VariableNormalizeTransform.h index 805c9f043b03c7355b1d1757e6577fb67a5621a2..ea0d6eb42d740ccf7be538c2c71790e8e1450f0c 100644 --- a/tmva/inc/VariableNormalizeTransform.h +++ b/tmva/inc/VariableNormalizeTransform.h @@ -12,6 +12,7 @@ * * * Authors (alphabetical): * * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * * * @@ -50,6 +51,8 @@ namespace TMVA { public: + typedef std::vector<Float_t> FloatVector; + typedef std::vector< FloatVector > VectorOfFloatVectors; VariableNormalizeTransform( DataSetInfo& dsi ); virtual ~VariableNormalizeTransform( void ); @@ -80,8 +83,8 @@ namespace TMVA { // mutable Event* fTransformedEvent; - std::vector< std::vector<Float_t> > fMin; //! 
Min of source range - std::vector< std::vector<Float_t> > fMax; //! Max of source range + VectorOfFloatVectors fMin; //! Min of source range + VectorOfFloatVectors fMax; //! Max of source range ClassDef(VariableNormalizeTransform,0) // Variable transformation: normalization }; diff --git a/tmva/inc/VariablePCATransform.h b/tmva/inc/VariablePCATransform.h index 93e5983beda3e4505dc79808d97149385521f614..76e7a7d6a5dcebf2ac2f55d6119b991f3616092f 100644 --- a/tmva/inc/VariablePCATransform.h +++ b/tmva/inc/VariablePCATransform.h @@ -12,6 +12,7 @@ * * * Authors (alphabetical): * * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * * * @@ -71,9 +72,8 @@ namespace TMVA { private: void CalculatePrincipalComponents( const std::vector<Event*>& ); - std::vector<Float_t> X2P( const std::vector<Float_t>&, Int_t cls ) const; - - // mutable Event* fTransformedEvent; + void X2P( std::vector<Float_t>&, const std::vector<Float_t>&, Int_t cls ) const; + void P2X( std::vector<Float_t>&, const std::vector<Float_t>&, Int_t cls ) const; // store relevant parts of PCA locally std::vector<TVectorD*> fMeanValues; // mean values diff --git a/tmva/inc/VariableRearrangeTransform.h b/tmva/inc/VariableRearrangeTransform.h new file mode 100644 index 0000000000000000000000000000000000000000..12b65d8bf76787381e6e72c8ddaa28c037485621 --- /dev/null +++ b/tmva/inc/VariableRearrangeTransform.h @@ -0,0 +1,79 @@ +// @(#)root/tmva $Id$ +// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : VariableRearrangeTransform * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * rearrangement of input 
variables *
+ * *
+ * Authors (alphabetical): *
+ * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
+ * *
+ * Copyright (c) 2005: *
+ * CERN, Switzerland *
+ * U. of Victoria, Canada *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+#ifndef ROOT_TMVA_VariableRearrangeTransform
+#define ROOT_TMVA_VariableRearrangeTransform
+
+//////////////////////////////////////////////////////////////////////////
+// //
+// VariableRearrangeTransform //
+// //
+// rearrangement of input variables //
+// //
+//////////////////////////////////////////////////////////////////////////
+
+#ifndef ROOT_TMVA_VariableTransformBase
+#include "TMVA/VariableTransformBase.h"
+#endif
+
+namespace TMVA {
+
+   class VariableRearrangeTransform : public VariableTransformBase {
+
+   public:
+
+      typedef std::vector<Float_t> FloatVector;
+
+      VariableRearrangeTransform( DataSetInfo& dsi );
+      virtual ~VariableRearrangeTransform( void );
+
+      void Initialize();
+      Bool_t PrepareTransformation( const std::vector<Event*>& );
+
+      virtual const Event* Transform(const Event* const, Int_t cls ) const;
+      virtual const Event* InverseTransform( const Event* const, Int_t cls ) const;
+
+      void WriteTransformationToStream ( std::ostream& ) const {} // intentionally empty: writes nothing to the stream
+      void ReadTransformationFromStream( std::istream&, const TString& ) { SetCreated(); } // reads nothing from the stream; only calls SetCreated()
+
+      virtual void AttachXMLTo(void* parent);
+      virtual void ReadFromXML( void* trfnode );
+
+      virtual void PrintTransformation( ostream & o ); // NOTE(review): unqualified ostream, while the neighbouring declarations spell std::ostream - confirm a using-declaration is in scope
+
+      // writer of function code
+      virtual void MakeFunction( std::ostream& fout, const TString& fncName, Int_t part, UInt_t trCounter, Int_t cls );
+
+      // provides string vector giving explicit transformation
+      std::vector<TString>* GetTransformationStrings( Int_t cls ) const;
+
+   private:
+
+      ClassDef(VariableRearrangeTransform,0) // Variable transformation: rearrangement
+   };
+
+} // namespace TMVA
+
+#endif
diff --git a/tmva/inc/VariableTransformBase.h b/tmva/inc/VariableTransformBase.h index 2712064172819953f49509c8f6313d418a31b0d2..26f11ac4864d3801ee783eb855ec3c21ff7309f6 100644 --- a/tmva/inc/VariableTransformBase.h +++ b/tmva/inc/VariableTransformBase.h @@ -68,6 +68,10 @@ namespace TMVA { public: + typedef std::vector<std::pair<Char_t,UInt_t> > VectorOfCharAndInt; + typedef VectorOfCharAndInt::iterator ItVarTypeIdx; + typedef VectorOfCharAndInt::const_iterator ItVarTypeIdxConst; + VariableTransformBase( DataSetInfo& dsi, Types::EVariableTransform tf, const TString& trfName ); virtual ~VariableTransformBase( void ); @@ -83,6 +87,17 @@ namespace TMVA { Bool_t IsCreated() const { return fCreated; } Bool_t IsNormalised() const { return fNormalise; } + // variable selection + virtual void SelectInput( const TString& inputVariables, Bool_t putIntoVariables = kFALSE ); + virtual Bool_t GetInput ( const Event* event, std::vector<Float_t>& input, std::vector<Char_t>& mask, Bool_t backTransform = kFALSE ) const; + virtual void SetOutput( Event* event, std::vector<Float_t>& output, std::vector<Char_t>& mask, const Event* oldEvent = 0, Bool_t backTransform = kFALSE ) const; + virtual void CountVariableTypes( UInt_t& nvars, UInt_t& ntgts, UInt_t& nspcts ) const; + + void ToggleInputSortOrder( const Bool_t sortOrder ) { fSortGet = sortOrder; } + void SetOutputDataSetInfo( DataSetInfo* outputDsi ) { fDsiOutput = outputDsi; } + + + void SetUseSignalTransform( Bool_t e=kTRUE) { fUseSignalTransform = e; } Bool_t UseSignalTransform() const { return fUseSignalTransform; } @@ -107,6 +122,7 @@ namespace TMVA { const std::vector<TMVA::VariableInfo>& Variables() const { return fVariables; } const std::vector<TMVA::VariableInfo>& Targets() const { return fTargets; } + const std::vector<TMVA::VariableInfo>& Spectators() const { return
fSpectators; } MsgLogger& Log() const { return *fLogger; } @@ -122,17 +138,24 @@ namespace TMVA { UInt_t GetNVariables() const { return fDsi.GetNVariables(); } UInt_t GetNTargets() const { return fDsi.GetNTargets(); } + UInt_t GetNSpectators() const { return fDsi.GetNSpectators(); } DataSetInfo& fDsi; + DataSetInfo* fDsiOutput; std::vector<TMVA::VariableInfo>& Variables() { return fVariables; } std::vector<TMVA::VariableInfo>& Targets() { return fTargets; } + std::vector<TMVA::VariableInfo>& Spectators() { return fSpectators; } Int_t GetNClasses() const { return fDsi.GetNClasses(); } mutable Event* fTransformedEvent; // holds the current transformed event mutable Event* fBackTransformedEvent; // holds the current back-transformed event + // variable selection + VectorOfCharAndInt fGet; // get variables/targets/spectators + VectorOfCharAndInt fPut; // put variables/targets/spectators + private: Types::EVariableTransform fVariableTransform; // Decorrelation, PCA, etc. @@ -147,6 +170,15 @@ namespace TMVA { TString fTransformName; // name of transformation std::vector<TMVA::VariableInfo> fVariables; // event variables [saved to weight file] std::vector<TMVA::VariableInfo> fTargets; // event targets [saved to weight file --> TODO ] + std::vector<TMVA::VariableInfo> fSpectators; // event spectators [saved to weight file --> TODO ] + + mutable Bool_t fVariableTypesAreCounted; // true if variable types have been counted already + mutable UInt_t fNVariables; // number of variables to be transformed + mutable UInt_t fNTargets; // number of targets to be transformed + mutable UInt_t fNSpectators; // number of spectators to be transformed + + Bool_t fSortGet; // if true, sort the variables into the order as defined by the user at the var definition + // if false, sort the variables according to the order given for the var transformation protected: diff --git a/tmva/src/ConvergenceTest.cxx b/tmva/src/ConvergenceTest.cxx index 
d8dd0a5a92b39575429b44935106c07a7aecd7c8..b5e56ae43439072e7671ed0339c7b141fdd9e8cd 100644 --- a/tmva/src/ConvergenceTest.cxx +++ b/tmva/src/ConvergenceTest.cxx @@ -117,7 +117,7 @@ Float_t TMVA::ConvergenceTest::SpeedControl( UInt_t ofSteps ) Int_t n = 0; Int_t sum = 0; std::deque<Short_t>::iterator vec = fSuccessList.begin(); - for (; vec<fSuccessList.end() ; vec++) { + for (; vec != fSuccessList.end() ; vec++) { sum += *vec; n++; } diff --git a/tmva/src/CrossEntropy.cxx b/tmva/src/CrossEntropy.cxx index 7338bfe4f53fc057ae3a861952e256c13f7a2f29..4839265bba27564b8a938402f76cac550fe64625 100644 --- a/tmva/src/CrossEntropy.cxx +++ b/tmva/src/CrossEntropy.cxx @@ -45,5 +45,6 @@ Double_t TMVA::CrossEntropy::GetSeparationIndex( const Double_t &s, const Doubl if (s+b <= 0) return 0; Double_t p = s/(s+b); if (p<=0 || p >=1) return 0; - return - ( p * log (p) + (1-p)*log(1-p) ); + // return - ( p * log (p) + (1-p)*log(1-p) ); + return - ( p * log2 (p) + (1-p)*log2(1-p) ); } diff --git a/tmva/src/DataSetFactory.cxx b/tmva/src/DataSetFactory.cxx index 4ccd515abbd191ff5b12c719b2c138b2bcb19142..a6f2502fc89de1d1b34bc86229c41f206015a666 100644 --- a/tmva/src/DataSetFactory.cxx +++ b/tmva/src/DataSetFactory.cxx @@ -158,8 +158,17 @@ TMVA::DataSet* TMVA::DataSetFactory::BuildDynamicDataSet( TMVA::DataSetInfo& dsi std::vector<Float_t*>* evdyn = new std::vector<Float_t*>(0); std::vector<VariableInfo>& varinfos = dsi.GetVariableInfos(); - std::vector<VariableInfo>::iterator it = varinfos.begin(); - for (;it!=varinfos.end();it++) evdyn->push_back( (Float_t*)(*it).GetExternalLink() ); + + if (varinfos.empty()) + Log() << kFATAL << "Dynamic data set cannot be built, since no variable informations are present. Apparently no variables have been set. This should not happen, please contact the TMVA authors." 
<< Endl; + + std::vector<VariableInfo>::iterator it = varinfos.begin(), itEnd=varinfos.end(); + for (;it!=itEnd;++it) { + Float_t* external=(Float_t*)(*it).GetExternalLink(); + if (external==0) + Log() << kDEBUG << "The link to the external variable is NULL while I am trying to build a dynamic data set. In this case fTmpEvent from MethodBase HAS TO BE USED in the method to get useful values in variables." << Endl; + evdyn->push_back (external); + } std::vector<VariableInfo>& spectatorinfos = dsi.GetSpectatorInfos(); it = spectatorinfos.begin(); diff --git a/tmva/src/DataSetInfo.cxx b/tmva/src/DataSetInfo.cxx index 3bd423cbf4580592fae335a7766240805d9e1a3a..e59f7c47eb0483daa047a4256e034ccf07e862bc 100644 --- a/tmva/src/DataSetInfo.cxx +++ b/tmva/src/DataSetInfo.cxx @@ -424,8 +424,6 @@ TMVA::DataSet* TMVA::DataSetInfo::GetDataSet() const Log() << kFATAL << "DataSetManager has not been set in DataSetInfo (GetDataSet() )." << Endl; fDataSet = fDataSetManager->CreateDataSet(GetName()); - - fNeedsRebuilding = kFALSE; } return fDataSet; diff --git a/tmva/src/DecisionTree.cxx b/tmva/src/DecisionTree.cxx index e4e78ba44db74932885163bd2c3de4d990944c4a..bc9b45a1365cd19080d7f300f2d97c29e226b525 100644 --- a/tmva/src/DecisionTree.cxx +++ b/tmva/src/DecisionTree.cxx @@ -1007,8 +1007,13 @@ Double_t TMVA::DecisionTree::TrainNodeFast( const vector<TMVA::Event*> & eventSa // hence can be safely omitted Double_t istepSize =( xmax[ivar] - xmin[ivar] ) / Double_t(nBins); + // std::cout << "min="<<xmin[ivar] + // << " max="<<xmax[ivar] + // << " widht=" << istepSize + // << std::endl; for (Int_t icut=0; icut<fNCuts; icut++) { cutValues[ivar][icut]=xmin[ivar]+(Double_t(icut+1))*istepSize; + // std::cout << " cutValues["<<ivar<<"]["<<icut<<"]=" << cutValues[ivar][icut] << std::endl; } } } diff --git a/tmva/src/Event.cxx b/tmva/src/Event.cxx index ec9de676fe96c066b396bdc7148f4f1205f1b9bb..879f5ff3899fa1ff7405fc71a21a72df5d6dc959 100644 --- a/tmva/src/Event.cxx +++ b/tmva/src/Event.cxx @@ 
-41,7 +41,6 @@ TMVA::Event::Event() fValuesDynamic(0), fTargets(), fSpectators(), - fVariableArrangement(0), fClass(0), fWeight(1.0), fBoostWeight(1.0), @@ -60,7 +59,6 @@ TMVA::Event::Event( const std::vector<Float_t>& ev, fValuesDynamic(0), fTargets(tg), fSpectators(0), - fVariableArrangement(0), fClass(cls), fWeight(weight), fBoostWeight(boostweight), @@ -80,7 +78,6 @@ TMVA::Event::Event( const std::vector<Float_t>& ev, fValuesDynamic(0), fTargets(tg), fSpectators(vi), - fVariableArrangement(0), fClass(cls), fWeight(weight), fBoostWeight(boostweight), @@ -98,7 +95,6 @@ TMVA::Event::Event( const std::vector<Float_t>& ev, fValuesDynamic(0), fTargets(0), fSpectators(0), - fVariableArrangement(0), fClass(cls), fWeight(weight), fBoostWeight(boostweight), @@ -113,14 +109,11 @@ TMVA::Event::Event( const std::vector<Float_t*>*& evdyn, UInt_t nvar ) fValuesDynamic(0), fTargets(0), fSpectators(evdyn->size()-nvar), - fVariableArrangement(0), fClass(0), fWeight(0), fBoostWeight(0), fDynamic(true) { - - //std::cout << "CON 2 " << evdyn->size() << std::endl; // constructor for single events fValuesDynamic = (std::vector<Float_t*>*) evdyn; } @@ -128,16 +121,35 @@ TMVA::Event::Event( const std::vector<Float_t*>*& evdyn, UInt_t nvar ) //____________________________________________________________ TMVA::Event::Event( const Event& event ) : fValues(event.fValues), - fValuesDynamic(0), + fValuesDynamic(event.fValuesDynamic), fTargets(event.fTargets), fSpectators(event.fSpectators), - fVariableArrangement(event.fVariableArrangement), fClass(event.fClass), fWeight(event.fWeight), fBoostWeight(event.fBoostWeight), fDynamic(event.fDynamic) { // copy constructor + if (event.fDynamic){ + fValues.clear(); + UInt_t nvar = event.GetNVariables(); + UInt_t idx=0; + std::vector<Float_t*>::iterator itDyn=event.fValuesDynamic->begin(), itDynEnd=event.fValuesDynamic->end(); + for (; itDyn!=itDynEnd && idx<nvar; ++itDyn){ + Float_t value=*(*itDyn); + fValues.push_back( value ); + ++idx; + } + 
fSpectators.clear(); + for (; itDyn!=itDynEnd; ++itDyn){ + Float_t value=*(*itDyn); + fSpectators.push_back( value ); + ++idx; + } + + fDynamic=kFALSE; + fValuesDynamic=NULL; + } } //____________________________________________________________ @@ -146,34 +158,33 @@ TMVA::Event::~Event() // Event destructor } -//____________________________________________________________ -void TMVA::Event::ClearDynamicVariables() -{ - // clear global variable -// if (fValuesDynamic != 0) { -// fValuesDynamic->clear(); -// delete fValuesDynamic; -// fValuesDynamic = 0; -// } -} - -//____________________________________________________________ -void TMVA::Event::SetVariableArrangement( std::vector<UInt_t>* const m ) const { - // set the variable arrangement - - // mapping from global variable index (the position in the vector) - // to the new index in the subset of variables used by the - // composite classifier - fVariableArrangement = m; -} - - - //____________________________________________________________ void TMVA::Event::CopyVarValues( const Event& other ) { // copies only the variable values fValues = other.fValues; + fTargets = other.fTargets; + fSpectators = other.fSpectators; + if (other.fDynamic){ + UInt_t nvar = other.GetNVariables(); + fValues.clear(); + UInt_t idx=0; + std::vector<Float_t*>::iterator itDyn=other.fValuesDynamic->begin(), itDynEnd=other.fValuesDynamic->end(); + for (; itDyn!=itDynEnd && idx<nvar; ++itDyn){ + Float_t value=*(*itDyn); + fValues.push_back( value ); + ++idx; + } + fSpectators.clear(); + for (; itDyn!=itDynEnd; ++itDyn){ + Float_t value=*(*itDyn); + fSpectators.push_back( value ); + ++idx; + } + } + fDynamic = kFALSE; + fValuesDynamic = NULL; + fClass = other.fClass; fWeight = other.fWeight; fBoostWeight = other.fBoostWeight; @@ -184,20 +195,14 @@ Float_t TMVA::Event::GetValue( UInt_t ivar ) const { // return value of i'th variable Float_t retval; - if (fVariableArrangement==0) { - //if(fDynamic) - // std::cout << fValuesDynamic->size() << " 
index = " << ivar << std::endl; - retval = fDynamic ?( *(*fValuesDynamic)[ivar] ) : fValues[ivar]; + + if (fDynamic){ + retval = *((*fValuesDynamic).at(ivar)); } - else { - UInt_t mapIdx = (*fVariableArrangement)[ivar]; - if (fDynamic) { - retval = *(*fValuesDynamic)[mapIdx]; - } - else { - retval = ( mapIdx<fValues.size() ) ? fValues[mapIdx] : fSpectators[mapIdx-fValues.size()]; - } + else{ + retval = fValues.at(ivar); } + return retval; } @@ -213,17 +218,10 @@ Float_t TMVA::Event::GetSpectator( UInt_t ivar) const const std::vector<Float_t>& TMVA::Event::GetValues() const { // return value vector - if (fVariableArrangement!=0) { - assert(0); - } if (fDynamic) { -// if (fValuesDynamic->size()-GetNSpectators() != fValues.size()) { -// std::cout << "ERROR Event::GetValues() is trying to change the size of the variable vector, exiting ..." << std::endl; -// assert(0); -// } fValues.clear(); - for (std::vector<Float_t*>::const_iterator it = fValuesDynamic->begin(); - it != fValuesDynamic->end()-GetNSpectators(); it++) { + for (std::vector<Float_t*>::const_iterator it = fValuesDynamic->begin(), itEnd=fValuesDynamic->end()-GetNSpectators(); + it != itEnd; ++it) { Float_t val = *(*it); fValues.push_back( val ); } @@ -235,12 +233,7 @@ const std::vector<Float_t>& TMVA::Event::GetValues() const UInt_t TMVA::Event::GetNVariables() const { // accessor to the number of variables - - // if variables have to arranged (as it is the case for the - // composite classifier) the number of the variables changes - - if (fVariableArrangement==0) return fValues.size(); - else return fVariableArrangement->size(); + return fValues.size(); } //____________________________________________________________ @@ -255,11 +248,7 @@ UInt_t TMVA::Event::GetNSpectators() const { // accessor to the number of spectators - // if variables have to arranged (as it is the case for the - // composite classifier) the number of the variables changes - - if (fVariableArrangement==0) return fSpectators.size(); - 
else return fValues.size()-fVariableArrangement->size(); + return fSpectators.size(); } diff --git a/tmva/src/Factory.cxx b/tmva/src/Factory.cxx index c008e47a3739958f32714c7f6f706e7e7ba09713..cff09fd7f191bc127644af6a7ee0d0492cb11299 100644 --- a/tmva/src/Factory.cxx +++ b/tmva/src/Factory.cxx @@ -839,14 +839,14 @@ void TMVA::Factory::WriteDataInformation() processTrfs = fTransformations; // remove any trace of identity transform - if given (avoid to apply it twice) - processTrfs.ReplaceAll(" ",""); - processTrfs.ReplaceAll("I;",""); - processTrfs.ReplaceAll(";I",""); - processTrfs.ReplaceAll("I",""); +// processTrfs.ReplaceAll(" ",""); +// processTrfs.ReplaceAll("I;",""); +// processTrfs.ReplaceAll(";I",""); +// processTrfs.ReplaceAll("I",""); // and re-add identity transform at beginning - if (processTrfs.Length() > 0) processTrfs = TString("I;") + processTrfs; - else processTrfs = TString("I"); +// if (processTrfs.Length() > 0) processTrfs = TString("I;") + processTrfs; +// else processTrfs = TString("I"); std::vector<TMVA::TransformationHandler*> trfs; TransformationHandler* identityTrHandler = 0; @@ -855,61 +855,18 @@ void TMVA::Factory::WriteDataInformation() std::vector<TString>::iterator trfsDefIt = trfsDef.begin(); for (; trfsDefIt!=trfsDef.end(); trfsDefIt++) { trfs.push_back(new TMVA::TransformationHandler(DefaultDataSetInfo(), "Factory")); - std::vector<TString> trfDef = gTools().SplitString(*trfsDefIt,','); + TString trfS = (*trfsDefIt); - std::vector<TString>::iterator trfDefIt = trfDef.begin(); + Log() << kINFO << Endl; + Log() << kINFO << "current transformation string: '" << trfS.Data() << "'" << Endl; + TMVA::MethodBase::CreateVariableTransforms( trfS, + DefaultDataSetInfo(), + *(trfs.back()), + Log() ); - for (; trfDefIt!=trfDef.end(); trfDefIt++) { - TString trfS = (*trfDefIt); - - TList* trClsList = gTools().ParseFormatLine( trfS, "_" ); // split entry to get trf-name and class-name - TListIter trClsIt(trClsList); - - const TString& trName = 
((TObjString*)trClsList->At(0))->GetString(); - TString trCls = "AllClasses"; - ClassInfo *ci = NULL; - Int_t idxCls = -1; - if (trClsList->GetEntries() > 1) { - trCls = ((TObjString*)trClsList->At(1))->GetString(); - if (trCls == "AllClasses") { - // do nothing, since all necessary parameters are already set - } - else { - ci = DefaultDataSetInfo().GetClassInfo( trCls ); - if (ci == NULL) { - Log() << kFATAL << "Class " << trCls << " not known for variable transformation " << trName << ", please check." << Endl; - } - else { - idxCls = ci->GetNumber(); - } - } - } - delete trClsList; - - if (trName=='I') { - trfs.back()->AddTransformation( new VariableIdentityTransform ( DefaultDataSetInfo() ), idxCls ); + if (trfS.BeginsWith('I')) { identityTrHandler = trfs.back(); } - else if (trName=='D') { - trfs.back()->AddTransformation( new VariableDecorrTransform ( DefaultDataSetInfo() ), idxCls ); - } - else if (trName=='P') { - trfs.back()->AddTransformation( new VariablePCATransform ( DefaultDataSetInfo() ), idxCls ); - } - else if (trName=='U') { - trfs.back()->AddTransformation( new VariableGaussTransform ( DefaultDataSetInfo(), "Uniform" ), idxCls ); - } - else if (trName=='G') { - trfs.back()->AddTransformation( new VariableGaussTransform ( DefaultDataSetInfo() ), idxCls ); - } - else if (trName=='N') { - trfs.back()->AddTransformation( new VariableNormalizeTransform( DefaultDataSetInfo() ), idxCls ); - } - else { - Log() << kINFO << "The transformation " << *trfsDefIt << " definition is not valid, the \n" - << "transformation " << trName << " is not known!" << Endl; - } - } } const std::vector<Event*>& inputEvents = DefaultDataSetInfo().GetDataSet()->GetEventCollection(); @@ -1074,7 +1031,7 @@ void TMVA::Factory::TrainAllMethods() else methCat->fDataSetManager = fDataSetManager; } //ToDo, Do we need to fill the DataSetManager of MethodBoost here too? 
- + m->SetAnalysisType(fAnalysisType); m->SetupMethod(); m->ReadStateFromFile(); diff --git a/tmva/src/GeneticAlgorithm.cxx b/tmva/src/GeneticAlgorithm.cxx index 9f5ec5a5ae48378a86728ab8c21364ce9dff341a..789ba6136134ebc4d0e33c183f008e2454898b8d 100644 --- a/tmva/src/GeneticAlgorithm.cxx +++ b/tmva/src/GeneticAlgorithm.cxx @@ -212,7 +212,7 @@ Double_t TMVA::GeneticAlgorithm::SpreadControl( Int_t ofSteps, Int_t successStep Int_t n = 0; Int_t sum = 0; std::deque<Int_t>::iterator vec = fSuccessList.begin(); - for (; vec<fSuccessList.end() ; vec++) { + for (; vec != fSuccessList.end() ; vec++) { sum += *vec; n++; } diff --git a/tmva/src/GiniIndex.cxx b/tmva/src/GiniIndex.cxx index 8b70ba3fa7f61f1c26dee3655234b6105e537e40..bad48e5b1230ce43e9a8564448c2020b9b365578 100644 --- a/tmva/src/GiniIndex.cxx +++ b/tmva/src/GiniIndex.cxx @@ -59,7 +59,8 @@ Double_t TMVA::GiniIndex::GetSeparationIndex( const Double_t &s, const Double_t if (s+b <= 0) return 0; if (s<=0 || b <=0) return 0; - else return s*b/(s+b)/(s+b); + // else return s*b/(s+b)/(s+b); + else return 2*s*b/(s+b)/(s+b); } diff --git a/tmva/src/Interval.cxx b/tmva/src/Interval.cxx index ad57d6fe463aa63538fe571a24dd0ea75aa2c0b7..4dc152aea2f55b3657d891f5380df1bf3b2524de 100644 --- a/tmva/src/Interval.cxx +++ b/tmva/src/Interval.cxx @@ -86,8 +86,8 @@ TMVA::Interval::Interval( Double_t min, Double_t max, Int_t nbins ) : if (!fgLogger) fgLogger = new MsgLogger("Interval"); // defines minimum and maximum of an interval - // when nbins == 0, interval describes a discrete distribution (equally distributed in the interval) - // when nbins > 0, interval describes a continous interval + // when nbins > 0, interval describes a discrete distribution (equally distributed in the interval) + // when nbins == 0, interval describes a continous interval // if (fMax - fMin < 0) Log() << kFATAL << "maximum lower than minimum" << Endl; if (nbins < 0) { diff --git a/tmva/src/MethodANNBase.cxx b/tmva/src/MethodANNBase.cxx index 
3a259fb8683b2c07b2854a73e89fe6b79ab1d5ea..a619fee4401f1d43646188525cf845e32d526618 100644 --- a/tmva/src/MethodANNBase.cxx +++ b/tmva/src/MethodANNBase.cxx @@ -21,7 +21,7 @@ * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany * * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei * * * - * Copyright (c) 2005: * + * Copyright (c) 2005-2011: * * CERN, Switzerland * * * * Redistribution and use in source and binary forms, with or without * @@ -271,14 +271,11 @@ void TMVA::MethodANNBase::BuildNetwork( vector<Int_t>* layout, vector<Double_t>* // build network given a layout (number of neurons in each layer) // and optional weights array - if (fEstimator!=kMSE && fEstimator!=kCE) { - if (fEstimatorS == "MSE") fEstimator = kMSE; //zjh - else if (fEstimatorS == "CE") fEstimator = kCE; //zjh - } + if (fEstimatorS == "MSE") fEstimator = kMSE; //zjh + else if (fEstimatorS == "CE") fEstimator = kCE; //zjh + else Log()<<kWARNING<<"fEstimator="<<fEstimator<<"\tfEstimatorS="<<fEstimatorS<<Endl; if (fEstimator!=kMSE && fEstimator!=kCE) Log()<<kWARNING<<"Estimator type unspecified \t"<<Endl; //zjh - - Log() << kINFO << "Building Network" << Endl; DeleteNetwork(); diff --git a/tmva/src/MethodBDT.cxx b/tmva/src/MethodBDT.cxx index 3888a7d9394d2cdf3e1f404b8ad34ed75a16615c..6cccfaa1cf07899e8cbe797b5ab2197e149da07a 100644 --- a/tmva/src/MethodBDT.cxx +++ b/tmva/src/MethodBDT.cxx @@ -1,4 +1,4 @@ -// @(#)root/tmva $Id$ + // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss /********************************************************************************** @@ -113,6 +113,7 @@ #include "TRandom3.h" #include "TMath.h" #include "TObjString.h" +#include "TGraph.h" #include "TMVA/ClassifierFactory.h" #include "TMVA/MethodBDT.h" @@ -129,6 +130,7 @@ #include "TMVA/Results.h" #include "TMVA/ResultsMulticlass.h" #include "TMVA/Interval.h" +#include "TMVA/PDF.h" using std::vector; @@ -349,6 +351,8 @@ void TMVA::MethodBDT::DeclareOptions() }else{ 
DeclareOptionRef(fMaxDepth=3,"MaxDepth","Max depth of the decision tree allowed"); } + DeclareOptionRef(fDoBoostMonitor=kFALSE,"DoBoostMonitor","create control plot with ROC integral vs tree number"); + } void TMVA::MethodBDT::DeclareCompatibilityOptions() { @@ -692,12 +696,27 @@ void TMVA::MethodBDT::Train() TH1* nodesBeforePruningVsTree = new TH1I("NodesBeforePruning","nodes before pruning",fNTrees,0,fNTrees); TH1* nodesAfterPruningVsTree = new TH1I("NodesAfterPruning","nodes after pruning",fNTrees,0,fNTrees); + + if(!DoMulticlass()){ Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType()); h->SetXTitle("boost weight"); results->Store(h, "BoostWeights"); - + + + // Monitor the performance (on TEST sample) versus number of trees + if (fDoBoostMonitor){ + TH2* boostMonitor = new TH2F("BoostMonitor","ROC Integral Vs iTree",2,0,fNTrees,2,0,1.05); + boostMonitor->SetXTitle("#tree"); + boostMonitor->SetYTitle("ROC Integral"); + results->Store(boostMonitor, "BoostMonitor"); + TGraph *boostMonitorGraph = new TGraph(); + boostMonitorGraph->SetName("BoostMonitorGraph"); + boostMonitorGraph->SetTitle("ROCIntegralVsNTrees"); + results->Store(boostMonitorGraph, "BoostMonitorGraph"); + } + // weights applied in boosting vs tree number h = new TH1F("BoostWeightVsTree","Boost weights vs tree",fNTrees,0,fNTrees); h->SetXTitle("#tree"); @@ -783,7 +802,7 @@ void TMVA::MethodBDT::Train() if (fBoostType!="Grad") if (fUseYesNoLeaf && !DoRegression() ){ // remove leaf nodes where both daughter nodes are of same type - nNodesBeforePruning = fForest.back()->CleanTree(); + nNodesBeforePruning = fForest.back()->CleanTree(); } nNodesBeforePruningCount += nNodesBeforePruning; nodesBeforePruningVsTree->SetBinContent(itree+1,nNodesBeforePruning); @@ -814,7 +833,7 @@ void TMVA::MethodBDT::Train() } if (fUseYesNoLeaf && !DoRegression() ){ // remove leaf nodes where both daughter nodes are of same type - fForest.back()->CleanTree(); + 
fForest.back()->CleanTree(); } } nNodesAfterPruning = fForest.back()->GetNNodes(); @@ -823,6 +842,18 @@ void TMVA::MethodBDT::Train() fITree = itree; fMonitorNtuple->Fill(); + if (fDoBoostMonitor){ + if (! DoRegression() ){ + if ( itree==fNTrees-1 || (!(itree%500)) || + (!(itree%250) && itree <1000)|| + (!(itree%100) && itree < 500)|| + (!(itree%50) && itree < 250)|| + (!(itree%25) && itree < 150)|| + (!(itree%10) && itree < 50)|| + (!(itree%5) && itree < 20) + ) BoostMonitor(itree); + } + } } } @@ -1089,24 +1120,64 @@ Double_t TMVA::MethodBDT::Boost( vector<TMVA::Event*> eventSample, DecisionTree // apply the boosting alogrithim (the algorithm is selecte via the the "option" given // in the constructor. The return value is the boosting weight - if (fBoostType=="AdaBoost") return this->AdaBoost (eventSample, dt); - else if (fBoostType=="Bagging") return this->Bagging (eventSample, iTree); - else if (fBoostType=="RegBoost") return this->RegBoost (eventSample, dt); - else if (fBoostType=="AdaBoostR2") return this->AdaBoostR2(eventSample, dt); + Double_t returnVal=-1; + + if (fBoostType=="AdaBoost") returnVal = this->AdaBoost (eventSample, dt); + else if (fBoostType=="Bagging") returnVal = this->Bagging (eventSample, iTree); + else if (fBoostType=="RegBoost") returnVal = this->RegBoost (eventSample, dt); + else if (fBoostType=="AdaBoostR2") returnVal = this->AdaBoostR2(eventSample, dt); else if (fBoostType=="Grad"){ if(DoRegression()) - return this->GradBoostRegression(eventSample, dt); + returnVal = this->GradBoostRegression(eventSample, dt); else if(DoMulticlass()) - return this->GradBoost (eventSample, dt, cls); + returnVal = this->GradBoost (eventSample, dt, cls); else - return this->GradBoost (eventSample, dt); + returnVal = this->GradBoost (eventSample, dt); } else { Log() << kINFO << GetOptions() << Endl; Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl; } - return -1; + return returnVal; +} + 
+//_______________________________________________________________________ +void TMVA::MethodBDT::BoostMonitor(Int_t iTree) +{ + // fills the ROCIntegral vs Itree from the testSample for the monitoring plots + // during the training .. but using the testing events + + TH1F *tmpS = new TH1F( "tmpS", "", 100 , -1., 1.00001 ); + TH1F *tmpB = new TH1F( "tmpB", "", 100 , -1., 1.00001 ); + TH1F *tmp; + + const std::vector<Event*> events=Data()->GetEventCollection(Types::kTesting); + UInt_t signalClassNr = DataInfo().GetClassInfo("Signal")->GetNumber(); + + // fMethod->GetTransformationHandler().CalcTransformations(fMethod->Data()->GetEventCollection(Types::kTesting)); + for (UInt_t iev=0; iev < events.size() ; iev++){ + if (events[iev]->GetClass() == signalClassNr) tmp=tmpS; + else tmp=tmpB; + tmp->Fill(PrivateGetMvaValue(*(events[iev])),events[iev]->GetWeight()); + } + + TMVA::PDF *sig = new TMVA::PDF( " PDF Sig", tmpS, TMVA::PDF::kSpline3 ); + TMVA::PDF *bkg = new TMVA::PDF( " PDF Bkg", tmpB, TMVA::PDF::kSpline3 ); + + Results* results = Data()->GetResults(GetMethodName(),Types::kTraining, Types::kMaxAnalysisType); + TGraph* gr=results->GetGraph("BoostMonitorGraph"); + Int_t nPoints = gr->GetN(); + gr->Set(nPoints+1); + gr->SetPoint(nPoints,(Double_t)iTree+1,GetROCIntegral(sig,bkg)); + + tmpS->Delete(); + tmpB->Delete(); + + delete sig; + delete bkg; + + return; } //_______________________________________________________________________ @@ -1171,6 +1242,10 @@ Double_t TMVA::MethodBDT::AdaBoost( vector<TMVA::Event*> eventSample, DecisionTr << "and this is not implemented... a typo in the options ??" 
<<Endl; } } + + Log() << kDEBUG << "BDT AdaBoos wrong/all: " << sumGlobalwfalse << "/" << sumGlobalw << Endl; + + Double_t newSumGlobalw=0; vector<Double_t> newSumw(sumw.size(),0); @@ -1204,9 +1279,11 @@ Double_t TMVA::MethodBDT::AdaBoost( vector<TMVA::Event*> eventSample, DecisionTr else { boostWeight = TMath::Power((1.0 - err)/err, fAdaBoostBeta); } + Log() << kDEBUG << "BDT AdaBoos wrong/all: " << sumGlobalwfalse << "/" << sumGlobalw << " 1-err/err="<<boostWeight<< " log.."<<TMath::Log(boostWeight)<<Endl; Results* results = Data()->GetResults(GetMethodName(),Types::kTraining, Types::kMaxAnalysisType); + for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) { if ((!( (dt->CheckEvent(*(*e),fUseYesNoLeaf) > fNodePurityLimit ) == DataInfo().IsSignal(*e))) || DoRegression()) { @@ -1224,11 +1301,15 @@ Double_t TMVA::MethodBDT::AdaBoost( vector<TMVA::Event*> eventSample, DecisionTr newSumw[(*e)->GetClass()] += (*e)->GetWeight(); } + // re-normalise the weights (independent for Signal and Background) Double_t globalNormWeight=sumGlobalw/newSumGlobalw; vector<Double_t> normWeightByClass; for (UInt_t i=0; i<sumw.size(); i++) normWeightByClass.push_back(sumw[i]/newSumw[i]); + Log() << kDEBUG << "new Nsig="<<newSumw[0]*globalNormWeight << " new Nbkg="<<newSumw[1]*globalNormWeight << Endl; + + for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) { if (fRenormByClass) (*e)->ScaleBoostWeight( normWeightByClass[(*e)->GetClass()] ); else (*e)->ScaleBoostWeight( globalNormWeight ); @@ -1482,6 +1563,16 @@ Double_t TMVA::MethodBDT::GetMvaValue( Double_t* err, Double_t* errUpper ){ //_______________________________________________________________________ Double_t TMVA::MethodBDT::GetMvaValue( Double_t* err, Double_t* errUpper, UInt_t useNTrees ) +{ + // Return the MVA value (range [-1;1]) that classifies the + // event according to the majority vote from the total number of + // decision trees. 
+ const Event* ev = GetEvent(); + return PrivateGetMvaValue(const_cast<TMVA::Event&>(*ev), err, errUpper, useNTrees); + +} +//_______________________________________________________________________ + Double_t TMVA::MethodBDT::PrivateGetMvaValue(TMVA::Event& ev, Double_t* err, Double_t* errUpper, UInt_t useNTrees ) { // Return the MVA value (range [-1;1]) that classifies the // event according to the majority vote from the total number of @@ -1493,20 +1584,21 @@ Double_t TMVA::MethodBDT::GetMvaValue( Double_t* err, Double_t* errUpper, UInt_t // allow for the possibility to use less trees in the actual MVA calculation // than have been originally trained. UInt_t nTrees = fForest.size(); + if (useNTrees > 0 ) nTrees = useNTrees; - if (fBoostType=="Grad") return GetGradBoostMVA(const_cast<TMVA::Event&>(*GetEvent()),nTrees); + if (fBoostType=="Grad") return GetGradBoostMVA(ev,nTrees); Double_t myMVA = 0; Double_t norm = 0; for (UInt_t itree=0; itree<nTrees; itree++) { // if (fUseWeightedTrees) { - myMVA += fBoostWeights[itree] * fForest[itree]->CheckEvent(*GetEvent(),fUseYesNoLeaf); + myMVA += fBoostWeights[itree] * fForest[itree]->CheckEvent(ev,fUseYesNoLeaf); norm += fBoostWeights[itree]; } else { - myMVA += fForest[itree]->CheckEvent(*GetEvent(),fUseYesNoLeaf); + myMVA += fForest[itree]->CheckEvent(ev,fUseYesNoLeaf); norm += 1; } } @@ -1843,16 +1935,29 @@ void TMVA::MethodBDT::MakeClassSpecificHeader( std::ostream& fout, const TStrin fout << " // constructor of an essentially \"empty\" node floating in space" << endl; fout << " BDT_DecisionTreeNode ( BDT_DecisionTreeNode* left," << endl; fout << " BDT_DecisionTreeNode* right," << endl; - fout << " double cutValue, bool cutType, int selector," << endl; + if (fUseFisherCuts){ + fout << " int nFisherCoeff," << endl; + for (UInt_t i=0;i<GetNVariables()+1;i++){ + fout << " double fisherCoeff"<<i<<"," << endl; + } + } + fout << " int selector, double cutValue, bool cutType, " << endl; fout << " int nodeType, double purity, 
double response ) :" << endl; - fout << " fLeft ( left )," << endl; - fout << " fRight ( right )," << endl; - fout << " fCutValue( cutValue )," << endl; - fout << " fCutType ( cutType )," << endl; - fout << " fSelector( selector )," << endl; - fout << " fNodeType( nodeType )," << endl; - fout << " fPurity ( purity )," << endl; - fout << " fResponse( response ){}" << endl << endl; + fout << " fLeft ( left )," << endl; + fout << " fRight ( right )," << endl; + if (fUseFisherCuts) fout << " fNFisherCoeff ( nFisherCoeff )," << endl; + fout << " fSelector ( selector )," << endl; + fout << " fCutValue ( cutValue )," << endl; + fout << " fCutType ( cutType )," << endl; + fout << " fNodeType ( nodeType )," << endl; + fout << " fPurity ( purity )," << endl; + fout << " fResponse ( response ){" << endl; + if (fUseFisherCuts){ + for (UInt_t i=0;i<GetNVariables()+1;i++){ + fout << " fFisherCoeff.push_back(fisherCoeff"<<i<<");" << endl; + } + } + fout << " }" << endl << endl; fout << " virtual ~BDT_DecisionTreeNode();" << endl << endl; fout << " // test event if it decends the tree at this node to the right" << endl; fout << " virtual bool GoesRight( const std::vector<double>& inputValues ) const;" << endl; @@ -1868,9 +1973,13 @@ void TMVA::MethodBDT::MakeClassSpecificHeader( std::ostream& fout, const TStrin fout << "private:" << endl << endl; fout << " BDT_DecisionTreeNode* fLeft; // pointer to the left daughter node" << endl; fout << " BDT_DecisionTreeNode* fRight; // pointer to the right daughter node" << endl; + if (fUseFisherCuts){ + fout << " int fNFisherCoeff; // =0 if this node doesn use fisher, else =nvar+1 " << endl; + fout << " std::vector<double> fFisherCoeff; // the fisher coeff (offset at the last element)" << endl; + } + fout << " int fSelector; // index of variable used in node selection (decision tree) " << endl; fout << " double fCutValue; // cut value appplied on this node to discriminate bkg against sig" << endl; fout << " bool fCutType; // true: if event 
variable > cutValue ==> signal , false otherwise" << endl; - fout << " int fSelector; // index of variable used in node selection (decision tree) " << endl; fout << " int fNodeType; // Type of node: -1 == Bkg-leaf, 1 == Signal-leaf, 0 = internal " << endl; fout << " double fPurity; // Purity of node from training"<< endl; fout << " double fResponse; // Regression response value of node" << endl; @@ -1887,7 +1996,19 @@ void TMVA::MethodBDT::MakeClassSpecificHeader( std::ostream& fout, const TStrin fout << "bool BDT_DecisionTreeNode::GoesRight( const std::vector<double>& inputValues ) const" << endl; fout << "{" << endl; fout << " // test event if it decends the tree at this node to the right" << endl; - fout << " bool result = (inputValues[fSelector] > fCutValue );" << endl; + fout << " bool result;" << endl; + if (fUseFisherCuts){ + fout << " if (fNFisherCoeff == 0){" << endl; + fout << " result = (inputValues[fSelector] > fCutValue );" << endl; + fout << " }else{" << endl; + fout << " double fisher = fFisherCoeff.at(fFisherCoeff.size()-1);" << endl; + fout << " for (unsigned int ivar=0; ivar<fFisherCoeff.size()-1; ivar++)" << endl; + fout << " fisher += fFisherCoeff.at(ivar)*inputValues.at(ivar);" << endl; + fout << " result = fisher > fCutValue;" << endl; + fout << " }" << endl; + }else{ + fout << " result = (inputValues[fSelector] > fCutValue );" << endl; + } fout << " if (fCutType == true) return result; //the cuts are selecting Signal ;" << endl; fout << " else return !result;" << endl; fout << "}" << endl; @@ -1927,12 +2048,21 @@ void TMVA::MethodBDT::MakeClassInstantiateNode( DecisionTreeNode *n, std::ostrea fout << "0"; } fout << ", " << endl - << setprecision(6) + << setprecision(6); + if (fUseFisherCuts){ + fout << n->GetNFisherCoeff() << ", "; + for (UInt_t i=0; i< GetNVariables()+1; i++) { + if (n->GetNFisherCoeff() == 0 ){ + fout << "0, "; + }else{ + fout << n->GetFisherCoeff(i) << ", "; + } + } + } + fout << n->GetSelector() << ", " << 
n->GetCutValue() << ", " << n->GetCutType() << ", " - << n->GetSelector() << ", " << n->GetNodeType() << ", " << n->GetPurity() << "," << n->GetResponse() << ") "; - } diff --git a/tmva/src/MethodBase.cxx b/tmva/src/MethodBase.cxx index 886550fa63452e6dc3548b52f001555fc1782848..08dfe86cbcaaecfc1952edfc1bedd44ff9b6d399 100644 --- a/tmva/src/MethodBase.cxx +++ b/tmva/src/MethodBase.cxx @@ -138,6 +138,7 @@ TMVA::MethodBase::MethodBase( const TString& jobName, fDisableWriting ( kFALSE ), fDataSetInfo ( dsi ), fSignalReferenceCut ( 0.5 ), + fSignalReferenceCutOrientation( 1. ), fVariableTransformType ( Types::kSignal ), fJobName ( jobName ), fMethodName ( methodTitle ), @@ -159,6 +160,7 @@ TMVA::MethodBase::MethodBase( const TString& jobName, fSplTrainB ( 0 ), fSplTrainEffBvsS ( 0 ), fVarTransformString ( "None" ), + fTransformationPointer ( 0 ), fTransformation ( dsi, methodTitle ), fVerbose ( kFALSE ), fVerbosityLevelString ( "Default" ), @@ -215,6 +217,7 @@ TMVA::MethodBase::MethodBase( Types::EMVA methodType, fSplTrainB ( 0 ), fSplTrainEffBvsS ( 0 ), fVarTransformString ( "None" ), + fTransformationPointer ( 0 ), fTransformation ( dsi, "" ), fVerbose ( kFALSE ), fVerbosityLevelString ( "Default" ), @@ -427,7 +430,10 @@ void TMVA::MethodBase::ProcessBaseOptions() SetOptions( fMVAPdfS->GetOptions() ); } - CreateVariableTransforms( fVarTransformString ); + TMVA::MethodBase::CreateVariableTransforms( fVarTransformString, + DataInfo(), + GetTransformationHandler(), + Log() ); if (!HasMVAPdfs()) { if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; } @@ -452,16 +458,66 @@ void TMVA::MethodBase::ProcessBaseOptions() } //_______________________________________________________________________ -void TMVA::MethodBase::CreateVariableTransforms(const TString& trafoDefinition ) +void TMVA::MethodBase::CreateVariableTransforms(const TString& trafoDefinitionIn, + TMVA::DataSetInfo& dataInfo, + TMVA::TransformationHandler& transformationHandler, + TMVA::MsgLogger& log) { - 
if (trafoDefinition != "None") { - TList* trList = gTools().ParseFormatLine( trafoDefinition, "," ); + // create variable transformations + + TString trafoDefinition(trafoDefinitionIn); + if (trafoDefinition == "None") // no transformations + return; + + // workaround for transformations to complicated to be handled by makeclass/Reader, ToDo fix this in a later release + // count number of transformations with incomplete set of variables + TString trafoDefinitionCheck(trafoDefinitionIn); + int npartial = 0, ntrafo=0; + for( Int_t pos = 0, siz = trafoDefinition.Sizeof(); pos < siz; ++pos ){ + TString ch = trafoDefinition(pos,1); + if( ch == "(" ) npartial++; + if( ch == "+" || ch == ",") ntrafo++; + } + if (npartial>1) log << kFATAL << "sorry, the booking of multiple partial variable transformations is not yet implemented, please book a less complicated variable transform than: "<<trafoDefinitionIn<< Endl; //ToDo make kFATAL + // workaround end + + Int_t parenthesisCount = 0; + for( Int_t position = 0, size = trafoDefinition.Sizeof(); position < size; ++position ){ + TString ch = trafoDefinition(position,1); +// std::cout << "position " << position << " ch " << ch << std::endl; + if( ch == "(" ) + ++parenthesisCount; + if( ch == ")" ) + --parenthesisCount; + if( ch == "," && parenthesisCount == 0 ){ + trafoDefinition.Replace(position,1,'+'); + } + } +// std::cout << "replaced: " << trafoDefinition.Data() << std::endl; + +// if( trafoDefinition.Contains("+") || trafoDefinition.Contains("(") ) { // new format + + TList* trList = gTools().ParseFormatLine( trafoDefinition, "+" ); TListIter trIt(trList); while (TObjString* os = (TObjString*)trIt()) { + TString tdef = os->GetString(); Int_t idxCls = -1; - TList* trClsList = gTools().ParseFormatLine( os->GetString(), "_" ); // split entry to get trf-name and class-name + TString variables = ""; + if( tdef.Contains("(") ) { // contains selection of variables + Ssiz_t parStart = tdef.Index( "(" ); + Ssiz_t parLen = 
tdef.Index( ")", parStart )-parStart+1; + + variables = tdef(parStart,parLen); + tdef.Remove(parStart,parLen); + variables.Remove(parLen-1,1); + variables.Remove(0,1); + } + + TList* trClsList = gTools().ParseFormatLine( tdef, "_" ); // split entry to get trf-name and class-name TListIter trClsIt(trClsList); + if( trClsList->GetSize() < 1 ) + log << kFATAL << "Incorrect transformation string provided." << Endl; const TString& trName = ((TObjString*)trClsList->At(0))->GetString(); if (trClsList->GetEntries() > 1) { @@ -469,36 +525,63 @@ void TMVA::MethodBase::CreateVariableTransforms(const TString& trafoDefinition ) ClassInfo *ci = NULL; trCls = ((TObjString*)trClsList->At(1))->GetString(); if (trCls != "AllClasses") { - ci = DataInfo().GetClassInfo( trCls ); + ci = dataInfo.GetClassInfo( trCls ); if (ci == NULL) - Log() << kFATAL << "Class " << trCls << " not known for variable transformation " + log << kFATAL << "Class " << trCls << " not known for variable transformation " << trName << ", please check." 
<< Endl; else idxCls = ci->GetNumber(); } } - if (trName == "D" || trName == "Deco" || trName == "Decorrelate") - GetTransformationHandler().AddTransformation( new VariableDecorrTransform ( DataInfo()) , idxCls ); - else if (trName == "P" || trName == "PCA") - GetTransformationHandler().AddTransformation( new VariablePCATransform ( DataInfo()), idxCls ); - else if (trName == "U" || trName == "Uniform") - GetTransformationHandler().AddTransformation( new VariableGaussTransform ( DataInfo(),"Uniform"), idxCls ); - else if (trName == "G" || trName == "Gauss") - GetTransformationHandler().AddTransformation( new VariableGaussTransform ( DataInfo()), idxCls ); - else if (trName == "N" || trName == "Norm" || trName == "Normalise" || trName == "Normalize") - GetTransformationHandler().AddTransformation( new VariableNormalizeTransform( DataInfo()), idxCls ); - else - Log() << kFATAL << "<ProcessOptions> Variable transform '" + VariableTransformBase* transformation = NULL; + if (trName == "I" || trName == "Ident" || trName == "Identity"){ + if( variables.Length() == 0 ) + variables = "_V_"; + transformation = new VariableIdentityTransform( dataInfo); + } + else if (trName == "D" || trName == "Deco" || trName == "Decorrelate"){ + if( variables.Length() == 0 ) + variables = "_V_"; + transformation = new VariableDecorrTransform( dataInfo); + } + else if (trName == "P" || trName == "PCA"){ + if( variables.Length() == 0 ) + variables = "_V_"; + transformation = new VariablePCATransform ( dataInfo); + } + else if (trName == "U" || trName == "Uniform"){ + if( variables.Length() == 0 ) + variables = "_V_,_T_"; + transformation = new VariableGaussTransform ( dataInfo, "Uniform" ); + } + else if (trName == "G" || trName == "Gauss"){ + if( variables.Length() == 0 ) + variables = "_V_"; + transformation = new VariableGaussTransform ( dataInfo); + } + else if (trName == "N" || trName == "Norm" || trName == "Normalise" || trName == "Normalize") + { + if( variables.Length() == 0 ) + 
variables = "_V_,_T_"; + transformation = new VariableNormalizeTransform( dataInfo); + } + else + log << kFATAL << "<ProcessOptions> Variable transform '" << trName << "' unknown." << Endl; - ClassInfo* clsInfo = DataInfo().GetClassInfo(idxCls); + + if( transformation ){ + ClassInfo* clsInfo = dataInfo.GetClassInfo(idxCls); if( clsInfo ) - Log() << kINFO << " create Transformation " << trName << " with reference class " <<clsInfo->GetName() << "=("<< idxCls <<")"<<Endl; + log << kINFO << "Create Transformation \"" << trName << "\" with reference class " << clsInfo->GetName() << "=("<< idxCls <<")"<<Endl; else - Log() << kINFO << " create Transformation " << trName << " with events of all classes." << Endl; + log << kINFO << "Create Transformation \"" << trName << "\" with events from all classes." << Endl; + transformation->SelectInput( variables ); + transformationHandler.AddTransformation(transformation, idxCls); } } + return; } //_______________________________________________________________________ @@ -728,6 +811,13 @@ Double_t TMVA::MethodBase::GetMvaValue( const Event* const ev, Double_t* err, Do return val; } +Bool_t TMVA::MethodBase::IsSignalLike() { + return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE; +} +Bool_t TMVA::MethodBase::IsSignalLike(Double_t mvaVal) { + return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? 
kTRUE : kFALSE; +} + //_______________________________________________________________________ void TMVA::MethodBase::AddClassifierOutput( Types::ETreeType type ) { @@ -1142,7 +1232,7 @@ void TMVA::MethodBase::WriteStateToXML( void* parent ) const AddSpectatorsXMLTo( parent ); // write class info if in multiclass mode - if(DoMulticlass()) +// if(DoMulticlass()) AddClassesXMLTo(parent); // write target info if in regression mode @@ -1150,7 +1240,7 @@ void TMVA::MethodBase::WriteStateToXML( void* parent ) const AddTargetsXMLTo(parent); // write transformations - GetTransformationHandler().AddXMLTo( parent ); + GetTransformationHandler(false).AddXMLTo( parent ); // write MVA variable distributions void* pdfs = gTools().AddChild(parent, "MVAPdfs"); @@ -1325,7 +1415,8 @@ void TMVA::MethodBase::ReadStateFromXML( void* methodNode ) ReadSpectatorsFromXML(ch); } else if (nodeName=="Classes") { - if(DataInfo().GetNClasses()==0 && DoMulticlass()) +// if(DataInfo().GetNClasses()==0 && DoMulticlass()) + if(DataInfo().GetNClasses()==0) ReadClassesFromXML(ch); } else if (nodeName=="Targets") { @@ -1575,9 +1666,20 @@ void TMVA::MethodBase::AddSpectatorsXMLTo( void* parent ) const void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const { // write class info to XML - void* targets = gTools().AddChild(parent, "Classes"); - gTools().AddAttr( targets, "NClass", gTools().StringFromInt(DataInfo().GetNClasses()) ); + UInt_t nClasses=DataInfo().GetNClasses(); + + void* classes = gTools().AddChild(parent, "Classes"); + gTools().AddAttr( classes, "NClass", nClasses ); + + for (UInt_t iCls=0; iCls<nClasses; ++iCls){ + ClassInfo *classInfo=DataInfo().GetClassInfo (iCls); + TString className =classInfo->GetName(); + UInt_t classNumber=classInfo->GetNumber(); + void* classNode=gTools().AddChild(classes, "Class"); + gTools().AddAttr( classNode, "Name", className ); + gTools().AddAttr( classNode, "Index", classNumber ); + } } 
//_______________________________________________________________________ void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const @@ -1680,11 +1782,37 @@ void TMVA::MethodBase::ReadClassesFromXML( void* clsnode ) // coverity[tainted_data_argument] gTools().ReadAttr( clsnode, "NClass", readNCls); - for(UInt_t icls = 0; icls<readNCls;++icls){ - TString classname = Form("class%i",icls); - DataInfo().AddClass(classname); + TString className=""; + UInt_t classIndex=0; + void* ch = gTools().GetChild(clsnode); + if (!ch) { + for(UInt_t icls = 0; icls<readNCls;++icls){ + TString classname = Form("class%i",icls); + DataInfo().AddClass(classname); + + } + } + else{ + while (ch) { + gTools().ReadAttr( ch, "Index", classIndex); + gTools().ReadAttr( ch, "Name", className ); + DataInfo().AddClass(className); + + ch = gTools().GetNextChild(ch); + } + } + // retrieve signal and background class index + if (DataInfo().GetClassInfo("Signal") != 0) { + fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber(); + } + else + fSignalClass=0; + if (DataInfo().GetClassInfo("Background") != 0) { + fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber(); } + else + fBackgroundClass=1; } //_______________________________________________________________________ @@ -2446,6 +2574,38 @@ Double_t TMVA::MethodBase::GetSeparation( PDF* pdfS, PDF* pdfB ) const } } + //_______________________________________________________________________ +Double_t TMVA::MethodBase::GetROCIntegral(TH1F *histS, TH1F *histB) const +{ + // calculate the area (integral) under the ROC curve as a + // overall quality measure of the classification + + // note, if zero pointers given, use internal pdf + // sanity check first + if ((!histS && histB) || (histS && !histB)) + Log() << kFATAL << "<GetROCIntegral(TH1F*, TH1F*)> Mismatch in hists" << Endl; + + if(histS==0 || histB==0) return 0.; + + TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 ); + TMVA::PDF *pdfB = new TMVA::PDF( 
" PDF Bkg", histB, TMVA::PDF::kSpline3 ); + + + Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin()); + Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax()); + + Double_t integral = 0; + UInt_t nsteps = 1000; + Double_t step = (xmax-xmin)/Double_t(nsteps); + Double_t cut = xmin; + for (UInt_t i=0; i<nsteps; i++){ + integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut); + cut+=step; + } + return integral*step; +} + + //_______________________________________________________________________ Double_t TMVA::MethodBase::GetROCIntegral(PDF *pdfS, PDF *pdfB) const { @@ -2799,12 +2959,17 @@ void TMVA::MethodBase::MakeClass( const TString& theClassFileName ) const fout << " varIt != inputValues.end(); varIt++, ivar++) {" << endl; fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << endl; fout << " }" << endl; - if (GetTransformationHandler().GetTransformationList().GetSize()!=0 && GetMethodType() != Types::kLikelihood) + if (GetTransformationHandler().GetTransformationList().GetSize()!=0 && + GetMethodType() != Types::kLikelihood && + GetMethodType() != Types::kHMatrix) { fout << " Transform( iV, -1 );" << endl; + } fout << " retval = GetMvaValue__( iV );" << endl; fout << " }" << endl; fout << " else {" << endl; - if (GetTransformationHandler().GetTransformationList().GetSize()!=0 && GetMethodType() != Types::kLikelihood) { + if (GetTransformationHandler().GetTransformationList().GetSize()!=0 && + GetMethodType() != Types::kLikelihood && + GetMethodType() != Types::kHMatrix) { fout << " std::vector<double> iV;" << endl; fout << " int ivar = 0;" << endl; fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << endl; diff --git a/tmva/src/MethodBoost.cxx b/tmva/src/MethodBoost.cxx index e8db2a0c931cb8cc4007815503468c283a8195f7..486b1a8a3c18065f1817260688d7e2e4f7e8a34b 100644 --- a/tmva/src/MethodBoost.cxx +++ b/tmva/src/MethodBoost.cxx @@ -11,16 +11,16 @@ * Virtual base class for all MVA method * * 
* * Authors (alphabetical): * - * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * - * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * - * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * - * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada * - * Or Cohen <orcohenor@gmail.com> - Weizmann Inst., Israel * - * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany * + * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland * + * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * + * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * + * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany * + * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany * * * - * Copyright (c) 2005: * + * Copyright (c) 2005-2011: * * CERN, Switzerland * - * U. of Victoria, Canada #include "TMVA/Timer.h" * + * U. of Victoria, Canada * * MPI-K Heidelberg, Germany * * U. 
of Bonn, Germany * * * @@ -63,7 +63,9 @@ #include "TMVA/Config.h" #include "TMVA/SeparationBase.h" +#include "TMVA/MisClassificationError.h" #include "TMVA/GiniIndex.h" +#include "TMVA/CrossEntropy.h" #include "TMVA/RegressionVariance.h" REGISTER_METHOD(Boost) @@ -132,7 +134,7 @@ TMVA::MethodBoost::~MethodBoost( void ) // the histogram themselves are deleted when the file is closed - if(fMonitorHist) { + if (fMonitorHist) { for ( std::vector<TH1*>::iterator it = fMonitorHist->begin(); it != fMonitorHist->end(); ++it) delete *it; delete fMonitorHist; } @@ -154,8 +156,8 @@ TMVA::MethodBoost::~MethodBoost( void ) Bool_t TMVA::MethodBoost::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ ) { // Boost can handle classification with 2 classes and regression with one regression-target - if( type == Types::kClassification && numberClasses == 2 ) return kTRUE; - // if( type == Types::kRegression && numberTargets == 1 ) return kTRUE; + if (type == Types::kClassification && numberClasses == 2) return kTRUE; + // if (type == Types::kRegression && numberTargets == 1) return kTRUE; return kFALSE; } @@ -164,62 +166,71 @@ Bool_t TMVA::MethodBoost::HasAnalysisType( Types::EAnalysisType type, UInt_t num void TMVA::MethodBoost::DeclareOptions() { DeclareOptionRef( fBoostNum = 1, "Boost_Num", - "Number of times the classifier is boosted"); + "Number of times the classifier is boosted" ); DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod", - "Whether to write monitoring histogram for each boosted classifier"); + "Write monitoring histograms for each boosted classifier" ); - DeclareOptionRef(fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers"); + DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring", + "Produce histograms for detailed boost-wise monitoring" ); + + DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" ); 
AddPreDefVal(TString("AdaBoost")); AddPreDefVal(TString("Bagging")); AddPreDefVal(TString("HighEdgeGauss")); AddPreDefVal(TString("HighEdgeCoPara")); - DeclareOptionRef(fMethodWeightType = "ByError", "Boost_MethodWeightType", - "How to set the final weight of the boosted classifiers"); + DeclareOptionRef( fMethodWeightType = "ByError", "Boost_MethodWeightType", + "How to set the final weight of the boosted classifiers" ); AddPreDefVal(TString("ByError")); AddPreDefVal(TString("Average")); AddPreDefVal(TString("ByROC")); AddPreDefVal(TString("ByOverlap")); AddPreDefVal(TString("LastMethod")); - DeclareOptionRef(fRecalculateMVACut = kTRUE, "Boost_RecalculateMVACut", - "Whether to recalculate the classifier MVA Signallike cut at every boost iteration"); + DeclareOptionRef( fRecalculateMVACut = kTRUE, "Boost_RecalculateMVACut", + "Recalculate the classifier MVA Signallike cut at every boost iteration" ); - DeclareOptionRef(fADABoostBeta = 1.0, "Boost_AdaBoostBeta", - "The ADA boost parameter that sets the effect of every boost step on the events' weights"); + DeclareOptionRef( fADABoostBeta = 1.0, "Boost_AdaBoostBeta", + "The ADA boost parameter that sets the effect of every boost step on the events' weights" ); - DeclareOptionRef(fTransformString = "step", "Boost_Transform", - "Type of transform applied to every boosted method linear, log, step"); + DeclareOptionRef( fTransformString = "step", "Boost_Transform", + "Type of transform applied to every boosted method linear, log, step" ); AddPreDefVal(TString("step")); AddPreDefVal(TString("linear")); AddPreDefVal(TString("log")); + AddPreDefVal(TString("gauss")); - DeclareOptionRef(fRandomSeed = 0, "Boost_RandomSeed", - "Seed for random number generator used for bagging"); + DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed", + "Seed for random number generator used for bagging" ); - TMVA::MethodCompositeBase::fMethods.reserve(fBoostNum);; + TMVA::MethodCompositeBase::fMethods.reserve(fBoostNum); } 
//_______________________________________________________________________ Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption ) { // just registering the string from which the boosted classifier will be created - fBoostedMethodName = Types::Instance().GetMethodName( theMethod ); - fBoostedMethodTitle = methodTitle; - fBoostedMethodOptions = theOption; + fBoostedMethodName = Types::Instance().GetMethodName( theMethod ); + fBoostedMethodTitle = methodTitle; + fBoostedMethodOptions = theOption; + TString opts=theOption; + opts.ToLower(); +// if (opts.Contains("vartransform")) Log() << kFATAL << "It is not possible to use boost in conjunction with variable transform. Please remove either Boost_Num or VarTransform from the option string"<< methodTitle<<Endl; + return kTRUE; } //_______________________________________________________________________ void TMVA::MethodBoost::Init() -{} +{ +} //_______________________________________________________________________ void TMVA::MethodBoost::InitHistos() { // initialisation routine - if(fMonitorHist) { + if (fMonitorHist) { for ( std::vector<TH1*>::iterator it = fMonitorHist->begin(); it != fMonitorHist->end(); ++it) delete *it; delete fMonitorHist; } @@ -286,7 +297,6 @@ void TMVA::MethodBoost::CheckSetup() if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl; Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl; - //TMVA::MethodBase::CheckSetup(); if (fMonitorHist == 0){ InitHistos(); CheckSetup(); @@ -306,21 +316,34 @@ void TMVA::MethodBoost::Train() if (fMethods.size() > 0) fMethods.clear(); fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0); - Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " Classifiers ... patience please" << Endl; + Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " with title " << fBoostedMethodTitle << " Classifiers ... 
patience please" << Endl; Timer timer( fBoostNum, GetName() ); ResetBoostWeights(); // clean boosted method options CleanBoostOptions(); + + + // remove transformations for individual boosting steps + // the transformation of the main method will be rerouted to each of the boost steps + Ssiz_t varTrafoStart=fBoostedMethodOptions.Index("~VarTransform="); + if (varTrafoStart >0) { + Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(":",varTrafoStart); + if (varTrafoEnd<varTrafoStart) + varTrafoEnd=fBoostedMethodOptions.Length(); + fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart); + } + // // training and boosting the classifiers for (fMethodIndex=0;fMethodIndex<fBoostNum;fMethodIndex++) { // the first classifier shows the option string output, the rest not if (fMethodIndex>0) TMVA::MsgLogger::InhibitOutput(); + IMethod* method = ClassifierFactory::Instance().Create(std::string(fBoostedMethodName), GetJobName(), - Form("%s_B%04i", fBoostedMethodName.Data(),fMethodIndex), + Form("%s_B%04i", fBoostedMethodTitle.Data(),fMethodIndex), DataInfo(), fBoostedMethodOptions); TMVA::MsgLogger::EnableOutput(); @@ -328,17 +351,16 @@ void TMVA::MethodBoost::Train() // supressing the rest of the classifier output the right way MethodBase *meth = (dynamic_cast<MethodBase*>(method)); - if(meth==0) continue; + if (meth==0) continue; // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST - if( meth->GetMethodType() == Types::kCategory ){ // DSMTEST + if (meth->GetMethodType() == Types::kCategory) { // DSMTEST MethodCategory *methCat = (dynamic_cast<MethodCategory*>(meth)); // DSMTEST - if( !methCat ) // DSMTEST + if (!methCat) // DSMTEST Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. 
/MethodBoost" << Endl; // DSMTEST methCat->fDataSetManager = fDataSetManager; // DSMTEST } // DSMTEST - meth->SetMsgType(kWARNING); meth->SetupMethod(); meth->ParseOptions(); @@ -347,18 +369,23 @@ void TMVA::MethodBoost::Train() meth->ProcessSetup(); meth->CheckSetup(); + + // reroute transformationhandler + meth->RerouteTransformationHandler (&(this->GetTransformationHandler())); + + // creating the directory of the classifier - if (fMonitorBoostedMethod) - { - methodDir=MethodBaseDir()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fMethodIndex)); - if (methodDir==0) - methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fMethodIndex)); - MethodBase* m = dynamic_cast<MethodBase*>(method); - if(m) { - m->SetMethodDir(methodDir); - m->BaseDir()->cd(); - } + if (fMonitorBoostedMethod) { + methodDir=MethodBaseDir()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fMethodIndex)); + if (methodDir==0) { + methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fMethodIndex)); + } + MethodBase* m = dynamic_cast<MethodBase*>(method); + if (m) { + m->SetMethodDir(methodDir); + m->BaseDir()->cd(); } + } // training TMVA::MethodCompositeBase::fMethods.push_back(method); @@ -372,7 +399,7 @@ void TMVA::MethodBoost::Train() // calculate MVA values of method on training sample CalcMVAValues(); - + if (fMethodIndex==0 && fMonitorBoostedMethod) CreateMVAHistorgrams(); // get ROC integral and overlap integral for single method on @@ -383,11 +410,13 @@ void TMVA::MethodBoost::Train() CalcMethodWeight(); AllMethodsWeight += fMethodWeight.back(); - (*fMonitorHist)[4]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kTRUE, Types::kTesting)); - (*fMonitorHist)[5]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kFALSE, Types::kTesting)); - (*fMonitorHist)[6]->SetBinContent(fMethodIndex+1, fROC_training); - 
(*fMonitorHist)[7]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kFALSE, Types::kTraining)); - (*fMonitorHist)[8]->SetBinContent(fMethodIndex+1, fOverlap_integral); + if (fDetailedMonitoring) { + (*fMonitorHist)[4]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kTRUE, Types::kTesting)); + (*fMonitorHist)[5]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kFALSE, Types::kTesting)); + (*fMonitorHist)[6]->SetBinContent(fMethodIndex+1, fROC_training); + (*fMonitorHist)[7]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kFALSE, Types::kTraining)); + (*fMonitorHist)[8]->SetBinContent(fMethodIndex+1, fOverlap_integral); + } // boosting (reweight training sample) method->MonitorBoost(SetStage(Types::kBeforeBoosting)); @@ -403,17 +432,15 @@ void TMVA::MethodBoost::Train() // thought of counting a few steps, but it doesn't seem to be necessary Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl; if (fMethodError > 0.49999) StopCounter++; - if (StopCounter > 0 && fBoostType != "Bagging") - { - timer.DrawProgressBar( fBoostNum ); - fBoostNum = fMethodIndex+1; - Log() << kINFO << "Error rate has reached 0.5, boosting process stopped at #" << fBoostNum << " classifier" << Endl; - if (fBoostNum < 5) - Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fADABoostBeta << ", try reducing it." <<Endl; - for (Int_t i=0;i<fDefaultHistNum;i++) - (*fMonitorHist)[i]->SetBins(fBoostNum,0,fBoostNum); - break; - } + if (StopCounter > 0 && fBoostType != "Bagging") { + timer.DrawProgressBar( fBoostNum ); + fBoostNum = fMethodIndex+1; + Log() << kINFO << "Error rate has reached 0.5 ("<< fMethodError<<"), boosting process stopped at #" << fBoostNum << " classifier" << Endl; + if (fBoostNum < 5) + Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fADABoostBeta << ", try reducing it." 
<<Endl; + for (Int_t i=0;i<fDefaultHistNum;i++) (*fMonitorHist)[i]->SetBins(fBoostNum,0,fBoostNum); + break; + } } if (fMethodWeightType == "LastMethod") { fMethodWeight.back() = AllMethodsWeight = 1.0; } @@ -467,7 +494,7 @@ void TMVA::MethodBoost::CreateMVAHistorgrams() if (DataInfo().GetClassInfo("Signal") != 0) { signalClass = DataInfo().GetClassInfo("Signal")->GetNumber(); } - gTools().ComputeStat( Data()->GetEventCollection(), fMVAvalues, + gTools().ComputeStat( GetEventCollection( Types::kMaxTreeType ), fMVAvalues, meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass ); fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve; @@ -476,12 +503,12 @@ void TMVA::MethodBoost::CreateMVAHistorgrams() // creating all the historgrams for (Int_t imtd=0; imtd<fBoostNum; imtd++) { - fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) ); - fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i",imtd), "MVA_Train_B", fNbins, xmin, xmax ) ); + fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) ); + fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B", fNbins, xmin, xmax ) ); fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) ); fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) ); - fTestSigMVAHist .push_back( new TH1F( Form("MVA_Test_S%04i",imtd), "MVA_Test_S", fNbins, xmin, xmax ) ); - fTestBgdMVAHist .push_back( new TH1F( Form("MVA_Test_B%04i",imtd), "MVA_Test_B", fNbins, xmin, xmax ) ); + fTestSigMVAHist .push_back( new TH1F( Form("MVA_Test_S%04i", imtd), "MVA_Test_S", fNbins, xmin, xmax ) ); + fTestBgdMVAHist .push_back( new TH1F( Form("MVA_Test_B%04i", imtd), "MVA_Test_B", fNbins, xmin, xmax ) ); } } @@ -489,7 +516,7 @@ void TMVA::MethodBoost::CreateMVAHistorgrams() void 
TMVA::MethodBoost::ResetBoostWeights() { // resetting back the boosted weights of the events to 1 - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) { Event *ev = Data()->GetEvent(ievt); ev->SetBoostWeight( 1.0 ); } @@ -504,7 +531,7 @@ void TMVA::MethodBoost::WriteMonitoringHistosToFile( void ) const //writing the histograms in the specific classifier's directory MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]); - if(!m) continue; + if (!m) continue; dir = m->BaseDir(); dir->cd(); fTrainSigMVAHist[imtd]->SetDirectory(dir); @@ -536,8 +563,8 @@ void TMVA::MethodBoost::TestClassification() if (fMethods.size()<nloop) nloop = fMethods.size(); //running over all the events and populating the test MVA histograms Data()->SetCurrentType(Types::kTesting); - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { - Event* ev = Data()->GetEvent(ievt); + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) { + const Event* ev = GetEvent(ievt); Float_t w = ev->GetWeight(); if (DataInfo().IsSignal(ev)) { for (UInt_t imtd=0; imtd<nloop; imtd++) { @@ -558,7 +585,7 @@ void TMVA::MethodBoost::TestClassification() void TMVA::MethodBoost::WriteEvaluationHistosToFile(Types::ETreeType treetype) { MethodBase::WriteEvaluationHistosToFile(treetype); - if(treetype==Types::kTraining) return; + if (treetype==Types::kTraining) return; UInt_t nloop = fTestSigMVAHist.size(); if (fMethods.size()<nloop) nloop = fMethods.size(); if (fMonitorBoostedMethod) { @@ -566,9 +593,9 @@ void TMVA::MethodBoost::WriteEvaluationHistosToFile(Types::ETreeType treetype) for (UInt_t imtd=0;imtd<nloop;imtd++) { //writing the histograms in the specific classifier's directory MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]); - if(!mva) continue; + if (!mva) continue; dir = mva->BaseDir(); - if(dir==0) continue; + if (dir==0) continue; dir->cd(); fTestSigMVAHist[imtd]->SetDirectory(dir); fTestSigMVAHist[imtd]->Write(); @@ -590,8 +617,7 @@ 
void TMVA::MethodBoost::SingleTrain() // initialization Data()->SetCurrentType(Types::kTraining); MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod()); - if(meth) - meth->TrainMethod(); + if (meth) meth->TrainMethod(); } //_______________________________________________________________________ @@ -605,48 +631,106 @@ void TMVA::MethodBoost::FindMVACut() if (!fRecalculateMVACut && fMethodIndex>0) { MethodBase* m = dynamic_cast<MethodBase*>(fMethods[0]); - if(m) - lastMethod->SetSignalReferenceCut(m->GetSignalReferenceCut()); - } else { - + if (m) lastMethod->SetSignalReferenceCut(m->GetSignalReferenceCut()); + } + else { + // creating a fine histograms containing the error rate - const Int_t nValBins=1000; - Double_t* err=new Double_t[nValBins]; - const Double_t valmin=-1.5; - const Double_t valmax=1.5; - for (Int_t i=0;i<nValBins;i++) err[i]=0.; + const Int_t nBins=101; + Double_t minMVA=150000; + Double_t maxMVA=-150000; + for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { + GetEvent(ievt); + Double_t val=lastMethod->GetMvaValue(); + if (val>maxMVA) maxMVA=val; + if (val<minMVA) minMVA=val; + } + Double_t sum = 0.; + + TH1F *mvaS = new TH1F("mvaS","",nBins,minMVA,maxMVA); + TH1F *mvaB = new TH1F("mvaB","",nBins,minMVA,maxMVA); + TH1F *mvaSC = new TH1F("mvaSC","",nBins,minMVA,maxMVA); + TH1F *mvaBC = new TH1F("mvaBC","",nBins,minMVA,maxMVA); + for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { + Double_t weight = GetEvent(ievt)->GetWeight(); + Double_t mvaVal=lastMethod->GetMvaValue(); sum +=weight; - Double_t val=lastMethod->GetMvaValue(); - Int_t ibin = (Int_t) (((val-valmin)/(valmax-valmin))*nValBins); - - if (ibin>=nValBins) ibin = nValBins-1; - if (ibin<0) ibin = 0; - if (DataInfo().IsSignal(Data()->GetEvent(ievt))){ - for (Int_t i=ibin;i<nValBins;i++) err[i]+=weight; - } - else { - for (Int_t i=0;i<ibin;i++) err[i]+=weight; + if (DataInfo().IsSignal(GetEvent(ievt))){ + mvaS->Fill(mvaVal,weight); + }else { + mvaB->Fill(mvaVal,weight); } 
} - Double_t minerr=1.e6; - Int_t minbin=-1; - for (Int_t i=0;i<nValBins;i++){ - if (err[i]<=minerr){ - minerr=err[i]; - minbin=i; + SeparationBase *sepGain; + //sepGain = new MisClassificationError(); + sepGain = new GiniIndex(); + //sepGain = new CrossEntropy(); + + Double_t sTot = mvaS->GetSum(); + Double_t bTot = mvaB->GetSum(); + + mvaSC->SetBinContent(1,mvaS->GetBinContent(1)); + mvaBC->SetBinContent(1,mvaB->GetBinContent(1)); + Double_t sSel=mvaSC->GetBinContent(1); + Double_t bSel=mvaBC->GetBinContent(1); + Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot); + Double_t mvaCut=mvaSC->GetBinLowEdge(1); + // cout << "minMVA =" << minMVA << " maxMVA = " << maxMVA << " width = " << mvaSC->GetBinWidth(1) << endl; + + // for (Int_t ibin=1;ibin<=nBins;ibin++) cout << " cutvalues[" << ibin<<"]="<<mvaSC->GetBinLowEdge(ibin) << " " << mvaSC->GetBinCenter(ibin) << endl; + Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal + Double_t SoBRight=1, SoBLeft=1; + for (Int_t ibin=2;ibin<nBins;ibin++){ + mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1)); + mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1)); + + sSel=mvaSC->GetBinContent(ibin); + bSel=mvaBC->GetBinContent(ibin); + + if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot) + // && (mvaSC->GetBinCenter(ibin) >0 || (fMethodIndex+1)%2 ) + ){ + separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot); + mvaCut=mvaSC->GetBinCenter(ibin); + mvaCut=mvaSC->GetBinLowEdge(ibin+1); + if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1; + else mvaCutOrientation=1; + SoBRight=sSel/bSel; + SoBLeft=(sTot-sSel)/(bTot-bSel); } } - delete[] err; + + if (SoBRight<1 && SoBLeft<1) { + if (mvaCutOrientation == -1) mvaCut = mvaSC->GetBinCenter(1)-mvaSC->GetBinWidth(1); + if (mvaCutOrientation == 1) mvaCut = 
mvaSC->GetBinCenter(nBins)+mvaSC->GetBinWidth(nBins); + } else if (SoBRight>1 && SoBLeft>1) { + if (mvaCutOrientation == 1) mvaCut = mvaSC->GetBinCenter(1)-mvaSC->GetBinWidth(1); + if (mvaCutOrientation == -1) mvaCut = mvaSC->GetBinCenter(nBins)+mvaSC->GetBinWidth(nBins); + } - Double_t sigCutVal = valmin + ((valmax-valmin)*minbin)/Float_t(nValBins+1); - lastMethod->SetSignalReferenceCut(sigCutVal); + // cout << "Min="<<minMVA << " Max=" << maxMVA + // << " sTot=" << sTot + // << " bTot=" << bTot + // << " sepGain="<<separationGain + // << " cut=" << mvaCut + // << " cutOrientation="<<mvaCutOrientation + // << endl; + // cout << "S/B right="<<SoBRight << " left="<<SoBLeft<<endl; + // if (fMethodIndex==0)mvaCut = -1.9616885110735893e-02; + // if (fMethodIndex==1)mvaCut = -6.8812005221843719e-02; + lastMethod->SetSignalReferenceCut(mvaCut); + lastMethod->SetSignalReferenceCutOrientation(mvaCutOrientation); Log() << kDEBUG << "(old step) Setting method cut to " <<lastMethod->GetSignalReferenceCut()<< Endl; - + + mvaS ->Delete(); + mvaB ->Delete(); + mvaSC->Delete(); + mvaBC->Delete(); } } @@ -655,15 +739,15 @@ void TMVA::MethodBoost::FindMVACut() void TMVA::MethodBoost::SingleBoost() { MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back()); - if(!method) return; - Event * ev; Float_t w,v,wo; Bool_t sig=kTRUE; + if (!method) return; + Float_t w,v,wo; Bool_t sig=kTRUE; Double_t sumAll=0, sumWrong=0, sumAllOrig=0, sumWrongOrig=0, sumAll1=0; - Bool_t* WrongDetection=new Bool_t[Data()->GetNEvents()]; - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE; + Bool_t* WrongDetection=new Bool_t[GetNEvents()]; + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE; // finding the wrong events and calculating their total weights - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { - ev = Data()->GetEvent(ievt); + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) { + const Event* ev = GetEvent(ievt); 
sig=DataInfo().IsSignal(ev); v = fMVAvalues->at(ievt); w = ev->GetWeight(); @@ -678,12 +762,13 @@ void TMVA::MethodBoost::SingleBoost() } sumAll += w; sumAllOrig += wo; - if ( sig != (fMVAvalues->at(ievt) > method->GetSignalReferenceCut()) ) { - WrongDetection[ievt]=kTRUE; - sumWrong+=w; + if (sig == method->IsSignalLike(fMVAvalues->at(ievt))){ + WrongDetection[ievt]=kFALSE; + }else{ + WrongDetection[ievt]=kTRUE; + sumWrong+=w; sumWrongOrig+=wo; } - else WrongDetection[ievt]=kFALSE; } fMethodError=sumWrong/sumAll; fOrigMethodError = sumWrongOrig/sumAllOrig; @@ -699,10 +784,11 @@ void TMVA::MethodBoost::SingleBoost() } else fBoostWeight = 1000; + Double_t alphaWeight = ( fBoostWeight > 0.0 ? TMath::Log(fBoostWeight) : 0.0); if (alphaWeight>5.) alphaWeight = 5.; if (alphaWeight<0.){ - //Log()<<kWARNING<<"alphaWeight is too small in AdaBoost alpha=" << alphaWeight<< Endl; + Log() << kWARNING << "alphaWeight is too small in AdaBoost (alpha = " << alphaWeight << ")" << Endl; alphaWeight = -alphaWeight; } if (fBoostType == "AdaBoost") { @@ -711,36 +797,40 @@ void TMVA::MethodBoost::SingleBoost() // touching the original weights (changing only the boosted weight of all the events) // first reweight Double_t newSum=0., oldSum=0.; - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { - ev = Data()->GetEvent(ievt); + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) { + Event* ev = Data()->GetEvent(ievt); oldSum += ev->GetWeight(); - // ev->ScaleBoostWeight(TMath::Exp(-alphaWeight*((WrongDetection[ievt])? -1.0 : 1.0))); - //ev->ScaleBoostWeight(TMath::Exp(-alphaWeight*((WrongDetection[ievt])? 
-1.0 : 0))); - if (WrongDetection[ievt]) ev->ScaleBoostWeight(fBoostWeight); + if (WrongDetection[ievt] && fBoostWeight != 0) { + if (ev->GetWeight() > 0) ev->ScaleBoostWeight(fBoostWeight); + else ev->ScaleBoostWeight(1./fBoostWeight); + } newSum += ev->GetWeight(); } Double_t normWeight = oldSum/newSum; // bla std::cout << "Normalize weight by (Boost)" << normWeight << " = " << oldSum<<"/"<<newSum<< " eventBoostFactor="<<fBoostWeight<<std::endl; // next normalize the weights - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { - Data()->GetEvent(ievt)->ScaleBoostWeight(normWeight); - } - + Double_t normSig=0, normBkg=0; + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) { + Event* ev = Data()->GetEvent(ievt); + ev->ScaleBoostWeight(normWeight); + if (ev->GetClass()) normBkg+=ev->GetWeight(); + else normSig+=ev->GetWeight(); + } } else if (fBoostType == "Bagging") { // Bagging or Bootstrap boosting, gives new random weight for every event TRandom3*trandom = new TRandom3(fRandomSeed+fMethods.size()); - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { - ev = Data()->GetEvent(ievt); + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) { + Event* ev = Data()->GetEvent(ievt); ev->SetBoostWeight(trandom->Rndm()); sumAll1+=ev->GetWeight(); } // rescaling all the weights to have the same sum, but without touching the original // weights (changing only the boosted weight of all the events) Double_t Factor=sumAll/sumAll1; - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { - ev = Data()->GetEvent(ievt); + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) { + Event* ev = Data()->GetEvent(ievt); ev->ScaleBoostWeight(Factor); } } @@ -750,8 +840,8 @@ void TMVA::MethodBoost::SingleBoost() // from the MVA cut value Double_t MVACutValue = method->GetSignalReferenceCut(); sumAll1 = 0; - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { - ev = Data()->GetEvent(ievt); + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) { + Event* ev = 
Data()->GetEvent(ievt); if (fBoostType == "HighEdgeGauss") ev->SetBoostWeight( TMath::Exp( -std::pow(fMVAvalues->at(ievt)-MVACutValue,2)/(0.1*fADABoostBeta) ) ); else if (fBoostType == "HighEdgeCoPara") @@ -765,7 +855,7 @@ void TMVA::MethodBoost::SingleBoost() // touching the original weights (changing only the boosted // weight of all the events) Double_t Factor=sumAll/sumAll1; - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) Data()->GetEvent(ievt)->ScaleBoostWeight(Factor); } delete[] WrongDetection; @@ -783,21 +873,31 @@ void TMVA::MethodBoost::CalcMethodWeight() return; } - Event * ev; Float_t w; + Float_t w; Double_t sumAll=0, sumWrong=0; // finding the MVA cut value for IsSignalLike, stored in the method FindMVACut(); // finding the wrong events and calculating their total weights - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { - ev = Data()->GetEvent(ievt); + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) { + const Event* ev = GetEvent(ievt); w = ev->GetWeight(); sumAll += w; - if ( DataInfo().IsSignal(ev) != - (fMVAvalues->at(ievt) > method->GetSignalReferenceCut()) ) + if ( DataInfo().IsSignal(ev) != method->IsSignalLike(fMVAvalues->at(ievt))) { sumWrong += w; + } + + // if (ievt < 10) + // cout << " TYpe=" << DataInfo().IsSignal(ev) + // << " mvaValue="<<fMVAvalues->at(ievt) + // << " mvaCutVal="<<method->GetSignalReferenceCut() + // << " mvaCutValOrien="<<method->GetSignalReferenceCutOrientation() + // << " isSignallike="<<method->IsSignalLike(fMVAvalues->at(ievt)) + // << endl; } + + // cout << "sumWrong="<<sumWrong << " sumAll="<<sumAll<<endl; fMethodError=sumWrong/sumAll; // calculating the fMethodError and the fBoostWeight out of it uses @@ -815,11 +915,11 @@ void TMVA::MethodBoost::CalcMethodWeight() if (fBoostWeight <= 0.0) fBoostWeight = 1.0; // calculate method weight - if (fMethodWeightType == "ByError") fMethodWeight.push_back(TMath::Log(fBoostWeight)); - else if 
(fMethodWeightType == "Average") fMethodWeight.push_back(1.0); - else if (fMethodWeightType == "ByROC") fMethodWeight.push_back(fROC_training); + if (fMethodWeightType == "ByError") fMethodWeight.push_back(TMath::Log(fBoostWeight)); + else if (fMethodWeightType == "Average") fMethodWeight.push_back(1.0); + else if (fMethodWeightType == "ByROC") fMethodWeight.push_back(fROC_training); else if (fMethodWeightType == "ByOverlap") fMethodWeight.push_back((fOverlap_integral > 0.0 ? 1.0/fOverlap_integral : 1000.0)); - else fMethodWeight.push_back(0); + else fMethodWeight.push_back(0); } //_______________________________________________________________________ @@ -878,9 +978,10 @@ Double_t TMVA::MethodBoost::GetMvaValue( Double_t* err, Double_t* errUpper ) //Double_t fact = TMath::Exp(-1.)+TMath::Exp(1.); for (UInt_t i=0;i< fMethods.size(); i++){ MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]); - if(m==0) continue; + if (m==0) continue; Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue(); Double_t sigcut = m->GetSignalReferenceCut(); + // default is no transform if (fTransformString == "linear"){ @@ -891,8 +992,11 @@ Double_t TMVA::MethodBoost::GetMvaValue( Double_t* err, Double_t* errUpper ) val = TMath::Log((val-sigcut)+epsilon); } else if (fTransformString == "step" ){ - if (val < sigcut) val = -1.; - else val = 1.; + if (m->IsSignalLike(val)) val = 1.; + else val = -1.; + } + else if (fTransformString == "gauss"){ + val = TMath::Gaus((val-sigcut),1); } else { Log() << kFATAL << "error unknown transformation " << fTransformString<<Endl; @@ -966,9 +1070,9 @@ Double_t TMVA::MethodBoost::GetBoostROCIntegral(Bool_t singleMethod, Types::ETre if (singleMethod && eTT==Types::kTraining) mvaRes = fMVAvalues; // values already calculated else { - mvaRes = new std::vector <Float_t>(Data()->GetNEvents()); - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { - Data()->GetEvent(ievt); + mvaRes = new std::vector <Float_t>(GetNEvents()); + for 
(Long64_t ievt=0; ievt<GetNEvents(); ievt++) { + GetEvent(ievt); (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err); } } @@ -982,7 +1086,7 @@ Double_t TMVA::MethodBoost::GetBoostROCIntegral(Bool_t singleMethod, Types::ETre if (DataInfo().GetClassInfo("Signal") != 0) { signalClass = DataInfo().GetClassInfo("Signal")->GetNumber(); } - gTools().ComputeStat( Data()->GetEventCollection(eTT), mvaRes, + gTools().ComputeStat( GetEventCollection(eTT), mvaRes, meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass ); fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve; @@ -1059,8 +1163,8 @@ void TMVA::MethodBoost::CalcMVAValues() return; } // calculate MVA values - for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { - Data()->GetEvent(ievt); + for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) { + GetEvent(ievt); fMVAvalues->at(ievt) = method->GetMvaValue(); } } diff --git a/tmva/src/MethodCategory.cxx b/tmva/src/MethodCategory.cxx index 612551a4f4c8f7af1959e2f3ff4b25aaafb1ceea..62dd013f5b1e18f7ee4f8b42c03b6f29389fac93 100644 --- a/tmva/src/MethodCategory.cxx +++ b/tmva/src/MethodCategory.cxx @@ -11,10 +11,15 @@ * Virtual base class for all MVA method * * * * Authors (alphabetical): * + * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * * Nadim Sah <Nadim.Sah@cern.ch> - Berlin, Germany * + * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * + * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * + * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany * + * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany * * * - * Copyright (c) 2005: * + * Copyright (c) 2005-2011: * * CERN, Switzerland * * U.
of Victoria, Canada * * MPI-K Heidelberg, Germany * @@ -57,6 +62,7 @@ #include "TMVA/Ranking.h" #include "TMVA/VariableInfo.h" #include "TMVA/DataSetManager.h" +#include "TMVA/VariableRearrangeTransform.h" REGISTER_METHOD(Category) @@ -131,8 +137,20 @@ TMVA::IMethod* TMVA::MethodCategory::AddMethod( const TCut& theCut, Log() << kINFO << "Adding sub-classifier: " << addedMethodName << "::" << theTitle << Endl; + // add transformation to rearrange the input variables + VariableRearrangeTransform* rearrangeTransformation = new VariableRearrangeTransform(DataInfo()); + TString variables(theVariables); + variables.ReplaceAll(":",","); // use ',' as separator between variables +// std::cout << "variables " << variables.Data() << std::endl; + DataSetInfo& dsi = CreateCategoryDSI(theCut, theVariables, theTitle); + rearrangeTransformation->SetOutputDataSetInfo( &dsi ); + rearrangeTransformation->ToggleInputSortOrder(kFALSE); // kFALSE --> take the order of variables from the option string + rearrangeTransformation->SelectInput( variables, kTRUE ); +// std::cout << "set input done " << std::endl; + + rearrangeTransformation->SetEnabled(kFALSE); IMethod* addedMethod = ClassifierFactory::Instance().Create(addedMethodName,GetJobName(),theTitle,dsi,theOptions); MethodBase *method = (dynamic_cast<MethodBase*>(addedMethod)); @@ -141,6 +159,7 @@ TMVA::IMethod* TMVA::MethodCategory::AddMethod( const TCut& theCut, method->SetupMethod(); method->ParseOptions(); + method->GetTransformationHandler().AddTransformation( rearrangeTransformation, -1 ); method->ProcessSetup(); // set or create correct method base dir for added method @@ -172,6 +191,8 @@ TMVA::IMethod* TMVA::MethodCategory::AddMethod( const TCut& theCut, Form("%s:%s",GetName(),method->GetName()), "pass", 0, 0, 'C' ); + rearrangeTransformation->SetEnabled(kTRUE); + return method; } @@ -217,7 +238,9 @@ TMVA::DataSetInfo& TMVA::MethodCategory::CreateCategoryDSI(const TCut& theCut, // check the variables of the old dsi for the 
variable that we want to add for (itrVarInfo = oldDSI.GetVariableInfos().begin(); itrVarInfo != oldDSI.GetVariableInfos().end(); itrVarInfo++) { - if((*itrVariables==itrVarInfo->GetLabel()) || (*itrVariables==itrVarInfo->GetExpression())) { + if((*itrVariables==itrVarInfo->GetLabel()) ) { // || (*itrVariables==itrVarInfo->GetExpression())) { + // don't compare the expression, since the user might take two times the same expression, but with different labels + // and apply different transformations to the variables. dsi->AddVariable(*itrVarInfo); varMap.push_back(counter); found = kTRUE; @@ -227,7 +250,9 @@ TMVA::DataSetInfo& TMVA::MethodCategory::CreateCategoryDSI(const TCut& theCut, // check the spectators of the old dsi for the variable that we want to add for (itrVarInfo = oldDSI.GetSpectatorInfos().begin(); itrVarInfo != oldDSI.GetSpectatorInfos().end(); itrVarInfo++) { - if((*itrVariables==itrVarInfo->GetLabel()) || (*itrVariables==itrVarInfo->GetExpression())) { + if((*itrVariables==itrVarInfo->GetLabel()) ) { // || (*itrVariables==itrVarInfo->GetExpression())) { + // don't compare the expression, since the user might take two times the same expression, but with different labels + // and apply different transformations to the variables. dsi->AddVariable(*itrVarInfo); varMap.push_back(counter); found = kTRUE; @@ -334,9 +359,11 @@ void TMVA::MethodCategory::Train() const Int_t MinNoTrainingEvents = 10; // THIS NEEDS TO BE CHANGED: - TString what("Classification"); - what.ToLower(); - Types::EAnalysisType analysisType = ( what.CompareTo("regression")==0 ? Types::kRegression : Types::kClassification ); +// TString what("Classification"); +// what.ToLower(); +// Types::EAnalysisType analysisType = ( what.CompareTo("regression")==0 ? 
Types::kRegression : Types::kClassification ); + + Types::EAnalysisType analysisType = GetAnalysisType(); // start the training Log() << kINFO << "Train all sub-classifiers for " @@ -355,6 +382,7 @@ void TMVA::MethodCategory::Train() MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod); if(!mva) continue; + mva->SetAnalysisType(GetAnalysisType()); if (!mva->HasAnalysisType( analysisType, mva->DataInfo().GetNClasses(), mva->DataInfo().GetNTargets() ) ) { @@ -582,15 +610,44 @@ Double_t TMVA::MethodCategory::GetMvaValue( Double_t* err, Double_t* errUpper ) } // get mva value from the suitable sub-classifier - ev->SetVariableArrangement(&fVarMaps[methodToUse]); - MethodBase* m = dynamic_cast<MethodBase*>(fMethods[methodToUse]); - Double_t mvaValue = 0; - if(m!=0) { - mvaValue = m->GetMvaValue(ev,err); - } - if (errUpper) *errUpper=-1; // using same convention as in NoErrorCalc() - ev->SetVariableArrangement(0); + Double_t mvaValue = dynamic_cast<MethodBase*>(fMethods[methodToUse])->GetMvaValue(ev,err,errUpper); return mvaValue; } + + +//_______________________________________________________________________ +const std::vector<Float_t> &TMVA::MethodCategory::GetRegressionValues() +{ + // returns the regression values of the right sub-classifier + + if (fMethods.size()==0) return MethodBase::GetRegressionValues(); + + UInt_t methodToUse = 0; + const Event* ev = GetEvent(); + + // determine which sub-classifier to use for this event + Int_t suitableCutsN = 0; + + for (UInt_t i=0; i<fMethods.size(); ++i) { + if (PassesCut(ev, i)) { + ++suitableCutsN; + methodToUse=i; + } + } + + if (suitableCutsN == 0) { + Log() << kWARNING << "Event does not lie within the cut of any sub-classifier." << Endl; + return MethodBase::GetRegressionValues(); + } + + if (suitableCutsN > 1) { + Log() << kFATAL << "The defined categories are not disjoint."
<< Endl; + return MethodBase::GetRegressionValues(); + } + + // get regression values from the suitable sub-classifier + return dynamic_cast<MethodBase*>(fMethods[methodToUse])->GetRegressionValues(); +} + diff --git a/tmva/src/MethodCompositeBase.cxx b/tmva/src/MethodCompositeBase.cxx index efc2f00284a487be91f35aa727810968b2d60a0f..11e255187fe3def213057c082d226b56879ae83e 100644 --- a/tmva/src/MethodCompositeBase.cxx +++ b/tmva/src/MethodCompositeBase.cxx @@ -113,10 +113,15 @@ void TMVA::MethodCompositeBase::AddWeightsXMLTo( void* parent ) const gTools().AddAttr(methxml,"Index", i ); gTools().AddAttr(methxml,"Weight", fMethodWeight[i]); gTools().AddAttr(methxml,"MethodSigCut", method->GetSignalReferenceCut()); + gTools().AddAttr(methxml,"MethodSigCutOrientation", method->GetSignalReferenceCutOrientation()); gTools().AddAttr(methxml,"MethodTypeName", method->GetMethodTypeName()); gTools().AddAttr(methxml,"MethodName", method->GetMethodName() ); gTools().AddAttr(methxml,"JobName", method->GetJobName()); gTools().AddAttr(methxml,"Options", method->GetOptions()); + if (method->fTransformationPointer) + gTools().AddAttr(methxml,"UseMainMethodTransformation", TString("true")); + else + gTools().AddAttr(methxml,"UseMainMethodTransformation", TString("false")); method->AddWeightsXMLTo(methxml); } } @@ -146,14 +151,30 @@ void TMVA::MethodCompositeBase::ReadWeightsFromXML( void* wghtnode ) gTools().ReadAttr( wghtnode, "NMethods", nMethods ); void* ch = gTools().GetChild(wghtnode); for (UInt_t i=0; i< nMethods; i++) { - Double_t methodWeight, methodSigCut; + Double_t methodWeight, methodSigCut, methodSigCutOrientation; gTools().ReadAttr( ch, "Weight", methodWeight ); gTools().ReadAttr( ch, "MethodSigCut", methodSigCut); + gTools().ReadAttr( ch, "MethodSigCutOrientation", methodSigCutOrientation); gTools().ReadAttr( ch, "MethodTypeName", methodTypeName ); gTools().ReadAttr( ch, "MethodName", methodName ); gTools().ReadAttr( ch, "JobName", jobName ); gTools().ReadAttr( ch, "Options",
optionString ); + Bool_t rerouteTransformation = kFALSE; + if (gTools().HasAttr( ch, "UseMainMethodTransformation")) { + TString rerouteString(""); + gTools().ReadAttr( ch, "UseMainMethodTransformation", rerouteString ); + rerouteString.ToLower(); + if (rerouteString=="true") + rerouteTransformation=kTRUE; + } + + //remove trailing "~" to signal that options have to be reused + optionString.ReplaceAll("~",""); + //ignore meta-options for method Boost + optionString.ReplaceAll("Boost_","~Boost_"); + optionString.ReplaceAll("!~","~!"); + if (i==0){ // the cast on MethodBoost is ugly, but a similar line is also in ReadWeightsFromFile --> needs to be fixed later ((TMVA::MethodBoost*)this)->BookMethod( Types::Instance().GetMethodType( methodTypeName), methodName, optionString ); @@ -169,12 +190,15 @@ void TMVA::MethodCompositeBase::ReadWeightsFromXML( void* wghtnode ) void* methXML = gTools().GetChild(ch); meth->SetupMethod(); - meth->ReadWeightsFromXML(methXML); meth->SetMsgType(kWARNING); meth->ParseOptions(); meth->ProcessSetup(); meth->CheckSetup(); + meth->ReadWeightsFromXML(methXML); meth->SetSignalReferenceCut(methodSigCut); + meth->SetSignalReferenceCutOrientation(methodSigCutOrientation); + + meth->RerouteTransformationHandler (&(this->GetTransformationHandler())); ch = gTools().GetNextChild(ch); } diff --git a/tmva/src/MethodCuts.cxx b/tmva/src/MethodCuts.cxx index 0131a0bd47e2983d43b0ac30d396f484ea938522..6f9206ef30dc247ae4455fee022f819978baa488 100644 --- a/tmva/src/MethodCuts.cxx +++ b/tmva/src/MethodCuts.cxx @@ -873,13 +873,13 @@ Double_t TMVA::MethodCuts::ComputeEstimator( std::vector<Double_t>& pars ) // retrieve signal and background efficiencies for given cut switch (fEffMethod) { case kUsePDFs: - this->GetEffsfromPDFs( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB ); + this->GetEffsfromPDFs (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB); break; case kUseEventSelection: - this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB); + 
this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB); break; default: - this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB); + this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB); } Double_t eta = 0; @@ -1767,7 +1767,7 @@ void TMVA::MethodCuts::GetHelpMessage() const Log() << " - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" << brk << Endl; Log() << " - Log: InitialTemperature / Log(StepNumber+2) * TemperatureScale" << brk << Endl; Log() << " - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" << brk << Endl; - Log() << " - Sin: ( Sin( StepNumber / TemperatureScale ) + 1 ) / (StepNumber + 1) * InitialTemperature + Eps" << brk << Endl; + Log() << " - Sin: (Sin(StepNumber / TemperatureScale) + 1) / (StepNumber + 1)*InitialTemperature + Eps" << brk << Endl; Log() << " - Geo: CurrentTemperature * TemperatureScale" << Endl; Log() << "" << Endl; Log() << "Their performance can be improved by adjusting initial temperature" << Endl; diff --git a/tmva/src/MethodHMatrix.cxx b/tmva/src/MethodHMatrix.cxx index ff9945389b74f0a60a8eb904331de7ff422cda7f..47ebef42d4baf4648b67e8d6c1fafea264946a12 100644 --- a/tmva/src/MethodHMatrix.cxx +++ b/tmva/src/MethodHMatrix.cxx @@ -12,6 +12,7 @@ * * * Authors (alphabetical): * * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * * Kai Voss <Kai.Voss@cern.ch> - U. 
of Victoria, Canada * * * @@ -190,17 +191,21 @@ void TMVA::MethodHMatrix::ComputeCovariance( Bool_t isSignal, TMatrixD* mat ) Double_t *xval = new Double_t[nvar]; // perform event loop - for (Int_t i=0; i<Data()->GetNEvents(); i++) { + for (Int_t i=0, iEnd=Data()->GetNEvents(); i<iEnd; ++i) { - // retrieve the event - const Event* ev = GetEvent(i); - Double_t weight = ev->GetWeight(); + // retrieve the original (not transformed) event + const Event* origEvt = Data()->GetEvent(i); + Double_t weight = origEvt->GetWeight(); // in case event with neg weights are to be ignored if (IgnoreEventsWithNegWeightsInTraining() && weight <= 0) continue; - if (DataInfo().IsSignal(ev) != isSignal) continue; + if (DataInfo().IsSignal(origEvt) != isSignal) continue; + // transform the event + GetTransformationHandler().SetTransformationReferenceClass( origEvt->GetClass() ); + const Event* ev = GetTransformationHandler().Transform( origEvt ); + // event is of good type sumOfWeights += weight; @@ -250,51 +255,31 @@ Double_t TMVA::MethodHMatrix::GetMvaValue( Double_t* err, Double_t* errUpper ) } //_______________________________________________________________________ -Double_t TMVA::MethodHMatrix::GetChi2( TMVA::Event* e, Types::ESBType type ) const +Double_t TMVA::MethodHMatrix::GetChi2( Types::ESBType type ) { // compute chi2-estimator for event according to type (signal/background) - // loop over variables - UInt_t ivar,jvar; - vector<Double_t> val( GetNvar() ); - for (ivar=0; ivar<GetNvar(); ivar++) { - val[ivar] = e->GetValue(ivar); - if (IsNormalised()) val[ivar] = gTools().NormVariable( val[ivar], GetXmin( ivar ), GetXmax( ivar ) ); - } + // get original (not transformed) event - Double_t chi2 = 0; - for (ivar=0; ivar<GetNvar(); ivar++) { - for (jvar=0; jvar<GetNvar(); jvar++) { - if (type == Types::kSignal) - chi2 += ( (val[ivar] - (*fVecMeanS)(ivar))*(val[jvar] - (*fVecMeanS)(jvar)) - * (*fInvHMatrixS)(ivar,jvar) ); - else - chi2 += ( (val[ivar] - 
(*fVecMeanB)(ivar))*(val[jvar] - (*fVecMeanB)(jvar)) - * (*fInvHMatrixB)(ivar,jvar) ); - } - } + const Event* origEvt = fTmpEvent ? fTmpEvent:Data()->GetEvent(); - // sanity check - if (chi2 < 0) Log() << kFATAL << "<GetChi2> negative chi2: " << chi2 << Endl; + // loop over variables + UInt_t ivar(0), jvar(0), nvar(GetNvar()); + vector<Double_t> val( nvar ); - return chi2; -} + // transform the event according to the given type (signal/background) + if (type==Types::kSignal) + GetTransformationHandler().SetTransformationReferenceClass( fSignalClass ); + else + GetTransformationHandler().SetTransformationReferenceClass( fBackgroundClass ); -//_______________________________________________________________________ -Double_t TMVA::MethodHMatrix::GetChi2( Types::ESBType type ) const -{ - // compute chi2-estimator for event according to type (signal/background) + const Event* ev = GetTransformationHandler().Transform( origEvt ); - const Event * ev = GetEvent(); - - // loop over variables - UInt_t ivar,jvar; - vector<Double_t> val( GetNvar() ); - for (ivar=0; ivar<GetNvar(); ivar++) val[ivar] = ev->GetValue( ivar ); + for (ivar=0; ivar<nvar; ivar++) val[ivar] = ev->GetValue( ivar ); Double_t chi2 = 0; - for (ivar=0; ivar<GetNvar(); ivar++) { - for (jvar=0; jvar<GetNvar(); jvar++) { + for (ivar=0; ivar<nvar; ivar++) { + for (jvar=0; jvar<nvar; jvar++) { if (type == Types::kSignal) chi2 += ( (val[ivar] - (*fVecMeanS)(ivar))*(val[jvar] - (*fVecMeanS)(jvar)) * (*fInvHMatrixS)(ivar,jvar) ); @@ -311,24 +296,30 @@ Double_t TMVA::MethodHMatrix::GetChi2( Types::ESBType type ) const } //_______________________________________________________________________ -void TMVA::MethodHMatrix::AddWeightsXMLTo( void* parent ) const { +void TMVA::MethodHMatrix::AddWeightsXMLTo( void* parent ) const +{ + // create XML description for HMatrix classification + void* wght = gTools().AddChild(parent, "Weights"); - gTools().WriteTVectorDToXML(wght,"VecMeanS",fVecMeanS); - 
gTools().WriteTVectorDToXML(wght,"VecMeanB", fVecMeanB); - gTools().WriteTMatrixDToXML(wght,"InvHMatS",fInvHMatrixS); - gTools().WriteTMatrixDToXML(wght,"InvHMatB",fInvHMatrixB); - //Log() << kFATAL << "Please implement writing of weights as XML" << Endl; + gTools().WriteTVectorDToXML( wght, "VecMeanS", fVecMeanS ); + gTools().WriteTVectorDToXML( wght, "VecMeanB", fVecMeanB ); + gTools().WriteTMatrixDToXML( wght, "InvHMatS", fInvHMatrixS ); + gTools().WriteTMatrixDToXML( wght, "InvHMatB", fInvHMatrixB ); } -void TMVA::MethodHMatrix::ReadWeightsFromXML( void* wghtnode ){ +//_______________________________________________________________________ +void TMVA::MethodHMatrix::ReadWeightsFromXML( void* wghtnode ) +{ + // read weights from XML file + void* descnode = gTools().GetChild(wghtnode); - gTools().ReadTVectorDFromXML(descnode,"VecMeanS",fVecMeanS); + gTools().ReadTVectorDFromXML( descnode, "VecMeanS", fVecMeanS ); descnode = gTools().GetNextChild(descnode); - gTools().ReadTVectorDFromXML(descnode,"VecMeanB", fVecMeanB); + gTools().ReadTVectorDFromXML( descnode, "VecMeanB", fVecMeanB ); descnode = gTools().GetNextChild(descnode); - gTools().ReadTMatrixDFromXML(descnode,"InvHMatS",fInvHMatrixS); + gTools().ReadTMatrixDFromXML( descnode, "InvHMatS", fInvHMatrixS ); descnode = gTools().GetNextChild(descnode); - gTools().ReadTMatrixDFromXML(descnode,"InvHMatB",fInvHMatrixB); + gTools().ReadTMatrixDFromXML( descnode, "InvHMatB", fInvHMatrixB ); } //_______________________________________________________________________ @@ -392,8 +383,24 @@ void TMVA::MethodHMatrix::MakeClassSpecific( std::ostream& fout, const TString& fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << endl; fout << "{" << endl; fout << " // returns the H-matrix signal estimator" << endl; - fout << " double s = GetChi2( inputValues, " << Types::kSignal << " );" << endl; - fout << " double b = GetChi2( inputValues, " << Types::kBackground << " );" 
<< endl; + fout << " std::vector<double> inputValuesSig = inputValues;" << endl; + fout << " std::vector<double> inputValuesBgd = inputValues;" << endl; + if (GetTransformationHandler().GetTransformationList().GetSize() != 0) { + + UInt_t signalClass =DataInfo().GetClassInfo("Signal")->GetNumber(); + UInt_t backgroundClass=DataInfo().GetClassInfo("Background")->GetNumber(); + + fout << " Transform(inputValuesSig," << signalClass << ");" << endl; + fout << " Transform(inputValuesBgd," << backgroundClass << ");" << endl; + } + +// fout << " for(uint i=0; i<GetNvar(); ++i) std::cout << inputValuesSig.at(i) << \" \" << inputValuesBgd.at(i) << std::endl; " << endl; + + fout << " double s = GetChi2( inputValuesSig, " << Types::kSignal << " );" << endl; + fout << " double b = GetChi2( inputValuesBgd, " << Types::kBackground << " );" << endl; + +// fout << " std::cout << s << \" \" << b << std::endl; " << endl; + fout << " " << endl; fout << " if (s+b <= 0) std::cout << \"Problem in class " << className << "::GetMvaValue__: s+b = \"" << endl; fout << " << s+b << \" <= 0 \" << std::endl;" << endl; diff --git a/tmva/src/MethodLikelihood.cxx b/tmva/src/MethodLikelihood.cxx index d410793cf4a9d389085c2f02d2a38c6260377687..838006a4030124564d3702a70d94f4a988c920a7 100644 --- a/tmva/src/MethodLikelihood.cxx +++ b/tmva/src/MethodLikelihood.cxx @@ -308,9 +308,12 @@ void TMVA::MethodLikelihood::Train( void ) // the reference histograms require the correct boundaries. 
Since in Likelihood classification // the transformations are applied using both classes, also the corresponding boundaries // need to take this into account - vector<Double_t> xmin(GetNvar()), xmax(GetNvar()); - for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {xmin[ivar]=1e30; xmax[ivar]=-1e30;} - for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) { + UInt_t nvar=GetNvar(); + vector<Double_t> xmin(nvar), xmax(nvar); + for (UInt_t ivar=0; ivar<nvar; ivar++) {xmin[ivar]=1e30; xmax[ivar]=-1e30;} + + UInt_t nevents=Data()->GetNEvents(); + for (UInt_t ievt=0; ievt<nevents; ievt++) { // use the true-event-type's transformation // set the event true event types transformation const Event* origEv = Data()->GetEvent(ievt); @@ -319,7 +322,7 @@ void TMVA::MethodLikelihood::Train( void ) for (int cls=0;cls<2;cls++){ GetTransformationHandler().SetTransformationReferenceClass(cls); const Event* ev = GetTransformationHandler().Transform( origEv ); - for (UInt_t ivar=0; ivar<GetNvar(); ivar++) { + for (UInt_t ivar=0; ivar<nvar; ivar++) { Float_t value = ev->GetValue(ivar); if (value < xmin[ivar]) xmin[ivar] = value; if (value > xmax[ivar]) xmax[ivar] = value; @@ -432,15 +435,15 @@ Double_t TMVA::MethodLikelihood::GetMvaValue( Double_t* err, Double_t* errUpper // need to distinguish signal and background in case of variable transformation // signal first - //GetTransformationHandler().SetTransformationReferenceClass( DataInfo().GetClassInfo("Signal")->GetNumber() ); + GetTransformationHandler().SetTransformationReferenceClass( fSignalClass ); // temporary: JS --> FIX - GetTransformationHandler().SetTransformationReferenceClass( 0 ); + //GetTransformationHandler().SetTransformationReferenceClass( 0 ); const Event* ev = GetEvent(); for (ivar=0; ivar<GetNvar(); ivar++) vs(ivar) = ev->GetValue(ivar); - //GetTransformationHandler().SetTransformationReferenceClass( DataInfo().GetClassInfo("Background")->GetNumber() ); + GetTransformationHandler().SetTransformationReferenceClass( 
fBackgroundClass ); // temporary: JS --> FIX - GetTransformationHandler().SetTransformationReferenceClass( 1 ); + //GetTransformationHandler().SetTransformationReferenceClass( 1 ); ev = GetEvent(); for (ivar=0; ivar<GetNvar(); ivar++) vb(ivar) = ev->GetValue(ivar); @@ -457,7 +460,7 @@ Double_t TMVA::MethodLikelihood::GetMvaValue( Double_t* err, Double_t* errUpper // verify limits if (x[itype] >= (*fPDFSig)[ivar]->GetXmax()) x[itype] = (*fPDFSig)[ivar]->GetXmax() - 1.0e-10; - else if (x[itype] < (*fPDFSig)[ivar]->GetXmin()) x[itype] = (*fPDFSig)[ivar]->GetXmin(); + else if (x[itype] < (*fPDFSig)[ivar]->GetXmin()) x[itype] = (*fPDFSig)[ivar]->GetXmin(); // find corresponding histogram from cached indices PDF* pdf = (itype == 0) ? (*fPDFSig)[ivar] : (*fPDFBgd)[ivar]; diff --git a/tmva/src/MethodMLP.cxx b/tmva/src/MethodMLP.cxx index f7b24aca7f92dad111530f681cf48a712693899e..95a0962df1d77bc6dd4347be494628cef55e62be 100644 --- a/tmva/src/MethodMLP.cxx +++ b/tmva/src/MethodMLP.cxx @@ -22,7 +22,7 @@ * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany * * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei * * * - * Copyright (c) 2005: * + * Copyright (c) 2005-2011: * * CERN, Switzerland * * * * Redistribution and use in source and binary forms, with or without * @@ -76,7 +76,9 @@ TMVA::MethodMLP::MethodMLP( const TString& jobName, : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption, theTargetDir ), fPrior(0.0),//zjh fSamplingFraction(1.0), - fSamplingEpoch (0.0) + fSamplingEpoch (0.0), + fDeviationsFromTargets(0), + fWeightRange (1.0) { // standard constructor } @@ -88,7 +90,9 @@ TMVA::MethodMLP::MethodMLP( DataSetInfo& theData, : MethodANNBase( Types::kMLP, theData, theWeightFile, theTargetDir ), fPrior(0.0),//zjh fSamplingFraction(1.0), - fSamplingEpoch(0.0) + fSamplingEpoch(0.0), + fDeviationsFromTargets(0), + fWeightRange(1.0) { // constructor from a weight file } @@ -180,10 +184,14 @@ void TMVA::MethodMLP::DeclareOptions() 
DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator", "Use regulator to avoid over-training"); //zjh - DeclareOptionRef(fUpdateLimit=10, "UpdateLimit", - "Number of updates for regulator before stop training"); //zjh + DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit", + "Maximum times of regulator update"); //zjh DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors", "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh + + DeclareOptionRef(fWeightRange=1.0, "WeightRange", + "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range"); + } //_______________________________________________________________________ @@ -261,6 +269,13 @@ Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t i Int_t nEvents = GetNEvents(); UInt_t nClasses = DataInfo().GetNClasses(); UInt_t nTgts = DataInfo().GetNTargets(); + + + Float_t sumOfWeights = 0.f; + if( fWeightRange < 1.f ){ + fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents); + } + for (Int_t i = 0; i < nEvents; i++) { const Event* ev = GetEvent(i); @@ -283,9 +298,10 @@ Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t i if (fEstimator==kCE){ Double_t norm(0); for (UInt_t icls = 0; icls < nClasses; icls++) { - norm += exp( GetOutputNeuron( icls )->GetActivationValue()); + Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue(); + norm += exp( activationValue ); if(icls==cls) - d = exp( GetOutputNeuron( icls )->GetActivationValue()); + d = exp( activationValue ); } d = -TMath::Log(d/norm); } @@ -305,20 +321,48 @@ Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t i estimator += d*w; //zjh } + if( fDeviationsFromTargets ) + fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w)); + + sumOfWeights += w; + + // fill monitoring 
histograms if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) ); else if (histB != 0) histB->Fill( float(v), float(w) ); } + + if( fDeviationsFromTargets ) { + std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end()); + + Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights; + estimator = 0.f; + + Float_t weightRangeCut = fWeightRange*sumOfWeights; + Float_t weightSum = 0.f; + for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){ + float deviation = (*itDev).first; + float devWeight = (*itDev).second; + weightSum += devWeight; // add the weight of this event + if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange + estimator += devWeight*deviation; + } + } + + sumOfWeights = sumOfWeightsInRange; + delete fDeviationsFromTargets; + } + if (histS != 0) fEpochMonHistS.push_back( histS ); if (histB != 0) fEpochMonHistB.push_back( histB ); //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents)); //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents)); //else estimator = estimator*0.5/Float_t(nEvents); - if (DoRegression()) estimator = estimator/Float_t(nEvents); - else if (DoMulticlass()) estimator = estimator/Float_t(nEvents); - else estimator = estimator/Float_t(nEvents); + if (DoRegression()) estimator = estimator/Float_t(sumOfWeights); + else if (DoMulticlass()) estimator = estimator/Float_t(sumOfWeights); + else estimator = estimator/Float_t(sumOfWeights); //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh @@ -348,7 +392,7 @@ void TMVA::MethodMLP::Train(Int_t nEpochs) Int_t nEvents=GetNEvents(); Int_t nSynapses=fSynapses->GetEntriesFast(); if (nSynapses>nEvents) - Log()<<kFATAL<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl; + Log()<<kWARNING<<"ANN too complicated: 
#events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl; #ifdef MethodMLP_UseMinuit__ if (useMinuit) MinuitMinimize(); @@ -484,11 +528,9 @@ void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs ) //zjh+ if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl; AccuError+=dError; - if (std::abs(dError)>0.0001) RegUpdateCD=0; - if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=((0.4*fResetStep)>50?50:(0.4*fResetStep)) && i<0.8*nEpochs && AccuError>0.01 ) { - Log()<<kDEBUG <<Endl; - Log()<<kDEBUG<<"\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl; + if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) { + Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl; UpdateRegulators(); Hessian.UnitMatrix(); RegUpdateCD=0; @@ -524,13 +566,17 @@ void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs ) } // draw progress - TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE ); //zjh + TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh if (fSteps > 0) { Float_t progress = 0; if (Float_t(i)/nEpochs < fSamplingEpoch) - progress = Progress()*fSamplingEpoch*fSamplingFraction*100; +// progress = Progress()*fSamplingEpoch*fSamplingFraction*100; + progress = Progress()*fSamplingFraction*100*fSamplingEpoch; else - progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress()); + { +// progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress()); + progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress()); + } Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh if (progress2>progress) progress=progress2; //zjh timer.DrawProgressBar( Int_t(progress), convText ); @@ -1054,8 +1100,8 @@ void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch) TSynapse* synapse; Int_t numSynapses = 
fSynapses->GetEntriesFast(); for (Int_t i = 0; i < numSynapses; i++) { - synapse = (TSynapse*)fSynapses->At(i); - if (lateEpoch) synapse->DecayLearningRate(fDecayRate*fDecayRate); + synapse = (TSynapse*)fSynapses->At(i); + if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square. else synapse->DecayLearningRate(fDecayRate); } } @@ -1143,7 +1189,7 @@ void TMVA::MethodMLP::UpdateNetwork(std::vector<Float_t>& desired, Double_t even { // update the network based on how closely // the output matched the desired output - for (UInt_t i = 0; i < desired.size(); i++) { + for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) { Double_t error = GetOutputNeuron( i )->GetActivationValue() - desired.at(i); error *= eventWeight; GetOutputNeuron( i )->SetError(error); @@ -1372,12 +1418,12 @@ void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) } //_______________________________________________________________________ -Double_t TMVA::MethodMLP::GetMvaValueAsymError( Double_t* errLower, Double_t* errUpper ) +Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper ) { - Double_t MvaValue = GetMvaValue();// contains back propagation + Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation // no hessian (old training file) or no error reqested - if (fInvHessian.GetNcols()==0 || errLower==0 || errUpper==0) + if (!fCalculateErrors || errLower==0 || errUpper==0) return MvaValue; Double_t MvaUpper,MvaLower,median,variance; @@ -1400,8 +1446,13 @@ Double_t TMVA::MethodMLP::GetMvaValueAsymError( Double_t* errLower, Double_t* er TMatrixD sig=sensT*fInvHessian*sens; variance=sig[0][0]; median=GetOutputNeuron()->GetValue(); - //Log()<<kDEBUG<<"median="<<median<<"\tvariance="<<variance<<Endl; + if (variance<0) { + Log()<<kWARNING<<"Negative variance!!! 
median=" << median << "\tvariance(sigma^2)=" << variance <<Endl; + variance=0; + } + variance=sqrt(variance); + //upper MvaUpper=fOutput->Eval(median+variance); if(errUpper) @@ -1412,12 +1463,6 @@ Double_t TMVA::MethodMLP::GetMvaValueAsymError( Double_t* errLower, Double_t* er if(errLower) *errLower=MvaValue-MvaLower; - if (variance<0) { - Log()<<kWARNING<<"median=" << median << "\tvariance=" << variance - <<"MvaLower=" << MvaLower <<"\terrLower=" << (errLower?*errLower:0) - <<"MvaUpper=" << MvaUpper <<"\terrUpper=" << (errUpper?*errUpper:0) - <<Endl; - } return MvaValue; } diff --git a/tmva/src/MethodPDEFoam.cxx b/tmva/src/MethodPDEFoam.cxx index 2ab8d8a40a4faf5ca9ffe5d08e5e8b6bbcb9e96f..1406843a93cb791c1cd528fd4011527e44b5c3a0 100644 --- a/tmva/src/MethodPDEFoam.cxx +++ b/tmva/src/MethodPDEFoam.cxx @@ -13,13 +13,13 @@ * Authors (alphabetical): * * Tancredi Carli - CERN, Switzerland * * Dominik Dannheim - CERN, Switzerland * - * Alexander Voigt - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * * Peter Speckmayer - CERN, Switzerland * * * * Original author of the TFoam implementation: * * S. 
Jadach - Institute of Nuclear Physics, Cracow, Poland * * * - * Copyright (c) 2008: * + * Copyright (c) 2008, 2010: * * CERN, Switzerland * * MPI-K Heidelberg, Germany * * * @@ -32,6 +32,7 @@ #include <iomanip> #include <cassert> +#include <climits> #include "TMath.h" #include "Riostream.h" @@ -44,6 +45,12 @@ #include "TMVA/Types.h" #include "TMVA/ClassifierFactory.h" #include "TMVA/Config.h" +#include "TMVA/SeparationBase.h" +#include "TMVA/GiniIndex.h" +#include "TMVA/GiniIndexWithLaplace.h" +#include "TMVA/MisClassificationError.h" +#include "TMVA/CrossEntropy.h" +#include "TMVA/SdivSqrtSplusB.h" REGISTER_METHOD(PDEFoam) @@ -59,8 +66,7 @@ TMVA::MethodPDEFoam::MethodPDEFoam( const TString& jobName, , fSigBgSeparated(kFALSE) , fFrac(0.001) , fDiscrErrCut(-1.0) - , fVolFrac(30.0) - , fVolFracInv(1.0/30.0) + , fVolFrac(1.0/15.0) , fnCells(999) , fnActiveCells(500) , fnSampl(2000) @@ -73,6 +79,7 @@ TMVA::MethodPDEFoam::MethodPDEFoam( const TString& jobName, , fMaxDepth(0) , fKernelStr("None") , fKernel(kNone) + , fKernelEstimator(NULL) , fTargetSelectionStr("Mean") , fTargetSelection(kMean) , fFillFoamWithOrigWeights(kFALSE) @@ -80,8 +87,8 @@ TMVA::MethodPDEFoam::MethodPDEFoam( const TString& jobName, , fDTLogic("None") , fDTSeparation(kFoam) , fPeekMax(kTRUE) - , fXmin(std::vector<Double_t>()) - , fXmax(std::vector<Double_t>()) + , fXmin(std::vector<Float_t>()) + , fXmax(std::vector<Float_t>()) , fFoam(std::vector<PDEFoam*>()) { // init PDEFoam objects @@ -95,8 +102,7 @@ TMVA::MethodPDEFoam::MethodPDEFoam( DataSetInfo& dsi, , fSigBgSeparated(kFALSE) , fFrac(0.001) , fDiscrErrCut(-1.0) - , fVolFrac(30.0) - , fVolFracInv(1.0/30.0) + , fVolFrac(1.0/15.0) , fnCells(999) , fnActiveCells(500) , fnSampl(2000) @@ -109,6 +115,7 @@ TMVA::MethodPDEFoam::MethodPDEFoam( DataSetInfo& dsi, , fMaxDepth(0) , fKernelStr("None") , fKernel(kNone) + , fKernelEstimator(NULL) , fTargetSelectionStr("Mean") , fTargetSelection(kMean) , fFillFoamWithOrigWeights(kFALSE) @@ -116,8 +123,8 @@ 
TMVA::MethodPDEFoam::MethodPDEFoam( DataSetInfo& dsi, , fDTLogic("None") , fDTSeparation(kFoam) , fPeekMax(kTRUE) - , fXmin(std::vector<Double_t>()) - , fXmax(std::vector<Double_t>()) + , fXmin(std::vector<Float_t>()) + , fXmax(std::vector<Float_t>()) , fFoam(std::vector<PDEFoam*>()) { // constructor from weight file @@ -126,9 +133,10 @@ TMVA::MethodPDEFoam::MethodPDEFoam( DataSetInfo& dsi, //_______________________________________________________________________ Bool_t TMVA::MethodPDEFoam::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ ) { - // PDEFoam can handle classification with 2 classes and regression + // PDEFoam can handle classification with multiple classes and regression // with one or more regression-targets if (type == Types::kClassification && numberClasses == 2) return kTRUE; + if (type == Types::kMulticlass ) return kTRUE; if (type == Types::kRegression) return kTRUE; return kFALSE; } @@ -142,8 +150,7 @@ void TMVA::MethodPDEFoam::Init( void ) fSigBgSeparated = kFALSE; // default: unified foam fFrac = 0.001; // fraction of outlier events fDiscrErrCut = -1.; // cut on discriminator error - fVolFrac = 30.0; // inverse range searching box size - fVolFracInv = 1./30.; // range searching box size + fVolFrac = 1./15.; // range searching box size fnActiveCells = 500; // number of active cells to create fnCells = fnActiveCells*2-1; // total number of cells fnSampl = 2000; // number of sampling points in cell @@ -155,17 +162,15 @@ void TMVA::MethodPDEFoam::Init( void ) fUseYesNoCell = kFALSE; // return -1 or 1 for bg or signal events fDTLogic = "None"; // decision tree algorithmus fDTSeparation = kFoam; // separation type - fPeekMax = kTRUE; // peek cell with max separation fKernel = kNone; // default: use no kernel + fKernelEstimator= NULL; // kernel estimator used during evaluation fTargetSelection= kMean; // default: use mean for target selection (only multi target regression!) 
fCompress = kTRUE; // compress ROOT output file fMultiTargetRegression = kFALSE; // multi-target regression - for (UInt_t i=0; i<fFoam.size(); i++) - if (fFoam.at(i)) delete fFoam.at(i); - fFoam.clear(); + DeleteFoams(); if (fUseYesNoCell) SetSignalReferenceCut( 0.0 ); // MVA output in [-1, 1] @@ -181,7 +186,7 @@ void TMVA::MethodPDEFoam::DeclareOptions() // DeclareOptionRef( fSigBgSeparated = kFALSE, "SigBgSeparate", "Separate foams for signal and background" ); DeclareOptionRef( fFrac = 0.001, "TailCut", "Fraction of outlier events that are excluded from the foam in each dimension" ); - DeclareOptionRef( fVolFracInv = 1./30., "VolFrac", "Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)"); + DeclareOptionRef( fVolFrac = 1./15., "VolFrac", "Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)"); DeclareOptionRef( fnActiveCells = 500, "nActiveCells", "Maximum number of active cells to be created by the foam"); DeclareOptionRef( fnSampl = 2000, "nSampl", "Number of generated MC events per cell"); DeclareOptionRef( fnBin = 5, "nBin", "Number of bins in edge histograms"); @@ -196,7 +201,8 @@ void TMVA::MethodPDEFoam::DeclareOptions() AddPreDefVal(TString("GiniIndex")); AddPreDefVal(TString("MisClassificationError")); AddPreDefVal(TString("CrossEntropy")); - DeclareOptionRef( fPeekMax = kTRUE, "PeekMax", "Peek up cell with max. 
driver integral for the next split"); + AddPreDefVal(TString("GiniIndexWithLaplace")); + AddPreDefVal(TString("SdivSqrtSplusB")); DeclareOptionRef( fKernelStr = "None", "Kernel", "Kernel type used"); AddPreDefVal(TString("None")); @@ -208,9 +214,11 @@ void TMVA::MethodPDEFoam::DeclareOptions() } +//_______________________________________________________________________ void TMVA::MethodPDEFoam::DeclareCompatibilityOptions() { MethodBase::DeclareCompatibilityOptions(); DeclareOptionRef(fCutNmin = kTRUE, "CutNmin", "Requirement for minimal number of events in cell"); + DeclareOptionRef(fPeekMax = kTRUE, "PeekMax", "Peek cell with max. loss for the next split"); } //_______________________________________________________________________ @@ -229,13 +237,9 @@ void TMVA::MethodPDEFoam::ProcessOptions() } fnCells = fnActiveCells*2-1; - fVolFrac = 1./fVolFracInv; - // DT logic is only applicable if a single foam is trained if (fSigBgSeparated && fDTLogic != "None") { - Log() << kWARNING << "Decision tree logic works only for a single foam (SigBgSeparate=F)" << Endl; - fDTLogic = "None"; - fDTSeparation = kFoam; + Log() << kFATAL << "Decision tree logic works only for a single foam (SigBgSeparate=F)" << Endl; } // set separation to use @@ -247,6 +251,10 @@ void TMVA::MethodPDEFoam::ProcessOptions() fDTSeparation = kMisClassificationError; else if (fDTLogic == "CrossEntropy") fDTSeparation = kCrossEntropy; + else if (fDTLogic == "GiniIndexWithLaplace") + fDTSeparation = kGiniIndexWithLaplace; + else if (fDTLogic == "SdivSqrtSplusB") + fDTSeparation = kSdivSqrtSplusB; else { Log() << kWARNING << "Unknown separation type: " << fDTLogic << ", setting to None" << Endl; @@ -266,10 +274,10 @@ void TMVA::MethodPDEFoam::ProcessOptions() TMVA::MethodPDEFoam::~MethodPDEFoam( void ) { // destructor - for (UInt_t i=0; i<fFoam.size(); i++) { - if (fFoam.at(i)) delete fFoam.at(i); - } - fFoam.clear(); + DeleteFoams(); + + if (fKernelEstimator != NULL) + delete fKernelEstimator; } 
//_______________________________________________________________________ @@ -286,8 +294,8 @@ void TMVA::MethodPDEFoam::CalcXminXmax() if (fMultiTargetRegression) kDim += tDim; - Double_t *xmin = new Double_t[kDim]; - Double_t *xmax = new Double_t[kDim]; + Float_t *xmin = new Float_t[kDim]; + Float_t *xmax = new Float_t[kDim]; // set default values for (UInt_t dim=0; dim<kDim; dim++) { @@ -304,7 +312,7 @@ void TMVA::MethodPDEFoam::CalcXminXmax() for (Long64_t i=0; i<(GetNEvents()); i++) { // events loop const Event* ev = GetEvent(i); for (UInt_t dim=0; dim<kDim; dim++) { // variables loop - Double_t val; + Float_t val; if (fMultiTargetRegression) { if (dim < vDim) val = ev->GetValue(dim); @@ -390,9 +398,7 @@ void TMVA::MethodPDEFoam::Train( void ) CalcXminXmax(); // delete foams - for (UInt_t i=0; i<fFoam.size(); i++) - if (fFoam.at(i)) delete fFoam.at(i); - fFoam.clear(); + DeleteFoams(); // start training if (DoRegression()) { @@ -402,29 +408,31 @@ void TMVA::MethodPDEFoam::Train( void ) TrainMonoTargetRegression(); } else { - if (DataInfo().GetNormalization() != "EQUALNUMEVENTS" ) { - Log() << kINFO << "NormMode=" << DataInfo().GetNormalization() - << " chosen. Note that only NormMode=EqualNumEvents" - << " ensures that Discriminant values correspond to" - << " signal probabilities." << Endl; + if (DoMulticlass()) + TrainMultiClassification(); + else { + if (DataInfo().GetNormalization() != "EQUALNUMEVENTS" ) { + Log() << kINFO << "NormMode=" << DataInfo().GetNormalization() + << " chosen. Note that only NormMode=EqualNumEvents" + << " ensures that Discriminant values correspond to" + << " signal probabilities." 
<< Endl; + } + + Log() << kDEBUG << "N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl; + Log() << kDEBUG << "N_bg for training events: " << Data()->GetNEvtBkgdTrain() << Endl; + Log() << kDEBUG << "User normalization: " << DataInfo().GetNormalization().Data() << Endl; + + if (fSigBgSeparated) + TrainSeparatedClassification(); + else + TrainUnifiedClassification(); } - - Log() << kDEBUG << "N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl; - Log() << kDEBUG << "N_bg for training events: " << Data()->GetNEvtBkgdTrain() << Endl; - Log() << kDEBUG << "User normalization: " << DataInfo().GetNormalization().Data() << Endl; - - if (fSigBgSeparated) - TrainSeparatedClassification(); - else - TrainUnifiedClassification(); } - // check cells and delete the binary search tree in order to save - // memory + // delete the binary search tree in order to save memory for(UInt_t i=0; i<fFoam.size(); i++) { - Log() << kVERBOSE << "Check all cells and remove cells with volume 0" << Endl; - fFoam.at(i)->CheckCells(true); - if(fFoam.at(i)) fFoam.at(i)->DeleteBinarySearchTree(); + if(fFoam.at(i)) + fFoam.at(i)->DeleteBinarySearchTree(); } } @@ -440,8 +448,7 @@ void TMVA::MethodPDEFoam::TrainSeparatedClassification() for(int i=0; i<2; i++) { // create 2 PDEFoams - fFoam.push_back( new PDEFoam(foamcaption[i]) ); - InitFoam(fFoam.back(), kSeparate); + fFoam.push_back( InitFoam(foamcaption[i], kSeparate) ); Log() << kVERBOSE << "Filling binary search tree of " << foamcaption[i] << " with events" << Endl; @@ -449,7 +456,8 @@ void TMVA::MethodPDEFoam::TrainSeparatedClassification() for (Long64_t k=0; k<GetNEvents(); k++) { const Event* ev = GetEvent(k); if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev))) - fFoam.back()->FillBinarySearchTree(ev, IgnoreEventsWithNegWeightsInTraining()); + if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0)) + fFoam.back()->FillBinarySearchTree(ev); } Log() << kINFO << "Build up " << 
foamcaption[i] << Endl; @@ -458,9 +466,11 @@ void TMVA::MethodPDEFoam::TrainSeparatedClassification() Log() << kVERBOSE << "Filling foam cells with events" << Endl; // loop over all events -> fill foam cells for (Long64_t k=0; k<GetNEvents(); k++) { - const Event* ev = GetEvent(k); + const Event* ev = GetEvent(k); + Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight(); if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev))) - fFoam.back()->FillFoamCells(ev, IgnoreEventsWithNegWeightsInTraining()); + if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0)) + fFoam.back()->FillFoamCells(ev, weight); } } } @@ -471,25 +481,68 @@ void TMVA::MethodPDEFoam::TrainUnifiedClassification() // Create only one unified foam which contains discriminator // (N_sig)/(N_sig + N_bg) - fFoam.push_back( new PDEFoam("DiscrFoam") ); - InitFoam(fFoam.back(), kDiscr); + fFoam.push_back( InitFoam("DiscrFoam", kDiscr, 0) ); // class 0 = signal Log() << kVERBOSE << "Filling binary search tree of discriminator foam with events" << Endl; // insert event to BinarySearchTree - for (Long64_t k=0; k<GetNEvents(); k++) - fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining()); + for (Long64_t k=0; k<GetNEvents(); k++) { + const Event* ev = GetEvent(k); + if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0)) + fFoam.back()->FillBinarySearchTree(ev); + } Log() << kINFO << "Build up discriminator foam" << Endl; fFoam.back()->Create(); // build foam Log() << kVERBOSE << "Filling foam cells with events" << Endl; // loop over all training events -> fill foam cells with N_sig and N_Bg - for (UInt_t k=0; k<GetNEvents(); k++) - fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining()); + for (UInt_t k=0; k<GetNEvents(); k++) { + const Event* ev = GetEvent(k); + Float_t weight = fFillFoamWithOrigWeights ? 
ev->GetOriginalWeight() : ev->GetWeight(); + if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0)) + fFoam.back()->FillFoamCells(ev, weight); + } Log() << kVERBOSE << "Calculate cell discriminator"<< Endl; // calc discriminator (and it's error) for each cell - fFoam.back()->CalcCellDiscr(); + fFoam.back()->Finalize(); +} + +//_______________________________________________________________________ +void TMVA::MethodPDEFoam::TrainMultiClassification() +{ + // Create one foam discriminator foam for every class, where the + // disciminant equals N_class/N_total. + + for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) { + + fFoam.push_back( InitFoam(Form("MultiClassFoam%u",iClass), kMultiClass, iClass) ); + + Log() << kVERBOSE << "Filling binary search tree of multiclass foam " + << iClass << " with events" << Endl; + // insert event to BinarySearchTree + for (Long64_t k=0; k<GetNEvents(); k++) { + const Event* ev = GetEvent(k); + if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0)) + fFoam.back()->FillBinarySearchTree(ev); + } + + Log() << kINFO << "Build up multiclass foam " << iClass << Endl; + fFoam.back()->Create(); // build foam + + Log() << kVERBOSE << "Filling foam cells with events" << Endl; + // loop over all training events -> fill foam cells with N_sig and N_Bg + for (UInt_t k=0; k<GetNEvents(); k++) { + const Event* ev = GetEvent(k); + Float_t weight = fFillFoamWithOrigWeights ? 
ev->GetOriginalWeight() : ev->GetWeight(); + if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0)) + fFoam.back()->FillFoamCells(ev, weight); + } + + Log() << kVERBOSE << "Calculate cell discriminator"<< Endl; + // calc discriminator (and it's error) for each cell + fFoam.back()->Finalize(); + } } //_______________________________________________________________________ @@ -511,26 +564,31 @@ void TMVA::MethodPDEFoam::TrainMonoTargetRegression() else Log() << kDEBUG << "MethodPDEFoam: number of Targets: " << Data()->GetNTargets() << Endl; - TString foamcaption = "MonoTargetRegressionFoam"; - fFoam.push_back( new PDEFoam(foamcaption) ); - InitFoam(fFoam.back(), kMonoTarget); + fFoam.push_back( InitFoam("MonoTargetRegressionFoam", kMonoTarget) ); Log() << kVERBOSE << "Filling binary search tree with events" << Endl; // insert event to BinarySearchTree - for (Long64_t k=0; k<GetNEvents(); k++) - fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining()); + for (Long64_t k=0; k<GetNEvents(); k++) { + const Event* ev = GetEvent(k); + if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0)) + fFoam.back()->FillBinarySearchTree(ev); + } Log() << kINFO << "Build mono target regression foam" << Endl; fFoam.back()->Create(); // build foam Log() << kVERBOSE << "Filling foam cells with events" << Endl; // loop over all events -> fill foam cells with target - for (UInt_t k=0; k<GetNEvents(); k++) - fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining()); + for (UInt_t k=0; k<GetNEvents(); k++) { + const Event* ev = GetEvent(k); + Float_t weight = fFillFoamWithOrigWeights ? 
ev->GetOriginalWeight() : ev->GetWeight(); + if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0)) + fFoam.back()->FillFoamCells(ev, weight); + } Log() << kVERBOSE << "Calculate average cell targets"<< Endl; // calc weight (and it's error) for each cell - fFoam.back()->CalcCellTarget(); + fFoam.back()->Finalize(); } //_______________________________________________________________________ @@ -548,23 +606,40 @@ void TMVA::MethodPDEFoam::TrainMultiTargetRegression() Log() << kFATAL << "LinNeighbors kernel currently not supported" << " for multi target regression" << Endl; - TString foamcaption = "MultiTargetRegressionFoam"; - fFoam.push_back( new PDEFoam(foamcaption) ); - InitFoam(fFoam.back(), kMultiTarget); + fFoam.push_back( InitFoam("MultiTargetRegressionFoam", kMultiTarget) ); Log() << kVERBOSE << "Filling binary search tree of multi target regression foam with events" << Endl; // insert event to BinarySearchTree - for (Long64_t k=0; k<GetNEvents(); k++) - fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining()); + for (Long64_t k=0; k<GetNEvents(); k++) { + Event *ev = new Event(*GetEvent(k)); + // since in multi-target regression targets are handled like + // variables --> remove targets and add them to the event variabels + std::vector<Float_t> targets = ev->GetTargets(); + for (UInt_t i = 0; i < targets.size(); i++) + ev->SetVal(i+ev->GetValues().size(), targets.at(i)); + ev->GetTargets().clear(); + if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0)) + fFoam.back()->FillBinarySearchTree(ev); + } Log() << kINFO << "Build multi target regression foam" << Endl; fFoam.back()->Create(); // build foam Log() << kVERBOSE << "Filling foam cells with events" << Endl; // loop over all events -> fill foam cells with number of events - for (UInt_t k=0; k<GetNEvents(); k++) - fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining()); + for (UInt_t k=0; k<GetNEvents(); k++) { + Event *ev 
= new Event(*GetEvent(k)); + // since in multi-target regression targets are handled like + // variables --> remove targets and add them to the event variabels + std::vector<Float_t> targets = ev->GetTargets(); + Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight(); + for (UInt_t i = 0; i < targets.size(); i++) + ev->SetVal(i+ev->GetValues().size(), targets.at(i)); + ev->GetTargets().clear(); + if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0)) + fFoam.back()->FillFoamCells(ev, weight); + } } //_______________________________________________________________________ @@ -583,11 +658,10 @@ Double_t TMVA::MethodPDEFoam::GetMvaValue( Double_t* err, Double_t* errUpper ) if (fSigBgSeparated) { std::vector<Float_t> xvec = ev->GetValues(); - Double_t density_sig = 0.; - Double_t density_bg = 0.; - - density_sig = fFoam.at(0)->GetCellDensity(xvec, fKernel); // get signal event density - density_bg = fFoam.at(1)->GetCellDensity(xvec, fKernel); // get background event density + Double_t density_sig = 0.; // calc signal event density + Double_t density_bg = 0.; // calc background event density + density_sig = fFoam.at(0)->GetCellValue(xvec, kValueDensity, fKernelEstimator); + density_bg = fFoam.at(1)->GetCellValue(xvec, kValueDensity, fKernelEstimator); // calc disciminator (normed!) 
if ( (density_sig+density_bg) > 0 ) @@ -596,8 +670,8 @@ Double_t TMVA::MethodPDEFoam::GetMvaValue( Double_t* err, Double_t* errUpper ) discr = 0.5; // assume 50% signal probability, if no events found (bad assumption, but can be overruled by cut on error) // do error estimation (not jet used in TMVA) - Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kNev); - Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kNev); + Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kValue, fKernelEstimator); + Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kValue, fKernelEstimator); Double_t scaleB = 1.; Double_t errorS = TMath::Sqrt(neventsS); // estimation of statistical error on counted signal events Double_t errorB = TMath::Sqrt(neventsB); // estimation of statistical error on counted background events @@ -622,8 +696,8 @@ Double_t TMVA::MethodPDEFoam::GetMvaValue( Double_t* err, Double_t* errUpper ) std::vector<Float_t> xvec = ev->GetValues(); // get discriminator direct from the foam - discr = fFoam.at(0)->GetCellDiscr(xvec, fKernel); - discr_error = fFoam.at(0)->GetCellValue(xvec, kDiscriminatorError); + discr = fFoam.at(0)->GetCellValue(xvec, kValue, fKernelEstimator); + discr_error = fFoam.at(0)->GetCellValue(xvec, kValueError, fKernelEstimator); } // attribute error @@ -636,6 +710,105 @@ Double_t TMVA::MethodPDEFoam::GetMvaValue( Double_t* err, Double_t* errUpper ) return discr; } +//_______________________________________________________________________ +const std::vector<Float_t>& TMVA::MethodPDEFoam::GetMulticlassValues() +{ + // get the multiclass MVA response for the PDEFoam classifier + + const TMVA::Event *ev = GetEvent(); + std::vector<Float_t> xvec = ev->GetValues(); + + if (fMulticlassReturnVal == NULL) + fMulticlassReturnVal = new std::vector<Float_t>(); + fMulticlassReturnVal->clear(); + + std::vector<Float_t> temp; // temp class. 
values + UInt_t nClasses = DataInfo().GetNClasses(); + for (UInt_t iClass = 0; iClass < nClasses; ++iClass) { + temp.push_back(fFoam.at(iClass)->GetCellValue(xvec, kValue, fKernelEstimator)); + } + + for (UInt_t iClass = 0; iClass < nClasses; ++iClass) { + Float_t norm = 0.0; // normalization + for (UInt_t j = 0; j < nClasses; ++j) { + if (iClass != j) + norm += exp(temp[j] - temp[iClass]); + } + fMulticlassReturnVal->push_back(1.0 / (1.0 + norm)); + } + + return *fMulticlassReturnVal; +} + +//_______________________________________________________________________ +const TMVA::Ranking* TMVA::MethodPDEFoam::CreateRanking() +{ + // Compute ranking of input variables + + // create the ranking object + fRanking = new Ranking(GetName(), "Variable Importance"); + std::vector<Float_t> importance(GetNvar(), 0); + + // determine variable importances + for (UInt_t ifoam = 0; ifoam < fFoam.size(); ++ifoam) { + // get the number of cuts made in every dimension of foam + PDEFoamCell *root_cell = fFoam.at(ifoam)->GetRootCell(); + std::vector<UInt_t> nCuts(fFoam.at(ifoam)->GetTotDim(), 0); + GetNCuts(root_cell, nCuts); + + // fill the importance vector (ignoring the target dimensions in + // case of a multi-target regression foam) + UInt_t SumCuts = 0; + std::vector<Float_t> tmp_importance; + for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) { + SumCuts += nCuts.at(ivar); + tmp_importance.push_back( nCuts.at(ivar) ); + } + // normalization of the variable importances of this foam: the + // sum of all variable importances equals 1 for this foam + for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) { + if (SumCuts > 0) + tmp_importance.at(ivar) /= SumCuts; + else + tmp_importance.at(ivar) = 0; + } + // the overall variable importance is the average over all foams + for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) { + importance.at(ivar) += tmp_importance.at(ivar) / fFoam.size(); + } + } + + // fill ranking vector + for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) { + 
fRanking->AddRank(Rank(GetInputLabel(ivar), importance.at(ivar))); + } + + return fRanking; +} + +//_______________________________________________________________________ +void TMVA::MethodPDEFoam::GetNCuts(PDEFoamCell *cell, std::vector<UInt_t> &nCuts) +{ + // Fill in 'nCuts' the number of cuts made in every foam dimension, + // starting at the root cell 'cell'. + // + // Parameters: + // + // - cell - root cell to start the counting from + // + // - nCuts - the number of cuts are saved in this vector + + if (cell->GetStat() == 1) // cell is active + return; + + nCuts.at(cell->GetBest())++; + + if (cell->GetDau0() != NULL) + GetNCuts(cell->GetDau0(), nCuts); + if (cell->GetDau1() != NULL) + GetNCuts(cell->GetDau1(), nCuts); +} + //_______________________________________________________________________ void TMVA::MethodPDEFoam::SetXminXmax( TMVA::PDEFoam *pdefoam ) { @@ -659,52 +832,121 @@ void TMVA::MethodPDEFoam::SetXminXmax( TMVA::PDEFoam *pdefoam ) } //_______________________________________________________________________ -void TMVA::MethodPDEFoam::InitFoam(TMVA::PDEFoam *pdefoam, EFoamType ft) +TMVA::PDEFoam* TMVA::MethodPDEFoam::InitFoam(TString foamcaption, EFoamType ft, UInt_t cls) { - // Set foam options (incl. Xmin, Xmax) and initialize foam via - // pdefoam->Init() + // Create new PDEFoam and set foam options (incl. Xmin, Xmax) and + // initialize foam via pdefoam->Initialize() - if (!pdefoam){ - Log() << kFATAL << "Null pointer given!" 
<< Endl; - return; + // number of foam dimensions + Int_t dim = 1; + if (ft == kMultiTarget) + // dimension of foam = number of targets + non-targets + dim = Data()->GetNTargets() + Data()->GetNVariables(); + else + dim = GetNvar(); + + // calculate range-searching box + std::vector<Double_t> box; + for (Int_t idim = 0; idim < dim; ++idim) { + box.push_back((fXmax.at(idim) - fXmin.at(idim))* fVolFrac); + } + + // create PDEFoam and PDEFoamDensityBase + PDEFoam *pdefoam = NULL; + PDEFoamDensityBase *density = NULL; + if (fDTSeparation == kFoam) { + // use PDEFoam algorithm + switch (ft) { + case kSeparate: + pdefoam = new PDEFoamEvent(foamcaption); + density = new PDEFoamEventDensity(box); + break; + case kMultiTarget: + pdefoam = new PDEFoamMultiTarget(foamcaption, fTargetSelection); + density = new PDEFoamEventDensity(box); + break; + case kDiscr: + case kMultiClass: + pdefoam = new PDEFoamDiscriminant(foamcaption, cls); + density = new PDEFoamDiscriminantDensity(box, cls); + break; + case kMonoTarget: + pdefoam = new PDEFoamTarget(foamcaption, 0); + density = new PDEFoamTargetDensity(box, 0); + break; + default: + Log() << kFATAL << "Unknown PDEFoam type!" 
<< Endl; + break; + } + } else { + // create a decision tree like PDEFoam + SeparationBase *sepType = NULL; + switch (fDTSeparation) { + case kGiniIndex: + sepType = new GiniIndex(); + break; + case kMisClassificationError: + sepType = new MisClassificationError(); + break; + case kCrossEntropy: + sepType = new CrossEntropy(); + break; + case kGiniIndexWithLaplace: + sepType = new GiniIndexWithLaplace(); + break; + case kSdivSqrtSplusB: + sepType = new SdivSqrtSplusB(); + break; + default: + Log() << kFATAL << "Separation type " << fDTSeparation + << " currently not supported" << Endl; + break; + } + switch (ft) { + case kDiscr: + case kMultiClass: + pdefoam = new PDEFoamDecisionTree(foamcaption, sepType, cls); + density = new PDEFoamDecisionTreeDensity(box, cls); + break; + default: + Log() << kFATAL << "Decision tree cell split algorithm is only" + << " available for (multi) classification with a single" + << " PDE-Foam (SigBgSeparate=F)" << Endl; + break; + } } + pdefoam->SetDensity(density); + + // create pdefoam kernel + fKernelEstimator = CreatePDEFoamKernel(); // set fLogger attributes pdefoam->Log().SetMinType(this->Log().GetMinType()); - - // Set foam fill value - pdefoam->SetFoamType(ft); - // set Options VolFrac, kDim, ... - if (ft==kMultiTarget) - // dimension of foam = number of targets + non-targets - pdefoam->SetDim( Data()->GetNTargets()+Data()->GetNVariables()); - else - pdefoam->SetDim( GetNvar()); // Mandatory! - pdefoam->SetVolumeFraction(fVolFrac); // Mandatory! 
+ // set PDEFoam parameters + pdefoam->SetDim( dim); pdefoam->SetnCells( fnCells); // optional pdefoam->SetnSampl( fnSampl); // optional pdefoam->SetnBin( fnBin); // optional pdefoam->SetEvPerBin( fEvPerBin); // optional - pdefoam->SetFillFoamWithOrigWeights(fFillFoamWithOrigWeights); - pdefoam->SetDTSeparation(fDTSeparation); - pdefoam->SetPeekMax(fPeekMax); // cuts pdefoam->SetNmin(fNmin); pdefoam->SetMaxDepth(fMaxDepth); // maximum cell tree depth // Init PDEFoam - pdefoam->Init(); + pdefoam->Initialize(); // Set Xmin, Xmax SetXminXmax(pdefoam); + + return pdefoam; } //_______________________________________________________________________ const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues() { - // Return regression values for both multi and mono target regression + // Return regression values for both multi- and mono-target regression if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>(); fRegressionReturnVal->clear(); @@ -717,12 +959,22 @@ const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues() } if (fMultiTargetRegression) { - std::vector<Float_t> targets = fFoam.at(0)->GetProjectedRegValue(vals, fKernel, fTargetSelection); - for(UInt_t i=0; i<(Data()->GetNTargets()); i++) + // create std::map from event variables + std::map<Int_t, Float_t> xvec; + for (UInt_t i=0; i<vals.size(); ++i) + xvec[i] = vals.at(i); + // get the targets + std::vector<Float_t> targets = fFoam.at(0)->GetCellValue( xvec, kValue ); + + // sanity check + if (targets.size() != Data()->GetNTargets()) + Log() << kFATAL << "Something wrong with multi-target regression foam: " + << "number of targest does not match the DataSet()" << Endl; + for(UInt_t i=0; i<targets.size(); i++) fRegressionReturnVal->push_back(targets.at(i)); } else { - fRegressionReturnVal->push_back(fFoam.at(0)->GetCellRegValue0(vals, fKernel)); + fRegressionReturnVal->push_back(fFoam.at(0)->GetCellValue(vals, kValue, fKernelEstimator)); } // apply inverse 
transformation to regression values @@ -741,6 +993,46 @@ const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues() return (*fRegressionReturnVal); } +//_______________________________________________________________________ +TMVA::PDEFoamKernelBase* TMVA::MethodPDEFoam::CreatePDEFoamKernel() +{ + // create a pdefoam kernel estimator, depending on the current + // value of fKernel + switch (fKernel) { + case kNone: + return new PDEFoamKernelTrivial(); + case kLinN: + return new PDEFoamKernelLinN(); + case kGaus: + return new PDEFoamKernelGauss(fVolFrac/2.0); + default: + Log() << kFATAL << "Kernel: " << fKernel << " not supported!" << Endl; + return NULL; + } + return NULL; +} + +//_______________________________________________________________________ +void TMVA::MethodPDEFoam::DeleteFoams() +{ + // Deletes all trained foams + for (UInt_t i=0; i<fFoam.size(); i++) + if (fFoam.at(i)) delete fFoam.at(i); + fFoam.clear(); +} + +//_______________________________________________________________________ +void TMVA::MethodPDEFoam::Reset() +{ + // reset MethodPDEFoam + DeleteFoams(); + + if (fKernelEstimator != NULL) { + delete fKernelEstimator; + fKernelEstimator = NULL; + } +} + //_______________________________________________________________________ void TMVA::MethodPDEFoam::PrintCoefficients( void ) {} @@ -808,9 +1100,13 @@ void TMVA::MethodPDEFoam::WriteFoamsToFile() const if (fCompress) rootFile = new TFile(rfname, "RECREATE", "foamfile", 9); else rootFile = new TFile(rfname, "RECREATE"); - fFoam.at(0)->Write(fFoam.at(0)->GetFoamName().Data()); - if (!DoRegression() && fSigBgSeparated) - fFoam.at(1)->Write(fFoam.at(1)->GetFoamName().Data()); + // write the foams + for (UInt_t i=0; i<fFoam.size(); ++i) { + Log() << "writing foam " << fFoam.at(i)->GetFoamName().Data() + << " to file" << Endl; + fFoam.at(i)->Write(fFoam.at(i)->GetFoamName().Data()); + } + rootFile->Close(); Log() << kINFO << "Foams written to file: " << gTools().Color("lightblue") << 
rfname << gTools().Color("reset") << Endl; @@ -859,11 +1155,9 @@ void TMVA::MethodPDEFoam::ReadWeightsFromStream( istream& istr ) UInt_t kDim = GetNvar(); if (fMultiTargetRegression) kDim += Data()->GetNTargets(); + fXmin.assign(kDim, 0); + fXmax.assign(kDim, 0); - for (UInt_t i=0; i<kDim; i++) { - fXmin.push_back(0.); - fXmax.push_back(0.); - } // read range for (UInt_t i=0; i<kDim; i++) istr >> fXmin.at(i); @@ -888,9 +1182,8 @@ void TMVA::MethodPDEFoam::ReadWeightsFromXML( void* wghtnode ) gTools().ReadAttr( wghtnode, "nBin", fnBin ); gTools().ReadAttr( wghtnode, "EvPerBin", fEvPerBin ); gTools().ReadAttr( wghtnode, "Compress", fCompress ); - Bool_t regr; + Bool_t regr; // dummy for backwards compatib. gTools().ReadAttr( wghtnode, "DoRegression", regr ); - SetAnalysisType( (regr ? Types::kRegression : Types::kClassification ) ); Bool_t CutNmin; // dummy for backwards compatib. gTools().ReadAttr( wghtnode, "CutNmin", CutNmin ); gTools().ReadAttr( wghtnode, "Nmin", fNmin ); @@ -915,11 +1208,8 @@ void TMVA::MethodPDEFoam::ReadWeightsFromXML( void* wghtnode ) UInt_t kDim = GetNvar(); if (fMultiTargetRegression) kDim += Data()->GetNTargets(); - - for (UInt_t i=0; i<kDim; i++) { - fXmin.push_back(0.); - fXmax.push_back(0.); - } + fXmin.assign(kDim, 0); + fXmax.assign(kDim, 0); // read foam range void *xmin_wrap = gTools().GetChild( wghtnode ); @@ -943,12 +1233,15 @@ void TMVA::MethodPDEFoam::ReadWeightsFromXML( void* wghtnode ) } // if foams exist, delete them - for (UInt_t i=0; i<fFoam.size(); i++) - if (fFoam.at(i)) delete fFoam.at(i); - fFoam.clear(); + DeleteFoams(); // read pure foams from file ReadFoamsFromFile(); + + // recreate the pdefoam kernel estimator + if (fKernelEstimator != NULL) + delete fKernelEstimator; + fKernelEstimator = CreatePDEFoamKernel(); } //_______________________________________________________________________ @@ -971,21 +1264,32 @@ void TMVA::MethodPDEFoam::ReadFoamsFromFile() // read foams from file if (DoRegression()) { - if 
(fMultiTargetRegression) - fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("MultiTargetRegressionFoam")) ); - else - fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("MonoTargetRegressionFoam")) ); - } - else { + if (fMultiTargetRegression) + fFoam.push_back( (PDEFoam*) rootFile->Get("MultiTargetRegressionFoam") ); + else + fFoam.push_back( (PDEFoam*) rootFile->Get("MonoTargetRegressionFoam") ); + } else { if (fSigBgSeparated) { - fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("SignalFoam")) ); - fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("BgFoam")) ); + fFoam.push_back( (PDEFoam*) rootFile->Get("SignalFoam") ); + fFoam.push_back( (PDEFoam*) rootFile->Get("BgFoam") ); + } else { + // try to load discriminator foam + PDEFoam *foam = (PDEFoam*) rootFile->Get("DiscrFoam"); + if (foam != NULL) + fFoam.push_back( foam ); + else { + // load multiclass foams + for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) { + fFoam.push_back( (PDEFoam*) rootFile->Get(Form("MultiClassFoam%u",iClass)) ); + } + } } - else - fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("DiscrFoam")) ); } - if (!fFoam.at(0) || (!DoRegression() && fSigBgSeparated && !fFoam.at(1))) - Log() << kFATAL << "Could not load foam!" << Endl; + + for (UInt_t i=0; i<fFoam.size(); ++i) { + if (!fFoam.at(0)) + Log() << kFATAL << "Could not load foam!" 
<< Endl; + } } //_______________________________________________________________________ @@ -1074,7 +1378,7 @@ void TMVA::MethodPDEFoam::GetHelpMessage() const Log() << " SigBgSeparate False Separate Signal and Background" << Endl; Log() << " TailCut 0.001 Fraction of outlier events that excluded" << Endl; Log() << " from the foam in each dimension " << Endl; - Log() << " VolFrac 0.0333 Volume fraction (used for density calculation" << Endl; + Log() << " VolFrac 0.0666 Volume fraction (used for density calculation" << Endl; Log() << " during foam build-up) " << Endl; Log() << " nActiveCells 500 Maximal number of active cells in final foam " << Endl; Log() << " nSampl 2000 Number of MC events per cell in foam build-up " << Endl; @@ -1112,7 +1416,7 @@ void TMVA::MethodPDEFoam::GetHelpMessage() const Log() << "it will result in a more precise local estimate of the sampled" << Endl; Log() << "density. In general, higher dimensional problems require larger box" << Endl; Log() << "sizes, due to the reduced average number of events per box volume. The" << Endl; - Log() << "default value of 0.0333 was optimised for an example with 5" << Endl; + Log() << "default value of 0.0666 was optimised for an example with 5" << Endl; Log() << "observables and training samples of the order of 50000 signal and" << Endl; Log() << "background events each." 
<< Endl; Log() << Endl; diff --git a/tmva/src/MethodTMlpANN.cxx b/tmva/src/MethodTMlpANN.cxx index 68a3d8897092d8ce20431c21ab46bd3037ef7589..c00a7179aaf3a85a21069232cfce8783d83dbb38 100644 --- a/tmva/src/MethodTMlpANN.cxx +++ b/tmva/src/MethodTMlpANN.cxx @@ -469,7 +469,8 @@ void TMVA::MethodTMlpANN::MakeClass( const TString& theClassFileName ) const else classFileName = theClassFileName; - Log() << kINFO << "Creating specific (TMultiLayerPerceptron) standalone response class: " << Endl; + classFileName.ReplaceAll(".class",""); + Log() << kINFO << "Creating specific (TMultiLayerPerceptron) standalone response class: " << classFileName << Endl; fMLP->Export( classFileName.Data() ); } diff --git a/tmva/src/PDEFoam.cxx b/tmva/src/PDEFoam.cxx index 0682edb5a4a423d8c2059035f209e8d2507ca459..64ea6669cc9712089f1241740f7906bc3e735246 100644 --- a/tmva/src/PDEFoam.cxx +++ b/tmva/src/PDEFoam.cxx @@ -1,3 +1,5 @@ +// @(#)root/tmva $Id$ +// Author: S.Jadach, Tancredi Carli, Dominik Dannheim, Alexander Voigt /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * @@ -12,10 +14,10 @@ * Tancredi Carli - CERN, Switzerland * * Dominik Dannheim - CERN, Switzerland * * S. Jadach - Institute of Nuclear Physics, Cracow, Poland * - * Alexander Voigt - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * * Peter Speckmayer - CERN, Switzerland * * * - * Copyright (c) 2008: * + * Copyright (c) 2008, 2010: * * CERN, Switzerland * * MPI-K Heidelberg, Germany * * * @@ -28,10 +30,9 @@ // // Implementation of PDEFoam // -// The PDEFoam method is an -// extension of the PDERS method, which uses self-adapting binning to -// divide the multi-dimensional phase space in a finite number of -// hyper-rectangles (boxes). 
+// The PDEFoam method is an extension of the PDERS method, which uses +// self-adapting binning to divide the multi-dimensional phase space +// in a finite number of hyper-rectangles (boxes). // // For a given number of boxes, the binning algorithm adjusts the size // and position of the boxes inside the multidimensional phase space, @@ -41,8 +42,27 @@ // events. // // The implementation of the PDEFoam is based on the monte-carlo -// integration package PDEFoam included in the analysis package ROOT. -//_____________________________________________________________________ +// integration package TFoam included in the analysis package ROOT. +// +// The class TMVA::PDEFoam defines the default interface for the +// PDEFoam variants: +// +// - PDEFoamEvent +// - PDEFoamDiscriminant +// - PDEFoamTarget +// - PDEFoamMultiTarget +// - PDEFoamDecisionTree +// +// By default PDEFoam stores in the cells the number of events (event +// weights) and therefore acts as an event density estimator. +// However, the above listed derived classes override this behaviour +// to implement certain PDEFoam variations. +// +// In order to use PDEFoam the user has to set the density estimator +// of the type TMVA::PDEFoamDensityBase, which is used during the foam +// build-up. The default PDEFoam should be used with +// PDEFoamEventDensity. 
+// _____________________________________________________________________ #include <iostream> @@ -50,7 +70,7 @@ #include <fstream> #include <sstream> #include <cassert> -#include <climits> +#include <limits> #include "TMVA/Event.h" #include "TMVA/Tools.h" @@ -104,17 +124,17 @@ TMVA::PDEFoam::PDEFoam() : fRvec(0), fPseRan(new TRandom3(4356)), fAlpha(0), - fFoamType(kDiscr), + fFoamType(kSeparate), fXmin(0), fXmax(0), fNElements(0), fNmin(100), fMaxDepth(0), - fVolFrac(30.0), + fVolFrac(1.0/15.0), fFillFoamWithOrigWeights(kFALSE), fDTSeparation(kFoam), fPeekMax(kTRUE), - fDistr(new PDEFoamDistr()), + fDistr(NULL), fTimer(new Timer(0, "PDEFoam", kTRUE)), fVariableNames(new TObjArray()), fLogger(new MsgLogger("PDEFoam")) @@ -139,17 +159,17 @@ TMVA::PDEFoam::PDEFoam(const TString& Name) : fRvec(0), fPseRan(new TRandom3(4356)), fAlpha(0), - fFoamType(kDiscr), + fFoamType(kSeparate), fXmin(0), fXmax(0), fNElements(0), fNmin(100), fMaxDepth(0), - fVolFrac(30.0), + fVolFrac(1.0/15.0), fFillFoamWithOrigWeights(kFALSE), fDTSeparation(kFoam), fPeekMax(kTRUE), - fDistr(new PDEFoamDistr()), + fDistr(NULL), fTimer(new Timer(1, "PDEFoam", kTRUE)), fVariableNames(new TObjArray()), fLogger(new MsgLogger("PDEFoam")) @@ -206,14 +226,14 @@ TMVA::PDEFoam::PDEFoam(const PDEFoam &From) : , fNElements(0) , fNmin(0) , fMaxDepth(0) - , fVolFrac(30.0) + , fVolFrac(1.0/15.0) , fFillFoamWithOrigWeights(kFALSE) , fDTSeparation(kFoam) , fPeekMax(kTRUE) , fDistr(0) , fTimer(0) , fVariableNames(0) - , fLogger(new MsgLogger("PDEFoam")) + , fLogger(new MsgLogger(*From.fLogger)) { // Copy Constructor NOT IMPLEMENTED (NEVER USED) Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl; @@ -308,6 +328,9 @@ void TMVA::PDEFoam::Create() // BUILD-UP of the FOAM // // ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| // + // prepare PDEFoam for growing + ResetCellElements(); // reset all cell elements + // Define and explore root cell(s) InitCells(); Grow(); @@ -315,8 +338,7 @@ void 
TMVA::PDEFoam::Create() TH1::AddDirectory(addStatus); // prepare PDEFoam for the filling with events - SetNElements(2); // init space for 2 variables on every cell - ResetCellElements(); // reset the cell elements of all active cells + ResetCellElements(); // reset all cell elements } // Create //_____________________________________________________________________ @@ -338,12 +360,6 @@ void TMVA::PDEFoam::InitCells() } if(fCells==0) Log() << kFATAL << "Cannot initialize CELLS" << Endl; - // create cell elemets - if (GetNmin() > 0) { - SetNElements(1); // to save the number of events in the cell - ResetCellElements(true); - } - ///////////////////////////////////////////////////////////////////////////// // Single Root Hypercube // ///////////////////////////////////////////////////////////////////////////// @@ -351,10 +367,7 @@ void TMVA::PDEFoam::InitCells() // Exploration of the root cell(s) for(Long_t iCell=0; iCell<=fLastCe; iCell++){ - if (fDTSeparation != kFoam) - DTExplore( fCells[iCell] ); // Exploration of root cell(s) - else - Explore( fCells[iCell] ); // Exploration of root cell(s) + Explore( fCells[iCell] ); // Exploration of root cell(s) } }//InitCells @@ -433,12 +446,16 @@ void TMVA::PDEFoam::Explore(PDEFoamCell *cell) Double_t *volPart=0; + // calculate volume scale + Double_t vol_scale = 1.0; + for (Int_t idim = 0; idim < fDim; ++idim) + vol_scale *= fXmax[idim] - fXmin[idim]; + cell->CalcVolume(); - dx = cell->GetVolume(); + dx = cell->GetVolume() * vol_scale; intOld = cell->GetIntg(); //memorize old values, driOld = cell->GetDriv(); //will be needed for correcting parent cells - if (GetNmin() > 0) - toteventsOld = GetBuildUpCellEvents(cell); + toteventsOld = GetCellElement(cell, 0); ///////////////////////////////////////////////////// // Special Short MC sampling to probe cell // @@ -514,8 +531,7 @@ void TMVA::PDEFoam::Explore(PDEFoamCell *cell) cell->SetXdiv(xBest); cell->SetIntg(intTrue); cell->SetDriv(intDriv); - if (GetNmin() > 0) - 
SetCellElement(cell, 0, totevents); + SetCellElement(cell, 0, totevents); // correct/update integrals in all parent cells to the top of the tree Double_t parIntg, parDriv; @@ -524,146 +540,12 @@ void TMVA::PDEFoam::Explore(PDEFoamCell *cell) parDriv = parent->GetDriv(); parent->SetIntg( parIntg +intTrue -intOld ); parent->SetDriv( parDriv +intDriv -driOld ); - if (GetNmin() > 0) - SetCellElement( parent, 0, GetBuildUpCellEvents(parent) + totevents - toteventsOld); + SetCellElement( parent, 0, GetCellElement(parent, 0) + totevents - toteventsOld); } delete [] volPart; delete [] xRand; } -//_____________________________________________________________________ -void TMVA::PDEFoam::DTExplore(PDEFoamCell *cell) -{ - // Internal subprogram used by Create. It explores newly defined - // cell with according to the decision tree logic. The separation - // set by the 'fDTSeparation' option is used (see also - // GetSeparation()). - // - // The optimal division point for eventual future cell division is - // determined/recorded. Note that links to parents and initial - // volume = 1/2 parent has to be already defined prior to calling - // this routine. - // - // Note, that according to the decision tree logic, a cell is only - // split, if the number of (unweighted) events in each dautghter - // cell is greater than fNmin. - - if (!cell) - Log() << kFATAL << "<DTExplore> Null pointer given!" 
<< Endl; - - // create edge histograms - std::vector<TH1F*> hsig, hbkg, hsig_unw, hbkg_unw; - for (Int_t idim=0; idim<fDim; idim++) { - hsig.push_back( new TH1F(Form("hsig_%i",idim), - Form("signal[%i]",idim), fNBin, 0, 1 )); - hbkg.push_back( new TH1F(Form("hbkg_%i",idim), - Form("background[%i]",idim), fNBin, 0, 1 )); - hsig_unw.push_back( new TH1F(Form("hsig_unw_%i",idim), - Form("signal_unw[%i]",idim), fNBin, 0, 1 )); - hbkg_unw.push_back( new TH1F(Form("hbkg_unw_%i",idim), - Form("background_unw[%i]",idim), fNBin, 0, 1 )); - } - - // Fill histograms - fDistr->FillHist(cell, hsig, hbkg, hsig_unw, hbkg_unw); - - // ------ determine the best division edge - Float_t xBest = 0.5; // best division point - Int_t kBest = -1; // best split dimension - Float_t maxGain = -1.0; // maximum gain - Float_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX()+1); - Float_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX()+1); - Float_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX()+1); - Float_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX()+1); - Float_t parentGain = (nTotS+nTotB) * GetSeparation(nTotS,nTotB); - - for (Int_t idim=0; idim<fDim; idim++) { - Float_t nSelS=hsig.at(idim)->GetBinContent(0); - Float_t nSelB=hbkg.at(idim)->GetBinContent(0); - Float_t nSelS_unw=hsig_unw.at(idim)->GetBinContent(0); - Float_t nSelB_unw=hbkg_unw.at(idim)->GetBinContent(0); - for(Int_t jLo=1; jLo<fNBin; jLo++) { - nSelS += hsig.at(idim)->GetBinContent(jLo); - nSelB += hbkg.at(idim)->GetBinContent(jLo); - nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo); - nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo); - - // proceed if total number of events in left and right cell - // is greater than fNmin - if ( !( (nSelS_unw + nSelB_unw) >= GetNmin() && - (nTotS_unw-nSelS_unw + nTotB_unw-nSelB_unw) >= GetNmin() ) ) - continue; - - Float_t xLo = 1.0*jLo/fNBin; - - // calculate gain - Float_t leftGain = ((nTotS - nSelS) + (nTotB - nSelB)) - * 
GetSeparation(nTotS-nSelS,nTotB-nSelB); - Float_t rightGain = (nSelS+nSelB) * GetSeparation(nSelS,nSelB); - Float_t gain = parentGain - leftGain - rightGain; - - if (gain >= maxGain) { - maxGain = gain; - xBest = xLo; - kBest = idim; - } - } // jLo - } // idim - - if (kBest >= fDim || kBest < 0) - Log() << kWARNING << "No best division edge found!" << Endl; - - // set cell properties - cell->SetBest(kBest); - cell->SetXdiv(xBest); - if (nTotB+nTotS > 0) - cell->SetIntg( nTotS/(nTotB+nTotS) ); - else - cell->SetIntg( 0.0 ); - cell->SetDriv(maxGain); - cell->CalcVolume(); - - // set cell element 0 (total number of events in cell) during - // build-up - if (GetNmin() > 0) - SetCellElement( cell, 0, nTotS + nTotB); - - // clean up - for (UInt_t ih=0; ih<hsig.size(); ih++) delete hsig.at(ih); - for (UInt_t ih=0; ih<hbkg.size(); ih++) delete hbkg.at(ih); - for (UInt_t ih=0; ih<hsig_unw.size(); ih++) delete hsig_unw.at(ih); - for (UInt_t ih=0; ih<hbkg_unw.size(); ih++) delete hbkg_unw.at(ih); -} - -//_____________________________________________________________________ -Float_t TMVA::PDEFoam::GetSeparation(Float_t s, Float_t b) -{ - // Calculate the separation depending on 'fDTSeparation' for the - // given number of signal and background events 's', 'b'. Note, - // that if (s+b) < 0 or s < 0 or b < 0 than the return value is 0. - - if (s+b <= 0 || s < 0 || b < 0 ) - return 0; - - Float_t p = s/(s+b); - - switch(fDTSeparation) { - case kFoam: // p - return p; - case kGiniIndex: // p * (1-p) - return p*(1-p); - case kMisClassificationError: // 1 - max(p,1-p) - return 1 - TMath::Max(p, 1-p); - case kCrossEntropy: // -p*log(p) - (1-p)*log(1-p) - return (p<=0 || p >=1 ? 
0 : -p*TMath::Log(p) - (1-p)*TMath::Log(1-p)); - default: - Log() << kFATAL << "Unknown separation type" << Endl; - break; - } - - return 0; -} - //_____________________________________________________________________ void TMVA::PDEFoam::Varedu(Double_t ceSum[5], Int_t &kBest, Double_t &xBest, Double_t &yBest) { @@ -772,7 +654,7 @@ Long_t TMVA::PDEFoam::PeekMax() // apply Nmin-cut if (GetNmin() > 0) - bCutNmin = GetBuildUpCellEvents(fCells[i]) > GetNmin(); + bCutNmin = GetCellElement(fCells[i], 0) > GetNmin(); // choose cell if(driv > drivMax && bCutNmin && bCutMaxDepth) { @@ -790,56 +672,7 @@ Long_t TMVA::PDEFoam::PeekMax() Log() << kVERBOSE << "Warning: Maximum depth reached: " << GetMaxDepth() << Endl; else - Log() << kWARNING << "Warning: PDEFoam::PeekMax: no more candidate cells (drivMax>0) found for further splitting." << Endl; - } - - return(iCell); -} - -//_____________________________________________________________________ -Long_t TMVA::PDEFoam::PeekLast() -{ - // Internal subprogram used by Create. It finds the last created - // active cell for the purpose of the division. Analogous to - // PeekMax() it is cut on the number of events in the cell (fNmin) - // and the cell tree depth (GetMaxDepth() > 0). - - Long_t iCell = -1; - - Bool_t bCutNmin = kTRUE; - Bool_t bCutMaxDepth = kTRUE; - - for(Long_t i=fLastCe; i>=0; i--) { - if( fCells[i]->GetStat() == 1 ) { - // if driver integral < numeric limit, skip cell - if (fCells[i]->GetDriv() < std::numeric_limits<float>::epsilon()) - continue; - - // apply cut on depth - if (GetMaxDepth() > 0) - bCutMaxDepth = fCells[i]->GetDepth() < GetMaxDepth(); - - // apply Nmin-cut - if (GetNmin() > 0) - bCutNmin = GetBuildUpCellEvents(fCells[i]) > GetNmin(); - - // choose cell - if(bCutNmin && bCutMaxDepth) { - iCell = i; - break; - } - } - } - - if (iCell == -1){ - if (!bCutNmin) - Log() << kVERBOSE << "Warning: No cell with more than " - << GetNmin() << " events found!" 
<< Endl; - else if (!bCutMaxDepth) - Log() << kVERBOSE << "Warning: Maximum depth reached: " - << GetMaxDepth() << Endl; - else - Log() << kWARNING << "Warning: PDEFoam::PeekLast: no more candidate cells found for further splitting." << Endl; + Log() << kWARNING << "<PDEFoam::PeekMax>: no more candidate cells (drivMax>0) found for further splitting." << Endl; } return(iCell); @@ -877,13 +710,10 @@ Int_t TMVA::PDEFoam::Divide(PDEFoamCell *cell) Int_t d2 = CellFill(1, cell); cell->SetDau0((fCells[d1])); cell->SetDau1((fCells[d2])); - if (fDTSeparation != kFoam) { - DTExplore( (fCells[d1]) ); - DTExplore( (fCells[d2]) ); - } else { - Explore( (fCells[d1]) ); - Explore( (fCells[d2]) ); - } + + Explore( (fCells[d1]) ); + Explore( (fCells[d2]) ); + return 1; } // PDEFoam_Divide @@ -892,7 +722,16 @@ Double_t TMVA::PDEFoam::Eval(Double_t *xRand, Double_t &event_density) { // Internal subprogram. // Evaluates (training) distribution. - return fDistr->Density(xRand, event_density); + + // Transform variable xRand, since Foam boundaries are [0,1] and + // fDistr is filled with events which range in [fXmin,fXmax] + // + // Transformation: [0, 1] --> [xmin, xmax] + std::vector<Double_t> xvec; + for (Int_t idim = 0; idim < GetTotDim(); ++idim) + xvec.push_back( VarTransformInvers(idim, xRand[idim]) ); + + return GetDistr()->Density(xvec, event_density); } //_____________________________________________________________________ @@ -909,10 +748,7 @@ void TMVA::PDEFoam::Grow() PDEFoamCell* newCell; while ( (fLastCe+2) < fNCells ) { // this condition also checked inside Divide - if (fPeekMax) - iCell = PeekMax(); // peek up cell with maximum driver integral - else - iCell = PeekLast(); // peek up last cell created + iCell = PeekMax(); // peek up cell with maximum driver integral if ( (iCell<0) || (iCell>fLastCe) ) { Log() << kVERBOSE << "Break: "<< fLastCe+1 << " cells created" << Endl; @@ -1031,7 +867,8 @@ void TMVA::PDEFoam::CheckAll(Int_t level) 
//_____________________________________________________________________ void TMVA::PDEFoam::PrintCell(Long_t iCell) { - // Prints geometry of 'iCell' + // Prints geometry of and elements of 'iCell', as well as relations + // to parent and daughter cells. if (iCell < 0 || iCell > fLastCe) { Log() << kWARNING << "<PrintCell(iCell=" << iCell @@ -1061,6 +898,17 @@ void TMVA::PDEFoam::PrintCell(Long_t iCell) } Log() << ")" << Endl; fCells[iCell]->Print("1"); + // print the cell elements + Log() << "Elements: ["; + TVectorD *vec = (TVectorD*)fCells[iCell]->GetElement(); + if (vec != NULL){ + for (Int_t i=0; i<vec->GetNrows(); i++){ + if (i>0) Log() << ", "; + Log() << GetCellElement(fCells[iCell], i); + } + } else + Log() << "not set"; + Log() << "]" << Endl; Log()<<"}"<<Endl; } @@ -1074,1107 +922,308 @@ void TMVA::PDEFoam::PrintCells(void) } //_____________________________________________________________________ -void TMVA::PDEFoam::RemoveEmptyCell( Int_t iCell ) +void TMVA::PDEFoam::FillFoamCells(const Event* ev, Float_t wt) { - // This function removes a cell iCell, which has a volume equal to zero. - // It works the following way: - // 1) find grandparent to iCell - // 2) set daughter of the grandparent cell to the sister of iCell - // - // Result: - // iCell and its parent are alone standing ==> will be removed - - // get cell volume - Double_t volume = fCells[iCell]->GetVolume(); - - if (!fCells[iCell]->GetStat() || volume>0){ - Log() << kDEBUG << "<RemoveEmptyCell>: cell " << iCell - << "is not active or has volume>0 ==> doesn't need to be removed" << Endl; - return; - } - - // get parent and grandparent Cells - PDEFoamCell *pCell = fCells[iCell]->GetPare(); - PDEFoamCell *ppCell = fCells[iCell]->GetPare()->GetPare(); + // This function fills a weight 'wt' into the PDEFoam cell, which + // corresponds to the given event 'ev'. Per default cell element 0 + // is filled with the weight 'wt', and cell element 1 is filled + // with the squared weight. 
This function can be overridden by a + // subclass in order to change the values stored in the foam cells. - // get neighbour (sister) to iCell - PDEFoamCell *sCell; - if (pCell->GetDau0() == fCells[iCell]) - sCell = pCell->GetDau1(); - else - sCell = pCell->GetDau0(); - - // cross check - if (pCell->GetIntg() != sCell->GetIntg()) - Log() << kWARNING << "<RemoveEmptyCell> Error: cell integrals are not equal!" - << " Intg(parent cell)=" << pCell->GetIntg() - << " Intg(sister cell)=" << sCell->GetIntg() - << Endl; - - // set daughter of grandparent to sister of iCell - if (ppCell->GetDau0() == pCell) - ppCell->SetDau0(sCell); - else - ppCell->SetDau1(sCell); - - // set parent of sister to grandparent of of iCell - sCell->SetPare(ppCell); + // find corresponding foam cell + std::vector<Float_t> values = ev->GetValues(); + std::vector<Float_t> tvalues = VarTransform(values); + PDEFoamCell *cell = FindCell(tvalues); - // now iCell and its parent are alone ==> set them inactive - fCells[iCell]->SetStat(0); - pCell->SetStat(0); + // 0. Element: Sum of weights 'wt' + // 1. Element: Sum of weights 'wt' squared + SetCellElement(cell, 0, GetCellElement(cell, 0) + wt); + SetCellElement(cell, 1, GetCellElement(cell, 1) + wt*wt); } //_____________________________________________________________________ -void TMVA::PDEFoam::CheckCells( Bool_t remove_empty_cells ) +void TMVA::PDEFoam::ResetCellElements() { - // debug function: checks all cells with respect to critical - // values, f.e. cell volume, ... + // Remove the cell elements from all cells. - for (Long_t iCell=0; iCell<=fLastCe; iCell++) { - if (!fCells[iCell]->GetStat()) - continue; + if (!fCells) return; - Double_t volume = fCells[iCell]->GetVolume(); - if (volume<1e-10){ - if (volume<=0){ - Log() << kWARNING << "Critical: cell volume negative or zero! 
volume=" - << volume << " cell number: " << iCell << Endl; - // fCells[iCell]->Print("1"); // debug output - if (remove_empty_cells){ - Log() << kWARNING << "Remove cell " << iCell << Endl; - RemoveEmptyCell(iCell); - } - } - else { - Log() << kWARNING << "Cell volume close to zero! volume=" - << volume << " cell number: " << iCell << Endl; - } + // delete all old cell elements + Log() << kVERBOSE << "Delete old cell elements" << Endl; + for(Long_t iCell=0; iCell<fNCells; iCell++) { + if (fCells[iCell]->GetElement() != NULL){ + delete dynamic_cast<TVectorD*>(fCells[iCell]->GetElement()); + fCells[iCell]->SetElement(NULL); } } } //_____________________________________________________________________ -void TMVA::PDEFoam::PrintCellElements() +Bool_t TMVA::PDEFoam::CellValueIsUndefined( PDEFoamCell* /* cell */ ) { - // This debug function prints the cell elements of all active - // cells. - - for (Long_t iCell=0; iCell<=fLastCe; iCell++) { - if (!fCells[iCell]->GetStat()) continue; - - Log() << "cell[" << iCell << "] elements: ["; - for (UInt_t i=0; i<GetNElements(); i++){ - if (i>0) Log() << " ; "; - Log() << GetCellElement(fCells[iCell], i); - } - Log() << "]" << Endl; - } + // Returns true, if the value of the given cell is undefined. + // Default value: kFALSE. This function can be overridden by + // sub-classes. + return kFALSE; } //_____________________________________________________________________ -void TMVA::PDEFoam::ResetCellElements(Bool_t allcells) +Float_t TMVA::PDEFoam::GetCellValue(std::vector<Float_t> &xvec, ECellValue cv, PDEFoamKernelBase *kernel) { - // creates a TVectorD object with fNElements in every cell - // and initializes them by zero. - // The TVectorD object is used to store classification or - // regression data in every foam cell. + // This function finds the cell, which corresponds to the given + // untransformed event vector 'xvec' and return its value, which is + // given by the parameter 'cv'. 
If kernel != NULL, then + // PDEFoamKernelBase::Estimate() is called on the transformed event + // variables. // - // Parameter: - // allcells == true : create TVectorD on every cell - // allcells == false : create TVectorD on active cells with - // cell index <= fLastCe (default) - - if (!fCells || GetNElements()==0) return; + // Parameters: + // + // - xvec - event vector (untransformed, [fXmin,fXmax]) + // + // - cv - the cell value to return + // + // - kernel - PDEFoam kernel estimator. If NULL is given, than the + // pure cell value is returned + // + // Return: + // + // The cell value, corresponding to 'xvec', estimated by the given + // kernel. + + std::vector<Float_t> txvec(VarTransform(xvec)); + if (kernel == NULL) + return GetCellValue(FindCell(txvec), cv); + else + return kernel->Estimate(this, txvec, cv); +} - // delete all old cell elements - Log() << kVERBOSE << "Delete old cell elements" << Endl; - for(Long_t iCell=0; iCell<fNCells; iCell++) { - if (fCells[iCell]->GetElement() != 0){ - delete dynamic_cast<TVectorD*>(fCells[iCell]->GetElement()); - fCells[iCell]->SetElement(0); - } - } +//_____________________________________________________________________ +std::vector<Float_t> TMVA::PDEFoam::GetCellValue( std::map<Int_t,Float_t>& xvec, ECellValue cv ) +{ + // This function finds all cells, which corresponds to the given + // (incomplete) untransformed event vector 'xvec' and returns the + // cell values, according to the parameter 'cv'. + // + // Parameters: + // + // - xvec - map for the untransformed vector. The key (Int_t) is + // the dimension, and the value (Float_t) is the event + // coordinate. Note that not all coordinates have to be + // specified. 
+ // + // - cv - cell values to return + // + // Return: + // + // cell values from all cells that were found - if (allcells){ - Log() << kVERBOSE << "Reset new cell elements to " - << GetNElements() << " value(s) per cell" << Endl; - } else { - Log() << kVERBOSE << "Reset active cell elements to " - << GetNElements() << " value(s) per cell" << Endl; - } + // transformed event + std::map<Int_t,Float_t> txvec; + for (std::map<Int_t,Float_t>::const_iterator it=xvec.begin(); it!=xvec.end(); ++it) + txvec[it->first] = VarTransform(it->first, it->second); - // create new cell elements - for(Long_t iCell=0; iCell<(allcells ? fNCells : fLastCe+1); iCell++) { - // skip inactive cells if allcells == false - if (!allcells && !(fCells[iCell]->GetStat())) - continue; + // find all cells, which correspond to the transformed event + std::vector<PDEFoamCell*> cells = FindCells(txvec); - TVectorD *elem = new TVectorD(GetNElements()); - for (UInt_t i=0; i<GetNElements(); i++) - (*elem)(i) = 0.; + // get the cell values + std::vector<Float_t> cell_values; + for (std::vector<PDEFoamCell*>::const_iterator cell_it=cells.begin(); + cell_it != cells.end(); ++cell_it) + cell_values.push_back(GetCellValue(*cell_it, cv)); - fCells[iCell]->SetElement(elem); - } + return cell_values; } //_____________________________________________________________________ -void TMVA::PDEFoam::CalcCellTarget() +TMVA::PDEFoamCell* TMVA::PDEFoam::FindCell( std::vector<Float_t> &xvec ) { - // Calculate average cell target in every cell and save them to the cell. - // This function is called when the Mono target regression option is set. + // Find cell that contains 'xvec' (in foam coordinates [0,1]). + // + // Loop to find cell that contains 'xvec' starting at root cell, + // and traversing binary tree to find the cell quickly. Note, that + // if 'xvec' lies outside the foam, the cell which is nearest to + // 'xvec' is returned. (The returned pointer should never be + // NULL.) 
+ // + // Parameters: + // + // - xvec - event vector (in foam coordinates [0,1]) + // + // Return: + // + // PDEFoam cell corresponding to 'xvec' - // loop over cells - for (Long_t iCell=0; iCell<=fLastCe; iCell++) { - if (!(fCells[iCell]->GetStat())) - continue; + PDEFoamVect cellPosi0(GetTotDim()), cellSize0(GetTotDim()); + PDEFoamCell *cell, *cell0; - Double_t N_ev = GetCellElement(fCells[iCell], 0); // get number of events - Double_t tar = GetCellElement(fCells[iCell], 1); // get sum of targets + cell=fCells[0]; // start with root cell + Int_t idim=0; + while (cell->GetStat()!=1) { //go down binary tree until cell is found + idim=cell->GetBest(); // dimension that changed + cell0=cell->GetDau0(); + cell0->GetHcub(cellPosi0,cellSize0); - if (N_ev > 1e-20){ - SetCellElement(fCells[iCell], 0, tar/N_ev); // set average target - SetCellElement(fCells[iCell], 1, tar/TMath::Sqrt(N_ev)); // set error on average target - } - else { - SetCellElement(fCells[iCell], 0, 0.0 ); // set mean target - SetCellElement(fCells[iCell], 1, -1 ); // set mean target error - } + if (xvec.at(idim)<=cellPosi0[idim]+cellSize0[idim]) + cell=cell0; + else + cell=(cell->GetDau1()); } + return cell; } //_____________________________________________________________________ -void TMVA::PDEFoam::CalcCellDiscr() +void TMVA::PDEFoam::FindCells(std::map<Int_t, Float_t> &txvec, PDEFoamCell* cell, std::vector<PDEFoamCell*> &cells) { - // Calc discriminator and its error for every cell and save it to the cell. - // This function is called when the fSigBgSeparated==False option is set. - - // loop over cells - for (Long_t iCell=0; iCell<=fLastCe; iCell++) { - if (!(fCells[iCell]->GetStat())) - continue; - - Double_t N_sig = GetCellElement(fCells[iCell], 0); // get number of signal events - Double_t N_bg = GetCellElement(fCells[iCell], 1); // get number of bg events + // This is a helper function for std::vector<PDEFoamCell*> + // FindCells(...) and a generalisation of PDEFoamCell* FindCell(). 
+ // It saves in 'cells' all cells, which contain the coordinates + // specifies in 'txvec'. Note, that not all coordinates have to be + // specified in 'txvec'. + // + // Parameters: + // + // - txvec - event vector in foam coordinates [0,1]. The key is + // the dimension and the value is the event coordinate. Note, + // that not all coordinates have to be specified. + // + // - cell - cell to start searching with (usually root cell + // fCells[0]) + // + // - cells - list of cells that were found - if (N_sig<0.) { - Log() << kWARNING << "Negative number of signal events in cell " << iCell - << ": " << N_sig << ". Set to 0." << Endl; - N_sig=0.; - } - if (N_bg<0.) { - Log() << kWARNING << "Negative number of background events in cell " << iCell - << ": " << N_bg << ". Set to 0." << Endl; - N_bg=0.; - } + PDEFoamVect cellPosi0(GetTotDim()), cellSize0(GetTotDim()); + PDEFoamCell *cell0; + Int_t idim=0; + while (cell->GetStat()!=1) { //go down binary tree until cell is found + idim=cell->GetBest(); // dimension that changed - if (N_sig+N_bg > 1e-10){ - SetCellElement(fCells[iCell], 0, N_sig/(N_sig+N_bg)); // set discriminator - SetCellElement(fCells[iCell], 1, TMath::Sqrt( Sqr ( N_sig/Sqr(N_sig+N_bg))*N_sig + - Sqr ( N_bg /Sqr(N_sig+N_bg))*N_bg ) ); // set discriminator error + // check if dimension 'idim' is specified in 'txvec' + map<Int_t, Float_t>::const_iterator it = txvec.find(idim); - } - else { - SetCellElement(fCells[iCell], 0, 0.5); // set discriminator - SetCellElement(fCells[iCell], 1, 1. 
); // set discriminator error + if (it != txvec.end()){ + // case 1: cell is splitten in a dimension which is specified + // in txvec + cell0=cell->GetDau0(); + cell0->GetHcub(cellPosi0,cellSize0); + // check, whether left daughter cell contains txvec + if (it->second <= cellPosi0[idim] + cellSize0[idim]) + cell=cell0; + else + cell=cell->GetDau1(); + } else { + // case 2: cell is splitten in target dimension + FindCells(txvec, cell->GetDau0(), cells); + FindCells(txvec, cell->GetDau1(), cells); + return; } } + cells.push_back(cell); } //_____________________________________________________________________ -Double_t TMVA::PDEFoam::GetCellDiscr( std::vector<Float_t> &xvec, EKernel kernel ) +std::vector<TMVA::PDEFoamCell*> TMVA::PDEFoam::FindCells(std::vector<Float_t> &txvec) { - // Get discriminator saved in cell (previously calculated in CalcCellDiscr()) - // which encloses the coordinates given in xvec. - // This function is used, when the fSigBgSeparated==False option is set - // (unified foams). - - // transform xvec - std::vector<Float_t> txvec(VarTransform(xvec)); - - // find cell - PDEFoamCell *cell= FindCell(txvec); - - if (!cell) return -999.; - - switch (kernel) { - case kNone: - return GetCellValue(cell, kDiscriminator); + // Find all cells, that contain txvec. This function can be used, + // when the dimension of the foam is greater than the dimension of + // txvec. E.g. this is the case for multi-target regression. + // + // Parameters: + // + // - txvec - event vector of variables, transformed into foam + // coordinates [0,1]. The size of txvec can be smaller than the + // dimension of the foam. 
+ // + // Return value: + // + // - vector of cells, that fit txvec - case kGaus: { - Double_t result = 0.; - Double_t norm = 0.; + // copy the coordinates from 'txvec' into a map + std::map<Int_t, Float_t> txvec_map; + for (UInt_t i=0; i<txvec.size(); ++i) + txvec_map[i] = txvec.at(i); - for (Long_t iCell=0; iCell<=fLastCe; iCell++) { - if (!(fCells[iCell]->GetStat())) continue; + // the cells found + std::vector<PDEFoamCell*> cells(0); - // calc cell density - Double_t cell_discr = GetCellValue(fCells[iCell], kDiscriminator); - Double_t gau = WeightGaus(fCells[iCell], txvec); + // loop over all target dimensions + FindCells(txvec_map, fCells[0], cells); - result += gau * cell_discr; - norm += gau; - } + return cells; +} - return result / norm; - } +//_____________________________________________________________________ +std::vector<TMVA::PDEFoamCell*> TMVA::PDEFoam::FindCells(std::map<Int_t, Float_t> &txvec) +{ + // Find all cells, that contain the coordinates specified in txvec. + // The key in 'txvec' is the dimension, and the corresponding value + // is the coordinate. Note, that not all coordinates have to be + // specified in txvec. + // + // Parameters: + // + // - txvec - map of coordinates (transformed into foam coordinates + // [0,1]) + // + // Return value: + // + // - vector of cells, that fit txvec - case kLinN: - return WeightLinNeighbors(txvec, kDiscriminator); + // the cells found + std::vector<PDEFoamCell*> cells(0); - default: - Log() << kFATAL << "GetCellDiscr: ERROR: wrong kernel!" << Endl; - return 0; - } + // loop over all target dimensions + FindCells(txvec, fCells[0], cells); - return 0; + return cells; } //_____________________________________________________________________ -void TMVA::PDEFoam::FillFoamCells(const Event* ev, Bool_t NoNegWeights) +TH1D* TMVA::PDEFoam::Draw1Dim( ECellValue cell_value, Int_t nbin, PDEFoamKernelBase *kernel ) { - // This function fills an event into the foam. 
+ // Draws 1-dimensional foam (= histogram) // - // In case of Mono-Target regression this function prepares the - // calculation of the average target value in every cell. Note, - // that only target 0 is saved in the cell! + // Parameters: // - // In case of a unified foam this function prepares the calculation of - // the cell discriminator in every cell. + // - cell_value - the cell value to draw // - // If 'NoNegWeights' is true, an event with negative weight will - // not be filled into the foam. (Default value: false) - - std::vector<Float_t> values = ev->GetValues(); - std::vector<Float_t> targets = ev->GetTargets(); - Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight(); - EFoamType ft = GetFoamType(); + // - nbin - number of bins of result histogram + // + // - kernel - a PDEFoam kernel. - if((NoNegWeights && weight<=0) || weight==0) - return; - - if (ft == kMultiTarget) - values.insert(values.end(), targets.begin(), targets.end()); - - // find corresponding foam cell - std::vector<Float_t> tvalues = VarTransform(values); - PDEFoamCell *cell = FindCell(tvalues); - if (!cell) { - Log() << kFATAL << "<PDEFoam::FillFoamCells>: No cell found!" << Endl; - return; - } - - // Add events to cell - switch (ft) { - case kSeparate: - case kMultiTarget: - // 0. Element: Number of events - // 1. Element: RMS - SetCellElement(cell, 0, GetCellElement(cell, 0) + weight); - SetCellElement(cell, 1, GetCellElement(cell, 1) + weight*weight); - break; - - case kDiscr: - // 0. Element: Number of signal events - // 1. Element: Number of background events times normalization - if (ev->GetClass() == 0) - SetCellElement(cell, 0, GetCellElement(cell, 0) + weight); - else - SetCellElement(cell, 1, GetCellElement(cell, 1) + weight); - break; - - case kMonoTarget: - // 0. Element: Number of events - // 1. 
Element: Target 0 - SetCellElement(cell, 0, GetCellElement(cell, 0) + weight); - SetCellElement(cell, 1, GetCellElement(cell, 1) + weight*targets.at(0)); - break; - - default: - Log() << kFATAL << "<FillFoamCells>: unmatched foam type!" << Endl; - break; - } -} - -//_____________________________________________________________________ -Double_t TMVA::PDEFoam::GetCellRegValue0( std::vector<Float_t> &xvec, EKernel kernel ) -{ - // Get regression value 0 from cell that contains the event vector xvec. - // This function is used when the MultiTargetRegression==False option is set. - - std::vector<Float_t> txvec(VarTransform(xvec)); - PDEFoamCell *cell = FindCell(txvec); - - if (!cell) { - Log() << kFATAL << "<GetCellRegValue0> ERROR: No cell found!" << Endl; - return -999.; - } - - switch (kernel) { - case kNone: - if (GetCellValue(cell, kTarget0Error) != -1) - // cell is not empty - return GetCellValue(cell, kTarget0); - else - // cell is empty -> calc average target of neighbor cells - return GetAverageNeighborsValue(txvec, kTarget0); - break; - - case kGaus: { - // return gaus weighted cell density - - Double_t result = 0.; - Double_t norm = 0.; - - for (Long_t iCell=0; iCell<=fLastCe; iCell++) { - if (!(fCells[iCell]->GetStat())) continue; - - // calc cell density - Double_t cell_val = 0; - if (GetCellValue(fCells[iCell], kTarget0Error) != -1) - // cell is not empty - cell_val = GetCellValue(fCells[iCell], kTarget0); - else - // cell is empty -> calc average target of neighbor cells - cell_val = GetAverageNeighborsValue(txvec, kTarget0); - Double_t gau = WeightGaus(fCells[iCell], txvec); - - result += gau * cell_val; - norm += gau; - } - return result / norm; - } - break; - case kLinN: - if (GetCellValue(cell, kTarget0Error) != -1) - // cell is not empty -> weight with non-empty neighbors - return WeightLinNeighbors(txvec, kTarget0, -1, -1, kTRUE); - else - // cell is empty -> calc average target of non-empty neighbor - // cells - return 
GetAverageNeighborsValue(txvec, kTarget0); - break; - - default: - Log() << kFATAL << "<GetCellRegValue0>: unknown kernel!" << Endl; - return 0.; - } - - return 0.; -} - -//_____________________________________________________________________ -Double_t TMVA::PDEFoam::GetAverageNeighborsValue( std::vector<Float_t> &txvec, - ECellValue cv ) -{ - // This function returns the average value 'cv' of only nearest - // neighbor cells. It is used in cases, where empty cells shall - // not be evaluated. - // - // Parameters: - // - txvec - event vector, transformed into foam coordinates [0, 1] - // - cv - cell value, see definition of ECellValue - - const Double_t xoffset = 1.e-6; - Double_t norm = 0; // normalisation - Double_t result = 0; // return value - - PDEFoamCell *cell = FindCell(txvec); // find cooresponding cell - PDEFoamVect cellSize(GetTotDim()); - PDEFoamVect cellPosi(GetTotDim()); - cell->GetHcub(cellPosi, cellSize); // get cell coordinates - - // loop over all dimensions and find neighbor cells - for (Int_t dim=0; dim<GetTotDim(); dim++) { - std::vector<Float_t> ntxvec(txvec); - PDEFoamCell* left_cell = 0; // left cell - PDEFoamCell* right_cell = 0; // right cell - - // get left cell - ntxvec[dim] = cellPosi[dim]-xoffset; - left_cell = FindCell(ntxvec); - if (!CellValueIsUndefined(left_cell)){ - // if left cell is not empty, take its value - result += GetCellValue(left_cell, cv); - norm++; - } - // get right cell - ntxvec[dim] = cellPosi[dim]+cellSize[dim]+xoffset; - right_cell = FindCell(ntxvec); - if (!CellValueIsUndefined(right_cell)){ - // if right cell is not empty, take its value - result += GetCellValue(right_cell, cv); - norm++; - } - } - if (norm>0) result /= norm; // calc average target - else result = 0; // return null if all neighbors are empty - - return result; -} - -//_____________________________________________________________________ -Bool_t TMVA::PDEFoam::CellValueIsUndefined( PDEFoamCell* cell ) -{ - // Returns true, if the value of the 
given cell is undefined. - - EFoamType ft = GetFoamType(); - switch(ft){ - case kSeparate: - return kFALSE; - case kDiscr: - return kFALSE; - case kMonoTarget: - return GetCellValue(cell, kTarget0Error) == -1; - case kMultiTarget: - return kFALSE; - default: - return kFALSE; - } - return kFALSE; -} - -//_____________________________________________________________________ -std::vector<Float_t> TMVA::PDEFoam::GetCellTargets( std::vector<Float_t> &tvals, ETargetSelection ts ) -{ - // This function is used when the MultiTargetRegression==True - // option is set. It calculates the mean target or most probable - // target values if 'tvals' variables are given ('tvals' does not - // contain targets) - // - // Parameters: - // - tvals - transformed event variables (element of [0,1]) (no targets!) - // - ts - method of target selection (kMean, kMpv) - // - // Result: - // vetor of mean targets or most probable targets over all cells - // which first coordinates enclose 'tvals' - - std::vector<Float_t> target(GetTotDim()-tvals.size(), 0); // returned vector - std::vector<Float_t> norm(target); // normalisation - Double_t max_dens = 0.; // maximum cell density - - // find cells, which fit tvals (no targets) - std::vector<PDEFoamCell*> cells = FindCells(tvals); - if (cells.size()<1) return target; - - // loop over all cells found - std::vector<PDEFoamCell*>::iterator cell_it(cells.begin()); - for (cell_it=cells.begin(); cell_it!=cells.end(); cell_it++){ - - // get density of cell - Double_t cell_density = GetCellValue(*cell_it, kDensity); - - // get cell position and size - PDEFoamVect cellPosi(GetTotDim()), cellSize(GetTotDim()); - (*cell_it)->GetHcub(cellPosi, cellSize); - - // loop over all target dimensions, in order to calculate target - // value - if (ts==kMean){ - // sum cell density times cell center - for (UInt_t itar=0; itar<target.size(); itar++){ - UInt_t idim = itar+tvals.size(); - target.at(itar) += cell_density * - VarTransformInvers(idim, 
cellPosi[idim]+0.5*cellSize[idim]); - norm.at(itar) += cell_density; - } // loop over targets - } else { - // get cell center with maximum event density - if (cell_density > max_dens){ - max_dens = cell_density; // save new max density - // fill target values - for (UInt_t itar=0; itar<target.size(); itar++){ - UInt_t idim = itar+tvals.size(); - target.at(itar) = - VarTransformInvers(idim, cellPosi[idim]+0.5*cellSize[idim]); - } // loop over targets - } - } - } // loop over cells - - // normalise mean cell density - if (ts==kMean){ - for (UInt_t itar=0; itar<target.size(); itar++){ - if (norm.at(itar)>1.0e-15) - target.at(itar) /= norm.at(itar); - else - // normalisation factor is too small -> return approximate - // target value - target.at(itar) = (fXmax[itar+tvals.size()]-fXmin[itar+tvals.size()])/2.; - } - } - - return target; -} - -//_____________________________________________________________________ -std::vector<Float_t> TMVA::PDEFoam::GetProjectedRegValue( std::vector<Float_t> &vals, EKernel kernel, ETargetSelection ts ) -{ - // This function is used when the MultiTargetRegression==True option is set. - // Returns regression value i, given the event variables 'vals'. - // Note: number of foam dimensions = number of variables + number of targets - // - // Parameters: - // - vals - event variables (no targets) - // - kernel - used kernel (None or Gaus) - // - ts - method of target selection (Mean or Mpv) - - // checkt whether vals are within foam borders. 
- // if not -> push it into foam - const Float_t xsmall = 1.e-7; - for (UInt_t l=0; l<vals.size(); l++) { - if (vals.at(l) <= fXmin[l]){ - vals.at(l) = fXmin[l] + xsmall; - } - else if (vals.at(l) >= fXmax[l]){ - vals.at(l) = fXmax[l] - xsmall; - } - } - - // transform variables (vals) - std::vector<Float_t> txvec(VarTransform(vals)); - - // choose kernel - switch (kernel) { - case kNone: - return GetCellTargets(txvec, ts); - - case kGaus: { - - std::vector<Float_t> target(GetTotDim()-txvec.size(), 0); // returned vector - std::vector<Float_t> norm(target); // normalisation - - // loop over all active cells to calc gaus weighted target values - for (Long_t ice=0; ice<=fLastCe; ice++) { - if (!(fCells[ice]->GetStat())) continue; - - // weight with gaus only in non-target dimensions! - Double_t gau = WeightGaus(fCells[ice], txvec, vals.size()); - - PDEFoamVect cellPosi(GetTotDim()), cellSize(GetTotDim()); - fCells[ice]->GetHcub(cellPosi, cellSize); - - // fill new vector with coordinates of new cell - std::vector<Float_t> new_vec; - for (UInt_t k=0; k<txvec.size(); k++) - new_vec.push_back(cellPosi[k] + 0.5*cellSize[k]); - - std::vector<Float_t> val = GetCellTargets(new_vec, ts); - for (UInt_t itar=0; itar<target.size(); itar++){ - target.at(itar) += gau * val.at(itar); - norm.at(itar) += gau; - } - } - - // normalisation - for (UInt_t itar=0; itar<target.size(); itar++){ - if (norm.at(itar)<1.0e-20){ - Log() << kWARNING << "Warning: norm too small!" << Endl; - target.at(itar) = 0.; - } else - target.at(itar) /= norm.at(itar); - } - return target; - } - break; - - default: - Log() << kFATAL << "<GetProjectedRegValue>: unsupported kernel!" 
<< Endl; - return std::vector<Float_t>(GetTotDim()-txvec.size(), 0); - } - - return std::vector<Float_t>(GetTotDim()-txvec.size(), 0); -} - -//_____________________________________________________________________ -Double_t TMVA::PDEFoam::GetCellDensity( std::vector<Float_t> &xvec, EKernel kernel ) -{ - // Returns density (=number of entries / volume) of cell that - // encloses the untransformed event vector 'xvec'. This function - // is called by GetMvaValue() in case of two separated foams - // (signal and background). 'kernel' can be either kNone or kGaus. - - std::vector<Float_t> txvec(VarTransform(xvec)); - PDEFoamCell *cell = FindCell(txvec); - - if (!cell) { - Log() << kFATAL << "<GetCellDensity(event)> ERROR: No cell found!" << Endl; - return -999.; - } - - switch (kernel) { - case kNone: - // return cell entries over cell volume - return GetCellValue(cell, kDensity); - - case kGaus: { - // return gaus weighted cell density - - Double_t result = 0; - Double_t norm = 0.; - - for (Long_t iCell=0; iCell<=fLastCe; iCell++) { - if (!(fCells[iCell]->GetStat())) continue; - - // calc cell density - Double_t cell_dens = GetCellValue(fCells[iCell], kDensity); - Double_t gau = WeightGaus(fCells[iCell], txvec); - - result += gau * cell_dens; - norm += gau; - } - - return result / norm; - } - break; - - case kLinN: - return WeightLinNeighbors(txvec, kDensity); - - default: - Log() << kFATAL << "<GetCellDensity(event)> unknown kernel!" << Endl; - return 0.; - } - - return 0; -} - -//_____________________________________________________________________ -Double_t TMVA::PDEFoam::GetCellValue( PDEFoamCell* cell, ECellValue cv ) -{ - // This function returns a value, which was saved in the foam cell, - // depending on the foam type. The value to return is specified - // with the 'cv' parameter. 
- - switch(cv){ - - case kTarget0: - if (GetFoamType() == kMonoTarget) return GetCellElement(cell, 0); - break; - - case kTarget0Error: - if (GetFoamType() == kMonoTarget) return GetCellElement(cell, 1); - break; - - case kDiscriminator: - if (GetFoamType() == kDiscr) return GetCellElement(cell, 0); - break; - - case kDiscriminatorError: - if (GetFoamType() == kDiscr) return GetCellElement(cell, 1); - break; - - case kMeanValue: - return cell->GetIntg(); - break; - - case kRms: - return cell->GetDriv(); - break; - - case kRmsOvMean: - if (cell->GetIntg() != 0) return cell->GetDriv()/cell->GetIntg(); - break; - - case kNev: - if (GetFoamType() == kSeparate || GetFoamType() == kMultiTarget) - return GetCellElement(cell, 0); - break; - - case kDensity: { - - Double_t volume = cell->GetVolume(); - if ( volume > 1.0e-10 ){ - return GetCellValue(cell, kNev)/volume; - } else { - if (volume<=0){ - cell->Print("1"); // debug output - Log() << kWARNING << "<GetCellDensity(cell)>: ERROR: cell volume" - << " negative or zero!" - << " ==> return cell density 0!" - << " cell volume=" << volume - << " cell entries=" << GetCellValue(cell, kNev) << Endl; - return 0; - } else - Log() << kWARNING << "<GetCellDensity(cell)>: WARNING: cell volume" - << " close to zero!" - << " cell volume: " << volume << Endl; - } - } // kDensity - - default: - return 0; - } - - return 0; -} - -//_____________________________________________________________________ -Double_t TMVA::PDEFoam::GetCellValue(std::vector<Float_t> &xvec, ECellValue cv) -{ - // This function finds the cell, which corresponds to the given - // untransformed event vector 'xvec' and return its value, which is - // given by the parameter 'cv'. 
- - std::vector<Float_t> txvec(VarTransform(xvec)); - return GetCellValue(FindCell(txvec), cv); -} - -//_____________________________________________________________________ -Double_t TMVA::PDEFoam::GetBuildUpCellEvents( PDEFoamCell* cell ) -{ - // Returns the number of events, saved in the 'cell' during foam build-up. - // Only used during foam build-up! - return GetCellElement(cell, 0); -} - -//_____________________________________________________________________ -Double_t TMVA::PDEFoam::WeightLinNeighbors( std::vector<Float_t> &txvec, ECellValue cv, Int_t dim1, Int_t dim2, Bool_t TreatEmptyCells ) -{ - // Returns the cell value, corresponding to 'txvec' (foam - // coordinates [0,1]), weighted by the neighbor cells via a linear - // function. - // - // Parameters: - // - txvec - event vector, transformed into foam coordinates [0,1] - // - // - cv - cell value to be weighted - // - // - dim1, dim2 - dimensions for two-dimensional projection. - // Default values: dim1 = dim2 = -1 - // If dim1 and dim2 are set to values >=0 and <fDim, than - // the function GetProjectionCellValue() is used to get cell - // value. This is used for projection to two dimensions within - // Project2(). - // - // - TreatEmptyCells - if this option is set false (default), - // it is not checked, wether the cell or its neighbors are empty - // or not. If this option is set true, than only non-empty - // neighbor cells are taken into account for weighting. If the - // cell, which contains txvec is empty, than its value is - // replaced by the average value of the non-empty neighbor cells - - Double_t result = 0.; - UInt_t norm = 0; - const Double_t xoffset = 1.e-6; - - if (txvec.size() != UInt_t(GetTotDim())) - Log() << kFATAL << "Wrong dimension of event variable!" 
<< Endl; - - // find cell, which contains txvec - PDEFoamCell *cell= FindCell(txvec); - PDEFoamVect cellSize(GetTotDim()); - PDEFoamVect cellPosi(GetTotDim()); - cell->GetHcub(cellPosi, cellSize); - // calc value of cell, which contains txvec - Double_t cellval = 0; - if (!(TreatEmptyCells && CellValueIsUndefined(cell))) - // cell is not empty -> get cell value - cellval = GetCellValue(cell, cv); - else - // cell is empty -> get average value of non-empty neighbor - // cells - cellval = GetAverageNeighborsValue(txvec, cv); - - // loop over all dimensions to find neighbor cells - for (Int_t dim=0; dim<GetTotDim(); dim++) { - std::vector<Float_t> ntxvec(txvec); - Double_t mindist; - PDEFoamCell *mindistcell = 0; // cell with minimal distance to txvec - // calc minimal distance to neighbor cell - mindist = (txvec[dim]-cellPosi[dim])/cellSize[dim]; - if (mindist<0.5) { // left neighbour - ntxvec[dim] = cellPosi[dim]-xoffset; - mindistcell = FindCell(ntxvec); // left neighbor cell - } else { // right neighbour - mindist=1-mindist; - ntxvec[dim] = cellPosi[dim]+cellSize[dim]+xoffset; - mindistcell = FindCell(ntxvec); // right neighbor cell - } - Double_t mindistcellval = 0; // value of cell, which contains ntxvec - if (dim1>=0 && dim1<GetTotDim() && - dim2>=0 && dim2<GetTotDim() && - dim1!=dim2){ - cellval = GetProjectionCellValue(cell, dim1, dim2, cv); - mindistcellval = GetProjectionCellValue(mindistcell, dim1, dim2, cv); - } else { - mindistcellval = GetCellValue(mindistcell, cv); - } - // if treatment of empty neighbor cells is deactivated, do - // normal weighting - if (!(TreatEmptyCells && CellValueIsUndefined(mindistcell))){ - result += cellval * (0.5 + mindist); - result += mindistcellval * (0.5 - mindist); - norm++; - } - } - if (norm==0) return cellval; // all nearest neighbors were empty - else return result/norm; // normalisation -} - -//_____________________________________________________________________ -Float_t TMVA::PDEFoam::WeightGaus( PDEFoamCell* 
cell, std::vector<Float_t> &txvec, - UInt_t dim ) -{ - // Returns the gauss weight between the 'cell' and a given coordinate 'txvec'. - // - // Parameters: - // - cell - the cell - // - // - txvec - the transformed event variables (in [0,1]) (coordinates <0 are - // set to 0, >1 are set to 1) - // - // - dim - number of dimensions for the calculation of the euclidean distance. - // If dim=0, all dimensions of the foam are taken. Else only the first 'dim' - // coordinates of 'txvec' are used for the calculation of the euclidean distance. - // - // Returns: - // exp(-(d/sigma)^2/2), where - // - d - is the euclidean distance between 'txvec' and the point of the 'cell' - // which is most close to 'txvec' (in order to avoid artefacts because of the - // form of the cells). - // - sigma = 1/VolFrac - - // get cell coordinates - PDEFoamVect cellSize(GetTotDim()); - PDEFoamVect cellPosi(GetTotDim()); - cell->GetHcub(cellPosi, cellSize); - - // calc normalized distance - UInt_t dims; // number of dimensions for gaus weighting - if (dim == 0) - dims = GetTotDim(); // use all dimensions of cell txvec for weighting - else if (dim <= UInt_t(GetTotDim())) - dims = dim; // use only 'dim' dimensions of cell txvec for weighting - else { - Log() << kFATAL << "ERROR: too many given dimensions for Gaus weight!" << Endl; - return 0.; - } - - // calc position of nearest edge of cell - std::vector<Float_t> cell_center; - for (UInt_t i=0; i<dims; i++){ - if (txvec[i]<0.) txvec[i]=0.; - if (txvec[i]>1.) 
txvec[i]=1.; - //cell_center.push_back(cellPosi[i] + (0.5*cellSize[i])); - if (cellPosi[i] > txvec.at(i)) - cell_center.push_back(cellPosi[i]); - else if (cellPosi[i]+cellSize[i] < txvec.at(i)) - cell_center.push_back(cellPosi[i]+cellSize[i]); - else - cell_center.push_back(txvec.at(i)); - } - - Float_t distance = 0.; // distance for weighting - for (UInt_t i=0; i<dims; i++) - distance += Sqr(txvec.at(i)-cell_center.at(i)); - distance = TMath::Sqrt(distance); - - Float_t width = 1./GetVolumeFraction(); - if (width < 1.0e-10) - Log() << kWARNING << "Warning: wrong volume fraction: " << GetVolumeFraction() << Endl; - - // weight with Gaus with sigma = 1/VolFrac - return TMath::Gaus(distance, 0, width, kFALSE); -} - -//_____________________________________________________________________ -TMVA::PDEFoamCell* TMVA::PDEFoam::FindCell( std::vector<Float_t> &xvec ) -{ - // Find cell that contains 'xvec' (in foam coordinates [0,1]). - // - // Loop to find cell that contains 'xvec' starting at root cell, - // and traversing binary tree to find the cell quickly. Note, that - // if 'xvec' lies outside the foam, the cell which is nearest to - // 'xvec' is returned. (The returned pointer should never be - // NULL.) - - PDEFoamVect cellPosi0(GetTotDim()), cellSize0(GetTotDim()); - PDEFoamCell *cell, *cell0; - - cell=fCells[0]; // start with root cell - Int_t idim=0; - while (cell->GetStat()!=1) { //go down binary tree until cell is found - idim=cell->GetBest(); // dimension that changed - cell0=cell->GetDau0(); - cell0->GetHcub(cellPosi0,cellSize0); - - if (xvec.at(idim)<=cellPosi0[idim]+cellSize0[idim]) - cell=cell0; - else - cell=(cell->GetDau1()); - } - return cell; -} - -//_____________________________________________________________________ -void TMVA::PDEFoam::FindCellsRecursive(std::vector<Float_t> &txvec, PDEFoamCell* cell, std::vector<PDEFoamCell*> &cells) -{ - // This is a helper function for FindCells(). It saves in 'cells' - // all cells, which contain txvec. 
It works analogous to - // FindCell(). - // - // Parameters: - // - // - txvec - vector of variables (no targets!) (transformed into - // foam) - // - // - cell - cell to start searching with (usually root cell - // fCells[0]) - // - // - cells - list of cells found - - PDEFoamVect cellPosi0(GetTotDim()), cellSize0(GetTotDim()); - PDEFoamCell *cell0; - Int_t idim=0; - - while (cell->GetStat()!=1) { //go down binary tree until cell is found - idim=cell->GetBest(); // dimension that changed - - if (idim < Int_t(txvec.size())){ - // case 1: cell is splitten in dimension of a variable - cell0=cell->GetDau0(); - cell0->GetHcub(cellPosi0,cellSize0); - // check, whether left daughter cell contains txvec - if (txvec.at(idim)<=cellPosi0[idim]+cellSize0[idim]) - cell=cell0; - else - cell=cell->GetDau1(); - } else { - // case 2: cell is splitten in target dimension - FindCellsRecursive(txvec, cell->GetDau0(), cells); - FindCellsRecursive(txvec, cell->GetDau1(), cells); - return; - } - } - cells.push_back(cell); -} - -//_____________________________________________________________________ -std::vector<TMVA::PDEFoamCell*> TMVA::PDEFoam::FindCells(std::vector<Float_t> &txvec) -{ - // Find all cells, that contain txvec. This function can be used, - // when the dimension of the foam is greater than the dimension of - // txvec. E.G this is the case for multi-target regression - // - // Parameters: - // - // - txvec - vector of variables (no targets!) 
(transformed into - // foam) - // - // Return value: - // - // - vector of cells, that fit txvec - - std::vector<PDEFoamCell*> cells(0); - - // loop over all target dimensions - FindCellsRecursive(txvec, fCells[0], cells); - - return cells; -} - -//_____________________________________________________________________ -TH1D* TMVA::PDEFoam::Draw1Dim( const char *opt, Int_t nbin ) -{ - // Draws 1-dimensional foam (= histogram) - // - // Parameters: - // - // - opt - cell_value, rms, rms_ov_mean - // if cell_value is set, the following values will be filled into - // the result histogram: - // - number of events - in case of classification with 2 separate - // foams or multi-target regression - // - discriminator - in case of classification with one - // unified foam - // - target - in case of mono-target regression - // - // - nbin - number of bins of result histogram - // - // Warning: This function is not well tested! - - // avoid plotting of wrong dimensions - if ( GetTotDim()!=1 ) return 0; - - // select value to plot - ECellValue cell_value = kNev; - EFoamType foam_type = GetFoamType(); - if (strcmp(opt,"cell_value")==0){ - switch (foam_type) { - case kSeparate: - case kMultiTarget: - cell_value = kNev; - break; - case kDiscr: - cell_value = kDiscriminator; - break; - case kMonoTarget: - cell_value = kTarget0; - break; - default: - Log() << kFATAL << "<Draw1Dim>: unknown foam type" << Endl; - return 0; - } - } else if (strcmp(opt,"rms")==0){ - cell_value = kRms; - } else if (strcmp(opt,"rms_ov_mean")==0){ - cell_value = kRmsOvMean; - } else { - Log() << kFATAL << "<Draw1Dim>: unknown option:" << opt << Endl; - return 0; - } - - - TString hname(Form("h%s",opt)); + // avoid plotting of wrong dimensions + if ( GetTotDim()!=1 ) + Log() << kFATAL << "<Draw1Dim>: function can only be used for 1-dimensional foams!" 
+ << Endl; + TString hname("h_1dim"); TH1D* h1=(TH1D*)gDirectory->Get(hname); if (h1) delete h1; - h1= new TH1D(hname, Form("1-dimensional Foam: %s", opt), nbin, fXmin[0], fXmax[0]); + h1= new TH1D(hname, "1-dimensional Foam", nbin, fXmin[0], fXmax[0]); if (!h1) Log() << kFATAL << "ERROR: Can not create histo" << hname << Endl; - std::vector<Float_t> xvec(GetTotDim(), 0.); - // loop over all bins - for (Int_t ibinx=1; ibinx<=nbin; ibinx++) { //loop over x-bins - xvec.at(0) = h1->GetBinCenter(ibinx); - - // transform xvec - std::vector<Float_t> txvec(VarTransform(xvec)); - - // loop over all active cells - for (Long_t iCell=0; iCell<=fLastCe; iCell++) { - if (!(fCells[iCell]->GetStat())) continue; // cell not active -> continue - - // get cell position and dimesions - PDEFoamVect cellPosi(GetTotDim()), cellSize(GetTotDim()); - fCells[iCell]->GetHcub(cellPosi,cellSize); - - // compare them with txvec - const Double_t xsmall = 1.e-10; - if (!( (txvec.at(0)>cellPosi[0]-xsmall) && - (txvec.at(0)<=cellPosi[0]+cellSize[0]+xsmall) ) ) - continue; - - Double_t vol = fCells[iCell]->GetVolume(); - if (vol<1e-10) { - Log() << kWARNING << "Project: ERROR: Volume too small!" 
<< Endl; - continue; - } - - // filling value to histogram - h1->SetBinContent(ibinx, - GetCellValue(fCells[iCell], cell_value) + h1->GetBinContent(ibinx)); + for (Int_t ibinx=1; ibinx<=h1->GetNbinsX(); ++ibinx) { + // get event vector corresponding to bin + std::vector<Float_t> txvec; + txvec.push_back( VarTransform(0, h1->GetBinCenter(ibinx)) ); + Float_t val = 0; + if (kernel != NULL) { + // get cell value using the kernel + val = kernel->Estimate(this, txvec, cell_value); + } else { + val = GetCellValue(FindCell(txvec), cell_value); } + // fill value to histogram + h1->SetBinContent(ibinx, val + h1->GetBinContent(ibinx)); } + return h1; } //_____________________________________________________________________ -TH2D* TMVA::PDEFoam::Project2( Int_t idim1, Int_t idim2, const char *opt, const char *ker, UInt_t nbin ) +TH2D* TMVA::PDEFoam::Project2( Int_t idim1, Int_t idim2, ECellValue cell_value, PDEFoamKernelBase *kernel, UInt_t nbin ) { // Project foam variable idim1 and variable idim2 to histogram. // @@ -2182,18 +1231,14 @@ TH2D* TMVA::PDEFoam::Project2( Int_t idim1, Int_t idim2, const char *opt, const // // - idim1, idim2 - dimensions to project to // - // - opt - cell_value, rms, rms_ov_mean - // if cell_value is set, the following values will be filled into - // the result histogram: - // - number of events - in case of classification with 2 separate - // foams or multi-target regression - // - discriminator - in case of classification with one - // unified foam - // - target - in case of mono-target regression + // - cell_value - the cell value to draw // - // - ker - kGaus, kNone (warning: Gaus may be very slow!) + // - kernel - a PDEFoam kernel (optional). If NULL is given, the + // kernel is ignored and the pure cell values are + // plotted. // - // - nbin - number of bins in x and y direction of result histogram. + // - nbin - number of bins in x and y direction of result histogram + // (optional, default is 50). 
// // Returns: // a 2-dimensional histogram @@ -2202,46 +1247,8 @@ TH2D* TMVA::PDEFoam::Project2( Int_t idim1, Int_t idim2, const char *opt, const if ((idim1>=GetTotDim()) || (idim1<0) || (idim2>=GetTotDim()) || (idim2<0) || (idim1==idim2) ) - return 0; - - // select value to plot - ECellValue cell_value = kNev; - EFoamType foam_type = GetFoamType(); - if (strcmp(opt,"cell_value")==0){ - switch (foam_type) { - case kSeparate: - case kMultiTarget: - cell_value = kNev; - break; - case kDiscr: - cell_value = kDiscriminator; - break; - case kMonoTarget: - cell_value = kTarget0; - break; - default: - Log() << kFATAL << "<Draw1Dim>: unknown foam type" << Endl; - return 0; - } - } else if (strcmp(opt,"rms")==0){ - cell_value = kRms; - } else if (strcmp(opt,"rms_ov_mean")==0){ - cell_value = kRmsOvMean; - } else { - Log() << kFATAL << "unknown option given" << Endl; - return 0; - } - - // select kernel to use - EKernel kernel = kNone; - if (!strcmp(ker, "kNone")) - kernel = kNone; - else if (!strcmp(ker, "kGaus")) - kernel = kGaus; - else if (!strcmp(ker, "kLinN")) - kernel = kLinN; - else - Log() << kWARNING << "Warning: wrong kernel! 
using kNone instead" << Endl; + Log() << kFATAL << "<Project2>: wrong dimensions given: " + << idim1 << ", " << idim2 << Endl; // root can not handle too many bins in one histogram --> catch this // Furthermore, to have more than 1000 bins in the histogram doesn't make @@ -2257,193 +1264,137 @@ TH2D* TMVA::PDEFoam::Project2( Int_t idim1, Int_t idim2, const char *opt, const } // create result histogram - TString hname(Form("h%s_%d_vs_%d",opt,idim1,idim2)); + TString hname(Form("h_%d_vs_%d",idim1,idim2)); // if histogram with this name already exists, delete it TH2D* h1=(TH2D*)gDirectory->Get(hname.Data()); if (h1) delete h1; - h1= new TH2D(hname.Data(), Form("%s var%d vs var%d",opt,idim1,idim2), nbin, fXmin[idim1], fXmax[idim1], nbin, fXmin[idim2], fXmax[idim2]); + h1= new TH2D(hname.Data(), Form("var%d vs var%d",idim1,idim2), nbin, fXmin[idim1], fXmax[idim1], nbin, fXmin[idim2], fXmax[idim2]); if (!h1) Log() << kFATAL << "ERROR: Can not create histo" << hname << Endl; // ============== start projection algorithm ================ - // loop over all active cells - for (Long_t iCell=0; iCell<=fLastCe; iCell++) { // loop over all active cells - if (!(fCells[iCell]->GetStat())) continue; // cell not active -> continue - - // get cell position and dimesions - PDEFoamVect cellPosi(GetTotDim()), cellSize(GetTotDim()); - fCells[iCell]->GetHcub(cellPosi,cellSize); - - // get cell value (depending on the option) - // this value will later be filled into the histogram - Double_t var = GetProjectionCellValue(fCells[iCell], idim1, idim2, cell_value); - - // coordinates of upper left corner of cell - Double_t x1 = VarTransformInvers( idim1, cellPosi[idim1] ); - Double_t y1 = VarTransformInvers( idim2, cellPosi[idim2] ); - - // coordinates of lower right corner of cell - Double_t x2 = VarTransformInvers( idim1, cellPosi[idim1]+cellSize[idim1] ); - Double_t y2 = VarTransformInvers( idim2, cellPosi[idim2]+cellSize[idim2] ); - - // most left and most right bins, which correspond to 
cell - // borders - Int_t xbin_start = TMath::Max(1, h1->GetXaxis()->FindBin(x1)); - Int_t xbin_stop = h1->GetXaxis()->FindBin(x2); - - // upper and lower bins, which correspond to cell borders - Int_t ybin_start = TMath::Max(1, h1->GetYaxis()->FindBin(y1)); - Int_t ybin_stop = h1->GetYaxis()->FindBin(y2); - - // loop over all bins, which the cell occupies - for (Int_t ibinx=xbin_start; ibinx<xbin_stop; ibinx++) { //loop over x-bins - for (Int_t ibiny=ybin_start; ibiny<ybin_stop; ibiny++) { //loop over y-bins - - ////////////////////// weight with kernel /////////////////////// - if (kernel == kGaus){ - Double_t result = 0.; - Double_t norm = 0.; - - // calc current position (depending on ibinx, ibiny) - Double_t x_curr = - VarTransform( idim1, ((x2-x1)*ibinx - x2*xbin_start + x1*xbin_stop)/(xbin_stop-xbin_start) ); - Double_t y_curr = - VarTransform( idim2, ((y2-y1)*ibiny - y2*ybin_start + y1*ybin_stop)/(ybin_stop-ybin_start) ); - - // loop over all active cells - for (Long_t ice=0; ice<=fLastCe; ice++) { - if (!(fCells[ice]->GetStat())) continue; - - // get cell value (depending on option) - Double_t cell_var = GetProjectionCellValue(fCells[ice], idim1, idim2, cell_value); - - // fill ndim coordinate of current cell - std::vector<Float_t> coor; - for (Int_t i=0; i<GetTotDim(); i++) { - if (i == idim1) - coor.push_back(x_curr); - else if (i == idim2) - coor.push_back(y_curr); - else - coor.push_back(cellPosi[i] + 0.5*cellSize[i]); // approximation - } - - // calc weighted value - Double_t weight_ = WeightGaus(fCells[ice], coor); - - result += weight_ * cell_var; - norm += weight_; - } - var = result/norm; - } - else if (kernel == kLinN){ - // calc current position (depending on ibinx, ibiny) - Double_t x_curr = - VarTransform( idim1, ((x2-x1)*ibinx - x2*xbin_start + x1*xbin_stop)/(xbin_stop-xbin_start) ); - Double_t y_curr = - VarTransform( idim2, ((y2-y1)*ibiny - y2*ybin_start + y1*ybin_stop)/(ybin_stop-ybin_start) ); - - // fill ndim coordinate of current cell - 
std::vector<Float_t> coor; - for (Int_t i=0; i<GetTotDim(); i++) { - if (i == idim1) - coor.push_back(x_curr); - else if (i == idim2) - coor.push_back(y_curr); - else - coor.push_back(cellPosi[i] + 0.5*cellSize[i]); // approximation - } - - var = WeightLinNeighbors(coor, cell_value, idim1, idim2); - } - ////////////////////// END weight with kernel /////////////////////// - - // filling value to histogram - h1->SetBinContent(ibinx, ibiny, var + h1->GetBinContent(ibinx, ibiny)); - } // y-loop - } // x-loop - } // cell loop + // loop over all histogram bins (2-dim) + for (Int_t xbin = 1; xbin <= h1->GetNbinsX(); ++xbin) { + for (Int_t ybin = 1; ybin <= h1->GetNbinsY(); ++ybin) { + // calculate the phase space point, which corresponds to this + // bin combination + std::map<Int_t, Float_t> txvec; + txvec[idim1] = VarTransform(idim1, h1->GetXaxis()->GetBinCenter(xbin)); + txvec[idim2] = VarTransform(idim2, h1->GetYaxis()->GetBinCenter(ybin)); + + // find the cells, which corresponds to this phase space + // point + std::vector<TMVA::PDEFoamCell*> cells = FindCells(txvec); + + // loop over cells and fill the histogram with the cell + // values + Float_t sum_cv = 0; // sum of the cell values + for (std::vector<TMVA::PDEFoamCell*>::const_iterator it = cells.begin(); + it != cells.end(); ++it) { + // get cell position and size + PDEFoamVect cellPosi(GetTotDim()), cellSize(GetTotDim()); + (*it)->GetHcub(cellPosi,cellSize); + // Create complete event vector from txvec. The missing + // coordinates of txvec are set to the cell center. 
+ std::vector<Float_t> tvec; + for (Int_t i=0; i<GetTotDim(); ++i) { + if ( i != idim1 && i != idim2 ) + tvec.push_back(cellPosi[i] + 0.5*cellSize[i]); + else + tvec.push_back(txvec[i]); + } + if (kernel != NULL) { + // get the cell value using the kernel + sum_cv += kernel->Estimate(this, tvec, cell_value); + } else { + sum_cv += GetCellValue(FindCell(tvec), cell_value); + } + } + + // fill the bin content + h1->SetBinContent(xbin, ybin, sum_cv + h1->GetBinContent(xbin, ybin)); + } + } return h1; } //_____________________________________________________________________ -Double_t TMVA::PDEFoam::GetProjectionCellValue( PDEFoamCell* cell, - Int_t idim1, - Int_t idim2, - ECellValue cv ) +Float_t TMVA::PDEFoam::GetCellValue(PDEFoamCell* cell, ECellValue cv) { - // Helper function for projection function Project2(). It returns - // the cell value of 'cell' corresponding to the given option 'cv'. - // The two dimensions are needed for weighting the return value, - // because Project2() projects the foam to two dimensions. - - // get cell position and dimesions - PDEFoamVect cellPosi(GetTotDim()), cellSize(GetTotDim()); - cell->GetHcub(cellPosi,cellSize); - const Double_t foam_area = (fXmax[idim1]-fXmin[idim1])*(fXmax[idim2]-fXmin[idim2]); + // Returns the cell value of 'cell' corresponding to the given + // option 'cv'. This function should be overridden by the subclass + // in order to specify which cell elements to return for a given + // cell value 'cv'. By default kValue returns cell element 0, and + // kValueError returns cell element 1. // calculate cell value (depending on the given option 'cv') switch (cv) { - case kNev: { - // calculate projected area of cell - Double_t area = cellSize[idim1] * cellSize[idim2]; - if (area<1e-20){ - Log() << kWARNING << "<Project2>: Warning, cell volume too small --> skiping cell!" 
<< Endl; - return 0; - } - // calc cell entries per projected cell area - return GetCellValue(cell, kNev)/(area*foam_area); + case kValue: + return GetCellElement(cell, 0); + + case kValueError: + return GetCellElement(cell, 1); + + case kValueDensity: { + + Double_t volume = cell->GetVolume(); + if (volume > numeric_limits<double>::epsilon()) { + return GetCellValue(cell, kValue)/volume; + } else { + if (volume<=0){ + cell->Print("1"); // debug output + Log() << kWARNING << "<GetCellDensity(cell)>: ERROR: cell volume" + << " negative or zero!" + << " ==> return cell density 0!" + << " cell volume=" << volume + << " cell entries=" << GetCellValue(cell, kValue) << Endl; + } else { + Log() << kWARNING << "<GetCellDensity(cell)>: WARNING: cell volume" + << " close to zero!" + << " cell volume: " << volume << Endl; + } + } } - // ========================================================= + return 0; + + case kMeanValue: + return cell->GetIntg(); + case kRms: - return GetCellValue(cell, kRms); - // ========================================================= + return cell->GetDriv(); + case kRmsOvMean: - return GetCellValue(cell, kRmsOvMean); - // ========================================================= - case kDiscriminator: { - // calculate cell volume in other dimensions (not including idim1 and idim2) - Double_t area_cell = 1.; - for (Int_t d1=0; d1<GetTotDim(); d1++){ - if ((d1!=idim1) && (d1!=idim2)) - area_cell *= cellSize[d1]; - } - if (area_cell<1e-20){ - Log() << kWARNING << "<Project2>: Warning, cell volume too small --> skiping cell!" 
<< Endl; + if (cell->GetIntg() != 0) + return cell->GetDriv()/cell->GetIntg(); + else return 0; - } - // calc discriminator * (cell area times foam area) - // foam is normalized -> length of foam = 1.0 - return GetCellValue(cell, kDiscriminator)*area_cell; - } - // ========================================================= - case kDiscriminatorError: - return GetCellValue(cell, kDiscriminator); - // ========================================================= - case kTarget0: - // plot mean over all underlying cells? - return GetCellValue(cell, kTarget0); + case kCellVolume: + return cell->GetVolume(); + default: - Log() << kFATAL << "<Project2>: unknown option" << Endl; + Log() << kFATAL << "<GetCellValue>: unknown cell value" << Endl; return 0; } + + return 0; } //_____________________________________________________________________ Double_t TMVA::PDEFoam::GetCellElement( PDEFoamCell *cell, UInt_t i ) { - // Returns cell element i of cell 'cell'. - - if (i >= GetNElements()) Log() << kFATAL << "ERROR: Index out of range" << Endl; + // Returns cell element i of cell 'cell'. If the cell has no + // elements or the index 'i' is out of range, than 0 is returned. // dynamic_cast doesn't seem to work here ?! TVectorD *vec = (TVectorD*)cell->GetElement(); - if (!vec) Log() << kFATAL << "<GetCellElement> ERROR: cell element is not a TVectorD*" << Endl; + // if vec is not set or index out of range, return 0 + if (!vec || i >= (UInt_t) vec->GetNrows()) + return 0; return (*vec)(i); } @@ -2451,19 +1402,29 @@ Double_t TMVA::PDEFoam::GetCellElement( PDEFoamCell *cell, UInt_t i ) //_____________________________________________________________________ void TMVA::PDEFoam::SetCellElement( PDEFoamCell *cell, UInt_t i, Double_t value ) { - // Set cell element i of cell to value. - - if (i >= GetNElements()) { - Log() << kFATAL << "ERROR: Index out of range" << Endl; - return; + // Set cell element i of cell to value. If the cell element i does + // not exist, it is created. 
+ + TVectorD *vec = NULL; + + // if no cell elements are set, create TVectorD with i+1 entries, + // ranging from [0,i] + if (cell->GetElement() == NULL) { + vec = new TVectorD(i+1); + vec->Zero(); // set all values to zero + (*vec)(i) = value; // set element i to value + cell->SetElement(vec); + } else { + // dynamic_cast doesn't seem to work here ?! + vec = (TVectorD*)cell->GetElement(); + if (!vec) + Log() << kFATAL << "<SetCellElement> ERROR: cell element is not a TVectorD*" << Endl; + // check vector size and resize if necessary + if (i >= (UInt_t) vec->GetNrows()) + vec->ResizeTo(0,i); + // set element i to value + (*vec)(i) = value; } - - // dynamic_cast doesn't seem to work here ?! - TVectorD *vec = (TVectorD*)cell->GetElement(); - - if (!vec) Log() << kFATAL << "<SetCellElement> ERROR: cell element is not a TVectorD*" << Endl; - - (*vec)(i) = value; } //_____________________________________________________________________ @@ -2486,7 +1447,7 @@ void TMVA::PDEFoam::OutputGrow( Bool_t finished ) //_____________________________________________________________________ void TMVA::PDEFoam::RootPlot2dim( const TString& filename, TString opt, - Bool_t CreateCanvas, Bool_t colors, Bool_t log_colors ) + Bool_t CreateCanvas, Bool_t colors ) { // Debugging tool which plots the cells of a 2-dimensional PDEFoam // as rectangles in C++ format readable for ROOT. @@ -2511,8 +1472,6 @@ void TMVA::PDEFoam::RootPlot2dim( const TString& filename, TString opt, // // - colors - whether to fill cells with colors or shades of grey // - // - log_colors - whether to fill cells with colors (logarithmic scale) - // // Example: // // The following commands load a mono-target regression foam from @@ -2531,26 +1490,11 @@ void TMVA::PDEFoam::RootPlot2dim( const TString& filename, TString opt, << "two-dimensional foams!" 
<< Endl; // select value to plot - ECellValue cell_value = kNev; - EFoamType foam_type = GetFoamType(); + ECellValue cell_value = kValue; Bool_t plotcellnumber = kFALSE; Bool_t fillcells = kTRUE; if (opt.Contains("cell_value")){ - switch (foam_type) { - case kSeparate: - case kMultiTarget: - cell_value = kNev; - break; - case kDiscr: - cell_value = kDiscriminator; - break; - case kMonoTarget: - cell_value = kTarget0; - break; - default: - Log() << kFATAL << "<Draw1Dim>: unknown foam type" << Endl; - return; - } + cell_value = kValue; } else if (opt.Contains("rms_ov_mean")){ cell_value = kRmsOvMean; } else if (opt.Contains("rms")){ @@ -2596,15 +1540,15 @@ void TMVA::PDEFoam::RootPlot2dim( const TString& filename, TString opt, if (fillcells) (colors ? gStyle->SetPalette(1, 0) : gStyle->SetPalette(0) ); - Double_t zmin = 1E8; // minimal value (for color calculation) - Double_t zmax = -1E8; // maximal value (for color calculation) + Float_t zmin = 1E8; // minimal value (for color calculation) + Float_t zmax = -1E8; // maximal value (for color calculation) // if cells shall be filled, calculate minimal and maximal plot // value --> store in zmin and zmax if (fillcells) { for (Long_t iCell=1; iCell<=fLastCe; iCell++) { if ( fCells[iCell]->GetStat() == 1) { - Double_t value = GetCellValue(fCells[iCell], cell_value); + Float_t value = GetCellValue(fCells[iCell], cell_value); if (value<zmin) zmin=value; if (value>zmax) @@ -2612,28 +1556,19 @@ void TMVA::PDEFoam::RootPlot2dim( const TString& filename, TString opt, } } outfile << "// observed minimum and maximum of distribution: " << std::endl; - outfile << "// Double_t zmin = "<< zmin << ";" << std::endl; - outfile << "// Double_t zmax = "<< zmax << ";" << std::endl; + outfile << "// Float_t zmin = "<< zmin << ";" << std::endl; + outfile << "// Float_t zmax = "<< zmax << ";" << std::endl; } - if (log_colors) { - if (zmin<1) - zmin=1; - zmin=TMath::Log(zmin); - zmax=TMath::Log(zmax); - outfile << "// logarthmic color scale 
used " << std::endl; - } - else outfile << "// linear color scale used " << std::endl; - outfile << "// used minimum and maximum of distribution (taking into account log scale if applicable): " << std::endl; - outfile << "Double_t zmin = "<< zmin << ";" << std::endl; - outfile << "Double_t zmax = "<< zmax << ";" << std::endl; + outfile << "Float_t zmin = "<< zmin << ";" << std::endl; + outfile << "Float_t zmax = "<< zmax << ";" << std::endl; - Double_t x1,y1,x2,y2,x,y; // box and text coordintates - Double_t offs = 0.01; - Double_t lpag = 1-2*offs; + Float_t x1,y1,x2,y2,x,y; // box and text coordintates + Float_t offs = 0.01; + Float_t lpag = 1-2*offs; Int_t ncolors = colors ? gStyle->GetNumberOfColors() : 100; - Double_t scale = (ncolors-1)/(zmax - zmin); + Float_t scale = (ncolors-1)/(zmax - zmin); PDEFoamVect cellPosi(GetTotDim()), cellSize(GetTotDim()); // loop over cells and draw a box for every cell (and maybe the @@ -2649,12 +1584,7 @@ void TMVA::PDEFoam::RootPlot2dim( const TString& filename, TString opt, if (fillcells) { // get cell value - Double_t value = GetCellValue(fCells[iCell], cell_value); - - if (log_colors) { - if (value<1.) value=1; - value = TMath::Log(value); - } + Float_t value = GetCellValue(fCells[iCell], cell_value); // calculate fill color Int_t color; @@ -2695,178 +1625,18 @@ void TMVA::PDEFoam::RootPlot2dim( const TString& filename, TString opt, } //_____________________________________________________________________ -void TMVA::PDEFoam::FillBinarySearchTree( const Event* ev, Bool_t NoNegWeights ) +void TMVA::PDEFoam::FillBinarySearchTree( const Event* ev ) { - // Insert event to internal foam density PDEFoamDistr. - GetDistr()->FillBinarySearchTree(ev, GetFoamType(), NoNegWeights); + // Insert event to internal foam's density estimator + // PDEFoamDensityBase. 
+ GetDistr()->FillBinarySearchTree(ev); } //_____________________________________________________________________ void TMVA::PDEFoam::DeleteBinarySearchTree() { - // Delete the fDistr object, which contains the binary search tree + // Delete the foam's density estimator, which contains the binary + // search tree. if(fDistr) delete fDistr; fDistr = NULL; } - -//_____________________________________________________________________ -void TMVA::PDEFoam::Init() -{ - // Initialize binary search tree, stored in object of type - // PDEFoamDistr - GetDistr()->SetPDEFoam(this); - GetDistr()->Initialize(); -} - -//_____________________________________________________________________ -void TMVA::PDEFoam::SetFoamType( EFoamType ft ) -{ - // Set the foam type. This determinates the method of the - // calculation of the density during the foam build-up. - switch (ft) { - case kDiscr: - GetDistr()->SetDensityCalc(kDISCRIMINATOR); - break; - case kMonoTarget: - GetDistr()->SetDensityCalc(kTARGET); - break; - default: - GetDistr()->SetDensityCalc(kEVENT_DENSITY); - break; - } - - fFoamType = ft; // set foam type class variable -} - -//_____________________________________________________________________ -ostream& TMVA::operator<< ( ostream& os, const TMVA::PDEFoam& pdefoam ) -{ - // Write PDEFoam variables to stream 'os'. - pdefoam.PrintStream(os); - return os; // Return the output stream. -} - -//_____________________________________________________________________ -istream& TMVA::operator>> ( istream& istr, TMVA::PDEFoam& pdefoam ) -{ - // Read PDEFoam variables from stream 'istr'. - pdefoam.ReadStream(istr); - return istr; -} - -//_____________________________________________________________________ -void TMVA::PDEFoam::ReadStream( istream & istr ) -{ - // Read PDEFoam variables from stream 'istr'. 
- - // inherited class variables: fLastCe, fNCells, fDim[GetTotDim()] - istr >> fLastCe; - istr >> fNCells; - // coverity[tainted_data_argument] - istr >> fDim; - if (fDim < 1 || fDim >= INT_MAX) { - Log() << kERROR << "Foam dimension " << GetTotDim() << "our of range!" << Endl; - return; - } - - Double_t vfr = -1.; - istr >> vfr; - SetVolumeFraction(vfr); - - Log() << kVERBOSE << "Foam dimension: " << GetTotDim() << Endl; - - // read Class Variables: fXmin, fXmax - if (fXmin) delete [] fXmin; - if (fXmax) delete [] fXmax; - fXmin = new Double_t[GetTotDim()]; - fXmax = new Double_t[GetTotDim()]; - for (Int_t i=0; i<GetTotDim(); i++) - istr >> fXmin[i]; - for (Int_t i=0; i<GetTotDim(); i++) - istr >> fXmax[i]; -} - -//_____________________________________________________________________ -void TMVA::PDEFoam::PrintStream( ostream & ostr ) const -{ - // Write PDEFoam variables to stream 'os'. - - // inherited class variables: fLastCe, fNCells, fDim[GetTotDim()] - ostr << fLastCe << std::endl; - ostr << fNCells << std::endl; - ostr << fDim << std::endl; - ostr << GetVolumeFraction() << std::endl; - - // write class variables: fXmin, fXmax - for (Int_t i=0; i<GetTotDim(); i++) - ostr << fXmin[i] << std::endl; - for (Int_t i=0; i<GetTotDim(); i++) - ostr << fXmax[i] << std::endl; -} - -//_____________________________________________________________________ -void TMVA::PDEFoam::AddXMLTo( void* parent ) -{ - // write foam variables to xml - - void *variables = gTools().AddChild( parent, "Variables" ); - gTools().AddAttr( variables, "LastCe", fLastCe ); - gTools().AddAttr( variables, "nCells", fNCells ); - gTools().AddAttr( variables, "Dim", fDim ); - gTools().AddAttr( variables, "VolumeFraction", GetVolumeFraction() ); - - void *xmin_wrap; - for (Int_t i=0; i<GetTotDim(); i++){ - xmin_wrap = gTools().AddChild( variables, "Xmin" ); - gTools().AddAttr( xmin_wrap, "Index", i ); - gTools().AddAttr( xmin_wrap, "Value", fXmin[i] ); - } - - void *xmax_wrap; - for (Int_t i=0; 
i<GetTotDim(); i++){ - xmax_wrap = gTools().AddChild( variables, "Xmax" ); - gTools().AddAttr( xmax_wrap, "Index", i ); - gTools().AddAttr( xmax_wrap, "Value", fXmax[i] ); - } -} - -//_____________________________________________________________________ -void TMVA::PDEFoam::ReadXML( void* parent ) -{ - void *variables = gTools().GetChild( parent ); - gTools().ReadAttr( variables, "LastCe", fLastCe ); - gTools().ReadAttr( variables, "nCells", fNCells ); - gTools().ReadAttr( variables, "Dim", fDim ); - if (fDim < 1 || fDim >= INT_MAX) { - Log() << kERROR << "Foam dimension " << GetTotDim() << "our of range!" << Endl; - return; - } - Float_t volfr; - gTools().ReadAttr( variables, "VolumeFraction", volfr ); - SetVolumeFraction( volfr ); - - if (fXmin) delete [] fXmin; - if (fXmax) delete [] fXmax; - fXmin = new Double_t[GetTotDim()]; - fXmax = new Double_t[GetTotDim()]; - - void *xmin_wrap = gTools().GetChild( variables ); - for (Int_t counter=0; counter<fDim; counter++) { - Int_t i=0; - gTools().ReadAttr( xmin_wrap , "Index", i ); - if (i >= GetTotDim() || i<0) - Log() << kFATAL << "dimension index out of range:" << i << Endl; - gTools().ReadAttr( xmin_wrap , "Value", fXmin[i] ); - xmin_wrap = gTools().GetNextChild( xmin_wrap ); - } - - void *xmax_wrap = xmin_wrap; //gTools().xmlengine().GetChild( variables ); - for (Int_t counter=0; counter<fDim; counter++) { - Int_t i=0; - gTools().ReadAttr( xmax_wrap , "Index", i ); - if (i >= GetTotDim() || i<0) - Log() << kFATAL << "dimension index out of range:" << i << Endl; - gTools().ReadAttr( xmax_wrap , "Value", fXmax[i] ); - xmax_wrap = gTools().GetNextChild( xmax_wrap ); - } -} diff --git a/tmva/src/PDEFoamCell.cxx b/tmva/src/PDEFoamCell.cxx index 8cefcd8fddfbf7662d23675c8f9d19652a70669d..272d7ad1579bc2c45ba7b51577144307d9d367f7 100644 --- a/tmva/src/PDEFoamCell.cxx +++ b/tmva/src/PDEFoamCell.cxx @@ -1,3 +1,5 @@ +// @(#)root/tmva $Id$ +// Author: S.Jadach, Tancredi Carli, Dominik Dannheim, Alexander Voigt 
/**********************************************************************************
  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
@@ -15,7 +17,7 @@
  * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
  * Tancredi Carli - CERN, Switzerland *
  * Dominik Dannheim - CERN, Switzerland *
- * Alexander Voigt - CERN, Switzerland *
+ * Alexander Voigt - TU Dresden, Germany *
  * *
  * Copyright (c) 2008: *
  * CERN, Switzerland *
@@ -205,6 +207,23 @@ UInt_t TMVA::PDEFoamCell::GetDepth()
    return depth;
 }
 
+//_____________________________________________________________________
+UInt_t TMVA::PDEFoamCell::GetTreeDepth(UInt_t depth)
+{
+   // Get depth of cell tree, starting at this cell.
+
+   if (GetStat() == 1) // this is an active cell
+      return depth + 1;
+
+   UInt_t depth0 = 0, depth1 = 0;
+   if (GetDau0() != NULL)
+      depth0 = GetDau0()->GetTreeDepth(depth+1);
+   if (GetDau1() != NULL)
+      depth1 = GetDau1()->GetTreeDepth(depth+1);
+
+   return (depth0 > depth1 ? depth0 : depth1);
+}
+
 //_____________________________________________________________________
 void TMVA::PDEFoamCell::Print(Option_t *option) const
 {
diff --git a/tmva/src/PDEFoamDecisionTree.cxx b/tmva/src/PDEFoamDecisionTree.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..42e308e9146f333bb0538e02b42f609997d5c067
--- /dev/null
+++ b/tmva/src/PDEFoamDecisionTree.cxx
@@ -0,0 +1,224 @@
+// @(#)root/tmva $Id$
+// Author: Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamDecisionTree *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * Implementation of decision tree like PDEFoam *
+ * *
+ * Authors (alphabetical): *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * Peter Speckmayer - CERN, Switzerland *
+ * *
+ * Copyright (c) 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamDecisionTree
+//
+// This PDEFoam variant acts like a decision tree and stores in every
+// cell the discriminant
+//
+// D = #events with given class / total number of events
+//
+// as well as the statistical error on the discriminant. It therefore
+// acts as a discriminant estimator. The decision tree-like behaviour
+// is achieved by overriding PDEFoamDiscriminant::Explore() to use a
+// decision tree-like cell splitting algorithm (given a separation
+// type).
+//
+// This PDEFoam variant should be booked together with the
+// PDEFoamDecisionTreeDensity density estimator, which returns the
+// events in a cell without sampling.
+//
+//_____________________________________________________________________
+
+#include <limits>
+
+#ifndef ROOT_TMVA_PDEFoamDecisionTree
+#include "TMVA/PDEFoamDecisionTree.h"
+#endif
+#ifndef ROOT_TMVA_PDEFoamDecisionTreeDensity
+#include "TMVA/PDEFoamDecisionTreeDensity.h"
+#endif
+
+ClassImp(TMVA::PDEFoamDecisionTree)
+
+//_____________________________________________________________________
+TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree()
+   : PDEFoamDiscriminant()
+   , fSepType(NULL)
+{
+   // Default constructor for streamer, user should not use it.
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree(const TString& Name, SeparationBase *sepType, UInt_t cls)
+   : PDEFoamDiscriminant(Name, cls)
+   , fSepType(sepType)
+{
+   // Parameters:
+   //
+   // - Name - name of the foam
+   //
+   // - sepType - separation type used for the cell splitting
+   //
+   // - cls - class to consider as signal when calculating the purity
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree(const PDEFoamDecisionTree &From)
+   : PDEFoamDiscriminant(From)
+   , fSepType(NULL)
+{
+   // Copy Constructor NOT IMPLEMENTED (NEVER USED)
+   Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDecisionTree::~PDEFoamDecisionTree()
+{}
+
+//_____________________________________________________________________
+void TMVA::PDEFoamDecisionTree::Explore(PDEFoamCell *cell)
+{
+   // Internal subprogram used by Create. It explores the newly defined
+   // cell according to the decision tree logic. The separation type is
+   // set via the 'sepType' argument of the constructor.
+   //
+   // The optimal division point for eventual future cell division is
+   // determined/recorded. Note that links to parents and initial
+   // volume = 1/2 parent has to be already defined prior to calling
+   // this routine.
+   //
+   // Note, that according to the decision tree logic, a cell is only
+   // split, if the number of (unweighted) events in each daughter
+   // cell is at least GetNmin().
+
+   if (!cell)
+      Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;
+
+   // the separation criterion is dereferenced in the gain loop below,
+   // but it is NULL after default or copy construction
+   if (!fSepType)
+      Log() << kFATAL << "<PDEFoamDecisionTree::Explore> Separation type not set!" << Endl;
+
+   // create edge histograms
+   std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
+   for (Int_t idim = 0; idim < fDim; idim++) {
+      hsig.push_back(new TH1D(Form("hsig_%i", idim),
+                              Form("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
+      hbkg.push_back(new TH1D(Form("hbkg_%i", idim),
+                              Form("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
+      hsig_unw.push_back(new TH1D(Form("hsig_unw_%i", idim),
+                                  Form("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
+      hbkg_unw.push_back(new TH1D(Form("hbkg_unw_%i", idim),
+                                  Form("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
+   }
+
+   // get cell position and size
+   PDEFoamVect cellSize(GetTotDim()), cellPosi(GetTotDim());
+   cell->GetHcub(cellPosi, cellSize);
+
+   // determine lower and upper cell bound
+   std::vector<Double_t> lb(GetTotDim()); // lower bound
+   std::vector<Double_t> ub(GetTotDim()); // upper bound
+   for (Int_t idim = 0; idim < GetTotDim(); idim++) {
+      lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
+      ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
+   }
+
+   // fDistr must be of type PDEFoamDecisionTreeDensity*
+   PDEFoamDecisionTreeDensity *distr = dynamic_cast<PDEFoamDecisionTreeDensity*>(fDistr);
+   if (distr == NULL)
+      Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
+            << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;
+
+   // create TMVA::Volume object needed for searching within the BST
+   TMVA::Volume volume(&lb, &ub);
+
+   // fill the signal and background histograms for the given volume
+   distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);
+
+   // ------ determine the best division edge
+   Double_t xBest = 0.5; // best division point
+   Int_t kBest = -1; // best split dimension
+   Double_t maxGain = -1.0; // maximum gain
+   Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
+   Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
+   Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
+   Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);
+
+   for (Int_t idim = 0; idim < fDim; ++idim) {
+      Double_t nSelS = hsig.at(idim)->GetBinContent(0);
+      Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
+      Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
+      Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
+      for (Int_t jLo = 1; jLo < fNBin; jLo++) {
+         nSelS += hsig.at(idim)->GetBinContent(jLo);
+         nSelB += hbkg.at(idim)->GetBinContent(jLo);
+         nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
+         nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);
+
+         // proceed only if the number of unweighted events in both the
+         // left and the right cell is at least GetNmin()
+         if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
+               (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
+            continue;
+
+         Double_t xLo = 1.0 * jLo / fNBin;
+
+         // calculate separation gain
+         Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);
+
+         if (gain >= maxGain) {
+            maxGain = gain;
+            xBest = xLo;
+            kBest = idim;
+         }
+      } // jLo
+   } // idim
+
+   if (kBest >= fDim || kBest < 0) {
+      // No best division edge found! One must ensure, that this cell
+      // is not chosen for splitting in PeekMax(). But since in
+      // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
+      // should set maxGain to -1.0 (or even 0.0?) here.
+      maxGain = -1.0;
+   }
+
+   // set cell properties
+   cell->SetBest(kBest);
+   cell->SetXdiv(xBest);
+   if (nTotB + nTotS > 0)
+      cell->SetIntg(nTotS / (nTotB + nTotS));
+   else
+      cell->SetIntg(0.0);
+   cell->SetDriv(maxGain);
+   cell->CalcVolume();
+
+   // set cell element 0 (total number of events in cell) during
+   // build-up
+   if (GetNmin() > 0)
+      SetCellElement(cell, 0, nTotS + nTotB);
+
+   // clean up
+   for (UInt_t ih = 0; ih < hsig.size(); ih++) delete hsig.at(ih);
+   for (UInt_t ih = 0; ih < hbkg.size(); ih++) delete hbkg.at(ih);
+   for (UInt_t ih = 0; ih < hsig_unw.size(); ih++) delete hsig_unw.at(ih);
+   for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++) delete hbkg_unw.at(ih);
+}
diff --git a/tmva/src/PDEFoamDecisionTreeDensity.cxx b/tmva/src/PDEFoamDecisionTreeDensity.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..c628c35b1591eeb30f0e9c6604e58b5812117ac1
--- /dev/null
+++ b/tmva/src/PDEFoamDecisionTreeDensity.cxx
@@ -0,0 +1,154 @@
+// @(#)root/tmva $Id$
+// Author: Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamDecisionTreeDensity *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * This class provides an interface between the Binary search tree *
+ * and the PDEFoam object. In order to build-up the foam one needs to *
+ * calculate the density of events at a given point (sampling during *
+ * Foam build-up). The function PDEFoamDecisionTreeDensity::Density() *
+ * does this job. It uses a binary search tree, filled with training *
+ * events, in order to provide this density. *
+ * *
+ * Authors (alphabetical): *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * S. 
Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * Peter Speckmayer - CERN, Switzerland *
+ * *
+ * Copyright (c) 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamDecisionTreeDensity
+//
+// This is a concrete implementation of PDEFoamDensityBase. Density(...)
+// always returns 0. The function FillHistograms() is added, which
+// fills the given histograms with all events found in a TMVA::Volume.
+// _____________________________________________________________________
+
+#include <limits>
+
+#ifndef ROOT_TMVA_PDEFoamDecisionTreeDensity
+#include "TMVA/PDEFoamDecisionTreeDensity.h"
+#endif
+
+ClassImp(TMVA::PDEFoamDecisionTreeDensity)
+
+//_____________________________________________________________________
+TMVA::PDEFoamDecisionTreeDensity::PDEFoamDecisionTreeDensity()
+   : PDEFoamDensityBase()
+   , fClass(0)
+{}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDecisionTreeDensity::PDEFoamDecisionTreeDensity(std::vector<Double_t> box, UInt_t cls)
+   : PDEFoamDensityBase(box)
+   , fClass(cls)
+{}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDecisionTreeDensity::PDEFoamDecisionTreeDensity(const PDEFoamDecisionTreeDensity &distr)
+   : PDEFoamDensityBase(distr)
+   , fClass(distr.fClass)
+{
+   // Copy constructor
+}
+
+//_____________________________________________________________________
+Double_t TMVA::PDEFoamDecisionTreeDensity::Density(std::vector<Double_t>& /* Xarg */,
+                                                   Double_t& /* event_density */)
+{
+   // This function is not used in the decision tree like PDEFoam,
+   // instead FillHistograms() is used.
+   return 0;
+}
+
+//_____________________________________________________________________
+void TMVA::PDEFoamDecisionTreeDensity::FillHistograms(TMVA::Volume &volume, std::vector<TH1D*> &hsig,
+                                                      std::vector<TH1D*> &hbkg, std::vector<TH1D*> &hsig_unw,
+                                                      std::vector<TH1D*> &hbkg_unw)
+{
+   // Fill the given histograms with signal and background events,
+   // which are found in the volume.
+   //
+   // Parameters:
+   //
+   // - volume - volume box to search in
+   //
+   // - hsig, hbkg, hsig_unw, hbkg_unw - histograms with weighted and
+   //   unweighted signal and background events
+
+   // sanity check
+   if (hsig.size() != volume.fLower->size()
+       || hbkg.size() != volume.fLower->size()
+       || hsig_unw.size() != volume.fLower->size()
+       || hbkg_unw.size() != volume.fLower->size())
+      Log() << kFATAL << "<PDEFoamDecisionTreeDensity::FillHistograms> Edge histograms have wrong size!" << Endl;
+
+   // check histograms
+   for (UInt_t idim = 0; idim < hsig.size(); ++idim) {
+      if (!hsig.at(idim) || !hbkg.at(idim) ||
+          !hsig_unw.at(idim) || !hbkg_unw.at(idim))
+         Log() << kFATAL << "<PDEFoamDecisionTreeDensity::FillHistograms> Histograms not initialized!" << Endl;
+   }
+
+   // BST nodes found in volume
+   std::vector<const TMVA::BinarySearchTreeNode*> nodes;
+
+   // do range searching
+   fBst->SearchVolume(&volume, &nodes);
+
+   // calc xmin and xmax of events found in cell
+   std::vector<Float_t> xmin(volume.fLower->size(), std::numeric_limits<float>::max());
+   std::vector<Float_t> xmax(volume.fLower->size(), -std::numeric_limits<float>::max());
+   for (std::vector<const TMVA::BinarySearchTreeNode*>::const_iterator it = nodes.begin();
+        it != nodes.end(); ++it) {
+      const std::vector<Float_t> &ev = (*it)->GetEventV();
+      for (UInt_t idim = 0; idim < xmin.size(); ++idim) {
+         if (ev.at(idim) < xmin.at(idim)) xmin.at(idim) = ev.at(idim);
+         if (ev.at(idim) > xmax.at(idim)) xmax.at(idim) = ev.at(idim);
+      }
+   }
+
+   // reset histogram ranges to xmin, xmax found in volume
+   for (UInt_t idim = 0; idim < hsig.size(); ++idim) {
+      hsig.at(idim)->GetXaxis()->SetLimits(xmin.at(idim), xmax.at(idim));
+      hbkg.at(idim)->GetXaxis()->SetLimits(xmin.at(idim), xmax.at(idim));
+      hsig_unw.at(idim)->GetXaxis()->SetLimits(xmin.at(idim), xmax.at(idim));
+      hbkg_unw.at(idim)->GetXaxis()->SetLimits(xmin.at(idim), xmax.at(idim));
+      hsig.at(idim)->Reset();
+      hbkg.at(idim)->Reset();
+      hsig_unw.at(idim)->Reset();
+      hbkg_unw.at(idim)->Reset();
+   }
+
+   // fill histograms with events found
+   for (std::vector<const TMVA::BinarySearchTreeNode*>::const_iterator it = nodes.begin();
+        it != nodes.end(); ++it) {
+      // GetEventV() returns a const reference: bind it instead of copying
+      const std::vector<Float_t> &ev = (*it)->GetEventV();
+      const Float_t wt = (*it)->GetWeight();
+      // the class is a per-event property: select the target histograms once
+      const Bool_t isSignal = ((*it)->GetClass() == fClass);
+      std::vector<TH1D*> &hwgt = isSignal ? hsig : hbkg;         // weighted
+      std::vector<TH1D*> &hunw = isSignal ? hsig_unw : hbkg_unw; // unweighted
+      for (UInt_t idim = 0; idim < ev.size(); ++idim) {
+         hwgt.at(idim)->Fill(ev.at(idim), wt);
+         hunw.at(idim)->Fill(ev.at(idim), 1);
+      }
+   }
+}
diff --git a/tmva/src/PDEFoamDensityBase.cxx b/tmva/src/PDEFoamDensityBase.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..200f7837002bde77605b574d2a2e8c8abd4a2ae0
--- /dev/null
+++ 
b/tmva/src/PDEFoamDensityBase.cxx
@@ -0,0 +1,153 @@
+// @(#)root/tmva $Id$
+// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamDensityBase *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * This class provides an interface between the Binary search tree *
+ * and the PDEFoam object. In order to build-up the foam one needs to *
+ * calculate the density of events at a given point (sampling during *
+ * Foam build-up). The function PDEFoamDensityBase::Density() does this job. It *
+ * uses a binary search tree, filled with training events, in order to *
+ * provide this density. *
+ * *
+ * Authors (alphabetical): *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * Peter Speckmayer - CERN, Switzerland *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamDensityBase
+//
+// This is an abstract class, which provides an interface for a
+// PDEFoam density estimator. Derived classes have to implement the
+// Density(...) function, which returns the density of a certain
+// quantity at a given phase-space point during the foam build-up.
+//
+// Variants of PDEFoamDensityBase are:
+//
+// - PDEFoamEventDensity
+// - PDEFoamDiscriminantDensity
+// - PDEFoamTargetDensity
+// - PDEFoamDecisionTreeDensity
+//
+// Usage:
+//
+// The user has to instantiate a child class of PDEFoamDensityBase and
+// set the pointer to the owner, which is a PDEFoam object:
+//
+// PDEFoamDensityBase *dens = new MyDensity();
+// pdefoam->SetDensity(dens);
+//
+// Afterwards the binary search tree should be filled with TMVA
+// events, by either using
+//
+// pdefoam->FillBinarySearchTree(event);
+//
+// or
+//
+// dens->FillBinarySearchTree(event);
+// _____________________________________________________________________
+
+#include <functional>
+#include <numeric>
+
+#ifndef ROOT_TMVA_PDEFoamDensityBase
+#include "TMVA/PDEFoamDensityBase.h"
+#endif
+
+ClassImp(TMVA::PDEFoamDensityBase)
+
+//_____________________________________________________________________
+TMVA::PDEFoamDensityBase::PDEFoamDensityBase()
+   : TObject(),
+     fBox(std::vector<Double_t>()),
+     fBoxVolume(1.0),
+     fBoxHasChanged(kTRUE),
+     fBst(new TMVA::BinarySearchTree()),
+     fLogger(new MsgLogger("PDEFoamDensityBase"))
+{}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDensityBase::PDEFoamDensityBase(std::vector<Double_t> box)
+   : TObject(),
+     fBox(box),
+     fBoxVolume(1.0),
+     fBoxHasChanged(kTRUE),
+     fBst(new TMVA::BinarySearchTree()),
+     fLogger(new MsgLogger("PDEFoamDensityBase"))
+{
+   if (box.size() == 0)
+      Log() << kFATAL << "Dimension of PDEFoamDensityBase is zero" << Endl;
+
+   // set periode (number of variables) of binary search tree
+   fBst->SetPeriode(box.size());
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDensityBase::~PDEFoamDensityBase()
+{
+   if (fBst) delete fBst;
+   if (fLogger) delete fLogger;
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDensityBase::PDEFoamDensityBase(const PDEFoamDensityBase &distr)
+   : TObject(),
+     fBox(distr.fBox),
+     fBoxVolume(distr.fBoxVolume),
+     fBoxHasChanged(distr.fBoxHasChanged),
+     fBst(distr.fBst ? new BinarySearchTree(*distr.fBst) : NULL),
+     fLogger(new MsgLogger(*distr.fLogger))
+{
+   // Copy constructor
+   //
+   // Creates a deep copy, using the copy constructor of
+   // TMVA::BinarySearchTree (a NULL tree in 'distr' stays NULL)
+}
+
+//_____________________________________________________________________
+void TMVA::PDEFoamDensityBase::FillBinarySearchTree(const Event* ev)
+{
+   // This method inserts the given event 'ev' into the binary
+   // search tree.
+
+   if (fBst == NULL)
+      Log() << kFATAL << "<PDEFoamDensityBase::FillBinarySearchTree> "
+            << "Binary tree is not set!" << Endl;
+
+   // insert into binary search tree
+   fBst->Insert(ev);
+}
+
+//_____________________________________________________________________
+Double_t TMVA::PDEFoamDensityBase::GetBoxVolume()
+{
+   // Returns the volume of range searching box fBox.
+   //
+   // If the range searching box 'fBox' has changed (fBoxHasChanged is
+   // kTRUE), recalculate the box volume and set fBoxHasChanged to
+   // kFALSE
+   if (fBoxHasChanged) {
+      fBoxHasChanged = kFALSE;
+      fBoxVolume = std::accumulate(fBox.begin(), fBox.end(), 1.0,
+                                   std::multiplies<Double_t>());
+   }
+   return fBoxVolume;
+}
diff --git a/tmva/src/PDEFoamDiscriminant.cxx b/tmva/src/PDEFoamDiscriminant.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..33da937c03d74740e84bf6d03c11bcb904aaf6eb
--- /dev/null
+++ b/tmva/src/PDEFoamDiscriminant.cxx
@@ -0,0 +1,258 @@
+// @(#)root/tmva $Id$
+// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamDiscriminant *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * Implementation. *
+ * *
+ * Authors (alphabetical): *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * S. 
Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * Peter Speckmayer - CERN, Switzerland *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamDiscriminant
+//
+// This PDEFoam variant stores in every cell the discriminant
+//
+// D = #events with given class / total number of events
+//
+// as well as the statistical error on the discriminant. It therefore
+// acts as a discriminant estimator. It should be booked together
+// with the PDEFoamDiscriminantDensity density estimator, which
+// returns the discriminant density at a given phase space point
+// during the foam build-up.
+//
+//_____________________________________________________________________
+
+#include <limits>
+
+#ifndef ROOT_TMath
+#include "TMath.h"
+#endif
+
+#ifndef ROOT_TMVA_PDEFoamDiscriminant
+#include "TMVA/PDEFoamDiscriminant.h"
+#endif
+
+ClassImp(TMVA::PDEFoamDiscriminant)
+
+//_____________________________________________________________________
+TMVA::PDEFoamDiscriminant::PDEFoamDiscriminant()
+   : PDEFoam()
+   , fClass(0)
+{
+   // Default constructor for streamer, user should not use it.
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDiscriminant::PDEFoamDiscriminant(const TString& Name, UInt_t cls)
+   : PDEFoam(Name)
+   , fClass(cls)
+{}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDiscriminant::PDEFoamDiscriminant(const PDEFoamDiscriminant &From)
+   : PDEFoam(From)
+   , fClass(0)
+{
+   // Copy Constructor NOT IMPLEMENTED (NEVER USED)
+   Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
+}
+
+//_____________________________________________________________________
+void TMVA::PDEFoamDiscriminant::FillFoamCells(const Event* ev, Float_t wt)
+{
+   // This function fills an event into the discriminant PDEFoam. The
+   // event weight 'wt' is filled into cell element 0 if the event is
+   // of class fClass, and filled into cell element 1 otherwise.
+
+   // find corresponding foam cell
+   std::vector<Float_t> values = ev->GetValues();
+   std::vector<Float_t> tvalues = VarTransform(values);
+   PDEFoamCell *cell = FindCell(tvalues);
+
+   // 0. Element: Number of signal events (event class == fClass)
+   // 1. Element: Number of background events times normalization
+   if (ev->GetClass() == fClass)
+      SetCellElement(cell, 0, GetCellElement(cell, 0) + wt);
+   else
+      SetCellElement(cell, 1, GetCellElement(cell, 1) + wt);
+}
+
+//_____________________________________________________________________
+void TMVA::PDEFoamDiscriminant::Finalize()
+{
+   // Calc discriminator and its error for every cell and save it to
+   // the cell.
+
+   // loop over cells
+   for (Long_t iCell = 0; iCell <= fLastCe; iCell++) {
+      if (!(fCells[iCell]->GetStat()))
+         continue;
+
+      Double_t N_sig = GetCellElement(fCells[iCell], 0); // get number of signal events
+      Double_t N_bg = GetCellElement(fCells[iCell], 1); // get number of bg events
+
+      if (N_sig < 0.) {
+         Log() << kWARNING << "Negative number of signal events in cell " << iCell
+               << ": " << N_sig << ". Set to 0." << Endl;
+         N_sig = 0.;
+      }
+      if (N_bg < 0.) {
+         Log() << kWARNING << "Negative number of background events in cell " << iCell
+               << ": " << N_bg << ". Set to 0." << Endl;
+         N_bg = 0.;
+      }
+
+      // calculate discriminant
+      if (N_sig + N_bg > 0) {
+         // discriminant
+         SetCellElement(fCells[iCell], 0, N_sig / (N_sig + N_bg));
+         // discriminant error -- NOTE(review): Poisson error propagation would
+         // pair Sqr(N_bg/Sqr(N))*N_sig with Sqr(N_sig/Sqr(N))*N_bg; confirm intent
+         SetCellElement(fCells[iCell], 1, TMath::Sqrt(Sqr(N_sig / Sqr(N_sig + N_bg))*N_sig +
+                                                      Sqr(N_bg / Sqr(N_sig + N_bg))*N_bg));
+      } else {
+         SetCellElement(fCells[iCell], 0, 0.5); // set discriminator
+         SetCellElement(fCells[iCell], 1, 1.); // set discriminator error
+      }
+   }
+}
+
+//_____________________________________________________________________
+TH2D* TMVA::PDEFoamDiscriminant::Project2(Int_t idim1, Int_t idim2, ECellValue cell_value, PDEFoamKernelBase *kernel, UInt_t nbin)
+{
+   // Project foam variable idim1 and variable idim2 to histogram.
+   // The projection algorithm is modified such that the z axis range
+   // of the returned histogram is [0, 1], as necessary for the
+   // interpretation as a discriminator. This is done by weighting
+   // the cell values (in case of cell_value = kValue) by the cell
+   // volume in all dimensions, excluding 'idim1' and 'idim2'.
+   //
+   // Parameters:
+   //
+   // - idim1, idim2 - dimensions to project to
+   //
+   // - cell_value - the cell value to draw
+   //
+   // - kernel - a PDEFoam kernel (optional). If NULL is given, the
+   //   kernel is ignored and the pure cell values are
+   //   plotted.
+   //
+   // - nbin - number of bins in x and y direction of result histogram
+   //   (optional, default is 50).
+   //
+   // Returns:
+   // a 2-dimensional histogram
+
+   // avoid plotting of wrong dimensions
+   if ((idim1 >= GetTotDim()) || (idim1 < 0) ||
+       (idim2 >= GetTotDim()) || (idim2 < 0) ||
+       (idim1 == idim2))
+      Log() << kFATAL << "<Project2>: wrong dimensions given: "
+            << idim1 << ", " << idim2 << Endl;
+
+   // root can not handle too many bins in one histogram --> catch this
+   // Furthermore, to have more than 1000 bins in the histogram doesn't make
+   // sense.
+   if (nbin > 1000) {
+      Log() << kWARNING << "Warning: number of bins too big: " << nbin
+            << " Using 1000 bins for each dimension instead." << Endl;
+      nbin = 1000;
+   } else if (nbin < 1) {
+      Log() << kWARNING << "Wrong bin number: " << nbin
+            << "; set nbin=50" << Endl;
+      nbin = 50;
+   }
+
+   // create result histogram
+   TString hname(Form("h_%d_vs_%d", idim1, idim2));
+
+   // if histogram with this name already exists, delete it
+   TH2D* h1 = (TH2D*)gDirectory->Get(hname.Data());
+   if (h1) delete h1;
+   h1 = new TH2D(hname.Data(), Form("var%d vs var%d", idim1, idim2), nbin, fXmin[idim1], fXmax[idim1], nbin, fXmin[idim2], fXmax[idim2]);
+
+   if (!h1) Log() << kFATAL << "ERROR: Can not create histo" << hname << Endl;
+   if (cell_value == kValue)
+      h1->GetZaxis()->SetRangeUser(-std::numeric_limits<float>::epsilon(),
+                                   1. + std::numeric_limits<float>::epsilon());
+
+   // ============== start projection algorithm ================
+   // loop over all histogram bins (2-dim)
+   for (Int_t xbin = 1; xbin <= h1->GetNbinsX(); ++xbin) {
+      for (Int_t ybin = 1; ybin <= h1->GetNbinsY(); ++ybin) {
+         // calculate the phase space point, which corresponds to this
+         // bin combination
+         std::map<Int_t, Float_t> txvec;
+         txvec[idim1] = VarTransform(idim1, h1->GetXaxis()->GetBinCenter(xbin));
+         txvec[idim2] = VarTransform(idim2, h1->GetYaxis()->GetBinCenter(ybin));
+
+         // find the cells, which corresponds to this phase space
+         // point
+         std::vector<TMVA::PDEFoamCell*> cells = FindCells(txvec);
+
+         // loop over cells and fill the histogram with the cell
+         // values
+         Float_t sum_cv = 0; // sum of the cell values
+         for (std::vector<TMVA::PDEFoamCell*>::const_iterator it = cells.begin();
+              it != cells.end(); ++it) {
+            // get cell position and size
+            PDEFoamVect cellPosi(GetTotDim()), cellSize(GetTotDim());
+            (*it)->GetHcub(cellPosi, cellSize);
+            // Create complete event vector from txvec. The missing
+            // coordinates of txvec are set to the cell center.
+            std::vector<Float_t> tvec;
+            for (Int_t i = 0; i < GetTotDim(); ++i) {
+               if (i != idim1 && i != idim2)
+                  tvec.push_back(cellPosi[i] + 0.5 * cellSize[i]);
+               else
+                  tvec.push_back(txvec[i]);
+            }
+            // get the cell value using the kernel
+            Float_t cv = 0;
+            if (kernel != NULL) {
+               cv = kernel->Estimate(this, tvec, cell_value);
+            } else {
+               cv = GetCellValue(FindCell(tvec), cell_value);
+            }
+            if (cell_value == kValue) {
+               // calculate cell volume in other dimensions (not
+               // including idim1 and idim2)
+               Float_t area_cell = 1.;
+               for (Int_t d1 = 0; d1 < GetTotDim(); ++d1) {
+                  if ((d1 != idim1) && (d1 != idim2))
+                     area_cell *= cellSize[d1];
+               }
+               // calc discriminator * (cell area times foam area)
+               // foam is normalized -> length of foam = 1.0
+               cv *= area_cell;
+            }
+            sum_cv += cv;
+         }
+
+         // fill the bin content
+         h1->SetBinContent(xbin, ybin, sum_cv + h1->GetBinContent(xbin, ybin));
+      }
+   }
+
+   return h1;
+}
diff --git a/tmva/src/PDEFoamDiscriminantDensity.cxx b/tmva/src/PDEFoamDiscriminantDensity.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..8bbb4e793aa410cd5261a0c5e606bb5cb9d78bf8
--- /dev/null
+++ b/tmva/src/PDEFoamDiscriminantDensity.cxx
@@ -0,0 +1,115 @@
+// @(#)root/tmva $Id$
+// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamDiscriminantDensity *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * This class provides an interface between the Binary search tree *
+ * and the PDEFoam object. In order to build-up the foam one needs to *
+ * calculate the density of events at a given point (sampling during *
+ * Foam build-up). The function PDEFoamDiscriminantDensity::Density() does this job. It *
+ * uses a binary search tree, filled with training events, in order to *
+ * provide this density. 
*
+ * *
+ * Authors (alphabetical): *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * Peter Speckmayer - CERN, Switzerland *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamDiscriminantDensity
+//
+// This is a concrete implementation of PDEFoamDensityBase. Density(...)
+// estimates the discriminant density at a given phase-space point
+// using range-searching. The discriminant D is defined as
+//
+// D = #events with given class / total number of events
+// _____________________________________________________________________
+
+#include <cmath>
+
+#ifndef ROOT_TMVA_PDEFoamDiscriminantDensity
+#include "TMVA/PDEFoamDiscriminantDensity.h"
+#endif
+
+ClassImp(TMVA::PDEFoamDiscriminantDensity)
+
+//_____________________________________________________________________
+TMVA::PDEFoamDiscriminantDensity::PDEFoamDiscriminantDensity()
+   : PDEFoamDensityBase()
+   , fClass(0)
+{}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDiscriminantDensity::PDEFoamDiscriminantDensity(std::vector<Double_t> box, UInt_t cls)
+   : PDEFoamDensityBase(box)
+   , fClass(cls)
+{}
+
+//_____________________________________________________________________
+TMVA::PDEFoamDiscriminantDensity::PDEFoamDiscriminantDensity(const PDEFoamDiscriminantDensity &distr)
+   : PDEFoamDensityBase(distr)
+   , fClass(distr.fClass)
+{
+   // Copy constructor
+}
+
+//_____________________________________________________________________
+Double_t TMVA::PDEFoamDiscriminantDensity::Density(std::vector<Double_t> &Xarg, Double_t &event_density)
+{
+   // This function is needed during the foam buildup. It returns the
+   // weighted fraction of events of class fClass found in the search
+   // box centred on Xarg, divided by the volume of that box.
+
+   if (!fBst)
+      Log() << kFATAL << "<PDEFoamDiscriminantDensity::Density()> Binary tree not set!" << Endl;
+
+   //create volume around point to be found
+   std::vector<Double_t> lb(GetBox().size());
+   std::vector<Double_t> ub(GetBox().size());
+
+   // probevolume relative to hypercube with edge length 1:
+   const Double_t probevolume_inv = 1.0 / GetBoxVolume();
+
+   // set upper and lower bound for search volume
+   for (UInt_t idim = 0; idim < GetBox().size(); ++idim) {
+      lb[idim] = Xarg[idim] - GetBox().at(idim) / 2.0;
+      ub[idim] = Xarg[idim] + GetBox().at(idim) / 2.0;
+   }
+
+   TMVA::Volume volume(&lb, &ub); // volume to search in
+   std::vector<const TMVA::BinarySearchTreeNode*> nodes; // BST nodes found
+
+   // do range searching
+   Double_t SumOfWeights = fBst->SearchVolume(&volume, &nodes);
+
+   // store density based on total number of events
+   event_density = nodes.size() * probevolume_inv;
+
+   Double_t N_sig = 0; // number of signal events found
+   // calc number of signal events in nodes
+   for (std::vector<const TMVA::BinarySearchTreeNode*>::const_iterator it = nodes.begin();
+        it != nodes.end(); ++it) {
+      if ((*it)->GetClass() == fClass) // signal node
+         N_sig += (*it)->GetWeight();
+   }
+
+   // return: (N_sig/N_total) / (box volume); the +0.1 avoids a division by zero when SumOfWeights is 0
+   return (N_sig / (SumOfWeights + 0.1)) * probevolume_inv;
+}
diff --git a/tmva/src/PDEFoamDistr.cxx b/tmva/src/PDEFoamDistr.cxx
deleted file mode 100644
index 6a08ebba97228e99c6138f575908fd5a9d2f4e8d..0000000000000000000000000000000000000000
--- a/tmva/src/PDEFoamDistr.cxx
+++ /dev/null
@@ -1,293 +0,0 @@
-
-/**********************************************************************************
- * Project: 
TMVA - a Root-integrated toolkit for multivariate data analysis * - * Package: TMVA * - * Classes: PDEFoamDistr * - * Web : http://tmva.sourceforge.net * - * * - * Description: * - * The TFDSITR class provides an interface between the Binary search tree * - * and the PDEFoam object. In order to build-up the foam one needs to * - * calculate the density of events at a given point (sampling during * - * Foam build-up). The function PDEFoamDistr::Density() does this job. It * - * uses a binary search tree, filled with training events, in order to * - * provide this density. * - * * - * Authors (alphabetical): * - * Tancredi Carli - CERN, Switzerland * - * Dominik Dannheim - CERN, Switzerland * - * S. Jadach - Institute of Nuclear Physics, Cracow, Poland * - * Alexander Voigt - CERN, Switzerland * - * Peter Speckmayer - CERN, Switzerland * - * * - * Copyright (c) 2008: * - * CERN, Switzerland * - * MPI-K Heidelberg, Germany * - * * - * Redistribution and use in source and binary forms, with or without * - * modification, are permitted according to the terms listed in LICENSE * - * (http://tmva.sourceforge.net/LICENSE) * - **********************************************************************************/ - -#include <cmath> -#include <limits> - -#ifndef ROOT_TMath -#include "TMath.h" -#endif - -#ifndef ROOT_TMVA_PDEFoamDistr -#include "TMVA/PDEFoamDistr.h" -#endif - -ClassImp(TMVA::PDEFoamDistr) - -//_____________________________________________________________________ -TMVA::PDEFoamDistr::PDEFoamDistr() - : TObject(), - fPDEFoam(NULL), - fBst(NULL), - fDensityCalc(kEVENT_DENSITY), // default: fill event density to BinarySearchTree - fLogger( new MsgLogger("PDEFoamDistr")) -{} - -//_____________________________________________________________________ -TMVA::PDEFoamDistr::~PDEFoamDistr() -{ - if (fBst) delete fBst; - delete fLogger; -} - -//_____________________________________________________________________ -TMVA::PDEFoamDistr::PDEFoamDistr(const PDEFoamDistr &distr) 
- : TObject(), - fPDEFoam (distr.fPDEFoam), - fBst (distr.fBst), - fDensityCalc (kEVENT_DENSITY), // default: fill event density to BinarySearchTree - fLogger( new MsgLogger("PDEFoamDistr")) -{ - // Copy constructor - Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl; -} - -//_____________________________________________________________________ -void TMVA::PDEFoamDistr::Initialize() -{ - // Initialisation of binary search tree. - // Set dimension and create new BinarySearchTree. - - if (!GetPDEFoam()) - Log() << kFATAL << "<PDEFoamDistr::Initialize()> Pointer to owner not set!" << Endl; - - if (fBst) delete fBst; - fBst = new TMVA::BinarySearchTree(); - - if (!fBst){ - Log() << kFATAL << "<PDEFoamDistr::Initialize> " - << "ERROR: an not create binary tree !" << Endl; - } - - // set periode (number of variables) - fBst->SetPeriode(GetPDEFoam()->GetTotDim()); -} - -//_____________________________________________________________________ -void TMVA::PDEFoamDistr::FillBinarySearchTree( const Event* ev, EFoamType ft, Bool_t NoNegWeights ) -{ - // This method creates an TMVA::Event and inserts it into the - // binary search tree. - // - // If 'NoNegWeights' is true, an event with negative weight will - // not be filled into the foam. 
(Default value: false) - - if((NoNegWeights && ev->GetWeight()<=0) || ev->GetOriginalWeight()==0) - return; - - TMVA::Event *event = new TMVA::Event(*ev); - - // set event variables in case of multi-target regression - if (ft==kMultiTarget){ - // since in multi target regression targets are handled like - // variables, remove targets and add them to the event variabels - std::vector<Float_t> targets = ev->GetTargets(); - for (UInt_t i = 0; i < targets.size(); i++) - event->SetVal(i+ev->GetValues().size(), targets.at(i)); - event->GetTargets().clear(); - } - fBst->Insert(event); - - delete event; -} - -//_____________________________________________________________________ -Double_t TMVA::PDEFoamDistr::Density( Double_t *Xarg, Double_t &event_density ) -{ - // This function is needed during the foam buildup. - // It return a certain density depending on the selected classification - // or regression options: - // - // In case of separated foams (classification) or multi target regression: - // - returns event density within volume (specified by VolFrac) - // In case of unified foams: (classification) - // - returns discriminator (N_sig)/(N_sig + N_bg) divided by volume - // (specified by VolFrac) - // In case of mono target regression: - // - returns average target value within volume divided by volume - // (specified by VolFrac) - - if (!GetPDEFoam()) - Log() << kFATAL << "<PDEFoamDistr::Density()> Pointer to owner not set!" 
<< Endl; - - if (!fBst) - Log() << kFATAL << "<PDEFoamDistr::Density()> Binary tree not found!"<< Endl; - - // get PDEFoam properties - Int_t Dim = GetPDEFoam()->GetTotDim(); // dimension of foam - Float_t VolFrac = GetPDEFoam()->GetVolumeFraction(); // get fVolFrac - - // make the variable Xarg transform, since Foam only knows about x=[0,1] - // transformation [0, 1] --> [xmin, xmax] - for (Int_t idim=0; idim<Dim; idim++) - Xarg[idim] = GetPDEFoam()->VarTransformInvers(idim, Xarg[idim]); - - //create volume around point to be found - std::vector<Double_t> lb(Dim); - std::vector<Double_t> ub(Dim); - - // probevolume relative to hypercube with edge length 1: - const Double_t probevolume_inv = std::pow((VolFrac/2), Dim); - - // set upper and lower bound for search volume - for (Int_t idim = 0; idim < Dim; idim++) { - Double_t volsize=(GetPDEFoam()->GetXmax(idim) - - GetPDEFoam()->GetXmin(idim)) / VolFrac; - lb[idim] = Xarg[idim] - volsize; - ub[idim] = Xarg[idim] + volsize; - } - - TMVA::Volume volume(&lb, &ub); // volume to search in - std::vector<const TMVA::BinarySearchTreeNode*> nodes; // BST nodes found - - // do range searching - fBst->SearchVolume(&volume, &nodes); - - // normalized density: (number of counted events) / volume / (total - // number of events) should be ~1 on average - const UInt_t count = nodes.size(); // number of events found - - // store density based on total number of events - event_density = count * probevolume_inv; - - Double_t weighted_count = 0.; // number of events found (sum of weights!) 
- for (UInt_t j=0; j<nodes.size(); j++) - weighted_count += (nodes.at(j))->GetWeight(); - - if (FillDiscriminator()){ // calc number of signal events in nodes - Double_t N_sig = 0; // number of signal events found - // now sum over all nodes->IsSignal; - for (UInt_t j=0; j<count; j++){ - if (nodes.at(j)->IsSignal()) N_sig += nodes.at(j)->GetWeight(); - } - return (N_sig/(weighted_count+0.1))*probevolume_inv; // return: (N_sig/N_total) / (cell_volume) - } - else if (FillTarget0()){ // calc sum of weighted target values - Double_t N_tar = 0; // number of target events found - // now sum over all nodes->GetTarget(0); - for (UInt_t j=0; j<count; j++) { - N_tar += ((nodes.at(j))->GetTargets()).at(0) * ((nodes.at(j))->GetWeight()); - } - return (N_tar/(weighted_count+0.1))*probevolume_inv; // return: (N_tar/N_total) / (cell_volume) - } - - return ((weighted_count+0.1)*probevolume_inv); // return: N_total(weighted) / cell_volume -} - -//_____________________________________________________________________ -void TMVA::PDEFoamDistr::FillHist(PDEFoamCell* cell, std::vector<TH1F*> &hsig, std::vector<TH1F*> &hbkg, std::vector<TH1F*> &hsig_unw, std::vector<TH1F*> &hbkg_unw) -{ - // fill the given histograms with signal and background events, - // which are located in the given cell - - if (!GetPDEFoam()) - Log() << kFATAL << "<PDEFoamDistr::FillHist> Pointer to owner not set!" << Endl; - - // get PDEFoam properties - Int_t Dim = GetPDEFoam()->GetTotDim(); // dimension of foam - - // sanity check - if (!cell) - Log() << kFATAL << "<PDEFoamDistr::FillHist> Null pointer for cell given!" << Endl; - if (Int_t(hsig.size()) != Dim || Int_t(hbkg.size()) != Dim || - Int_t(hsig_unw.size()) != Dim || Int_t(hbkg_unw.size()) != Dim) - Log() << kFATAL << "<PDEFoamDistr::FillHist> Edge histograms have wrong size!" 
<< Endl; - - // check histograms - for (Int_t idim=0; idim<Dim; idim++) { - if (!hsig.at(idim) || !hbkg.at(idim) || - !hsig_unw.at(idim) || !hbkg_unw.at(idim)) - Log() << kFATAL << "<PDEFoamDistr::FillHist> Histogram not initialized!" << Endl; - } - - // get cell position and size - PDEFoamVect cellSize(Dim); - PDEFoamVect cellPosi(Dim); - cell->GetHcub(cellPosi, cellSize); - - // determine lower and upper cell bound - std::vector<Double_t> lb(Dim); // lower bound - std::vector<Double_t> ub(Dim); // upper bound - for (Int_t idim = 0; idim < Dim; idim++) { - lb[idim] = GetPDEFoam()->VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon()); - ub[idim] = GetPDEFoam()->VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon()); - } - - // create TMVA::Volume object needed for searching within the BST - TMVA::Volume volume(&lb, &ub); // volume to search in - std::vector<const TMVA::BinarySearchTreeNode*> nodes; // BST nodes found - - // do range searching - fBst->SearchVolume(&volume, &nodes); - - // calc xmin and xmax of events found in cell - std::vector<Float_t> xmin(Dim, std::numeric_limits<float>::max()); - std::vector<Float_t> xmax(Dim, -std::numeric_limits<float>::max()); - for (UInt_t iev=0; iev<nodes.size(); iev++) { - std::vector<Float_t> ev = nodes.at(iev)->GetEventV(); - for (Int_t idim=0; idim<Dim; idim++) { - if (ev.at(idim) < xmin.at(idim)) xmin.at(idim) = ev.at(idim); - if (ev.at(idim) > xmax.at(idim)) xmax.at(idim) = ev.at(idim); - } - } - - // reset histogram ranges - for (Int_t idim=0; idim<Dim; idim++) { - hsig.at(idim)->GetXaxis()->SetLimits(GetPDEFoam()->VarTransform(idim,xmin.at(idim)), - GetPDEFoam()->VarTransform(idim,xmax.at(idim))); - hbkg.at(idim)->GetXaxis()->SetLimits(GetPDEFoam()->VarTransform(idim,xmin.at(idim)), - GetPDEFoam()->VarTransform(idim,xmax.at(idim))); - hsig_unw.at(idim)->GetXaxis()->SetLimits(GetPDEFoam()->VarTransform(idim,xmin.at(idim)), - 
GetPDEFoam()->VarTransform(idim,xmax.at(idim))); - hbkg_unw.at(idim)->GetXaxis()->SetLimits(GetPDEFoam()->VarTransform(idim,xmin.at(idim)), - GetPDEFoam()->VarTransform(idim,xmax.at(idim))); - hsig.at(idim)->Reset(); - hbkg.at(idim)->Reset(); - hsig_unw.at(idim)->Reset(); - hbkg_unw.at(idim)->Reset(); - } - - // fill histograms - for (UInt_t iev=0; iev<nodes.size(); iev++) { - std::vector<Float_t> ev = nodes.at(iev)->GetEventV(); - Float_t wt = nodes.at(iev)->GetWeight(); - Bool_t signal = nodes.at(iev)->IsSignal(); - for (Int_t idim=0; idim<Dim; idim++) { - if (signal) { - hsig.at(idim)->Fill(GetPDEFoam()->VarTransform(idim,ev.at(idim)), wt); - hsig_unw.at(idim)->Fill(GetPDEFoam()->VarTransform(idim,ev.at(idim)), 1); - } else { - hbkg.at(idim)->Fill(GetPDEFoam()->VarTransform(idim,ev.at(idim)), wt); - hbkg_unw.at(idim)->Fill(GetPDEFoam()->VarTransform(idim,ev.at(idim)), 1); - } - } - } -} diff --git a/tmva/src/PDEFoamEvent.cxx b/tmva/src/PDEFoamEvent.cxx new file mode 100644 index 0000000000000000000000000000000000000000..9912f13f0e6e375273724638280d98d2204e2409 --- /dev/null +++ b/tmva/src/PDEFoamEvent.cxx @@ -0,0 +1,84 @@ +// @(#)root/tmva $Id$ +// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Classes: PDEFoamEvent * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Implementation. * + * * + * Authors (alphabetical): * + * Tancredi Carli - CERN, Switzerland * + * Dominik Dannheim - CERN, Switzerland * + * S. 
Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * Peter Speckmayer - CERN, Switzerland *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamEvent
+//
+// PDEFoam variant, which acts as event density estimator: every
+// cell stores the sum of the event weights (cell element 0) and
+// the sum of the squared event weights (cell element 1).  It
+// should be booked together with the PDEFoamEventDensity density
+// estimator, which returns the event weight density at a given
+// phase space point during the foam build-up.
+//
+//_____________________________________________________________________
+
+#ifndef ROOT_TMVA_PDEFoamEvent
+#include "TMVA/PDEFoamEvent.h"
+#endif
+
+ClassImp(TMVA::PDEFoamEvent)
+
+//_____________________________________________________________________
+TMVA::PDEFoamEvent::PDEFoamEvent()
+   : PDEFoam()
+{
+   // Default constructor (needed by the streamer); not for users.
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamEvent::PDEFoamEvent(const TString& Name)
+   : PDEFoam(Name)
+{
+   // User constructor; 'Name' is passed on to the PDEFoam base class.
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamEvent::PDEFoamEvent(const PDEFoamEvent &From)
+   : PDEFoam(From)
+{
+   // Copy constructor is NOT implemented: it issues a fatal message
+   Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
+}
+
+//_____________________________________________________________________
+void TMVA::PDEFoamEvent::FillFoamCells(const Event* ev, Float_t wt)
+{
+   // Adds the event weight 'wt' to element 0 and the squared weight
+   // 'wt*wt' to element 1 of the foam cell, which contains 'ev'.
+
+   // look up the cell, which contains the (transformed) event
+   std::vector<Float_t> evVars   = ev->GetValues();
+   std::vector<Float_t> foamVars = VarTransform(evVars);
+   PDEFoamCell *hitCell = FindCell(foamVars);
+
+   // element 0: sum of weights, element 1: sum of squared weights
+   SetCellElement(hitCell, 0, GetCellElement(hitCell, 0) + wt);
+   SetCellElement(hitCell, 1, GetCellElement(hitCell, 1) + wt * wt);
+}
diff --git a/tmva/src/PDEFoamEventDensity.cxx b/tmva/src/PDEFoamEventDensity.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..401a31751f282f408cfe19880d001d541cec7134
--- /dev/null
+++ b/tmva/src/PDEFoamEventDensity.cxx
@@ -0,0 +1,108 @@
+// @(#)root/tmva $Id$
+// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamEventDensity *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * This class provides an interface between the binary search tree *
+ * and the PDEFoam object. In order to build-up the foam one needs to *
+ * calculate the density of events at a given point (sampling during *
+ * foam build-up). The function PDEFoamEventDensity::Density() does *
+ * this job. It uses a binary search tree, which is filled with *
+ * training events, in order to provide this density at the *
+ * given point. *
+ * *
+ * Authors (alphabetical): *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * S.
Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * Peter Speckmayer - CERN, Switzerland *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamEventDensity
+//
+// This is a concrete implementation of PDEFoamDensityBase.
+// Density(...) estimates the event (weight) density at a given
+// phase-space point using range-searching.
+// _____________________________________________________________________
+
+#include <cmath>
+
+#ifndef ROOT_TMVA_PDEFoamEventDensity
+#include "TMVA/PDEFoamEventDensity.h"
+#endif
+
+ClassImp(TMVA::PDEFoamEventDensity)
+
+//_____________________________________________________________________
+TMVA::PDEFoamEventDensity::PDEFoamEventDensity()
+   : PDEFoamDensityBase()
+{}
+
+//_____________________________________________________________________
+TMVA::PDEFoamEventDensity::PDEFoamEventDensity(std::vector<Double_t> box)
+   : PDEFoamDensityBase(box)
+{}
+
+//_____________________________________________________________________
+TMVA::PDEFoamEventDensity::PDEFoamEventDensity(const PDEFoamEventDensity &distr)
+   : PDEFoamDensityBase(distr)
+{
+   // Copy constructor (the work is done by the base class)
+}
+
+//_____________________________________________________________________
+Double_t TMVA::PDEFoamEventDensity::Density(std::vector<Double_t> &Xarg, Double_t &event_density)
+{
+   // This function is needed during the foam build-up.  It returns
+   // the event weight density within the box (with edge lengths
+   // GetBox()), which is centred at the point 'Xarg'.
+   //
+   // Parameters:
+   // - Xarg - event vector (in [fXmin,fXmax])
+   // - event_density - here the event density is stored
+
+   if (!fBst)
+      Log() << kFATAL << "<PDEFoamEventDensity::Density()> Binary tree not found!" << Endl;
+
+   // lower and upper bounds of the search box around Xarg
+   const std::vector<Double_t>& box = GetBox();
+   std::vector<Double_t> lower(box.size());
+   std::vector<Double_t> upper(box.size());
+
+   // inverse of the probe volume (see GetBoxVolume())
+   const Double_t invBoxVolume = 1.0 / GetBoxVolume();
+
+   for (UInt_t idim = 0; idim < box.size(); ++idim) {
+      const Double_t halfEdge = box.at(idim) / 2.0;
+      lower[idim] = Xarg[idim] - halfEdge;
+      upper[idim] = Xarg[idim] + halfEdge;
+   }
+
+   TMVA::Volume searchBox(&lower, &upper);               // volume to search in
+   std::vector<const TMVA::BinarySearchTreeNode*> found; // BST nodes found
+
+   // range search; the return value is used as sum of event weights
+   Double_t sumOfWeights = fBst->SearchVolume(&searchBox, &found);
+
+   // store density based on the number of nodes found
+   event_density = found.size() * invBoxVolume;
+
+   // return: (sum of weights + 0.1) / probe volume
+   return (sumOfWeights + 0.1) * invBoxVolume;
+}
diff --git a/tmva/src/PDEFoamKernelBase.cxx b/tmva/src/PDEFoamKernelBase.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..66b89634c6c8393dc32d84ddd84ed476f2c27a9b
--- /dev/null
+++ b/tmva/src/PDEFoamKernelBase.cxx
@@ -0,0 +1,69 @@
+// @(#)root/tmva $Id$
+// Author: Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamKernelBase *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * Implementation of PDEFoam kernel interface *
+ * *
+ * Authors (alphabetical): *
+ * S.
Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * *
+ * Copyright (c) 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamKernelBase
+//
+// Abstract interface of the PDEFoam kernels.  A kernel is used
+// for manipulating (smearing) the cell values of a PDEFoam.  In
+// order to apply it, the kernel is passed as an argument to
+// PDEFoam::GetCellValue(...).
+//
+// Every derived class has to implement the Estimate() function,
+// which provides the specific kernel behaviour.
+// _____________________________________________________________________
+
+#ifndef ROOT_TMVA_PDEFoamKernelBase
+#include "TMVA/PDEFoamKernelBase.h"
+#endif
+
+ClassImp(TMVA::PDEFoamKernelBase)
+
+//_____________________________________________________________________
+TMVA::PDEFoamKernelBase::PDEFoamKernelBase()
+   : TObject(),
+     fLogger(new MsgLogger("PDEFoamKernelBase"))
+{
+   // Default constructor (needed by the streamer); creates the logger
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamKernelBase::PDEFoamKernelBase(const PDEFoamKernelBase &other)
+   : TObject(),
+     fLogger(new MsgLogger(*other.fLogger))
+{
+   // Copy constructor; creates an own copy of the logger of 'other'
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamKernelBase::~PDEFoamKernelBase()
+{
+   // Destructor; releases the logger, which is owned by this object
+   // (deleting a null pointer is a no-op, so no check is needed)
+   delete fLogger;
+}
diff --git a/tmva/src/PDEFoamKernelGauss.cxx b/tmva/src/PDEFoamKernelGauss.cxx
new file mode 100644
index
0000000000000000000000000000000000000000..052031066a9a8ce5acaf207b28cec3b9d7491b8a
--- /dev/null
+++ b/tmva/src/PDEFoamKernelGauss.cxx
@@ -0,0 +1,203 @@
+// @(#)root/tmva $Id$
+// Author: Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamKernelGauss *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * Implementation of gauss PDEFoam kernel *
+ * *
+ * Authors (alphabetical): *
+ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamKernelGauss
+//
+// PDEFoam kernel, which estimates a cell value for a given event as
+// the average over all cell values, weighted with a gauss function.
+// _____________________________________________________________________
+
+#ifndef ROOT_TMath
+#include "TMath.h"
+#endif
+
+#ifndef ROOT_TMVA_PDEFoamKernelGauss
+#include "TMVA/PDEFoamKernelGauss.h"
+#endif
+
+ClassImp(TMVA::PDEFoamKernelGauss)
+
+//_____________________________________________________________________
+TMVA::PDEFoamKernelGauss::PDEFoamKernelGauss(Float_t sigma)
+   : PDEFoamKernelBase(),
+     fSigma(sigma)
+{
+   // Constructor; 'sigma' is the width of the gaussian weight function
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamKernelGauss::PDEFoamKernelGauss(const PDEFoamKernelGauss &other)
+   : PDEFoamKernelBase(other),
+     fSigma(other.fSigma)
+{
+   // Copy constructor; takes over the width of 'other'
+}
+
+//_____________________________________________________________________
+Float_t TMVA::PDEFoamKernelGauss::Estimate(PDEFoam *foam, std::vector<Float_t> &txvec, ECellValue cv)
+{
+   // Gaussian kernel estimator.  It returns the estimate of the cell
+   // value 'cv' at the event vector 'txvec' (in foam coordinates).
+   // The estimate is the average over the values of all cells with
+   // non-zero status, where each cell is weighted with a gaussian
+   // function of its distance to 'txvec' (see WeightGaus()).
+   //
+   // Parameters:
+   // - foam - the pdefoam to search in
+   // - txvec - event vector in foam coordinates [0,1] (coordinates
+   //   outside of this range are clamped to it by WeightGaus())
+   // - cv - cell value to estimate
+
+   if (foam == NULL)
+      Log() << kFATAL << "<PDEFoamKernelGauss::Estimate>: PDEFoam not set!" << Endl;
+
+   Float_t weightedSum = 0;
+   Float_t sumOfWeights = 0;
+
+   for (Long_t iCell = 0; iCell <= foam->fLastCe; iCell++) {
+      PDEFoamCell *current = foam->fCells[iCell];
+      if (!(current->GetStat())) continue; // skip cells with zero status
+
+      // value of this cell; if it is undefined, the average value of
+      // the neighbors of the cell, which contains txvec, is taken
+      Float_t cellValue = 0;
+      if (foam->CellValueIsUndefined(current))
+         cellValue = GetAverageNeighborsValue(foam, txvec, cv);
+      else
+         cellValue = foam->GetCellValue(current, cv);
+
+      // gaussian weight between txvec and the current cell
+      const Float_t weight = WeightGaus(foam, current, txvec);
+
+      weightedSum  += weight * cellValue;
+      sumOfWeights += weight;
+   }
+
+   return (sumOfWeights != 0 ? weightedSum / sumOfWeights : 0);
+}
+
+//_____________________________________________________________________
+Float_t TMVA::PDEFoamKernelGauss::GetAverageNeighborsValue(PDEFoam *foam,
+                                                           std::vector<Float_t> &txvec,
+                                                           ECellValue cv)
+{
+   // Returns the average of the cell values 'cv' of the nearest
+   // neighbor cells of the cell, which contains 'txvec'.  Only
+   // neighbors with a well-defined cell value are taken into
+   // account (if there are none, 0 is returned).  It is used to
+   // estimate the value of a cell, whose own value is undefined.
+   //
+   // Parameters:
+   // - foam - the foam to search in
+   // - txvec - event vector, transformed into foam coordinates [0, 1]
+   // - cv - cell value, see definition of ECellValue
+
+   const Float_t xoffset = 1.e-6; // step across the cell border
+   Float_t nFound = 0;            // number of well-defined neighbors
+   Float_t sum = 0;               // sum of their cell values
+
+   // position and size of the cell, which contains txvec
+   PDEFoamCell *cell = foam->FindCell(txvec);
+   PDEFoamVect cellSize(foam->GetTotDim());
+   PDEFoamVect cellPosi(foam->GetTotDim());
+   cell->GetHcub(cellPosi, cellSize);
+
+   // in every dimension probe the points just below and just above
+   // the cell borders, in order to find the neighbor cells
+   for (Int_t dim = 0; dim < foam->GetTotDim(); dim++) {
+      std::vector<Float_t> probe(txvec);
+
+      // left neighbor
+      probe[dim] = cellPosi[dim] - xoffset;
+      PDEFoamCell *leftCell = foam->FindCell(probe);
+      if (!foam->CellValueIsUndefined(leftCell)) {
+         sum += foam->GetCellValue(leftCell, cv);
+         nFound++;
+      }
+
+      // right neighbor
+      probe[dim] = cellPosi[dim] + cellSize[dim] + xoffset;
+      PDEFoamCell *rightCell = foam->FindCell(probe);
+      if (!foam->CellValueIsUndefined(rightCell)) {
+         sum += foam->GetCellValue(rightCell, cv);
+         nFound++;
+      }
+   }
+
+   // average over the well-defined neighbors (0 if there are none)
+   if (nFound > 0) return sum / nFound;
+   return 0;
+}
+
+//_____________________________________________________________________
+Float_t TMVA::PDEFoamKernelGauss::WeightGaus(PDEFoam *foam, PDEFoamCell* cell,
+                                             std::vector<Float_t> &txvec)
+{
+   // Returns the gauss weight between the 'cell' and a given coordinate 'txvec'.
+   //
+   // Parameters:
+   // - foam - the foam (used to get the number of dimensions)
+   //
+   // - cell - the cell
+   //
+   // - txvec - the transformed event variables (in [0,1]); note, that
+   //   txvec is modified: coordinates <0 are set to 0, >1 are set to 1
+   //
+   // Returns:
+   // exp(-(d/fSigma)^2/2), where d is the euclidean distance between
+   // 'txvec' and the point of the 'cell', which is closest to 'txvec'
+   // (in order to avoid artefacts because of the form of the cells).
+
+   // get cell coordinates
+   PDEFoamVect cellSize(foam->GetTotDim());
+   PDEFoamVect cellPosi(foam->GetTotDim());
+   cell->GetHcub(cellPosi, cellSize);
+
+   // sum up the squared distances (per dimension) between txvec and
+   // the point of the cell, which is closest to txvec
+   Float_t distance = 0;
+   for (Int_t i = 0; i < foam->GetTotDim(); i++) {
+      // clamp the coordinate to [0,1]
+      if (txvec[i] < 0.) txvec[i] = 0.;
+      if (txvec[i] > 1.) txvec[i] = 1.;
+
+      // coordinate of the closest point of the cell
+      Float_t nearest = txvec.at(i);          // txvec is inside the cell
+      if (cellPosi[i] > txvec.at(i))
+         nearest = cellPosi[i];               // txvec is left of the cell
+      else if (cellPosi[i] + cellSize[i] < txvec.at(i))
+         nearest = cellPosi[i] + cellSize[i]; // txvec is right of the cell
+
+      distance += Sqr(txvec.at(i) - nearest);
+   }
+   distance = TMath::Sqrt(distance); // euclidean distance for weighting
+
+   // weight with Gaus
+   return TMath::Gaus(distance, 0, fSigma, kFALSE);
+}
diff --git a/tmva/src/PDEFoamKernelLinN.cxx b/tmva/src/PDEFoamKernelLinN.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..cb63378527ef39a3d90a72b39cbeeb9705efed16
--- /dev/null
+++ b/tmva/src/PDEFoamKernelLinN.cxx
@@ -0,0 +1,203 @@
+// @(#)root/tmva $Id$
+// Author: Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVA *
+ * Classes: PDEFoamKernelLinN *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * Implementation of linear neighbors PDEFoam kernel
*
+ * *
+ * Authors (alphabetical): *
+ * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
+ * Tancredi Carli - CERN, Switzerland *
+ * Dominik Dannheim - CERN, Switzerland *
+ * Alexander Voigt - TU Dresden, Germany *
+ * *
+ * Copyright (c) 2008, 2010: *
+ * CERN, Switzerland *
+ * MPI-K Heidelberg, Germany *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamKernelLinN
+//
+// PDEFoam kernel, which estimates a cell value for a given event by
+// linear weighting with the values of the nearest neighbor cells.
+// _____________________________________________________________________
+
+#ifndef ROOT_TMVA_PDEFoamKernelLinN
+#include "TMVA/PDEFoamKernelLinN.h"
+#endif
+
+ClassImp(TMVA::PDEFoamKernelLinN)
+
+//_____________________________________________________________________
+TMVA::PDEFoamKernelLinN::PDEFoamKernelLinN()
+   : PDEFoamKernelBase()
+{
+   // Default constructor (needed by the streamer)
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamKernelLinN::PDEFoamKernelLinN(const PDEFoamKernelLinN &other)
+   : PDEFoamKernelBase(other)
+{
+   // Copy constructor (the work is done by the base class)
+}
+
+//_____________________________________________________________________
+Float_t TMVA::PDEFoamKernelLinN::Estimate(PDEFoam *foam, std::vector<Float_t> &txvec, ECellValue cv)
+{
+   // Linear neighbors kernel estimator.  It returns the cell value
+   // 'cv' at the event vector 'txvec' (in foam coordinates), which
+   // is linearly weighted with the cell values of the nearest
+   // neighbor cells (see WeightLinNeighbors()).
+   //
+   // Parameters:
+   //
+   // - foam - the pdefoam to search in
+   //
+   // - txvec - event vector in foam coordinates [0,1]
+   //
+   // - cv - cell value to estimate
+
+   if (!foam)
+      Log() << kFATAL << "<PDEFoamKernelLinN::Estimate>: PDEFoam not set!" << Endl;
+
+   return WeightLinNeighbors(foam, txvec, cv, kTRUE); // kTRUE: treat empty cells
+}
+
+//_____________________________________________________________________
+Float_t TMVA::PDEFoamKernelLinN::WeightLinNeighbors(PDEFoam *foam, std::vector<Float_t> &txvec, ECellValue cv, Bool_t TreatEmptyCells)
+{
+   // Returns the cell value 'cv' at 'txvec' (foam coordinates
+   // [0,1]), linearly weighted between the cell, which contains
+   // 'txvec', and its nearest neighbor cell in every dimension.
+   //
+   // Parameters:
+   // - foam - the foam to search in
+   // - txvec - event vector, transformed into foam coordinates [0,1]
+   // - cv - cell value to be weighted
+   // - TreatEmptyCells - if this option is set to false (default),
+   //   it is not checked, whether the cell value or neighbor cell
+   //   values are undefined (using foam->CellValueIsUndefined()).
+   //   If this option is set to true, then only non-empty neighbor
+   //   cells are taken into account for weighting.  If the cell
+   //   value of the cell, which contains txvec, is empty, then its
+   //   value is estimated by the average value of the non-empty
+   //   neighbor cells (using GetAverageNeighborsValue()).
+
+   if (txvec.size() != UInt_t(foam->GetTotDim()))
+      Log() << kFATAL << "Wrong dimension of event variable!" << Endl;
+
+   // position and size of the cell, which contains txvec
+   PDEFoamCell *cell = foam->FindCell(txvec);
+   PDEFoamVect cellSize(foam->GetTotDim());
+   PDEFoamVect cellPosi(foam->GetTotDim());
+   cell->GetHcub(cellPosi, cellSize);
+
+   // value of the cell, which contains txvec; if requested, an
+   // undefined value is replaced by the average over the neighbors
+   Float_t cellval = 0;
+   if (TreatEmptyCells && foam->CellValueIsUndefined(cell))
+      cellval = GetAverageNeighborsValue(foam, txvec, cv);
+   else
+      cellval = foam->GetCellValue(cell, cv);
+
+   const Float_t xoffset = 1.e-6; // step across the cell border
+   Float_t result = 0.;           // sum of the weighted values
+   UInt_t norm = 0;               // number of dimensions summed up
+
+   // in every dimension weight between the cell and the neighbor
+   // cell, which is closest to txvec
+   for (Int_t dim = 0; dim < foam->GetTotDim(); dim++) {
+      std::vector<Float_t> ntxvec(txvec);
+
+      // distance of txvec to the nearest cell border (relative to
+      // the cell size) and a point just across this border
+      Float_t mindist = (txvec[dim] - cellPosi[dim]) / cellSize[dim];
+      if (mindist < 0.5) {
+         // the lower border is nearer -> left neighbor
+         ntxvec[dim] = cellPosi[dim] - xoffset;
+      } else {
+         // the upper border is nearer -> right neighbor
+         mindist = 1 - mindist;
+         ntxvec[dim] = cellPosi[dim] + cellSize[dim] + xoffset;
+      }
+      PDEFoamCell *neighbor = foam->FindCell(ntxvec);
+      Float_t neighborval = foam->GetCellValue(neighbor, cv);
+
+      // if requested, skip neighbors with undefined cell value
+      if (TreatEmptyCells && foam->CellValueIsUndefined(neighbor))
+         continue;
+
+      // linear weighting between the cell and its neighbor
+      result += cellval * (0.5 + mindist);
+      result += neighborval * (0.5 - mindist);
+      norm++;
+   }
+
+   if (norm == 0) return cellval; // all nearest neighbors were empty
+   return result / norm;          // normalisation
+}
+
+//_____________________________________________________________________
+Float_t TMVA::PDEFoamKernelLinN::GetAverageNeighborsValue(PDEFoam *foam,
+                                                          std::vector<Float_t> &txvec,
+                                                          ECellValue cv)
+{
+   // Returns the average of the cell values 'cv' of the nearest
+   // neighbor cells of the cell, which contains 'txvec'.  Only
+   // neighbors with a well-defined cell value are taken into
+   // account (if there are none, 0 is returned).  It is used to
+   // estimate the value of a cell, whose own value is undefined.
+   //
+   // Parameters:
+   // - foam - the foam to search in
+   // - txvec - event vector, transformed into foam coordinates [0, 1]
+   // - cv - cell value, see definition of ECellValue
+
+   const Float_t xoffset = 1.e-6; // step across the cell border
+   Float_t nFound = 0;            // number of well-defined neighbors
+   Float_t sum = 0;               // sum of their cell values
+
+   // position and size of the cell, which contains txvec
+   PDEFoamCell *cell = foam->FindCell(txvec);
+   PDEFoamVect cellSize(foam->GetTotDim());
+   PDEFoamVect cellPosi(foam->GetTotDim());
+   cell->GetHcub(cellPosi, cellSize);
+
+   // in every dimension probe the points just below and just above
+   // the cell borders, in order to find the neighbor cells
+   for (Int_t dim = 0; dim < foam->GetTotDim(); dim++) {
+      std::vector<Float_t> probe(txvec);
+
+      // left neighbor
+      probe[dim] = cellPosi[dim] - xoffset;
+      PDEFoamCell *leftCell = foam->FindCell(probe);
+      if (!foam->CellValueIsUndefined(leftCell)) {
+         sum += foam->GetCellValue(leftCell, cv);
+         nFound++;
+      }
+
+      // right neighbor
+      probe[dim] = cellPosi[dim] + cellSize[dim] + xoffset;
+      PDEFoamCell *rightCell = foam->FindCell(probe);
+      if (!foam->CellValueIsUndefined(rightCell)) {
+         sum += foam->GetCellValue(rightCell, cv);
+         nFound++;
+      }
+   }
+
+   // average over the well-defined neighbors (0 if there are none)
+   if (nFound > 0) return sum / nFound;
+   return 0;
+}
diff --git a/tmva/src/PDEFoamKernelTrivial.cxx b/tmva/src/PDEFoamKernelTrivial.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..ccb6dc2a0f7b5fee1d352f3bc8ef430b04af9cfd
--- /dev/null
+++ b/tmva/src/PDEFoamKernelTrivial.cxx
@@ -0,0 +1,74 @@
+// @(#)root/tmva $Id$
+// Author: Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
+ * Package: TMVA                                                                  *
+ * Classes: PDEFoamKernelTrivial                                                  *
+ * Web    : http://tmva.sourceforge.net                                           *
+ *                                                                                *
+ * Description:                                                                   *
+ *      Implementation of trivial PDEFoam kernel                                  *
+ *                                                                                *
+ * Authors (alphabetical):                                                        *
+ *      S. Jadach        - Institute of Nuclear Physics, Cracow, Poland           *
+ *      Tancredi Carli   - CERN, Switzerland                                      *
+ *      Dominik Dannheim - CERN, Switzerland                                      *
+ *      Alexander Voigt  - TU Dresden, Germany                                    *
+ *                                                                                *
+ * Copyright (c) 2010:                                                            *
+ *      CERN, Switzerland                                                         *
+ *      MPI-K Heidelberg, Germany                                                 *
+ *                                                                                *
+ * Redistribution and use in source and binary forms, with or without             *
+ * modification, are permitted according to the terms listed in LICENSE           *
+ * (http://tmva.sourceforge.net/LICENSE)                                          *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamKernelTrivial
+//
+// This class is a trivial PDEFoam kernel estimator.  The Estimate()
+// function returns the cell value, given an event 'txvec'.
+// _____________________________________________________________________
+
+#ifndef ROOT_TMVA_PDEFoamKernelTrivial
+#include "TMVA/PDEFoamKernelTrivial.h"
+#endif
+
+ClassImp(TMVA::PDEFoamKernelTrivial)
+
+//_____________________________________________________________________
+TMVA::PDEFoamKernelTrivial::PDEFoamKernelTrivial()
+   : PDEFoamKernelBase()
+{
+   // Default constructor for streamer
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamKernelTrivial::PDEFoamKernelTrivial(const PDEFoamKernelTrivial &other)
+   : PDEFoamKernelBase(other)
+{
+   // Copy constructor.  The class initialises nothing besides its
+   // base class, which is copied from 'other'.
+}
+
+//_____________________________________________________________________
+Float_t TMVA::PDEFoamKernelTrivial::Estimate(PDEFoam *foam, std::vector<Float_t> &txvec, ECellValue cv)
+{
+   // Simple kernel estimator.  It returns the cell value 'cv',
+   // corresponding to the event vector 'txvec' (in foam coordinates).
+   //
+   // Parameters:
+   //
+   // - foam - the pdefoam to search in
+   //
+   // - txvec - event vector in foam coordinates [0,1]
+   //
+   // - cv - cell value to estimate
+   //
+   // Return:
+   //
+   // The value 'cv' of the cell, which foam->FindCell() returns for
+   // 'txvec'.  No neighbor cells are taken into account.
+
+   // NOTE(review): 'foam' is dereferenced below even if it is NULL;
+   // this presumably relies on the kFATAL message not returning --
+   // TODO confirm
+   if (foam == NULL)
+      Log() << kFATAL << "<PDEFoamKernelTrivial::Estimate>: PDEFoam not set!" << Endl;
+
+   return foam->GetCellValue(foam->FindCell(txvec), cv);
+}
diff --git a/tmva/src/PDEFoamMultiTarget.cxx b/tmva/src/PDEFoamMultiTarget.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..525b10b8cfeb65694c588166036493925ed6ffe5
--- /dev/null
+++ b/tmva/src/PDEFoamMultiTarget.cxx
@@ -0,0 +1,195 @@
+// @(#)root/tmva $Id$
+// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
+ * Package: TMVA                                                                  *
+ * Classes: PDEFoamMultiTarget                                                    *
+ * Web    : http://tmva.sourceforge.net                                           *
+ *                                                                                *
+ * Description:                                                                   *
+ *      Implementation.                                                           *
+ *                                                                                *
+ * Authors (alphabetical):                                                        *
+ *      Tancredi Carli   - CERN, Switzerland                                      *
+ *      Dominik Dannheim - CERN, Switzerland                                      *
+ *      S.
Jadach        - Institute of Nuclear Physics, Cracow, Poland           *
+ *      Alexander Voigt  - TU Dresden, Germany                                    *
+ *      Peter Speckmayer - CERN, Switzerland                                      *
+ *                                                                                *
+ * Copyright (c) 2008, 2010:                                                      *
+ *      CERN, Switzerland                                                         *
+ *      MPI-K Heidelberg, Germany                                                 *
+ *                                                                                *
+ * Redistribution and use in source and binary forms, with or without             *
+ * modification, are permitted according to the terms listed in LICENSE           *
+ * (http://tmva.sourceforge.net/LICENSE)                                          *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamMultiTarget
+//
+// This PDEFoam variant is used to estimate multiple targets by
+// creating an event density foam (PDEFoamEvent), which has dimension:
+//
+//    dimension = number of variables + number targets
+//
+// This PDEFoam variant stores in every cell the sum of event weights
+// and the sum of the squared event weights.  During evaluation for a
+// given event, which has only variables and no targets (number of
+// event variables is smaller than the foam dimension), the targets
+// are estimated by finding all cells, which correspond to this event
+// and calculate the Mean (or Mpv, depending on the ETargetSelection)
+// cell center weighted by the event density in the cell.
+//
+// This PDEFoam variant should be booked together with the
+// PDEFoamEventDensity density estimator, which returns the event
+// weight density at a given phase space point during the foam
+// build-up.
+//
+//_____________________________________________________________________
+
+#include <limits>
+
+#ifndef ROOT_TMVA_PDEFoamMultiTarget
+#include "TMVA/PDEFoamMultiTarget.h"
+#endif
+
+ClassImp(TMVA::PDEFoamMultiTarget)
+
+//_____________________________________________________________________
+TMVA::PDEFoamMultiTarget::PDEFoamMultiTarget()
+   : PDEFoamEvent()
+   , fTargetSelection(kMean)
+{
+   // Default constructor for streamer, user should not use it.
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamMultiTarget::PDEFoamMultiTarget(const TString& Name, ETargetSelection ts)
+   : PDEFoamEvent(Name)
+   , fTargetSelection(ts)
+{
+   // User constructor
+   //
+   // Parameters:
+   // - Name - name of the foam, passed on to PDEFoamEvent
+   // - ts - target selection, which GetCellValue() uses to calculate
+   //        the targets (kMean or kMpv)
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamMultiTarget::PDEFoamMultiTarget(const PDEFoamMultiTarget &From)
+   : PDEFoamEvent(From)
+   , fTargetSelection(kMean)
+{
+   // Copy Constructor NOT IMPLEMENTED (NEVER USED).  Note, that the
+   // target selection of 'From' is not copied.
+   Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
+}
+
+//_____________________________________________________________________
+std::vector<Float_t> TMVA::PDEFoamMultiTarget::GetCellValue(std::map<Int_t, Float_t>& xvec, ECellValue /*cv*/)
+{
+   // This function is overridden from PDEFoam.  It returns all
+   // regression targets (in order), given an untransformed event
+   // vector 'xvec'.  The key of 'xvec' is the dimension and the value
+   // (Float_t) is the coordinate.
+   //
+   // Note: number of foam dimensions = number of variables + number
+   // of targets
+   //
+   // Parameters:
+   // - xvec - map of event variables (no targets!)
+   // - cv - cell value to return (ignored!)
+   //
+   // Return:
+   // Targets, ordered by missing dimensions in 'xvec'.
+   // The size of the returned vector = foam dimension - size of xvec.
+   //
+   // - kMean: every target is the mean of the centers of all cells,
+   //   which fit 'xvec', weighted by the event density of the cells
+   // - kMpv: every target is the center of that cell, which has the
+   //   highest event density of all cells, which fit 'xvec'
+   //
+   // If no cell fits 'xvec', or (kMpv) none of the cells has a
+   // positive event density, all targets are 0.
+
+   // transform event vector
+   std::map<Int_t, Float_t> txvec; // transformed event vector
+   for (std::map<Int_t, Float_t>::const_iterator it = xvec.begin();
+        it != xvec.end(); ++it) {
+      Float_t coordinate = it->second; // event coordinate
+      Int_t dim = it->first; // dimension
+      // check whether coordinate is within foam borders.  if not,
+      // push event coordinate into foam
+      if (coordinate <= fXmin[dim])
+         coordinate = fXmin[dim] + std::numeric_limits<float>::epsilon();
+      else if (coordinate >= fXmax[dim])
+         coordinate = fXmax[dim] - std::numeric_limits<float>::epsilon();
+      // transform event
+      txvec[dim] = VarTransform(dim, coordinate);
+   }
+
+   // find cells, which fit txvec
+   std::vector<PDEFoamCell*> cells = FindCells(txvec);
+   if (cells.empty()) {
+      // return null target vector (size = dimension of foam -
+      // number of variables)
+      return std::vector<Float_t>(GetTotDim() - xvec.size(), 0);
+   }
+
+   // map of targets and normalization.  Every target dimension (=
+   // dimension, that is missing in txvec) gets its entry up front,
+   // so that the size of the result depends neither on the cell
+   // densities nor on the target selection.
+   std::map<Int_t, Float_t> target, norm;
+   for (Int_t idim = 0; idim < GetTotDim(); ++idim) {
+      if (txvec.find(idim) == txvec.end()) {
+         target[idim] = 0;
+         norm[idim] = 0;
+      }
+   }
+   Double_t max_dens = 0.; // maximum cell density found so far (kMpv)
+
+   // loop over all cells that were found
+   for (std::vector<PDEFoamCell*>::const_iterator cell_it = cells.begin();
+        cell_it != cells.end(); ++cell_it) {
+
+      // get event density in cell
+      const Double_t cell_density = GetCellValue(*cell_it, kValueDensity);
+
+      // get cell position and size
+      PDEFoamVect cellPosi(GetTotDim()), cellSize(GetTotDim());
+      (*cell_it)->GetHcub(cellPosi, cellSize);
+
+      switch (fTargetSelection) {
+      case kMean:
+         // add the cell center, weighted by the cell density, in
+         // every target dimension
+         for (std::map<Int_t, Float_t>::iterator target_it = target.begin();
+              target_it != target.end(); ++target_it) {
+            const Int_t idim = target_it->first;
+            target_it->second += cell_density *
+               VarTransformInvers(idim, cellPosi[idim] + 0.5 * cellSize[idim]);
+            norm[idim] += cell_density;
+         }
+         break;
+      case kMpv:
+         // The density belongs to the cell as a whole: compare it
+         // once per cell and then move all target dimensions to this
+         // cell together.  (If the comparison is done per dimension,
+         // the first target dimension raises max_dens and all
+         // further target dimensions keep the value of a stale
+         // cell.)
+         if (cell_density > max_dens) {
+            max_dens = cell_density; // save new max density
+            for (std::map<Int_t, Float_t>::iterator target_it = target.begin();
+                 target_it != target.end(); ++target_it) {
+               const Int_t idim = target_it->first;
+               target_it->second =
+                  VarTransformInvers(idim, cellPosi[idim] + 0.5 * cellSize[idim]);
+            }
+         }
+         break;
+      default:
+         Log() << "<PDEFoamMultiTarget::GetCellValue>: "
+               << "unknown target selection type!" << Endl;
+         break;
+      }
+   } // loop over cells
+
+   // normalise mean cell density
+   if (fTargetSelection == kMean) {
+      // loop over all target dimensions
+      for (std::map<Int_t, Float_t>::iterator target_it = target.begin();
+           target_it != target.end(); ++target_it) {
+         const Int_t idim = target_it->first;
+         if (norm[idim] > std::numeric_limits<float>::epsilon())
+            target_it->second /= norm[idim];
+         else
+            // normalisation factor is too small -> return the center
+            // of the target range as approximate target value (half
+            // of the range width would only lie inside of the range,
+            // if the range started at 0)
+            target_it->second = (fXmax[idim] + fXmin[idim]) / 2.;
+      }
+   }
+
+   // copy targets to result vector (std::map iterates in order of the
+   // dimension)
+   std::vector<Float_t> result;
+   result.reserve(target.size());
+   for (std::map<Int_t, Float_t>::const_iterator it = target.begin();
+        it != target.end(); ++it)
+      result.push_back(it->second);
+
+   return result;
+}
diff --git a/tmva/src/PDEFoamTarget.cxx b/tmva/src/PDEFoamTarget.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..cf3e15c16d68e0e8971f5c7e6c2c38fecf0eab5d
--- /dev/null
+++ b/tmva/src/PDEFoamTarget.cxx
@@ -0,0 +1,200 @@
+// @(#)root/tmva $Id$
+// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
+ * Package: TMVA                                                                  *
+ * Classes: PDEFoamTarget                                                         *
+ * Web    : http://tmva.sourceforge.net                                           *
+ *                                                                                *
+ * Description:                                                                   *
+ *      Implementation.                                                           *
+ *                                                                                *
+ * Authors (alphabetical):                                                        *
+ *      Tancredi Carli   - CERN, Switzerland                                      *
+ *      Dominik Dannheim - CERN, Switzerland                                      *
+ *      S.
Jadach        - Institute of Nuclear Physics, Cracow, Poland           *
+ *      Alexander Voigt  - TU Dresden, Germany                                    *
+ *      Peter Speckmayer - CERN, Switzerland                                      *
+ *                                                                                *
+ * Copyright (c) 2008, 2010:                                                      *
+ *      CERN, Switzerland                                                         *
+ *      MPI-K Heidelberg, Germany                                                 *
+ *                                                                                *
+ * Redistribution and use in source and binary forms, with or without             *
+ * modification, are permitted according to the terms listed in LICENSE           *
+ * (http://tmva.sourceforge.net/LICENSE)                                          *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamTarget
+//
+// This PDEFoam variant stores in every cell the average target
+// fTarget (see the Constructor) as well as the statistical error on
+// the target fTarget.  It therefore acts as a target estimator.  It
+// should be booked together with the PDEFoamTargetDensity density
+// estimator, which returns the target fTarget density at a given
+// phase space point during the foam build-up.
+//
+//_____________________________________________________________________
+
+#ifndef ROOT_TMath
+#include "TMath.h"
+#endif
+
+#ifndef ROOT_TMVA_PDEFoamTarget
+#include "TMVA/PDEFoamTarget.h"
+#endif
+
+ClassImp(TMVA::PDEFoamTarget)
+
+//_____________________________________________________________________
+TMVA::PDEFoamTarget::PDEFoamTarget()
+   : PDEFoam()
+   , fTarget(0)
+{
+   // Default constructor for streamer, user should not use it.
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamTarget::PDEFoamTarget(const TString& Name, UInt_t target)
+   : PDEFoam(Name)
+   , fTarget(target)
+{}
+
+//_____________________________________________________________________
+TMVA::PDEFoamTarget::PDEFoamTarget(const PDEFoamTarget &From)
+   : PDEFoam(From)
+   , fTarget(From.fTarget)
+{
+   // Copy Constructor NOT IMPLEMENTED (NEVER USED)
+   Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
+}
+
+//_____________________________________________________________________
+void TMVA::PDEFoamTarget::FillFoamCells(const Event* ev, Float_t wt)
+{
+   // This function fills an event into the target PDEFoam.  In the
+   // cell, which contains the event, the weight 'wt' is added to
+   // cell element 0, and the weight 'wt' multiplied by the target
+   // number 'fTarget' of the event is added to cell element 1.
+
+   // find corresponding foam cell
+   std::vector<Float_t> values = ev->GetValues();
+   std::vector<Float_t> tvalues = VarTransform(values);
+   std::vector<Float_t> targets = ev->GetTargets();
+   PDEFoamCell *cell = FindCell(tvalues);
+
+   // 0. Element: Sum of event weights
+   // 1. Element: Sum of (event weight * target number fTarget)
+   SetCellElement(cell, 0, GetCellElement(cell, 0) + wt);
+   SetCellElement(cell, 1, GetCellElement(cell, 1) + wt * targets.at(fTarget));
+}
+
+//_____________________________________________________________________
+void TMVA::PDEFoamTarget::Finalize()
+{
+   // Calculate average cell target in every cell and save them to the
+   // cell.  Cell element 0 will contain the average target and cell
+   // element 1 will contain the error on the target.
+   //
+   // In cells, where cell element 0 is not positive (no events were
+   // filled), the target is set to 0 and the error to -1.  The
+   // latter marks the cell value as undefined, see
+   // CellValueIsUndefined().  Cells, for which GetStat() returns 0,
+   // are skipped.
+
+   // loop over cells
+   for (Long_t iCell = 0; iCell <= fLastCe; iCell++) {
+      if (!(fCells[iCell]->GetStat()))
+         continue;
+
+      // element 0 and 1 as accumulated by FillFoamCells()
+      Double_t N_ev = GetCellElement(fCells[iCell], 0); // get number of events (sum of weights)
+      Double_t tar = GetCellElement(fCells[iCell], 1); // get sum of targets
+
+      if (N_ev > 0) {
+         SetCellElement(fCells[iCell], 0, tar / N_ev); // set average target
+         SetCellElement(fCells[iCell], 1, tar / TMath::Sqrt(N_ev)); // set error on average target
+      } else {
+         SetCellElement(fCells[iCell], 0, 0.0); // set mean target
+         SetCellElement(fCells[iCell], 1, -1); // set mean target error (-1 = undefined)
+      }
+   }
+}
+
+//_____________________________________________________________________
+Bool_t TMVA::PDEFoamTarget::CellValueIsUndefined(PDEFoamCell* cell)
+{
+   // Returns true, if the target error equals -1, as set in
+   // Finalize() in case of no events in the cell
+   return GetCellValue(cell, kValueError) == -1;
+}
+
+//_____________________________________________________________________
+Float_t TMVA::PDEFoamTarget::GetCellValue(std::vector<Float_t> &xvec, ECellValue cv, PDEFoamKernelBase *kernel)
+{
+   // This function finds the cell, which corresponds to the given
+   // untransformed event vector 'xvec' and return its value, which is
+   // given by the parameter 'cv'.  If a kernel is given ('kernel' is
+   // not NULL), the value is estimated by kernel->Estimate() instead
+   // of being read from the cell directly.
+   //
+   // Before that it is checked whether the cell value is undefined
+   // (see CellValueIsUndefined()).  If this is the case, then the
+   // mean of the neighbor's target values (kValue) is returned,
+   // using GetAverageNeighborsValue().  In this case neither 'cv'
+   // nor 'kernel' are used.
+
+   std::vector<Float_t> txvec(VarTransform(xvec));
+   PDEFoamCell *cell = FindCell(txvec);
+
+   if (!CellValueIsUndefined(cell)) {
+      // cell is not empty
+      if (kernel == NULL)
+         return GetCellValue(cell, cv);
+      else
+         return kernel->Estimate(this, txvec, cv);
+   } else
+      // cell is empty -> calc average target of neighbor cells
+      return GetAverageNeighborsValue(txvec, kValue);
+}
+
+//_____________________________________________________________________
+Float_t TMVA::PDEFoamTarget::GetAverageNeighborsValue(std::vector<Float_t> &txvec,
+                                                      ECellValue cv)
+{
+   // This function returns the average value 'cv' of only nearest
+   // neighbor cells.  It is used in cases, where empty cells shall
+   // not be evaluated.
+   //
+   // Parameters:
+   // - txvec - event vector, transformed into foam coordinates [0, 1]
+   // - cv - cell value, see definition of ECellValue
+   //
+   // Return:
+   // Mean of the values 'cv' of those left and right neighbor cells
+   // (one pair per foam dimension), whose cell value is defined.  If
+   // the values of all of them are undefined, 0 is returned.
+
+   // distance, by which the probe point is placed beyond the border
+   // of the cell when looking up a neighbor cell
+   const Float_t xoffset = 1.e-6;
+   Float_t norm = 0; // normalisation
+   Float_t result = 0; // return value
+
+   PDEFoamCell *cell = FindCell(txvec); // find corresponding cell
+   PDEFoamVect cellSize(GetTotDim());
+   PDEFoamVect cellPosi(GetTotDim());
+   cell->GetHcub(cellPosi, cellSize); // get cell coordinates
+
+   // loop over all dimensions and find neighbor cells
+   for (Int_t dim = 0; dim < GetTotDim(); dim++) {
+      // copy of txvec, in which only coordinate 'dim' is moved
+      std::vector<Float_t> ntxvec(txvec);
+      PDEFoamCell* left_cell = 0; // left cell
+      PDEFoamCell* right_cell = 0; // right cell
+
+      // get left cell
+      ntxvec[dim] = cellPosi[dim] - xoffset;
+      left_cell = FindCell(ntxvec);
+      if (!CellValueIsUndefined(left_cell)) {
+         // if left cell is not empty, take its value
+         result += GetCellValue(left_cell, cv);
+         norm++;
+      }
+      // get right cell
+      ntxvec[dim] = cellPosi[dim] + cellSize[dim] + xoffset;
+      right_cell = FindCell(ntxvec);
+      if (!CellValueIsUndefined(right_cell)) {
+         // if right cell is not empty, take its value
+         result += GetCellValue(right_cell, cv);
+         norm++;
+      }
+   }
+   if (norm > 0) result /= norm; // calc average target
+   else result = 0; // return null if all neighbors are empty
+
+   return result;
+}
diff --git a/tmva/src/PDEFoamTargetDensity.cxx b/tmva/src/PDEFoamTargetDensity.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..f89baba039f20f691caf7a35a7433ac47d99caa0
--- /dev/null
+++ b/tmva/src/PDEFoamTargetDensity.cxx
@@ -0,0 +1,118 @@
+// @(#)root/tmva $Id$
+// Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
+
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
+ * Package: TMVA                                                                  *
+ * Classes: PDEFoamTargetDensity                                                  *
+ * Web    : http://tmva.sourceforge.net                                           *
+ *                                                                                *
+ * Description:                                                                   *
+ *      The PDEFoamTargetDensity class provides an interface between the          *
+ *      Binary search tree and the PDEFoam object.  In order to build-up the      *
+ *      foam one needs to calculate the density of events at a given point        *
+ *      (sampling during Foam build-up).  The function                            *
+ *      PDEFoamTargetDensity::Density() does this job.  It uses a binary          *
+ *      search tree, filled with training events, in order to provide this        *
+ *      density.                                                                  *
+ *                                                                                *
+ * Authors (alphabetical):                                                        *
+ *      Tancredi Carli   - CERN, Switzerland                                      *
+ *      Dominik Dannheim - CERN, Switzerland                                      *
+ *      S. Jadach        - Institute of Nuclear Physics, Cracow, Poland           *
+ *      Alexander Voigt  - TU Dresden, Germany                                    *
+ *      Peter Speckmayer - CERN, Switzerland                                      *
+ *                                                                                *
+ * Copyright (c) 2008, 2010:                                                      *
+ *      CERN, Switzerland                                                         *
+ *      MPI-K Heidelberg, Germany                                                 *
+ *                                                                                *
+ * Redistribution and use in source and binary forms, with or without             *
+ * modification, are permitted according to the terms listed in LICENSE           *
+ * (http://tmva.sourceforge.net/LICENSE)                                          *
+ **********************************************************************************/
+
+//_____________________________________________________________________
+//
+// PDEFoamTargetDensity
+//
+// This is a concrete implementation of PDEFoamDensityBase.  Density(...)
+// estimates the target density (target number: fTarget) at a given
+// phase-space point using range-searching.
+// _____________________________________________________________________
+
+#include <cmath>
+
+#ifndef ROOT_TMVA_PDEFoamTargetDensity
+#include "TMVA/PDEFoamTargetDensity.h"
+#endif
+
+ClassImp(TMVA::PDEFoamTargetDensity)
+
+//_____________________________________________________________________
+TMVA::PDEFoamTargetDensity::PDEFoamTargetDensity()
+   : PDEFoamDensityBase()
+   , fTarget(0)
+{
+   // Default constructor; the target number is set to 0
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamTargetDensity::PDEFoamTargetDensity(std::vector<Double_t> box, UInt_t target)
+   : PDEFoamDensityBase(box)
+   , fTarget(target)
+{
+   // Parameters:
+   //
+   // - box - size of sampling box in each dimension
+   //
+   // - target - the target number to calculate the density for
+}
+
+//_____________________________________________________________________
+TMVA::PDEFoamTargetDensity::PDEFoamTargetDensity(const PDEFoamTargetDensity &distr)
+   : PDEFoamDensityBase(distr)
+   , fTarget(distr.fTarget)
+{
+   // Copy constructor
+}
+
+//_____________________________________________________________________
+Double_t TMVA::PDEFoamTargetDensity::Density(std::vector<Double_t> &Xarg, Double_t &event_density)
+{
+   // This function is needed during the foam buildup.  It returns the
+   // average target value within the sampling box (of size GetBox(),
+   // centered at 'Xarg') divided by the volume of the box.
+   //
+   // Parameters:
+   //
+   // - Xarg - point, at which the density is calculated
+   //
+   // - event_density - output: number of events found in the box,
+   //   divided by the volume of the box
+   //
+   // Return:
+   //
+   // (sum of weight * target number fTarget of all events found in
+   // the box) / (sum of weights returned by the range search + 0.1)
+   // / (volume of the box)
+
+   if (!fBst)
+      Log() << kFATAL << "<PDEFoamTargetDensity::Density()> Binary tree not found!" << Endl;
+
+   //create volume around point to be found
+   std::vector<Double_t> lb(GetBox().size());
+   std::vector<Double_t> ub(GetBox().size());
+
+   // probevolume relative to hypercube with edge length 1:
+   const Double_t probevolume_inv = 1.0 / GetBoxVolume();
+
+   // set upper and lower bound for search volume
+   for (UInt_t idim = 0; idim < GetBox().size(); ++idim) {
+      lb[idim] = Xarg[idim] - GetBox().at(idim) / 2.0;
+      ub[idim] = Xarg[idim] + GetBox().at(idim) / 2.0;
+   }
+
+   TMVA::Volume volume(&lb, &ub); // volume to search in
+   std::vector<const TMVA::BinarySearchTreeNode*> nodes; // BST nodes found
+
+   // do range searching
+   Double_t SumOfWeights = fBst->SearchVolume(&volume, &nodes);
+
+   // store density based on total number of events
+   event_density = nodes.size() * probevolume_inv;
+
+   Double_t N_tar = 0; // weighted sum of the targets of the events found
+   // now sum weight * target number fTarget over all nodes found
+   for (std::vector<const TMVA::BinarySearchTreeNode*>::const_iterator it = nodes.begin();
+        it != nodes.end(); ++it) {
+      N_tar += ((*it)->GetTargets()).at(fTarget) * ((*it)->GetWeight());
+   }
+
+   // return: (N_tar/N_total) / (cell_volume)
+   // NOTE(review): the constant 0.1 presumably keeps the denominator
+   // away from 0, if no events are found in the box -- TODO confirm
+   return (N_tar / (SumOfWeights + 0.1)) * probevolume_inv;
+}
diff --git a/tmva/src/PDEFoamVect.cxx b/tmva/src/PDEFoamVect.cxx
index 945f5a63cace42962565940b5ff439a31145ba5d..d26b012f68f33bfac845a1d28f48f44f37022bc4 100644
--- a/tmva/src/PDEFoamVect.cxx
+++ b/tmva/src/PDEFoamVect.cxx
@@ -1,3 +1,5 @@
+// @(#)root/tmva $Id$
+// Author: S. Jadach, Tancredi Carli, Dominik Dannheim, Alexander Voigt
 
 /**********************************************************************************
  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
@@ -13,7 +15,7 @@
  *      S.
Jadach - Institute of Nuclear Physics, Cracow, Poland * * Tancredi Carli - CERN, Switzerland * * Dominik Dannheim - CERN, Switzerland * - * Alexander Voigt - CERN, Switzerland * + * Alexander Voigt - TU Dresden, Germany * * * * Copyright (c) 2008: * * CERN, Switzerland * diff --git a/tmva/src/PDF.cxx b/tmva/src/PDF.cxx index cf1e815caad4c5801fed951a2069efb0900fddfa..c31cfc567b2869e973ffc4045e3a4496f4dde248 100644 --- a/tmva/src/PDF.cxx +++ b/tmva/src/PDF.cxx @@ -248,7 +248,7 @@ void TMVA::PDF::BuildPDF( const TH1* hist ) // histogram should be non empty if (hist->GetEntries() <= 0) - Log() << kFATAL << "Number of entries <= 0 in histogram: " << hist->GetTitle() << Endl; + Log() << kFATAL << "Number of entries <= 0 (" << hist->GetEntries() << " in histogram: " << hist->GetTitle() << Endl; if (fInterpolMethod == PDF::kKDE) { Log() << "Create " diff --git a/tmva/src/Reader.cxx b/tmva/src/Reader.cxx index 4cec00e195dae2ad820a6725b69a6e6c60fa1978..25de65640ccb9e2dfaf3f112166daa33a907b6c3 100644 --- a/tmva/src/Reader.cxx +++ b/tmva/src/Reader.cxx @@ -164,9 +164,8 @@ TMVA::Reader::Reader( std::vector<TString>& inputVars, const TString& theOption, // arguments: names of input variables (vector) // verbose flag - for (std::vector<TString>::iterator ivar = inputVars.begin(); ivar != inputVars.end(); ivar++) { + for (std::vector<TString>::iterator ivar = inputVars.begin(); ivar != inputVars.end(); ivar++) DataInfo().AddVariable( *ivar ); - } Init(); } @@ -194,9 +193,8 @@ TMVA::Reader::Reader( std::vector<std::string>& inputVars, const TString& theOpt // arguments: names of input variables (vector) // verbose flag - for (std::vector<std::string>::iterator ivar = inputVars.begin(); ivar != inputVars.end(); ivar++) { + for (std::vector<std::string>::iterator ivar = inputVars.begin(); ivar != inputVars.end(); ivar++) DataInfo().AddVariable( ivar->c_str() ); - } Init(); } @@ -465,7 +463,10 @@ Double_t TMVA::Reader::EvaluateMVA( const std::vector<Float_t>& inputVec, const IMethod* 
imeth = FindMVA( methodTag ); MethodBase* meth = dynamic_cast<TMVA::MethodBase*>(imeth); if(meth==0) return 0; - Event* tmpEvent=new Event(inputVec, 2); // ToDo resolve magic 2 issue + +// Event* tmpEvent=new Event(inputVec, 2); // ToDo resolve magic 2 issue + Event* tmpEvent=new Event(inputVec, DataInfo().GetNVariables()); // is this the solution? + if (meth->GetMethodType() == TMVA::Types::kCuts) { TMVA::MethodCuts* mc = dynamic_cast<TMVA::MethodCuts*>(meth); if(mc) diff --git a/tmva/src/Results.cxx b/tmva/src/Results.cxx index 7c480b257d12dacbd8ddd109457a5b98cc0cffb0..c485489ce8bac52b6a9f32e87ceb5b940a1b65ae 100644 --- a/tmva/src/Results.cxx +++ b/tmva/src/Results.cxx @@ -28,6 +28,7 @@ #include <vector> #include "TH1.h" +#include "TGraph.h" #include "TMVA/Results.h" #include "TMVA/MsgLogger.h" @@ -96,6 +97,12 @@ TH1* TMVA::Results::GetHist(const TString & alias) const return (TH1*)GetObject(alias); } +//_______________________________________________________________________ +TGraph* TMVA::Results::GetGraph(const TString & alias) const +{ + return (TGraph*)GetObject(alias); +} + //_______________________________________________________________________ void TMVA::Results::Delete() diff --git a/tmva/src/SeparationBase.cxx b/tmva/src/SeparationBase.cxx index 76a7a06e6bc0f93444c6cb9936cc28510636e423..7c730f7b9355b6eec76ff708e765155ed2cf5c0b 100644 --- a/tmva/src/SeparationBase.cxx +++ b/tmva/src/SeparationBase.cxx @@ -45,6 +45,7 @@ ClassImp(TMVA::SeparationBase) #include <limits> +#include <iostream> #include "TMath.h" @@ -82,12 +83,15 @@ Double_t TMVA::SeparationBase::GetSeparationGain(const Double_t &nSelS, const Do * this->GetSeparationIndex(nTotS-nSelS,nTotB-nSelB) ); Double_t rightIndex = (nSelS+nSelB) * this->GetSeparationIndex(nSelS,nSelB); - Double_t diff = parentIndex - leftIndex - rightIndex; - // if (!fInit){ - fPrecisionCut = (TMath::Sqrt(std::numeric_limits<double>::epsilon())); - // fInit = kTRUE; - // } - if(diff/parentIndex<fPrecisionCut ) return 0; + 
//Double_t diff = parentIndex - leftIndex - rightIndex; + Double_t diff = (parentIndex - leftIndex - rightIndex)/(nTotS+nTotB); + + if(diff<fPrecisionCut ) { + // std::cout << " Warning value in GetSeparation is below numerical presicion " + // << diff/parentIndex + // << std::endl; + return 0; + } return diff; } diff --git a/tmva/src/Tools.cxx b/tmva/src/Tools.cxx index f9834de0f26e191b1b14a226ee86e7a47e5ec1b4..5437b367114fd9afdb950f788bdabff5e4e8596d 100644 --- a/tmva/src/Tools.cxx +++ b/tmva/src/Tools.cxx @@ -188,7 +188,7 @@ void TMVA::Tools::ComputeStat( const std::vector<TMVA::Event*>& events, std::vec Log() << kFATAL << "<Tools::ComputeStat> value vector is zero pointer" << Endl; if ( events.size() != valVec->size() ) - Log() << kFATAL << "<Tools::ComputeStat> event and value vector have different lengths " + Log() << kWARNING << "<Tools::ComputeStat> event and value vector have different lengths " << events.size() << "!=" << valVec->size() << Endl; Long64_t entries = valVec->size(); @@ -1031,7 +1031,7 @@ Bool_t TMVA::Tools::HasAttr( void* node, const char* attrname ) void TMVA::Tools::ReadAttr( void* node, const char* attrname, TString& value ) { // add attribute from xml - if(!HasAttr(node, attrname)) { + if (!HasAttr(node, attrname)) { const char * nodename = xmlengine().GetNodeName(node); Log() << kFATAL << "Trying to read non-existing attribute '" << attrname << "' from xml node '" << nodename << "'" << Endl; } @@ -1048,7 +1048,9 @@ void TMVA::Tools::AddAttr( void* node, const char* attrname, const char* value ) } //_______________________________________________________________________ -void* TMVA::Tools::AddChild( void* parent, const char* childname, const char* content, bool isRootNode ) { +void* TMVA::Tools::AddChild( void* parent, const char* childname, const char* content, bool isRootNode ) +{ + // add child node if( !isRootNode && parent == 0 ) return 0; return gTools().xmlengine().NewChild(parent, 0, childname, content); } @@ -1061,6 +1063,7 @@ 
Bool_t TMVA::Tools::AddComment( void* node, const char* comment ) { //_______________________________________________________________________ void* TMVA::Tools::GetParent( void* child) { + // get parent node void* par = xmlengine().GetParent(child); return par; @@ -1068,6 +1071,7 @@ void* TMVA::Tools::GetParent( void* child) //_______________________________________________________________________ void* TMVA::Tools::GetChild( void* parent, const char* childname ) { + // get child node void* ch = xmlengine().GetChild(parent); if (childname != 0) { while (ch!=0 && strcmp(xmlengine().GetNodeName(ch),childname) != 0) ch = xmlengine().GetNext(ch); @@ -1145,12 +1149,12 @@ TString TMVA::Tools::StringFromDouble( Double_t d ) { // string tools std::stringstream s; - s << d; + s << Form( "%5.10e", d ); return TString(s.str().c_str()); } //_______________________________________________________________________ -void TMVA::Tools::WriteTMatrixDToXML(void* node, const char* name, TMatrixD* mat) +void TMVA::Tools::WriteTMatrixDToXML( void* node, const char* name, TMatrixD* mat ) { // XML helpers void* matnode = xmlengine().NewChild(node, 0, name); @@ -1159,39 +1163,37 @@ void TMVA::Tools::WriteTMatrixDToXML(void* node, const char* name, TMatrixD* mat std::stringstream s; for (Int_t row = 0; row<mat->GetNrows(); row++) { for (Int_t col = 0; col<mat->GetNcols(); col++) { - s << (*mat)[row][col] << " "; + s << Form( "%5.15e ", (*mat)[row][col] ); } } xmlengine().AddRawLine( matnode, s.str().c_str() ); } //_______________________________________________________________________ -void TMVA::Tools::WriteTVectorDToXML(void* node, const char* name, TVectorD* vec) +void TMVA::Tools::WriteTVectorDToXML( void* node, const char* name, TVectorD* vec ) { TMatrixD mat(1,vec->GetNoElements(),&((*vec)[0])); - WriteTMatrixDToXML(node, name, &mat); + WriteTMatrixDToXML( node, name, &mat ); } //_______________________________________________________________________ -void 
TMVA::Tools::ReadTVectorDFromXML(void* node, const char* name, TVectorD* vec) +void TMVA::Tools::ReadTVectorDFromXML( void* node, const char* name, TVectorD* vec ) { TMatrixD mat(1,vec->GetNoElements(),&((*vec)[0])); - ReadTMatrixDFromXML(node,name,&mat); - for (int i=0;i<vec->GetNoElements();++i){ - (*vec)[i]=mat[0][i]; - } + ReadTMatrixDFromXML( node, name, &mat ); + for (int i=0;i<vec->GetNoElements();++i) (*vec)[i] = mat[0][i]; } //_______________________________________________________________________ -void TMVA::Tools::ReadTMatrixDFromXML(void* node, const char* name, TMatrixD* mat) +void TMVA::Tools::ReadTMatrixDFromXML( void* node, const char* name, TMatrixD* mat ) { if (strcmp(xmlengine().GetNodeName(node),name)!=0){ Log() << kWARNING << "Possible Error: Name of matrix in weight file" << " does not match name of matrix passed as argument!" << Endl; } Int_t nrows, ncols; - ReadAttr(node, "Rows", nrows); - ReadAttr(node, "Columns", ncols); + ReadAttr( node, "Rows", nrows ); + ReadAttr( node, "Columns", ncols ); if (mat->GetNrows() != nrows || mat->GetNcols() != ncols){ Log() << kWARNING << "Possible Error: Dimension of matrix in weight file" << " does not match dimension of matrix passed as argument!" 
<< Endl; @@ -1370,7 +1372,7 @@ void TMVA::Tools::TMVACitation( MsgLogger& logger, ECitation citType ) case kLaTeX: logger << "%\\cite{TMVA2007}" << Endl; - logger << "\bibitem{TMVA2007}" << Endl; + logger << "\\bibitem{TMVA2007}" << Endl; logger << " A.~Hoecker, P.~Speckmayer, J.~Stelzer, J.~Therhaag, E.~von Toerne, H.~Voss" << Endl; logger << " %``TMVA: Toolkit for multivariate data analysis,''" << Endl; logger << " PoS A {\\bf CAT} (2007) 040" << Endl; @@ -1396,14 +1398,14 @@ Bool_t TMVA::Tools::HistoHasEquidistantBins(const TH1& h) //_______________________________________________________________________ std::vector<TMatrixDSym*>* -TMVA::Tools::CalcCovarianceMatrices( const std::vector<Event*>& events, Int_t maxCls ) +TMVA::Tools::CalcCovarianceMatrices( const std::vector<Event*>& events, Int_t maxCls, Int_t maxNumberVar ) { // compute covariance matrices if (events.size() == 0) return 0; - UInt_t nvar = events.at(0)->GetNVariables(), ivar = 0, jvar = 0; + UInt_t nvar = (maxNumberVar == -1 ? 
events.at(0)->GetNVariables():maxNumberVar), ivar = 0, jvar = 0; // init matrices Int_t matNum = maxCls; diff --git a/tmva/src/TransformationHandler.cxx b/tmva/src/TransformationHandler.cxx index c303e19c5b078ae85f25cac3a8b857c5d77fc1cc..4c3e5289e2b5503ef598c61a2841f42ee7eebe98 100644 --- a/tmva/src/TransformationHandler.cxx +++ b/tmva/src/TransformationHandler.cxx @@ -65,6 +65,7 @@ #include "TMVA/VariablePCATransform.h" #include "TMVA/VariableGaussTransform.h" #include "TMVA/VariableNormalizeTransform.h" +#include "TMVA/VariableRearrangeTransform.h" //_______________________________________________________________________ TMVA::TransformationHandler::TransformationHandler( DataSetInfo& dsi, const TString& callerName ) @@ -108,7 +109,7 @@ TMVA::VariableTransformBase* TMVA::TransformationHandler::AddTransformation( Var trf->Log().SetSource(TString(fCallerName+"_"+tfname+"_TF").Data()); fTransformations.Add(trf); fTransformationsReferenceClasses.push_back( cls ); - return trf; + return trf; } //_______________________________________________________________________ @@ -139,7 +140,7 @@ void TMVA::TransformationHandler::SetTransformationReferenceClass( Int_t cls ) const TMVA::Event* TMVA::TransformationHandler::Transform( const Event* ev ) const { // the transformation - + TListIter trIt(&fTransformations); std::vector<Int_t>::const_iterator rClsIt = fTransformationsReferenceClasses.begin(); const Event* trEv = ev; @@ -151,18 +152,41 @@ const TMVA::Event* TMVA::TransformationHandler::Transform( const Event* ev ) con } //_______________________________________________________________________ -const TMVA::Event* TMVA::TransformationHandler::InverseTransform( const Event* ev ) const +const TMVA::Event* TMVA::TransformationHandler::InverseTransform( const Event* ev, Bool_t suppressIfNoTargets ) const { // the inverse transformation - TListIter trIt(&fTransformations); - std::vector< Int_t >::const_iterator rClsIt = fTransformationsReferenceClasses.begin(); + TListIter 
trIt(&fTransformations, kIterBackward); + std::vector< Int_t >::const_iterator rClsIt = fTransformationsReferenceClasses.end(); + rClsIt--; const Event* trEv = ev; - while (VariableTransformBase *trf = (VariableTransformBase*) trIt() ) { - if (trf->IsCreated()) trEv = trf->InverseTransform(ev, (*rClsIt) ); + UInt_t nvars = 0, ntgts = 0, nspcts = 0; + while (VariableTransformBase *trf = (VariableTransformBase*) trIt() ) { // shouldn't be the transformation called in the inverse order for the inversetransformation????? + if (trf->IsCreated()) { + trf->CountVariableTypes( nvars, ntgts, nspcts ); + if( !(suppressIfNoTargets && ntgts==0) ) + trEv = trf->InverseTransform(ev, (*rClsIt) ); + } else break; - rClsIt++; + --rClsIt; } return trEv; + + +// TListIter trIt(&fTransformations); +// std::vector< Int_t >::const_iterator rClsIt = fTransformationsReferenceClasses.begin(); +// const Event* trEv = ev; +// UInt_t nvars = 0, ntgts = 0, nspcts = 0; +// while (VariableTransformBase *trf = (VariableTransformBase*) trIt() ) { // shouldn't be the transformation called in the inverse order for the inversetransformation????? 
+// if (trf->IsCreated()) { +// trf->CountVariableTypes( nvars, ntgts, nspcts ); +// if( !(suppressIfNoTargets && ntgts==0) ) +// trEv = trf->InverseTransform(ev, (*rClsIt) ); +// } +// else break; +// rClsIt++; +// } +// return trEv; + } //_______________________________________________________________________ @@ -713,12 +737,22 @@ void TMVA::TransformationHandler::PlotVariables( const std::vector<Event*>& even while (VariableTransformBase *trf = (VariableTransformBase*) trIt()) outputDir += "_" + TString(trf->GetShortName()); - TObject* o = fRootBaseDir->FindObject(outputDir); - if (o != 0) { - Log() << kFATAL << "A " << o->ClassName() << " with name " << o->GetName() << " already exists in " - << fRootBaseDir->GetPath() << "("<<outputDir<<")" << Endl; - } - localDir = fRootBaseDir->mkdir( outputDir ); + TString uniqueOutputDir = outputDir; + Int_t counter = 0; + TObject* o = NULL; + while( (o = fRootBaseDir->FindObject(uniqueOutputDir)) != 0 ){ + uniqueOutputDir = outputDir+Form("_%d",counter); + Log() << kINFO << "A " << o->ClassName() << " with name " << o->GetName() << " already exists in " + << fRootBaseDir->GetPath() << ", I will try with "<<uniqueOutputDir<<"." 
<< Endl; + ++counter; + } + +// TObject* o = fRootBaseDir->FindObject(outputDir); +// if (o != 0) { +// Log() << kFATAL << "A " << o->ClassName() << " with name " << o->GetName() << " already exists in " +// << fRootBaseDir->GetPath() << "("<<outputDir<<")" << Endl; +// } + localDir = fRootBaseDir->mkdir( uniqueOutputDir ); localDir->cd(); Log() << kVERBOSE << "Create and switch to directory " << localDir->GetPath() << Endl; @@ -850,9 +884,17 @@ void TMVA::TransformationHandler::ReadFromXML( void* trfsnode ) else if (trfname == "Gauss" ) { newtrf = new VariableGaussTransform(fDataSetInfo); } + else if (trfname == "Uniform" ) { + newtrf = new VariableGaussTransform(fDataSetInfo, "Uniform"); + } else if (trfname == "Normalize" ) { newtrf = new VariableNormalizeTransform(fDataSetInfo); } + else if (trfname == "Rearrange" ) { + newtrf = new VariableRearrangeTransform(fDataSetInfo); + } + else if (trfname != "None") { + } else { Log() << kFATAL << "<ReadFromXML> Variable transform '" << trfname << "' unknown." << Endl; diff --git a/tmva/src/VariableDecorrTransform.cxx b/tmva/src/VariableDecorrTransform.cxx index cbf8329fe51dc4869170df017a9b1ef6c063500a..8e0d3e25a35e5dbe003db4f8cbb8847d3f09bc14 100644 --- a/tmva/src/VariableDecorrTransform.cxx +++ b/tmva/src/VariableDecorrTransform.cxx @@ -27,6 +27,7 @@ #include <iostream> #include <iomanip> +#include <algorithm> #include "TVectorF.h" #include "TVectorD.h" @@ -80,9 +81,10 @@ Bool_t TMVA::VariableDecorrTransform::PrepareTransformation( const std::vector<E Log() << kINFO << "Preparing the Decorrelation transformation..." 
<< Endl; - SetNVariables(events[0]->GetNVariables()); + Int_t inputSize = fGet.size(); + SetNVariables(inputSize); - if (GetNVariables() > 200) { + if (inputSize > 200) { Log() << kINFO << "----------------------------------------------------------------------------" << Endl; Log() << kINFO @@ -121,7 +123,7 @@ std::vector<TString>* TMVA::VariableDecorrTransform::GetTransformationStrings( I << Endl; } - const Int_t nvar = GetNVariables(); + const Int_t nvar = fGet.size(); std::vector<TString>* strVec = new std::vector<TString>; // fill vector @@ -129,7 +131,23 @@ std::vector<TString>* TMVA::VariableDecorrTransform::GetTransformationStrings( I TString str( "" ); for (Int_t jvar=0; jvar<nvar; jvar++) { str += ((*m)(ivar,jvar) > 0) ? " + " : " - "; - str += Form( "%10.5g*[%s]", TMath::Abs((*m)(ivar,jvar)), Variables()[jvar].GetLabel().Data() ); + + Char_t type = fGet.at(jvar).first; + Int_t idx = fGet.at(jvar).second; + + switch( type ) { + case 'v': + str += Form( "%10.5g*[%s]", TMath::Abs((*m)(ivar,jvar)), Variables()[idx].GetLabel().Data() ); + break; + case 't': + str += Form( "%10.5g*[%s]", TMath::Abs((*m)(ivar,jvar)), Targets()[idx].GetLabel().Data() ); + break; + case 's': + str += Form( "%10.5g*[%s]", TMath::Abs((*m)(ivar,jvar)), Spectators()[idx].GetLabel().Data() ); + break; + default: + Log() << kFATAL << "VariableDecorrTransform::GetTransformationStrings : unknown type '" << type << "'." 
<< Endl; + } } strVec->push_back( str ); } @@ -165,71 +183,49 @@ const TMVA::Event* TMVA::VariableDecorrTransform::Transform( const TMVA::Event* << Endl; } - // transformation to decorrelate the variables - const Int_t nvar = GetNVariables(); - TVectorD vec( nvar ); - for (Int_t ivar=0; ivar<nvar; ivar++) vec(ivar) = ev->GetValue(ivar); - // diagonalise variable vectors - vec *= *m; - if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) { if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; } fTransformedEvent = new Event(); } - for (UInt_t itgt=0; itgt<ev->GetNTargets(); itgt++) fTransformedEvent->SetTarget( itgt, ev->GetTarget(itgt) ); - for (Int_t ivar=0; ivar<nvar; ivar++) fTransformedEvent->SetVal ( ivar,vec(ivar) ); - - fTransformedEvent->SetWeight ( ev->GetWeight() ); - fTransformedEvent->SetBoostWeight( ev->GetBoostWeight() ); - fTransformedEvent->SetClass ( ev->GetClass() ); - return fTransformedEvent; -} - -//_______________________________________________________________________ -const TMVA::Event* TMVA::VariableDecorrTransform::InverseTransform( const TMVA::Event* const ev, Int_t cls ) const -{ - // apply the inverse decorrelation transformation ... - // TODO : this is only a copy of the transform method... build the inverse transformation - Log() << kFATAL << "Inverse transformation for decorrelation transformation not yet implemented. Hence, this transformation cannot be applied together with regression. Please contact the authors if necessary." << Endl; - - if (!IsCreated()) - Log() << kFATAL << "Transformation matrix not yet created" - << Endl; + // transformation to decorrelate the variables + const Int_t nvar = fGet.size(); - Int_t whichMatrix = cls; - // if cls (the class chosen by the user) not existing, assume that he wants to have the matrix for all classes together. 
- if (cls < 0 || cls > GetNClasses()) { - whichMatrix = GetNClasses(); - } + std::vector<Float_t> input; + std::vector<Char_t> mask; // entries with kTRUE must not be transformed + Bool_t hasMaskedEntries = GetInput( ev, input, mask ); - TMatrixD* m = fDecorrMatrices.at(whichMatrix); - if (m == 0) { - if (whichMatrix == GetNClasses() ) - Log() << kFATAL << "Transformation matrix all classes is not defined" - << Endl; - else - Log() << kFATAL << "Transformation matrix for class " << whichMatrix << " is not defined" - << Endl; + if( hasMaskedEntries ){ // targets might be masked (for events where the targets have not been computed yet) + UInt_t numMasked = std::count(mask.begin(), mask.end(), kTRUE); + UInt_t numOK = std::count(mask.begin(), mask.end(), kFALSE); + if( numMasked>0 && numOK>0 ){ + Log() << kFATAL << "You mixed variables and targets in the decorrelation transformation. This is not possible." << Endl; + } + SetOutput( fTransformedEvent, input, mask, ev ); + return fTransformedEvent; } - // transformation to decorrelate the variables - const Int_t nvar = GetNVariables(); TVectorD vec( nvar ); - for (Int_t ivar=0; ivar<nvar; ivar++) vec(ivar) = ev->GetValue(ivar); + for (Int_t ivar=0; ivar<nvar; ivar++) vec(ivar) = input.at(ivar); // diagonalise variable vectors vec *= *m; - if (fBackTransformedEvent==0 || fBackTransformedEvent->GetNVariables()!=ev->GetNVariables()) { - if (fBackTransformedEvent!=0) { delete fBackTransformedEvent; fBackTransformedEvent = 0; } - fBackTransformedEvent = new Event( *ev ); - } - for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++ ) fBackTransformedEvent->SetTarget( itgt, ev->GetTarget(itgt) ); - for ( Int_t ivar=0; ivar<nvar; ivar++) fBackTransformedEvent->SetVal(ivar,vec(ivar)); - fBackTransformedEvent->SetWeight ( ev->GetWeight() ); - fBackTransformedEvent->SetBoostWeight( ev->GetBoostWeight() ); - fBackTransformedEvent->SetClass ( ev->GetClass() ); + input.clear(); + for (Int_t ivar=0; ivar<nvar; ivar++) input.push_back( 
vec(ivar) ); + + SetOutput( fTransformedEvent, input, mask, ev ); + + return fTransformedEvent; +} + +//_______________________________________________________________________ +const TMVA::Event* TMVA::VariableDecorrTransform::InverseTransform( const TMVA::Event* const /*ev*/, Int_t /*cls*/ ) const +{ + // apply the inverse decorrelation transformation ... + // TODO : ... build the inverse transformation + Log() << kFATAL << "Inverse transformation for decorrelation transformation not yet implemented. Hence, this transformation cannot be applied together with regression if targets should be transformed. Please contact the authors if necessary." << Endl; + return fBackTransformedEvent; } @@ -250,7 +246,7 @@ void TMVA::VariableDecorrTransform::CalcSQRMats( const std::vector<Event*>& even const UInt_t matNum = (maxCls<=1)?maxCls:maxCls+1; fDecorrMatrices.resize( matNum, (TMatrixD*) 0 ); - std::vector<TMatrixDSym*>* covMat = gTools().CalcCovarianceMatrices( events, maxCls ); + std::vector<TMatrixDSym*>* covMat = gTools().CalcCovarianceMatrices( events, maxCls, fGet.size() ); for (UInt_t cls=0; cls<matNum; cls++) { @@ -292,6 +288,8 @@ void TMVA::VariableDecorrTransform::AttachXMLTo(void* parent) void* trf = gTools().AddChild(parent, "Transform"); gTools().AddAttr(trf,"Name", "Decorrelation"); + VariableTransformBase::AttachXMLTo( trf ); + for (std::vector<TMatrixD*>::const_iterator itm = fDecorrMatrices.begin(); itm != fDecorrMatrices.end(); itm++) { TMatrixD* mat = (*itm); /*void* decmat = gTools().xmlengine().NewChild(trf, 0, "Matrix"); @@ -319,7 +317,25 @@ void TMVA::VariableDecorrTransform::ReadFromXML( void* trfnode ) if( (*it) != 0 ) delete (*it); fDecorrMatrices.clear(); - void* ch = gTools().GetChild(trfnode); + Bool_t newFormat = kFALSE; + + void* inpnode = NULL; + + inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format + if( inpnode!=NULL ) + newFormat = kTRUE; // new xml format + + void* ch = NULL; + if( newFormat ){ + // ------------- new 
format -------------------- + // read input + VariableTransformBase::ReadFromXML( inpnode ); + + ch = gTools().GetNextChild(inpnode); + }else + ch = gTools().GetChild(trfnode); + + // Read the transformation matrices from the xml node while(ch!=0) { Int_t nrows, ncols; gTools().ReadAttr(ch, "Rows", nrows); @@ -414,6 +430,7 @@ void TMVA::VariableDecorrTransform::MakeFunction( std::ostream& fout, const TStr fout << "//_______________________________________________________________________" << std::endl; fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl; fout << "{" << std::endl; + fout << " // Decorrelation transformation, initialisation" << std::endl; for (UInt_t icls = 0; icls < numC; icls++){ TMatrixD* matx = fDecorrMatrices.at(icls); for (int i=0; i<matx->GetNrows(); i++) { @@ -428,18 +445,22 @@ void TMVA::VariableDecorrTransform::MakeFunction( std::ostream& fout, const TStr fout << "//_______________________________________________________________________" << std::endl; fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int cls) const" << std::endl; fout << "{" << std::endl; + fout << " // Decorrelation transformation" << std::endl; fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl; fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl; fout << " else cls = "<<(fDecorrMatrices.size()==1?0:2)<<";"<< std::endl; fout << " }"<< std::endl; + + VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 ); + fout << " std::vector<double> tv;" << std::endl; fout << " for (int i=0; i<"<<matx->GetNrows()<<";i++) {" << std::endl; fout << " double v = 0;" << std::endl; fout << " for (int j=0; j<"<<matx->GetNcols()<<"; j++)" << std::endl; - fout << " v += iv[j] * fDecTF_"<<trCounter<<"[cls][i][j];" << std::endl; + fout << " v += iv[indicesGet.at(j)] * fDecTF_"<<trCounter<<"[cls][i][j];" << std::endl; fout << " tv.push_back(v);" << std::endl; fout 
<< " }" << std::endl; - fout << " for (int i=0; i<"<<matx->GetNrows()<<";i++) iv[i] = tv[i];" << std::endl; + fout << " for (int i=0; i<"<<matx->GetNrows()<<";i++) iv[indicesPut.at(i)] = tv[i];" << std::endl; fout << "}" << std::endl; } diff --git a/tmva/src/VariableGaussTransform.cxx b/tmva/src/VariableGaussTransform.cxx index 6be0c5ac9c5b0ecbe036caec30c31a0429e66edd..0f5990da5621a2c65e619d4a85822997395beeca 100644 --- a/tmva/src/VariableGaussTransform.cxx +++ b/tmva/src/VariableGaussTransform.cxx @@ -12,6 +12,7 @@ * * * Authors (alphabetical): * * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany * * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * @@ -35,6 +36,8 @@ #include <iomanip> #include <list> #include <limits> +#include <exception> +#include <stdexcept> #include "TVectorF.h" #include "TVectorD.h" @@ -93,9 +96,10 @@ Bool_t TMVA::VariableGaussTransform::PrepareTransformation( const std::vector<Ev Log() << kINFO << "Preparing the Gaussian transformation..." 
<< Endl; - SetNVariables(events[0]->GetNVariables()); + UInt_t inputSize = fGet.size(); + SetNVariables(inputSize); - if (GetNVariables() > 200) { + if (inputSize > 200) { Log() << kWARNING << "----------------------------------------------------------------------------" << Endl; Log() << kWARNING @@ -125,25 +129,40 @@ const TMVA::Event* TMVA::VariableGaussTransform::Transform(const Event* const ev //EVT} if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1; //EVT workaround end - + // get the variable vector of the current event - const UInt_t nvar = GetNVariables(); - TVectorD vec( nvar ); - for (UInt_t ivar=0; ivar<nvar; ivar++) vec(ivar) = ev->GetValue(ivar); + UInt_t inputSize = fGet.size(); + + std::vector<Float_t> input(0); + std::vector<Float_t> output(0); + + std::vector<Char_t> mask; // entries with kTRUE must not be transformed + GetInput( ev, input, mask ); + + std::vector<Char_t>::iterator itMask = mask.begin(); + +// TVectorD vec( inputSize ); +// for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar); Double_t cumulant; //transformation - for (UInt_t ivar=0; ivar<nvar; ivar++) { + for (UInt_t ivar=0; ivar<inputSize; ivar++) { + + if ( (*itMask) ){ + ++itMask; + continue; + } + if (0 != fCumulativePDF[ivar][cls]) { // first make it flat if(fTMVAVersion>TMVA_VERSION(3,9,7)) - cumulant = (fCumulativePDF[ivar][cls])->GetVal(vec(ivar)); + cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar)); else - cumulant = OldCumulant(vec(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() ); + cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() ); cumulant = TMath::Min(cumulant,1.-10e-10); cumulant = TMath::Max(cumulant,0.+10e-10); if (fFlatNotGauss) - vec(ivar) = cumulant; + output.push_back( cumulant ); else { // sanity correction for out-of-range values Double_t maxErfInvArgRange = 0.99999999; @@ -151,7 +170,7 @@ const TMVA::Event* 
TMVA::VariableGaussTransform::Transform(const Event* const ev arg = TMath::Min(+maxErfInvArgRange,arg); arg = TMath::Max(-maxErfInvArgRange,arg); - vec(ivar) = 1.414213562*TMath::ErfInverse(arg); + output.push_back( 1.414213562*TMath::ErfInverse(arg) ); } } } @@ -161,12 +180,7 @@ const TMVA::Event* TMVA::VariableGaussTransform::Transform(const Event* const ev fTransformedEvent = new Event(); } - for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) fTransformedEvent->SetTarget( itgt, ev->GetTarget(itgt) ); - for (UInt_t ivar=0; ivar<nvar; ivar++) fTransformedEvent->SetVal ( ivar, vec(ivar) ); - - fTransformedEvent->SetWeight ( ev->GetWeight() ); - fTransformedEvent->SetBoostWeight( ev->GetBoostWeight() ); - fTransformedEvent->SetClass ( ev->GetClass() ); + SetOutput( fTransformedEvent, output, mask, ev ); return fTransformedEvent; } @@ -176,7 +190,7 @@ const TMVA::Event* TMVA::VariableGaussTransform::InverseTransform( const Event* { // apply the Gauss transformation // TODO: implementation of inverse transformation - Log() << kFATAL << "Inverse transformation for Gauss transformation not yet implemented. Hence, this transformation cannot be applied together with regression. Please contact the authors if necessary." << Endl; + Log() << kFATAL << "Inverse transformation for Gauss transformation not yet implemented. Hence, this transformation cannot be applied together with regression if targets should be transformed. Please contact the authors if necessary." 
<< Endl; if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" @@ -230,7 +244,10 @@ void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector<Event*>& { // fill the cumulative distributions - const UInt_t nvar = GetNVariables(); + const UInt_t inputSize = fGet.size(); +// const UInt_t nCls = GetNClasses(); + +// const UInt_t nvar = GetNVariables(); UInt_t nevt = events.size(); const UInt_t nClasses = GetNClasses(); @@ -243,11 +260,13 @@ void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector<Event*>& std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist]; std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist]; for (UInt_t i=0; i < numDist; i++) { - listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [nvar]; - vsForBinning[i] = new std::vector<Float_t> [nvar]; - nbins[i] = new UInt_t[nvar]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason. + listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize]; + vsForBinning[i] = new std::vector<Float_t> [inputSize]; + nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason. } + std::vector<Float_t> input; + std::vector<Char_t> mask; // entries with kTRUE must not be transformed // perform event loop Float_t *sumOfWeights = new Float_t[numDist]; @@ -255,19 +274,32 @@ void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector<Event*>& Float_t *maxWeight = new Float_t[numDist]; for (UInt_t i=0; i<numDist; i++) { sumOfWeights[i]=0; - minWeight[i]=10E10; - maxWeight[i]=0; + minWeight[i]=10E10; // TODO: change this to std::max ? 
+ maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible? } for (UInt_t ievt=0; ievt < nevt; ievt++) { const Event* ev= events[ievt]; Int_t cls = ev->GetClass(); - sumOfWeights[cls] += ev->GetWeight(); - if (minWeight[cls] > ev->GetWeight()) minWeight[cls]=ev->GetWeight(); - if (maxWeight[cls] < ev->GetWeight()) maxWeight[cls]=ev->GetWeight(); - if (numDist>1) sumOfWeights[numDist-1] += ev->GetWeight(); - for (UInt_t ivar=0; ivar<nvar; ivar++) { - listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(ev->GetValue(ivar),ev->GetWeight())); - if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(ev->GetValue(ivar),ev->GetWeight())); + Float_t eventWeight = ev->GetWeight(); + sumOfWeights[cls] += eventWeight; + if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight; + if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight; + if (numDist>1) sumOfWeights[numDist-1] += eventWeight; + + Bool_t hasMaskedEntries = GetInput( ev, input, mask ); + if( hasMaskedEntries ){ + Log() << kWARNING << "Incomplete event" << Endl; + ev->Print(Log()); + Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." 
<< Endl; + } + + + Int_t ivar = 0; + for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) { + Float_t value = (*itInput); + listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight)); + if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight)); + ++ivar; } } if (numDist > 1) { @@ -282,7 +314,7 @@ void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector<Event*>& const UInt_t nbinsmax=2000; // maximum number of bins for (UInt_t icl=0; icl< numDist; icl++){ - for (UInt_t ivar=0; ivar<nvar; ivar++) { + for (UInt_t ivar=0; ivar<inputSize; ivar++) { listsForBinning[icl][ivar].sort(); std::list< TMVA::TMVAGaussPair >::iterator it; Float_t sumPerBin = sumOfWeights[icl]/nbinsmax; @@ -315,9 +347,9 @@ void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector<Event*>& delete[] maxWeight; // create histogram for the cumulative distribution. - fCumulativeDist.resize(nvar); + fCumulativeDist.resize(inputSize); for (UInt_t icls = 0; icls < numDist; icls++) { - for (UInt_t ivar=0; ivar < nvar; ivar++){ + for (UInt_t ivar=0; ivar < inputSize; ivar++){ Float_t* binnings = new Float_t[nbins[icls][ivar]]; //the binning for this particular histogram: for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){ @@ -352,10 +384,17 @@ void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector<Event*>& const Event* ev= events[ievt]; Int_t cls = ev->GetClass(); + Float_t eventWeight = ev->GetWeight(); - for (UInt_t ivar=0; ivar<nvar; ivar++) { - fCumulativeDist[ivar][cls]->Fill(ev->GetValue(ivar),ev->GetWeight());; - if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(ev->GetValue(ivar),ev->GetWeight());; + GetInput( ev, input, mask ); + + Int_t ivar = 0; + for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) { + Float_t value = (*itInput); + 
fCumulativeDist[ivar][cls]->Fill(value,eventWeight); + if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight); + + ++ivar; } } @@ -364,17 +403,18 @@ void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector<Event*>& // now sum up in order to get the real cumulative distribution Double_t sum = 0, total=0; - for (UInt_t ivar=0; ivar<nvar; ivar++) { - fCumulativePDF.resize(ivar+1); + fCumulativePDF.resize(inputSize); + for (UInt_t ivar=0; ivar<inputSize; ivar++) { +// fCumulativePDF.resize(ivar+1); for (UInt_t icls=0; icls<numDist; icls++) { (fCumulativeDist[ivar][icls])->Smooth(); sum = 0; total = 0.; - for (Int_t ibin=1; ibin <=fCumulativeDist[ivar][icls]->GetNbinsX() ; ibin++){ + for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){ Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin); if (val>0) total += val; } - for (Int_t ibin=1; ibin <=fCumulativeDist[ivar][icls]->GetNbinsX() ; ibin++){ + for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){ Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin); if (val>0) sum += val; (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total); @@ -418,9 +458,12 @@ void TMVA::VariableGaussTransform::AttachXMLTo(void* parent) { gTools().AddAttr(trfxml, "Name", "Gauss"); gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") ); - for (UInt_t ivar=0; ivar<GetNVariables(); ivar++) { + VariableTransformBase::AttachXMLTo( trfxml ); + + UInt_t nvar = fGet.size(); + for (UInt_t ivar=0; ivar<nvar; ivar++) { void* varxml = gTools().AddChild( trfxml, "Variable"); - gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() ); +// gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() ); gTools().AddAttr( varxml, "VarIndex", ivar ); if ( fCumulativePDF[ivar][0]==0 || fCumulativePDF[ivar][1]==0 ) @@ -440,17 +483,37 @@ void TMVA::VariableGaussTransform::ReadFromXML( void* 
trfnode ) { // clean up first CleanUpCumulativeArrays(); TString FlatOrGauss; + gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss ); + if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE; else fFlatNotGauss = kFALSE; + Bool_t newFormat = kFALSE; + + void* inpnode = NULL; + + inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format + if( inpnode!=NULL ) + newFormat = kTRUE; // new xml format + + void* varnode = NULL; + if( newFormat ){ + // ------------- new format -------------------- + // read input + VariableTransformBase::ReadFromXML( inpnode ); + + varnode = gTools().GetNextChild(inpnode); + }else + varnode = gTools().GetChild(trfnode); + // Read the cumulative distribution - void* varnode = gTools().GetChild( trfnode ); TString varname, histname, classname; UInt_t ivar; while(varnode) { - gTools().ReadAttr(varnode, "Name", varname); + if( gTools().HasAttr(varnode,"Name") ) + gTools().ReadAttr(varnode, "Name", varname); gTools().ReadAttr(varnode, "VarIndex", ivar); void* clsnode = gTools().GetChild( varnode); @@ -617,11 +680,13 @@ void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TStri { // creates the transformation function // - const UInt_t nvar = GetNVariables(); + const UInt_t nvar = fGet.size(); UInt_t numDist = GetNClasses() + 1; Int_t nBins = 1000; // creates the gauss transformation function if (part==1) { + fout << std::endl; + fout << " int nvar;" << std::endl; fout << std::endl; // declare variables fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl; @@ -635,6 +700,8 @@ void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TStri fout << "//_______________________________________________________________________" << std::endl; fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl; fout << "{" << std::endl; + fout << " // Gauss/Uniform transformation, initialisation" << std::endl; + fout << " nvar=" << nvar << ";" << 
std::endl; // fill meat here // loop over nvar , cls, loop over nBins // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar) @@ -642,6 +709,21 @@ void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TStri for (UInt_t ivar=0; ivar<nvar; ivar++) { Double_t xmn=(fCumulativePDF[ivar][icls])->GetXmin(); Double_t xmx=(fCumulativePDF[ivar][icls])->GetXmax(); + + Int_t idx = 0; + try{ + idx = fGet.at(ivar).second; + Char_t type = fGet.at(ivar).first; + if( type != 'v' ){ + Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl; + } + }catch( std::out_of_range except ){ + Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl; + } + +// Double_t xmn=Variables()[idx].GetMin(); +// Double_t xmx=Variables()[idx].GetMax(); + fout << " xMin["<<ivar<<"]["<<icls<<"]="<<xmn<<";"<<std::endl; fout << " xMax["<<ivar<<"]["<<icls<<"]="<<xmx<<";"<<std::endl; for (Int_t ibin=0; ibin<=nBins; ibin++) { @@ -654,31 +736,39 @@ void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TStri fout << "}" << std::endl; fout << std::endl; fout << "//_______________________________________________________________________" << std::endl; - fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int cls) const" << std::endl; + fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl; fout << "{" << std::endl; + fout << " // Gauss/Uniform transformation" << std::endl; + fout << " int cls=clsIn;" << std::endl; fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl; fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl; fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl; fout << " }"<< 
std::endl; + fout << " // copy the variables which are going to be transformed" << std::endl; + VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 ); + fout << " std::vector<double> dv(nvar);" << std::endl; + fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)];" << std::endl; + + fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<";"<< std::endl; fout << " double cumulant;"<< std::endl; - fout << " const int nvar = "<<GetNVariables()<<";"<< std::endl; + fout << " //const int nvar = "<<nvar<<";"<< std::endl; fout << " for (int ivar=0; ivar<nvar; ivar++) {"<< std::endl; - // ibin = (xval -xmin) / (xmax-xmin) *1000 - fout << " int ibin1 = (int) ((iv[ivar]-xMin[ivar][cls])/(xMax[ivar][cls]-xMin[ivar][cls])*"<<nBins<<");"<<std::endl; + // ibin = (xval -xMin) / (xMax-xMin) *1000 + fout << " int ibin1 = (int) ((dv[ivar]-xMin[ivar][cls])/(xMax[ivar][cls]-xMin[ivar][cls])*"<<nBins<<");"<<std::endl; fout << " if (ibin1<=0) { cumulant = cumulativeDist[ivar][cls][0];}"<<std::endl; fout << " else if (ibin1>="<<nBins<<") { cumulant = cumulativeDist[ivar][cls]["<<nBins<<"];}"<<std::endl; fout << " else {"<<std::endl; fout << " int ibin2 = ibin1+1;" << std::endl; - fout << " double dx = iv[ivar]-(xMin[ivar][cls]+"<< (1./nBins) + fout << " double dx = dv[ivar]-(xMin[ivar][cls]+"<< (1./nBins) << " * ibin1* (xMax[ivar][cls]-xMin[ivar][cls]));" << std::endl; fout << " double eps=dx/(xMax[ivar][cls]-xMin[ivar][cls])*"<<nBins<<";"<<std::endl; fout << " cumulant = eps*cumulativeDist[ivar][cls][ibin1] + (1-eps)*cumulativeDist[ivar][cls][ibin2];" << std::endl; fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10;"<< std::endl; fout << " if (cumulant<10e-10) cumulant = 10e-10;"<< std::endl; - fout << " if (FlatNotGauss) iv[ivar] = cumulant;"<< std::endl; + fout << " if (FlatNotGauss) dv[ivar] = cumulant;"<< std::endl; fout << " else {"<< std::endl; fout << " double maxErfInvArgRange = 0.99999999;"<< std::endl; fout << " 
double arg = 2.0*cumulant - 1.0;"<< std::endl; @@ -690,11 +780,14 @@ void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TStri fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ;"<<std::endl; fout << " else inverf += stp;"<<std::endl; fout << " } ;"<<std::endl; - fout << " //iv[ivar] = 1.414213562*TMath::ErfInverse(arg);"<< std::endl; - fout << " iv[ivar] = 1.414213562* inverf;"<< std::endl; + fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg);"<< std::endl; + fout << " dv[ivar] = 1.414213562* inverf;"<< std::endl; fout << " }"<< std::endl; fout << " }"<< std::endl; fout << " }"<< std::endl; + + fout << " // copy the transformed variables back" << std::endl; + fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar];" << std::endl; fout << "}" << std::endl; } } diff --git a/tmva/src/VariableIdentityTransform.cxx b/tmva/src/VariableIdentityTransform.cxx index 4a9b01853c536641477cccbe194e4dcab56f0281..d1ef19bc94b09d1814e1d85ca88bbde92e1a0806 100644 --- a/tmva/src/VariableIdentityTransform.cxx +++ b/tmva/src/VariableIdentityTransform.cxx @@ -57,6 +57,9 @@ Bool_t TMVA::VariableIdentityTransform::PrepareTransformation( const std::vector Log() << kINFO << "Preparing the Identity transformation..." << Endl; + if( fGet.size() < events[0]->GetNVariables() ) + Log() << kFATAL << "Identity transform does not allow for a selection of input variables. Please remove the variable selection option and put only 'I'." 
<< Endl; + SetNVariables(events[0]->GetNVariables()); SetCreated( kTRUE ); diff --git a/tmva/src/VariableInfo.cxx b/tmva/src/VariableInfo.cxx index 4b356b91a4428a2365ea0891d6d26106fa7ef22a..5dac2f07393383ef0de577b530c58305f94f239f 100644 --- a/tmva/src/VariableInfo.cxx +++ b/tmva/src/VariableInfo.cxx @@ -143,6 +143,7 @@ void TMVA::VariableInfo::WriteToStream( std::ostream& o ) const //_______________________________________________________________________ void TMVA::VariableInfo::ReadFromStream( std::istream& istr ) { + // read VariableInfo from stream // PLEASE do not modify this, it does not have to correspond to WriteToStream diff --git a/tmva/src/VariableNormalizeTransform.cxx b/tmva/src/VariableNormalizeTransform.cxx index 19c921dce057494ac73966c04681e8735c89ce4e..c1c6a4057beb017d19d9458905caee02773812fd 100644 --- a/tmva/src/VariableNormalizeTransform.cxx +++ b/tmva/src/VariableNormalizeTransform.cxx @@ -13,7 +13,7 @@ * Authors (alphabetical): * * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * - * Peter Speckmayer <Peter:Speckmayer@cern.ch> - CERN, Switzerland * + * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * * * * Copyright (c) 2005: * @@ -64,18 +64,17 @@ void TMVA::VariableNormalizeTransform::Initialize() { // initialization of the normalization transformation - UInt_t nvar = Variables().size(); - UInt_t ntgts = Targets().size(); + UInt_t inputSize = fGet.size(); Int_t numC = GetNClasses()+1; if (GetNClasses() <= 1 ) numC = 1; fMin.resize( numC ); fMax.resize( numC ); for (Int_t i=0; i<numC; i++) { - fMin.at(i).resize(nvar+ntgts); - fMax.at(i).resize(nvar+ntgts); - fMin.at(i).assign(nvar+ntgts, 0); - fMax.at(i).assign(nvar+ntgts, 0); + fMin.at(i).resize(inputSize); + fMax.at(i).resize(inputSize); + fMin.at(i).assign(inputSize, 0); + fMax.at(i).assign(inputSize, 0); } } @@ -100,7 +99,7 @@ Bool_t 
TMVA::VariableNormalizeTransform::PrepareTransformation( const std::vecto const TMVA::Event* TMVA::VariableNormalizeTransform::Transform( const TMVA::Event* const ev, Int_t cls ) const { - // apply the decorrelation transformation + // apply the normalization transformation if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl; // if cls (the class chosen by the user) not existing, @@ -113,43 +112,42 @@ const TMVA::Event* TMVA::VariableNormalizeTransform::Transform( const TMVA::Even if (cls < 0 || cls >= (int) fMin.size()) cls = fMin.size()-1; // EVT workaround end - const UInt_t nvars = GetNVariables(); - const UInt_t ntgts = ev->GetNTargets(); - if (nvars != ev->GetNVariables()) { - Log() << kFATAL << "Transformation defined for a different number of variables (defined for: " << GetNVariables() - << ", event contains: " << ev->GetNVariables() << ")" << Endl; - } - if (ntgts != ev->GetNTargets()) { - Log() << kFATAL << "Transformation defined for a different number of targets (defined for: " << GetNTargets() - << ", event contains: " << ev->GetNTargets() << ")" << Endl; - } + FloatVector input; // will be filled with the selected variables, targets, (spectators) + FloatVector output; // will be filled with the selected variables, targets, (spectators) + std::vector<Char_t> mask; // entries with kTRUE must not be transformed + GetInput( ev, input, mask ); if (fTransformedEvent==0) fTransformedEvent = new Event(); Float_t min,max; - for (Int_t ivar=nvars-1; ivar>=0; ivar--) { - min = fMin.at(cls).at(ivar); - max = fMax.at(cls).at(ivar); - Float_t offset = min; - Float_t scale = 1.0/(max-min); + const FloatVector& minVector = fMin.at(cls); + const FloatVector& maxVector = fMax.at(cls); + + UInt_t iidx = 0; + std::vector<Char_t>::iterator itMask = mask.begin(); + for ( std::vector<Float_t>::iterator itInp = input.begin(), itInpEnd = input.end(); itInp != itInpEnd; ++itInp) { // loop over input variables + if( (*itMask) ){ + ++iidx; + ++itMask; 
+ // don't put any value into output if the value is masked + continue; + } - Float_t valnorm = (ev->GetValue(ivar)-offset)*scale * 2 - 1; - fTransformedEvent->SetVal(ivar,valnorm); - } - for (Int_t itgt=ntgts-1; itgt>=0; itgt--) { - min = fMin.at(cls).at(nvars+itgt); - max = fMax.at(cls).at(nvars+itgt); + Float_t val = (*itInp); + + min = minVector.at(iidx); + max = maxVector.at(iidx); Float_t offset = min; Float_t scale = 1.0/(max-min); - Float_t original = ev->GetTarget(itgt); - Float_t valnorm = (original-offset)*scale * 2 - 1; - fTransformedEvent->SetTarget(itgt,valnorm); + Float_t valnorm = (val-offset)*scale * 2 - 1; + output.push_back( valnorm ); + + ++iidx; + ++itMask; } - fTransformedEvent->SetWeight ( ev->GetWeight() ); - fTransformedEvent->SetBoostWeight( ev->GetBoostWeight() ); - fTransformedEvent->SetClass ( ev->GetClass() ); + SetOutput( fTransformedEvent, output, mask, ev ); return fTransformedEvent; } @@ -160,43 +158,40 @@ const TMVA::Event* TMVA::VariableNormalizeTransform::InverseTransform( const TMV if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl; // if cls (the class chosen by the user) not existing, - // assume that user wants to have the matrix for all classes together. + // assume that user wants to have the transformation for all classes together. 
if (cls < 0 || cls > GetNClasses()) { if (GetNClasses() > 1 ) cls = GetNClasses(); else cls = 0; } - const UInt_t nvars = GetNVariables(); - const UInt_t ntgts = GetNTargets(); - if (nvars != ev->GetNVariables()) { - Log() << kFATAL << "Transformation defined for a different number of variables " << GetNVariables() << " " << ev->GetNVariables() - << Endl; - } + FloatVector input; // will be filled with the selected variables, targets, (spectators) + FloatVector output; // will be filled with the output + std::vector<Char_t> mask; + GetInput( ev, input, mask, kTRUE ); if (fBackTransformedEvent==0) fBackTransformedEvent = new Event( *ev ); Float_t min,max; - for (Int_t ivar=nvars-1; ivar>=0; ivar--) { - min = fMin.at(cls).at(ivar); - max = fMax.at(cls).at(ivar); - Float_t offset = min; - Float_t scale = 1.0/(max-min); - - Float_t valnorm = offset+((ev->GetValue(ivar)+1)/(scale * 2)); - fBackTransformedEvent->SetVal(ivar,valnorm); - } + const FloatVector& minVector = fMin.at(cls); + const FloatVector& maxVector = fMax.at(cls); + + UInt_t iidx = 0; + for ( std::vector<Float_t>::iterator itInp = input.begin(), itInpEnd = input.end(); itInp != itInpEnd; ++itInp) { // loop over input variables + Float_t val = (*itInp); - for (Int_t itgt=ntgts-1; itgt>=0; itgt--) { - min = fMin.at(cls).at(nvars+itgt); - max = fMax.at(cls).at(nvars+itgt); + min = minVector.at(iidx); + max = maxVector.at(iidx); Float_t offset = min; Float_t scale = 1.0/(max-min); - Float_t original = ev->GetTarget(itgt); - Float_t valnorm = offset+((original+1.0)/(scale * 2)); - fBackTransformedEvent->SetTarget(itgt,valnorm); + Float_t valnorm = offset+((val+1)/(scale * 2)); + output.push_back( valnorm ); + + ++iidx; } + SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE ); + return fBackTransformedEvent; } @@ -207,47 +202,54 @@ void TMVA::VariableNormalizeTransform::CalcNormalizationParams( const std::vecto if (events.size() <= 1) Log() << kFATAL << "Not enough events (found " << events.size() << ") 
to calculate the normalization" << Endl; - UInt_t nvars = GetNVariables(); - UInt_t ntgts = GetNTargets(); + FloatVector input; // will be filled with the selected variables, targets, (spectators) + std::vector<Char_t> mask; - Int_t numC = GetNClasses()+1; - if (GetNClasses() <= 1 ) numC = 1; + UInt_t inputSize = fGet.size(); // number of input variables + + const UInt_t nCls = GetNClasses(); + Int_t numC = nCls+1; // prepare the min and max values for each of the classes and additionally for all classes (if more than one) + Int_t all = nCls; // at idx the min and max values for "all" classes are stored + if (nCls <= 1 ) { + numC = 1; + all = 0; + } - for (UInt_t ivar=0; ivar<nvars+ntgts; ivar++) { + for (UInt_t iinp=0; iinp<inputSize; ++iinp) { for (Int_t ic = 0; ic < numC; ic++) { - fMin.at(ic).at(ivar) = FLT_MAX; - fMax.at(ic).at(ivar) = -FLT_MAX; + fMin.at(ic).at(iinp) = FLT_MAX; + fMax.at(ic).at(iinp) = -FLT_MAX; } } - const Int_t all = GetNClasses(); std::vector<Event*>::const_iterator evIt = events.begin(); - for (;evIt!=events.end();evIt++) { - for (UInt_t ivar=0; ivar<nvars; ivar++) { - Float_t val = (*evIt)->GetValue(ivar); - UInt_t cls = (*evIt)->GetClass(); + for (;evIt!=events.end();evIt++) { // loop over all events + TMVA::Event* event = (*evIt); // get the event - if (fMin.at(cls).at(ivar) > val) fMin.at(cls).at(ivar) = val; - if (fMax.at(cls).at(ivar) < val) fMax.at(cls).at(ivar) = val; + UInt_t cls = (*evIt)->GetClass(); // get the class of this event - if (GetNClasses() != 1) { - if (fMin.at(all).at(ivar) > val) fMin.at(all).at(ivar) = val; - if (fMax.at(all).at(ivar) < val) fMax.at(all).at(ivar) = val; - } - } - for (UInt_t itgt=0; itgt<ntgts; itgt++) { - Float_t val = (*evIt)->GetTarget(itgt); - UInt_t cls = (*evIt)->GetClass(); + FloatVector& minVector = fMin.at(cls); + FloatVector& maxVector = fMax.at(cls); + + FloatVector& minVectorAll = fMin.at(all); + FloatVector& maxVectorAll = fMax.at(all); + + GetInput(event,input,mask); // select the 
input variables for the transformation and get them from the event + UInt_t iidx = 0; + for ( std::vector<Float_t>::iterator itInp = input.begin(), itInpEnd = input.end(); itInp != itInpEnd; ++itInp) { // loop over input variables + Float_t val = (*itInp); - if (fMin.at(cls).at(nvars+itgt) > val) fMin.at(cls).at(nvars+itgt) = val; - if (fMax.at(cls).at(nvars+itgt) < val) fMax.at(cls).at(nvars+itgt) = val; + if( minVector.at(iidx) > val ) minVector.at(iidx) = val; + if( maxVector.at(iidx) < val ) maxVector.at(iidx) = val; - if (GetNClasses() != 1) { - if (fMin.at(all).at(nvars+itgt) > val) fMin.at(all).at(nvars+itgt) = val; - if (fMax.at(all).at(nvars+itgt) < val) fMax.at(all).at(nvars+itgt) = val; + if (nCls != 1) { // in case more than one class exists, compute min and max as well for all classes together + if (minVectorAll.at(iidx) > val) minVectorAll.at(iidx) = val; + if (maxVectorAll.at(iidx) < val) maxVectorAll.at(iidx) = val; + } + + ++iidx; } } - } return; } @@ -261,19 +263,27 @@ std::vector<TString>* TMVA::VariableNormalizeTransform::GetTransformationStrings // have the matrix for all classes together. 
if (cls < 0 || cls > GetNClasses()) cls = GetNClasses(); - const UInt_t nvar = GetNVariables(); - std::vector<TString>* strVec = new std::vector<TString>(nvar); - Float_t min, max; - for (Int_t ivar=nvar-1; ivar>=0; ivar--) { - min = fMin.at(cls).at(ivar); - max = fMax.at(cls).at(ivar); + const UInt_t size = fGet.size(); + std::vector<TString>* strVec = new std::vector<TString>(size); + + UInt_t iinp = 0; + for( ItVarTypeIdxConst itGet = fGet.begin(), itGetEnd = fGet.end(); itGet != itGetEnd; ++itGet ) { + min = fMin.at(cls).at(iinp); + max = fMax.at(cls).at(iinp); + + Char_t type = (*itGet).first; + UInt_t idx = (*itGet).second; Float_t offset = min; Float_t scale = 1.0/(max-min); TString str(""); - if (offset < 0) str = Form( "2*%g*([%s] + %g) - 1", scale, Variables()[ivar].GetLabel().Data(), -offset ); - else str = Form( "2*%g*([%s] - %g) - 1", scale, Variables()[ivar].GetLabel().Data(), offset ); - (*strVec)[ivar] = str; + VariableInfo& varInfo = (type=='v'?fDsi.GetVariableInfo(idx):(type=='t'?fDsi.GetTargetInfo(idx):fDsi.GetSpectatorInfo(idx))); + + if (offset < 0) str = Form( "2*%g*([%s] + %g) - 1", scale, varInfo.GetLabel().Data(), -offset ); + else str = Form( "2*%g*([%s] - %g) - 1", scale, varInfo.GetLabel().Data(), offset ); + (*strVec)[iinp] = str; + + ++iinp; } return strVec; @@ -307,31 +317,24 @@ void TMVA::VariableNormalizeTransform::WriteTransformationToStream( std::ostream void TMVA::VariableNormalizeTransform::AttachXMLTo(void* parent) { // create XML description of Normalize transformation - Int_t numC = (GetNClasses()<= 1)?1:GetNClasses()+1; - UInt_t nvars = GetNVariables(); - UInt_t ntgts = GetNTargets(); void* trfxml = gTools().AddChild(parent, "Transform"); gTools().AddAttr(trfxml, "Name", "Normalize"); - gTools().AddAttr(trfxml, "NVariables", nvars); - gTools().AddAttr(trfxml, "NTargets", ntgts); + VariableTransformBase::AttachXMLTo( trfxml ); + + Int_t numC = (GetNClasses()<= 1)?1:GetNClasses()+1; for( Int_t icls=0; icls<numC; icls++ ) { 
void* clsxml = gTools().AddChild(trfxml, "Class"); gTools().AddAttr(clsxml, "ClassIndex", icls); - void* varsxml = gTools().AddChild(clsxml, "Variables"); - for (UInt_t ivar=0; ivar<nvars; ivar++) { - void* varxml = gTools().AddChild(varsxml, "Variable"); - gTools().AddAttr(varxml, "VarIndex", ivar); - gTools().AddAttr(varxml, "Min", fMin.at(icls).at(ivar) ); - gTools().AddAttr(varxml, "Max", fMax.at(icls).at(ivar) ); - } - void* tgtsxml = gTools().AddChild(clsxml, "Targets"); - for (UInt_t itgt=0; itgt<ntgts; itgt++) { - void* tgtxml = gTools().AddChild(tgtsxml, "Target"); - gTools().AddAttr(tgtxml, "TargetIndex", itgt); - gTools().AddAttr(tgtxml, "Min", fMin.at(icls).at(nvars+itgt) ); - gTools().AddAttr(tgtxml, "Max", fMax.at(icls).at(nvars+itgt) ); + void* inpxml = gTools().AddChild(clsxml, "Ranges"); + UInt_t iinp = 0; + for( ItVarTypeIdx itGet = fGet.begin(), itGetEnd = fGet.end(); itGet != itGetEnd; ++itGet ) { + void* mmxml = gTools().AddChild(inpxml, "Range"); + gTools().AddAttr(mmxml, "Index", iinp); + gTools().AddAttr(mmxml, "Min", fMin.at(icls).at(iinp) ); + gTools().AddAttr(mmxml, "Max", fMax.at(icls).at(iinp) ); + ++iinp; } } } @@ -340,11 +343,68 @@ void TMVA::VariableNormalizeTransform::AttachXMLTo(void* parent) void TMVA::VariableNormalizeTransform::ReadFromXML( void* trfnode ) { // Read the transformation matrices from the xml node + Bool_t newFormat = kFALSE; + + void* inpnode = NULL; + + inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format + if( inpnode != NULL ) + newFormat = kTRUE; + + if( newFormat ){ + // ------------- new format -------------------- + // read input + VariableTransformBase::ReadFromXML( inpnode ); + + // read transformation information + + UInt_t size = fGet.size(); + UInt_t classindex, idx; + + void* ch = gTools().GetChild( trfnode, "Class" ); + while(ch) { + Int_t ci = 0; + gTools().ReadAttr(ch, "ClassIndex", ci); + classindex = UInt_t(ci); + + fMin.resize(classindex+1); + fMax.resize(classindex+1); + + 
fMin[classindex].resize(size,Float_t(0)); + fMax[classindex].resize(size,Float_t(0)); + + void* clch = gTools().GetChild( ch ); + while(clch) { + TString nodeName(gTools().GetName(clch)); + if(nodeName=="Ranges") { + void* varch = gTools().GetChild( clch ); + while(varch) { + gTools().ReadAttr(varch, "Index", idx); + gTools().ReadAttr(varch, "Min", fMin[classindex][idx]); + gTools().ReadAttr(varch, "Max", fMax[classindex][idx]); + varch = gTools().GetNextChild( varch ); + } + } + clch = gTools().GetNextChild( clch ); + } + ch = gTools().GetNextChild( ch ); + } + SetCreated(); + return; + } + + // ------------- old format -------------------- UInt_t classindex, varindex, tgtindex, nvars, ntgts; gTools().ReadAttr(trfnode, "NVariables", nvars); gTools().ReadAttr(trfnode, "NTargets", ntgts); + for( UInt_t ivar = 0; ivar < nvars; ++ivar ){ + fGet.push_back(std::make_pair<Char_t,UInt_t>('v',ivar)); + } + for( UInt_t itgt = 0; itgt < ntgts; ++itgt ){ + fGet.push_back(std::make_pair<Char_t,UInt_t>('t',itgt)); + } void* ch = gTools().GetChild( trfnode ); while(ch) { gTools().ReadAttr(ch, "ClassIndex", classindex); @@ -382,8 +442,8 @@ void TMVA::VariableNormalizeTransform::ReadFromXML( void* trfnode ) } //_______________________________________________________________________ -void -TMVA::VariableNormalizeTransform::BuildTransformationFromVarInfo( const std::vector<TMVA::VariableInfo>& var ) { +void TMVA::VariableNormalizeTransform::BuildTransformationFromVarInfo( const std::vector<TMVA::VariableInfo>& var ) +{ // this method is only used when building a normalization transformation // from old text files // in this case regression didn't exist and there were no targets @@ -418,6 +478,12 @@ void TMVA::VariableNormalizeTransform::ReadTransformationFromStream( std::istrea UInt_t nvars = GetNVariables(); UInt_t ntgts = GetNTargets(); + for( UInt_t ivar = 0; ivar < nvars; ++ivar ){ + fGet.push_back(std::make_pair<Char_t,UInt_t>('v',ivar)); + } + for( UInt_t itgt = 0; itgt < 
ntgts; ++itgt ){ + fGet.push_back(std::make_pair<Char_t,UInt_t>('t',itgt)); + } char buf[512]; char buf2[512]; istr.getline(buf,512); @@ -449,23 +515,28 @@ void TMVA::VariableNormalizeTransform::ReadTransformationFromStream( std::istrea } //_______________________________________________________________________ -void TMVA::VariableNormalizeTransform::PrintTransformation( ostream& o ) +void TMVA::VariableNormalizeTransform::PrintTransformation( ostream& /* o */ ) { // prints the transformation ranges - Int_t numC = GetNClasses()+1; - if (GetNClasses() <= 1 ) numC = 1; - - UInt_t nvars = GetNVariables(); - UInt_t ntgts = GetNTargets(); + Int_t nCls = GetNClasses(); + Int_t numC = nCls+1; + if (nCls <= 1 ) numC = 1; for (Int_t icls = 0; icls < numC; icls++ ) { - Log() << kINFO << "Transformation for class " << icls << " based on these ranges:" << Endl; - Log() << kINFO << "Variables:" << Endl; - for (UInt_t ivar=0; ivar<nvars; ivar++) - o << std::setw(20) << fMin[icls][ivar] << std::setw(20) << fMax[icls][ivar] << std::endl; - Log() << kINFO << "Targets:" << Endl; - for (UInt_t itgt=0; itgt<ntgts; itgt++) - o << std::setw(20) << fMin[icls][nvars+itgt] << std::setw(20) << fMax[icls][nvars+itgt] << std::endl; + if( icls == nCls ) + Log() << kINFO << "Transformation for all classes based on these ranges:" << Endl; + else + Log() << kINFO << "Transformation for class " << icls << " based on these ranges:" << Endl; + UInt_t iinp = 0; + for( ItVarTypeIdxConst itGet = fGet.begin(), itGetEnd = fGet.end(); itGet != itGetEnd; ++itGet ){ + Char_t type = (*itGet).first; + UInt_t idx = (*itGet).second; + + TString typeString = (type=='v'?"Variable: ": (type=='t'?"Target : ":"Spectator : ") ); + Log() << typeString.Data() << std::setw(20) << fMin[icls][idx] << std::setw(20) << fMax[icls][idx] << Endl; + + ++iinp; + } } } @@ -475,11 +546,12 @@ void TMVA::VariableNormalizeTransform::MakeFunction( std::ostream& fout, const T { // creates a normalizing function // TODO include 
target-transformation into makefunction + UInt_t nVar = fGet.size(); UInt_t numC = fMin.size(); if (part==1) { fout << std::endl; - fout << " double fMin_"<<trCounter<<"["<<numC<<"]["<<GetNVariables()<<"];" << std::endl; - fout << " double fMax_"<<trCounter<<"["<<numC<<"]["<<GetNVariables()<<"];" << std::endl; + fout << " double fMin_"<<trCounter<<"["<<numC<<"]["<<nVar<<"];" << std::endl; + fout << " double fMax_"<<trCounter<<"["<<numC<<"]["<<nVar<<"];" << std::endl; } if (part==2) { @@ -487,12 +559,11 @@ void TMVA::VariableNormalizeTransform::MakeFunction( std::ostream& fout, const T fout << "//_______________________________________________________________________" << std::endl; fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl; fout << "{" << std::endl; - for (UInt_t ivar=0; ivar<GetNVariables(); ivar++) { - Float_t min = FLT_MAX; - Float_t max = -FLT_MAX; + fout << " // Normalization transformation, initialisation" << std::endl; + for (UInt_t ivar=0; ivar<nVar; ivar++) { for (UInt_t icls = 0; icls < numC; icls++) { - min = TMath::Min(min, fMin.at(icls).at(ivar) ); - max = TMath::Max(max, fMax.at(icls).at(ivar) ); + Double_t min = TMath::Min( FLT_MAX, fMin.at(icls).at(ivar) ); + Double_t max = TMath::Max(-FLT_MAX, fMax.at(icls).at(ivar) ); fout << " fMin_"<<trCounter<<"["<<icls<<"]["<<ivar<<"] = " << std::setprecision(12) << min << ";" << std::endl; fout << " fMax_"<<trCounter<<"["<<icls<<"]["<<ivar<<"] = " << std::setprecision(12) @@ -504,14 +575,21 @@ void TMVA::VariableNormalizeTransform::MakeFunction( std::ostream& fout, const T fout << "//_______________________________________________________________________" << std::endl; fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int cls) const" << std::endl; fout << "{" << std::endl; - fout << "if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl; + fout << " // Normalization transformation" << std::endl; + fout << " if (cls < 0 
|| cls > "<<GetNClasses()<<") {"<< std::endl; fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl; - fout << " else cls = "<<(fMin.size()==1?0:2)<<";"<< std::endl; - fout << "}"<< std::endl; - fout << " for (int ivar=0;ivar<"<<GetNVariables()<<";ivar++) {" << std::endl; + fout << " else cls = "<<(fMin.size()==1?0:2)<<";"<< std::endl; + fout << " }"<< std::endl; + fout << " const int nVar = " << nVar << ";" << std::endl << std::endl; + fout << " // get indices of used variables" << std::endl; + VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 ); + fout << " std::vector<double> dv(nVar);" << std::endl; + fout << " for (int ivar=0; ivar<nVar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)];" << std::endl; + + fout << " for (int ivar=0;ivar<"<<nVar<<";ivar++) {" << std::endl; fout << " double offset = fMin_"<<trCounter<<"[cls][ivar];" << std::endl; fout << " double scale = 1.0/(fMax_"<<trCounter<<"[cls][ivar]-fMin_"<<trCounter<<"[cls][ivar]);" << std::endl; - fout << " iv[ivar] = (iv[ivar]-offset)*scale * 2 - 1;" << std::endl; + fout << " iv[indicesPut.at(ivar)] = (dv[ivar]-offset)*scale * 2 - 1;" << std::endl; fout << " }" << std::endl; fout << "}" << std::endl; } diff --git a/tmva/src/VariablePCATransform.cxx b/tmva/src/VariablePCATransform.cxx index c6d0e6b6e4e83295a4a191167f374f37f52145dd..04edb928eeef65632d0dea53df81a3481d1f5e5d 100644 --- a/tmva/src/VariablePCATransform.cxx +++ b/tmva/src/VariablePCATransform.cxx @@ -12,6 +12,7 @@ * * * Authors (alphabetical): * * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * * * @@ -26,6 +27,8 @@ #include <iostream> #include <iomanip> +#include <stdexcept> +#include <algorithm> #include "TVectorF.h" #include "TVectorD.h" @@ -79,15 +82,17 @@ Bool_t 
TMVA::VariablePCATransform::PrepareTransformation( const std::vector<Even Log() << kINFO << "Preparing the Principle Component (PCA) transformation..." << Endl; - SetNVariables(events[0]->GetNVariables()); + UInt_t inputSize = fGet.size(); + + SetNVariables(inputSize); // TPrincipal doesn't support PCA transformation for 1 or less variables - if (GetNVariables() <= 1) { - Log() << kINFO << "Cannot perform PCA transformation for " << GetNVariables() << " variable only" << Endl; + if (inputSize <= 1) { + Log() << kFATAL << "Cannot perform PCA transformation for " << inputSize << " variable only" << Endl; return kFALSE; } - if (GetNVariables() > 200) { + if (inputSize > 200) { Log() << kINFO << "----------------------------------------------------------------------------" << Endl; Log() << kINFO @@ -110,7 +115,9 @@ const TMVA::Event* TMVA::VariablePCATransform::Transform( const Event* const ev, // apply the principal component analysis if (!IsCreated()) return 0; - const Int_t nvar = ev->GetNVariables(); +// const Int_t inputSize = fGet.size(); +// const UInt_t nCls = GetNClasses(); + // if we have more than one class, take the last PCA analysis where all classes are combined if // the cls parameter is outside the defined classes // If there is only one class, then no extra class for all events of all classes has to be created @@ -122,34 +129,28 @@ const TMVA::Event* TMVA::VariablePCATransform::Transform( const Event* const ev, // Perform PCA and put it into PCAed events tree - if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) { - if(fTransformedEvent!=0) delete fTransformedEvent; + if (fTransformedEvent==0 ) { fTransformedEvent = new Event(); } + std::vector<Float_t> input; + std::vector<Char_t> mask; + std::vector<Float_t> principalComponents; - // set the variable values - const std::vector<UInt_t>* varArrange = ev->GetVariableArrangement(); - - if(!varArrange) { - std::vector<Float_t> rv = X2P( ev->GetValues(), cls ); - for 
(Int_t ivar=0; ivar<nvar; ++ivar) - fTransformedEvent->SetVal(ivar, rv[ivar]); - } else { - std::vector<Float_t> rv(nvar); - for (Int_t ivar=0; ivar<nvar; ++ivar) - rv[ivar] = ev->GetValue(ivar); - rv = X2P( rv, cls ); - for (Int_t ivar=0; ivar<nvar; ++ivar) - fTransformedEvent->SetVal(ivar, rv[ivar]); + Bool_t hasMaskedEntries = GetInput( ev, input, mask ); + + if( hasMaskedEntries ){ // targets might be masked (for events where the targets have not been computed yet) + UInt_t numMasked = std::count(mask.begin(), mask.end(), kTRUE); + UInt_t numOK = std::count(mask.begin(), mask.end(), kFALSE); + if( numMasked>0 && numOK>0 ){ + Log() << kFATAL << "You mixed variables and targets in the decorrelation transformation. This is not possible." << Endl; + } + SetOutput( fTransformedEvent, input, mask, ev ); + return fTransformedEvent; } - // set the targets - for (UInt_t itgt=0; itgt<ev->GetNTargets(); itgt++) - fTransformedEvent->SetTarget( itgt, ev->GetTarget(itgt) ); - // and the rest - fTransformedEvent->SetWeight ( ev->GetWeight() ); - fTransformedEvent->SetBoostWeight( ev->GetBoostWeight() ); - fTransformedEvent->SetClass ( ev->GetClass() ); + + X2P( principalComponents, input, cls ); + SetOutput( fTransformedEvent, principalComponents, mask, ev ); return fTransformedEvent; } @@ -159,28 +160,33 @@ const TMVA::Event* TMVA::VariablePCATransform::InverseTransform( const Event* co { // apply the principal component analysis // TODO: implementation of inverse transformation - Log() << kFATAL << "Inverse transformation for PCA transformation not yet implemented. Hence, this transformation cannot be applied together with regression. Please contact the authors if necessary." << Endl; +// Log() << kFATAL << "Inverse transformation for PCA transformation not yet implemented. Hence, this transformation cannot be applied together with regression. Please contact the authors if necessary." 
<< Endl; if (!IsCreated()) return 0; - const Int_t nvar = ev->GetNVariables(); +// const Int_t inputSize = fGet.size(); + const UInt_t nCls = GetNClasses(); + //UInt_t evCls = ev->GetClass(); // if we have more than one class, take the last PCA analysis where all classes are combined if // the cls parameter is outside the defined classes // If there is only one class, then no extra class for all events of all classes has to be created - if (cls < 0 || cls > GetNClasses()) cls = ( GetNClasses() == 1 ? 0 : 1 ); + if (cls < 0 || UInt_t(cls) > nCls) cls = (fMeanValues.size()==1?0:2);//( GetNClasses() == 1 ? 0 : 1 ); ; + // Perform PCA and put it into PCAed events tree + + if (fBackTransformedEvent==0 ) fBackTransformedEvent = new Event(); // Perform PCA and put it into PCAed events tree - std::vector<Float_t> rv = X2P( ev->GetValues(), cls ); + if (fBackTransformedEvent==0 ) fBackTransformedEvent = new Event(); - if (fBackTransformedEvent==0 || fBackTransformedEvent->GetNVariables()!=ev->GetNVariables()) { - if(fBackTransformedEvent!=0) delete fBackTransformedEvent; - fBackTransformedEvent = new Event( *ev ); - } - for (Int_t ivar=0; ivar<nvar; ivar++) fBackTransformedEvent->SetVal(ivar, rv[ivar]); - fBackTransformedEvent->SetClass ( ev->GetClass() ); - fBackTransformedEvent->SetWeight ( ev->GetWeight() ); - fBackTransformedEvent->SetBoostWeight( ev->GetBoostWeight() ); + + std::vector<Float_t> principalComponents; + std::vector<Char_t> mask; + std::vector<Float_t> output; + + GetInput( ev, principalComponents, mask, kTRUE ); + P2X( output, principalComponents, cls ); + SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE ); return fBackTransformedEvent; } @@ -191,25 +197,50 @@ void TMVA::VariablePCATransform::CalculatePrincipalComponents( const std::vector // calculate the principal components for the signal and the background data // it uses the MakePrincipal method of ROOT's TPrincipal class - const Int_t nvar = GetNVariables(); + UInt_t nvars = 0, ntgts = 0, 
nspcts = 0; + CountVariableTypes( nvars, ntgts, nspcts ); + if( nvars>0 && ntgts>0 ) + Log() << kFATAL << "Variables and targets cannot be mixed in PCA transformation." << Endl; + + const Int_t inputSize = fGet.size(); // if we have more than one class, add another PCA analysis which combines all classes - const UInt_t maxPCA = (GetNClasses()<=1) ? GetNClasses() : GetNClasses()+1; + const UInt_t nCls = GetNClasses(); + const UInt_t maxPCA = (nCls<=1) ? nCls : nCls+1; // PCA [signal/background/class x/class y/... /all classes] std::vector<TPrincipal*> pca(maxPCA); - for (UInt_t i=0; i<maxPCA; i++) pca[i] = new TPrincipal(nvar,""); + for (UInt_t i=0; i<maxPCA; i++) pca[i] = new TPrincipal(nvars,""); // !! Not normalizing and not storing input data, for performance reasons. Should perhaps restore normalization. + // But this can be done afterwards by adding a normalisation transformation (user defined) Long64_t ievt, entries = events.size(); - Double_t *dvec = new Double_t[nvar]; + Double_t *dvec = new Double_t[inputSize]; + std::vector<Float_t> input; + std::vector<Char_t> mask; for (ievt=0; ievt<entries; ievt++) { Event* ev = events[ievt]; - for (Int_t i = 0; i < nvar; i++) dvec[i] = (Double_t) ev->GetValue(i); - pca.at(ev->GetClass())->AddRow( dvec ); - if (GetNClasses() > 1) pca.at(maxPCA-1)->AddRow( dvec ); + UInt_t cls = ev->GetClass(); + + Bool_t hasMaskedEntries = GetInput( ev, input, mask ); + if (hasMaskedEntries){ + Log() << kWARNING << "Print event which triggers an error" << Endl; + ev->Print(Log()); + Log() << kFATAL << "Masked entries found in event read in when calculating the principal components for the PCA transformation." 
<< Endl; + } + + UInt_t iinp = 0; + for( std::vector<Float_t>::iterator itInp = input.begin(), itInpEnd = input.end(); itInp != itInpEnd; ++itInp ) + { + Float_t value = (*itInp); + dvec[iinp] = (Double_t)value; + ++iinp; + } + + pca.at(cls)->AddRow( dvec ); + if (nCls > 1) pca.at(maxPCA-1)->AddRow( dvec ); } // delete possible leftovers @@ -231,22 +262,39 @@ void TMVA::VariablePCATransform::CalculatePrincipalComponents( const std::vector } //_______________________________________________________________________ -std::vector<Float_t> TMVA::VariablePCATransform::X2P( const std::vector<Float_t>& x, Int_t cls ) const +void TMVA::VariablePCATransform::X2P( std::vector<Float_t>& pc, const std::vector<Float_t>& x, Int_t cls ) const { // Calculate the principal components from the original data vector // x, and return it in p (function extracted from TPrincipal::X2P) // It's the users responsibility to make sure that both x and p are // of the right size (i.e., memory must be allocated for p) - const Int_t nvar = x.size(); - std::vector<Float_t> p(nvar,0); + const Int_t nInput = x.size(); + pc.assign(nInput,0); - for (Int_t i = 0; i < nvar; i++) { + for (Int_t i = 0; i < nInput; i++) { Double_t pv = 0; - for (Int_t j = 0; j < nvar; j++) + for (Int_t j = 0; j < nInput; j++) pv += (((Double_t)x.at(j)) - (*fMeanValues.at(cls))(j)) * (*fEigenVectors.at(cls))(j,i); - p[i] = pv; + pc[i] = pv; + } +} + +//_______________________________________________________________________ +void TMVA::VariablePCATransform::P2X( std::vector<Float_t>& x, const std::vector<Float_t>& pc, Int_t cls ) const +{ + // Perform the back-transformation from the principal components + // pc, and return x + // It's the users responsibility to make sure that both x and pc are + // of the right size (i.e., memory must be allocated for p) + const Int_t nInput = pc.size(); + x.assign(nInput,0); + + for (Int_t i = 0; i < nInput; i++) { + Double_t xv = 0; + for (Int_t j = 0; j < nInput; j++) + xv += 
((Double_t)pc.at(j)) * (*fEigenVectors.at(cls))(i,j); // rotation term only; the mean of component i is added once after the loop (adding mean(j) inside the loop summed all means into every x[i]) + x[i] = xv + (*fMeanValues.at(cls))(i); } - return p; } //_______________________________________________________________________ @@ -286,6 +334,8 @@ void TMVA::VariablePCATransform::AttachXMLTo(void* parent) { void* trfxml = gTools().AddChild(parent, "Transform"); gTools().AddAttr(trfxml, "Name", "PCA"); + VariableTransformBase::AttachXMLTo( trfxml ); + // write mean values to stream for (UInt_t sbType=0; sbType<fMeanValues.size(); sbType++) { void* meanxml = gTools().AddChild( trfxml, "Statistics"); @@ -325,6 +375,21 @@ void TMVA::VariablePCATransform::ReadFromXML( void* trfnode ) TString classtype; TString nodeName; + Bool_t newFormat = kFALSE; + + void* inpnode = NULL; + + inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format + if( inpnode!=NULL ) + newFormat = kTRUE; // new xml format + + if( newFormat ){ + // ------------- new format -------------------- + // read input + VariableTransformBase::ReadFromXML( inpnode ); + + } + void* ch = gTools().GetChild(trfnode); while (ch) { nodeName = gTools().GetName(ch); @@ -486,17 +551,18 @@ void TMVA::VariablePCATransform::MakeFunction( std::ostream& fout, const TString fout << " // x, and return it in p (function extracted from TPrincipal::X2P)" << std::endl; fout << " // It's the users responsibility to make sure that both x and p are" << std::endl; fout << " // of the right size (i.e., memory must be allocated for p)." 
<< std::endl; - fout << " const int nvar = " << nvar << ";" << std::endl; + fout << " const int nVar = " << nvar << ";" << std::endl; fout << std::endl; - fout << " for (int i = 0; i < nvar; i++) {" << std::endl; + fout << " for (int i = 0; i < nVar; i++) {" << std::endl; fout << " p[i] = 0;" << std::endl; - fout << " for (int j = 0; j < nvar; j++) p[i] += (x[j] - fMeanValues_"<<trCounter<<"[index][j]) * fEigenVectors_"<<trCounter<<"[index][j][i];" << std::endl; + fout << " for (int j = 0; j < nVar; j++) p[i] += (x[j] - fMeanValues_"<<trCounter<<"[index][j]) * fEigenVectors_"<<trCounter<<"[index][j][i];" << std::endl; fout << " }" << std::endl; fout << "}" << std::endl; fout << std::endl; fout << "//_______________________________________________________________________" << std::endl; fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl; fout << "{" << std::endl; + fout << " // PCA transformation, initialisation" << std::endl; // fill vector of mean values fout << " // initialise vector of mean values" << std::endl; @@ -525,18 +591,24 @@ void TMVA::VariablePCATransform::MakeFunction( std::ostream& fout, const TString fout << "//_______________________________________________________________________" << std::endl; fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int cls ) const" << std::endl; fout << "{" << std::endl; - fout << " const int nvar = " << nvar << ";" << std::endl; - fout << " double *dv = new double[nvar];" << std::endl; - fout << " double *rv = new double[nvar];" << std::endl; + fout << " // PCA transformation" << std::endl; + fout << " const int nVar = " << nvar << ";" << std::endl; + fout << " double *dv = new double[nVar];" << std::endl; + fout << " double *rv = new double[nVar];" << std::endl; fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl; fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl; fout << " else cls = 
"<<(numC==1?0:2)<<";"<< std::endl; fout << " }"<< std::endl; - fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[ivar];" << std::endl; + + VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 ); + + fout << " for (int ivar=0; ivar<nVar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)];" << std::endl; + fout << std::endl; fout << " // Perform PCA and put it into PCAed events tree" << std::endl; fout << " this->X2P_"<<trCounter<<"( dv, rv, cls );" << std::endl; - fout << " for (int ivar=0; ivar<nvar; ivar++) iv[ivar] = rv[ivar];" << std::endl; + fout << " for (int ivar=0; ivar<nVar; ivar++) iv[indicesPut.at(ivar)] = rv[ivar];" << std::endl; + fout << std::endl; fout << " delete [] dv;" << std::endl; fout << " delete [] rv;" << std::endl; diff --git a/tmva/src/VariableRearrangeTransform.cxx b/tmva/src/VariableRearrangeTransform.cxx new file mode 100644 index 0000000000000000000000000000000000000000..cbef74317f4b54a2a4f4393c0df37f1521cc23e8 --- /dev/null +++ b/tmva/src/VariableRearrangeTransform.cxx @@ -0,0 +1,395 @@ +// @(#)root/tmva $Id$ +// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Peter Speckmayer + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : VariableRearrangeTransform * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Implementation (see header for description) * + * * + * Authors (alphabetical): * + * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * + * * + * Copyright (c) 2005: * + * CERN, Switzerland * + * MPI-K Heidelberg, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#include <iostream> +#include 
<iomanip> +#include <stdexcept> + +#ifndef ROOT_TMVA_MsgLogger +#include "TMVA/MsgLogger.h" +#endif +#ifndef ROOT_TMVA_VariableRearrangeTransform +#include "TMVA/VariableRearrangeTransform.h" +#endif +#ifndef ROOT_TMVA_Tools +#include "TMVA/Tools.h" +#endif +#ifndef ROOT_TMVA_DataSet +#include "TMVA/DataSet.h" +#endif + +ClassImp(TMVA::VariableRearrangeTransform) + +//_______________________________________________________________________ +TMVA::VariableRearrangeTransform::VariableRearrangeTransform( DataSetInfo& dsi ) +: VariableTransformBase( dsi, Types::kRearranged, "Rearrange" ) +{ + // constructor +} + +//_______________________________________________________________________ +TMVA::VariableRearrangeTransform::~VariableRearrangeTransform() { +} + +//_______________________________________________________________________ +void TMVA::VariableRearrangeTransform::Initialize() +{ + // initialization of the rearrangement transformation + // (nothing to do) +} + +//_______________________________________________________________________ +Bool_t TMVA::VariableRearrangeTransform::PrepareTransformation( const std::vector<Event*>& /*events*/ ) +{ + // prepare transformation --> (nothing to do) + if (!IsEnabled() || IsCreated()) return kTRUE; + + UInt_t nvars = 0, ntgts = 0, nspcts = 0; + CountVariableTypes( nvars, ntgts, nspcts ); +// std::cout << "vartypes&varrearrtransf: " << nvars << " " << ntgts << " " << nspcts << std::endl; +// events[0]->Print(std::cout); + if( ntgts>0 ) + Log() << kFATAL << "Targets used in Rearrange-transformation." 
<< Endl; + + SetCreated( kTRUE ); + return kTRUE; +} + +//_______________________________________________________________________ +const TMVA::Event* TMVA::VariableRearrangeTransform::Transform( const TMVA::Event* const ev, Int_t /*cls*/ ) const +{ + if( !IsEnabled() ) + return ev; + + // apply the normalization transformation + if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl; + + if (fTransformedEvent==0 ) + fTransformedEvent = new Event(); + + FloatVector input; // will be filled with the selected variables, (targets) + std::vector<Char_t> mask; // masked variables +// std::cout << "========" << std::endl; +// UInt_t nvars = 0, ntgts = 0, nspcts = 0; +// CountVariableTypes( nvars, ntgts, nspcts ); +// std::cout << "vartypes&varrearrtransf/trnsfrm: " << nvars << " " << ntgts << " " << nspcts << std::endl; +// ev->Print(std::cout); + GetInput( ev, input, mask ); +// for( std::vector<Float_t>::iterator it = input.begin(), itEnd = input.end(); it != itEnd; ++it ){ +// std::cout << (*it) << " "; +// } +// std::cout << std::endl; + SetOutput( fTransformedEvent, input, mask, ev ); +// std::cout << "transformed ---" << std::endl; +// fTransformedEvent->Print(std::cout); + + + return fTransformedEvent; +} + +//_______________________________________________________________________ +const TMVA::Event* TMVA::VariableRearrangeTransform::InverseTransform( const TMVA::Event* const ev, Int_t /*cls*/ ) const +{ + if( !IsEnabled() ) + return ev; + + // apply the inverse transformation + if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl; + + if (fBackTransformedEvent==0) + fBackTransformedEvent = new Event( *ev ); + + FloatVector input; // will be filled with the selected variables, targets, (spectators) + std::vector<Char_t> mask; // masked variables +// std::cout << "inv =====" << std::endl; + GetInput( ev, input, mask, kTRUE ); +// ev->Print(std::cout); + SetOutput( fBackTransformedEvent, input, mask, ev, kTRUE ); +// 
std::cout << "inv ---" << std::endl; +// fBackTransformedEvent->Print(std::cout); + + + return fBackTransformedEvent; +} + + +//_______________________________________________________________________ +std::vector<TString>* TMVA::VariableRearrangeTransform::GetTransformationStrings( Int_t /*cls*/ ) const +{ +// // creates string with variable transformations applied + +// // if cls (the class chosen by the user) not existing, assume that user wants to +// // have the matrix for all classes together. +// if (cls < 0 || cls > GetNClasses()) cls = GetNClasses(); + +// Float_t min, max; + + const UInt_t size = fGet.size(); + std::vector<TString>* strVec = new std::vector<TString>(size); + +// UInt_t iinp = 0; +// for( ItVarTypeIdxConst itGet = fGet.begin(), itGetEnd = fGet.end(); itGet != itGetEnd; ++itGet ) { +// min = fMin.at(cls).at(iinp); +// max = fMax.at(cls).at(iinp); + +// Char_t type = (*itGet).first; +// UInt_t idx = (*itGet).second; + +// Float_t offset = min; +// Float_t scale = 1.0/(max-min); +// TString str(""); +// VariableInfo& varInfo = (type=='v'?fDsi.GetVariableInfo(idx):(type=='t'?fDsi.GetTargetInfo(idx):fDsi.GetSpectatorInfo(idx))); + +// if (offset < 0) str = Form( "2*%g*([%s] + %g) - 1", scale, varInfo.GetLabel().Data(), -offset ); +// else str = Form( "2*%g*([%s] - %g) - 1", scale, varInfo.GetLabel().Data(), offset ); +// (*strVec)[iinp] = str; + +// ++iinp; +// } + + + return strVec; +} + +//_______________________________________________________________________ +void TMVA::VariableRearrangeTransform::AttachXMLTo(void* parent) +{ +// // create XML description of Rearrange transformation + void* trfxml = gTools().AddChild(parent, "Transform"); + gTools().AddAttr(trfxml, "Name", "Rearrange"); + + VariableTransformBase::AttachXMLTo( trfxml ); + +// Int_t numC = (GetNClasses()<= 1)?1:GetNClasses()+1; + + +// for( Int_t icls=0; icls<numC; icls++ ) { +// void* clsxml = gTools().AddChild(trfxml, "Class"); +// gTools().AddAttr(clsxml, "ClassIndex", 
icls); +// void* inpxml = gTools().AddChild(clsxml, "Ranges"); +// UInt_t iinp = 0; +// for( ItVarTypeIdx itGet = fGet.begin(), itGetEnd = fGet.end(); itGet != itGetEnd; ++itGet ) { +// void* mmxml = gTools().AddChild(inpxml, "Range"); +// gTools().AddAttr(mmxml, "Index", iinp); +// gTools().AddAttr(mmxml, "Min", fMin.at(icls).at(iinp) ); +// gTools().AddAttr(mmxml, "Max", fMax.at(icls).at(iinp) ); +// ++iinp; +// } +// } +} + +//_______________________________________________________________________ +void TMVA::VariableRearrangeTransform::ReadFromXML( void* trfnode ) +{ +// // Read the transformation matrices from the xml node + + + Bool_t newFormat = kFALSE; + + void* inpnode = NULL; + + inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format + if(inpnode == NULL) + Log() << kFATAL << "Unknown weight file format for transformations. (tried to read in 'rearrange' transform)" << Endl; + newFormat = kTRUE; + + VariableTransformBase::ReadFromXML( inpnode ); + + SetCreated(); + +// Bool_t newFormat = kFALSE; + +// void* inpnode = NULL; +// try{ +// inpnode = gTools().GetChild(trfnode, "Input"); // new xml format +// newFormat = kTRUE; +// }catch( std::logic_error& excpt ){ +// newFormat = kFALSE; // old xml format +// } +// if( newFormat ){ +// // ------------- new format -------------------- +// // read input +// VariableTransformBase::ReadFromXML( inpnode ); + +// // read transformation information + +// UInt_t size = fGet.size(); +// UInt_t classindex, idx; + +// void* ch = gTools().GetChild( trfnode ); +// while(ch) { +// Int_t ci = 0; +// gTools().ReadAttr(ch, "ClassIndex", ci); +// classindex = UInt_t(ci); + +// fMin.resize(classindex+1); +// fMax.resize(classindex+1); + +// fMin[classindex].resize(size,Float_t(0)); +// fMax[classindex].resize(size,Float_t(0)); + +// void* clch = gTools().GetChild( ch ); +// while(clch) { +// TString nodeName(gTools().GetName(clch)); +// if(nodeName=="Ranges") { +// void* varch = gTools().GetChild( clch ); +// 
while(varch) { +// gTools().ReadAttr(varch, "Index", idx); +// gTools().ReadAttr(varch, "Min", fMin[classindex][idx]); +// gTools().ReadAttr(varch, "Max", fMax[classindex][idx]); +// varch = gTools().GetNextChild( varch ); +// } +// } +// clch = gTools().GetNextChild( clch ); +// } +// ch = gTools().GetNextChild( ch ); +// } + +// SetCreated(); +// return; +// } + +// // ------------- old format -------------------- +// UInt_t classindex, varindex, tgtindex, nvars, ntgts; + +// gTools().ReadAttr(trfnode, "NVariables", nvars); +// gTools().ReadAttr(trfnode, "NTargets", ntgts); + +// for( UInt_t ivar = 0; ivar < nvars; ++ivar ){ +// fGet.push_back(std::make_pair<Char_t,UInt_t>('v',ivar)); +// } +// for( UInt_t itgt = 0; itgt < ntgts; ++itgt ){ +// fGet.push_back(std::make_pair<Char_t,UInt_t>('t',itgt)); +// } + +// void* ch = gTools().GetChild( trfnode ); +// while(ch) { +// gTools().ReadAttr(ch, "ClassIndex", classindex); + +// fMin.resize(classindex+1); +// fMax.resize(classindex+1); +// fMin[classindex].resize(nvars+ntgts,Float_t(0)); +// fMax[classindex].resize(nvars+ntgts,Float_t(0)); + +// void* clch = gTools().GetChild( ch ); +// while(clch) { +// TString nodeName(gTools().GetName(clch)); +// if(nodeName=="Variables") { +// void* varch = gTools().GetChild( clch ); +// while(varch) { +// gTools().ReadAttr(varch, "VarIndex", varindex); +// gTools().ReadAttr(varch, "Min", fMin[classindex][varindex]); +// gTools().ReadAttr(varch, "Max", fMax[classindex][varindex]); +// varch = gTools().GetNextChild( varch ); +// } +// } else if (nodeName=="Targets") { +// void* tgtch = gTools().GetChild( clch ); +// while(tgtch) { +// gTools().ReadAttr(tgtch, "TargetIndex", tgtindex); +// gTools().ReadAttr(tgtch, "Min", fMin[classindex][nvars+tgtindex]); +// gTools().ReadAttr(tgtch, "Max", fMax[classindex][nvars+tgtindex]); +// tgtch = gTools().GetNextChild( tgtch ); +// } +// } +// clch = gTools().GetNextChild( clch ); +// } +// ch = gTools().GetNextChild( ch ); +// } +// 
SetCreated(); +} + + + +//_______________________________________________________________________ +void TMVA::VariableRearrangeTransform::PrintTransformation( ostream& ) +{ +// // prints the transformation ranges + +// Int_t numC = GetNClasses()+1; +// if (GetNClasses() <= 1 ) numC = 1; + +// for (Int_t icls = 0; icls < numC; icls++ ) { +// Log() << "Transformation for class " << icls << " based on these ranges:" << Endl; + +// UInt_t iinp = 0; +// for( ItVarTypeIdxConst itGet = fGet.begin(), itGetEnd = fGet.end(); itGet != itGetEnd; ++itGet ){ +// Char_t type = (*itGet).first; +// UInt_t idx = (*itGet).second; + +// TString typeString = (type=='v'?"Variable: ": (type=='t'?"Target : ":"Spectator : ") ); +// Log() << typeString.Data() << std::setw(20) << fMin[icls][idx] << std::setw(20) << fMax[icls][idx] << Endl; + +// ++iinp; +// } +// } +} + +//_______________________________________________________________________ +void TMVA::VariableRearrangeTransform::MakeFunction( std::ostream& /*fout*/, const TString& /*fcncName*/, + Int_t /*part*/, UInt_t /*trCounter*/, Int_t ) +{ +// // creates a normalizing function + +// UInt_t numC = fMin.size(); +// if (part==1) { +// fout << std::endl; +// fout << " double fMin_"<<trCounter<<"["<<numC<<"]["<<fGet.size()<<"];" << std::endl; +// fout << " double fMax_"<<trCounter<<"["<<numC<<"]["<<fGet.size()<<"];" << std::endl; +// } + +// if (part==2) { +// fout << std::endl; +// fout << "//_______________________________________________________________________" << std::endl; +// fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl; +// fout << "{" << std::endl; + +// for (UInt_t ivar=0; ivar<GetNVariables(); ivar++) { +// Float_t min = FLT_MAX; +// Float_t max = -FLT_MAX; +// for (UInt_t icls = 0; icls < numC; icls++) { +// min = TMath::Min(min, fMin.at(icls).at(ivar) ); +// max = TMath::Max(max, fMax.at(icls).at(ivar) ); +// fout << " fMin_"<<trCounter<<"["<<icls<<"]["<<ivar<<"] = " << 
std::setprecision(12) +// << min << ";" << std::endl; +// fout << " fMax_"<<trCounter<<"["<<icls<<"]["<<ivar<<"] = " << std::setprecision(12) +// << max << ";" << std::endl; +// } +// } +// fout << "}" << std::endl; +// fout << std::endl; +// fout << "//_______________________________________________________________________" << std::endl; +// fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int cls) const" << std::endl; +// fout << "{" << std::endl; +// fout << "if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl; +// fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl; +// fout << " else cls = "<<(fMin.size()==1?0:2)<<";"<< std::endl; +// fout << "}"<< std::endl; +// fout << " for (int ivar=0;ivar<"<<GetNVariables()<<";ivar++) {" << std::endl; +// fout << " double offset = fMin_"<<trCounter<<"[cls][ivar];" << std::endl; +// fout << " double scale = 1.0/(fMax_"<<trCounter<<"[cls][ivar]-fMin_"<<trCounter<<"[cls][ivar]);" << std::endl; +// fout << " iv[ivar] = (iv[ivar]-offset)*scale * 2 - 1;" << std::endl; +// fout << " }" << std::endl; +// fout << "}" << std::endl; +// } +} diff --git a/tmva/src/VariableTransformBase.cxx b/tmva/src/VariableTransformBase.cxx index 75119090ddc97e1cf5c12fd8baef7ad20294e036..0b160d4bf130d0b161b7332feabcf4e9655a0180 100644 --- a/tmva/src/VariableTransformBase.cxx +++ b/tmva/src/VariableTransformBase.cxx @@ -12,6 +12,7 @@ * * * Authors (alphabetical): * * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * + * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland * * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland * * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * * * @@ -25,6 +26,11 @@ **********************************************************************************/ #include <iomanip> +#include <algorithm> +#include <exception> +#include <stdexcept> +#include <set> +#include <cassert> #include "TMath.h" #include 
"TVectorD.h" @@ -50,6 +56,7 @@ TMVA::VariableTransformBase::VariableTransformBase( DataSetInfo& dsi, const TString& trfName ) : TObject(), fDsi(dsi), + fDsiOutput(NULL), fTransformedEvent(0), fBackTransformedEvent(0), fVariableTransform(tf), @@ -57,6 +64,11 @@ TMVA::VariableTransformBase::VariableTransformBase( DataSetInfo& dsi, fCreated( kFALSE ), fNormalise( kFALSE ), fTransformName(trfName), + fVariableTypesAreCounted(false), + fNVariables(0), + fNTargets(0), + fNSpectators(0), + fSortGet(kTRUE), fTMVAVersion(TMVA_VERSION_CODE), fLogger( 0 ) { @@ -68,6 +80,9 @@ TMVA::VariableTransformBase::VariableTransformBase( DataSetInfo& dsi, for (UInt_t itgt = 0; itgt < fDsi.GetNTargets(); itgt++) { fTargets.push_back( VariableInfo( fDsi.GetTargetInfo(itgt) ) ); } + for (UInt_t ispct = 0; ispct < fDsi.GetNSpectators(); ispct++) { + fTargets.push_back( VariableInfo( fDsi.GetSpectatorInfo(ispct) ) ); + } } //_______________________________________________________________________ @@ -79,9 +94,366 @@ TMVA::VariableTransformBase::~VariableTransformBase() delete fLogger; } +//_______________________________________________________________________ +void TMVA::VariableTransformBase::SelectInput( const TString& _inputVariables, Bool_t putIntoVariables ) +{ + // select the variables/targets/spectators which serve as input to the transformation + TString inputVariables = _inputVariables; + + // unselect all variables first + fGet.clear(); + + UInt_t nvars = GetNVariables(); + UInt_t ntgts = GetNTargets(); + UInt_t nspcts = GetNSpectators(); + + typedef std::set<Int_t> SelectedIndices; + + SelectedIndices varIndices; + SelectedIndices tgtIndices; + SelectedIndices spctIndices; + + if (inputVariables == "") // default is all variables and all targets + { // (the default can be changed by decorating this member function in the implementations) + inputVariables = "_V_,_T_"; + } + + TList* inList = gTools().ParseFormatLine( inputVariables, "," ); + TListIter inIt(inList); + while 
(TObjString* os = (TObjString*)inIt()) { + + TString variables = os->GetString(); + + if( variables.BeginsWith("_") && variables.EndsWith("_") ) { // special symbol (keyword) + variables.Remove( 0,1); // remove first "_" + variables.Remove( variables.Length()-1,1 ); // remove last "_" + + if( variables.BeginsWith("V") ) { // variables + variables.Remove(0,1); // remove "V" + if( variables.Length() == 0 ){ + for( UInt_t ivar = 0; ivar < nvars; ++ivar ) { + fGet.push_back( std::make_pair<Char_t,UInt_t>('v',ivar) ); + varIndices.insert( ivar ); + } + } else { + UInt_t idx = variables.Atoi(); + if( idx >= nvars ) + Log() << kFATAL << "You selected variable with index : " << idx << " of only " << nvars << " variables." << Endl; + fGet.push_back( std::make_pair<Char_t,UInt_t>('v',idx) ); + varIndices.insert( idx ); + } + }else if( variables.BeginsWith("T") ) { // targets + variables.Remove(0,1); // remove "T" + if( variables.Length() == 0 ){ + for( UInt_t itgt = 0; itgt < ntgts; ++itgt ) { + fGet.push_back( std::make_pair<Char_t,UInt_t>('t',itgt) ); + tgtIndices.insert( itgt ); + } + } else { + UInt_t idx = variables.Atoi(); + if( idx >= ntgts ) + Log() << kFATAL << "You selected target with index : " << idx << " of only " << ntgts << " targets." << Endl; + fGet.push_back( std::make_pair<Char_t,UInt_t>('t',idx) ); + tgtIndices.insert( idx ); + } + }else if( variables.BeginsWith("S") ) { // spectators + variables.Remove(0,1); // remove "S" + if( variables.Length() == 0 ){ + for( UInt_t ispct = 0; ispct < nspcts; ++ispct ) { + fGet.push_back( std::make_pair<Char_t,UInt_t>('s',ispct) ); + spctIndices.insert( ispct ); + } + } else { + UInt_t idx = variables.Atoi(); + if( idx >= nspcts ) + Log() << kFATAL << "You selected spectator with index : " << idx << " of only " << nspcts << " spectators." 
<< Endl; + fGet.push_back( std::make_pair<Char_t,UInt_t>('s',idx) ); + spctIndices.insert( idx ); + } + }else if( TString("REARRANGE").BeginsWith(variables) ) { // toggle rearrange sorting (take sort order given in the options) + ToggleInputSortOrder( kFALSE ); + if( !fSortGet ) + Log() << kINFO << "Variable rearrangement set true: Variable order given in transformation option is used for input to transformation!" << Endl; + + } + }else{ // no keyword, ... user provided variable labels + Int_t numIndices = varIndices.size()+tgtIndices.size()+spctIndices.size(); + for( UInt_t ivar = 0; ivar < nvars; ++ivar ) { // search all variables + if( fDsi.GetVariableInfo( ivar ).GetLabel() == variables ) { + fGet.push_back( std::make_pair<Char_t,UInt_t>('v',ivar) ); + varIndices.insert( ivar ); + break; + } + } + for( UInt_t itgt = 0; itgt < ntgts; ++itgt ) { // search all targets + if( fDsi.GetTargetInfo( itgt ).GetLabel() == variables ) { + fGet.push_back( std::make_pair<Char_t,UInt_t>('t',itgt) ); + tgtIndices.insert( itgt ); + break; + } + } + for( UInt_t ispct = 0; ispct < nspcts; ++ispct ) { // search all spectators + if( fDsi.GetSpectatorInfo( ispct ).GetLabel() == variables ) { + fGet.push_back( std::make_pair<Char_t,UInt_t>('s',ispct) ); + spctIndices.insert( ispct ); + break; + } + } + Int_t numIndicesEndOfLoop = varIndices.size()+tgtIndices.size()+spctIndices.size(); + if( numIndicesEndOfLoop == numIndices ) + Log() << kWARNING << "Error at parsing the options for the variable transformations: Variable/Target/Spectator '" << variables.Data() << "' not found." 
<< Endl; + numIndices = numIndicesEndOfLoop; + } + } + + + if( putIntoVariables ) { + Int_t idx = 0; + for( SelectedIndices::iterator it = varIndices.begin(), itEnd = varIndices.end(); it != itEnd; ++it ) { + fPut.push_back( std::make_pair<Char_t,UInt_t>('v',idx) ); + ++idx; + } + for( SelectedIndices::iterator it = tgtIndices.begin(), itEnd = tgtIndices.end(); it != itEnd; ++it ) { + fPut.push_back( std::make_pair<Char_t,UInt_t>('t',idx) ); + ++idx; + } + for( SelectedIndices::iterator it = spctIndices.begin(), itEnd = spctIndices.end(); it != itEnd; ++it ) { + fPut.push_back( std::make_pair<Char_t,UInt_t>('s',idx) ); + ++idx; + } + }else { + for( SelectedIndices::iterator it = varIndices.begin(), itEnd = varIndices.end(); it != itEnd; ++it ) { + Int_t idx = (*it); + fPut.push_back( std::make_pair<Char_t,UInt_t>('v',idx) ); + } + for( SelectedIndices::iterator it = tgtIndices.begin(), itEnd = tgtIndices.end(); it != itEnd; ++it ) { + Int_t idx = (*it); + fPut.push_back( std::make_pair<Char_t,UInt_t>('t',idx) ); + } + for( SelectedIndices::iterator it = spctIndices.begin(), itEnd = spctIndices.end(); it != itEnd; ++it ) { + Int_t idx = (*it); + fPut.push_back( std::make_pair<Char_t,UInt_t>('s',idx) ); + } + + // if sorting is turned on, fGet should have the indices sorted as fPut has them. + if( fSortGet ) { + fGet.clear(); + fGet.assign( fPut.begin(), fPut.end() ); + } + } + + + Log() << kINFO << "Transformation, Variable selection : " << Endl; + + // choose the new dsi for output if present, if not, take the common one + const DataSetInfo* outputDsiPtr = (fDsiOutput? 
&(*fDsiOutput) : &fDsi ); + + + + ItVarTypeIdx itGet = fGet.begin(), itGetEnd = fGet.end(); + ItVarTypeIdx itPut = fPut.begin(), itPutEnd = fPut.end(); + for( ; itGet != itGetEnd && itPut != itPutEnd; ++itGet ) { // fPut is built from de-duplicated index sets and can be shorter than fGet; never dereference past its end + TString inputTypeString = "?"; + + Char_t inputType = (*itGet).first; + Int_t inputIdx = (*itGet).second; + + TString inputLabel = "NOT FOUND"; + if( inputType == 'v' ) { + inputLabel = fDsi.GetVariableInfo( inputIdx ).GetLabel(); + inputTypeString = "variable"; + } + else if( inputType == 't' ){ + inputLabel = fDsi.GetTargetInfo( inputIdx ).GetLabel(); + inputTypeString = "target"; + } + else if( inputType == 's' ){ + inputLabel = fDsi.GetSpectatorInfo( inputIdx ).GetLabel(); + inputTypeString = "spectator"; + } + + TString outputTypeString = "?"; + + Char_t outputType = (*itPut).first; + Int_t outputIdx = (*itPut).second; + + TString outputLabel = "NOT FOUND"; + if( outputType == 'v' ) { + outputLabel = outputDsiPtr->GetVariableInfo( outputIdx ).GetLabel(); + outputTypeString = "variable"; + } + else if( outputType == 't' ){ + outputLabel = outputDsiPtr->GetTargetInfo( outputIdx ).GetLabel(); + outputTypeString = "target"; + } + else if( outputType == 's' ){ + outputLabel = outputDsiPtr->GetSpectatorInfo( outputIdx ).GetLabel(); + outputTypeString = "spectator"; + } + + + Log() << kINFO << "Input : " << inputTypeString.Data() << " '" << inputLabel.Data() << "' (index=" << inputIdx << "). <---> " + << "Output : " << outputTypeString.Data() << " '" << outputLabel.Data() << "' (index=" << outputIdx << ")." 
<< Endl; + + ++itPut; + } +} + + +//_______________________________________________________________________ +Bool_t TMVA::VariableTransformBase::GetInput( const Event* event, std::vector<Float_t>& input, std::vector<Char_t>& mask, Bool_t backTransformation ) const +{ + // select the values from the event + + ItVarTypeIdxConst itEntry; + ItVarTypeIdxConst itEntryEnd; + + if( backTransformation ){ + itEntry = fPut.begin(); + itEntryEnd = fPut.end(); + } + else { + itEntry = fGet.begin(); + itEntryEnd = fGet.end(); + } + + input.clear(); + mask.clear(); + Bool_t hasMaskedEntries = kFALSE; +// event->Print(std::cout); + for( ; itEntry != itEntryEnd; ++itEntry ) { + Char_t type = (*itEntry).first; + Int_t idx = (*itEntry).second; + + try{ + switch( type ) { + case 'v': + input.push_back( event->GetValue(idx) ); + break; + case 't': + input.push_back( event->GetTarget(idx) ); + break; + case 's': + input.push_back( event->GetSpectator(idx) ); + break; + default: + Log() << kFATAL << "VariableTransformBase/GetInput : unknown type '" << type << "'." 
<< Endl; + } + mask.push_back(kFALSE); + } + catch(std::out_of_range& excpt){ // happens when an event is transformed which does not yet have the targets calculated (in the application phase) + input.push_back(0.f); + mask.push_back(kTRUE); + hasMaskedEntries = kTRUE; + } + } + return hasMaskedEntries; +} + +//_______________________________________________________________________ +void TMVA::VariableTransformBase::SetOutput( Event* event, std::vector<Float_t>& output, std::vector<Char_t>& mask, const Event* oldEvent, Bool_t backTransformation ) const +{ + // select the values from the event + + std::vector<Float_t>::iterator itOutput = output.begin(); + std::vector<Char_t>::iterator itMask = mask.begin(); + + if( oldEvent ) + event->CopyVarValues( *oldEvent ); + + try { + + ItVarTypeIdxConst itEntry; + ItVarTypeIdxConst itEntryEnd; + + if( !backTransformation ){ // as in GetInput, but the other way round (from fPut for transformation, from fGet for backTransformation) + itEntry = fPut.begin(); + itEntryEnd = fPut.end(); + } + else { + itEntry = fGet.begin(); + itEntryEnd = fGet.end(); + } + + + for( ; itEntry != itEntryEnd; ++itEntry, ++itMask ) { // the mask runs parallel to the entry list, so it must advance on every iteration (it used to advance only for masked entries) + + if( (*itMask) ){ // if the value is masked +// ++itOutput; // no value available + // itMask is advanced by the loop header; NOTE(review): output is assumed to hold no slot for masked entries - confirm against callers + continue; + } + + Char_t type = (*itEntry).first; + Int_t idx = (*itEntry).second; + + Float_t value = (*itOutput); + + switch( type ) { + case 'v': + event->SetVal( idx, value ); + break; + case 't': + event->SetTarget( idx, value ); + break; + case 's': + event->SetSpectator( idx, value ); + break; + default: + Log() << kFATAL << "VariableTransformBase/SetOutput : unknown type '" << type << "'." 
<< Endl; + } + ++itOutput; + } + }catch( std::exception& except ){ + Log() << kFATAL << "VariableTransformBase/SetOutput : exception/" << except.what() << Endl; + throw; + } +} + + +//_______________________________________________________________________ +void TMVA::VariableTransformBase::CountVariableTypes( UInt_t& nvars, UInt_t& ntgts, UInt_t& nspcts ) const +{ + // count variables, targets and spectators + if( fVariableTypesAreCounted ){ + nvars = fNVariables; + ntgts = fNTargets; + nspcts = fNSpectators; + return; + } + + nvars = ntgts = nspcts = 0; + + for( ItVarTypeIdxConst itEntry = fGet.begin(), itEntryEnd = fGet.end(); itEntry != itEntryEnd; ++itEntry ) { + Char_t type = (*itEntry).first; + + switch( type ) { + case 'v': + nvars++; + break; + case 't': + ntgts++; + break; + case 's': + nspcts++; + break; + default: + Log() << kFATAL << "VariableTransformBase/GetVariableTypeNumbers : unknown type '" << type << "'." << Endl; + } + } + + fNVariables = nvars; + fNTargets = ntgts; + fNSpectators = nspcts; + + fVariableTypesAreCounted = true; +} + + //_______________________________________________________________________ void TMVA::VariableTransformBase::CalcNorm( const std::vector<Event*>& events ) { + // TODO --> adapt to variable,target,spectator selection // method to calculate minimum, maximum, mean, and RMS for all // variables used in the MVA @@ -173,6 +545,7 @@ void TMVA::VariableTransformBase::CalcNorm( const std::vector<Event*>& events ) //_______________________________________________________________________ std::vector<TString>* TMVA::VariableTransformBase::GetTransformationStrings( Int_t /*cls*/ ) const { + // TODO --> adapt to variable,target,spectator selection // default transformation output // --> only indicate that transformation occurred std::vector<TString>* strVec = new std::vector<TString>; @@ -186,6 +559,7 @@ std::vector<TString>* TMVA::VariableTransformBase::GetTransformationStrings( Int 
//_______________________________________________________________________ void TMVA::VariableTransformBase::UpdateNorm ( Int_t ivar, Double_t x ) { + // TODO --> adapt to variable,target,spectator selection // update min and max of a given variable (target) and a given transformation method Int_t nvars = fDsi.GetNVariables(); if( ivar < nvars ){ @@ -197,3 +571,259 @@ void TMVA::VariableTransformBase::UpdateNorm ( Int_t ivar, Double_t x ) } } +//_______________________________________________________________________ +void TMVA::VariableTransformBase::AttachXMLTo(void* parent) +{ + // create XML description the transformation (write out info of selected variables) + + void* selxml = gTools().AddChild(parent, "Selection"); + + void* inpxml = gTools().AddChild(selxml, "Input"); + gTools().AddAttr(inpxml, "NInputs", fGet.size() ); + + // choose the new dsi for output if present, if not, take the common one + const DataSetInfo* outputDsiPtr = (fDsiOutput? fDsiOutput : &fDsi ); + + for( ItVarTypeIdx itGet = fGet.begin(), itGetEnd = fGet.end(); itGet != itGetEnd; ++itGet ) { + UInt_t idx = (*itGet).second; + Char_t type = (*itGet).first; + + TString label = ""; + TString expression = ""; + TString typeString = ""; + switch( type ){ + case 'v': + typeString = "Variable"; + label = fDsi.GetVariableInfo( idx ).GetLabel(); + expression = fDsi.GetVariableInfo( idx ).GetExpression(); + break; + case 't': + typeString = "Target"; + label = fDsi.GetTargetInfo( idx ).GetLabel(); + expression = fDsi.GetTargetInfo( idx ).GetExpression(); + break; + case 's': + typeString = "Spectator"; + label = fDsi.GetSpectatorInfo( idx ).GetLabel(); + expression = fDsi.GetSpectatorInfo( idx ).GetExpression(); + break; + default: + Log() << kFATAL << "VariableTransformBase/AttachXMLTo unknown variable type '" << type << "'." 
<< Endl; + } + + void* idxxml = gTools().AddChild(inpxml, "Input"); +// gTools().AddAttr(idxxml, "Index", idx); + gTools().AddAttr(idxxml, "Type", typeString); + gTools().AddAttr(idxxml, "Label", label); + gTools().AddAttr(idxxml, "Expression", expression); + } + + + void* outxml = gTools().AddChild(selxml, "Output"); + gTools().AddAttr(outxml, "NOutputs", fPut.size() ); + + for( ItVarTypeIdx itPut = fPut.begin(), itPutEnd = fPut.end(); itPut != itPutEnd; ++itPut ) { + UInt_t idx = (*itPut).second; + Char_t type = (*itPut).first; + + TString label = ""; + TString expression = ""; + TString typeString = ""; + switch( type ){ + case 'v': + typeString = "Variable"; + label = outputDsiPtr->GetVariableInfo( idx ).GetLabel(); + expression = outputDsiPtr->GetVariableInfo( idx ).GetExpression(); + break; + case 't': + typeString = "Target"; + label = outputDsiPtr->GetTargetInfo( idx ).GetLabel(); + expression = outputDsiPtr->GetTargetInfo( idx ).GetExpression(); + break; + case 's': + typeString = "Spectator"; + label = outputDsiPtr->GetSpectatorInfo( idx ).GetLabel(); + expression = outputDsiPtr->GetSpectatorInfo( idx ).GetExpression(); + break; + default: + Log() << kFATAL << "VariableTransformBase/AttachXMLTo unknown variable type '" << type << "'." 
<< Endl; + } + + void* idxxml = gTools().AddChild(outxml, "Output"); +// gTools().AddAttr(idxxml, "Index", idx); + gTools().AddAttr(idxxml, "Type", typeString); + gTools().AddAttr(idxxml, "Label", label); + gTools().AddAttr(idxxml, "Expression", expression); + } + + +} + +//_______________________________________________________________________ +void TMVA::VariableTransformBase::ReadFromXML( void* selnode ) +{ + // Read the input variables from the XML node + + void* inpnode = gTools().GetChild( selnode ); + void* outnode = gTools().GetNextChild( inpnode ); + + UInt_t nvars = GetNVariables(); + UInt_t ntgts = GetNTargets(); + UInt_t nspcts = GetNSpectators(); + + + + // read inputs + fGet.clear(); + + UInt_t nInputs = 0; + gTools().ReadAttr(inpnode, "NInputs", nInputs); + + void* ch = gTools().GetChild( inpnode ); + while(ch) { + TString typeString = ""; + TString label = ""; + TString expression = ""; + + gTools().ReadAttr(ch, "Type", typeString); + gTools().ReadAttr(ch, "Label", label); + gTools().ReadAttr(ch, "Expression", expression); + + if( typeString == "Variable" ){ + for( UInt_t ivar = 0; ivar < nvars; ++ivar ) { // search all variables + if( fDsi.GetVariableInfo( ivar ).GetLabel() == label || + fDsi.GetVariableInfo( ivar ).GetExpression() == expression) { + fGet.push_back( std::make_pair<Char_t,UInt_t>('v',ivar) ); + break; + } + } + }else if( typeString == "Target" ){ + for( UInt_t itgt = 0; itgt < ntgts; ++itgt ) { // search all targets + if( fDsi.GetTargetInfo( itgt ).GetLabel() == label || + fDsi.GetTargetInfo( itgt ).GetExpression() == expression ) { + fGet.push_back( std::make_pair<Char_t,UInt_t>('t',itgt) ); + break; + } + } + }else if( typeString == "Spectator" ){ + for( UInt_t ispct = 0; ispct < nspcts; ++ispct ) { // search all spectators + if( fDsi.GetSpectatorInfo( ispct ).GetLabel() == label || + fDsi.GetSpectatorInfo( ispct ).GetExpression() == expression ) { + fGet.push_back( std::make_pair<Char_t,UInt_t>('s',ispct) ); + break; + } + } + 
}else{ + Log() << kFATAL << "VariableTransformationBase/ReadFromXML : unknown type '" << typeString << "'." << Endl; + } + ch = gTools().GetNextChild( ch ); + } + + assert( nInputs == fGet.size() ); + + // read outputs + fPut.clear(); + + UInt_t nOutputs = 0; + gTools().ReadAttr(outnode, "NOutputs", nOutputs); + + void* chOut = gTools().GetChild( outnode ); + while(chOut) { + TString typeString = ""; + TString label = ""; + TString expression = ""; + + gTools().ReadAttr(chOut, "Type", typeString); + gTools().ReadAttr(chOut, "Label", label); + gTools().ReadAttr(chOut, "Expression", expression); + + if( typeString == "Variable" ){ + for( UInt_t ivar = 0; ivar < nvars; ++ivar ) { // search all variables + if( fDsi.GetVariableInfo( ivar ).GetLabel() == label || + fDsi.GetVariableInfo( ivar ).GetExpression() == expression ) { + fPut.push_back( std::make_pair<Char_t,UInt_t>('v',ivar) ); + break; + } + } + }else if( typeString == "Target" ){ + for( UInt_t itgt = 0; itgt < ntgts; ++itgt ) { // search all targets + if( fDsi.GetTargetInfo( itgt ).GetLabel() == label || + fDsi.GetTargetInfo( itgt ).GetExpression() == expression ) { + fPut.push_back( std::make_pair<Char_t,UInt_t>('t',itgt) ); + break; + } + } + }else if( typeString == "Spectator" ){ + for( UInt_t ispct = 0; ispct < nspcts; ++ispct ) { // search all spectators + if( fDsi.GetSpectatorInfo( ispct ).GetLabel() == label || + fDsi.GetSpectatorInfo( ispct ).GetExpression() == expression ) { + fPut.push_back( std::make_pair<Char_t,UInt_t>('s',ispct) ); + break; + } + } + }else{ + Log() << kFATAL << "VariableTransformationBase/ReadFromXML : unknown type '" << typeString << "'." 
<< Endl; + } + chOut = gTools().GetNextChild( chOut ); + } + + assert( nOutputs == fPut.size() ); + + +} + + +//_______________________________________________________________________ +void TMVA::VariableTransformBase::MakeFunction( std::ostream& fout, const TString& /*fncName*/, Int_t part, + UInt_t /*trCounter*/, Int_t /*cls*/ ) +{ + // getinput and setoutput equivalent + if( part == 0 ){ // definitions + fout << std::endl; + fout << " // define the indices of the variables which are transformed by this transformation" << std::endl; + fout << " std::vector<int> indicesGet;" << std::endl; + fout << " std::vector<int> indicesPut;" << std::endl << std::endl; + + for( ItVarTypeIdxConst itEntry = fGet.begin(), itEntryEnd = fGet.end(); itEntry != itEntryEnd; ++itEntry ) { + Char_t type = (*itEntry).first; + Int_t idx = (*itEntry).second; + + switch( type ) { + case 'v': + fout << " indicesGet.push_back( " << idx << ");" << std::endl; + break; + case 't': + Log() << kWARNING << "MakeClass doesn't work with transformation of targets. The results will be wrong!" << Endl; + break; + case 's': + Log() << kWARNING << "MakeClass doesn't work with transformation of spectators. The results will be wrong!" << Endl; + break; + default: + Log() << kFATAL << "VariableTransformBase/GetInput : unknown type '" << type << "'." << Endl; + } + } + + for( ItVarTypeIdxConst itEntry = fPut.begin(), itEntryEnd = fPut.end(); itEntry != itEntryEnd; ++itEntry ) { + Char_t type = (*itEntry).first; + Int_t idx = (*itEntry).second; + + switch( type ) { + case 'v': + fout << " indicesPut.push_back( " << idx << ");" << std::endl; + break; + case 't': + Log() << kWARNING << "MakeClass doesn't work with transformation of targets. The results will be wrong!" << Endl; + break; + case 's': + Log() << kWARNING << "MakeClass doesn't work with transformation of spectators. The results will be wrong!" 
<< Endl; + break; + default: + Log() << kFATAL << "VariableTransformBase/PutInput : unknown type '" << type << "'." << Endl; + } + } + + fout << std::endl; + + }else if( part == 1){ + } +} diff --git a/tmva/test/BDTControlPlots.C b/tmva/test/BDTControlPlots.C index 99ba4348c349f2b763753f5361272e6f66a1a0f4..86fb62edb9ad29155e4af88358409d804a87e8f1 100644 --- a/tmva/test/BDTControlPlots.C +++ b/tmva/test/BDTControlPlots.C @@ -46,14 +46,14 @@ void bdtcontrolplots( TDirectory *bdtdir ) { const TString titName = bdtdir->GetName(); - TString hname[nPlots]={"BoostWeight","BoostWeightVsTree","ErrFractHist","NodesBeforePruning","NodesAfterPruning",titName+"_FOMvsIterFrame"} + TString hname[nPlots]={"BoostMonitor","BoostWeight","BoostWeightVsTree","ErrFractHist","NodesBeforePruning",titName+"_FOMvsIterFrame"} for (Int_t i=0; i<nPlots; i++){ Int_t color = 4; TPad * cPad = (TPad*)c->cd(i+1); TH1 *h = (TH1*) bdtdir->Get(hname[i]); + if (h){ - TString plotname = h->GetName(); h->SetMaximum(h->GetMaximum()*1.3); h->SetMinimum( 0 ); h->SetMarkerColor(color); @@ -61,7 +61,18 @@ void bdtcontrolplots( TDirectory *bdtdir ) { h->SetMarkerStyle( 24 ); h->SetLineWidth(1); h->SetLineColor(color); + if(hname[i]=="NodesBeforePruning")h->SetTitle("Nodes before/after pruning"); h->Draw(); + if(hname[i]=="NodesBeforePruning"){ + TH1 *h2 = (TH1*) bdtdir->Get("NodesAfterPruning"); + h2->SetLineWidth(1); + h2->SetLineColor(2); + h2->Draw("same"); + } + if(hname[i]=="BoostMonitor"){ // a plot only available in case of automatic parameter option tuning + TGraph *g = (TGraph*) bdtdir->Get("BoostMonitorGraph"); + g->Draw("LP*"); + } if(hname[i]==titName+"_FOMvsIterFrame"){ // a plot only available in case of automatic parameter option tuning TGraph *g = (TGraph*) bdtdir->Get(titName+"_FOMvsIter"); g->Draw(); diff --git a/tmva/test/Boost.C b/tmva/test/Boost.C index dfa0939c1ed432a8615e563c314d10710091cad2..87e41da9ab480bbbdc5d45dc098c875fbdc5be87 100644 --- a/tmva/test/Boost.C +++ b/tmva/test/Boost.C 
@@ -57,13 +57,18 @@ void Boost(){ "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); TString fisher="H:!V"; + TString mlp = "H:!V:NeuronType=tanh:NCycles=100:HiddenLayers=N:TestRate=5:!UseRegulator"; factory->BookMethod( TMVA::Types::kFisher, "Fisher", fisher ); - factory->BookMethod( TMVA::Types::kFisher, "FisherBoost", fisher+":Boost_Num=100:Boost_Type=AdaBoost" ); - factory->BookMethod( TMVA::Types::kFisher, "FisherBoostLog", fisher+":Boost_Num=100:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.0" ); - factory->BookMethod( TMVA::Types::kFisher, "FisherBoostLog2", fisher+":Boost_Num=100:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=2.0" ); - factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.0" ); - factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep2", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.2" ); - factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep3", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.5" ); + factory->BookMethod( TMVA::Types::kFisher, "FisherBoost", fisher+":Boost_Num=10:Boost_Type=AdaBoost" ); + factory->BookMethod(TMVA::Types::kMLP, "MLP", mlp); + + factory->BookMethod(TMVA::Types::kMLP, "BoostedMLP", mlp+":Boost_Num=3:Boost_Type=AdaBoost:Boost_Transform=linear:" ); + + //factory->BookMethod( TMVA::Types::kFisher, "FisherBoostLog", fisher+":Boost_Num=100:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.0" ); + //factory->BookMethod( TMVA::Types::kFisher, "FisherBoostLog2", fisher+":Boost_Num=100:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=2.0" ); + //factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.0" ); + //factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep2", 
fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.2" ); + //factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep3", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.5" ); // Train MVAs using the set of training events factory->TrainAllMethods(); diff --git a/tmva/test/Boost2.C b/tmva/test/Boost2.C index ae2414b8dc967eab6d816a0960f3b45a06561052..63a80667cec27f2b3e965172b44e63f4a44b19ec 100644 --- a/tmva/test/Boost2.C +++ b/tmva/test/Boost2.C @@ -56,10 +56,11 @@ void Boost2(){ factory->PrepareTrainingAndTestTree( "", "", "nTrain_Signal=10000:nTrain_Background=10000:SplitMode=Random:NormMode=NumEvents:!V" ); - TString fisher="H:!V"; + TString fisher="!H:!V"; factory->BookMethod( TMVA::Types::kFisher, "Fisher", fisher ); - factory->BookMethod( TMVA::Types::kFisher, "FisherBS", fisher+":Boost_Num=100:Boost_Type=Bagging:Boost_Transform=step" ); - factory->BookMethod( TMVA::Types::kFisher, "FisherS", fisher+":Boost_Num=100:Boost_Type=AdaBoost:Boost_Transform=step" ); + factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:V:NTrees=150:NCuts=101:MaxDepth=1:UseFisherCuts:UseExclusiveVars:MinLinCorrForFisher=0." 
); +// factory->BookMethod( TMVA::Types::kFisher, "FisherBS", fisher+":Boost_Num=100:Boost_Type=Bagging:Boost_Transform=step" ); + factory->BookMethod( TMVA::Types::kFisher, "FisherS", fisher+":Boost_Num=150:Boost_Type=AdaBoost:Boost_Transform=step" ); // Train MVAs using the set of training events diff --git a/tmva/test/BoostControlPlots.C b/tmva/test/BoostControlPlots.C index 8f99251fcf6b0b9d7aa0fb8223269b2a52b734b7..d4dd2f81fc1aa1ef4e73bde259f13bd7e2a676a0 100644 --- a/tmva/test/BoostControlPlots.C +++ b/tmva/test/BoostControlPlots.C @@ -1,5 +1,7 @@ #include <vector> #include <string> +#include "TLegend.h" +#include "TText.h" #include "tmvaglob.C" @@ -76,6 +78,10 @@ void boostcontrolplots( TDirectory *boostdir ) { TPad * cPad = (TPad*)c->cd(nPlots+i+1); TH1 *htest = (TH1*) boostdir->Get(hname_roctest[i]); TH1 *htrain = (TH1*) boostdir->Get(hname_roctrain[i]); + + // check if filled + Bool_t histFilled = (htest->GetMaximum() > 0 || htrain->GetMaximum() > 0); + htest->SetTitle(htitle[i]); htest->SetMaximum(1.0); htest->SetMinimum(0.0); @@ -94,16 +100,25 @@ void boostcontrolplots( TDirectory *boostdir ) { htrain->SetLineColor(color-2); htrain->Draw("same"); - TLegend *legend= new TLegend( cPad->GetLeftMargin(), - 0.2 + cPad->GetBottomMargin(), - cPad->GetLeftMargin() + 0.6, - cPad->GetBottomMargin() ); - legend->AddEntry(htest, TString("testing sample"), "L"); - legend->AddEntry(htrain, TString("training sample (orig. weights)"), "L"); - legend->SetFillStyle( 1 ); - legend->SetBorderSize(1); - legend->SetMargin( 0.3 ); - legend->Draw("same"); + if (histFilled) { + TLegend *legend= new TLegend( cPad->GetLeftMargin(), + 0.2 + cPad->GetBottomMargin(), + cPad->GetLeftMargin() + 0.6, + cPad->GetBottomMargin() ); + legend->AddEntry(htest, TString("testing sample"), "L"); + legend->AddEntry(htrain, TString("training sample (orig. 
weights)"), "L"); + legend->SetFillStyle( 1 ); + legend->SetBorderSize(1); + legend->SetMargin( 0.3 ); + legend->Draw("same"); + } + else { + TText* t = new TText(); + t->SetTextSize( 0.056 ); + t->SetTextColor( 2 ); + t->DrawText( 1, 0.6, "Use MethodBoost option: \"DetailedMonitoring\" " ); + t->DrawText( 1, 0.51, "to fill this histograms" ); + } c->Update(); } diff --git a/tmva/test/ClassApplication.C b/tmva/test/ClassApplication.C index d173e98632ec55bd37490f6e31970e72a17d632a..d18192f8a19f0e94b08a0ac608fdd6c0c5f09d69 100644 --- a/tmva/test/ClassApplication.C +++ b/tmva/test/ClassApplication.C @@ -7,6 +7,20 @@ **********************************************************************************/ #include <vector> +class ReadLikelihoodPCA; +class ReadLikelihoodMIX; +class ReadHMatrix; +class ReadFisherG; +class ReadLD; +class ReadFDA_MT; +class ReadFDA_MC; +class ReadFDA_GA; +class ReadMLP; +class ReadMLPBFGS; +class ReadBDT; +class ReadBDTD; +class ReadBDTG; +class ReadBDTB; void ClassApplication( TString myMethodList = "Fisher" ) { diff --git a/tmva/test/PlotDecisionBoundary.C b/tmva/test/PlotDecisionBoundary.C index b2a46141f88e63de13ed638ecb7acb22b19dbb07..a6898a6d7cfd1cf2626cc59dbf769d32b5e104d8 100755 --- a/tmva/test/PlotDecisionBoundary.C +++ b/tmva/test/PlotDecisionBoundary.C @@ -9,7 +9,6 @@ #include <cstdlib> #include <vector> #include <iostream> -#include <map> #include <string> #include "TFile.h" @@ -27,32 +26,43 @@ #include "TMVA/Tools.h" #include "TMVA/Reader.h" #include "TMVA/MethodCuts.h" +#include "TMVA/SeparationBase.h" +#include "TMVA/GiniIndex.h" +#include "TMVA/MisClassificationErrror.h" #endif using namespace TMVA; -void plot(TH2D *sig, TH2D *bkg, TH2F *MVA, TString v0="var0", TString v1="var1"){ +void plot(TH2D *sig, TH2D *bkg, TH2F *MVA, TString v0="var0", TString v1="var1",Float_t mvaCut){ TCanvas *c = new TCanvas(Form("DecisionBoundary%s",MVA->GetTitle()),MVA->GetTitle(),800,800); + cout << "MVACut = "<<mvaCut << endl; gStyle->SetPalette(1); 
MVA->SetXTitle(v0); MVA->SetYTitle(v1); MVA->SetStats(0); - MVA->Draw("cont1"); - sig->SetMarkerColor(2); - bkg->SetMarkerColor(4); + Double_t contours[1]; + contours[0]=mvaCut; + MVA->SetLineWidth(7); + MVA->SetLineStyle(1); + MVA->SetMarkerColor(1); + MVA->SetLineColor(1); + MVA->SetContour(1, contours); + sig->SetMarkerColor(4); + bkg->SetMarkerColor(2); sig->SetMarkerStyle(20); bkg->SetMarkerStyle(20); - sig->SetMarkerSize(.5); - bkg->SetMarkerSize(.5); - sig->Draw("same"); + sig->SetMarkerSize(.2); + bkg->SetMarkerSize(.2); + sig->Draw(); bkg->Draw("same"); + MVA->Draw("CONT2 same"); } -void PlotDecisionBoundary( TString myMethodList = "",TString v0="var0", TString v1="var1", TString dataFileName = "/home/hvoss/TMVA/TMVA_data/data/data_3Bumps.root", TString weightFilePrefix="TMVA") +void PlotDecisionBoundary( TString weightFile = "weights/TMVAClassification_BDT.weights.xml",TString v0="var0", TString v1="var1", TString dataFileName = "/home/hvoss/TMVA/TMVA_data/data/data_circ.root") { //--------------------------------------------------------------- // default MVA methods to be trained + tested @@ -60,75 +70,9 @@ void PlotDecisionBoundary( TString myMethodList = "",TString v0="var0", TString // this loads the library TMVA::Tools::Instance(); - std::map<std::string,int> Use; - - Use["CutsGA"] = 0; // other "Cuts" methods work identically - // --- - Use["Likelihood"] = 0; - Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) - Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) - Use["LikelihoodKDE"] = 0; - Use["LikelihoodMIX"] = 0; - // --- - Use["PDERS"] = 0; - Use["PDERSD"] = 0; - Use["PDERSPCA"] = 0; - Use["PDERSkNN"] = 0; // depreciated until further notice - Use["PDEFoam"] = 0; - // -- - Use["KNN"] = 0; - // --- - Use["HMatrix"] = 0; - Use["Fisher"] = 0; - Use["FisherG"] = 0; - Use["BoostedFisher"] = 0; - Use["LD"] = 0; - // --- - Use["FDA_GA"] 
= 0; - Use["FDA_SA"] = 0; - Use["FDA_MC"] = 0; - Use["FDA_MT"] = 0; - Use["FDA_GAMT"] = 0; - Use["FDA_MCMT"] = 0; - // --- - Use["MLP"] = 0; // this is the recommended ANN - Use["MLPBFGS"] = 0; // recommended ANN with optional training method - Use["CFMlpANN"] = 0; // *** missing - Use["TMlpANN"] = 0; - // --- - Use["SVM"] = 0; - // --- - Use["BDT"] = 0; - Use["BDTD"] = 0; - Use["BDTG"] = 0; - Use["BDTB"] = 0; - // --- - Use["RuleFit"] = 0; - // --- - Use["Category"] = 0; - // --- - Use["Plugin"] = 0; - // --------------------------------------------------------------- - std::cout << std::endl; std::cout << "==> Start TMVAClassificationApplication" << std::endl; - if (myMethodList != "") { - for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; - - std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' ); - for (UInt_t i=0; i<mlist.size(); i++) { - std::string regMethod(mlist[i]); - - if (Use.find(regMethod) == Use.end()) { - std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. 
Choose among the following:" << std::endl; - for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; - std::cout << std::endl; - return; - } - Use[regMethod] = 1; - } - } // // create the Reader object @@ -143,32 +87,22 @@ void PlotDecisionBoundary( TString myMethodList = "",TString v0="var0", TString reader->AddVariable( v1, &var1 ); // - // book the MVA methods + // book the MVA method // - TString dir = "weights/"; - TString prefix = weightFilePrefix; - - // book method(s) - for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) { - if (it->second) { - TString methodName = it->first + " method"; - TString weightfile = dir + prefix + "_" + TString(it->first) + ".weights.xml"; - reader->BookMVA( methodName, weightfile ); - } - } + reader->BookMVA( "M1", weightFile ); TFile *f = new TFile(dataFileName); TTree *signal = (TTree*)f->Get("TreeS"); TTree *background = (TTree*)f->Get("TreeB"); - - //Declaration of leaves types Float_t svar0; Float_t svar1; Float_t bvar0; Float_t bvar1; + Float_t sWeight=1.0; // just in case you have weight defined, also set these branchaddresses + Float_t bWeight=1.0*signal->GetEntries()/background->GetEntries(); // just in case you have weight defined, also set these branchaddresses // Set branch addresses. 
signal->SetBranchAddress(v0,&svar0); @@ -182,188 +116,150 @@ void PlotDecisionBoundary( TString myMethodList = "",TString v0="var0", TString Float_t xmin = signal->GetMinimum(v0.Data()); Float_t ymax = signal->GetMaximum(v1.Data()); Float_t ymin = signal->GetMinimum(v1.Data()); + + xmax = TMath::Max(xmax,background->GetMaximum(v0.Data())); + xmin = TMath::Min(xmin,background->GetMinimum(v0.Data())); + ymax = TMath::Max(ymax,background->GetMaximum(v1.Data())); + ymin = TMath::Min(ymin,background->GetMinimum(v1.Data())); + TH2D *hs=new TH2D("hs","",nbin,xmin,xmax,nbin,ymin,ymax); TH2D *hb=new TH2D("hb","",nbin,xmin,xmax,nbin,ymin,ymax); + hs->SetXTitle(v0); + hs->SetYTitle(v1); + hb->SetXTitle(v0); + hb->SetYTitle(v1); + hs->SetMarkerColor(4); + hb->SetMarkerColor(2); + + + TH2F * hist = new TH2F( "MVA", "MVA", nbin,xmin,xmax,nbin,ymin,ymax); + + // Prepare input tree (this must be replaced by your data source) + // in this example, there is a toy tree with signal and one with background events + // we'll later on use only the "signal" events for the test in this example. 
+ + Float_t MinMVA=10000, MaxMVA=-100000; + for (Int_t ibin=1; ibin<nbin+1; ibin++){ + for (Int_t jbin=1; jbin<nbin+1; jbin++){ + var0 = hs->GetXaxis()->GetBinCenter(ibin); + var1 = hs->GetYaxis()->GetBinCenter(jbin); + Float_t mvaVal=reader->EvaluateMVA( "M1" ) ; + if (MinMVA>mvaVal) MinMVA=mvaVal; + if (MaxMVA<mvaVal) MaxMVA=mvaVal; + hist->SetBinContent(ibin,jbin, mvaVal); + } + } + + // creating a fine histograms containing the error rate + const Int_t nValBins=100; + Double_t sum = 0.; + TH1F *mvaS= new TH1F("mvaS","",nValBins,MinMVA,MaxMVA); + TH1F *mvaB= new TH1F("mvaB","",nValBins,MinMVA,MaxMVA); + TH1F *mvaSC= new TH1F("mvaSC","",nValBins,MinMVA,MaxMVA); + TH1F *mvaBC= new TH1F("mvaBC","",nValBins,MinMVA,MaxMVA); Long64_t nentries; nentries = TreeS->GetEntries(); for (Long64_t is=0; is<nentries;is++) { signal->GetEntry(is); + sum +=sWeight; + var0 = svar0; + var1 = svar1; + Float_t mvaVal=reader->EvaluateMVA( "M1" ) ; hs->Fill(svar0,svar1); + mvaS->Fill(mvaVal,sWeight); } nentries = TreeB->GetEntries(); for (Long64_t ib=0; ib<nentries;ib++) { background->GetEntry(ib); + sum +=bWeight; + var0 = bvar0; + var1 = bvar1; + Float_t mvaVal=reader->EvaluateMVA( "M1" ) ; hb->Fill(bvar0,bvar1); + mvaB->Fill(mvaVal,bWeight); } + //SeparationBase *sepGain = new MisClassificationError(); + //SeparationBase *sepGain = new GiniIndex(); + SeparationBase *sepGain = new CrossEntropy(); + + Double_t sTot = mvaS->GetSum(); + Double_t bTot = mvaB->GetSum(); + + mvaSC->SetBinContent(1,mvaS->GetBinContent(1)); + mvaBC->SetBinContent(1,mvaB->GetBinContent(1)); + Double_t sSel=mvaSC->GetBinContent(1); + Double_t bSel=mvaBC->GetBinContent(1); + Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot); + Double_t mvaCut=mvaSC->GetBinCenter(1); + Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. 
mva*-1 > mvaCut*-1) --> Signal + for (Int_t ibin=2;ibin<nValBins;ibin++){ + mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1)); + mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1)); + + sSel=mvaSC->GetBinContent(ibin); + bSel=mvaBC->GetBinContent(ibin); + + if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot) && mvaSC->GetBinCenter(ibin)<0){ + separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot); + mvaCut=mvaSC->GetBinCenter(ibin); + if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1; + else mvaCutOrientation=1; + } + } + - hb->SetMarkerColor(4); - hs->SetMarkerColor(2); - - - // book output histograms - TH2F *histLk(0), *histLkD(0), *histLkPCA(0), *histLkKDE(0), *histLkMIX(0), *histPD(0), *histPDD(0); - TH2F *histPDPCA(0), *histPDEFoam(0), *histPDEFoamErr(0), *histPDEFoamSig(0), *histKNN(0), *histHm(0); - TH2F *histFi(0), *histFiG(0), *histFiB(0), *histLD(0), *histNn(0), *histNnC(0), *histNnT(0), *histBdt(0), *histBdtG(0), *histBdtD(0); - TH2F *histRf(0), *histSVMG(0), *histSVMP(0), *histSVML(0), *histFDAMT(0), *histFDAGA(0), *histCat(0), *histPBdt(0); - - if (Use["Likelihood"]) histLk = new TH2F( "MVA_Likelihood", "MVA_Likelihood", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["LikelihoodD"]) histLkD = new TH2F( "MVA_LikelihoodD", "MVA_LikelihoodD", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["LikelihoodPCA"]) histLkPCA = new TH2F( "MVA_LikelihoodPCA", "MVA_LikelihoodPCA", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["LikelihoodKDE"]) histLkKDE = new TH2F( "MVA_LikelihoodKDE", "MVA_LikelihoodKDE", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["LikelihoodMIX"]) histLkMIX = new TH2F( "MVA_LikelihoodMIX", "MVA_LikelihoodMIX", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["PDERS"]) histPD = new TH2F( "MVA_PDERS", "MVA_PDERS", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["PDERSD"]) histPDD = new TH2F( "MVA_PDERSD", "MVA_PDERSD", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["PDERSPCA"]) 
histPDPCA = new TH2F( "MVA_PDERSPCA", "MVA_PDERSPCA", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["KNN"]) histKNN = new TH2F( "MVA_KNN", "MVA_KNN", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["HMatrix"]) histHm = new TH2F( "MVA_HMatrix", "MVA_HMatrix", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["Fisher"]) histFi = new TH2F( "MVA_Fisher", "MVA_Fisher", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["FisherG"]) histFiG = new TH2F( "MVA_FisherG", "MVA_FisherG", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["BoostedFisher"]) histFiB = new TH2F( "MVA_BoostedFisher", "MVA_BoostedFisher", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["LD"]) histLD = new TH2F( "MVA_LD", "MVA_LD", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["MLP"]) histNn = new TH2F( "MVA_MLP", "MVA_MLP", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["CFMlpANN"]) histNnC = new TH2F( "MVA_CFMlpANN", "MVA_CFMlpANN", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["TMlpANN"]) histNnT = new TH2F( "MVA_TMlpANN", "MVA_TMlpANN", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["BDT"]) histBdt = new TH2F( "MVA_BDT", "MVA_BDT", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["BDTD"]) histBdtD = new TH2F( "MVA_BDTD", "MVA_BDTD", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["BDTG"]) histBdtG = new TH2F( "MVA_BDTG", "MVA_BDTG", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["RuleFit"]) histRf = new TH2F( "MVA_RuleFit", "MVA_RuleFit", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["SVM_Gauss"]) histSVMG = new TH2F( "MVA_SVM_Gauss", "MVA_SVM_Gauss", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["SVM_Poly"]) histSVMP = new TH2F( "MVA_SVM_Poly", "MVA_SVM_Poly", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["SVM_Lin"]) histSVML = new TH2F( "MVA_SVM_Lin", "MVA_SVM_Lin", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["FDA_MT"]) histFDAMT = new TH2F( "MVA_FDA_MT", "MVA_FDA_MT", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["FDA_GA"]) histFDAGA = new TH2F( "MVA_FDA_GA", "MVA_FDA_GA", nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["Category"]) histCat = new TH2F( "MVA_Category", "MVA_Category", 
nbin,xmin,xmax,nbin,ymin,ymax); - if (Use["Plugin"]) histPBdt = new TH2F( "MVA_PBDT", "MVA_BDT", nbin,xmin,xmax,nbin,ymin,ymax); - - - - // Prepare input tree (this must be replaced by your data source) - // in this example, there is a toy tree with signal and one with background events - // we'll later on use only the "signal" events for the test in this example. - + cout << "Min="<<MinMVA << " Max=" << MaxMVA + << " sTot=" << sTot + << " bTot=" << bTot + << " sepGain="<<separationGain + << " cut=" << mvaCut + << " cutOrientation="<<mvaCutOrientation + << endl; - for (Int_t ibin=1; ibin<nbin+1; ibin++){ - for (Int_t jbin=1; jbin<nbin+1; jbin++){ - var0 = hs->GetXaxis()->GetBinCenter(ibin); - var1 = hs->GetYaxis()->GetBinCenter(jbin); - - - if (Use["Likelihood" ]) histLk ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "Likelihood method" ) ); - if (Use["LikelihoodD" ]) histLkD ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "LikelihoodD method" ) ); - if (Use["LikelihoodPCA"]) histLkPCA ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "LikelihoodPCA method" ) ); - if (Use["LikelihoodKDE"]) histLkKDE ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "LikelihoodKDE method" ) ); - if (Use["LikelihoodMIX"]) histLkMIX ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "LikelihoodMIX method" ) ); - if (Use["PDERS" ]) histPD ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "PDERS method" ) ); - if (Use["PDERSD" ]) histPDD ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "PDERSD method" ) ); - if (Use["PDERSPCA" ]) histPDPCA ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "PDERSPCA method" ) ); - if (Use["KNN" ]) histKNN ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "KNN method" ) ); - if (Use["HMatrix" ]) histHm ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "HMatrix method" ) ); - if (Use["Fisher" ]) histFi ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "Fisher method" ) ); - if (Use["FisherG" ]) histFiG ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "FisherG method" ) ); - 
if (Use["BoostedFisher"]) histFiB ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "BoostedFisher method" ) ); - if (Use["LD" ]) histLD ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "LD method" ) ); - if (Use["MLP" ]) histNn ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "MLP method" ) ); - if (Use["CFMlpANN" ]) histNnC ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "CFMlpANN method" ) ); - if (Use["TMlpANN" ]) histNnT ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "TMlpANN method" ) ); - if (Use["BDT" ]) histBdt ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "BDT method" ) ); - if (Use["BDTD" ]) histBdtD ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "BDTD method" ) ); - if (Use["BDTG" ]) histBdtG ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "BDTG method" ) ); - if (Use["RuleFit" ]) histRf ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "RuleFit method" ) ); - if (Use["SVM_Gauss" ]) histSVMG ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "SVM_Gauss method" ) ); - if (Use["SVM_Poly" ]) histSVMP ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "SVM_Poly method" ) ); - if (Use["SVM_Lin" ]) histSVML ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "SVM_Lin method" ) ); - if (Use["FDA_MT" ]) histFDAMT ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "FDA_MT method" ) ); - if (Use["FDA_GA" ]) histFDAGA ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "FDA_GA method" ) ); - if (Use["Category" ]) histCat ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "Category method" ) ); - if (Use["Plugin" ]) histPBdt ->SetBinContent(ibin,jbin, reader->EvaluateMVA( "P_BDT method" ) ); - } - } + delete reader; + gStyle->SetPalette(1); - std::cout << "--- Created root file: \"TMVApp.root\" containing the MVA output histograms" << std::endl; - delete reader; + plot(hs,hb,hist ,v0,v1,mvaCut); + TCanvas *cm=new TCanvas ("cm","",900,1200); + cm->cd(); + cm->Divide(1,2); + cm->cd(1); + mvaS->SetLineColor(4); + mvaB->SetLineColor(2); + mvaS->Draw(); + mvaB->Draw("same"); - - std::cout << 
"==> TMVAClassificationApplication is done!" << endl << std::endl; + cm->cd(2); + mvaSC->SetLineColor(4); + mvaBC->SetLineColor(2); + mvaBC->Draw(); + mvaSC->Draw("same"); + // TH1F *add=(TH1F*)mvaBC->Clone("add"); + // add->Add(mvaSC); + // add->Draw(); - gStyle->SetPalette(1); + // errh->Draw("same"); - if (Use["Likelihood" ]) plot(hs,hb,histLk ,v0,v1); - if (Use["LikelihoodD" ]) plot(hs,hb,histLkD ,v0,v1); - if (Use["LikelihoodPCA"]) plot(hs,hb,histLkPCA ,v0,v1); - if (Use["LikelihoodKDE"]) plot(hs,hb,histLkKDE ,v0,v1); - if (Use["LikelihoodMIX"]) plot(hs,hb,histLkMIX ,v0,v1); - if (Use["PDERS" ]) plot(hs,hb,histPD ,v0,v1); - if (Use["PDERSD" ]) plot(hs,hb,histPDD ,v0,v1); - if (Use["PDERSPCA" ]) plot(hs,hb,histPDPCA ,v0,v1); - if (Use["KNN" ]) plot(hs,hb,histKNN ,v0,v1); - if (Use["HMatrix" ]) plot(hs,hb,histHm ,v0,v1); - if (Use["Fisher" ]) plot(hs,hb,histFi ,v0,v1); - if (Use["FisherG" ]) plot(hs,hb,histFiG ,v0,v1); - if (Use["BoostedFisher"]) plot(hs,hb,histFiB ,v0,v1); - if (Use["LD" ]) plot(hs,hb,histLD ,v0,v1); - if (Use["MLP" ]) plot(hs,hb,histNn ,v0,v1); - if (Use["CFMlpANN" ]) plot(hs,hb,histNnC ,v0,v1); - if (Use["TMlpANN" ]) plot(hs,hb,histNnT ,v0,v1); - if (Use["BDT" ]) plot(hs,hb,histBdt ,v0,v1); - if (Use["BDTD" ]) plot(hs,hb,histBdtD ,v0,v1); - if (Use["BDTG" ]) plot(hs,hb,histBdtG ,v0,v1); - if (Use["RuleFit" ]) plot(hs,hb,histRf ,v0,v1); - if (Use["SVM_Gauss" ]) plot(hs,hb,histSVMG ,v0,v1); - if (Use["SVM_Poly" ]) plot(hs,hb,histSVMP ,v0,v1); - if (Use["SVM_Lin" ]) plot(hs,hb,histSVML ,v0,v1); - if (Use["FDA_MT" ]) plot(hs,hb,histFDAMT ,v0,v1); - if (Use["FDA_GA" ]) plot(hs,hb,histFDAGA ,v0,v1); - if (Use["Category" ]) plot(hs,hb,histCat ,v0,v1); - if (Use["Plugin" ]) plot(hs,hb,histPBdt ,v0,v1); - - // // write histograms // - TFile *target = new TFile( "TMVApp.root","RECREATE" ); + TFile *target = new TFile( "TMVAPlotDecisionBoundary.root","RECREATE" ); hs->Write(); hb->Write(); - if (Use["Likelihood" ]) histLk ->Write(); - if 
(Use["LikelihoodD" ]) histLkD ->Write(); - if (Use["LikelihoodPCA"]) histLkPCA ->Write(); - if (Use["LikelihoodKDE"]) histLkKDE ->Write(); - if (Use["LikelihoodMIX"]) histLkMIX ->Write(); - if (Use["PDERS" ]) histPD ->Write(); - if (Use["PDERSD" ]) histPDD ->Write(); - if (Use["PDERSPCA" ]) histPDPCA ->Write(); - if (Use["KNN" ]) histKNN ->Write(); - if (Use["HMatrix" ]) histHm ->Write(); - if (Use["Fisher" ]) histFi ->Write(); - if (Use["FisherG" ]) histFiG ->Write(); - if (Use["BoostedFisher"]) histFiB ->Write(); - if (Use["LD" ]) histLD ->Write(); - if (Use["MLP" ]) histNn ->Write(); - if (Use["CFMlpANN" ]) histNnC ->Write(); - if (Use["TMlpANN" ]) histNnT ->Write(); - if (Use["BDT" ]) histBdt ->Write(); - if (Use["BDTD" ]) histBdtD ->Write(); - if (Use["BDTG" ]) histBdtG ->Write(); - if (Use["RuleFit" ]) histRf ->Write(); - if (Use["SVM_Gauss" ]) histSVMG ->Write(); - if (Use["SVM_Poly" ]) histSVMP ->Write(); - if (Use["SVM_Lin" ]) histSVML ->Write(); - if (Use["FDA_MT" ]) histFDAMT ->Write(); - if (Use["FDA_GA" ]) histFDAGA ->Write(); - if (Use["Category" ]) histCat ->Write(); - if (Use["Plugin" ]) histPBdt ->Write(); + hist->Write(); target->Close(); diff --git a/tmva/test/PlotFoams.C b/tmva/test/PlotFoams.C index 54ed9bc2f1a511b345cbacece6f0f4cdeb9a633a..1c63e2c61de84e3a45631f88a3e90e16b8e01d91 100644 --- a/tmva/test/PlotFoams.C +++ b/tmva/test/PlotFoams.C @@ -1,12 +1,12 @@ #include "tmvaglob.C" #include "TControlBar.h" +#include "TMap.h" + #include <sstream> #include <string> #include <cfloat> - -typedef enum { kNEV, kDISCR, kMONO, kRMS, kRMSOVMEAN } EPlotType; -typedef enum { kSEPARATE, kUNIFIED, kMONOTARGET, kMULTITARGET } EFoamType; +#include "TMVA/PDEFoam.h" void PlotFoams( TString fin = "weights/TMVAClassification_PDEFoam.weights_foams.root", bool useTMVAStyle=kTRUE ) @@ -21,192 +21,244 @@ void PlotFoams( TString fin = "weights/TMVAClassification_PDEFoam.weights_foams. 
TControlBar* cbar = new TControlBar( "vertical", "Choose cell value for plot:", 50, 50 ); if ((gDirectory->Get("SignalFoam") && gDirectory->Get("BgFoam")) || gDirectory->Get("MultiTargetRegressionFoam")) { - TString macro = Form( "Plot(\"%s\", kNEV)", fin.Data() ); + TString macro = Form( "Plot(\"%s\", TMVA::kValueDensity, \"Event density\", %s)", + fin.Data(), (useTMVAStyle ? "kTRUE" : "kFALSE") ); cbar->AddButton( "Event density", macro, "Plot event density", "button" ); - } else if (gDirectory->Get("DiscrFoam")){ - TString macro = Form( "Plot(\"%s\", kDISCR)", fin.Data() ); + } else if (gDirectory->Get("DiscrFoam") || gDirectory->Get("MultiClassFoam0")){ + TString macro = Form( "Plot(\"%s\", TMVA::kValue, \"Discriminator\", %s)", + fin.Data(), (useTMVAStyle ? "kTRUE" : "kFALSE") ); cbar->AddButton( "Discriminator", macro, "Plot discriminator", "button" ); } else if (gDirectory->Get("MonoTargetRegressionFoam")){ - TString macro = Form( "Plot(\"%s\", kMONO)", fin.Data() ); + TString macro = Form( "Plot(\"%s\", TMVA::kValue, \"Target\", %s)", + fin.Data(), (useTMVAStyle ? "kTRUE" : "kFALSE") ); cbar->AddButton( "Target", macro, "Plot target", "button" ); } else { cout << "Error: no foams found in file: " << fin << endl; return; } - TString macro_rms = Form( "Plot(\"%s\", kRMS)", fin.Data() ); - cbar->AddButton( "RMS", macro_rms, "Plot RMS (Variance)", "button" ); - TString macro_rms_ov_mean = Form( "Plot(\"%s\", kRMSOVMEAN)", fin.Data() ); - cbar->AddButton( "RMS over Mean", macro_rms_ov_mean, "Plot RMS over Mean", "button" ); + TString macro_rms = Form( "Plot(\"%s\", TMVA::kRms, \"Variance\", %s)", + fin.Data(), (useTMVAStyle ? "kTRUE" : "kFALSE") ); + cbar->AddButton( "Variance", macro_rms, "Plot variance", "button" ); + TString macro_rms_ov_mean = Form( "Plot(\"%s\", TMVA::kRmsOvMean, \"Variance/Mean\", %s)", + fin.Data(), (useTMVAStyle ? 
"kTRUE" : "kFALSE") ); + cbar->AddButton( "Variance/Mean", macro_rms_ov_mean, "Plot variance over mean", "button" ); + TString macro_cell_tree = Form( "PlotCellTree(\"%s\", \"Cell tree\", %s)", + fin.Data(), (useTMVAStyle ? "kTRUE" : "kFALSE") ); + cbar->AddButton( "Cell tree", macro_cell_tree, "Plot cell tree", "button" ); cbar->Show(); file->Close(); } // foam plotting macro -void Plot( TString fin = "weights/TMVAClassification_PDEFoam.weights_foams.root", EPlotType pt ) +void Plot( TString fin = "weights/TMVAClassification_PDEFoam.weights_foams.root", + TMVA::ECellValue cv, TString cv_long, bool useTMVAStyle=kTRUE ) { cout << "read file: " << fin << endl; TFile *file = TFile::Open(fin); gStyle->SetNumberContours(999); - TMVAGlob::SetTMVAStyle(); - - string cellval = ""; // quantity to draw in foam projection - string cellval_long = ""; // name of quantity to draw in foam projection - - if (pt == kNEV){ - cellval = "cell_value"; - cellval_long = "Event density"; - } - else if (pt == kDISCR){ - cellval = "cell_value"; - cellval_long = "Discriminator"; - } - else if (pt == kMONO){ - cellval = "cell_value"; - cellval_long = "Target"; - } - else if (pt == kRMS){ - cellval = "rms"; - cellval_long = "RMS"; - } - else if (pt == kRMSOVMEAN){ - cellval = "rms_ov_mean"; - cellval_long = "RMS/Mean"; - } + if (useTMVAStyle) TMVAGlob::SetTMVAStyle(); // find foams and foam type - EFoamType ft; - TMVA::PDEFoam *foam = 0; - TMVA::PDEFoam *foam2 = 0; - string foam_capt, foam2_capt; + TList foam_list; // the foams and their captions if (gDirectory->Get("SignalFoam") && gDirectory->Get("BgFoam")){ - foam = SignalFoam; - foam2 = BgFoam; - foam_capt = "Signal Foam"; - foam2_capt = "Background Foam"; - ft = kSEPARATE; + foam_list.Add(new TPair(SignalFoam, new TObjString("Signal Foam"))); + foam_list.Add(new TPair(BgFoam, new TObjString("Background Foam"))); } else if (gDirectory->Get("DiscrFoam")){ - foam = DiscrFoam; - foam_capt = "Discriminator Foam"; - ft = kDISCR; + 
foam_list.Add(new TPair(DiscrFoam, new TObjString("Discriminator Foam"))); + } else if (gDirectory->Get("MultiClassFoam0")){ + UInt_t cls = 0; + TMVA::PDEFoam *fm = NULL; + while (fm = (TMVA::PDEFoam*) gDirectory->Get(Form("MultiClassFoam%u", cls))) { + foam_list.Add(new TPair(fm, new TObjString(Form("Discriminator Foam %u",cls)))); + cls++; + } } else if (gDirectory->Get("MonoTargetRegressionFoam")){ - foam = MonoTargetRegressionFoam; - foam_capt = "MonoTargetRegression Foam"; - ft = kMONOTARGET; + foam_list.Add(new TPair(MonoTargetRegressionFoam, + new TObjString("MonoTargetRegression Foam"))); } else if (gDirectory->Get("MultiTargetRegressionFoam")){ - foam = MultiTargetRegressionFoam; - foam_capt = "MultiTargetRegression Foam"; - ft = kMULTITARGET; + foam_list.Add(new TPair(MultiTargetRegressionFoam, + new TObjString("MultiTargetRegression Foam"))); } else { cout << "ERROR: no Foams found in file: " << fin << endl; return; } - Int_t kDim = foam->GetTotDim(); - cout << foam_capt << " loaded" << endl; - cout << "Dimension of foam: " << kDim << endl; + // loop over all foams and print out a debug message + TListIter foamIter(&foam_list); + TPair *fm_pair = NULL; + Int_t kDim; // foam dimensions + while (fm_pair = (TPair*) foamIter()) { + kDim = ((TMVA::PDEFoam*) fm_pair->Key())->GetTotDim(); + cout << "Foam loaded: " << ((TObjString*) fm_pair->Value())->String() + << " (dimension = " << kDim << ")" << endl; + } + + // kernel to use for the projection + TMVA::PDEFoamKernelBase *kernel = new TMVA::PDEFoamKernelTrivial(); // ********** plot foams ********** // - if (kDim==1){ + if (kDim == 1){ // draw histogram - TH1D *hist1 = 0, *hist2 = 0; - TCanvas *canv = new TCanvas("canv", "Foam(s)", 400, (ft==kSEPARATE) ? 
800 : 400); - if (ft==kSEPARATE) - canv->Divide(0,2); - canv->cd(1); - - string var_name = foam->GetVariableName(0)->String(); - hist1 = foam->Draw1Dim(cellval.c_str(), 100); - hist1->SetTitle((cellval_long+" of "+foam_capt+";"+var_name).c_str()); - hist1->Draw(); - hist1->SetDirectory(0); - - if (ft==kSEPARATE){ - canv->cd(2); - string var_name2 = foam2->GetVariableName(0)->String(); - if (ft==kSEPARATE) - hist2 = foam2->Draw1Dim(cellval.c_str(), 100); - hist2->SetTitle((cellval_long+" of "+foam2_capt+";"+var_name2).c_str()); - hist2->Draw(); - hist2->SetDirectory(0); - } + TCanvas *canv = NULL; // the canvas + TH1D *proj = NULL; // the foam projection - // save canvas to file - stringstream fname (stringstream::in | stringstream::out); - fname << "plots/" << "foam_var_" << cellval << "_0"; - canv->Update(); - TMVAGlob::imgconv( canv, fname.str() ); - } else{ + // loop over all foams and draw the projection + TListIter it(&foam_list); // the iterator + TPair *fm_pair = NULL; // the (foam, caption) pair + while (fm_pair = (TPair*) it()) { + TMVA::PDEFoam *foam = (TMVA::PDEFoam*) fm_pair->Key(); + TString foam_capt(((TObjString*) fm_pair->Value())->String()); + + canv = new TCanvas(Form("canvas_%u",foam), "1-dimensional PDEFoam", 400, 400); + + TString var_name = foam->GetVariableName(0)->String(); + proj = foam->Draw1Dim(cv, 100, kernel); + proj->SetTitle(cv_long+" of "+foam_capt+";"+var_name); + proj->Draw(); + proj->SetDirectory(0); + + canv->Update(); + } // loop over foams + } else { // if dimension of foam > 1, draw foam projections - TCanvas* canv=0; - TH2D *proj=0, *proj2=0; - - // draw all possible projections (kDim*(kDim-1)/2) - for(Int_t i=0; i<kDim; i++){ - for (Int_t k=i+1; k<kDim; k++){ - - // set titles of canvas and foam projections - stringstream title (stringstream::in | stringstream::out); - stringstream caption (stringstream::in | stringstream::out); - title << "combined_" << i << ":" << k; - caption << "Foam projections " << i << ":" << k; - 
cout << "draw projection: " << i << ":" << k << endl; - - stringstream title_proj1 (stringstream::in | stringstream::out); - stringstream title_proj2 (stringstream::in | stringstream::out); - title_proj1 << cellval_long << " of " - << foam_capt << ": Projection " - << foam->GetVariableName(i)->String() - << ":" << foam->GetVariableName(k)->String() - << ";" << foam->GetVariableName(i)->String() - << ";" << foam->GetVariableName(k)->String(); - if (ft==kSEPARATE){ - title_proj2 << cellval_long << " of " - << foam2_capt << ": Projection " - << foam2->GetVariableName(i)->String() - << ":" << foam2->GetVariableName(k)->String() - << ";" << foam2->GetVariableName(i)->String() - << ";" << foam2->GetVariableName(k)->String(); - } - - // create canvas - canv = new TCanvas(title.str().c_str(), caption.str().c_str(), - (Int_t)(400/(1.-0.2)), (ft==kSEPARATE ? 800 : 400)); - if (ft==kSEPARATE){ - canv->Divide(0,2); - canv->GetPad(1)->SetRightMargin(0.2); - canv->GetPad(2)->SetRightMargin(0.2); - } else { + TCanvas *canv = NULL; // the canvas + TH2D *proj = NULL; // the foam projection + + // loop over all foams and draw the projection + TListIter it(&foam_list); // the iterator + TPair *fm_pair = NULL; // the (foam, caption) pair + while (fm_pair = (TPair*) it()) { + TMVA::PDEFoam *foam = (TMVA::PDEFoam*) fm_pair->Key(); + TString foam_capt(((TObjString*) fm_pair->Value())->String()); + + // draw all possible projections (kDim*(kDim-1)/2) + for(Int_t i=0; i<kDim; i++){ + for (Int_t k=i+1; k<kDim; k++){ + + // create canvas + canv = new TCanvas(Form("canvas_%u_%i:%i",foam,i,k), + Form("Foam projections %i:%i",i,k), + (Int_t)(400/(1.-0.2)), 400); canv->SetRightMargin(0.2); - } - canv->cd(1); - - // do projections - proj = foam->Project2(i, k, cellval.c_str(), "kNone"); - proj->SetTitle(title_proj1.str().c_str()); - if (pt==kDISCR) - proj->GetZaxis()->SetRangeUser(-DBL_EPSILON, 1.+DBL_EPSILON); - proj->Draw("COLZ"); // CONT4Z - proj->SetDirectory(0); - - if (ft==kSEPARATE){ - 
canv->cd(2); - proj2 = foam2->Project2(i, k, cellval.c_str(), "kNone"); - proj2->SetTitle(title_proj2.str().c_str()); - proj2->Draw("COLZ"); // CONT4Z - proj2->SetDirectory(0); - } - - // save canvas to file - stringstream fname (stringstream::in | stringstream::out); - fname << "plots/" << "foam_projection_var_" << cellval << "_" << i << ":" << k; - canv->Update(); - TMVAGlob::imgconv( canv, fname.str() ); + + TString title_proj = Form("%s of %s: Projection %s:%s;%s;%s", + cv_long.Data(), + foam_capt.Data(), + foam->GetVariableName(i)->String().Data(), + foam->GetVariableName(k)->String().Data(), + foam->GetVariableName(i)->String().Data(), + foam->GetVariableName(k)->String().Data() ); + + // do projections + proj = foam->Project2(i, k, cv, kernel); + proj->SetTitle(title_proj); + proj->Draw("COLZ"); // CONT4Z + proj->SetDirectory(0); + + canv->Update(); + } // loop over all possible projections } // loop over all possible projections - } // loop over all possible projections + } // loop over foams } // if dimension > 1 file->Close(); } + + +void PlotCellTree( TString fin = "weights/TMVAClassification_PDEFoam.weights_foams.root", + TString cv_long, bool useTMVAStyle=kTRUE ) +{ + // Draw the PDEFoam cell tree + + cout << "read file: " << fin << endl; + TFile *file = TFile::Open(fin); + + if (useTMVAStyle) TMVAGlob::SetTMVAStyle(); + + // find foams + TListIter foamIter(gDirectory->GetListOfKeys()); + TKey *foam_key = NULL; // the foam key + TCanvas *canv = NULL; // the canvas + while (foam_key = (TKey*) foamIter()) { + TString name(foam_key->GetName()); + TString class_name(foam_key->GetClassName()); + if (!class_name.Contains("PDEFoam")) + continue; + cout << "PDEFoam found: " << class_name + << " " << name << endl; + + // read the foam + TMVA::PDEFoam *foam = (TMVA::PDEFoam*) foam_key->ReadObj(); + canv = new TCanvas(Form("canvas_%s",name.Data()), + Form("%s of %s",cv_long.Data(),name.Data()), 640, 480); + + // get cell tree depth + UInt_t depth = 
foam->GetRootCell()->GetTreeDepth() - 1; + Double_t ystep = 1.0/(depth + 1.0); + DrawCell(foam->GetRootCell(), foam, 0.5, 1.-0.5*ystep, 0.25, ystep); + } + + file->Close(); +} + +void DrawCell( TMVA::PDEFoamCell *cell, TMVA::PDEFoam *foam, + Double_t x, Double_t y, + Double_t xscale, Double_t yscale ) +{ + // recursively draw cell and it's daughters + + Float_t xsize = xscale*1.5; + Float_t ysize = yscale/3; + if (xsize > 0.15) xsize=0.1; //xscale/2; + if (cell->GetDau0() != NULL) { + TLine *a1 = new TLine(x-xscale/4, y-ysize, x-xscale, y-ysize*2); + a1->SetLineWidth(2); + a1->Draw(); + DrawCell(cell->GetDau0(), foam, x-xscale, y-yscale, xscale/2, yscale); + } + if (cell->GetDau1() != NULL){ + TLine *a1 = new TLine(x+xscale/4, y-ysize, x+xscale, y-ysize*2); + a1->SetLineWidth(2); + a1->Draw(); + DrawCell(cell->GetDau1(), foam, x+xscale, y-yscale, xscale/2, yscale); + } + + TPaveText *t = new TPaveText(x-xsize, y-ysize, x+xsize, y+ysize, "NDC"); + + t->SetBorderSize(1); + t->SetFillStyle(1); + + // draw all cell elements + t->AddText( Form("Intg=%.5f", cell->GetIntg()) ); + t->AddText( Form("Var=%.5f", cell->GetDriv()) ); + TVectorD *vec = (TVectorD*) cell->GetElement(); + if (vec != NULL){ + for (Int_t i = 0; i < vec->GetNrows(); ++i) { + t->AddText( Form("E[%i]=%.5f", i, vec(i)) ); + } + } + + if (cell->GetStat() != 1) { + // cell is inactive --> draw split point + t->SetFillColor( TColor::GetColor("#BBBBBB") ); + t->SetTextColor( TColor::GetColor("#000000") ); + + // cell position and size + TMVA::PDEFoamVect cellPosi(foam->GetTotDim()), cellSize(foam->GetTotDim()); + cell->GetHcub(cellPosi, cellSize); + Int_t kBest = cell->GetBest(); // best division variable + Double_t xBest = cell->GetXdiv(); // best division point + t->AddText( Form("dim=%i", kBest) ); + t->AddText( Form("cut=%.5g", foam->VarTransformInvers(kBest,cellPosi[kBest] + xBest*cellSize[kBest])) ); + } else { + t->SetFillColor( TColor::GetColor("#DD0033") ); + t->SetTextColor( 
TColor::GetColor("#FFFFFF") ); + } + + t->Draw(); + + return; +} diff --git a/tmva/test/TMVAClassification.C b/tmva/test/TMVAClassification.C index 26cf8472ab56d0a5078ba2d6bdf5a725177537d4..261ff755b2240b97368b6fb6b984344c0fcb1406 100644 --- a/tmva/test/TMVAClassification.C +++ b/tmva/test/TMVAClassification.C @@ -345,7 +345,7 @@ void TMVAClassification( TString myMethodList = "" ) // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", - "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); + "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", @@ -358,7 +358,7 @@ void TMVAClassification( TString myMethodList = "" ) // H-Matrix (chi2-squared) method if (Use["HMatrix"]) - factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); + factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) @@ -366,7 +366,7 @@ void TMVAClassification( TString myMethodList = "" ) // Fisher discriminant (same as LD) if (Use["Fisher"]) - factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); + factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) @@ -374,8 +374,8 @@ void TMVAClassification( TString myMethodList = "" ) // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) - factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", - 
"H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ); + factory->BookMethod( TMVA::Types::kMLP, "BoostedFisher", + "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) diff --git a/tmva/test/TMVAClassification.cxx b/tmva/test/TMVAClassification.cxx index e1ff22057c42f553f09e175ad6a20031f96abfd6..cd3fe2c7d25249f76c81bacaab53c207dd006f91 100644 --- a/tmva/test/TMVAClassification.cxx +++ b/tmva/test/TMVAClassification.cxx @@ -339,7 +339,7 @@ int main( int argc, char** argv ) // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", - "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); + "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", diff --git a/tmva/test/TMVAClassification.py b/tmva/test/TMVAClassification.py index 43d25523598224130f7f71b04764d86b6ca1e91d..c6b0c3614dbc57a7f21e398b610329c4d6eacff7 100755 --- a/tmva/test/TMVAClassification.py +++ b/tmva/test/TMVAClassification.py @@ -289,7 +289,7 @@ def main(): # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam", - "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) + "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost", diff --git 
a/tmva/test/TMVAGui.C b/tmva/test/TMVAGui.C index 3db6abdede042b9280789b01bce9391b181754ce..07c0299d307d2dfa858ae2b417bd98f981ccbbbf 100644 --- a/tmva/test/TMVAGui.C +++ b/tmva/test/TMVAGui.C @@ -233,13 +233,6 @@ void TMVAGui( const char* fName = "TMVA.root" ) Form( ".x BDTControlPlots.C(\"%s\")", fName ), "Plots to monitor boosting and pruning of decision trees (macro BDTControlPlots.C)", buttonType, "BDT" ); - - // ActionButton( cbar, - // Form( "(%i) Monitor Decision Tree Boosting", ++ic ), - // Form( ".x BDTBoostWatch.C+(\"%s\")", fName ), - // "Plots separation obtained by BDT method as function of boosting steps (macro BDTBoostWatch.C(itree,...))", - // buttonType, "BDT" ); - // ActionButton( cbar, // Form( "(%i) Rule Ensemble Importance Plots (RuleFit)", ++ic ), // Form( ".x rulevis.C(\"%s\",0)", fName ), @@ -249,7 +242,7 @@ void TMVAGui( const char* fName = "TMVA.root" ) title = Form( "(%i) Plot Foams (PDEFoam)", ++ic ); ActionButton( cbar, title, - ".x PlotFoams.C", + ".x PlotFoams.C(\"weights/TMVAClassification_PDEFoam.weights_foams.root\")", "Plot Foams (macro PlotFoams.C)", buttonType, "PDEFoam" ); diff --git a/tmva/test/TMVAMultiClassGui.C b/tmva/test/TMVAMultiClassGui.C index b7a41c87e45f889982d2a765106b6fcee747b29d..91a2b5742ba8a7474623a7e2ef6a4fbe7b3f14f3 100644 --- a/tmva/test/TMVAMultiClassGui.C +++ b/tmva/test/TMVAMultiClassGui.C @@ -241,14 +241,14 @@ void TMVAMultiClassGui( const char* fName = "TMVAMulticlass.root" ) "Plots to monitor boosting and pruning of decision trees (macro BDTControlPlots.C)", buttonType, "BDT" ); - + */ title = Form( "(%i) Plot Foams (PDEFoam)", ++ic ); ActionButton( cbar, title, - Form( ".x PlotFoams.C", fName ), + ".x PlotFoams.C(\"weights/TMVAMulticlass_PDEFoam.weights_foams.root\")", "Plot Foams (macro PlotFoams.C)", buttonType, "PDEFoam" ); - + /* title = Form( "(%i) General Boost Control Plots", ++ic ); ActionButton( cbar, title, diff --git a/tmva/test/TMVAMulticlass.C b/tmva/test/TMVAMulticlass.C index 
8e790684d70ca5e344d918d1f49099f90f97eda4..7ef4804deb119076d3f16594cea3eb53e24e95b1 100644 --- a/tmva/test/TMVAMulticlass.C +++ b/tmva/test/TMVAMulticlass.C @@ -38,6 +38,7 @@ void TMVAMulticlass( TString myMethodList = "" ) Use["MLP"] = 1; Use["BDTG"] = 1; Use["FDA_GA"] = 0; + Use["PDEFoam"] = 0; //--------------------------------------------------------------- std::cout << std::endl; @@ -109,6 +110,8 @@ void TMVAMulticlass( TString myMethodList = "" ) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE"); if (Use["FDA_GA"]) // functional discriminant with GA minimizer factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); + if (Use["PDEFoam"]) // PDE-Foam approach + factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); // Train MVAs using the set of training events factory->TrainAllMethods(); diff --git a/tmva/test/TMVAMulticlass.cxx b/tmva/test/TMVAMulticlass.cxx index 8ba7f62926851b29b7473719e441489ecbf1f4bd..1077d9726e2eecddd80eb597f6af5c26f8a30d0e 100644 --- a/tmva/test/TMVAMulticlass.cxx +++ b/tmva/test/TMVAMulticlass.cxx @@ -38,6 +38,7 @@ int main(int argc, char** argv ) Use["MLP"] = 1; Use["BDTG"] = 1; Use["FDA_GA"] = 0; + Use["PDEFoam"] = 0; //--------------------------------------------------------------- std::cout << std::endl; @@ -109,6 +110,8 @@ int main(int argc, char** argv ) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=300:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE"); if (Use["FDA_GA"]) // functional discriminant with GA minimizer factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); + if (Use["PDEFoam"]) // PDE-Foam approach + factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); // Train MVAs using the set of training events factory->TrainAllMethods(); diff --git a/tmva/test/TMVAMulticlassApplication.C b/tmva/test/TMVAMulticlassApplication.C index 677839bb7df92c20192c3f7ae29b3d6b50759b61..67eeba6950152fec83af217b793cbac6cc49279e 100644 --- a/tmva/test/TMVAMulticlassApplication.C +++ b/tmva/test/TMVAMulticlassApplication.C @@ -42,6 +42,7 @@ void TMVAMulticlassApplication( TString myMethodList = "" ) Use["MLP"] = 1; Use["BDTG"] = 1; Use["FDA_GA"] = 0; + Use["PDEFoam"] = 0; //--------------------------------------------------------------- std::cout << std::endl; @@ -90,13 +91,16 @@ void TMVAMulticlassApplication( TString myMethodList = "" ) // book output histograms UInt_t nbin = 100; - TH1F *histMLP_signal(0), *histBDTG_signal(0), *histFDAGA_signal(0); + TH1F *histMLP_signal(0), *histBDTG_signal(0), *histFDAGA_signal(0), *histPDEFoam_signal(0); if (Use["MLP"]) histMLP_signal = new TH1F( "MVA_MLP_signal", "MVA_MLP_signal", nbin, 0., 1.1 ); if (Use["BDTG"]) histBDTG_signal = new TH1F( "MVA_BDTG_signal", "MVA_BDTG_signal", nbin, 0., 1.1 ); if (Use["FDA_GA"]) histFDAGA_signal = new TH1F( "MVA_FDA_GA_signal", "MVA_FDA_GA_signal", nbin, 0., 1.1 ); + if (Use["PDEFoam"]) + histPDEFoam_signal = new TH1F( "MVA_PDEFoam_signal", "MVA_PDEFoam_signal", nbin, 0., 1.1 ); + TFile *input(0); TString fname = "./tmva_example_multiple_background.root"; @@ -137,6 +141,8 @@ void TMVAMulticlassApplication( TString myMethodList = "" ) histBDTG_signal->Fill((reader->EvaluateMulticlass( "BDTG method" ))[0]); if (Use["FDA_GA"]) histFDAGA_signal->Fill((reader->EvaluateMulticlass( "FDA_GA method" 
))[0]); + if (Use["PDEFoam"]) + histPDEFoam_signal->Fill((reader->EvaluateMulticlass( "PDEFoam method" ))[0]); } @@ -151,6 +157,8 @@ void TMVAMulticlassApplication( TString myMethodList = "" ) histBDTG_signal->Write(); if (Use["FDA_GA"]) histFDAGA_signal->Write(); + if (Use["PDEFoam"]) + histPDEFoam_signal->Write(); target->Close(); std::cout << "--- Created root file: \"TMVMulticlassApp.root\" containing the MVA output histograms" << std::endl; diff --git a/tmva/test/TMVAMulticlassApplication.cxx b/tmva/test/TMVAMulticlassApplication.cxx index 3359bab1e727578361a913010944f461bc5942de..6b3e964e0c7cdc9a6eb54cc698066f2e2afbef3b 100644 --- a/tmva/test/TMVAMulticlassApplication.cxx +++ b/tmva/test/TMVAMulticlassApplication.cxx @@ -36,6 +36,7 @@ int main(int argc, char** argv ) Use["MLP"] = 1; Use["BDTG"] = 1; Use["FDA_GA"] = 0; + Use["PDEFoam"] = 0; //--------------------------------------------------------------- std::cout << std::endl; @@ -85,13 +86,15 @@ int main(int argc, char** argv ) // book output histograms UInt_t nbin = 100; - TH1F *histMLP_signal(0), *histBDTG_signal(0), *histFDAGA_signal(0); + TH1F *histMLP_signal(0), *histBDTG_signal(0), *histFDAGA_signal(0), *histPDEFoam_signal(0); if (Use["MLP"]) histMLP_signal = new TH1F( "MVA_MLP_signal", "MVA_MLP_signal", nbin, 0., 1.1 ); if (Use["BDTG"]) histBDTG_signal = new TH1F( "MVA_BDTG_signal", "MVA_BDTG_signal", nbin, 0., 1.1 ); if (Use["FDA_GA"]) histFDAGA_signal = new TH1F( "MVA_FDA_GA_signal", "MVA_FDA_GA_signal", nbin, 0., 1.1 ); + if (Use["PDEFoam"]) + histPDEFoam_signal = new TH1F( "MVA_PDEFoam_signal", "MVA_PDEFoam_signal", nbin, 0., 1.1 ); TFile *input(0); @@ -133,7 +136,8 @@ int main(int argc, char** argv ) histBDTG_signal->Fill((reader->EvaluateMulticlass( "BDTG method" ))[0]); if (Use["FDA_GA"]) histFDAGA_signal->Fill((reader->EvaluateMulticlass( "FDA_GA method" ))[0]); - + if (Use["PDEFoam"]) + histPDEFoam_signal->Fill((reader->EvaluateMulticlass( "PDEFoam method" ))[0]); } @@ -148,7 +152,8 @@ int 
main(int argc, char** argv ) histBDTG_signal->Write(); if (Use["FDA_GA"]) histFDAGA_signal->Write(); - + if (Use["PDEFoam"]) + histPDEFoam_signal->Write(); target->Close(); std::cout << "--- Created root file: \"TMVMulticlassApp.root\" containing the MVA output histograms" << std::endl; diff --git a/tmva/test/TMVARegGui.C b/tmva/test/TMVARegGui.C index 0ca63ec8ab0268998870dbee5047819a62adcd74..ce2741e9a32be5e0dbf9dd0b7b3baec143967c1f 100644 --- a/tmva/test/TMVARegGui.C +++ b/tmva/test/TMVARegGui.C @@ -176,7 +176,7 @@ void TMVARegGui( const char* fName = "TMVAReg.root" ) ActionButton( cbar, Form( "(%i) Plot Foams", ++ic ), - Form( ".x PlotFoams.C(\"weights/TMVARegression_PDEFoam.weights_foams.root\")", fName ), + ".x PlotFoams.C(\"weights/TMVARegression_PDEFoam.weights_foams.root\")", "Plot Foams (macro PlotFoams.C)", buttonType, "PDEFoam" ); diff --git a/tmva/test/TMVARegression.C b/tmva/test/TMVARegression.C index 6bd96eba347a039b2cd92989883c5d5f6147720a..aaa05544c9e15fa2cdbae7f9eb873f82fb21845b 100644 --- a/tmva/test/TMVARegression.C +++ b/tmva/test/TMVARegression.C @@ -211,7 +211,7 @@ void TMVARegression( TString myMethodList = "" ) if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", - "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); + "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) diff --git a/tmva/test/TMVARegression.cxx b/tmva/test/TMVARegression.cxx index c74826058d1342d321b4eee27be41f6bfb905b96..d2fa905a6c8cc7965bd6c317b3e15582cb38e454 100644 --- a/tmva/test/TMVARegression.cxx +++ b/tmva/test/TMVARegression.cxx @@ -206,7 +206,7 @@ int main( int argc, char** argv ) if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", - 
"!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); + "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) diff --git a/tmva/test/setup.sh b/tmva/test/setup.sh index 772fa8bd11e88a9af6c475624bf9c5a9c9dd2064..17fd0edfe7de72de65a6b1183c16c18e2fccc6df 100755 --- a/tmva/test/setup.sh +++ b/tmva/test/setup.sh @@ -1,5 +1,7 @@ #!/bin/sh +export HERE=$PWD + # set symbolic links to data file and to rootmaps #cd test; if [ ! -h tmva_example.root ]; then ln -s data/toy_sigbkg.root tmva_example.root; fi @@ -53,4 +55,4 @@ fi # prepare for PyROOT export PYTHONPATH=$PWD/lib:`root-config --libdir`:$PYTHONPATH -cd test +cd $HERE