From d38321efbe5212d2083e640d9504a23df3706fe9 Mon Sep 17 00:00:00 2001 From: Rene Brun <Rene.Brun@cern.ch> Date: Tue, 23 Jun 2009 05:04:02 +0000 Subject: [PATCH] From Joerg: new version of TMVA stress suite adapted to the new version of TMVA git-svn-id: http://root.cern.ch/svn/root/trunk@29152 27541ba8-7e3a-0410-8455-c3a389f83636 --- test/stressTMVA.cxx | 828 +++++++++++++++++++++++++++++++------------- 1 file changed, 583 insertions(+), 245 deletions(-) diff --git a/test/stressTMVA.cxx b/test/stressTMVA.cxx index 0ff64e68a70..b083ef12472 100644 --- a/test/stressTMVA.cxx +++ b/test/stressTMVA.cxx @@ -13,298 +13,636 @@ /////////////////////////////////////////////////////////////////////////////////// #include <iostream> +#include <ostream> +#include <fstream> +#include <assert.h> +#include <string> +#include <sstream> #include "TString.h" #include "TFile.h" +#include "TMath.h" #include "TH1.h" +#include "TSystem.h" +#include "TRandom3.h" #include "TMath.h" +#include "TMatrixD.h" + #include "TMVA/Factory.h" +#include "TMVA/Event.h" +#include "TMVA/PDF.h" +#include "TMVA/Reader.h" +#include "TMVA/Types.h" +#include "TMVA/VariableDecorrTransform.h" +#include "TMVA/VariableIdentityTransform.h" +#include "TMVA/VariableGaussTransform.h" +#include "TMVA/VariableNormalizeTransform.h" +#include "TMVA/VariablePCATransform.h" +#include "TMVA/VariableNormalizeTransform.h" + + +namespace TMVA { class TMVATest { -public: + + + + public: - enum tmvatests { CutsGA = 0x001, Likelihood = 0x002, LikelihoodPCA = 0x004, PDERS = 0x008, - KNN = 0x010, HMatrix = 0x020, Fisher = 0x040, FDA_MT = 0x080, - MLP = 0x100, SVM_Gauss = 0x200, BDT = 0x400, RuleFit = 0x800 }; - - TMVATest() : - fFactory(0), - fTestDataFileName( "http://root.cern.ch/files/tmva_example.root" ), - fReferenceDataFileName( "http://root.cern.ch/files/TMVAref.root" ), - //fReferenceDataFileName( "TMVAref.root" ), - fOutputFileName( "TMVAtest.root" ), - fInputfile(0), - fOutputfile(0), - fReffile(0), - fTests(0), - fNEvents(500) - { - fTests |= CutsGA; - fTests |= Likelihood; - fTests |= LikelihoodPCA; - fTests |= PDERS; - fTests |= KNN; - fTests |= HMatrix; - fTests |= Fisher; - fTests |= FDA_MT; - fTests |= MLP; - fTests |= SVM_Gauss; - fTests |= BDT; - fTests |= RuleFit; - - fOutputfile = TFile::Open( fOutputFileName, "RECREATE" ); - fFactory = new TMVA::Factory( "TMVAtest", fOutputfile, "S" ); + enum tmvatests { CutsGA = 0x001, Likelihood = 0x002, LikelihoodPCA = 0x004, PDERS = 0x008, + KNN = 0x010, HMatrix = 0x020, Fisher = 0x040, FDA_MT = 0x080, + MLP = 0x100, SVM_Gauss = 0x200, BDT = 0x400, RuleFit = 0x800 }; + + TMVATest() {} + ~TMVATest() {} + + void prepareClassificationData(); + void getGaussRnd( TArrayD& v, const TMatrixD& sqrtMat, TRandom& R ); + TMatrixD* produceSqrtMat( const TMatrixD& covMat ); + + + int test_event(); + int test_createDataSetFromTree(); + int test_pdf_streams(); + int test_factory_basic(); + int test_reader_basic(); + int test_variable_transforms(); + + private: + +}; + + + +void TMVATest::prepareClassificationData(){ + std::cout << "---------- prepare data -----START " << std::endl; + // create the data + Int_t N = 20000; + const Int_t nvar = 4; + Float_t xvar[nvar]; + + // output file + TFile* dataFile = TFile::Open( "testData.root", "RECREATE" ); + + // create signal and background trees + TTree* treeS = new TTree( "TreeS", "TreeS", 1 ); + TTree* treeB = new TTree( "TreeB", "TreeB", 1 ); + for (Int_t ivar=0; ivar<nvar; ivar++) { + treeS->Branch( TString(Form( "var%i", ivar+1 )).Data(), &xvar[ivar], TString(Form( 
"var%i/F", ivar+1 )).Data() ); + treeB->Branch( TString(Form( "var%i", ivar+1 )).Data(), &xvar[ivar], TString(Form( "var%i/F", ivar+1 )).Data() ); } + + TRandom R( 100 ); + Float_t xS[nvar] = { 0.2, 0.3, 0.5, 0.9 }; + Float_t xB[nvar] = { -0.2, -0.3, -0.5, -0.6 }; + Float_t dx[nvar] = { 1.0, 1.0, 1.0, 1.0 }; + TArrayD* v = new TArrayD( nvar ); + Float_t rho[20]; + rho[1*2] = 0.4; + rho[1*3] = 0.6; + rho[1*4] = 0.9; + rho[2*3] = 0.7; + rho[2*4] = 0.8; + rho[3*4] = 0.93; + + // create covariance matrix + TMatrixD* covMatS = new TMatrixD( nvar, nvar ); + TMatrixD* covMatB = new TMatrixD( nvar, nvar ); + for (Int_t ivar=0; ivar<nvar; ivar++) { + (*covMatS)(ivar,ivar) = dx[ivar]*dx[ivar]; + (*covMatB)(ivar,ivar) = dx[ivar]*dx[ivar]; + for (Int_t jvar=ivar+1; jvar<nvar; jvar++) { + (*covMatS)(ivar,jvar) = rho[(ivar+1)*(jvar+1)]*dx[ivar]*dx[jvar]; + (*covMatS)(jvar,ivar) = (*covMatS)(ivar,jvar); + + (*covMatB)(ivar,jvar) = rho[(ivar+1)*(jvar+1)]*dx[ivar]*dx[jvar]; + (*covMatB)(jvar,ivar) = (*covMatB)(ivar,jvar); + } + } + std::cout << "signal covariance matrix: " << std::endl; + covMatS->Print(); + std::cout << "background covariance matrix: " << std::endl; + covMatB->Print(); + + // produce the square-root matrix + TMatrixD* sqrtMatS = produceSqrtMat( *covMatS ); + TMatrixD* sqrtMatB = produceSqrtMat( *covMatB ); + + // loop over species + for (Int_t itype=0; itype<2; itype++) { + + Float_t* x; + TMatrixD* m; + if (itype == 0) { x = xS; m = sqrtMatS; std::cout << "- produce signal" << std::endl; } + else { x = xB; m = sqrtMatB; std::cout << "- produce background" << std::endl; } - ~TMVATest() { - fOutputfile->Close(); - delete fFactory; + // event loop + TTree* tree = (itype==0) ? treeS : treeB; + for (Int_t i=0; i<N; i++) { + + if (i%1000 == 0) std::cout << "... 
event: " << i << " (" << N << ")" << std::endl; + getGaussRnd( *v, *m, R ); + + for (Int_t ivar=0; ivar<nvar; ivar++) xvar[ivar] = (*v)[ivar] + x[ivar]; + + tree->Fill(); + } } - bool Initialize() { - // accessing the data file via the web from - fInputfile = TFile::Open( fTestDataFileName ); + // write trees + treeS->Write(); + treeB->Write(); + + treeS->Show(0); + treeB->Show(1); + + dataFile->Close(); + std::cout << "created data file: " << dataFile->GetName() << std::endl; + + std::cout << "---------- prepare data -----END " << std::endl; +} + + + +TMatrixD* TMVATest::produceSqrtMat( const TMatrixD& covMat ) +{ + Double_t sum = 0; + Int_t size = covMat.GetNrows();; + TMatrixD* sqrtMat = new TMatrixD( size, size ); + + for (Int_t i=0; i< size; i++) { + + sum = 0; + for (Int_t j=0;j< i; j++) sum += (*sqrtMat)(i,j) * (*sqrtMat)(i,j); + + (*sqrtMat)(i,i) = TMath::Sqrt(TMath::Abs(covMat(i,i) - sum)); + + for (Int_t k=i+1 ;k<size; k++) { + + sum = 0; + for (Int_t l=0; l<i; l++) sum += (*sqrtMat)(k,l) * (*sqrtMat)(i,l); + + (*sqrtMat)(k,i) = (covMat(k,i) - sum) / (*sqrtMat)(i,i); - if (!fInputfile) { - fErrorMsg.push_back(TString(Form("ERROR: could not open data file %s", fTestDataFileName.Data() ))); - return false; } + } + return sqrtMat; +} + +void TMVATest::getGaussRnd( TArrayD& v, const TMatrixD& sqrtMat, TRandom& R ) +{ + // generate "size" correlated Gaussian random numbers - TTree *signal = (TTree*)fInputfile->Get("TreeS"); - TTree *background = (TTree*)fInputfile->Get("TreeB"); - - // global event weights (see below for setting event-wise weights) - Double_t signalWeight = 1.0; - Double_t backgroundWeight = 1.0; - - fFactory->AddSignalTree ( signal, signalWeight ); - fFactory->AddBackgroundTree( background, backgroundWeight ); - - fFactory->AddVariable("var1+var2", 'F'); - fFactory->AddVariable("var1-var2", 'F'); - fFactory->AddVariable("var3", 'F'); - fFactory->AddVariable("var4", 'F'); - - // Apply additional cuts on the signal and background samples (can be different) - TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; - TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; - - // tell the factory to use all remaining events in the trees after training for testing: - fFactory->PrepareTrainingAndTestTree( mycuts, mycutb, - Form("NSigTrain=%i:NBkgTrain=%i:NSigTest=%i:NBkgTest=%i:SplitMode=Random:NormMode=NumEvents:V", - fNEvents,fNEvents,fNEvents,fNEvents)); - - fInputfile->Close(); - - // Cut optimisation with genetic algorithm - - if( fTests & CutsGA ) - fFactory->BookMethod( TMVA::Types::kCuts, "CutsGA", - "!H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=100:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp=FSmart" ); - - // Likelihood - if( fTests & Likelihood ) - fFactory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", - "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmoothBkg[1]=10:NSmooth=10:NAvEvtPerBin=50" ); - - // test the principal component analysis - if( fTests & LikelihoodPCA ) - fFactory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", - "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); - - // PDE - RS method - if( fTests & PDERS ) - fFactory->BookMethod( TMVA::Types::kPDERS, "PDERS", - "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); - - // K-Nearest Neighbour classifier (KNN) - if( fTests & KNN ) - fFactory->BookMethod( TMVA::Types::kKNN, 
"KNN", - "!H:!V:nkNN=400:TreeOptDepth=6:ScaleFrac=0.8:!UseKernel:!Trim" ); - - // H-Matrix (chi2-squared) method - if( fTests & HMatrix ) - fFactory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", - "!H:!V" ); - - // Fisher discriminant - if( fTests & Fisher ) - fFactory->BookMethod( TMVA::Types::kFisher, "Fisher", - "!H:!V:Normalise:CreateMVAPdfs:Fisher:NbinsMVAPdf=50:NsmoothMVAPdf=1" ); - - // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit or GA - if( fTests & FDA_MT ) - fFactory->BookMethod( TMVA::Types::kFDA, "FDA_MT", - "!H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); - - // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons - if( fTests & MLP ) - fFactory->BookMethod( TMVA::Types::kMLP, "MLP", - "!H:!V:Normalise:NCycles=200:HiddenLayers=N+1,N:TestRate=5" ); - - // Support Vector Machines using three different Kernel types (Gauss, polynomial and linear) - if( fTests & SVM_Gauss ) - fFactory->BookMethod( TMVA::Types::kSVM, "SVM_Gauss", - "!H:!V:Sigma=2:C=1:Tol=0.001:Kernel=Gauss" ); - - // Boosted Decision Trees (second one with decorrelation) - if( fTests & BDT ) - fFactory->BookMethod( TMVA::Types::kBDT, "BDT", - "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=4.5" ); - - // RuleFit -- TMVA implementation of Friedman's method - if( fTests & RuleFit ) - fFactory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", - "!H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); - - return kTRUE; + // sanity check + const Int_t size = sqrtMat.GetNrows(); + if (size != v.GetSize()) + std::cout << "<getGaussRnd> too short input vector: " << size << " " << v.GetSize() << std::endl; + + Double_t* tmpVec = new Double_t[size]; + + for (Int_t i=0; i<size; i++) { + Double_t x, y, z; + y = R.Rndm(); + z = R.Rndm(); + x = 2*TMath::Pi()*z; + tmpVec[i] = TMath::Sin(x) * TMath::Sqrt(-2.0*TMath::Log(y)); } - bool TrainAndEval() { - // Train MVAs using the set of training events - fFactory->TrainAllMethods(); - - // ---- Evaluate all MVAs using the set of test events - fFactory->TestAllMethods(); - - // ----- Evaluate and compare performance of all configured MVAs - fFactory->EvaluateAllMethods(); - - return kTRUE; + for (Int_t i=0; i<size; i++) { + v[i] = 0; + for (Int_t j=0; j<=i; j++) v[i] += sqrtMat(i,j) * tmpVec[j]; } - void PrintError() { - for(std::vector<TString>::iterator it = fErrorMsg.begin(); - it != fErrorMsg.end(); it++ ) - std::cout << *it << std::endl; - fErrorMsg.clear(); + delete tmpVec; +} + + + + + + + + + + + + + +int TMVATest::test_event(){ + std::cout << "---------- event -----START " << std::endl; + std::cout << " constructor: Event() " << std::endl; + Event* evt0 = new Event(); + delete evt0; + + std::cout << " constructor: Event( const std::vector<Float_t>&, Bool_t isSignal = kTRUE, Float_t weight = 1.0, Float_t boostweight = 1.0 ) " << std::endl; + std::vector<Float_t> ev; + ev.push_back( 1.5 ); + ev.push_back( 2.3 ); + ev.push_back( 8.8 ); + ev.push_back( 2.2 ); + Event *ptrEv = new Event(ev,Types::kSignal ); +// ptrEv->Print( std::cout ); + for( UInt_t iv = 0; iv < ptrEv->GetNVars(); iv++ ){ +// std::cout << "iv : "<< iv << " val: " << ptrEv->GetVal( iv ) << " evval: " << 
ev[iv] << std::endl; + assert( TMath::Abs( ptrEv->GetVal(iv) - ev[iv] ) < 1e-10 ); } + delete ptrEv; + + std::cout << " constructor: Event( const std::vector<Float_t*>*& ) " << std::endl; + Float_t var1 = 1.2; + Float_t var2 = 3.3; + Float_t var3 = 0.0; + Float_t var4 = -0.12; + std::vector<Float_t*> *vars = new std::vector<Float_t*>(0); + vars->push_back( &var1 ); + vars->push_back( &var2 ); + vars->push_back( &var3 ); + vars->push_back( &var4 ); + + Event *evt2 = new Event((const std::vector<Float_t*>*&) vars ); + assert( TMath::Abs( var1 - evt2->GetVal( 0 ) ) < 1e-10 ); + delete evt2; + + std::cout << "---------- event -----END " << std::endl; + return 0; +} + + + + - bool CompareResult() { - fReffile = TFile::Open(fReferenceDataFileName); - if (!fReffile) { - fErrorMsg.push_back(TString(Form("ERROR: could not find reference file %s", fReferenceDataFileName.Data() ))); - return false; - } - bool success = kTRUE; - - if( fTests & CutsGA ) success &= Check("Cuts" , "CutsGA"); - if( fTests & Likelihood ) success &= Check("Likelihood" , ""); - if( fTests & LikelihoodPCA ) success &= Check("Likelihood" , "LikelihoodPCA"); - if( fTests & PDERS ) success &= Check("PDERS" , ""); - if( fTests & KNN ) success &= Check("KNN" , ""); - if( fTests & HMatrix ) success &= Check("HMatrix" , ""); - if( fTests & Fisher ) success &= Check("Fisher" , ""); - if( fTests & FDA_MT ) success &= Check("FDA" , "FDA_MT"); - if( fTests & MLP ) success &= Check("MLP" , ""); - if( fTests & SVM_Gauss ) success &= Check("SVM" , "SVM_Gauss"); - if( fTests & BDT ) success &= Check("BDT" , ""); - if( fTests & RuleFit ) success &= Check("RuleFit" , ""); - - fReffile->Close(); - return success; +int TMVATest::test_createDataSetFromTree(){ + std::cout << "---------- createDataSetFromTree -----START " << std::endl; + + prepareClassificationData(); + + TFile *f = TFile::Open( "testData.root" ); + TTree *treeS = (TTree*)f->Get( "TreeS" ); + TTree *treeB = (TTree*)f->Get( "TreeB" ); + + DataInputHandler *dih = new DataInputHandler(); + DataSetManager::CreateInstance(*dih); + + TString dsiName = "testDataSet"; + DataSetInfo* dsi = DataSetManager::Instance().GetDataSetInfo(dsiName); + if(dsi!=0){ + std::cout << "dataset with name " << dsiName << " already present." << std::endl; + return true; } + std::cout << "no dataset with name " << dsiName << " found. A new one will be created." 
<< std::endl; + + + dsi = new DataSetInfo(dsiName); + DataSetManager::Instance().AddDataSetInfo(*dsi); + + dsi->AddVariable( "var1" ); + dsi->AddVariable( "var2" ); + dsi->AddVariable( "var3" ); + dsi->AddVariable( "var4" ); + + + dih->AddSignalTree( treeS ); + dih->AddBackgroundTree( treeB ); - bool Check(const TString& classname, TString instancename="") { - if(instancename=="") instancename = classname; - TString hist(Form("Method_%s/%s/MVA_%s_effBvsS",classname.Data(),instancename.Data(),instancename.Data())); - TH1 * effBvsS[2]; - effBvsS[0] = (TH1*)fOutputfile->Get(hist); - effBvsS[1] = (TH1*)fReffile->Get(hist); - if(!effBvsS[0]) { - fErrorMsg.push_back(TString("Can not find test histogram ") + hist); + std::cout << " datasets for signal and background created ==> loop through events" << std::endl; + + DataSet* ds = DataSetManager::Instance().CreateDataSet( dsiName ); + +// ds = dsi->GetDataSet(); + UInt_t n = 0; + std::cout << "number of events=" << ds->GetNEvents() << std::endl; + for( UInt_t ievt=0; ievt<ds->GetNEvents(); ievt++ ){ + const Event *ev = ds->GetEvent(ievt); +// Float_t val = ev->GetVal(ievt); + if( n%5000== 0 ){ + std::cout << "<event=" << n << ":vars=" << ev->GetNVariables(); std::cout.flush(); + for( UInt_t ivar = 0; ivar < ev->GetNVariables(); ivar++ ){ + std::cout << "|" << ivar << "=" << ev->GetVal(ivar); std::cout.flush(); + } + std::cout << ">"; std::cout.flush(); } - if(!effBvsS[1]) { - fErrorMsg.push_back(TString("Can not find reference histogram ") + hist); + n++; + } + std::cout << std::endl; + + + std::cout << "---------- createDataSetFromTree -----END " << std::endl; + return 0; +} + + + + +int TMVATest::test_pdf_streams(){ + + std::cout << "---------- pdf_streams -----START " << std::endl; + // prepare a histogram for the production of a pdf + TH1F *hist = new TH1F( "histogram_test_pdf_streams", "histogram_test_pdf_streams", 10, 0.0,10.0 ); + // fill in some values + for( int i=0; i<100; i++ ){ + hist->Fill( i/10.0 ); + } + + // produce a pdf from the histogram + PDF *pdf = new PDF( "testPdf" ); + pdf->BuildPDF( hist ); + + // create an outstream + ofstream fout; + fout.open( "test_pdf_streams.temp" ); + + // dump pdf into the outstream (file) and close the file + fout<<*pdf; + fout.close(); + + // delete the pdf + //delete pdf; + + // prepare another pdf + TMVA::PDF *rec = new TMVA::PDF( "test2Pdf" ); + + // open the file again + ifstream fin ( "test_pdf_streams.temp" , ifstream::in ); + fin >> *rec; + + TH1 *hrec = rec->GetOriginalHist(); + + for( int i=0; i<10; i++ ){ + //std::cout << "check bin " << i << std::endl; + assert( hist->GetBin( i ) == hrec->GetBin( i ) ); + } + + // delete everything + delete rec; + delete pdf; + delete hist; + + std::cout << "---------- pdf_streams -----END " << std::endl; + return 0; +} + + + + + +int TMVATest::test_variable_transforms(){ + std::cout << "---------- variable_transforms -----START " << std::endl; + + test_createDataSetFromTree(); + + TString dsiName = "testDataSet"; + TMVA::DataSetInfo* dsi = TMVA::DataSetManager::Instance().GetDataSetInfo(dsiName); + + int returnValue = 0; + +// TMVA::Event::ClearDynamicVariables(); + + std::vector<VariableInfo> vars; + vars.push_back( VariableInfo( "var1", "var1Title", "var1unit", 0, 'F' ) ); + vars.push_back( VariableInfo( "var2", "var2Title", "var2unit", 0, 'F' ) ); + vars.push_back( VariableInfo( "var3", "var3Title", "var3unit", 0, 'F' ) ); + vars.push_back( VariableInfo( "var4", "var4Title", "var4unit", 0, 'F' ) ); + + // create some transforms + 
std::vector<VariableTransformBase*> vtb;
+   //   vtb.push_back( new TMVA::VariableGaussTransform( *dsi ) );
+   vtb.push_back( new TMVA::VariableIdentityTransform( *dsi ) );
+   vtb.push_back( new TMVA::VariableDecorrTransform( *dsi ) );
+   vtb.push_back( new TMVA::VariablePCATransform( *dsi ) );
+   vtb.push_back( new TMVA::VariableNormalizeTransform( *dsi ) );
+
+   // create a second set of transformations whose parameters will be
+   // read back from an input stream
+   std::vector<VariableTransformBase*> vtbRec;
+   //   vtbRec.push_back( new TMVA::VariableGaussTransform( *dsi ) );
+   vtbRec.push_back( new TMVA::VariableIdentityTransform( *dsi ) );
+   vtbRec.push_back( new TMVA::VariableDecorrTransform( *dsi ) );
+   vtbRec.push_back( new TMVA::VariablePCATransform( *dsi ) );
+   vtbRec.push_back( new TMVA::VariableNormalizeTransform( *dsi ) );
+
+   TRandom *rnd = new TRandom3();
+   std::vector<Event*> evts;
+   int nSig  = 100;
+   int nBack = 100;
+   for( int i=0; i<nSig+nBack; i++ ){
+      std::vector<Float_t> ev;
+      ev.push_back( rnd->Rndm()*(i/3.0) );
+      ev.push_back( (i<nSig?10.0:20.0)+rnd->Rndm()*i*i+i );
+      ev.push_back( (i<nSig?30.0:25.0)+rnd->Rndm()*i*i );
+      ev.push_back( 5.0+rnd->Rndm()*i );
+      Event *ptrEv = new Event(ev,(i<nSig?Types::kSignal : Types::kBackground) );
+      evts.push_back( ptrEv );
+      //      ptrEv->Print( std::cout );
+      for( UInt_t iv = 0; iv < ptrEv->GetNVars(); iv++ ){
+         //      std::cout << "iv : "<< iv << " val: " << ptrEv->GetVal( iv ) << " evval: " << ev[iv] << std::endl;
+         assert( TMath::Abs( ptrEv->GetVal( iv ) - ev[iv] ) < 1e-10 ); // difference between original value and value stored in event
      }
+   }
+
+   bool everythingOK = true;
+   std::vector<VariableTransformBase*>::iterator itRec = vtbRec.begin();
+   for( std::vector<VariableTransformBase*>::iterator it = vtb.begin(); it < vtb.end(); it++ ){
+      std::cout << ">TRANSFORMATION: " << (*it)->GetName() << std::endl;
+      (*it)->PrepareTransformation( evts );
+      // create an output stream
+      std::stringstream st;
+      st << "test_transformation_" << (*it)->GetName() << ".temp";
+
+      ofstream fout;
+      fout.open( st.str().c_str() );
+      (*it)->WriteTransformationToStream( fout );
+      fout.close();
+
+      // create an input stream
+      ifstream fin ( st.str().c_str() , ifstream::in );
+      (*itRec)->ReadTransformationFromStream( fin );
+      fin.close();
+
+      // now loop over all events and compare the transformed events of the
+      // original and the re-read transformation.
+ Int_t nCls = dsi->GetNClasses(); + if( nCls > 1 ) nCls++; + for( Int_t cls = 0; cls < nCls; cls ++ ){ + for( std::vector<Event*>::iterator itEv = evts.begin(); itEv < evts.end(); itEv++ ){ + const Event* evOriginal = (*it)->Transform( (*itEv), cls ); + const Event* evRecieved = (*itRec)->Transform( (*itEv), cls ); + for( UInt_t iv = 0; iv < (*itEv)->GetNVars(); iv++ ){ + bool isOK = TMath::Abs( evOriginal->GetVal( iv )-evRecieved->GetVal( iv ) )<1e-10; + if( !isOK ) std::cout << "iv " << iv << " orig: " << evOriginal->GetVal( iv ) << " rec: " << evRecieved->GetVal( iv ) << std::endl; + everythingOK &= isOK; + //assert( everythingOK ); // transformation of events ARE NOT EQUAL for original and re-read transformation + } + } + if(!everythingOK) { + std::cout << "ORIGINAL TRANSFORMATION" << std::endl; + (*it)->PrintTransformation( std::cout ); + std::cout << std::endl; + std::cout << "READ TRANSFORMATION" << std::endl; + (*itRec)->PrintTransformation( std::cout ); + std::cout << std::endl; + } + // if( everythingOK ){ + // std::cout << " transformations of events are EQUAL for original and copied transformation" << std::endl; + // }else{ + // std::cout << " transformations of events ARE NOT EQUAL for original and copied transformation" << std::endl; + // } + assert( everythingOK ); // transformation of events ARE NOT EQUAL for original and re-read transformation + returnValue &= int(everythingOK); + itRec++; // take as well the next transformation for the "receiver" transformation } + } + + std::cout << "---------- variable_transforms -----END " << std::endl; + return returnValue; +} - bool success = kTRUE; - if(distance[0]>5) { - fErrorMsg.push_back(TString(Form("Method %s shows disagreement of %g sigma (background efficiency at 50%% signal efficiency)", - classname.Data(),distance[0]))); - std::cout << beff[0][0] << " +/- " << beffErr[0][0] << " disagrees with reference " << beff[0][1] << " +/- " << beffErr[0][1] << " (" << distance[0] << ")" << std::endl; - success = kFALSE; - } else { - //std::cout << beff[0][0] << " +/- " << beffErr[0][0] << " agrees with reference " << beff[0][1] << " +/- " << beffErr[0][1] << " (" << distance[0] << ")" << std::endl; - } - if(distance[1]>5) { - fErrorMsg.push_back(TString(Form("Method %s shows disagreement of %g sigma (background efficiency at 80%% signal efficiency)", - classname.Data(),distance[1]))); - std::cout << beff[1][0] << " +/- " << beffErr[1][0] << " disagrees with reference " << beff[1][1] << " +/- " << beffErr[1][1] << " (" << distance[1] << ")" << std::endl; - success = kFALSE; - } else { - //std::cout << beff[1][0] << " +/- " << beffErr[1][0] << " agrees with reference " << beff[1][1] << " +/- " << beffErr[1][1] << " (" << distance[1] << ")" << std::endl; + + + +int TMVATest::test_factory_basic(){ + return 1; +} + + + + + + + + +int TMVATest::test_reader_basic(){ + std::cout << "---------- reader_basic -----START " << std::endl; + // + // create the Reader object + // + TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" ); + + // create a set of variables and declare them to the reader + // - the variable names must corresponds in name and type to + // those given in the weight file(s) that you use + Float_t var1, var2; + Float_t var3, var4; + reader->AddVariable( "var1+var2", &var1 ); + reader->AddVariable( "var1-var2", &var2 ); + reader->AddVariable( "var3", &var3 ); + reader->AddVariable( "var4", &var4 ); + // + // book a MVA method + // + std::string dir = "../macros/weights/"; + std::string prefix = "TMVAnalysis"; + 
reader->BookMVA( "Fisher method", dir + prefix + "_Fisher.weights.txt" );
+   TFile *input(0);
+   TString fname = "../macros/tmva_example.root";
+   if (!gSystem->AccessPathName( fname )) {
+      // first we try to find tmva_example.root in the local directory
+      std::cout << "--- Accessing data file: " << fname << std::endl;
+      input = TFile::Open( fname );
+   }
+   else {
+      // second we try accessing the file via the web from
+      // http://root.cern.ch/files/tmva_example.root
+      std::cout << "--- Accessing tmva_example.root file from http://root.cern.ch/files" << std::endl;
+      std::cout << "--- for faster startup you may consider downloading it into your local directory" << std::endl;
+      input = TFile::Open("http://root.cern.ch/files/tmva_example.root");
+   }
+   //
+   UInt_t nbin = 100;
+   TH1F *histFi;
+   histFi = new TH1F( "MVA_Fisher", "MVA_Fisher", nbin, -4, 4 );
+
+   // prepare the tree
+   // - here the variable names have to correspond to your tree
+   // - you can use the same variables as above which is slightly faster,
+   //   but of course you can use different ones and copy the values inside the event loop
+   //
+   TTree* theTree = (TTree*)input->Get("TreeS");
+   std::cout << "--- Select signal sample" << std::endl;
+   Float_t userVar1, userVar2;
+   theTree->SetBranchAddress( "var1", &userVar1 );
+   theTree->SetBranchAddress( "var2", &userVar2 );
+   theTree->SetBranchAddress( "var3", &var3 );
+   theTree->SetBranchAddress( "var4", &var4 );
+   std::cout << "--- Processing: " << theTree->GetEntries() << " events" << std::endl;
+   //TStopwatch sw;
+   //sw.Start();
+   for (Long64_t ievt=0; ievt<theTree->GetEntries();ievt++) {
+
+      if (ievt%1000 == 0){
+         std::cout << "--- ... Processing event: " << ievt << std::endl;
      }
+
+      theTree->GetEntry(ievt);
+
+      var1 = userVar1 + userVar2;
+      var2 = userVar1 - userVar2;
+      histFi->Fill( reader->EvaluateMVA( "Fisher method" ) );
   }
+   //sw.Stop();
+
+   // delete everything
+   delete reader;
+   delete histFi;
+
+   std::cout << "---------- reader_basic -----END " << std::endl;
+   return 0;
+}
-private:
-   TMVA::Factory *fFactory;
-   TString fTestDataFileName;
-   TString fReferenceDataFileName;
-   TString fOutputFileName;
-   TFile *fInputfile;
-   TFile *fOutputfile;
-   TFile *fReffile;
-   Int_t fTests;
-   Int_t fNEvents;
-   std::vector<TString> fErrorMsg;
-};
-int stressTMVA() {
-   TMVATest testsuite;
+} // namespace TMVA
+
+
+
+
+int main( int argc, char** argv ) {
+   TMVA::TMVATest testsuite;
    std::cout << "******************************************************************" << std::endl;
    std::cout << "*  TMVA - S T R E S S suite                                      *" << std::endl;
    std::cout << "******************************************************************" << std::endl;
-   // Prepare the dataset
-   if( ! testsuite.Initialize() ) {
-      std::cout << "Test 1: Data Preparation ............... FAILED" << std::endl;
-      testsuite.PrintError();
-      return 1;
-   }
+   int back = 999;
-   std::cout << "Test 1: Data Preparation ............... OK" << std::endl;
+   std::string what = "";
-   // Train MVAs using the set of training events
-   if( ! testsuite.TrainAndEval() ) {
-      std::cout << "Test 2: Training Methods ............... FAILED" << std::endl;
-      testsuite.PrintError();
-      return 1;
+   if( argc>1 ) {
+      what = argv[1];
    }
-   std::cout << "Test 2: Training Methods ............... OK" << std::endl;
-
-   // Compare output
-   if( ! testsuite.CompareResult() ) {
-      std::cout << "Test 3: Compare to Reference ........... FAILED" << std::endl;
-      testsuite.PrintError();
-      return 1;
+   if( what == "EVENT" )                    back = testsuite.test_event();
+   if( what == "CREATE_DATASET" )           back = testsuite.test_createDataSetFromTree();
+   if( what == "TEST_PDF_STREAMS" )         back = testsuite.test_pdf_streams();
+   if( what == "TEST_VARIABLE_TRANSFORMS" ) back = testsuite.test_variable_transforms();
+   if( what == "TEST_READER_BASIC" )        back = testsuite.test_reader_basic();
+
+   if( what == "" ){
+      back  = testsuite.test_event();
+      back += testsuite.test_createDataSetFromTree();
+//    back += testsuite.test_pdf_streams();
+//    back += testsuite.test_variable_transforms();
+//    back += testsuite.test_reader_basic();
    }
-   std::cout << "Test 3: Compare to Reference ........... OK" << std::endl;
-   return 0;
-}
+   if( back == 0 ) std::cout << "SUCCESSFUL: " << what << std::endl;
+   else            std::cout << "ERROR: " << what << std::endl;
-int main() {
-   return stressTMVA();
+
+   return back;
 }
+
+
+
+
+
+
+
+
+/* For the emacs users in the crowd.
+Local Variables:
+ c-basic-offset: 3
+End:
+*/
--
GitLab