From 6f18c7297b04153244ac54997585bee412cad88f Mon Sep 17 00:00:00 2001 From: Lorenzo Moneta <Lorenzo.Moneta@cern.ch> Date: Fri, 10 Dec 2010 13:44:57 +0000 Subject: [PATCH] from Kyle: add a set of new tutorials in RooStats which uses the histfactory as input. git-svn-id: http://root.cern.ch/svn/root/trunk@37503 27541ba8-7e3a-0410-8455-c3a389f83636 --- tutorials/roostats/StandardBayesianMCMCDemo.C | 131 +++++++++++++ .../roostats/StandardBayesianNumericalDemo.C | 145 ++++++++++++++ .../roostats/StandardFeldmanCousinsDemo.C | 179 ++++++++++++++++++ .../roostats/StandardProfileInspectorDemo.C | 120 ++++++++++++ .../roostats/StandardProfileLikelihoodDemo.C | 124 ++++++++++++ 5 files changed, 699 insertions(+) create mode 100644 tutorials/roostats/StandardBayesianMCMCDemo.C create mode 100644 tutorials/roostats/StandardBayesianNumericalDemo.C create mode 100644 tutorials/roostats/StandardFeldmanCousinsDemo.C create mode 100644 tutorials/roostats/StandardProfileInspectorDemo.C create mode 100644 tutorials/roostats/StandardProfileLikelihoodDemo.C diff --git a/tutorials/roostats/StandardBayesianMCMCDemo.C b/tutorials/roostats/StandardBayesianMCMCDemo.C new file mode 100644 index 00000000000..b0ebcf33478 --- /dev/null +++ b/tutorials/roostats/StandardBayesianMCMCDemo.C @@ -0,0 +1,131 @@ +/* +StandardProfileLikelihoodDemo + +Author: Kyle Cranmer +date: Dec. 2010 + +This is a standard demo that can be used with any ROOT file +prepared in the standard way. You specify: + - name for input ROOT file + - name of workspace inside ROOT file that holds model and data + - name of ModelConfig that specifies details for calculator tools + - name of dataset + +With default parameters the macro will attempt to run the +standard hist2workspace example and read the ROOT file +that it produces. + +The actual heart of the demo is only about 10 lines long. + +The MCMCCalculator is a Bayesian tool that uses +the Metropolis-Hastings algorithm to efficiently integrate +in many dimensions. It is not as accurate as the BayesianCalculator +for simple problems, but it scales to much more complicated cases. +*/ + +#include "TFile.h" +#include "TROOT.h" +#include "RooWorkspace.h" +#include "RooAbsData.h" + +#include "RooStats/ModelConfig.h" +#include "RooStats/MCMCCalculator.h" +#include "RooStats/MCMCInterval.h" +#include "RooStats/MCMCIntervalPlot.h" + +using namespace RooFit; +using namespace RooStats; + +void StandardBayesianMCMCDemo(const char* infile = "", + const char* workspaceName = "proto", + const char* modelConfigName = "ModelConfig", + const char* dataName = "expData"){ + + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// + const char* filename = ""; + if (!strcmp(infile,"")) + filename = "results/example_channel1_GammaExample_model.root"; + else + filename = infile; + // Check if example input file exists + TFile *file = TFile::Open(filename); + + // if input file was specified byt not found, quit + if(!file && strcmp(infile,"")){ + cout <<"file not found" << endl; + return; + } + + // if default file not found, try to create it + if(!file ){ + // Normally this would be run on the command line + cout <<"will run standard hist2workspace example"<<endl; + gROOT->ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<<endl; + cout <<"Done creating example input"<<endl; + cout <<"---------------------\n\n"<<endl; + } + + // now try to access the file again + file = TFile::Open(filename); + if(!file){ + // if it is still not there, then we can't continue + cout << "Not able to run hist2workspace to create example input" <<endl; + return; + } + + + ///////////////////////////////////////////////////////////// + // Tutorial starts here + //////////////////////////////////////////////////////////// + + // get the workspace out of the file + RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; + } + + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); + + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); + + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <<endl; + return; + } + + ///////////////////////////////////////////// + // create and use the MCMCCalculator + // to find and plot the 95% credible interval + // on the parameter of interest as specified + // in the model config + MCMCCalculator mcmc(*data,*mc); + mcmc.SetConfidenceLevel(0.95); // 95% interval + mcmc.SetNumIters(1000000); // Metropolis-Hastings algorithm iterations + mcmc.SetNumBurnInSteps(500); // first N steps to be ignored as burn-in + + // default is the shortest interval. here use central + mcmc.SetLeftSideTailFraction(0.5); // for central interval + + MCMCInterval* interval = mcmc.GetInterval(); + + // make a plot + MCMCIntervalPlot plot(*interval); + plot.Draw(); + + // print out the iterval on the first Parameter of Interest + RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); + cout << "\n95% interval on " <<firstPOI->GetName()<<" is : ["<< + interval->LowerLimit(*firstPOI) << ", "<< + interval->UpperLimit(*firstPOI) <<"] "<<endl; + +} diff --git a/tutorials/roostats/StandardBayesianNumericalDemo.C b/tutorials/roostats/StandardBayesianNumericalDemo.C new file mode 100644 index 00000000000..c812a0bc51d --- /dev/null +++ b/tutorials/roostats/StandardBayesianNumericalDemo.C @@ -0,0 +1,145 @@ +/* +StandardProfileLikelihoodDemo + +Author: Kyle Cranmer +date: Dec. 2010 + +This is a standard demo that can be used with any ROOT file +prepared in the standard way. You specify: + - name for input ROOT file + - name of workspace inside ROOT file that holds model and data + - name of ModelConfig that specifies details for calculator tools + - name of dataset + +With default parameters the macro will attempt to run the +standard hist2workspace example and read the ROOT file +that it produces. + +The actual heart of the demo is only about 10 lines long. + +The BayesianCalculator is based on Bayes's theorem +and performs the integration using ROOT's numeric integration utilities +*/ + +#include "TFile.h" +#include "TROOT.h" +#include "RooWorkspace.h" +#include "RooAbsData.h" +#include "RooRealVar.h" + +#include "RooUniform.h" +#include "RooStats/ModelConfig.h" +#include "RooStats/BayesianCalculator.h" +#include "RooStats/SimpleInterval.h" +#include "RooPlot.h" + +using namespace RooFit; +using namespace RooStats; + +void StandardBayesianNumericalDemo(const char* infile = "", + const char* workspaceName = "proto", + const char* modelConfigName = "ModelConfig", + const char* dataName = "expData"){ + + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// + + // note, use a different default file that BayesianCalculator can + // deal with. The direct numeric integration is limited to a few dimensions + const char* filename = ""; + if (!strcmp(infile,"")) + filename = "results/example_channel1_ConstExample_model.root"; + else + filename = infile; + // Check if example input file exists + TFile *file = TFile::Open(filename); + + // if input file was specified byt not found, quit + if(!file && strcmp(infile,"")){ + cout <<"file not found" << endl; + return; + } + + // if default file not found, try to create it + if(!file ){ + // Normally this would be run on the command line + cout <<"will run standard hist2workspace example"<<endl; + gROOT->ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<<endl; + cout <<"Done creating example input"<<endl; + cout <<"---------------------\n\n"<<endl; + } + + // now try to access the file again + file = TFile::Open(filename); + if(!file){ + // if it is still not there, then we can't continue + cout << "Not able to run hist2workspace to create example input" <<endl; + return; + } + + + ///////////////////////////////////////////////////////////// + // Tutorial starts here + //////////////////////////////////////////////////////////// + + // get the workspace out of the file + RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; + } + + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); + + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); + + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <<endl; + return; + } + + ///////////////////////////////////////////// + // create and use the BayesianCalculator + // to find and plot the 95% credible interval + // on the parameter of interest as specified + // in the model config + + // before we do that, we must specify our prior + // it belongs in the model config, but it may not have + // been specified + RooUniform prior("prior","",*mc->GetParametersOfInterest()); + w->import(prior); + mc->SetPriorPdf(*w->pdf("prior")); + + + BayesianCalculator bayesianCalc(*data,*mc); + bayesianCalc.SetConfidenceLevel(0.95); // 95% interval + + // default is the shortest interval. here use central + bayesianCalc.SetLeftSideTailFraction(0.5); // for central interval + + SimpleInterval* interval = bayesianCalc.GetInterval(); + + // make a plot (slightly inconsistent interface) + // since plotting may take a long time (it requires evaluating + // the posterior in many points) this command will speed up + // by reducing the number of points to plot - do 40 + bayesianCalc.SetScanOfPosterior(40); + RooPlot * plot = bayesianCalc.GetPosteriorPlot(); + plot->Draw(); + + // print out the iterval on the first Parameter of Interest + RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); + cout << "\n95% interval on " <<firstPOI->GetName()<<" is : ["<< + interval->LowerLimit() << ", "<< + interval->UpperLimit() <<"] "<<endl; + +} diff --git a/tutorials/roostats/StandardFeldmanCousinsDemo.C b/tutorials/roostats/StandardFeldmanCousinsDemo.C new file mode 100644 index 00000000000..26090f7d8b7 --- /dev/null +++ b/tutorials/roostats/StandardFeldmanCousinsDemo.C @@ -0,0 +1,179 @@ +/* +StandardFeldmanCousinsDemo + +Author: Kyle Cranmer +date: Dec. 2010 + +This is a standard demo that can be used with any ROOT file +prepared in the standard way. You specify: + - name for input ROOT file + - name of workspace inside ROOT file that holds model and data + - name of ModelConfig that specifies details for calculator tools + - name of dataset + +With default parameters the macro will attempt to run the +standard hist2workspace example and read the ROOT file +that it produces. + +The actual heart of the demo is only about 10 lines long. + +The FeldmanCousins tools is a classical frequentist calculation +based on the Neyman Construction. The test statistic can be +generalized for nuisance parameters by using the profile likeihood ratio. +But unlike the ProfileLikelihoodCalculator, this tool explicitly +builds the sampling distribution of the test statistic via toy Monte Carlo. +*/ + +#include "TFile.h" +#include "TROOT.h" +#include "TH1F.h" + +#include "RooWorkspace.h" +#include "RooAbsData.h" + +#include "RooStats/ModelConfig.h" +#include "RooStats/FeldmanCousins.h" +#include "RooStats/ToyMCSampler.h" +#include "RooStats/PointSetInterval.h" +#include "RooStats/ConfidenceBelt.h" + + +using namespace RooFit; +using namespace RooStats; + +void StandardFeldmanCousinsDemo(const char* infile = "", + const char* workspaceName = "proto", + const char* modelConfigName = "ModelConfig", + const char* dataName = "expData"){ + + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// + const char* filename = ""; + if (!strcmp(infile,"")) + filename = "results/example_channel1_GammaExample_model.root"; + else + filename = infile; + // Check if example input file exists + TFile *file = TFile::Open(filename); + + // if input file was specified byt not found, quit + if(!file && strcmp(infile,"")){ + cout <<"file not found" << endl; + return; + } + + // if default file not found, try to create it + if(!file ){ + // Normally this would be run on the command line + cout <<"will run standard hist2workspace example"<<endl; + gROOT->ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<<endl; + cout <<"Done creating example input"<<endl; + cout <<"---------------------\n\n"<<endl; + } + + // now try to access the file again + file = TFile::Open(filename); + if(!file){ + // if it is still not there, then we can't continue + cout << "Not able to run hist2workspace to create example input" <<endl; + return; + } + + + ///////////////////////////////////////////////////////////// + // Tutorial starts here + //////////////////////////////////////////////////////////// + + // get the workspace out of the file + RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; + } + + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); + + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); + + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <<endl; + return; + } + + ///////////////////////////////////////////// + // create and use the FeldmanCousins tool + // to find and plot the 95% confidence interval + // on the parameter of interest as specified + // in the model config + FeldmanCousins fc(*data,*mc); + fc.SetConfidenceLevel(0.95); // 95% interval + //fc.AdditionalNToysFactor(0.1); // to speed up the result + fc.UseAdaptiveSampling(true); // speed it up a bit + fc.SetNBins(10); // set how many points per parameter of interest to scan + fc.CreateConfBelt(true); // save the information in the belt for plotting + + // Since this tool needs to throw toy MC the PDF needs to be + // extended or the tool needs to know how many entries in a dataset + // per pseudo experiment. + // In the 'number counting form' where the entries in the dataset + // are counts, and not values of discriminating variables, the + // datasets typically only have one entry and the PDF is not + // extended. + if(!mc->GetPdf()->canBeExtended()){ + if(data->numEntries()==1) + fc.FluctuateNumDataEntries(false); + else + cout <<"Not sure what to do about this model" <<endl; + } + + // We can use PROOF to speed things along in parallel + // ProofConfig pc(*w, 1, "workers=4"); + // ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); + // toymcsampler->SetProofConfig(&pc); // enable proof + + + // Now get the interval + PointSetInterval* interval = fc.GetInterval(); + ConfidenceBelt* belt = fc.GetConfidenceBelt(); + + // print out the iterval on the first Parameter of Interest + RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); + cout << "\n95% interval on " <<firstPOI->GetName()<<" is : ["<< + interval->LowerLimit(*firstPOI) << ", "<< + interval->UpperLimit(*firstPOI) <<"] "<<endl; + + ////////////////////////////////////////////// + // No nice plots yet, so plot the belt by hand + + // Ask the calculator which points were scanned + RooDataSet* parameterScan = (RooDataSet*) fc.GetPointsToScan(); + RooArgSet* tmpPoint; + + // make a histogram of parameter vs. threshold + TH1F* histOfThresholds = new TH1F("histOfThresholds","", + parameterScan->numEntries(), + firstPOI->getMin(), + firstPOI->getMax()); + + // loop through the points that were tested and ask confidence belt + // what the upper/lower thresholds were. + // For FeldmanCousins, the lower cut off is always 0 + for(Int_t i=0; i<parameterScan->numEntries(); ++i){ + tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); + double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); + double arMin = belt->GetAcceptanceRegionMax(*tmpPoint); + double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; + histOfThresholds->Fill(poiVal,arMax); + } + histOfThresholds->SetMinimum(0); + histOfThresholds->Draw(); + +} diff --git a/tutorials/roostats/StandardProfileInspectorDemo.C b/tutorials/roostats/StandardProfileInspectorDemo.C new file mode 100644 index 00000000000..bd408fd0b1e --- /dev/null +++ b/tutorials/roostats/StandardProfileInspectorDemo.C @@ -0,0 +1,120 @@ +/* +StandardProfileInspectorDemo + +Author: Kyle Cranmer +date: Dec. 2010 + +This is a standard demo that can be used with any ROOT file +prepared in the standard way. You specify: + - name for input ROOT file + - name of workspace inside ROOT file that holds model and data + - name of ModelConfig that specifies details for calculator tools + - name of dataset + +With default parameters the macro will attempt to run the +standard hist2workspace example and read the ROOT file +that it produces. + +The actual heart of the demo is only about 10 lines long. + +The ProfileInspector plots the conditional maximum likelihood estimate +of each nuisance parameter in the model vs. the parameter of interest. +(aka. profiled value of nuisance parameter vs. parameter of interest) +(aka. best fit nuisance parameter with p.o.i fixed vs. parameter of interest) + +*/ + +#include "TFile.h" +#include "TROOT.h" +#include "TCanvas.h" +#include "TList.h" +#include "RooWorkspace.h" +#include "RooAbsData.h" + +#include "RooStats/ModelConfig.h" +#include "RooStats/ProfileInspector.h" + +using namespace RooFit; +using namespace RooStats; + +void StandardProfileInspectorDemo(const char* infile = "", + const char* workspaceName = "proto", + const char* modelConfigName = "ModelConfig", + const char* dataName = "expData"){ + + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// + const char* filename = ""; + if (!strcmp(infile,"")) + filename = "results/example_channel1_GammaExample_model.root"; + else + filename = infile; + // Check if example input file exists + TFile *file = TFile::Open(filename); + + // if input file was specified byt not found, quit + if(!file && strcmp(infile,"")){ + cout <<"file not found" << endl; + return; + } + + // if default file not found, try to create it + if(!file ){ + // Normally this would be run on the command line + cout <<"will run standard hist2workspace example"<<endl; + gROOT->ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<<endl; + cout <<"Done creating example input"<<endl; + cout <<"---------------------\n\n"<<endl; + } + + // now try to access the file again + file = TFile::Open(filename); + if(!file){ + // if it is still not there, then we can't continue + cout << "Not able to run hist2workspace to create example input" <<endl; + return; + } + + + ///////////////////////////////////////////////////////////// + // Tutorial starts here + //////////////////////////////////////////////////////////// + + // get the workspace out of the file + RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; + } + + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); + + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); + + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <<endl; + return; + } + + ////////////////////////////////////////////// + // now use the profile inspector + ProfileInspector p; + TList* list = p.GetListOfProfilePlots(*data,mc); + + // now make plots + TCanvas* c1 = new TCanvas("c1","ProfileInspectorDemo",800,200); + c1->Divide(list->GetSize()); + for(int i=0; i<list->GetSize(); ++i){ + c1->cd(i+1); + list->At(i)->Draw("al"); + } + +} diff --git a/tutorials/roostats/StandardProfileLikelihoodDemo.C b/tutorials/roostats/StandardProfileLikelihoodDemo.C new file mode 100644 index 00000000000..1c51e625743 --- /dev/null +++ b/tutorials/roostats/StandardProfileLikelihoodDemo.C @@ -0,0 +1,124 @@ +/* +StandardProfileLikelihoodDemo + +Author: Kyle Cranmer +date: Dec. 2010 + +This is a standard demo that can be used with any ROOT file +prepared in the standard way. You specify: + - name for input ROOT file + - name of workspace inside ROOT file that holds model and data + - name of ModelConfig that specifies details for calculator tools + - name of dataset + +With default parameters the macro will attempt to run the +standard hist2workspace example and read the ROOT file +that it produces. + +The actual heart of the demo is only about 10 lines long. + +The ProfileLikelihoodCalculator is based on Wilks's theorem +and the asymptotic properties of the profile likeihood ratio +(eg. that it is chi-square distributed for the true value). +*/ + +#include "TFile.h" +#include "TROOT.h" +#include "RooWorkspace.h" +#include "RooAbsData.h" + +#include "RooStats/ModelConfig.h" +#include "RooStats/ProfileLikelihoodCalculator.h" +#include "RooStats/LikelihoodInterval.h" +#include "RooStats/LikelihoodIntervalPlot.h" + +using namespace RooFit; +using namespace RooStats; + +void StandardProfileLikelihoodDemo(const char* infile = "", + const char* workspaceName = "proto", + const char* modelConfigName = "ModelConfig", + const char* dataName = "expData"){ + + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// + const char* filename = ""; + if (!strcmp(infile,"")) + filename = "results/example_channel1_GammaExample_model.root"; + else + filename = infile; + // Check if example input file exists + TFile *file = TFile::Open(filename); + + // if input file was specified byt not found, quit + if(!file && strcmp(infile,"")){ + cout <<"file not found" << endl; + return; + } + + // if default file not found, try to create it + if(!file ){ + // Normally this would be run on the command line + cout <<"will run standard hist2workspace example"<<endl; + gROOT->ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<<endl; + cout <<"Done creating example input"<<endl; + cout <<"---------------------\n\n"<<endl; + } + + // now try to access the file again + file = TFile::Open(filename); + if(!file){ + // if it is still not there, then we can't continue + cout << "Not able to run hist2workspace to create example input" <<endl; + return; + } + + + ///////////////////////////////////////////////////////////// + // Tutorial starts here + //////////////////////////////////////////////////////////// + + // get the workspace out of the file + RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; + } + + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); + + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); + + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <<endl; + return; + } + + ///////////////////////////////////////////// + // create and use the ProfileLikelihoodCalculator + // to find and plot the 95% confidence interval + // on the parameter of interest as specified + // in the model config + ProfileLikelihoodCalculator pl(*data,*mc); + pl.SetConfidenceLevel(0.95); // 95% interval + LikelihoodInterval* interval = pl.GetInterval(); + + // make a plot + LikelihoodIntervalPlot plot(interval); + plot.Draw(); + + // print out the iterval on the first Parameter of Interest + RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); + cout << "\n95% interval on " <<firstPOI->GetName()<<" is : ["<< + interval->LowerLimit(*firstPOI) << ", "<< + interval->UpperLimit(*firstPOI) <<"] "<<endl; + +} -- GitLab