From cfab8bc51c22c9fe7dba042606025f9121ef6d44 Mon Sep 17 00:00:00 2001 From: Lorenzo Moneta <Lorenzo.Moneta@cern.ch> Date: Thu, 20 Mar 2014 16:25:22 +0100 Subject: [PATCH] - Fix the HypoTestInverter::Rebuild to re-set the parameter to the same initial value before generating the toys. Apply also several debug improvements - Apply some fixes for ProofLite - have now a deterministic seed making the result reproducible - allow for setting correctly the number of workers when running in ProofLite Improve StandardHypoTestINvMacro for the rebuild. Add an option to specify the configuration used for rebuilding the limit distribution --- .../roostats/inc/RooStats/HypoTestInverter.h | 2 +- roofit/roostats/inc/RooStats/ProofConfig.h | 47 +++++++-- roofit/roostats/inc/RooStats/RooStatsUtils.h | 3 + roofit/roostats/inc/RooStats/ToyMCStudy.h | 6 +- roofit/roostats/src/HypoTestInverter.cxx | 98 ++++++++++++++++++- .../src/ProfileLikelihoodTestStat.cxx | 14 ++- roofit/roostats/src/RooStatsUtils.cxx | 17 ++++ roofit/roostats/src/ToyMCSampler.cxx | 8 ++ roofit/roostats/src/ToyMCStudy.cxx | 42 ++++++-- tutorials/roostats/StandardHypoTestInvDemo.C | 93 ++++++++++++++++-- 10 files changed, 306 insertions(+), 24 deletions(-) diff --git a/roofit/roostats/inc/RooStats/HypoTestInverter.h b/roofit/roostats/inc/RooStats/HypoTestInverter.h index 6cc1976eb07..e78598f8fde 100644 --- a/roofit/roostats/inc/RooStats/HypoTestInverter.h +++ b/roofit/roostats/inc/RooStats/HypoTestInverter.h @@ -122,7 +122,7 @@ public: // function to rebuild the distributions SamplingDistribution * RebuildDistributions(bool isUpper=true, int nToys = 100, - TList * clsDist = 0, TList *clsbDist= 0, TList * clbDist = 0); + TList * clsDist = 0, TList *clsbDist= 0, TList * clbDist = 0, const char * outputfile = "HypoTestInverterRebuiltDist.root"); // get the test statistic TestStatistic * GetTestStatistic() const; diff --git a/roofit/roostats/inc/RooStats/ProofConfig.h b/roofit/roostats/inc/RooStats/ProofConfig.h index 4cab59355ed..3f88798bb19 100644 --- a/roofit/roostats/inc/RooStats/ProofConfig.h +++ b/roofit/roostats/inc/RooStats/ProofConfig.h @@ -44,19 +44,51 @@ END_HTML #include "RooWorkspace.h" #include "RooStudyManager.h" +#include "TROOT.h" + namespace RooStats { class ProofConfig { public: - ProofConfig(RooWorkspace &w, Int_t nExperiments = 8, const char *host = "", Bool_t showGui = kFALSE) : + + // configure proof with number of experiments and host session + // in case of Prooflite, it is better to define the number of workers as "worker=n" in the host string + + ProofConfig(RooWorkspace &w, Int_t nExperiments = 0, const char *host = "", Bool_t showGui = kFALSE) : fWorkspace(w), fNExperiments(nExperiments), fHost(host), fShowGui(showGui) { + + // case of ProofLite + if (fHost == "" || fHost.Contains("lite") ) { + fLite = true; + + + // get the default value of the machine - use CINT inetrface until we have a poper PROOF interface that we can call + int nMaxWorkers = gROOT->ProcessLineFast("TProofLite::GetNumberOfWorkers()"); + + if (nExperiments == 0) { + fNExperiments = nMaxWorkers; + } + + if (nExperiments > nMaxWorkers) + std::cout << "ProofConfig - Warning: using a number of workers = " << nExperiments << " which is larger than the number of cores in the machine " + << nMaxWorkers << std::endl; + + // set the number of workers in the Host string + fHost = TString::Format("workers=%d",fNExperiments); + } + else { + fLite = false; + // have always a default number of experiments + if (nExperiments == 0) fNExperiments = 8; + } } + virtual ~ProofConfig() { ProofConfig::CloseProof(); } @@ -65,19 +97,22 @@ class ProofConfig { static void CloseProof(Option_t *option = "s") { RooStudyManager::closeProof(option); } // returns fWorkspace - RooWorkspace& GetWorkspace(void) { return fWorkspace; } + RooWorkspace& GetWorkspace(void) const { return fWorkspace; } // returns fHost - const char* GetHost(void) { return fHost; } + const char* GetHost(void) const { return fHost; } // return fNExperiments - Int_t GetNExperiments(void) { return fNExperiments; } + Int_t GetNExperiments(void) const { return fNExperiments; } // return fShowGui - Bool_t GetShowGui(void) { return fShowGui; } + Bool_t GetShowGui(void) const { return fShowGui; } + // return true if it is a Lite session (ProofLite) + Bool_t IsLite() const { return fLite; } protected: RooWorkspace& fWorkspace; // workspace that is to be used with the RooStudyManager Int_t fNExperiments; // number of experiments. This is sometimes called "events" in proof; "experiments" in RooStudyManager. - const char* fHost; // Proof hostname. Use empty string (ie "") for proof-lite. Can also handle options like "workers=2" to run on two nodes. + TString fHost; // Proof hostname. Use empty string (ie "") for proof-lite. Can also handle options like "workers=2" to run on two nodes. Bool_t fShowGui; // Whether to show the Proof Progress window. + Bool_t fLite; // Whether we have a Proof Lite session protected: ClassDef(ProofConfig,1) // Configuration options for proof. diff --git a/roofit/roostats/inc/RooStats/RooStatsUtils.h b/roofit/roostats/inc/RooStats/RooStatsUtils.h index 3efb04ca8be..e3de93df6d3 100644 --- a/roofit/roostats/inc/RooStats/RooStatsUtils.h +++ b/roofit/roostats/inc/RooStats/RooStatsUtils.h @@ -123,6 +123,9 @@ namespace RooStats { // Create a TTree with the given name and description. All RooRealVars in the RooDataSet are represented as branches that contain values of type Double_t. TTree* GetAsTTree(TString name, TString desc, const RooDataSet& data); + // useful function to print in one line the content of a set with their values + void PrintListContent(const RooArgList & l, std::ostream & os = std::cout); + } diff --git a/roofit/roostats/inc/RooStats/ToyMCStudy.h b/roofit/roostats/inc/RooStats/ToyMCStudy.h index d872f892487..faad5e9a849 100644 --- a/roofit/roostats/inc/RooStats/ToyMCStudy.h +++ b/roofit/roostats/inc/RooStats/ToyMCStudy.h @@ -49,6 +49,7 @@ class ToyMCStudy: public RooAbsStudy { // need to have constructor without arguments for proof ToyMCStudy(const char *name = "ToyMCStudy", const char *title = "ToyMCStudy") : RooAbsStudy(name, title), + fRandomSeed(0), fToyMCSampler(NULL) { // In this case, this is the normal output. The SamplingDistribution @@ -70,13 +71,16 @@ class ToyMCStudy: public RooAbsStudy { void SetToyMCSampler(ToyMCSampler& t) { fToyMCSampler = &t; } void SetParamPoint(const RooArgSet& paramPoint) { fParamPoint.add(paramPoint); } + void SetRandomSeed(unsigned int seed) { fRandomSeed = seed; } + protected: + unsigned int fRandomSeed; ToyMCSampler *fToyMCSampler; RooArgSet fParamPoint; protected: - ClassDef(ToyMCStudy,1); // toy MC study for parallel processing + ClassDef(ToyMCStudy,2); // toy MC study for parallel processing }; diff --git a/roofit/roostats/src/HypoTestInverter.cxx b/roofit/roostats/src/HypoTestInverter.cxx index d67b14db768..9f6a7d479c7 100644 --- a/roofit/roostats/src/HypoTestInverter.cxx +++ b/roofit/roostats/src/HypoTestInverter.cxx @@ -37,6 +37,8 @@ The class can scan the CLs+b values or alternativly CLs (if the method HypoTestI #include <cmath> #include "TF1.h" +#include "TFile.h" +#include "TH1.h" #include "TLine.h" #include "TCanvas.h" #include "TGraphErrors.h" @@ -1020,6 +1022,8 @@ SamplingDistribution * HypoTestInverter::GetUpperLimitDistribution(bool rebuild, // for obtained the observed limit and no extra toys will be generated // if rebuild a new set of B toys will be done and the procedure will be repeted // for each toy + // The nuisance parameter value used for rebuild is the current one in the model + // so it is user responsability to set to the desired value (nomi if (!rebuild) { if (!fResults) { oocoutE((TObject*)0,InputArguments) << "HypoTestInverter::GetUpperLimitDistribution(false) - result not existing\n"; @@ -1040,11 +1044,14 @@ void HypoTestInverter::SetData(RooAbsData & data) { if (fCalculator0) fCalculator0->SetData(data); } -SamplingDistribution * HypoTestInverter::RebuildDistributions(bool isUpper, int nToys, TList * clsDist, TList * clsbDist, TList * clbDist) { +SamplingDistribution * HypoTestInverter::RebuildDistributions(bool isUpper, int nToys, TList * clsDist, TList * clsbDist, TList * clbDist, const char *outputfile) { // rebuild the sampling distributions by // generating some toys and find for each of theam a new upper limit // Return the upper limit distribution and optionally also the pValue distributions for Cls, Clsb and Clbxs // as a TList for each scanned point + // The method uses the present parameter value. It is user responsability to give the current parameters to rebuild the distributions + // It returns a upper or lower limit distribution depending on the isUpper flag, however it computes also the lower limit distribution and it is saved in the + // output file as an histogram if (!fScannedVariable || !fCalculator0) return 0; // get first background snapshot @@ -1110,15 +1117,66 @@ SamplingDistribution * HypoTestInverter::RebuildDistributions(bool isUpper, int oocoutI((TObject*)0,InputArguments) << "HypoTestInverter - rebuilding the p value distributions by generating ntoys = " << nToys << std::endl; + + oocoutI((TObject*)0,InputArguments) << "Rebuilding using parameter of interest point: "; + RooStats::PrintListContent(paramPoint, oocoutI((TObject*)0,InputArguments) ); + if (sbModel->GetNuisanceParameters() ) { + oocoutI((TObject*)0,InputArguments) << "And using nuisance parameters: "; + RooStats::PrintListContent(*sbModel->GetNuisanceParameters(), oocoutI((TObject*)0,InputArguments) ); + } + // save all parameters to restore them later + assert(bModel->GetPdf() ); + assert(bModel->GetObservables() ); + RooArgSet * allParams = bModel->GetPdf()->getParameters( *bModel->GetObservables() ); + RooArgSet saveParams; + allParams->snapshot(saveParams); + + TFile * fileOut = TFile::Open(outputfile,"RECREATE"); + if (!fileOut) { + oocoutE((TObject*)0,InputArguments) << "HypoTestInverter - RebuildDistributions - Error opening file " << outputfile + << " - the resulting limits will not be stored" << std::endl; + } + // create temporary histograms to store the limit result + TH1D * hL = new TH1D("lowerLimitDist","Rebuilt lower limit distribution",100,1.,0.); + TH1D * hU = new TH1D("upperLimitDist","Rebuilt upper limit distribution",100,1.,0.); + hL->SetBuffer(2*nToys); + hU->SetBuffer(2*nToys); + std::vector<TH1*> hCLb; + std::vector<TH1*> hCLsb; + std::vector<TH1*> hCLs; + if (storePValues) { + for (int i = 0; i < nPoints; ++i) { + hCLb.push_back(new TH1D(TString::Format("CLbDist_bin%d",i),"CLb distribution",100,1.,0.)); + hCLs.push_back(new TH1D(TString::Format("ClsDist_bin%d",i),"CLs distribution",100,1.,0.)); + hCLsb.push_back(new TH1D(TString::Format("CLsbDist_bin%d",i),"CLs+b distribution",100,1.,0.)); + } + } + // loop now on the toys for (int itoy = 0; itoy < nToys; ++itoy) { oocoutP((TObject*)0,Eval) << "HypoTestInverter - RebuildDistributions - running toy # " << itoy << " / " << nToys << std::endl; + + // reset parameters to initial values to be sure in case they are not reset + if (itoy> 0) *allParams = saveParams; + + // need to set th epdf to clear the cache in ToyMCSampler + // pdf we must use is background pdf + toymcSampler->SetPdf(*bModel->GetPdf() ); + + RooAbsData * bkgdata = toymcSampler->GenerateToyData(paramPoint); + // for debugging in case of number counting models + if (bkgdata->numEntries() ==1) { + oocoutP((TObject*)0,Generation) << "Generate observables are : "; + RooArgList genObs(*bkgdata->get(0)); + RooStats::PrintListContent(genObs, oocoutP((TObject*)0,Generation) ); + } + // by copying I will have the same min/max as previous ones HypoTestInverter inverter = *this; inverter.SetData(*bkgdata); @@ -1129,6 +1187,17 @@ SamplingDistribution * HypoTestInverter::RebuildDistributions(bool isUpper, int double value = (isUpper) ? r->UpperLimit() : r->LowerLimit(); limit_values.push_back( value ); + hU->Fill(r->UpperLimit() ); + hL->Fill(r->LowerLimit() ); + + + std::cout << "The computed upper limit for toy #" << itoy << " is " << value << std::endl; + + // write every 10 toys + if (itoy%10 == 0 || itoy == nToys-1) { + hU->Write("",TObject::kOverwrite); + hL->Write("",TObject::kOverwrite); + } if (!storePValues) continue; @@ -1146,12 +1215,24 @@ SamplingDistribution * HypoTestInverter::RebuildDistributions(bool isUpper, int CLs_values[ipoint].push_back( hr->CLs() ); CLsb_values[ipoint].push_back( hr->CLsplusb() ); CLb_values[ipoint].push_back( hr->CLb() ); + hCLs[ipoint]->Fill( hr->CLs() ); + hCLb[ipoint]->Fill( hr->CLb() ); + hCLsb[ipoint]->Fill( hr->CLsplusb() ); } else { oocoutW((TObject*)0,InputArguments) << "HypoTestInverter: missing result for point: x = " << fResults->GetXValue(ipoint) << std::endl; } } + // write every 10 toys + if (itoy%10 == 0 || itoy == nToys-1) { + for (int ipoint = 0; ipoint < nPoints; ++ipoint) { + hCLs[ipoint]->Write("",TObject::kOverwrite); + hCLb[ipoint]->Write("",TObject::kOverwrite); + hCLsb[ipoint]->Write("",TObject::kOverwrite); + } + } + delete r; delete bkgdata; @@ -1180,6 +1261,21 @@ SamplingDistribution * HypoTestInverter::RebuildDistributions(bool isUpper, int } } } + + if (fileOut) { + fileOut->Close(); + } + else { + // delete all the histograms + delete hL; + delete hU; + for (int i = 0; i < nPoints && storePValues; ++i) { + delete hCLs[i]; + delete hCLb[i]; + delete hCLsb[i]; + } + } + const char * disName = (isUpper) ? "upperLimit_dist" : "lowerLimit_dist"; return new SamplingDistribution(disName, disName, limit_values); } diff --git a/roofit/roostats/src/ProfileLikelihoodTestStat.cxx b/roofit/roostats/src/ProfileLikelihoodTestStat.cxx index 0b772242072..50c3f3c9df4 100644 --- a/roofit/roostats/src/ProfileLikelihoodTestStat.cxx +++ b/roofit/roostats/src/ProfileLikelihoodTestStat.cxx @@ -83,6 +83,11 @@ Double_t RooStats::ProfileLikelihoodTestStat::EvaluateProfileLikelihood(int type if (fPrintLevel > 1) cout << "reusing NLL " << fNll << " new data = " << &data << endl ; fNll->setData(data,kFALSE) ; } + // print data in case of number counting (simple data sets) + if (fPrintLevel > 1 && data.numEntries() == 1) { + std::cout << "Data set used is: "; + RooStats::PrintListContent(*data.get(0), std::cout); + } // make sure we set the variables attached to this nll @@ -112,6 +117,7 @@ Double_t RooStats::ProfileLikelihoodTestStat::EvaluateProfileLikelihood(int type if (type != 2) { // minimize and count eval errors fNll->clearEvalErrorLog(); + if (fPrintLevel>1) std::cout << "Do unconditional fit" << std::endl; RooFitResult* result = GetMinNLL(); if (result) { uncondML = result->minNll(); @@ -153,6 +159,8 @@ Double_t RooStats::ProfileLikelihoodTestStat::EvaluateProfileLikelihood(int type if (doConditionalFit) { + if (fPrintLevel>1) std::cout << "Do conditional fit " << std::endl; + // cout <<" reestablish snapshot"<<endl; *attachedSet = *snap; @@ -204,8 +212,12 @@ Double_t RooStats::ProfileLikelihoodTestStat::EvaluateProfileLikelihood(int type else if (type == 2) pll = condML; else { pll = condML-uncondML; + if (fSigned) { - if (pll<0.0) pll = 0.0; // bad fit + if (pll<0.0) { + if (fPrintLevel > 0) std::cout << "pll is negative - setting it to zero " << std::endl; + pll = 0.0; // bad fit + } if (fLimitType==oneSidedDiscovery ? (fit_favored_mu < initial_mu_value) : (fit_favored_mu > initial_mu_value)) pll = -pll; diff --git a/roofit/roostats/src/RooStatsUtils.cxx b/roofit/roostats/src/RooStatsUtils.cxx index 6fe5d011c4e..09b85eb4b43 100644 --- a/roofit/roostats/src/RooStatsUtils.cxx +++ b/roofit/roostats/src/RooStatsUtils.cxx @@ -271,4 +271,21 @@ namespace RooStats { return myTree; } + + // useful function to print in one line the content of a set with their values + void PrintListContent(const RooArgList & l, std::ostream & os ) { + bool first = true; + os << "( "; + for (int i = 0; i< l.getSize(); ++i) { + if (first) { + first=kFALSE ; + } else { + os << ", " ; + } + l[i].printName(os); + os << " = "; + l[i].printValue(os); + } + os << ")\n"; + } } diff --git a/roofit/roostats/src/ToyMCSampler.cxx b/roofit/roostats/src/ToyMCSampler.cxx index 7389126ca87..7602e26a4f3 100644 --- a/roofit/roostats/src/ToyMCSampler.cxx +++ b/roofit/roostats/src/ToyMCSampler.cxx @@ -30,6 +30,7 @@ #include "RooStudyManager.h" #include "RooStats/ToyMCStudy.h" #include "RooStats/DetailedOutputAggregator.h" +#include "RooStats/RooStatsUtils.h" #include "RooSimultaneous.h" #include "RooCategory.h" @@ -324,6 +325,7 @@ RooDataSet* ToyMCSampler::GetSamplingDistributions(RooArgSet& paramPointIn) ToyMCStudy* toymcstudy = new ToyMCStudy ; toymcstudy->SetToyMCSampler(*this); toymcstudy->SetParamPoint(paramPointIn); + toymcstudy->SetRandomSeed(RooRandom::randomGenerator()->Integer(TMath::Limits<unsigned int>::Max() ) ); // temporary workspace for proof to avoid messing with TRef RooWorkspace w(fProofConfig->GetWorkspace()); @@ -626,6 +628,12 @@ RooAbsData* ToyMCSampler::Generate(RooAbsPdf &pdf, RooArgSet &observables, const } } } + + // in case of number counting print observables + // if (data->numEntries() == 1) { + // std::cout << "generate observables : "; + // RooStats::PrintListContent(*data->get(0), std::cout); + // } return data; } diff --git a/roofit/roostats/src/ToyMCStudy.cxx b/roofit/roostats/src/ToyMCStudy.cxx index c77d593753a..5a2f1b71611 100644 --- a/roofit/roostats/src/ToyMCStudy.cxx +++ b/roofit/roostats/src/ToyMCStudy.cxx @@ -18,6 +18,10 @@ #endif #include "RooRandom.h" +#include "TRandom2.h" +#include "TMath.h" + +#include "TEnv.h" @@ -34,21 +38,40 @@ namespace RooStats { Bool_t ToyMCStudy::initialize(void) { coutP(Generation) << "initialize" << endl; - //coutI(InputArguments) << "SetSeed(0)" << endl; - //RooRandom::randomGenerator()->SetSeed(0); - coutI(InputArguments) << "Seed is: " << RooRandom::randomGenerator()->GetSeed() << endl; - if(!fToyMCSampler) { coutE(InputArguments) << "Need an instance of ToyMCSampler to run." << endl; + return kFALSE; }else{ coutI(InputArguments) << "Using given ToyMCSampler." << endl; } + + TString worknumber = gEnv->GetValue("ProofServ.Ordinal","undef"); + int iworker = -1; + if (worknumber != "undef") { + iworker = int( worknumber.Atof()*10 + 0.1); + + // generate a seed using + std::cout << "Current global seed is " << fRandomSeed << std::endl; + TRandom2 r(fRandomSeed ); + // get a seed using the iworker-value + unsigned int seed = r.Integer(TMath::Limits<unsigned int>::Max() ); + for (int i = 0; i< iworker; ++i) + seed = r.Integer(TMath::Limits<unsigned int>::Max() ); + + // initialize worker using seed from ToyMCSampler + RooRandom::randomGenerator()->SetSeed(seed); + } + + coutI(InputArguments) << "Worker " << iworker << " seed is: " << RooRandom::randomGenerator()->GetSeed() << endl; + return kFALSE; } // _____________________________________________________________________________ Bool_t ToyMCStudy::execute(void) { + + coutP(Generation) << "ToyMCStudy::execute - run with seed " << RooRandom::randomGenerator()->Integer(TMath::Limits<unsigned int>::Max() ) << std::endl; RooDataSet* sd = fToyMCSampler->GetSamplingDistributionsSingleWorker(fParamPoint); ToyMCPayload *sdw = new ToyMCPayload(sd); storeDetailedOutput(*sdw); @@ -58,7 +81,7 @@ Bool_t ToyMCStudy::execute(void) { // _____________________________________________________________________________ Bool_t ToyMCStudy::finalize(void) { - coutP(Generation) << "finalize" << endl; + coutP(Generation) << "ToyMCStudy::finalize" << endl; if(fToyMCSampler) delete fToyMCSampler; fToyMCSampler = NULL; @@ -68,16 +91,17 @@ Bool_t ToyMCStudy::finalize(void) { RooDataSet* ToyMCStudy::merge() { - coutP(Generation) << "merge" << endl; + RooDataSet* samplingOutput = NULL; if(!detailedData()) { - coutE(Generation) << "No detailed output present." << endl; + coutE(Generation) << "ToyMCStudy::merge No detailed output present." << endl; return NULL; } RooLinkedListIter iter = detailedData()->iterator(); TObject *o = NULL; + int i = 0; while((o = iter.Next())) { ToyMCPayload *oneWorker = dynamic_cast< ToyMCPayload* >(o); if(!oneWorker) { @@ -86,10 +110,14 @@ RooDataSet* ToyMCStudy::merge() { } if( !samplingOutput ) samplingOutput = new RooDataSet(*oneWorker->GetSamplingDistributions()); + else samplingOutput->append( *oneWorker->GetSamplingDistributions() ); + i++; //delete oneWorker; } + coutP(Generation) << "Merged data from nworkers # " << i << "- merged data size is " << samplingOutput->numEntries() << std::endl; + return samplingOutput; } diff --git a/tutorials/roostats/StandardHypoTestInvDemo.C b/tutorials/roostats/StandardHypoTestInvDemo.C index 766a0194a88..12169642b46 100644 --- a/tutorials/roostats/StandardHypoTestInvDemo.C +++ b/tutorials/roostats/StandardHypoTestInvDemo.C @@ -71,11 +71,15 @@ bool noSystematics = false; // force all systematics to be off (i.e // to their nominal values) double nToysRatio = 2; // ratio Ntoys S+b/ntoysB double maxPOI = -1; // max value used of POI (in case of auto scan) -bool useProof = false; // use Proof Light when using toys (for freq or hybrid) -int nworkers = 4; // number of worker for Proof +bool useProof = false; // use Proof Lite when using toys (for freq or hybrid) +int nworkers = 0; // number of worker for ProofLite (default use all available cores) bool rebuild = false; // re-do extra toys for computing expected limits and rebuild test stat // distributions (N.B this requires much more CPU (factor is equivalent to nToyToRebuild) int nToyToRebuild = 100; // number of toys used to rebuild +int rebuildParamValues=0; // = 0 do a profile of all the parameters on the B (alt snapshot) before performing a rebuild operation (default) + // = 1 use initial workspace parameters with B snapshot values + // = 2 use all initial workspace parameters with B + // Otherwise the rebuild will be performed using int initialFit = -1; // do a first fit to the model (-1 : default, 0 skip fit, 1 do always fit) int randomSeed = -1; // random seed (if = -1: use default value, if = 0 always random ) // NOTE: Proof uses automatically a random seed @@ -140,6 +144,7 @@ namespace RooStats { bool mReuseAltToys; int mNWorkers; int mNToyToRebuild; + int mRebuildParamValues; int mPrintLevel; int mInitialFit; int mRandomSeed; @@ -163,6 +168,7 @@ RooStats::HypoTestInvTool::HypoTestInvTool() : mPlotHypoTestResult(true), mReuseAltToys(false), mNWorkers(4), mNToyToRebuild(100), + mRebuildParamValues(0), mPrintLevel(0), mInitialFit(-1), mRandomSeed(-1), @@ -208,6 +214,7 @@ RooStats::HypoTestInvTool::SetParameter(const char * name, int value){ if (s_name.find("NWorkers") != std::string::npos) mNWorkers = value; if (s_name.find("NToyToRebuild") != std::string::npos) mNToyToRebuild = value; + if (s_name.find("RebuildParamValues") != std::string::npos) mRebuildParamValues = value; if (s_name.find("PrintLevel") != std::string::npos) mPrintLevel = value; if (s_name.find("InitialFit") != std::string::npos) mInitialFit = value; if (s_name.find("RandomSeed") != std::string::npos) mRandomSeed = value; @@ -367,6 +374,7 @@ StandardHypoTestInvDemo(const char * infile = 0, calc.SetParameter("Rebuild", rebuild); calc.SetParameter("ReuseAltToys", reuseAltToys); calc.SetParameter("NToyToRebuild", nToyToRebuild); + calc.SetParameter("RebuildParamValues", rebuildParamValues); calc.SetParameter("MassValue", massValue.c_str()); calc.SetParameter("MinimizerType", minimizerType.c_str()); calc.SetParameter("PrintLevel", printLevel); @@ -470,8 +478,14 @@ RooStats::HypoTestInvTool::AnalyzeResult( HypoTestInverterResult * r, mResultFileName += name; } + // get (if existing) rebuilt UL distribution + TFile * fileULDist = TFile::Open("RULDist.root"); + TObject * ulDist = (fileULDist) ? fileULDist->Get("RULDist") : 0; + TFile * fileOut = new TFile(mResultFileName,"RECREATE"); r->Write(); + if (ulDist) ulDist->Write(); + fileOut->Close(); } @@ -639,7 +653,11 @@ RooStats::HypoTestInvTool::RunInverter(RooWorkspace * w, } } - + // save all initial parameters of the model including the global observables + RooArgSet initialParameters; + RooArgSet * allParams = sbModel->GetPdf()->getParameters(*data); + allParams->snapshot(initialParameters); + delete allParams; // run first a data fit @@ -916,8 +934,51 @@ RooStats::HypoTestInvTool::RunInverter(RooWorkspace * w, HypoTestInverterResult * r = calc.GetInterval(); std::cout << "Time to perform limit scan \n"; tw.Print(); - + if (mRebuild) { + + std::cout << "\n***************************************************************\n"; + std::cout << "Rebuild the upper limit distribution by re-generating new set of pseudo-experiment and re-compute for each of them a new upper limit\n\n"; + + + allParams = sbModel->GetPdf()->getParameters(*data); + + // define on which value of nuisance parameters to do the rebuild + // default is best fit value for bmodel snapshot + + + + if (mRebuildParamValues != 0) { + // set all parameters to their initial workspace values + *allParams = initialParameters; + } + if (mRebuildParamValues == 0 || mRebuildParamValues == 1 ) { + RooArgSet constrainParams; + if (sbModel->GetNuisanceParameters() ) constrainParams.add(*sbModel->GetNuisanceParameters()); + RooStats::RemoveConstantParameters(&constrainParams); + + const RooArgSet * poiModel = sbModel->GetParametersOfInterest(); + bModel->LoadSnapshot(); + + // do a profile using the B model snapshot + if (mRebuildParamValues == 0 ) { + + RooStats::SetAllConstant(*poiModel,true); + + sbModel->GetPdf()->fitTo(*data,InitialHesse(false), Hesse(false), + Minimizer(minimizerType.c_str(),"Migrad"), Strategy(0), PrintLevel(mPrintLevel), Constrain(constrainParams) ); + + + std::cout << "rebuild using fitted parameter value for B-model snapshot" << std::endl; + constrainParams.Print("v"); + + RooStats::SetAllConstant(*poiModel,false); + } + } + std::cout << "StandardHypoTestInvDemo: Initial parameters used for rebuilding: "; + RooStats::PrintListContent(*allParams, std::cout); + delete allParams; + calc.SetCloseProof(1); tw.Start(); SamplingDistribution * limDist = calc.GetUpperLimitDistribution(true,mNToyToRebuild); @@ -925,9 +986,27 @@ RooStats::HypoTestInvTool::RunInverter(RooWorkspace * w, tw.Print(); if (limDist) { - std::cout << "expected up limit " << limDist->InverseCDF(0.5) << " +/- " - << limDist->InverseCDF(0.16) << " " - << limDist->InverseCDF(0.84) << "\n"; + std::cout << "Expected limits after rebuild distribution " << std::endl; + std::cout << "expected upper limit (median of limit distribution) " << limDist->InverseCDF(0.5) << std::endl; + std::cout << "expected -1 sig limit (0.16% quantile of limit dist) " << limDist->InverseCDF(ROOT::Math::normal_cdf(-1)) << std::endl; + std::cout << "expected +1 sig limit (0.84% quantile of limit dist) " << limDist->InverseCDF(ROOT::Math::normal_cdf(1)) << std::endl; + std::cout << "expected -2 sig limit (.025% quantile of limit dist) " << limDist->InverseCDF(ROOT::Math::normal_cdf(-2)) << std::endl; + std::cout << "expected +2 sig limit (.975% quantile of limit dist) " << limDist->InverseCDF(ROOT::Math::normal_cdf(2)) << std::endl; + + // Plot the upper limit distribution + SamplingDistPlot limPlot( (mNToyToRebuild < 200) ? 50 : 100); + limPlot.AddSamplingDistribution(limDist); + limPlot.GetTH1F()->SetStats(true); // display statistics + limPlot.SetLineColor(kBlue); + new TCanvas("limPlot","Upper Limit Distribution"); + limPlot.Draw(); + + /// save result in a file + limDist->SetName("RULDist"); + TFile * fileOut = new TFile("RULDist.root","RECREATE"); + limDist->Write(); + fileOut->Close(); + //update r to a new updated result object containing the rebuilt expected p-values distributions // (it will not recompute the expected limit) -- GitLab