From d404b9a1be4e7e1c80525421f7bdcdeb1a50bbf3 Mon Sep 17 00:00:00 2001 From: Manuel Tobias Schiller <manuel.schiller@nikhef.nl> Date: Tue, 19 Nov 2013 13:38:25 +0100 Subject: [PATCH] RooKeysPdf: faster initialisation of the lookup table The construction of a RooKeysPdf is now much faster for large data sets. This is achieved by carefully analysing where the different Gaussians that the code superimposes can actually contribute. If a contribution is smaller than the machine precision, the code stops trying to put in those contributions (it would be lost in rounding anyway), which leads to roughly 40 % faster construction of the class for large data sets. --- roofit/roofit/inc/RooKeysPdf.h | 5 +- roofit/roofit/src/RooKeysPdf.cxx | 214 +++++++++++++++++++------------ 2 files changed, 133 insertions(+), 86 deletions(-) diff --git a/roofit/roofit/inc/RooKeysPdf.h b/roofit/roofit/inc/RooKeysPdf.h index 2593e1510c0..790429115dc 100644 --- a/roofit/roofit/inc/RooKeysPdf.h +++ b/roofit/roofit/inc/RooKeysPdf.h @@ -50,9 +50,10 @@ protected: Double_t evaluate() const; private: + // how far you have to go out in a Gaussian until it is smaller than the + // machine precision + static const Double_t _nSigma; //! - Double_t evaluateFull(Double_t x) const; - Int_t _nEvents; Double_t *_dataPts; //[_nEvents] Double_t *_dataWgts; //[_nEvents] diff --git a/roofit/roofit/src/RooKeysPdf.cxx b/roofit/roofit/src/RooKeysPdf.cxx index b6b51c67850..8c0a6c93998 100644 --- a/roofit/roofit/src/RooKeysPdf.cxx +++ b/roofit/roofit/src/RooKeysPdf.cxx @@ -18,7 +18,7 @@ #include <limits> #include <algorithm> -#include <math.h> +#include <cmath> #include "Riostream.h" #include "TMath.h" @@ -51,6 +51,8 @@ ClassImp(RooKeysPdf) // END_HTML // +const Double_t RooKeysPdf::_nSigma = std::sqrt(-2. * + std::log(std::numeric_limits<Double_t>::epsilon())); //_____________________________________________________________________________ RooKeysPdf::RooKeysPdf() : _nEvents(0), _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0), @@ -135,66 +137,137 @@ RooKeysPdf::LoadDataSet( RooDataSet& data) { delete[] _dataWgts; delete[] _weights; - // make new arrays for data and weights to fill - _nEvents= (Int_t)data.numEntries(); - if (_mirrorLeft) _nEvents += data.numEntries(); - if (_mirrorRight) _nEvents += data.numEntries(); - - _dataPts = new Double_t[_nEvents]; - _dataWgts = new Double_t[_nEvents]; - _weights = new Double_t[_nEvents]; - _sumWgt = 0 ; - - Double_t x0(0); - Double_t x1(0); - Double_t x2(0); - - Int_t i, idata=0; - for (i=0; i<data.numEntries(); i++) { - const RooArgSet *values= data.get(i); - RooRealVar real= (RooRealVar&)(values->operator[](_varName)); - - _dataPts[idata]= real.getVal(); - _dataWgts[idata] = data.weight() ; - x0 += _dataWgts[idata] ; x1+=_dataWgts[idata]*_dataPts[idata]; x2+=_dataWgts[idata]*_dataPts[idata]*_dataPts[idata]; - idata++; - _sumWgt+= data.weight() ; - + // small helper structure + struct Data { + Double_t x; + Double_t w; + }; + // helper to order two Data structures + struct cmp { + inline bool operator()(const struct Data& a, const struct Data& b) const + { return a.x < b.x; } + }; + std::vector<Data> tmp; + tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries()); + Double_t x0 = 0., x1 = 0., x2 = 0.; + _sumWgt = 0.; + // read the data set into tmp and accumulate some statistics + RooRealVar& real = (RooRealVar&)(data.get()->operator[](_varName)); + for (Int_t i = 0; i < data.numEntries(); ++i) { + data.get(i); + const Double_t x = real.getVal(); + const Double_t w = data.weight(); + x0 += w; + x1 += w * x; + x2 += w * x * x; + _sumWgt += Double_t(1 + _mirrorLeft + _mirrorRight) * w; + + Data p; + p.x = x, p.w = w; + tmp.push_back(p); if (_mirrorLeft) { - _dataPts[idata]= 2*_lo - real.getVal(); - _dataWgts[idata]= data.weight() ; - _sumWgt+= data.weight() ; - idata++; + p.x = 2. * _lo - x; + tmp.push_back(p); } - if (_mirrorRight) { - _dataPts[idata] = 2*_hi - real.getVal(); - _dataWgts[idata] = data.weight() ; - _sumWgt+= data.weight() ; - idata++; + p.x = 2. * _hi - x; + tmp.push_back(p); } } + // sort the entire data set so that values of x are increasing + std::sort(tmp.begin(), tmp.end(), cmp()); + + // copy the sorted data set to its final destination + _nEvents = tmp.size(); + _dataPts = new Double_t[_nEvents]; + _dataWgts = new Double_t[_nEvents]; + for (unsigned i = 0; i < tmp.size(); ++i) { + _dataPts[i] = tmp[i].x; + _dataWgts[i] = tmp[i].w; + } + { + // free tmp + std::vector<Data> tmp2; + tmp2.swap(tmp); + } Double_t meanv=x1/x0; - Double_t sigmav=sqrt(x2/x0-meanv*meanv); - Double_t h=TMath::Power(Double_t(4)/Double_t(3),0.2)*TMath::Power(_nEvents,-0.2)*_rho; - Double_t hmin=h*sigmav*sqrt(2.)/10; - Double_t norm=h*sqrt(sigmav)/(2.0*sqrt(3.0)); + Double_t sigmav=std::sqrt(x2/x0-meanv*meanv); + Double_t h=std::pow(Double_t(4)/Double_t(3),0.2)*std::pow(_nEvents,-0.2)*_rho; + Double_t hmin=h*sigmav*std::sqrt(2.)/10; + Double_t norm=h*std::sqrt(sigmav)/(2.0*std::sqrt(3.0)); _weights=new Double_t[_nEvents]; for(Int_t j=0;j<_nEvents;++j) { - _weights[j]=norm/sqrt(g(_dataPts[j],h*sigmav)); + _weights[j]=norm/std::sqrt(g(_dataPts[j],h*sigmav)); if (_weights[j]<hmin) _weights[j]=hmin; } - for (i=0;i<_nPoints+1;++i) - _lookupTable[i]=evaluateFull( _lo+Double_t(i)*_binWidth ); - - + // The idea below is that beyond nSigma sigma, the value of the exponential + // in the Gaussian is well below the machine precision of a double, so it + // does not contribute any more. That way, we can limit how many bins of the + // binned approximation in _lookupTable we have to touch when filling it. + for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.; + for(Int_t j=0;j<_nEvents;++j) { + const Double_t xlo = std::min(_hi, + std::max(_lo, _dataPts[j] - _nSigma * _weights[j])); + const Double_t xhi = std::max(_lo, + std::min(_hi, _dataPts[j] + _nSigma * _weights[j])); + if (xlo >= xhi) continue; + const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.); + const Double_t weightratio = _dataWgts[j] / _weights[j]; + const Int_t binlo = std::floor((xlo - _lo) / _binWidth); + const Int_t binhi = _nPoints - std::floor((_hi - xhi) / _binWidth); + const Double_t x = (Double_t(_nPoints - binlo) * _lo + + Double_t(binlo) * _hi) / Double_t(_nPoints); + Double_t chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.); + for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) { + _lookupTable[k] += weightratio * std::exp(- chi * chi); + } + } + if (_asymLeft) { + for(Int_t j=0;j<_nEvents;++j) { + const Double_t xlo = std::min(_hi, + std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j])); + const Double_t xhi = std::max(_lo, + std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j])); + if (xlo >= xhi) continue; + const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.); + const Double_t weightratio = _dataWgts[j] / _weights[j]; + const Int_t binlo = std::floor((xlo - _lo) / _binWidth); + const Int_t binhi = _nPoints - std::floor((_hi - xhi) / _binWidth); + const Double_t x = (Double_t(_nPoints - binlo) * _lo + + Double_t(binlo) * _hi) / Double_t(_nPoints); + Double_t chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.); + for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) { + _lookupTable[k] -= weightratio * std::exp(- chi * chi); + } + } + } + if (_asymRight) { + for(Int_t j=0;j<_nEvents;++j) { + const Double_t xlo = std::min(_hi, + std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j])); + const Double_t xhi = std::max(_lo, + std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j])); + if (xlo >= xhi) continue; + const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.); + const Double_t weightratio = _dataWgts[j] / _weights[j]; + const Int_t binlo = std::floor((xlo - _lo) / _binWidth); + const Int_t binhi = _nPoints - std::floor((_hi - xhi) / _binWidth); + const Double_t x = (Double_t(_nPoints - binlo) * _lo + + Double_t(binlo) * _hi) / Double_t(_nPoints); + Double_t chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.); + for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) { + _lookupTable[k] -= weightratio * std::exp(- chi * chi); + } + } + } + static const Double_t sqrt2pi(std::sqrt(2*TMath::Pi())); + for (Int_t i=0;i<_nPoints+1;++i) + _lookupTable[i] /= sqrt2pi * _sumWgt; } - - //_____________________________________________________________________________ Double_t RooKeysPdf::evaluate() const { Int_t i = (Int_t)floor((Double_t(_x)-_lo)/_binWidth); @@ -279,50 +352,23 @@ Double_t RooKeysPdf::maxVal(Int_t code) const return max; } -//_____________________________________________________________________________ -Double_t RooKeysPdf::evaluateFull( Double_t x ) const { - Double_t y=0; - - for (Int_t i=0;i<_nEvents;++i) { - Double_t chi=(x-_dataPts[i])/_weights[i]; - y+=_dataWgts[i]*exp(-0.5*chi*chi)/_weights[i]; - - // if mirroring the distribution across either edge of - // the range ("Boundary Kernels"), pick up the additional - // contributions -// if (_mirrorLeft) { -// chi=(x-(2*_lo-_dataPts[i]))/_weights[i]; -// y+=exp(-0.5*chi*chi)/_weights[i]; -// } - if (_asymLeft) { - chi=(x-(2*_lo-_dataPts[i]))/_weights[i]; - y-=_dataWgts[i]*exp(-0.5*chi*chi)/_weights[i]; - } -// if (_mirrorRight) { -// chi=(x-(2*_hi-_dataPts[i]))/_weights[i]; -// y+=exp(-0.5*chi*chi)/_weights[i]; -// } - if (_asymRight) { - chi=(x-(2*_hi-_dataPts[i]))/_weights[i]; - y-=_dataWgts[i]*exp(-0.5*chi*chi)/_weights[i]; - } - } - - static const Double_t sqrt2pi(sqrt(2*TMath::Pi())); - return y/(sqrt2pi*_sumWgt); -} //_____________________________________________________________________________ Double_t RooKeysPdf::g(Double_t x,Double_t sigmav) const { - Double_t c=Double_t(1)/(2*sigmav*sigmav); - Double_t y=0; - for (Int_t i=0;i<_nEvents;++i) { - Double_t r=x-_dataPts[i]; - y+=exp(-c*r*r); + // since data is sorted, we can be a little faster because we know which data + // points contribute + Double_t* it = std::lower_bound(_dataPts, _dataPts + _nEvents, + x - _nSigma * sigmav); + if (it >= (_dataPts + _nEvents)) return 0.; + Double_t* iend = std::upper_bound(it, _dataPts + _nEvents, + x + _nSigma * sigmav); + for ( ; it < iend; ++it) { + const Double_t r = (x - *it) / sigmav; + y += std::exp(-0.5 * r * r); } - static const Double_t sqrt2pi(sqrt(2*TMath::Pi())); + static const Double_t sqrt2pi(std::sqrt(2*TMath::Pi())); return y/(sigmav*sqrt2pi*_nEvents); } -- GitLab