diff --git a/hist/hist/inc/TF1.h b/hist/hist/inc/TF1.h
index 5513533e9ee9b56a695be5b973cd1cf4af7f3bb3..2efdf396d713a1375d961c251d1f05919e9891d4 100644
--- a/hist/hist/inc/TF1.h
+++ b/hist/hist/inc/TF1.h
@@ -789,8 +789,7 @@ inline T TF1::EvalParTempl(const T *data, const Double_t *params)
 }
 
 #ifdef R__HAS_VECCORE
-
-// Internal to TF1. Evaluates Vectorized TF1 on data of type Double_t
+// Internal to TF1. Evaluates Vectorized TF1 on data of type Double_v
 inline double TF1::EvalParVec(const Double_t *data, const Double_t *params)
 {
    assert(fType == EFType::kTemplVec);
diff --git a/math/mathcore/CMakeLists.txt b/math/mathcore/CMakeLists.txt
index ece31de602473bad9fad5d14a57b4210b40ad4dd..9499c36ac9671728db14ac46128fe54ba69d37e9 100644
--- a/math/mathcore/CMakeLists.txt
+++ b/math/mathcore/CMakeLists.txt
@@ -1,5 +1,5 @@
 ############################################################################
-# CMakeLists.txt file for building ROOT math/MathCore package
+# CMakeLists.txt file for building ROOT io/io package
 ############################################################################
 
 set(MATHCORE_HEADERS TRandom.h
diff --git a/math/mathcore/inc/Fit/Chi2FCN.h b/math/mathcore/inc/Fit/Chi2FCN.h
index 765da5e420c13790914257963e986f8261c36e05..d33d67fee6ed2522ac0330d407dec188d66f7b47 100644
--- a/math/mathcore/inc/Fit/Chi2FCN.h
+++ b/math/mathcore/inc/Fit/Chi2FCN.h
@@ -22,7 +22,8 @@
 
 #include "Fit/BinData.h"
 
-#include "Fit/EvaluateChi2.hxx"
+
+#include "Fit/FitUtil.h"
 
 #include <memory>
 
@@ -119,13 +120,14 @@ public:
    /// i-th chi-square residual
    virtual double DataElement(const double *x, unsigned int i, double *g) const {
       if (i==0) this->UpdateNCalls();
-      return FitUtil::Chi2<T>::EvalResidual(BaseFCN::ModelFunction(), BaseFCN::Data(), x, i, g);
+      return FitUtil::Evaluate<T>::EvalChi2Residual(BaseFCN::ModelFunction(), BaseFCN::Data(), x, i, g);
    }
 
    // need to be virtual to be instantiated
    virtual void Gradient(const double *x, double *g) const {
       // evaluate the chi2 gradient
-      FitUtil::Chi2<T>::EvalGradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g, fNEffPoints, fExecutionPolicy);
+      FitUtil::Evaluate<T>::EvalChi2Gradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g, fNEffPoints,
+                                             fExecutionPolicy);
    }
 
    /// get type of fit method function
@@ -145,9 +147,9 @@ private:
    virtual double DoEval (const double * x) const {
       this->UpdateNCalls();
       if (BaseFCN::Data().HaveCoordErrors() || BaseFCN::Data().HaveAsymErrors())
-         return FitUtil::Chi2<T>::EvalEffective(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fNEffPoints);
+         return FitUtil::Evaluate<T>::EvalChi2Effective(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fNEffPoints);
       else
-         return FitUtil::Chi2<T>::Eval(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fNEffPoints, fExecutionPolicy);
+         return FitUtil::Evaluate<T>::EvalChi2(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fNEffPoints, fExecutionPolicy);
    }
 
    // for derivatives
diff --git a/math/mathcore/inc/Fit/EvaluateChi2.hxx b/math/mathcore/inc/Fit/EvaluateChi2.hxx
deleted file mode 100644
index 84173014fb80457c17d52bc47956f9035987df6e..0000000000000000000000000000000000000000
--- a/math/mathcore/inc/Fit/EvaluateChi2.hxx
+++ /dev/null
@@ -1,409 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-// Header file for class FitUtil
-
-#ifndef ROOT_Fit_EvaluateChi2
-#define ROOT_Fit_EvaluateChi2
-
-#include "Math/IParamFunctionfwd.h"
-#include "Math/IParamFunction.h"
-
-#ifdef R__USE_IMT
-#include "ROOT/TThreadExecutor.hxx"
-#endif
-#include "ROOT/TSequentialExecutor.hxx"
-
-#include "Fit/BinData.h"
-#include "Fit/FitExecutionPolicy.h"
-#include "Fit/FitUtil.h"
-
-#include "TError.h"
-
-// using parameter cache is not thread safe but needed for normalizing the functions
-#define USE_PARAMCACHE
-
-namespace ROOT {
-
-namespace Fit {
-
-/**
-   namespace defining utility free functions using in Fit for evaluating the various fit method
-   functions (chi2, likelihood, etc..)  given the data and the model function
-
-   @ingroup FitMain
-*/
-namespace FitUtil {
-
-template <class T>
-struct Chi2 {
-#ifdef R__HAS_VECCORE
-
-   /**
-    Evaluate the Chi2 given a model function and the data at the point x.
-   */
-   static double Eval(const IModelFunctionTempl<T> &func, const BinData &data, const double *p, unsigned int &nPoints,
-                      ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
-   {
-      // normal chi2 using only error on values (from fitting histogram)
-      // optionally the integral of function in the bin is used
-
-
-      unsigned int n = data.Size();
-      nPoints = data.Size(); // npoints
-
-      // set parameters of the function to cache integral value
-#ifdef USE_PARAMCACHE
-      (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
-#endif
-      // do not cache parameter values (it is not thread safe)
-      // func.SetParameters(p);
-
-      // get fit option and check case if using integral of bins
-      const DataOptions &fitOpt = data.Opt();
-      if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
-         Error(
-            "Chi2<T>::EvaluateChi2",
-            "The vectorized implementation doesn't support Integrals, BinVolume or ExpErrors\n. Aborting operation.");
-
-      (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
-
-      double maxResValue = std::numeric_limits<double>::max() / n;
-      std::vector<double> ones{1, 1, 1, 1};
-      auto vecSize = vecCore::VectorSize<T>();
-
-      auto mapFunction = [&](unsigned int i) {
-         // in case of no error in y invError=1 is returned
-         T x1, y, invErrorVec;
-         vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
-         vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
-         const auto invError = data.ErrorPtr(i * vecSize);
-         auto invErrorptr = (invError != nullptr) ? invError : &ones.front();
-         vecCore::Load<T>(invErrorVec, invErrorptr);
-
-         const T *x;
-         std::vector<T> xc;
-         if (data.NDim() > 1) {
-            xc.resize(data.NDim());
-            xc[0] = x1;
-            for (unsigned int j = 1; j < data.NDim(); ++j)
-               vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
-            x = xc.data();
-         } else {
-            x = &x1;
-         }
-
-         T fval(0.);
-
-#ifdef USE_PARAMCACHE
-         fval = func(x);
-#else
-         fval = func(x, p);
-#endif
-
-         T tmp = (y - fval) * invErrorVec;
-         T chi2 = tmp * tmp;
-
-         // avoid inifinity or nan in chi2 values due to wrong function values
-         auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
-
-         vecCore::MaskedAssign<T>(chi2, m, maxResValue);
-
-         return chi2;
-      };
-
-      auto redFunction = [](const std::vector<T> &objs) { return std::accumulate(objs.begin(), objs.end(), T(0.)); };
-
-#ifndef R__USE_IMT
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("Chi2<T>::EvaluateChi2", "Multithread execution policy requires IMT, which is disabled. Changing "
-                                          "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      T res(0.);
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         ROOT::TSequentialExecutor pool;
-         res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction);
-#ifdef R__USE_IMT
-      } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
-         res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
-#endif
-      } else {
-         Error("Chi2<T>::EvaluateChi2",
-               "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-               "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-      }
-
-      // Last SIMD vector of elements (if padding needed)
-      if (data.Size() % vecSize != 0)
-         vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
-                               res + mapFunction(data.Size() / vecSize));
-
-      return vecCore::ReduceAdd(res);
-   }
-
-   static double EvalEffective(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int &)
-   {
-      Error("Chi2<T>::EvaluateChi2<T>::EvalEffective",
-            "The vectorized evaluation of the Chi2 with coordinate errors is still not supported");
-      return -1.;
-   }
-
-   /**
-       Evaluate the Chi2 gradient given a vectorized model function and the data at the point x.
-   */
-   static void EvalGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
-                            unsigned int &nPoints,
-                            ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
-                            unsigned nChunks = 0)
-   {
-      // evaluate the gradient of the chi2 function
-      // this function is used when the model function knows how to calculate the derivative and we can
-      // avoid that the minimizer re-computes them
-      //
-      // case of chi2 effective (errors on coordinate) is not supported
-
-      if (data.HaveCoordErrors()) {
-         MATH_ERROR_MSG("Chi2<T>::EvaluateChi2Gradient",
-                        "Error on the coordinates are not used in calculating Chi2 gradient");
-         return; // it will assert otherwise later in GetPoint
-      }
-
-      const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
-      assert(fg != nullptr); // must be called by a gradient function
-
-      const IGradModelFunctionTempl<T> &func = *fg;
-
-      const DataOptions &fitOpt = data.Opt();
-      if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
-         Error("Chi2<T>::EvaluateChi2Gradient", "The vectorized implementation doesn't support Integrals,"
-                                                "BinVolume or ExpErrors\n. Aborting operation.");
-
-      unsigned int npar = func.NPar();
-      auto vecSize = vecCore::VectorSize<T>();
-      unsigned initialNPoints = data.Size();
-      unsigned numVectors = initialNPoints / vecSize;
-
-      // numVectors + 1 because of the padded data (call to mapFunction with i = numVectors after the main loop)
-      std::vector<vecCore::Mask<T>> validPointsMasks(numVectors + 1);
-
-      auto mapFunction = [&](const unsigned int i) {
-         // set all vector values to zero
-         std::vector<T> gradFunc(npar);
-         std::vector<T> pointContributionVec(npar);
-
-         T x1, y, invError;
-
-         vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
-         vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
-         const auto invErrorPtr = data.ErrorPtr(i * vecSize);
-
-         if (invErrorPtr == nullptr)
-            invError = 1;
-         else
-            vecCore::Load<T>(invError, invErrorPtr);
-
-         // TODO: Check error options and invert if needed
-
-         T fval = 0;
-
-         const T *x = nullptr;
-
-         unsigned int ndim = data.NDim();
-         // need to declare vector outside if statement
-         // otherwise pointer will be invalid
-         std::vector<T> xc;
-         if (ndim > 1) {
-            xc.resize(ndim);
-            xc[0] = x1;
-            for (unsigned int j = 1; j < ndim; ++j)
-               vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
-            x = xc.data();
-         } else {
-            x = &x1;
-         }
-
-         fval = func(x, p);
-         func.ParameterGradient(x, p, &gradFunc[0]);
-
-         validPointsMasks[i] = isFinite(fval);
-         if (vecCore::MaskEmpty(validPointsMasks[i])) {
-            // Return a zero contribution to all partial derivatives on behalf of the current points
-            return pointContributionVec;
-         }
-
-         // loop on the parameters
-         for (unsigned int ipar = 0; ipar < npar; ++ipar) {
-            // avoid singularity in the function (infinity and nan ) in the chi2 sum
-            // eventually add possibility of excluding some points (like singularity)
-            validPointsMasks[i] = isFinite(gradFunc[ipar]);
-
-            if (vecCore::MaskEmpty(validPointsMasks[i])) {
-               break; // exit loop on parameters
-            }
-
-            // calculate derivative point contribution (only for valid points)
-            vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[i],
-                                  -2.0 * (y - fval) * invError * invError * gradFunc[ipar]);
-         }
-
-         return pointContributionVec;
-      };
-
-      // Reduce the set of vectors by summing its equally-indexed components
-      auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
-         std::vector<T> result(npar);
-
-         for (auto const &pointContributionVec : partialResults) {
-            for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-               result[parameterIndex] += pointContributionVec[parameterIndex];
-         }
-
-         return result;
-      };
-
-      std::vector<T> gVec(npar);
-      std::vector<double> g(npar);
-
-#ifndef R__USE_IMT
-      // to fix compiler warning
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("Chi2<T>::EvaluateChi2Gradient",
-                 "Multithread execution policy requires IMT, which is disabled. Changing "
-                 "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         ROOT::TSequentialExecutor pool;
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
-      }
-#ifdef R__USE_IMT
-      else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
-      }
-#endif
-      else {
-         Error("Chi2<T>::EvaluateChi2Gradient",
-               "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
-      }
-
-      // Compute the contribution from the remaining points
-      unsigned int remainingPoints = initialNPoints % vecSize;
-      if (remainingPoints > 0) {
-         auto remainingPointsContribution = mapFunction(numVectors);
-         // Add the contribution from the valid remaining points and store the result in the output variable
-         auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
-         for (unsigned int param = 0; param < npar; param++) {
-            vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
-         }
-      }
-      // reduce final gradient result from T to double
-      for (unsigned int param = 0; param < npar; param++) {
-         grad[param] = vecCore::ReduceAdd(gVec[param]);
-      }
-
-      // correct the number of points
-      nPoints = initialNPoints;
-
-      if (std::any_of(validPointsMasks.begin(), validPointsMasks.end(),
-                      [](vecCore::Mask<T> validPoints) { return !vecCore::MaskFull(validPoints); })) {
-         T nRejected_v(0);
-         T zeros(0.);
-         for (auto mask : validPointsMasks) {
-            T partial(1.);
-            vecCore::MaskedAssign(partial, mask, zeros);
-            nRejected_v += partial;
-         }
-
-         auto nRejected = vecCore::ReduceAdd(nRejected_v);
-
-         assert(nRejected <= initialNPoints);
-         nPoints = initialNPoints - nRejected;
-
-         if (nPoints < npar) {
-            MATH_ERROR_MSG("Chi2<T>::EvaluateChi2Gradient",
-                           "Too many points rejected for overflow in gradient calculation");
-         }
-      }
-   }
-
-   static double EvalResidual(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int, double *)
-   {
-      Error("Chi2<T>::EvaluateChi2<T>::EvalResidual",
-            "The vectorized evaluation of the Chi2 with the ith residual is still not supported");
-      return -1.;
-   }
-
-   // Compute a mask to filter out infinite numbers and NaN values.
-   // The argument rval is updated so infinite numbers and NaN values are replaced by
-   // maximum finite values (preserving the original sign).
-   static vecCore::Mask<T> isFinite(T &rval)
-   {
-      return rval > -vecCore::NumericLimits<T>::Max() && rval < vecCore::NumericLimits<T>::Max();
-   }
-};
-
-template <>
-struct Chi2<double> {
-#endif
-
-   /**
-    Evaluate the Chi2 given a model function and the data at the point x.
-   */
-   static double Eval(const IModelFunction &func, const BinData &data, const double *p, unsigned int &nPoints,
-                      ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0);
-
-   /**
-    Evaluate the effective Chi2 given a model function and the data at the point x.
-    The effective chi2 uses the errors on the coordinates : W = 1/(sigma_y**2 + ( sigma_x_i * df/dx_i )**2 )
-   */
-   static double EvalEffective(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, unsigned int &nPoints);
-
-   /**
-    Evaluate the Chi2 gradient given a model function and the data at the point x.
-   */
-   static void EvalGradient(const IModelFunctionTempl<double> &f, const BinData &data, const double *p, double *grad,
-                            unsigned int &nPoints,
-                            ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
-                            unsigned nChunks = 0);
-
-   // methods required by dedicate minimizer like Fumili
-
-   /**
-    Evaluate the residual contribution to the Chi2 given a model function and the BinPoint data
-    and if the pointer g is not null evaluate also the gradient of the residual.
-    If the function provides parameter derivatives they are used otherwise a simple derivative calculation
-    is used
-   */
-   static double EvalResidual(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
-                              unsigned int i, double *g = 0);
-};
-
-} // end namespace FitUtil
-
-} // end namespace Fit
-
-} // end namespace ROOT
-
-#endif /* ROOT_Fit_FitUtil */
diff --git a/math/mathcore/inc/Fit/EvaluateLogL.hxx b/math/mathcore/inc/Fit/EvaluateLogL.hxx
deleted file mode 100644
index c7c3f832b677aaf9bb41ac56bf411b2b61e6b9b9..0000000000000000000000000000000000000000
--- a/math/mathcore/inc/Fit/EvaluateLogL.hxx
+++ /dev/null
@@ -1,543 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-#ifndef ROOT_Fit_EvaluateLogL
-#define ROOT_Fit_EvaluateLogL
-
-#include "Math/IParamFunctionfwd.h"
-#include "Math/IParamFunction.h"
-
-#ifdef R__USE_IMT
-#include "ROOT/TThreadExecutor.hxx"
-#endif
-#include "ROOT/TSequentialExecutor.hxx"
-
-#include "Fit/BinData.h"
-#include "Fit/UnBinData.h"
-#include "Fit/FitExecutionPolicy.h"
-#include "Fit/FitUtil.h"
-
-#include "Math/Integrator.h"
-#include "Math/IntegratorMultiDim.h"
-
-#include "TError.h"
-
-// using parameter cache is not thread safe but needed for normalizing the functions
-#define USE_PARAMCACHE
-
-namespace ROOT {
-
-namespace Fit {
-
-/**
-   namespace defining utility free functions using in Fit for evaluating the various fit method
-   functions (chi2, likelihood, etc..)  given the data and the model function
-
-   @ingroup FitMain
-*/
-namespace FitUtil {
-
-// internal class defining
-template <class T>
-class LikelihoodAux {
-public:
-   LikelihoodAux(T logv = 0., T w = 0., T w2 = 0.) : logvalue(logv), weight(w), weight2(w2) {}
-
-   LikelihoodAux operator+(const LikelihoodAux &l) const
-   {
-      return LikelihoodAux<T>(logvalue + l.logvalue, weight + l.weight, weight2 + l.weight2);
-   }
-
-   LikelihoodAux &operator+=(const LikelihoodAux &l)
-   {
-      logvalue += l.logvalue;
-      weight += l.weight;
-      weight2 += l.weight2;
-      return *this;
-   }
-
-   T logvalue;
-   T weight;
-   T weight2;
-};
-
-/**
-  Evaluate the pdf contribution to the LogL given a model function and the BinPoint data.
-  If the pointer g is not null evaluate also the gradient of the pdf.
-  If the function provides parameter derivatives they are used otherwise a simple derivative calculation
-  is used
-*/
-double EvaluatePdf(const IModelFunction &func, const UnBinData &data, const double *x, unsigned int ipoint, double *g = 0);
-
-#ifdef R__HAS_VECCORE
-/**
-  Evaluate the pdf contribution to the LogL given a model function and the BinPoint data.
-  If the pointer g is not null evaluate also the gradient of the pdf.
-  If the function provides parameter derivatives they are used otherwise a simple derivative calculation
-  is used
-*/
-template <class NotCompileIfScalarBackend = std::enable_if<!(std::is_same<double, ROOT::Double_v>::value)>>
-double EvaluatePdf(const IModelFunctionTempl<ROOT::Double_v> &func, const UnBinData &data, const double *p,
-                   unsigned int i, double *)
-{
-   // Evaluate the pdf contribution to the generic logl function in case of bin data
-   // return actually the log of the pdf and its derivatives
-   // func.SetParameters(p);
-   const auto x = vecCore::FromPtr<ROOT::Double_v>(data.GetCoordComponent(i, 0));
-   auto fval = func(&x, p);
-   auto logPdf = ROOT::Math::Util::EvalLog(fval);
-   return vecCore::Get<ROOT::Double_v>(logPdf, 0);
-}
-#endif
-
-template <class T>
-struct LogL {
-#ifdef R__HAS_VECCORE
-
-   /**
-    Evaluate the LogL given a vectorized model function and the data at the point x.
-   */
-   static double Eval(const IModelFunctionTempl<T> &func, const UnBinData &data, const double *const p, int iWeight,
-                      bool extended, unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy,
-                      unsigned nChunks = 0)
-   {
-      // evaluate the LogLikelihood
-      unsigned int n = data.Size();
-      nPoints = data.Size(); // npoints
-
-      // unsigned int nRejected = 0;
-      bool normalizeFunc = false;
-
-      // set parameters of the function to cache integral value
-#ifdef USE_PARAMCACHE
-      (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
-#endif
-
-#ifdef R__USE_IMT
-      // in case parameter needs to be propagated to user function use trick to set parameters by calling one time the
-      // function this will be done in sequential mode and parameters can be set in a thread safe manner
-      if (!normalizeFunc) {
-         if (data.NDim() == 1) {
-            T x;
-            vecCore::Load<T>(x, data.GetCoordComponent(0, 0));
-            func(&x, p);
-         } else {
-            std::vector<T> x(data.NDim());
-            for (unsigned int j = 0; j < data.NDim(); ++j)
-               vecCore::Load<T>(x[j], data.GetCoordComponent(0, j));
-            func(x.data(), p);
-         }
-      }
-#endif
-
-      // this is needed if function must be normalized
-      double norm = 1.0;
-      if (normalizeFunc) {
-         // compute integral of the function
-         std::vector<double> xmin(data.NDim());
-         std::vector<double> xmax(data.NDim());
-         IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
-         // compute integral in the ranges where is defined
-         if (data.Range().Size() > 0) {
-            norm = 0;
-            for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
-               data.Range().GetRange(&xmin[0], &xmax[0], ir);
-               norm += igEval.Integral(xmin.data(), xmax.data());
-            }
-         } else {
-            // use (-inf +inf)
-            data.Range().GetRange(&xmin[0], &xmax[0]);
-            // check if funcition is zero at +- inf
-            T xmin_v, xmax_v;
-            vecCore::Load<T>(xmin_v, xmin.data());
-            vecCore::Load<T>(xmax_v, xmax.data());
-            if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
-               MATH_ERROR_MSG("FitUtil::LogL<T>::Eval",
-                              "A range has not been set and the function is not zero at +/- inf");
-               return 0;
-            }
-            norm = igEval.Integral(&xmin[0], &xmax[0]);
-         }
-      }
-
-      // needed to compute effective global weight in case of extended likelihood
-
-      auto vecSize = vecCore::VectorSize<T>();
-      unsigned int numVectors = n / vecSize;
-
-      auto mapFunction = [&, p](const unsigned i) {
-         T W(0.);
-         T W2(0.);
-         T fval(0.);
-
-         (void)p; /* avoid unused lambda capture warning if PARAMCACHE is disabled */
-
-         T x1;
-         vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
-         const T *x = nullptr;
-         unsigned int ndim = data.NDim();
-         std::vector<T> xc;
-         if (ndim > 1) {
-            xc.resize(ndim);
-            xc[0] = x1;
-            for (unsigned int j = 1; j < ndim; ++j)
-               vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
-            x = xc.data();
-         } else {
-            x = &x1;
-         }
-
-#ifdef USE_PARAMCACHE
-         fval = func(x);
-#else
-         fval = func(x, p);
-#endif
-
-#ifdef DEBUG_FITUTIL
-         if (i < 5 || (i > numVectors - 5)) {
-            if (ndim == 1)
-               std::cout << i << "  x " << x[0] << " fval = " << fval;
-            else
-               std::cout << i << "  x " << x[0] << " y " << x[1] << " fval = " << fval;
-         }
-#endif
-
-         if (normalizeFunc)
-            fval = fval * (1 / norm);
-
-         // function EvalLog protects against negative or too small values of fval
-         auto logval = ROOT::Math::Util::EvalLog(fval);
-         if (iWeight > 0) {
-            T weight(0.);
-            if (data.WeightsPtr(i) == nullptr)
-               weight = 1;
-            else
-               vecCore::Load<T>(weight, data.WeightsPtr(i * vecSize));
-            logval *= weight;
-            if (iWeight == 2) {
-               logval *= weight; // use square of weights in likelihood
-               if (!extended) {
-                  // needed sum of weights and sum of weight square if likelkihood is extended
-                  W = weight;
-                  W2 = weight * weight;
-               }
-            }
-         }
-#ifdef DEBUG_FITUTIL
-         if (i < 5 || (i > numVectors - 5)) {
-            std::cout << "   " << fval << "  logfval " << logval << std::endl;
-         }
-#endif
-
-         return LikelihoodAux<T>(logval, W, W2);
-      };
-
-      auto redFunction = [](const std::vector<LikelihoodAux<T>> &objs) {
-         return std::accumulate(objs.begin(), objs.end(), LikelihoodAux<T>(),
-                                [](const LikelihoodAux<T> &l1, const LikelihoodAux<T> &l2) { return l1 + l2; });
-      };
-
-#ifndef R__USE_IMT
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("FitUtil::LogL<T>::Eval", "Multithread execution policy requires IMT, which is disabled. Changing "
-                                          "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      T logl_v(0.);
-      T sumW_v(0.);
-      T sumW2_v(0.);
-      ROOT::Fit::FitUtil::LikelihoodAux<T> resArray;
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         ROOT::TSequentialExecutor pool;
-         resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction);
-#ifdef R__USE_IMT
-      } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
-         resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
-#endif
-      } else {
-         Error("FitUtil::LogL<T>::Eval",
-               "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-               "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-      }
-
-      logl_v = resArray.logvalue;
-      sumW_v = resArray.weight;
-      sumW2_v = resArray.weight2;
-
-      // Compute the contribution from the remaining points ( Last padded SIMD vector of elements )
-      unsigned int remainingPoints = n % vecSize;
-      if (remainingPoints > 0) {
-         auto remainingPointsContribution = mapFunction(numVectors);
-         // Add the contribution from the valid remaining points and store the result in the output variable
-         auto remainingMask = ::vecCore::Int2Mask<T>(remainingPoints);
-         vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
-         vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
-         vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
-      }
-
-      // reduce vector type to double.
-      double logl = vecCore::ReduceAdd(logl_v);
-      double sumW = vecCore::ReduceAdd(sumW_v);
-      double sumW2 = vecCore::ReduceAdd(sumW2_v);
-
-      if (extended) {
-         // add Poisson extended term
-         double extendedTerm = 0; // extended term in likelihood
-         double nuTot = 0;
-         // nuTot is integral of function in the range
-         // if function has been normalized integral has been already computed
-         if (!normalizeFunc) {
-            IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
-            std::vector<double> xmin(data.NDim());
-            std::vector<double> xmax(data.NDim());
-
-            // compute integral in the ranges where is defined
-            if (data.Range().Size() > 0) {
-               nuTot = 0;
-               for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
-                  data.Range().GetRange(&xmin[0], &xmax[0], ir);
-                  nuTot += igEval.Integral(xmin.data(), xmax.data());
-               }
-            } else {
-               // use (-inf +inf)
-               data.Range().GetRange(&xmin[0], &xmax[0]);
-               // check if funcition is zero at +- inf
-               T xmin_v, xmax_v;
-               vecCore::Load<T>(xmin_v, xmin.data());
-               vecCore::Load<T>(xmax_v, xmax.data());
-               if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
-                  MATH_ERROR_MSG("FitUtil::LogL<T>::Eval",
-                                 "A range has not been set and the function is not zero at +/- inf");
-                  return 0;
-               }
-               nuTot = igEval.Integral(&xmin[0], &xmax[0]);
-            }
-
-            // force to be last parameter value
-            // nutot = p[func.NDim()-1];
-            if (iWeight != 2)
-               extendedTerm = -nuTot; // no need to add in this case n log(nu) since is already computed before
-            else {
-               // case use weight square in likelihood : compute total effective weight = sw2/sw
-               // ignore for the moment case when sumW is zero
-               extendedTerm = -(sumW2 / sumW) * nuTot;
-            }
-
-         } else {
-            nuTot = norm;
-            extendedTerm = -nuTot + double(n) * ROOT::Math::Util::EvalLog(nuTot);
-            // in case of weights need to use here sum of weights (to be done)
-         }
-
-         logl += extendedTerm;
-      }
-
-#ifdef DEBUG_FITUTIL
-      std::cout << "Evaluated log L for parameters (";
-      for (unsigned int ip = 0; ip < func.NPar(); ++ip)
-         std::cout << " " << p[ip];
-      std::cout << ")  nll = " << -logl << std::endl;
-#endif
-
-      return -logl;
-   }
-   
-   /**
-    Evaluate the LogL gradient given a model function and the data at the point x.
-   */
-   static void EvalGradient(const IModelFunctionTempl<T> &f, const UnBinData &data, const double *p, double *grad, unsigned int &,
-                ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial, unsigned nChunks = 0)
-   {
-      // evaluate the gradient of the log likelihood function
-
-      const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
-      assert(fg != nullptr); // must be called by a grad function
-
-      const IGradModelFunctionTempl<T> &func = *fg;
-
-      unsigned int npar = func.NPar();
-      auto vecSize = vecCore::VectorSize<T>();
-      unsigned initialNPoints = data.Size();
-      unsigned numVectors = initialNPoints / vecSize;
-
-#ifdef DEBUG_FITUTIL
-      std::cout << "\n===> Evaluate Gradient for parameters ";
-      for (unsigned int ip = 0; ip < npar; ++ip)
-         std::cout << "  " << p[ip];
-      std::cout << "\n";
-#endif
-
-      (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
-
-      const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
-      const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
-
-      auto mapFunction = [&](const unsigned int i) {
-         std::vector<T> gradFunc(npar);
-         std::vector<T> pointContributionVec(npar);
-
-         T x1;
-         vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
-
-         const T *x = nullptr;
-
-         unsigned int ndim = data.NDim();
-         std::vector<T> xc(ndim);
-         if (ndim > 1) {
-            xc.resize(ndim);
-            xc[0] = x1;
-            for (unsigned int j = 1; j < ndim; ++j)
-               vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
-            x = xc.data();
-         } else {
-            x = &x1;
-         }
-
-         T fval = func(x, p);
-         func.ParameterGradient(x, p, &gradFunc[0]);
-
-#ifdef DEBUG_FITUTIL
-         if (i < 5 || (i > numVectors - 5)) {
-            if (ndim > 1)
-               std::cout << i << "  x " << x[0] << " y " << x[1] << " gradient " << gradFunc[0] << "  " << gradFunc[1]
-                         << "  " << gradFunc[3] << std::endl;
-            else
-               std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  "
-                         << gradFunc[3] << std::endl;
-         }
-#endif
-
-         vecCore::Mask<T> positiveValues = fval > 0;
-
-         for (unsigned int kpar = 0; kpar < npar; ++kpar) {
-            if (!vecCore::MaskEmpty(positiveValues))
-               vecCore::MaskedAssign<T>(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]);
-
-            vecCore::Mask<T> nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0;
-            if (!vecCore::MaskEmpty(nonZeroGradientValues)) {
-               T gg = kdmax1 * gradFunc[kpar];
-               pointContributionVec[kpar] = vecCore::Blend(
-                  nonZeroGradientValues && gg > 0, -vecCore::math::Min(gg, kdmax2), -vecCore::math::Max(gg, -kdmax2));
-            }
-            // if func derivative is zero term is also zero so do not add in g[kpar]
-         }
-
-         return pointContributionVec;
-      };
-
-      // Vertically reduce the set of vectors by summing its equally-indexed components
-      auto redFunction = [&](const std::vector<std::vector<T>> &pointContributions) {
-         std::vector<T> result(npar);
-
-         for (auto const &pointContributionVec : pointContributions) {
-            for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-               result[parameterIndex] += pointContributionVec[parameterIndex];
-         }
-
-         return result;
-      };
-
-      std::vector<T> gVec(npar);
-      std::vector<double> g(npar);
-
-#ifndef R__USE_IMT
-      // to fix compiler warning
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("FitUtil::LogL<T>::EvalGradient",
-                 "Multithread execution policy requires IMT, which is disabled. Changing "
-                 "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         ROOT::TSequentialExecutor pool;
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
-      }
-#ifdef R__USE_IMT
-      else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
-      }
-#endif
-      else {
-         Error("FitUtil::LogL<T>::EvalGradient", "Execution policy unknown. Avalaible choices:\n "
-                                                "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                                "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-      }
-
-      // Compute the contribution from the remaining points
-      unsigned int remainingPoints = initialNPoints % vecSize;
-      if (remainingPoints > 0) {
-         auto remainingPointsContribution = mapFunction(numVectors);
-         // Add the contribution from the valid remaining points and store the result in the output variable
-         auto remainingMask = ::vecCore::Int2Mask<T>(initialNPoints % vecSize);
-         for (unsigned int param = 0; param < npar; param++) {
-            vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
-         }
-      }
-      // reduce final gradient result from T to double
-      for (unsigned int param = 0; param < npar; param++) {
-         grad[param] = vecCore::ReduceAdd(gVec[param]);
-      }
-
-#ifdef DEBUG_FITUTIL
-      std::cout << "Final gradient ";
-      for (unsigned int param = 0; param < npar; param++) {
-         std::cout << "  " << grad[param];
-      }
-      std::cout << "\n";
-#endif
-   }
-};
-
-template <>
-struct LogL<double> {
-#endif
-
-   /**
-    Evaluate the LogL given a model function and the data at the point x.
-   */
-   static double Eval(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p, int iWeight,
-                      bool extended, unsigned int &nPoints, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                      unsigned nChunks = 0);
-
-   /**
-    Evaluate the LogL gradient given a model function and the data at the point x.
-   */
-   static void EvalGradient(const IModelFunctionTempl<double> &f, const UnBinData &data, const double *p, double *grad,
-                            unsigned int &nPoints,
-                            ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
-                            unsigned nChunks = 0);
-};
-
-} // end namespace FitUtil
-
-} // end namespace Fit
-
-} // end namespace ROOT
-
-#if defined(R__HAS_VECCORE) && defined(R__HAS_VC)
-// Fixes alignment for structures of SIMD structures
-Vc_DECLARE_ALLOCATOR(ROOT::Fit::FitUtil::LikelihoodAux<ROOT::Double_v>);
-#endif
-
-#endif /* ROOT_Fit_EvaluateLogL */
diff --git a/math/mathcore/inc/Fit/EvaluatePoissonLogL.hxx b/math/mathcore/inc/Fit/EvaluatePoissonLogL.hxx
deleted file mode 100644
index 63f693a5b5cc8fec3bd647c31cbdcce02aff57c0..0000000000000000000000000000000000000000
--- a/math/mathcore/inc/Fit/EvaluatePoissonLogL.hxx
+++ /dev/null
@@ -1,393 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-#ifndef ROOT_Fit_EvaluatePoissonLogL
-#define ROOT_Fit_EvaluatePoissonLogL
-
-#include "Math/IParamFunctionfwd.h"
-#include "Math/IParamFunction.h"
-
-#ifdef R__USE_IMT
-#include "ROOT/TThreadExecutor.hxx"
-#endif
-#include "ROOT/TSequentialExecutor.hxx"
-
-#include "Fit/BinData.h"
-#include "Fit/FitExecutionPolicy.h"
-#include "Fit/FitUtil.h"
-
-#include "Math/Integrator.h"
-#include "Math/IntegratorMultiDim.h"
-
-#include "TError.h"
-
-// using parameter cache is not thread safe but needed for normalizing the functions
-#define USE_PARAMCACHE
-
-namespace ROOT {
-
-namespace Fit {
-
-/**
-   namespace defining utility free functions using in Fit for evaluating the various fit method
-   functions (chi2, likelihood, etc..)  given the data and the model function
-
-   @ingroup FitMain
-*/
-namespace FitUtil {
-
-template <class T>
-struct PoissonLogL {
-#ifdef R__HAS_VECCORE
-
-   /**
-    Evaluate the Poisson LogL given a vectorized model function and the data at the point x.
-    Return also nPoints as the effective number of used points in the LogL evaluation
-
-    By default is extended, pass extedend=false if want to be not extended (MultiNomial)
-   */
-   static double Eval(const IModelFunctionTempl<T> &func, const BinData &data, const double *p, int iWeight,
-                      bool extended, unsigned int, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
-   {
-      // evaluate the Poisson Log Likelihood
-      // for binned likelihood fits
-      // this is Sum ( f(x_i)  -  y_i * log( f (x_i) ) )
-      // add as well constant term for saturated model to make it like a Chi2/2
-      // by default is etended. If extended is false the fit is not extended and
-      // the global poisson term is removed (i.e is a binomial fit)
-      // (remember that in this case one needs to have a function with a fixed normalization
-      // like in a non extended binned fit)
-      //
-      // if use Weight use a weighted dataset
-      // iWeight = 1 ==> logL = Sum( w f(x_i) )
-      // case of iWeight==1 is actually identical to weight==0
-      // iWeight = 2 ==> logL = Sum( w*w * f(x_i) )
-      //
-
-#ifdef USE_PARAMCACHE
-      (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
-#endif
-      auto vecSize = vecCore::VectorSize<T>();
-      // get fit option and check case of using integral of bins
-      const DataOptions &fitOpt = data.Opt();
-      if (fitOpt.fExpErrors || fitOpt.fIntegral)
-         Error("FitUtil::PoissonLogL<T>::Eval",
-               "The vectorized implementation doesn't support Integrals or BinVolume\n. Aborting operation.");
-      bool useW2 = (iWeight == 2);
-
-      auto mapFunction = [&](unsigned int i) {
-         T y;
-         vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
-         T fval(0.);
-
-         if (data.NDim() > 1) {
-            std::vector<T> x(data.NDim());
-            for (unsigned int j = 0; j < data.NDim(); ++j)
-               vecCore::Load<T>(x[j], data.GetCoordComponent(i * vecSize, j));
-#ifdef USE_PARAMCACHE
-            fval = func(x.data());
-#else
-            fval = func(x.data(), p);
-#endif
-            // one -dim case
-         } else {
-            T x;
-            vecCore::Load<T>(x, data.GetCoordComponent(i * vecSize, 0));
-#ifdef USE_PARAMCACHE
-            fval = func(&x);
-#else
-            fval = func(&x, p);
-#endif
-         }
-
-         // EvalLog protects against 0 values of fval but don't want to add in the -log sum
-         // negative values of fval
-         vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
-
-         T nloglike(0.); // negative loglikelihood
-
-         if (useW2) {
-            // apply weight correction . Effective weight is error^2/ y
-            // and expected events in bins is fval/weight
-            // can apply correction only when y is not zero otherwise weight is undefined
-            // (in case of weighted likelihood I don't care about the constant term due to
-            // the saturated model)
-            assert(data.GetErrorType() != ROOT::Fit::BinData::ErrorType::kNoError);
-            T error = 0.0;
-            vecCore::Load<T>(error, data.ErrorPtr(i * vecSize));
-            // for empty bin use the average weight  computed from the total data weight
-            auto m = vecCore::Mask_v<T>(y != 0.0);
-            auto weight = vecCore::Blend(m, (error * error) / y, T(data.SumOfError2() / data.SumOfContent()));
-            if (extended) {
-               nloglike = weight * (fval - y);
-            }
-            vecCore::MaskedAssign<T>(nloglike, y != 0,
-                                     nloglike +
-                                        weight * y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
-
-         } else {
-            // standard case no weights or iWeight=1
-            // this is needed for Poisson likelihood (which are extened and not for multinomial)
-            // the formula below  include constant term due to likelihood of saturated model (f(x) = y)
-            // (same formula as in Baker-Cousins paper, page 439 except a factor of 2
-            if (extended)
-               nloglike = fval - y;
-
-            vecCore::MaskedAssign<T>(nloglike, y > 0,
-                                     nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
-         }
-
-         return nloglike;
-      };
-
-#ifdef R__USE_IMT
-      auto redFunction = [](const std::vector<T> &objs) { return std::accumulate(objs.begin(), objs.end(), T(0.)); };
-#else
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("FitUtil::PoissonLogL<T>::Eval",
-                 "Multithread execution policy requires IMT, which is disabled. Changing "
-                 "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      T res(0.);
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         for (unsigned int i = 0; i < (data.Size() / vecSize); i++) {
-            res += mapFunction(i);
-         }
-#ifdef R__USE_IMT
-      } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
-         res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
-#endif
-      } else {
-         Error("FitUtil::PoissonLogL<T>::Eval",
-               "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-               "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-      }
-
-      // Last padded SIMD vector of elements
-      if (data.Size() % vecSize != 0)
-         vecCore::MaskedAssign(res, ::vecCore::Int2Mask<T>(data.Size() % vecSize),
-                               res + mapFunction(data.Size() / vecSize));
-
-      return vecCore::ReduceAdd(res);
-   }
-
-   static double EvalBinPdf(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int, double *)
-   {
-      Error("FitUtil::PoissonLogL<T>::EvalBinPdf",
-            "The vectorized evaluation of the BinnedLikelihood fit evaluated point by point is still not supported");
-      return -1.;
-   }
-
-   /**
-    Evaluate the Poisson LogL given a vectorized model function and the data at the point x.
-   */
-   static void
-   EvalGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad, unsigned int &,
-                ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial, unsigned nChunks = 0)
-   {
-      // evaluate the gradient of the Poisson log likelihood function
-
-      const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
-      assert(fg != nullptr); // must be called by a grad function
-
-      const IGradModelFunctionTempl<T> &func = *fg;
-
-      (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
-
-      const DataOptions &fitOpt = data.Opt();
-      if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
-         Error("FitUtil::PoissonLogL<T>::EvalGradient", "The vectorized implementation doesn't support Integrals,"
-                                                       "BinVolume or ExpErrors\n. Aborting operation.");
-
-      unsigned int npar = func.NPar();
-      auto vecSize = vecCore::VectorSize<T>();
-      unsigned initialNPoints = data.Size();
-      unsigned numVectors = initialNPoints / vecSize;
-
-      auto mapFunction = [&](const unsigned int i) {
-         // set all vector values to zero
-         std::vector<T> gradFunc(npar);
-         std::vector<T> pointContributionVec(npar);
-
-         T x1, y;
-
-         vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
-         vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
-
-         T fval = 0;
-
-         const T *x = nullptr;
-
-         unsigned ndim = data.NDim();
-         std::vector<T> xc;
-         if (ndim > 1) {
-            xc.resize(ndim);
-            xc[0] = x1;
-            for (unsigned int j = 1; j < ndim; ++j)
-               vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
-            x = xc.data();
-         } else {
-            x = &x1;
-         }
-
-         fval = func(x, p);
-         func.ParameterGradient(x, p, &gradFunc[0]);
-
-         // correct the gradient
-         for (unsigned int ipar = 0; ipar < npar; ++ipar) {
-            vecCore::Mask<T> positiveValuesMask = fval > 0;
-
-            // df/dp * (1.  - y/f )
-            vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. - y / fval));
-
-            vecCore::Mask<T> validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0;
-
-            if (!vecCore::MaskEmpty(validNegativeValuesMask)) {
-               const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
-               const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
-               T gg = kdmax1 * gradFunc[ipar];
-               pointContributionVec[ipar] =
-                  -vecCore::Blend(gg > 0, vecCore::math::Min(gg, kdmax2), vecCore::math::Max(gg, -kdmax2));
-            }
-         }
-
-#ifdef DEBUG_FITUTIL
-         {
-            R__LOCKGUARD(gROOTMutex);
-            if (i < 5 || (i > data.Size() - 5)) {
-               if (data.NDim() > 1)
-                  std::cout << i << "  x " << x[0] << " y " << x[1];
-               else
-                  std::cout << i << "  x " << x[0];
-               std::cout << " func " << fval << " gradient ";
-               for (unsigned int ii = 0; ii < npar; ++ii)
-                  std::cout << "  " << pointContributionVec[ii];
-               std::cout << "\n";
-            }
-         }
-#endif
-
-         return pointContributionVec;
-      };
-
-      // Vertically reduce the set of vectors by summing its equally-indexed components
-      auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
-         std::vector<T> result(npar);
-
-         for (auto const &pointContributionVec : partialResults) {
-            for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-               result[parameterIndex] += pointContributionVec[parameterIndex];
-         }
-
-         return result;
-      };
-
-      std::vector<T> gVec(npar);
-
-#ifndef R__USE_IMT
-      // to fix compiler warning
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("FitUtil::PoissonLogL<T>::EvalGradient",
-                 "Multithread execution policy requires IMT, which is disabled. Changing "
-                 "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         ROOT::TSequentialExecutor pool;
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
-      }
-#ifdef R__USE_IMT
-      else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
-      }
-#endif
-      else {
-         Error("FitUtil::PoissonLogL<T>:::EvalGradient", "Execution policy unknown. Avalaible choices:\n "
-                                                       "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                                       "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-      }
-
-      // Compute the contribution from the remaining points
-      unsigned int remainingPoints = initialNPoints % vecSize;
-      if (remainingPoints > 0) {
-         auto remainingPointsContribution = mapFunction(numVectors);
-         // Add the contribution from the valid remaining points and store the result in the output variable
-         auto remainingMask = ::vecCore::Int2Mask<T>(remainingPoints);
-         for (unsigned int param = 0; param < npar; param++) {
-            vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
-         }
-      }
-      // reduce final gradient result from T to double
-      for (unsigned int param = 0; param < npar; param++) {
-         grad[param] = vecCore::ReduceAdd(gVec[param]);
-      }
-
-#ifdef DEBUG_FITUTIL
-      std::cout << "***** Final gradient : ";
-      for (unsigned int ii = 0; ii < npar; ++ii)
-         std::cout << grad[ii] << "   ";
-      std::cout << "\n";
-#endif
-   }
-};
-
-template <>
-struct PoissonLogL<double> {
-#endif
-
-   /**
-    Evaluate the Poisson LogL given a model function and the data at the point x.
-    Return also nPoints as the effective number of used points in the LogL evaluation.
-
-    By default is extended, pass extedend=false if want to be not extended (MultiNomial)
-   */
-   static double Eval(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, int iWeight,
-                      bool extended, unsigned int &nPoints, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                      unsigned nChunks = 0);
-
-   /**
-    Evaluate the pdf contribution to the Poisson LogL given a model function and the BinPoint data.
-    If the pointer g is not null evaluate also the gradient of the Poisson pdf.
-    If the function provides parameter derivatives they are used. Otherwise, a simple derivative calculation
-    is used.
-   */
-   static double EvalBinPdf(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, unsigned int i, double *g);
-
-   /**
-    Evaluate the Poisson LogL given a model function and the data at the point x.
-   */
-   static void EvalGradient(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, double *g,
-                            unsigned int &,
-                            ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
-                            unsigned nChunks = 0);
-};
-
-} // end namespace FitUtil
-
-} // end namespace Fit
-
-} // end namespace ROOT
-
-#endif /* ROOT_Fit_EvaluateLogL */
diff --git a/math/mathcore/inc/Fit/FitUtil.h b/math/mathcore/inc/Fit/FitUtil.h
index 5ce43b1ef56277d1615ffe2a7f238b6db7ee37d7..93eda887ba171d703138be139b3aeb859d75195d 100644
--- a/math/mathcore/inc/Fit/FitUtil.h
+++ b/math/mathcore/inc/Fit/FitUtil.h
@@ -16,29 +16,32 @@
 #include "Math/IParamFunctionfwd.h"
 #include "Math/IParamFunction.h"
 
-#include "TROOT.h"
-
 #ifdef R__USE_IMT
 #include "ROOT/TThreadExecutor.hxx"
 #endif
 #include "ROOT/TSequentialExecutor.hxx"
 
+#include "Fit/BinData.h"
+#include "Fit/UnBinData.h"
 #include "Fit/FitExecutionPolicy.h"
 
 #include "Math/Integrator.h"
 #include "Math/IntegratorMultiDim.h"
 
 #include "TError.h"
+#include "TSystem.h"
 
 // using parameter cache is not thread safe but needed for normalizing the functions
 #define USE_PARAMCACHE
 
+//#define DEBUG_FITUTIL
+
 #ifdef R__HAS_VECCORE
 namespace vecCore {
 template <class T>
 vecCore::Mask<T> Int2Mask(unsigned i)
 {
-   T x{};
+   T x;
    for (unsigned j = 0; j < vecCore::VectorSize<T>(); j++)
       vecCore::Set<T>(x, j, j);
    return vecCore::Mask<T>(x < T(i));
@@ -67,6 +70,53 @@ namespace FitUtil {
   template <class T>
   using IModelFunctionTempl = ROOT::Math::IParamMultiFunctionTempl<T>;
 
+  // internal class defining
+  template <class T>
+  class LikelihoodAux {
+  public:
+     LikelihoodAux(T logv = {}, T w = {}, T w2 = {}) : logvalue(logv), weight(w), weight2(w2) {}
+
+     LikelihoodAux operator+(const LikelihoodAux &l) const
+     {
+        return LikelihoodAux<T>(logvalue + l.logvalue, weight + l.weight, weight2 + l.weight2);
+     }
+
+     LikelihoodAux &operator+=(const LikelihoodAux &l)
+     {
+        logvalue += l.logvalue;
+        weight += l.weight;
+        weight2 += l.weight2;
+        return *this;
+     }
+
+     T logvalue;
+     T weight;
+     T weight2;
+  };
+
+  template <>
+  class LikelihoodAux<double> {
+  public:
+     LikelihoodAux(double logv = 0.0, double w = 0.0, double w2 = 0.0) : logvalue(logv), weight(w), weight2(w2){};
+
+     LikelihoodAux operator+(const LikelihoodAux &l) const
+     {
+        return LikelihoodAux<double>(logvalue + l.logvalue, weight + l.weight, weight2 + l.weight2);
+     }
+
+     LikelihoodAux &operator+=(const LikelihoodAux &l)
+     {
+        logvalue += l.logvalue;
+        weight += l.weight;
+        weight2 += l.weight2;
+        return *this;
+     }
+
+     double logvalue;
+     double weight;
+     double weight2;
+  };
+
   // internal class to evaluate the function or the integral
   // and cached internal integration details
   // if useIntegral is false no allocation is done
@@ -111,9 +161,8 @@ namespace FitUtil {
                                                                             const>(*this, &IntegralEvaluator::FN, fDim);
            fIgNDim = new ROOT::Math::IntegratorMultiDim();
            fIgNDim->SetFunction(*fFuncNDim);
-        } else {
+        } else
            assert(fDim > 0);
-        }
      }
 
      void SetParameters(const double *p)
@@ -163,9 +212,8 @@ namespace FitUtil {
            return fIgNDim->Integral(x1, x2) / dV;
            //                   std::cout << " do integral btw x " << x1[0] << "  " << x2[0] << " y " << x1[1] << "  "
            //                   << x2[1] << " dV = " << dV << " result = " << result << std::endl; return result;
-        } else {
+        } else
            assert(1.); // should never be here
-        }
         return 0;
      }
 
@@ -182,7 +230,8 @@ namespace FitUtil {
         if (fDim == 1) {
            ROOT::Double_v xx;
            vecCore::Load<ROOT::Double_v>(xx, x);
-           auto res = (*f)(&xx, p);
+           const double *p0 = p;
+           auto res = (*f)(&xx, (const double *)p0);
            return vecCore::Get<ROOT::Double_v>(res, 0);
         } else {
            std::vector<ROOT::Double_v> xx(fDim);
@@ -210,159 +259,1212 @@ namespace FitUtil {
      ROOT::Math::IMultiGenFunction *fFuncNDim;
   };
 
-  inline unsigned setAutomaticChunking(unsigned nEvents)
-  {
-     auto ncpu = ROOT::GetImplicitMTPoolSize();
-     if (nEvents / ncpu < 1000)
-        return ncpu;
-     return nEvents / 1000;
-     //   return ((nEvents/ncpu + 1) % 1000) *40 ; //arbitrary formula
+  /** Chi2 Functions */
+
+  /**
+      evaluate the Chi2 given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the Chi2 evaluation
+  */
+  double EvaluateChi2(const IModelFunction &func, const BinData &data, const double *x, unsigned int &nPoints,
+                      ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0);
+
+  /**
+      evaluate the effective Chi2 given a model function and the data at the point x.
+      The effective chi2 uses the errors on the coordinates : W = 1/(sigma_y**2 + ( sigma_x_i * df/dx_i )**2 )
+      return also nPoints as the effective number of used points in the Chi2 evaluation
+  */
+  double EvaluateChi2Effective(const IModelFunction &func, const BinData &data, const double *x, unsigned int &nPoints);
+
+  /**
+      evaluate the Chi2 gradient given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the Chi2 evaluation
+  */
+  void EvaluateChi2Gradient(const IModelFunction &func, const BinData &data, const double *x, double *grad,
+                            unsigned int &nPoints,
+                            ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                            unsigned nChunks = 0);
+
+  /**
+      evaluate the LogL given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the LogL evaluation
+  */
+  double EvaluateLogL(const IModelFunction &func, const UnBinData &data, const double *p, int iWeight, bool extended,
+                      unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0);
+
+  /**
+      evaluate the LogL gradient given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the LogL evaluation
+  */
+  void EvaluateLogLGradient(const IModelFunction &func, const UnBinData &data, const double *x, double *grad,
+                            unsigned int &nPoints,
+                            ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                            unsigned nChunks = 0);
+
+  // #ifdef R__HAS_VECCORE
+  //    template <class NotCompileIfScalarBackend = std::enable_if<!(std::is_same<double, ROOT::Double_v>::value)>>
+  //    void EvaluateLogLGradient(const IModelFunctionTempl<ROOT::Double_v> &, const UnBinData &, const double *, double
+  //    *, unsigned int & ) {}
+  // #endif
+
+  /**
+      evaluate the Poisson LogL given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the LogL evaluation
+      By default is extended, pass extedend to false if want to be not extended (MultiNomial)
+  */
+  double EvaluatePoissonLogL(const IModelFunction &func, const BinData &data, const double *x, int iWeight,
+                             bool extended, unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy,
+                             unsigned nChunks = 0);
+
+  /**
+      evaluate the Poisson LogL given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the LogL evaluation
+  */
+  void EvaluatePoissonLogLGradient(const IModelFunction &func, const BinData &data, const double *x, double *grad,
+                                   unsigned int &nPoints,
+                                   ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                                   unsigned nChunks = 0);
+
+  // methods required by dedicate minimizer like Fumili
+
+  /**
+      evaluate the residual contribution to the Chi2 given a model function and the BinPoint data
+      and if the pointer g is not null evaluate also the gradient of the residual.
+      If the function provides parameter derivatives they are used otherwise a simple derivative calculation
+      is used
+  */
+  double EvaluateChi2Residual(const IModelFunction &func, const BinData &data, const double *x, unsigned int ipoint,
+                              double *g = 0);
+
+  /**
+      evaluate the pdf contribution to the LogL given a model function and the BinPoint data.
+      If the pointer g is not null evaluate also the gradient of the pdf.
+      If the function provides parameter derivatives they are used otherwise a simple derivative calculation
+      is used
+  */
+  double
+  EvaluatePdf(const IModelFunction &func, const UnBinData &data, const double *x, unsigned int ipoint, double *g = 0);
+
+#ifdef R__HAS_VECCORE
+   template <class NotCompileIfScalarBackend = std::enable_if<!(std::is_same<double, ROOT::Double_v>::value)>>
+   double EvaluatePdf(const IModelFunctionTempl<ROOT::Double_v> &func, const UnBinData &data, const double *p, unsigned int i, double *) {
+      // evaluate the pdf contribution to the generic logl function in case of bin data
+      // return actually the log of the pdf and its derivatives
+      // func.SetParameters(p);
+      const auto x = vecCore::FromPtr<ROOT::Double_v>(data.GetCoordComponent(i, 0));
+      auto fval = func(&x, p);
+      auto logPdf = ROOT::Math::Util::EvalLog(fval);
+      return vecCore::Get<ROOT::Double_v>(logPdf, 0);
    }
+#endif
 
-   // derivative with respect of the parameter to be integrated
-   template <class GradFunc = IGradModelFunction>
-   struct ParamDerivFunc {
-      ParamDerivFunc(const GradFunc &f) : fFunc(f), fIpar(0) {}
-      void SetDerivComponent(unsigned int ipar) { fIpar = ipar; }
-      double operator()(const double *x, const double *p) const { return fFunc.ParameterDerivative(x, p, fIpar); }
-      unsigned int NDim() const { return fFunc.NDim(); }
-      const GradFunc &fFunc;
-      unsigned int fIpar;
-   };
+   /**
+       evaluate the pdf contribution to the Poisson LogL given a model function and the BinPoint data.
+       If the pointer g is not null evaluate also the gradient of the Poisson pdf.
+       If the function provides parameter derivatives they are used otherwise a simple derivative calculation
+       is used
+   */
+   double EvaluatePoissonBinPdf(const IModelFunction & func, const BinData & data, const double * x, unsigned int ipoint, double * g = 0);
+
+   unsigned setAutomaticChunking(unsigned nEvents);
 
-   // simple gradient calculator using the 2 points rule
-
-   class SimpleGradientCalculator {
-
-   public:
-      // construct from function and gradient dimension gdim
-      // gdim = npar for parameter gradient
-      // gdim = ndim for coordinate gradients
-      // construct (the param values will be passed later)
-      // one can choose between 2 points rule (1 extra evaluation) istrat=1
-      // or two point rule (2 extra evaluation)
-      // (found 2 points rule does not work correctly - minuit2FitBench fails)
-      SimpleGradientCalculator(int gdim, const IModelFunction &func, double eps = 2.E-8, int istrat = 1)
-         : fEps(eps), fPrecision(1.E-8),                                             // sqrt(epsilon)
-           fStrategy(istrat), fN(gdim), fFunc(func), fVec(std::vector<double>(gdim)) // this can be probably optimized
+   template<class T>
+   struct Evaluate {
+#ifdef R__HAS_VECCORE
+      static double EvalChi2(const IModelFunctionTempl<T> &func, const BinData &data, const double *p,
+                             unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
       {
+         // evaluate the chi2 given a  vectorized function reference  , the data and returns the value and also in nPoints
+         // the actual number of used points
+         // normal chi2 using only error on values (from fitting histogram)
+         // optionally the integral of function in the bin is used
+
+         //Info("EvalChi2","Using vecorized implementation %d",(int) data.Opt().fIntegral);
+
+         unsigned int n = data.Size();
+         nPoints = data.Size();  // npoints
+
+         // set parameters of the function to cache integral value
+#ifdef USE_PARAMCACHE
+         (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
+#endif
+         // do not cache parameter values (it is not thread safe)
+         //func.SetParameters(p);
+
+
+         // get fit option and check case if using integral of bins
+         const DataOptions &fitOpt = data.Opt();
+         if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
+            Error("FitUtil::EvaluateChi2", "The vectorized implementation doesn't support Integrals, BinVolume or ExpErrors\n. Aborting operation.");
+
+         (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
+
+         double maxResValue = std::numeric_limits<double>::max() / n;
+         std::vector<double> ones{1, 1, 1, 1};
+         auto vecSize = vecCore::VectorSize<T>();
+
+         auto mapFunction = [&](unsigned int i) {
+            // in case of no error in y invError=1 is returned
+            T x1, y, invErrorVec;
+            vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
+            vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
+            const auto invError = data.ErrorPtr(i * vecSize);
+            auto invErrorptr = (invError != nullptr) ? invError : &ones.front();
+            vecCore::Load<T>(invErrorVec, invErrorptr);
+
+            const T *x;
+            std::vector<T> xc;
+            if(data.NDim() > 1) {
+               xc.resize(data.NDim());
+               xc[0] = x1;
+               for (unsigned int j = 1; j < data.NDim(); ++j)
+                  vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
+               x = xc.data();
+            } else {
+               x = &x1;
+            }
+
+            T fval{};
+
+#ifdef USE_PARAMCACHE
+            fval = func(x);
+#else
+            fval = func(x, p);
+#endif
+
+            T tmp = (y - fval) * invErrorVec;
+            T chi2 = tmp * tmp;
+
+
+            // avoid inifinity or nan in chi2 values due to wrong function values
+            auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
+
+            vecCore::MaskedAssign<T>(chi2, m, maxResValue);
+
+            return chi2;
+         };
+
+         auto redFunction = [](const std::vector<T> &objs) {
+            return std::accumulate(objs.begin(), objs.end(), T{});
+         };
+
+#ifndef R__USE_IMT
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::EvaluateChi2", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                             "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         T res{};
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            ROOT::TSequentialExecutor pool;
+            res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size()/vecSize), redFunction);
+#ifdef R__USE_IMT
+         } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
+            res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
+#endif
+         } else {
+            Error("FitUtil::EvaluateChi2", "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+         }
+
+         // Last SIMD vector of elements (if padding needed)
+         if (data.Size() % vecSize != 0) 
+            vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
+                                  res + mapFunction(data.Size() / vecSize));
+
+         return vecCore::ReduceAdd(res);
+      }
+
+      static double EvalLogL(const IModelFunctionTempl<T> &func, const UnBinData &data, const double *const p,
+                             int iWeight, bool extended, unsigned int &nPoints,
+                             ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
+      {
+         // evaluate the LogLikelihood
+         unsigned int n = data.Size();
+         nPoints = data.Size();  // npoints
+
+         //unsigned int nRejected = 0;
+         bool normalizeFunc = false;
+
+         // set parameters of the function to cache integral value
+#ifdef USE_PARAMCACHE
+         (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
+#endif
+
+#ifdef R__USE_IMT
+         // in case parameter needs to be propagated to user function use trick to set parameters by calling one time the function
+         // this will be done in sequential mode and parameters can be set in a thread safe manner
+         if (!normalizeFunc) {
+            if (data.NDim() == 1) {
+               T x;
+               vecCore::Load<T>(x, data.GetCoordComponent(0, 0));
+               func( &x, p);
+            }
+            else {
+               std::vector<T> x(data.NDim());
+               for (unsigned int j = 0; j < data.NDim(); ++j)
+                  vecCore::Load<T>(x[j], data.GetCoordComponent(0, j));
+               func( x.data(), p);
+            }
+         }
+#endif
+
+         // this is needed if function must be normalized
+         double norm = 1.0;
+         if (normalizeFunc) {
+            // compute integral of the function
+            std::vector<double> xmin(data.NDim());
+            std::vector<double> xmax(data.NDim());
+            IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
+            // compute integral in the ranges where is defined
+            if (data.Range().Size() > 0) {
+               norm = 0;
+               for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
+                  data.Range().GetRange(&xmin[0], &xmax[0], ir);
+                  norm += igEval.Integral(xmin.data(), xmax.data());
+               }
+            } else {
+               // use (-inf +inf)
+               data.Range().GetRange(&xmin[0], &xmax[0]);
+               // check if funcition is zero at +- inf
+               T xmin_v, xmax_v;
+               vecCore::Load<T>(xmin_v, xmin.data());
+               vecCore::Load<T>(xmax_v, xmax.data());
+               if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
+                  MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
+                  return 0;
+               }
+               norm = igEval.Integral(&xmin[0], &xmax[0]);
+            }
+         }
+
+         // needed to compute effective global weight in case of extended likelihood
+
+         auto vecSize = vecCore::VectorSize<T>();
+         unsigned int numVectors = n / vecSize;
+
+         auto mapFunction = [ &, p](const unsigned i) {
+            T W{};
+            T W2{};
+            T fval{};
+
+            (void)p; /* avoid unused lambda capture warning if PARAMCACHE is disabled */
+
+            T x1;
+            vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
+            const T *x = nullptr;
+            unsigned int ndim = data.NDim();
+            std::vector<T> xc;
+            if (ndim > 1) {
+               xc.resize(ndim);
+               xc[0] = x1;
+               for (unsigned int j = 1; j < ndim; ++j)
+                  vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
+               x = xc.data();
+            } else {
+               x = &x1;
+            }
+
+#ifdef USE_PARAMCACHE
+            fval = func(x);
+#else
+            fval = func(x, p);
+#endif
+
+#ifdef DEBUG_FITUTIL
+            if (i < 5 || (i > numVectors-5) ) {
+               if (ndim == 1) std::cout << i << "  x " << x[0]  << " fval = " << fval; 
+               else std::cout << i << "  x " << x[0] << " y " << x[1] << " fval = " << fval; 
+            }
+#endif
+
+            if (normalizeFunc) fval = fval * (1 / norm);
+
+            // function EvalLog protects against negative or too small values of fval
+            auto logval =  ROOT::Math::Util::EvalLog(fval);
+            if (iWeight > 0) {
+               T weight{};
+               if (data.WeightsPtr(i) == nullptr)
+                  weight = 1;
+               else
+                  vecCore::Load<T>(weight, data.WeightsPtr(i*vecSize));
+               logval *= weight;
+               if (iWeight == 2) {
+                  logval *= weight; // use square of weights in likelihood
+                  if (!extended) {
+                     // needed sum of weights and sum of weight square if likelkihood is extended
+                     W  = weight;
+                     W2 = weight * weight;
+                  }
+               }
+            }
+#ifdef DEBUG_FITUTIL
+            if (i < 5 || (i > numVectors-5)  )  {
+                 std::cout << "   " << fval << "  logfval " << logval << std::endl;
+            }
+#endif
+
+            return LikelihoodAux<T>(logval, W, W2);
+         };
+
+         auto redFunction = [](const std::vector<LikelihoodAux<T>> &objs) {
+            return std::accumulate(objs.begin(), objs.end(), LikelihoodAux<T>(),
+            [](const LikelihoodAux<T> &l1, const LikelihoodAux<T> &l2) {
+               return l1 + l2;
+            });
+         };
+
+#ifndef R__USE_IMT
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::EvaluateLogL", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                             "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         T logl_v{};
+         T sumW_v{};
+         T sumW2_v{};
+         ROOT::Fit::FitUtil::LikelihoodAux<T> resArray;
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            ROOT::TSequentialExecutor pool;
+            resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction);
+#ifdef R__USE_IMT
+         } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking( numVectors);
+            resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
+#endif
+         } else {
+            Error("FitUtil::EvaluateLogL", "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+         }
+
+         logl_v = resArray.logvalue;
+         sumW_v = resArray.weight;
+         sumW2_v = resArray.weight2;
+
+         // Compute the contribution from the remaining points ( Last padded SIMD vector of elements )
+         unsigned int remainingPoints = n % vecSize;
+         if (remainingPoints > 0) {
+            auto remainingPointsContribution = mapFunction(numVectors);
+            // Add the contribution from the valid remaining points and store the result in the output variable
+            auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
+            vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
+            vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
+            vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
+         }
+
+
+         //reduce vector type to double.
+         double logl  = vecCore::ReduceAdd(logl_v);
+         double sumW  = vecCore::ReduceAdd(sumW_v);
+         double sumW2 = vecCore::ReduceAdd(sumW2_v);
+
+         if (extended) {
+            // add Poisson extended term
+            double extendedTerm = 0; // extended term in likelihood
+            double nuTot = 0;
+            // nuTot is integral of function in the range
+            // if function has been normalized integral has been already computed
+            if (!normalizeFunc) {
+               IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
+               std::vector<double> xmin(data.NDim());
+               std::vector<double> xmax(data.NDim());
+
+               // compute integral in the ranges where is defined
+               if (data.Range().Size() > 0) {
+                  nuTot = 0;
+                  for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
+                     data.Range().GetRange(&xmin[0], &xmax[0], ir);
+                     nuTot += igEval.Integral(xmin.data(), xmax.data());
+                  }
+               } else {
+                  // use (-inf +inf)
+                  data.Range().GetRange(&xmin[0], &xmax[0]);
+                  // check if funcition is zero at +- inf
+                  T xmin_v, xmax_v;
+                  vecCore::Load<T>(xmin_v, xmin.data());
+                  vecCore::Load<T>(xmax_v, xmax.data());
+                  if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
+                     MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
+                     return 0;
+                  }
+                  nuTot = igEval.Integral(&xmin[0], &xmax[0]);
+               }
+
+               // force to be last parameter value
+               //nutot = p[func.NDim()-1];
+               if (iWeight != 2)
+                  extendedTerm = - nuTot;  // no need to add in this case n log(nu) since is already computed before
+               else {
+                  // case use weight square in likelihood : compute total effective weight = sw2/sw
+                  // ignore for the moment case when sumW is zero
+                  extendedTerm = - (sumW2 / sumW) * nuTot;
+               }
+
+            } else {
+               nuTot = norm;
+               extendedTerm = - nuTot + double(n) *  ROOT::Math::Util::EvalLog(nuTot);
+               // in case of weights need to use here sum of weights (to be done)
+            }
+
+            logl += extendedTerm;
+         }
+
+#ifdef DEBUG_FITUTIL
+         std::cout << "Evaluated log L for parameters (";
+         for (unsigned int ip = 0; ip < func.NPar(); ++ip)
+            std::cout << " " << p[ip];
+         std::cout << ")  nll = " << -logl << std::endl;
+#endif
+         
+         return -logl;
+
       }
 
-      // internal method to calculate single partial derivative
-      // assume cached vector fVec is already set
-      double DoParameterDerivative(const double *x, const double *p, double f0, int k) const
+      static double EvalPoissonLogL(const IModelFunctionTempl<T> &func, const BinData &data, const double *p,
+                                    int iWeight, bool extended, unsigned int,
+                                    ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
       {
-         double p0 = p[k];
-         double h = std::max(fEps * std::abs(p0), 8.0 * fPrecision * (std::abs(p0) + fPrecision));
-         fVec[k] += h;
-         double deriv = 0;
-         // t.b.d : treat case of infinities
-         // if (fval > - std::numeric_limits<double>::max() && fval < std::numeric_limits<double>::max() )
-         double f1 = fFunc(x, &fVec.front());
-         if (fStrategy > 1) {
-            fVec[k] = p0 - h;
-            double f2 = fFunc(x, &fVec.front());
-            deriv = 0.5 * (f2 - f1) / h;
+         // evaluate the Poisson Log Likelihood
+         // for binned likelihood fits
+         // this is Sum ( f(x_i)  -  y_i * log( f (x_i) ) )
+         // add as well constant term for saturated model to make it like a Chi2/2
+         // by default is etended. If extended is false the fit is not extended and
+         // the global poisson term is removed (i.e is a binomial fit)
+         // (remember that in this case one needs to have a function with a fixed normalization
+         // like in a non extended binned fit)
+         //
+         // if use Weight use a weighted dataset
+         // iWeight = 1 ==> logL = Sum( w f(x_i) )
+         // case of iWeight==1 is actually identical to weight==0
+         // iWeight = 2 ==> logL = Sum( w*w * f(x_i) )
+         //
+
+#ifdef USE_PARAMCACHE
+         (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
+#endif
+         auto vecSize = vecCore::VectorSize<T>();
+         // get fit option and check case of using integral of bins
+         const DataOptions &fitOpt = data.Opt();
+         if (fitOpt.fExpErrors || fitOpt.fIntegral)
+            Error("FitUtil::EvaluateChi2",
+                  "The vectorized implementation doesn't support Integrals or BinVolume\n. Aborting operation.");
+         bool useW2 = (iWeight == 2);
+
+         auto mapFunction = [&](unsigned int i) {
+            T y;
+            vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
+            T fval{};
+
+            if (data.NDim() > 1) {
+               std::vector<T> x(data.NDim());
+               for (unsigned int j = 0; j < data.NDim(); ++j)
+                  vecCore::Load<T>(x[j], data.GetCoordComponent(i * vecSize, j));
+#ifdef USE_PARAMCACHE
+               fval = func(x.data());
+#else
+               fval = func(x.data(), p);
+#endif
+               // one -dim case
+            } else {
+               T x;
+               vecCore::Load<T>(x, data.GetCoordComponent(i * vecSize, 0));
+#ifdef USE_PARAMCACHE
+               fval = func(&x);
+#else
+               fval = func(&x, p);
+#endif
+            }
+
+            // EvalLog protects against 0 values of fval but don't want to add in the -log sum
+            // negative values of fval
+            vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
+
+            T nloglike{}; // negative loglikelihood
+
+            if (useW2) {
+               // apply weight correction . Effective weight is error^2/ y
+               // and expected events in bins is fval/weight
+               // can apply correction only when y is not zero otherwise weight is undefined
+               // (in case of weighted likelihood I don't care about the constant term due to
+               // the saturated model)
+               assert (data.GetErrorType() != ROOT::Fit::BinData::ErrorType::kNoError);
+               T error = 0.0;
+               vecCore::Load<T>(error, data.ErrorPtr(i * vecSize));
+               // for empty bin use the average weight  computed from the total data weight
+               auto m = vecCore::Mask_v<T>(y != 0.0);
+               auto weight = vecCore::Blend(m,(error * error) / y, T(data.SumOfError2()/ data.SumOfContent()) );
+               if (extended) {
+                  nloglike =  weight * ( fval - y);
+               }
+               vecCore::MaskedAssign<T>(nloglike, y != 0, nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) -  ROOT::Math::Util::EvalLog(fval)) );
+
+            } else {
+               // standard case no weights or iWeight=1
+               // this is needed for Poisson likelihood (which are extened and not for multinomial)
+               // the formula below  include constant term due to likelihood of saturated model (f(x) = y)
+               // (same formula as in Baker-Cousins paper, page 439 except a factor of 2
+               if (extended) nloglike = fval - y;
+
+               vecCore::MaskedAssign<T>(
+                  nloglike, y > 0, nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
+            }
+
+            return nloglike;
+         };
+
+#ifdef R__USE_IMT
+         auto redFunction = [](const std::vector<T> &objs) { return std::accumulate(objs.begin(), objs.end(), T{}); };
+#else
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::Evaluate<T>::EvalPoissonLogL",
+                    "Multithread execution policy requires IMT, which is disabled. Changing "
+                    "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         T res{};
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            for (unsigned int i = 0; i < (data.Size() / vecSize); i++) {
+               res += mapFunction(i);
+            }
+#ifdef R__USE_IMT
+         } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
+            res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
+#endif
          } else {
-            deriv = (f1 - f0) / h;
+            Error(
+               "FitUtil::Evaluate<T>::EvalPoissonLogL",
+               "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
          }
 
-         fVec[k] = p[k]; // restore original p value
-         return deriv;
+         // Last padded SIMD vector of elements
+         if (data.Size() % vecSize != 0) 
+            vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
+                                  res + mapFunction(data.Size() / vecSize));
+
+         return vecCore::ReduceAdd(res);
       }
-      // number of dimension in x (needed when calculating the integrals)
-      unsigned int NDim() const { return fFunc.NDim(); }
-      // number of parameters (needed for grad ccalculation)
-      unsigned int NPar() const { return fFunc.NPar(); }
 
-      double ParameterDerivative(const double *x, const double *p, int ipar) const
+      static double EvalChi2Effective(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int &)
       {
-         // fVec are the cached parameter values
-         std::copy(p, p + fN, fVec.begin());
-         double f0 = fFunc(x, p);
-         return DoParameterDerivative(x, p, f0, ipar);
+         Error("FitUtil::Evaluate<T>::EvalChi2Effective", "The vectorized evaluation of the Chi2 with coordinate errors is still not supported");
+         return -1.;
       }
 
-      // calculate all gradient at point (x,p) knnowing already value f0 (we gain a function eval.)
-      void ParameterGradient(const double *x, const double *p, double f0, double *g)
+      // Compute a mask to filter out infinite numbers and NaN values.
+      // The argument rval is updated so infinite numbers and NaN values are replaced by
+      // maximum finite values (preserving the original sign).
+      static vecCore::Mask<T> CheckInfNaNValues(T &rval)
       {
-         // fVec are the cached parameter values
-         std::copy(p, p + fN, fVec.begin());
-         for (unsigned int k = 0; k < fN; ++k) {
-            g[k] = DoParameterDerivative(x, p, f0, k);
+         auto mask = rval > -vecCore::NumericLimits<T>::Max() && rval < vecCore::NumericLimits<T>::Max();
+
+         // Case +inf or nan
+         vecCore::MaskedAssign(rval, !mask, +vecCore::NumericLimits<T>::Max());
+
+         // Case -inf
+         vecCore::MaskedAssign(rval, !mask && rval < 0, -vecCore::NumericLimits<T>::Max());
+
+         return mask;
+      }
+
+      static void EvalChi2Gradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
+                                   unsigned int &nPoints,
+                                   ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                                   unsigned nChunks = 0)
+      {
+         // evaluate the gradient of the chi2 function
+         // this function is used when the model function knows how to calculate the derivative and we can
+         // avoid that the minimizer re-computes them
+         //
+         // case of chi2 effective (errors on coordinate) is not supported
+
+         if (data.HaveCoordErrors()) {
+            MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
+                           "Error on the coordinates are not used in calculating Chi2 gradient");
+            return; // it will assert otherwise later in GetPoint
+         }
+
+         const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
+         assert(fg != nullptr); // must be called by a gradient function
+
+         const IGradModelFunctionTempl<T> &func = *fg;
+
+         const DataOptions &fitOpt = data.Opt();
+         if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
+            Error("FitUtil::EvaluateChi2Gradient", "The vectorized implementation doesn't support Integrals,"
+                                                   "BinVolume or ExpErrors\n. Aborting operation.");
+
+         unsigned int npar = func.NPar();
+         auto vecSize = vecCore::VectorSize<T>();
+         unsigned initialNPoints = data.Size();
+         unsigned numVectors = initialNPoints / vecSize;
+
+         // numVectors + 1 because of the padded data (call to mapFunction with i = numVectors after the main loop)
+         std::vector<vecCore::Mask<T>> validPointsMasks(numVectors + 1);
+
+         auto mapFunction = [&](const unsigned int i) {
+            // set all vector values to zero
+            std::vector<T> gradFunc(npar);
+            std::vector<T> pointContributionVec(npar);
+
+            T x1, y, invError;
+
+            vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
+            vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
+            const auto invErrorPtr = data.ErrorPtr(i * vecSize);
+
+            if (invErrorPtr == nullptr)
+               invError = 1;
+            else
+               vecCore::Load<T>(invError, invErrorPtr);
+
+            // TODO: Check error options and invert if needed
+
+            T fval = 0;
+
+            const T *x = nullptr;
+
+            unsigned int ndim = data.NDim();
+            // need to declare vector outside if statement
+            // otherwise pointer will be invalid
+            std::vector<T> xc;
+            if (ndim > 1) {
+               xc.resize(ndim);
+               xc[0] = x1;
+               for (unsigned int j = 1; j < ndim; ++j)
+                  vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
+               x = xc.data();
+            } else {
+               x = &x1;
+            }
+
+            fval = func(x, p);
+            func.ParameterGradient(x, p, &gradFunc[0]);
+
+            validPointsMasks[i] = CheckInfNaNValues(fval);
+            if (vecCore::MaskEmpty(validPointsMasks[i])) {
+               // Return a zero contribution to all partial derivatives on behalf of the current points
+               return pointContributionVec;
+            }
+
+            // loop on the parameters
+            for (unsigned int ipar = 0; ipar < npar; ++ipar) {
+               // avoid singularity in the function (infinity and nan ) in the chi2 sum
+               // eventually add possibility of excluding some points (like singularity)
+               validPointsMasks[i] = CheckInfNaNValues(gradFunc[ipar]);
+
+               if (vecCore::MaskEmpty(validPointsMasks[i])) {
+                  break; // exit loop on parameters
+               }
+
+               // calculate derivative point contribution (only for valid points)
+               vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[i],
+                                     -2.0 * (y - fval) * invError * invError * gradFunc[ipar]);
+            }
+
+            return pointContributionVec;
+         };
+
+         // Reduce the set of vectors by summing its equally-indexed components
+         auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
+            std::vector<T> result(npar);
+
+            for (auto const &pointContributionVec : partialResults) {
+               for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+                  result[parameterIndex] += pointContributionVec[parameterIndex];
+            }
+
+            return result;
+         };
+
+         std::vector<T> gVec(npar);
+         std::vector<double> g(npar);
+
+#ifndef R__USE_IMT
+         // to fix compiler warning
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::EvaluateChi2Gradient",
+                    "Multithread execution policy requires IMT, which is disabled. Changing "
+                    "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            ROOT::TSequentialExecutor pool;
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
+         }
+#ifdef R__USE_IMT
+         else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
+         }
+#endif
+         else {
+            Error(
+               "FitUtil::EvaluateChi2Gradient",
+               "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
+         }
+
+         // Compute the contribution from the remaining points
+         unsigned int remainingPoints = initialNPoints % vecSize;
+         if (remainingPoints > 0) {
+            auto remainingPointsContribution = mapFunction(numVectors);
+            // Add the contribution from the valid remaining points and store the result in the output variable
+            auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
+            for (unsigned int param = 0; param < npar; param++) {
+               vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
+            }
+         }
+         // reduce final gradient result from T to double
+         for (unsigned int param = 0; param < npar; param++) {
+            grad[param] = vecCore::ReduceAdd(gVec[param]);
+         }
+
+         // correct the number of points
+         nPoints = initialNPoints;
+
+         if (std::any_of(validPointsMasks.begin(), validPointsMasks.end(),
+                         [](vecCore::Mask<T> validPoints) { return !vecCore::MaskFull(validPoints); })) {
+            unsigned nRejected = 0;
+
+            for (const auto &mask : validPointsMasks) {
+               for (unsigned int i = 0; i < vecSize; i++) {
+                  nRejected += !vecCore::Get(mask, i);
+               }
+            }
+
+            assert(nRejected <= initialNPoints);
+            nPoints = initialNPoints - nRejected;
+
+            if (nPoints < npar) {
+               MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
+                              "Too many points rejected for overflow in gradient calculation");
+           }
          }
       }
 
-      // calculate gradient w.r coordinate values
-      void Gradient(const double *x, const double *p, double f0, double *g)
+      static double EvalChi2Residual(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int, double *)
       {
-         // fVec are the cached coordinate values
-         std::copy(x, x + fN, fVec.begin());
-         for (unsigned int k = 0; k < fN; ++k) {
-            double x0 = x[k];
-            double h = std::max(fEps * std::abs(x0), 8.0 * fPrecision * (std::abs(x0) + fPrecision));
-            fVec[k] += h;
-            // t.b.d : treat case of infinities
-            // if (fval > - std::numeric_limits<double>::max() && fval < std::numeric_limits<double>::max() )
-            double f1 = fFunc(&fVec.front(), p);
-            if (fStrategy > 1) {
-               fVec[k] = x0 - h;
-               double f2 = fFunc(&fVec.front(), p);
-               g[k] = 0.5 * (f2 - f1) / h;
+         Error("FitUtil::Evaluate<T>::EvalChi2Residual", "The vectorized evaluation of the Chi2 with the ith residual is still not supported");
+         return -1.;
+      }
+
+      /// evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
+      /// and its gradient
+      static double EvalPoissonBinPdf(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int , double * ) {
+         Error("FitUtil::Evaluate<T>::EvaluatePoissonBinPdf", "The vectorized evaluation of the BinnedLikelihood fit evaluated point by point is still not supported");
+         return -1.;
+      }
+
+      static void
+      EvalPoissonLogLGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
+                              unsigned int &,
+                              ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                              unsigned nChunks = 0)
+      {
+         // evaluate the gradient of the Poisson log likelihood function
+
+         const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
+         assert(fg != nullptr); // must be called by a grad function
+
+         const IGradModelFunctionTempl<T> &func = *fg;
+
+         (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
+
+
+         const DataOptions &fitOpt = data.Opt();
+         if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
+            Error("FitUtil::EvaluatePoissonLogLGradient", "The vectorized implementation doesn't support Integrals,"
+                                                          "BinVolume or ExpErrors\n. Aborting operation.");
+
+         unsigned int npar = func.NPar();
+         auto vecSize = vecCore::VectorSize<T>();
+         unsigned initialNPoints = data.Size();
+         unsigned numVectors = initialNPoints / vecSize;
+
+         auto mapFunction = [&](const unsigned int i) {
+            // set all vector values to zero
+            std::vector<T> gradFunc(npar);
+            std::vector<T> pointContributionVec(npar);
+
+            T x1, y;
+
+            vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
+            vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
+
+            T fval = 0;
+
+            const T *x = nullptr;
+
+            unsigned ndim = data.NDim();
+            std::vector<T> xc;
+            if (ndim > 1) {
+               xc.resize(ndim);
+               xc[0] = x1;
+               for (unsigned int j = 1; j < ndim; ++j)
+                  vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
+               x = xc.data();
             } else {
-               g[k] = (f1 - f0) / h;
+               x = &x1;
             }
 
-            fVec[k] = x[k]; // restore original x value
+            fval = func(x, p);
+            func.ParameterGradient(x, p, &gradFunc[0]);
+
+            // correct the gradient
+            for (unsigned int ipar = 0; ipar < npar; ++ipar) {
+               vecCore::Mask<T> positiveValuesMask = fval > 0;
+
+               // df/dp * (1.  - y/f )
+               vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. - y / fval));
+
+               vecCore::Mask<T> validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0;
+
+               if (!vecCore::MaskEmpty(validNegativeValuesMask)) {
+                  const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
+                  const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
+                  T gg = kdmax1 * gradFunc[ipar];
+                  pointContributionVec[ipar] = -vecCore::Blend(gg > 0, vecCore::math::Min(gg, kdmax2), vecCore::math::Max(gg, -kdmax2));
+               }
+            }
+
+#ifdef DEBUG_FITUTIL
+      {
+         R__LOCKGUARD(gROOTMutex);
+         if (i < 5 || (i > data.Size()-5) ) {
+            if (data.NDim() > 1) std::cout << i << "  x " << x[0] << " y " << x[1];
+            else std::cout << i << "  x " << x[0];
+            std::cout << " func " << fval  << " gradient ";
+            for (unsigned int ii = 0; ii < npar; ++ii) std::cout << "  " << pointContributionVec[ii];
+            std::cout << "\n";
+         }
+      }
+#endif
+
+            return pointContributionVec;
+         };
+
+         // Vertically reduce the set of vectors by summing its equally-indexed components
+         auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
+            std::vector<T> result(npar);
+
+            for (auto const &pointContributionVec : partialResults) {
+               for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+                  result[parameterIndex] += pointContributionVec[parameterIndex];
+            }
+
+            return result;
+         };
+
+         std::vector<T> gVec(npar);
+
+#ifndef R__USE_IMT
+         // to fix compiler warning
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::EvaluatePoissonLogLGradient",
+                    "Multithread execution policy requires IMT, which is disabled. Changing "
+                    "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            ROOT::TSequentialExecutor pool;
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
+         }
+#ifdef R__USE_IMT
+         else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
+         }
+#endif
+         else {
+            Error("FitUtil::EvaluatePoissonLogLGradient", "Execution policy unknown. Avalaible choices:\n "
+                                                          "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
+                                                          "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+         }
+
+
+         // Compute the contribution from the remaining points
+         unsigned int remainingPoints = initialNPoints % vecSize;
+         if (remainingPoints > 0) {
+            auto remainingPointsContribution = mapFunction(numVectors);
+            // Add the contribution from the valid remaining points and store the result in the output variable
+            auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
+            for (unsigned int param = 0; param < npar; param++) {
+               vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
+            }
+         }
+         // reduce final gradient result from T to double
+         for (unsigned int param = 0; param < npar; param++) {
+            grad[param] = vecCore::ReduceAdd(gVec[param]);
          }
+
+#ifdef DEBUG_FITUTIL
+         std::cout << "***** Final gradient : ";
+         for (unsigned int ii = 0; ii< npar; ++ii) std::cout << grad[ii] << "   ";
+         std::cout << "\n";
+#endif  
+
       }
 
-   private:
-      double fEps;
-      double fPrecision;
-      int fStrategy;   // strategy in calculation ( =1 use 2 point rule( 1 extra func) , = 2 use r point rule)
-      unsigned int fN; // gradient dimension
-      const IModelFunction &fFunc;
-      mutable std::vector<double> fVec; // cached coordinates (or parameter values in case of gradientpar)
+      static void EvalLogLGradient(const IModelFunctionTempl<T> &f, const UnBinData &data, const double *p,
+                                   double *grad, unsigned int &,
+                                   ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                                   unsigned nChunks = 0)
+      {
+         // evaluate the gradient of the log likelihood function
+
+         const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
+         assert(fg != nullptr); // must be called by a grad function
+
+         const IGradModelFunctionTempl<T> &func = *fg;
+
+
+         unsigned int npar = func.NPar();
+         auto vecSize = vecCore::VectorSize<T>();
+         unsigned initialNPoints = data.Size();
+         unsigned numVectors = initialNPoints / vecSize;
+
+#ifdef DEBUG_FITUTIL 
+         std::cout << "\n===> Evaluate Gradient for parameters ";
+         for (unsigned int ip = 0; ip < npar; ++ip)
+            std::cout << "  " << p[ip];
+         std::cout << "\n";
+#endif 
+
+         (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
+
+         const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
+         const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
+
+         auto mapFunction = [&](const unsigned int i) {
+            std::vector<T> gradFunc(npar);
+            std::vector<T> pointContributionVec(npar);
+
+            T x1;
+            vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
+
+            const T *x = nullptr;
+
+            unsigned int ndim = data.NDim();
+            std::vector<T> xc(ndim);
+            if (ndim > 1) {
+               xc.resize(ndim);
+               xc[0] = x1;
+               for (unsigned int j = 1; j < ndim; ++j)
+                  vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
+               x = xc.data();
+            } else {
+               x = &x1;
+            }
+
+
+            T fval = func(x, p);
+            func.ParameterGradient(x, p, &gradFunc[0]);
+
+#ifdef DEBUG_FITUTIL            
+            if (i < 5 || (i > numVectors-5) ) {
+               if (ndim > 1) std::cout << i << "  x " << x[0] << " y " << x[1] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+               else std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+            }
+#endif            
+
+            vecCore::Mask<T> positiveValues = fval > 0;
+
+            for (unsigned int kpar = 0; kpar < npar; ++kpar) {
+               if (!vecCore::MaskEmpty(positiveValues))
+                  vecCore::MaskedAssign<T>(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]);
+
+               vecCore::Mask<T> nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0;
+               if (!vecCore::MaskEmpty(nonZeroGradientValues)) {
+                  T gg = kdmax1 * gradFunc[kpar];
+                  pointContributionVec[kpar] =
+                     vecCore::Blend(nonZeroGradientValues && gg > 0, -vecCore::math::Min(gg, kdmax2),
+                                    -vecCore::math::Max(gg, -kdmax2));
+               }
+               // if func derivative is zero term is also zero so do not add in g[kpar]
+            }
+
+            return pointContributionVec;
+         };
+
+         // Vertically reduce the set of vectors by summing its equally-indexed components
+         auto redFunction = [&](const std::vector<std::vector<T>> &pointContributions) {
+            std::vector<T> result(npar);
+
+            for (auto const &pointContributionVec : pointContributions) {
+               for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+                  result[parameterIndex] += pointContributionVec[parameterIndex];
+            }
+
+            return result;
+         };
+
+         std::vector<T> gVec(npar);
+         std::vector<double> g(npar);
+
+#ifndef R__USE_IMT
+         // to fix compiler warning
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::EvaluateLogLGradient",
+                    "Multithread execution policy requires IMT, which is disabled. Changing "
+                    "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            ROOT::TSequentialExecutor pool;
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
+         }
+#ifdef R__USE_IMT
+         else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
+         }
+#endif
+         else {
+            Error("FitUtil::EvaluateLogLGradient", "Execution policy unknown. Avalaible choices:\n "
+                                                   "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
+                                                   "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+         }
+
+         // Compute the contribution from the remaining points
+         unsigned int remainingPoints = initialNPoints % vecSize;
+         if (remainingPoints > 0) {
+            auto remainingPointsContribution = mapFunction(numVectors);
+            // Add the contribution from the valid remaining points and store the result in the output variable
+            auto remainingMask = vecCore::Int2Mask<T>(initialNPoints % vecSize);
+            for (unsigned int param = 0; param < npar; param++) {
+               vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
+            }
+         }
+         // reduce final gradient result from T to double
+         for (unsigned int param = 0; param < npar; param++) {
+            grad[param] = vecCore::ReduceAdd(gVec[param]);
+         }
+
+#ifdef DEBUG_FITUTIL
+         std::cout << "Final gradient ";
+         for (unsigned int param = 0; param < npar; param++) {
+            std::cout << "  " << grad[param];
+         }
+         std::cout << "\n";
+#endif
+      }
    };
 
-   // function to avoid infinities or nan
-   inline double CorrectValue(double rval)
-   {
-      // avoid infinities or nan in  rval
-      if (rval > -std::numeric_limits<double>::max() && rval < std::numeric_limits<double>::max())
-         return rval;
-      else if (rval < 0)
-         // case -inf
-         return -std::numeric_limits<double>::max();
-      else
-         // case + inf or nan
-         return +std::numeric_limits<double>::max();
-   }
+   template<>
+   struct Evaluate<double>{
+#endif
+
+      static double EvalChi2(const IModelFunction &func, const BinData &data, const double *p, unsigned int &nPoints,
+                             ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
+      {
+         // evaluate the chi2 given a  function reference, the data and returns the value and also in nPoints
+         // the actual number of used points
+         // normal chi2 using only error on values (from fitting histogram)
+         // optionally the integral of function in the bin is used
 
-   // calculation of the integral of the gradient functions
-   // for a function providing derivative w.r.t parameters
-   // x1 and x2 defines the integration interval , p the parameters
-   template <class GFunc>
-   void CalculateGradientIntegral(const GFunc &gfunc, const double *x1, const double *x2, const double *p, double *g)
-   {
-
-      // needs to calculate the integral for each partial derivative
-      ParamDerivFunc<GFunc> pfunc(gfunc);
-      IntegralEvaluator<ParamDerivFunc<GFunc>> igDerEval(pfunc, p, true);
-      // loop on the parameters
-      unsigned int npar = gfunc.NPar();
-      for (unsigned int k = 0; k < npar; ++k) {
-         pfunc.SetDerivComponent(k);
-         g[k] = igDerEval(x1, x2);
+
+         //Info("EvalChi2","Using non-vecorized implementation %d",(int) data.Opt().fIntegral);
+
+         return FitUtil::EvaluateChi2(func, data, p, nPoints, executionPolicy, nChunks);
       }
-   }
+
+      static double EvalLogL(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p,
+                             int iWeight, bool extended, unsigned int &nPoints,
+                             ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
+      {
+         return FitUtil::EvaluateLogL(func, data, p, iWeight, extended, nPoints, executionPolicy, nChunks);
+      }
+
+      static double EvalPoissonLogL(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
+                                    int iWeight, bool extended, unsigned int &nPoints,
+                                    ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
+      {
+         return FitUtil::EvaluatePoissonLogL(func, data, p, iWeight, extended, nPoints, executionPolicy, nChunks);
+      }
+
+      static double EvalChi2Effective(const IModelFunctionTempl<double> &func, const BinData & data, const double * p, unsigned int &nPoints)
+      {
+         return FitUtil::EvaluateChi2Effective(func, data, p, nPoints);
+      }
+      static void EvalChi2Gradient(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
+                                   double *g, unsigned int &nPoints,
+                                   ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
+                                   unsigned nChunks = 0)
+      {
+         FitUtil::EvaluateChi2Gradient(func, data, p, g, nPoints, executionPolicy, nChunks);
+      }
+      static double EvalChi2Residual(const IModelFunctionTempl<double> &func, const BinData & data, const double * p, unsigned int i, double *g = 0)
+      {
+         return FitUtil::EvaluateChi2Residual(func, data, p, i, g);
+      }
+
+      /// evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
+      /// and its gradient
+      static double EvalPoissonBinPdf(const IModelFunctionTempl<double> &func, const BinData & data, const double *p, unsigned int i, double *g ) {
+         return FitUtil::EvaluatePoissonBinPdf(func, data, p, i, g);
+      }
+
+      static void
+      EvalPoissonLogLGradient(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, double *g,
+                              unsigned int &nPoints,
+                              ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
+                              unsigned nChunks = 0)
+      {
+         FitUtil::EvaluatePoissonLogLGradient(func, data, p, g, nPoints, executionPolicy, nChunks);
+      }
+
+      static void EvalLogLGradient(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p,
+                                   double *g, unsigned int &nPoints,
+                                   ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
+                                   unsigned nChunks = 0)
+      {
+         FitUtil::EvaluateLogLGradient(func, data, p, g, nPoints, executionPolicy, nChunks);
+      }
+   };
 
 } // end namespace FitUtil
 
-} // end namespace Fit
+   } // end namespace Fit
 
 } // end namespace ROOT
 
+#if defined (R__HAS_VECCORE) && defined(R__HAS_VC)
+//Fixes alignment for structures of SIMD structures
+Vc_DECLARE_ALLOCATOR(ROOT::Fit::FitUtil::LikelihoodAux<ROOT::Double_v>);
+#endif
 
 #endif /* ROOT_Fit_FitUtil */
diff --git a/math/mathcore/inc/Fit/LogLikelihoodFCN.h b/math/mathcore/inc/Fit/LogLikelihoodFCN.h
index b76750a7ef4c5824e763f2d1ea1660a6ae17c828..810029d806ca60c9e0b188ba11c7eb7e3abe6081 100644
--- a/math/mathcore/inc/Fit/LogLikelihoodFCN.h
+++ b/math/mathcore/inc/Fit/LogLikelihoodFCN.h
@@ -19,7 +19,7 @@
 
 #include "Fit/UnBinData.h"
 
-#include "Fit/EvaluateLogL.hxx"
+#include "Fit/FitUtil.h"
 
 #include <memory>
 
@@ -126,8 +126,8 @@ public:
    // need to be virtual to be instantited
    virtual void Gradient(const double *x, double *g) const {
       // evaluate the chi2 gradient
-      FitUtil::LogL<typename BaseFCN::T>::EvalGradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g, fNEffPoints,
-                                                       fExecutionPolicy);
+      FitUtil::Evaluate<typename BaseFCN::T>::EvalLogLGradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g,
+                                                               fNEffPoints, fExecutionPolicy);
    }
 
    /// get type of fit method function
@@ -154,8 +154,7 @@ private:
     */
    virtual double DoEval (const double * x) const {
       this->UpdateNCalls();
-      return FitUtil::LogL<T>::Eval(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fWeight, fIsExtended, fNEffPoints,
-                                    fExecutionPolicy);
+      return FitUtil::Evaluate<T>::EvalLogL(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fWeight, fIsExtended, fNEffPoints, fExecutionPolicy);
    }
 
    // for derivatives
diff --git a/math/mathcore/inc/Fit/PoissonLikelihoodFCN.h b/math/mathcore/inc/Fit/PoissonLikelihoodFCN.h
index d232dfdd25fd9c32af9270b01d7c7a1c2c121d87..0a09ddbad57abec011b993027326e32327da5a02 100644
--- a/math/mathcore/inc/Fit/PoissonLikelihoodFCN.h
+++ b/math/mathcore/inc/Fit/PoissonLikelihoodFCN.h
@@ -19,10 +19,18 @@
 
 #include "Fit/BinData.h"
 
-#include "Fit/EvaluatePoissonLogL.hxx"
+#include "Fit/FitUtil.h"
+
 
 #include <memory>
 
+//#define PARALLEL
+// #ifdef PARALLEL
+// #ifndef ROOT_Fit_FitUtilParallel
+// #include "Fit/FitUtilParallel.h"
+// #endif
+// #endif
+
 namespace ROOT {
 
    namespace Fit {
@@ -114,15 +122,15 @@ public:
    /// i-th likelihood element and its gradient
    virtual double DataElement(const double * x, unsigned int i, double * g) const {
       if (i==0) this->UpdateNCalls();
-      return FitUtil::PoissonLogL<typename BaseFCN::T>::EvalBinPdf(BaseFCN::ModelFunction(), BaseFCN::Data(), x, i, g);
+      return FitUtil::Evaluate<typename BaseFCN::T>::EvalPoissonBinPdf(BaseFCN::ModelFunction(), BaseFCN::Data(), x, i, g);
    }
 
    /// evaluate gradient
    virtual void Gradient(const double *x, double *g) const
    {
       // evaluate the Poisson gradient
-      FitUtil::PoissonLogL<typename BaseFCN::T>::EvalGradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g,
-                                                              fNEffPoints, fExecutionPolicy);
+      FitUtil::Evaluate<typename BaseFCN::T>::EvalPoissonLogLGradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g,
+                                                                      fNEffPoints, fExecutionPolicy);
    }
 
    /// get type of fit method function
@@ -155,8 +163,8 @@ private:
     */
    virtual double DoEval (const double * x) const {
       this->UpdateNCalls();
-      return FitUtil::PoissonLogL<T>::Eval(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fWeight, fIsExtended,
-                                           fNEffPoints, fExecutionPolicy);
+      return FitUtil::Evaluate<T>::EvalPoissonLogL(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fWeight, fIsExtended,
+                                                   fNEffPoints, fExecutionPolicy);
    }
 
    // for derivatives
diff --git a/math/mathcore/src/EvaluateChi2.cxx b/math/mathcore/src/EvaluateChi2.cxx
deleted file mode 100644
index 6d0fbad08e2f6ef91c07e9b3982c548798c5e5bf..0000000000000000000000000000000000000000
--- a/math/mathcore/src/EvaluateChi2.cxx
+++ /dev/null
@@ -1,647 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-// Implementation file for class FitUtil
-
-#include "Fit/EvaluateChi2.hxx"
-
-#include "Fit/BinData.h"
-
-#include "Math/IFunctionfwd.h"
-#include "Math/IParamFunction.h"
-#include "Math/WrappedFunction.h"
-#include "Math/OneDimFunctionAdapter.h"
-#include "Math/RichardsonDerivator.h"
-
-#include "Math/Error.h"
-#include "Math/Util.h" // for safe log(x)
-
-#include <limits>
-#include <cmath>
-#include <cassert>
-#include <algorithm>
-#include <numeric>
-
-#include "TROOT.h"
-
-//#define DEBUG
-#ifdef DEBUG
-#define NSAMPLE 10
-#include <iostream>
-#endif
-
-namespace ROOT {
-
-namespace Fit {
-
-namespace FitUtil {
-
-
-// Evaluates the Chi2 given a function reference and the data and returns the value.
-//
-// Optionally the integral of function in the bin is used.
-double Chi2<double>::Eval(const IModelFunction &func, const BinData &data, const double *p, unsigned int &,
-                          ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
-{
-   unsigned int n = data.Size();
-
-// set parameters of the function to cache integral value
-#ifdef USE_PARAMCACHE
-   (const_cast<IModelFunction &>(func)).SetParameters(p);
-#endif
-   // do not cache parameter values (it is not thread safe)
-   // func.SetParameters(p);
-
-   // get fit option and check case if using integral of bins
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-   bool useExpErrors = (fitOpt.fExpErrors);
-   bool isWeighted = data.IsWeighted();
-
-#ifdef DEBUG
-   std::cout << "\n\nFit data size = " << n << std::endl;
-   std::cout << "evaluate chi2 using function " << &func << "  " << p << std::endl;
-   std::cout << "use empty bins  " << fitOpt.fUseEmpty << std::endl;
-   std::cout << "use integral    " << fitOpt.fIntegral << std::endl;
-   std::cout << "use all error=1 " << fitOpt.fErrors1 << std::endl;
-   if (isWeighted)
-      std::cout << "Weighted data set - sumw =  " << data.SumOfContent() << "  sumw2 = " << data.SumOfError2()
-                << std::endl;
-#endif
-
-#ifdef USE_PARAMCACHE
-   IntegralEvaluator<> igEval(func, 0, useBinIntegral);
-#else
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-#endif
-   double maxResValue = std::numeric_limits<double>::max() / n;
-   double wrefVolume = 1.0;
-   if (useBinVolume) {
-      if (fitOpt.fNormBinVolume)
-         wrefVolume /= data.RefVolume();
-   }
-
-   (const_cast<IModelFunction &>(func)).SetParameters(p);
-
-   auto mapFunction = [&](const unsigned i) {
-      double chi2{};
-      double fval{};
-
-      const auto x1 = data.GetCoordComponent(i, 0);
-      const auto y = data.Value(i);
-      auto invError = data.InvError(i);
-
-      const double *x = nullptr;
-      std::vector<double> xc;
-      double binVolume = 1.0;
-      if (useBinVolume) {
-         unsigned int ndim = data.NDim();
-         const double *x2 = data.BinUpEdge(i);
-         xc.resize(data.NDim());
-         for (unsigned int j = 0; j < ndim; ++j) {
-            auto xx = *data.GetCoordComponent(i, j);
-            binVolume *= std::abs(x2[j] - xx);
-            xc[j] = 0.5 * (x2[j] + xx);
-         }
-         x = xc.data();
-         // normalize the bin volume using a reference value
-         binVolume *= wrefVolume;
-      } else if (data.NDim() > 1) {
-         xc.resize(data.NDim());
-         xc[0] = *x1;
-         for (unsigned int j = 1; j < data.NDim(); ++j)
-            xc[j] = *data.GetCoordComponent(i, j);
-         x = xc.data();
-      } else {
-         x = x1;
-      }
-
-      if (!useBinIntegral) {
-#ifdef USE_PARAMCACHE
-         fval = func(x);
-#else
-         fval = func(x, p);
-#endif
-      } else {
-         // calculate integral normalized by bin volume
-         // need to set function and parameters here in case loop is parallelized
-         fval = igEval(x, data.BinUpEdge(i));
-      }
-      // normalize result if requested according to bin volume
-      if (useBinVolume)
-         fval *= binVolume;
-
-      // expected errors
-      if (useExpErrors) {
-         double invWeight = 1.0;
-         if (isWeighted) {
-            // we need first to check if a weight factor needs to be applied
-            // weight = sumw2/sumw = error**2/content
-            // invWeight = y * invError * invError;
-            // we use always the global weight and not the observed one in the bin
-            // for empty bins use global weight (if it is weighted data.SumError2() is not zero)
-            invWeight = data.SumOfContent() / data.SumOfError2();
-            // if (invError > 0) invWeight = y * invError * invError;
-         }
-
-         //  if (invError == 0) invWeight = (data.SumOfError2() > 0) ? data.SumOfContent()/ data.SumOfError2() : 1.0;
-         // compute expected error  as f(x) / weight
-         double invError2 = (fval > 0) ? invWeight / fval : 0.0;
-         invError = std::sqrt(invError2);
-         // std::cout << "using Pearson chi2 " << x[0] << "  " << 1./invError2 << "  " << fval << std::endl;
-      }
-
-//#define DEBUG
-#ifdef DEBUG
-      std::cout << x[0] << "  " << y << "  " << 1. / invError << " params : ";
-      for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
-         std::cout << p[ipar] << "\t";
-      std::cout << "\tfval = " << fval << " bin volume " << binVolume << " ref " << wrefVolume << std::endl;
-#endif
-      //#undef DEBUG
-
-      if (invError > 0) {
-
-         double tmp = (y - fval) * invError;
-         double resval = tmp * tmp;
-
-         // avoid inifinity or nan in chi2 values due to wrong function values
-         if (resval < maxResValue)
-            chi2 += resval;
-         else {
-            chi2 += maxResValue;
-         }
-      }
-      return chi2;
-   };
-
-#ifdef R__USE_IMT
-   auto redFunction = [](const std::vector<double> &objs) {
-      return std::accumulate(objs.begin(), objs.end(), double{});
-   };
-#else
-   (void)nChunks;
-
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("Chi2<double>::EvaluateChi2", "Multithread execution policy requires IMT, which is disabled. Changing "
-                                            "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   double res{};
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      for (unsigned int i = 0; i < n; ++i) {
-         res += mapFunction(i);
-      }
-#ifdef R__USE_IMT
-   } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size());
-      res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
-#endif
-   } else {
-      Error("Chi2<double>::EvaluateChi2", "Execution policy unknown. Avalaible choices:\n "
-                                          "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                          "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-   }
-
-   return res;
-}
-
-// Evaluates the Chi2 given a function reference and the data. Returns the value and,
-// in nPoints, the actual number of used points.
-//
-// This method uses the error in the coordinates.
-// The integral of bin does not make sense in this case.
-double Chi2<double>::EvalEffective(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
-                                   unsigned int &nPoints)
-{
-   unsigned int n = data.Size();
-
-#ifdef DEBUG
-   std::cout << "\n\nFit data size = " << n << std::endl;
-   std::cout << "evaluate effective chi2 using function " << &func << "  " << p << std::endl;
-#endif
-
-   assert(data.HaveCoordErrors() || data.HaveAsymErrors());
-
-   double chi2 = 0;
-
-   // func.SetParameters(p);
-
-   unsigned int ndim = func.NDim();
-
-   // use Richardson derivator
-   ROOT::Math::RichardsonDerivator derivator;
-
-   double maxResValue = std::numeric_limits<double>::max() / n;
-
-   for (unsigned int i = 0; i < n; ++i) {
-
-      double y = 0;
-      const double *x = data.GetPoint(i, y);
-
-      double fval = func(x, p);
-
-      double delta_y_func = y - fval;
-
-      double ey = 0;
-      const double *ex = 0;
-      if (!data.HaveAsymErrors())
-         ex = data.GetPointError(i, ey);
-      else {
-         double eylow, eyhigh = 0;
-         ex = data.GetPointError(i, eylow, eyhigh);
-         if (delta_y_func < 0)
-            ey = eyhigh; // function is higher than points
-         else
-            ey = eylow;
-      }
-      double e2 = ey * ey;
-      // before calculating the gradient check that all error in x are not zero
-      unsigned int j = 0;
-      while (j < ndim && ex[j] == 0.) {
-         j++;
-      }
-      // if j is less ndim some elements are not zero
-      if (j < ndim) {
-         // need an adapter from a multi-dim function to a one-dimensional
-         ROOT::Math::OneDimMultiFunctionAdapter<const IModelFunction &> f1D(func, x, 0, p);
-         // select optimal step size  (use 10--2 by default as was done in TF1:
-         double kEps = 0.01;
-         double kPrecision = 1.E-8;
-         for (unsigned int icoord = 0; icoord < ndim; ++icoord) {
-            // calculate derivative for each coordinate
-            if (ex[icoord] > 0) {
-               // gradCalc.Gradient(x, p, fval, &grad[0]);
-               f1D.SetCoord(icoord);
-               // optimal spep size (take ex[] as scale for the points and 1% of it
-               double x0 = x[icoord];
-               double h = std::max(kEps * std::abs(ex[icoord]), 8.0 * kPrecision * (std::abs(x0) + kPrecision));
-               double deriv = derivator.Derivative1(f1D, x[icoord], h);
-               double edx = ex[icoord] * deriv;
-               e2 += edx * edx;
-#ifdef DEBUG
-               std::cout << "error for coord " << icoord << " = " << ex[icoord] << " deriv " << deriv << std::endl;
-#endif
-            }
-         }
-      }
-      double w2 = (e2 > 0) ? 1.0 / e2 : 0;
-      double resval = w2 * (y - fval) * (y - fval);
-
-#ifdef DEBUG
-      std::cout << x[0] << "  " << y << " ex " << ex[0] << " ey  " << ey << " params : ";
-      for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
-         std::cout << p[ipar] << "\t";
-      std::cout << "\tfval = " << fval << "\tresval = " << resval << std::endl;
-#endif
-
-      // avoid (infinity and nan ) in the chi2 sum
-      // eventually add possibility of excluding some points (like singularity)
-      if (resval < maxResValue)
-         chi2 += resval;
-      else
-         chi2 += maxResValue;
-   }
-
-   // reset the number of fitting data points
-   nPoints = n; // no points are rejected
-
-#ifdef DEBUG
-   std::cout << "chi2 = " << chi2 << " n = " << nPoints << std::endl;
-#endif
-
-   return chi2;
-}
-
-// Evaluates the gradient of the Chi2 function.
-// This function is used when the model function knows how to calculate the derivative and we can
-// avoid that the minimizer re-computes them.
-//
-// The case of Chi2 effective (errors on coordinate) is not supported
-void Chi2<double>::EvalGradient(const IModelFunctionTempl<double> &f, const BinData &data, const double *p,
-                                double *grad, unsigned int &nPoints, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                                unsigned nChunks)
-{
-
-   if (data.HaveCoordErrors()) {
-      MATH_ERROR_MSG("Chi2<double>::EvaluateChi2Gradient",
-                     "Error on the coordinates are not used in calculating Chi2 gradient");
-      return; // it will assert otherwise later in GetPoint
-   }
-
-   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
-   assert(fg != nullptr); // must be called by a gradient function
-
-   const IGradModelFunction &func = *fg;
-
-#ifdef DEBUG
-   std::cout << "\n\nFit data size = " << n << std::endl;
-   std::cout << "evaluate chi2 using function gradient " << &func << "  " << p << std::endl;
-#endif
-
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-
-   double wrefVolume = 1.0;
-   if (useBinVolume) {
-      if (fitOpt.fNormBinVolume)
-         wrefVolume /= data.RefVolume();
-   }
-
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-
-   unsigned int npar = func.NPar();
-   unsigned initialNPoints = data.Size();
-
-   std::vector<bool> isPointRejected(initialNPoints);
-
-   auto mapFunction = [&](const unsigned int i) {
-      // set all vector values to zero
-      std::vector<double> gradFunc(npar);
-      std::vector<double> pointContribution(npar);
-
-      const auto x1 = data.GetCoordComponent(i, 0);
-      const auto y = data.Value(i);
-      auto invError = data.Error(i);
-
-      invError = (invError != 0.0) ? 1.0 / invError : 1;
-
-      double fval = 0;
-
-      const double *x = nullptr;
-      std::vector<double> xc;
-
-      unsigned int ndim = data.NDim();
-      double binVolume = 1;
-      if (useBinVolume) {
-         const double *x2 = data.BinUpEdge(i);
-
-         xc.resize(ndim);
-         for (unsigned int j = 0; j < ndim; ++j) {
-            auto x1_j = *data.GetCoordComponent(i, j);
-            binVolume *= std::abs(x2[j] - x1_j);
-            xc[j] = 0.5 * (x2[j] + x1_j);
-         }
-
-         x = xc.data();
-
-         // normalize the bin volume using a reference value
-         binVolume *= wrefVolume;
-      } else if (ndim > 1) {
-         xc.resize(ndim);
-         xc[0] = *x1;
-         for (unsigned int j = 1; j < ndim; ++j)
-            xc[j] = *data.GetCoordComponent(i, j);
-         x = xc.data();
-      } else {
-         x = x1;
-      }
-
-      if (!useBinIntegral) {
-         fval = func(x, p);
-         func.ParameterGradient(x, p, &gradFunc[0]);
-      } else {
-         auto x2 = data.BinUpEdge(i);
-         // calculate normalized integral and gradient (divided by bin volume)
-         // need to set function and parameters here in case loop is parallelized
-         fval = igEval(x, x2);
-         CalculateGradientIntegral(func, x, x2, p, &gradFunc[0]);
-      }
-      if (useBinVolume)
-         fval *= binVolume;
-
-#ifdef DEBUG
-      std::cout << x[0] << "  " << y << "  " << 1. / invError << " params : ";
-      for (unsigned int ipar = 0; ipar < npar; ++ipar)
-         std::cout << p[ipar] << "\t";
-      std::cout << "\tfval = " << fval << std::endl;
-#endif
-      if (!std::isfinite(fval)) {
-         isPointRejected[i] = true;
-         // Return a zero contribution to all partial derivatives on behalf of the current point
-         return pointContribution;
-      }
-
-      // loop on the parameters
-      unsigned int ipar = 0;
-      for (; ipar < npar; ++ipar) {
-
-         // correct gradient for bin volumes
-         if (useBinVolume)
-            gradFunc[ipar] *= binVolume;
-
-         // avoid singularity in the function (infinity and nan ) in the chi2 sum
-         // eventually add possibility of excluding some points (like singularity)
-         double dfval = gradFunc[ipar];
-         if (!std::isfinite(dfval)) {
-            break; // exit loop on parameters
-         }
-
-         // calculate derivative point contribution
-         pointContribution[ipar] = -2.0 * (y - fval) * invError * invError * gradFunc[ipar];
-      }
-
-      if (ipar < npar) {
-         // case loop was broken for an overflow in the gradient calculation
-         isPointRejected[i] = true;
-      }
-
-      return pointContribution;
-   };
-
-   // Vertically reduce the set of vectors by summing its equally-indexed components
-   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
-      std::vector<double> result(npar);
-
-      for (auto const &pointContribution : pointContributions) {
-         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-            result[parameterIndex] += pointContribution[parameterIndex];
-      }
-
-      return result;
-   };
-
-   std::vector<double> g(npar);
-
-#ifndef R__USE_IMT
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("Chi2<double>::EvaluateChi2Gradient",
-              "Multithread execution policy requires IMT, which is disabled. Changing "
-              "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      std::vector<std::vector<double>> allGradients(initialNPoints);
-      for (unsigned int i = 0; i < initialNPoints; ++i) {
-         allGradients[i] = mapFunction(i);
-      }
-      g = redFunction(allGradients);
-   }
-#ifdef R__USE_IMT
-   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
-      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
-   }
-#endif
-   else {
-      Error("Chi2<double>::EvaluateChi2Gradient",
-            "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
-   }
-
-#ifndef R__USE_IMT
-   // to fix compiler warning
-   (void)nChunks;
-#endif
-
-   // correct the number of points
-   nPoints = initialNPoints;
-
-   if (std::any_of(isPointRejected.begin(), isPointRejected.end(), [](bool point) { return point; })) {
-      unsigned nRejected = std::accumulate(isPointRejected.begin(), isPointRejected.end(), 0);
-      assert(nRejected <= initialNPoints);
-      nPoints = initialNPoints - nRejected;
-
-      if (nPoints < npar)
-         MATH_ERROR_MSG("Chi2<double>::EvaluateChi2Gradient",
-                        "Error - too many points rejected for overflow in gradient calculation");
-   }
-
-   // copy result
-   std::copy(g.begin(), g.end(), grad);
-}
-
-// evaluate the chi2 contribution (residual term) only for data with no coord-errors
-// This function is used in the specialized least square algorithms like FUMILI or L.M.
-// if we have error on the coordinates the method is not yet implemented
-//  integral option is also not yet implemented
-//  one can use in that case normal chi2 method
-double Chi2<double>::EvalResidual(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
-                                  unsigned int i, double *g)
-{
-   if (data.GetErrorType() == BinData::kCoordError && data.Opt().fCoordErrors) {
-      MATH_ERROR_MSG("Chi2<double>::EvaluateChi2Residual",
-                     "Error on the coordinates are not used in calculating Chi2 residual");
-      return 0; // it will assert otherwise later in GetPoint
-   }
-
-   // func.SetParameters(p);
-
-   double y, invError = 0;
-
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-   bool useExpErrors = (fitOpt.fExpErrors);
-
-   const double *x1 = data.GetPoint(i, y, invError);
-
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-   double fval = 0;
-   unsigned int ndim = data.NDim();
-   double binVolume = 1.0;
-   const double *x2 = 0;
-   if (useBinVolume || useBinIntegral)
-      x2 = data.BinUpEdge(i);
-
-   double *xc = 0;
-
-   if (useBinVolume) {
-      xc = new double[ndim];
-      for (unsigned int j = 0; j < ndim; ++j) {
-         binVolume *= std::abs(x2[j] - x1[j]);
-         xc[j] = 0.5 * (x2[j] + x1[j]);
-      }
-      // normalize the bin volume using a reference value
-      binVolume /= data.RefVolume();
-   }
-
-   const double *x = (useBinVolume) ? xc : x1;
-
-   if (!useBinIntegral) {
-      fval = func(x, p);
-   } else {
-      // calculate integral (normalized by bin volume)
-      // need to set function and parameters here in case loop is parallelized
-      fval = igEval(x1, x2);
-   }
-   // normalize result if requested according to bin volume
-   if (useBinVolume)
-      fval *= binVolume;
-
-   // expected errors
-   if (useExpErrors) {
-      // we need first to check if a weight factor needs to be applied
-      // weight = sumw2/sumw = error**2/content
-      // NOTE: assume histogram is not weighted
-      // don't know how to do with bins with weight = 0
-      // double invWeight = y * invError * invError;
-      // if (invError == 0) invWeight = (data.SumOfError2() > 0) ? data.SumOfContent()/ data.SumOfError2() : 1.0;
-      // compute expected error  as f(x) / weight
-      double invError2 = (fval > 0) ? 1.0 / fval : 0.0;
-      invError = std::sqrt(invError2);
-   }
-
-   double resval = (y - fval) * invError;
-
-   // avoid infinities or nan in  resval
-   resval = CorrectValue(resval);
-
-   // estimate gradient
-   if (g != 0) {
-
-      unsigned int npar = func.NPar();
-      const IGradModelFunction *gfunc = dynamic_cast<const IGradModelFunction *>(&func);
-
-      if (gfunc != 0) {
-         // case function provides gradient
-         if (!useBinIntegral) {
-            gfunc->ParameterGradient(x, p, g);
-         } else {
-            // needs to calculate the integral for each partial derivative
-            CalculateGradientIntegral(*gfunc, x1, x2, p, g);
-         }
-      } else {
-         SimpleGradientCalculator gc(npar, func);
-         if (!useBinIntegral)
-            gc.ParameterGradient(x, p, fval, g);
-         else {
-            // needs to calculate the integral for each partial derivative
-            CalculateGradientIntegral(gc, x1, x2, p, g);
-         }
-      }
-      // mutiply by - 1 * weight
-      for (unsigned int k = 0; k < npar; ++k) {
-         g[k] *= -invError;
-         if (useBinVolume)
-            g[k] *= binVolume;
-      }
-   }
-
-   if (useBinVolume)
-      delete[] xc;
-
-   return resval;
-}
-
-} // namespace FitUtil
-
-} // namespace Fit
-
-} // end namespace ROOT
diff --git a/math/mathcore/src/EvaluateLogL.cxx b/math/mathcore/src/EvaluateLogL.cxx
deleted file mode 100644
index fa78dad06c5a0b35dda315cd996054fd93d65cc4..0000000000000000000000000000000000000000
--- a/math/mathcore/src/EvaluateLogL.cxx
+++ /dev/null
@@ -1,452 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-// Implementation file for class FitUtil
-
-#include "Fit/BinData.h"
-#include "Fit/UnBinData.h"
-#include "Fit/EvaluateLogL.hxx"
-#include "Math/IFunctionfwd.h"
-#include "Math/IParamFunction.h"
-#include "Math/Integrator.h"
-#include "Math/IntegratorMultiDim.h"
-#include "Math/WrappedFunction.h"
-#include "Math/OneDimFunctionAdapter.h"
-#include "Math/RichardsonDerivator.h"
-
-#include "Math/Error.h"
-#include "Math/Util.h" // for safe log(x)
-
-#include <limits>
-#include <cmath>
-#include <cassert>
-#include <algorithm>
-#include <numeric>
-//#include <memory>
-
-#include "TROOT.h"
-
-//#define DEBUG
-#ifdef DEBUG
-#define NSAMPLE 10
-#include <iostream>
-#endif
-
-//  need to implement integral option
-
-namespace ROOT {
-
-namespace Fit {
-
-namespace FitUtil {
-
-//______________________________________________________________________________________________________
-//
-//  Log Likelihood functions
-//_______________________________________________________________________________________________________
-
-// utility function used by the likelihoods
-
-double EvaluatePdf(const IModelFunction &func, const UnBinData &data, const double *p, unsigned int i, double *g)
-{
-   // evaluate the pdf contribution to the generic logl function in case of bin data
-   // return actually the log of the pdf and its derivatives
-
-   // func.SetParameters(p);
-
-   const double *x = data.Coords(i);
-   double fval = func(x, p);
-   double logPdf = ROOT::Math::Util::EvalLog(fval);
-   // return
-   if (g == 0)
-      return logPdf;
-
-   const IGradModelFunction *gfunc = dynamic_cast<const IGradModelFunction *>(&func);
-
-   // gradient  calculation
-   if (gfunc != 0) {
-      // case function provides gradient
-      gfunc->ParameterGradient(x, p, g);
-   } else {
-      // estimate gradieant numerically with simple 2 point rule
-      // should probably calculate gradient of log(pdf) is more stable numerically
-      SimpleGradientCalculator gc(func.NPar(), func);
-      gc.ParameterGradient(x, p, fval, g);
-   }
-   // divide gradient by function value since returning the logs
-   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar) {
-      g[ipar] /= fval; // this should be checked against infinities
-   }
-
-#ifdef DEBUG
-   std::cout << x[i] << "\t";
-   std::cout << "\tpar = [ " << func.NPar() << " ] =  ";
-   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
-      std::cout << p[ipar] << "\t";
-   std::cout << "\tfval = " << fval;
-   std::cout << "\tgrad = [ ";
-   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
-      std::cout << g[ipar] << "\t";
-   std::cout << " ] " << std::endl;
-#endif
-
-   return logPdf;
-}
-
-double LogL<double>::Eval(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p, int iWeight,
-                          bool extended, unsigned int &nPoints, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                          unsigned nChunks)
-{
-   // evaluate the LogLikelihood
-
-   unsigned int n = data.Size();
-
-   // unsigned int nRejected = 0;
-
-   bool normalizeFunc = false;
-
-// set parameters of the function to cache integral value
-#ifdef USE_PARAMCACHE
-   (const_cast<IModelFunctionTempl<double> &>(func)).SetParameters(p);
-#endif
-
-   nPoints = data.Size(); // npoints
-
-#ifdef R__USE_IMT
-   // in case parameter needs to be propagated to user function use trick to set parameters by calling one time the
-   // function
-   // this will be done in sequential mode and parameters can be set in a thread safe manner
-   if (!normalizeFunc) {
-      if (data.NDim() == 1) {
-         const double *x = data.GetCoordComponent(0, 0);
-         func(x, p);
-      } else {
-         std::vector<double> x(data.NDim());
-         for (unsigned int j = 0; j < data.NDim(); ++j)
-            x[j] = *data.GetCoordComponent(0, j);
-         func(x.data(), p);
-      }
-   }
-#endif
-
-   double norm = 1.0;
-   if (normalizeFunc) {
-      // compute integral of the function
-      std::vector<double> xmin(data.NDim());
-      std::vector<double> xmax(data.NDim());
-      IntegralEvaluator<> igEval(func, p, true);
-      // compute integral in the ranges where is defined
-      if (data.Range().Size() > 0) {
-         norm = 0;
-         for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
-            data.Range().GetRange(&xmin[0], &xmax[0], ir);
-            norm += igEval.Integral(xmin.data(), xmax.data());
-         }
-      } else {
-         // use (-inf +inf)
-         data.Range().GetRange(&xmin[0], &xmax[0]);
-         // check if funcition is zero at +- inf
-         if (func(xmin.data(), p) != 0 || func(xmax.data(), p) != 0) {
-            MATH_ERROR_MSG("FitUtil::LogL<double>::Eval",
-                           "A range has not been set and the function is not zero at +/- inf");
-            return 0;
-         }
-         norm = igEval.Integral(&xmin[0], &xmax[0]);
-      }
-   }
-
-   // needed to compue effective global weight in case of extended likelihood
-
-   auto mapFunction = [&](const unsigned i) {
-      double W = 0;
-      double W2 = 0;
-      double fval = 0;
-
-      if (data.NDim() > 1) {
-         std::vector<double> x(data.NDim());
-         for (unsigned int j = 0; j < data.NDim(); ++j)
-            x[j] = *data.GetCoordComponent(i, j);
-#ifdef USE_PARAMCACHE
-         fval = func(x.data());
-#else
-         fval = func(x.data(), p);
-#endif
-
-         // one -dim case
-      } else {
-         const auto x = data.GetCoordComponent(i, 0);
-#ifdef USE_PARAMCACHE
-         fval = func(x);
-#else
-         fval = func(x, p);
-#endif
-      }
-
-      if (normalizeFunc)
-         fval = fval * (1 / norm);
-
-      // function EvalLog protects against negative or too small values of fval
-      double logval = ROOT::Math::Util::EvalLog(fval);
-      if (iWeight > 0) {
-         double weight = data.Weight(i);
-         logval *= weight;
-         if (iWeight == 2) {
-            logval *= weight; // use square of weights in likelihood
-            if (!extended) {
-               // needed sum of weights and sum of weight square if likelkihood is extended
-               W = weight;
-               W2 = weight * weight;
-            }
-         }
-      }
-      return LikelihoodAux<double>(logval, W, W2);
-   };
-
-#ifdef R__USE_IMT
-   auto redFunction = [](const std::vector<LikelihoodAux<double>> &objs) {
-      return std::accumulate(objs.begin(), objs.end(), LikelihoodAux<double>(0.0, 0.0, 0.0),
-                             [](const LikelihoodAux<double> &l1, const LikelihoodAux<double> &l2) { return l1 + l2; });
-   };
-
-#else
-   (void)nChunks;
-
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("FitUtil::LogL<double>::Eval", "Multithread execution policy requires IMT, which is disabled. Changing "
-                                             "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   double logl{};
-   double sumW{};
-   double sumW2{};
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      for (unsigned int i = 0; i < n; ++i) {
-         auto resArray = mapFunction(i);
-         logl += resArray.logvalue;
-         sumW += resArray.weight;
-         sumW2 += resArray.weight2;
-      }
-#ifdef R__USE_IMT
-   } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size());
-      auto resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
-      logl = resArray.logvalue;
-      sumW = resArray.weight;
-      sumW2 = resArray.weight2;
-#endif
-   } else {
-      Error("FitUtil::LogL<double>::Eval", "Execution policy unknown. Avalaible choices:\n "
-                                           "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                           "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-   }
-
-   if (extended) {
-      // add Poisson extended term
-      double extendedTerm = 0; // extended term in likelihood
-      double nuTot = 0;
-      // nuTot is integral of function in the range
-      // if function has been normalized integral has been already computed
-      if (!normalizeFunc) {
-         IntegralEvaluator<> igEval(func, p, true);
-         std::vector<double> xmin(data.NDim());
-         std::vector<double> xmax(data.NDim());
-
-         // compute integral in the ranges where is defined
-         if (data.Range().Size() > 0) {
-            nuTot = 0;
-            for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
-               data.Range().GetRange(&xmin[0], &xmax[0], ir);
-               nuTot += igEval.Integral(xmin.data(), xmax.data());
-            }
-         } else {
-            // use (-inf +inf)
-            data.Range().GetRange(&xmin[0], &xmax[0]);
-            // check if funcition is zero at +- inf
-            if (func(xmin.data(), p) != 0 || func(xmax.data(), p) != 0) {
-               MATH_ERROR_MSG("FitUtil::LogL<double>::Eval",
-                              "A range has not been set and the function is not zero at +/- inf");
-               return 0;
-            }
-            nuTot = igEval.Integral(&xmin[0], &xmax[0]);
-         }
-
-         // force to be last parameter value
-         // nutot = p[func.NDim()-1];
-         if (iWeight != 2)
-            extendedTerm = -nuTot; // no need to add in this case n log(nu) since is already computed before
-         else {
-            // case use weight square in likelihood : compute total effective weight = sw2/sw
-            // ignore for the moment case when sumW is zero
-            extendedTerm = -(sumW2 / sumW) * nuTot;
-         }
-
-      } else {
-         nuTot = norm;
-         extendedTerm = -nuTot + double(n) * ROOT::Math::Util::EvalLog(nuTot);
-         // in case of weights need to use here sum of weights (to be done)
-      }
-      logl += extendedTerm;
-   }
-
-#ifdef DEBUG
-   std::cout << "Evaluated log L for parameters (";
-   for (unsigned int ip = 0; ip < func.NPar(); ++ip)
-      std::cout << " " << p[ip];
-   std::cout << ")  fval = " << -logl << std::endl;
-#endif
-
-   return -logl;
-}
-
-void LogL<double>::EvalGradient(const IModelFunctionTempl<double> &f, const UnBinData &data, const double *p,
-                                double *grad, unsigned int &, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                                unsigned nChunks)
-{
-   // evaluate the gradient of the log likelihood function
-
-   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
-   assert(fg != nullptr); // must be called by a grad function
-
-   const IGradModelFunction &func = *fg;
-
-   unsigned int npar = func.NPar();
-   unsigned initialNPoints = data.Size();
-
-   (const_cast<IGradModelFunction &>(func)).SetParameters(p);
-
-#ifdef DEBUG
-   std::cout << "\n===> Evaluate Gradient for parameters ";
-   for (unsigned int ip = 0; ip < npar; ++ip)
-      std::cout << "  " << p[ip];
-   std::cout << "\n";
-#endif
-
-   const double kdmax1 = std::sqrt(std::numeric_limits<double>::max());
-   const double kdmax2 = std::numeric_limits<double>::max() / (4 * initialNPoints);
-
-   auto mapFunction = [&](const unsigned int i) {
-      std::vector<double> gradFunc(npar);
-      std::vector<double> pointContribution(npar);
-
-      const double *x = nullptr;
-      std::vector<double> xc;
-      if (data.NDim() > 1) {
-         xc.resize(data.NDim());
-         for (unsigned int j = 0; j < data.NDim(); ++j)
-            xc[j] = *data.GetCoordComponent(i, j);
-         x = xc.data();
-      } else {
-         x = data.GetCoordComponent(i, 0);
-      }
-
-      double fval = func(x, p);
-      func.ParameterGradient(x, p, &gradFunc[0]);
-
-#ifdef DEBUG
-      {
-         R__LOCKGUARD(gROOTMutex);
-         if (i < 5 || (i > data.Size() - 5)) {
-            if (data.NDim() > 1)
-               std::cout << i << "  x " << x[0] << " y " << x[1] << " func " << fval << " gradient " << gradFunc[0]
-                         << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
-            else
-               std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  "
-                         << gradFunc[3] << std::endl;
-         }
-      }
-#endif
-
-      for (unsigned int kpar = 0; kpar < npar; ++kpar) {
-         if (fval > 0)
-            pointContribution[kpar] = -1. / fval * gradFunc[kpar];
-         else if (gradFunc[kpar] != 0) {
-            double gg = kdmax1 * gradFunc[kpar];
-            if (gg > 0)
-               gg = std::min(gg, kdmax2);
-            else
-               gg = std::max(gg, -kdmax2);
-            pointContribution[kpar] = -gg;
-         }
-         // if func derivative is zero term is also zero so do not add in g[kpar]
-      }
-
-      return pointContribution;
-   };
-
-   // Vertically reduce the set of vectors by summing its equally-indexed components
-   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
-      std::vector<double> result(npar);
-
-      for (auto const &pointContribution : pointContributions) {
-         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-            result[parameterIndex] += pointContribution[parameterIndex];
-      }
-
-      return result;
-   };
-
-   std::vector<double> g(npar);
-
-#ifndef R__USE_IMT
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("FitUtil::LogL<double>::EvalGradient",
-              "Multithread execution policy requires IMT, which is disabled. Changing "
-              "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      std::vector<std::vector<double>> allGradients(initialNPoints);
-      for (unsigned int i = 0; i < initialNPoints; ++i) {
-         allGradients[i] = mapFunction(i);
-      }
-      g = redFunction(allGradients);
-   }
-#ifdef R__USE_IMT
-   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
-      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
-   }
-#endif
-   else {
-      Error("FitUtil::LogL<double>::EvalGradient", "Execution policy unknown. Avalaible choices:\n "
-                                                   "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                                   "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-   }
-
-#ifndef R__USE_IMT
-   // to fix compiler warning
-   (void)nChunks;
-#endif
-
-   // copy result
-   std::copy(g.begin(), g.end(), grad);
-
-#ifdef DEBUG
-   std::cout << "FitUtil.cxx : Final gradient ";
-   for (unsigned int param = 0; param < npar; param++) {
-      std::cout << "  " << grad[param];
-   }
-   std::cout << "\n";
-#endif
-}
-
-} // end namespace FitUtil
-
-} // end namespace Fit
-
-} // end namespace ROOT
diff --git a/math/mathcore/src/EvaluatePoissonLogL.cxx b/math/mathcore/src/EvaluatePoissonLogL.cxx
deleted file mode 100644
index b49a2284426c5a54656ad4570e990515ec4814e3..0000000000000000000000000000000000000000
--- a/math/mathcore/src/EvaluatePoissonLogL.cxx
+++ /dev/null
@@ -1,541 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-// Implementation file for class FitUtil
-
-#include "Fit/BinData.h"
-#include "Fit/UnBinData.h"
-#include "Fit/EvaluatePoissonLogL.hxx"
-#include "Math/IFunctionfwd.h"
-#include "Math/IParamFunction.h"
-#include "Math/Integrator.h"
-#include "Math/IntegratorMultiDim.h"
-#include "Math/WrappedFunction.h"
-#include "Math/OneDimFunctionAdapter.h"
-#include "Math/RichardsonDerivator.h"
-
-#include "Math/Error.h"
-#include "Math/Util.h" // for safe log(x)
-
-#include <limits>
-#include <cmath>
-#include <cassert>
-#include <algorithm>
-#include <numeric>
-//#include <memory>
-
-#include "TROOT.h"
-
-//#define DEBUG
-#ifdef DEBUG
-#define NSAMPLE 10
-#include <iostream>
-#endif
-
-//  need to implement integral option
-
-namespace ROOT {
-
-namespace Fit {
-
-namespace FitUtil {
-
-//______________________________________________________________________________________________________
-//
-//  Poisson Log Likelihood functions
-//_______________________________________________________________________________________________________
-
-
-// evaluate the Poisson Log Likelihood
-// for binned likelihood fits
-// this is Sum ( f(x_i)  -  y_i * log( f (x_i) ) )
-// add as well constant term for saturated model to make it like a Chi2/2
-// by default is etended. If extended is false the fit is not extended and
-// the global poisson term is removed (i.e is a binomial fit)
-// (remember that in this case one needs to have a function with a fixed normalization
-// like in a non extended unbinned fit)
-//
-// if use Weight use a weighted dataset
-// iWeight = 1 ==> logL = Sum( w f(x_i) )
-// case of iWeight==1 is actually identical to weight==0
-// iWeight = 2 ==> logL = Sum( w*w * f(x_i) )
-double PoissonLogL<double>::Eval(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
-                                 int iWeight, bool extended, unsigned int &nPoints,
-                                 ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
-{
-   unsigned int n = data.Size();
-
-#ifdef USE_PARAMCACHE
-   (const_cast<IModelFunction &>(func)).SetParameters(p);
-#endif
-
-   nPoints = data.Size(); // npoints
-
-   // get fit option and check case of using integral of bins
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-   bool useW2 = (iWeight == 2);
-
-   // normalize if needed by a reference volume value
-   double wrefVolume = 1.0;
-   if (useBinVolume) {
-      if (fitOpt.fNormBinVolume)
-         wrefVolume /= data.RefVolume();
-   }
-
-#ifdef DEBUG
-   std::cout << "Evaluate PoissonLogL for params = [ ";
-   for (unsigned int j = 0; j < func.NPar(); ++j)
-      std::cout << p[j] << " , ";
-   std::cout << "]  - data size = " << n << " useBinIntegral " << useBinIntegral << " useBinVolume " << useBinVolume
-             << " useW2 " << useW2 << " wrefVolume = " << wrefVolume << std::endl;
-#endif
-
-#ifdef USE_PARAMCACHE
-   IntegralEvaluator<> igEval(func, 0, useBinIntegral);
-#else
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-#endif
-
-   auto mapFunction = [&](const unsigned i) {
-      auto x1 = data.GetCoordComponent(i, 0);
-      auto y = *data.ValuePtr(i);
-
-      const double *x = nullptr;
-      std::vector<double> xc;
-      double fval = 0;
-      double binVolume = 1.0;
-
-      if (useBinVolume) {
-         unsigned int ndim = data.NDim();
-         const double *x2 = data.BinUpEdge(i);
-         xc.resize(data.NDim());
-         for (unsigned int j = 0; j < ndim; ++j) {
-            auto xx = *data.GetCoordComponent(i, j);
-            binVolume *= std::abs(x2[j] - xx);
-            xc[j] = 0.5 * (x2[j] + xx);
-         }
-         x = xc.data();
-         // normalize the bin volume using a reference value
-         binVolume *= wrefVolume;
-      } else if (data.NDim() > 1) {
-         xc.resize(data.NDim());
-         xc[0] = *x1;
-         for (unsigned int j = 1; j < data.NDim(); ++j) {
-            xc[j] = *data.GetCoordComponent(i, j);
-         }
-         x = xc.data();
-      } else {
-         x = x1;
-      }
-
-      if (!useBinIntegral) {
-#ifdef USE_PARAMCACHE
-         fval = func(x);
-#else
-         fval = func(x, p);
-#endif
-      } else {
-         // calculate integral (normalized by bin volume)
-         // need to set function and parameters here in case loop is parallelized
-         fval = igEval(x, data.BinUpEdge(i));
-      }
-      if (useBinVolume)
-         fval *= binVolume;
-
-#ifdef DEBUG
-      int NSAMPLE = 100;
-      if (i % NSAMPLE == 0) {
-         std::cout << "evt " << i << " x = [ ";
-         for (unsigned int j = 0; j < func.NDim(); ++j)
-            std::cout << x[j] << " , ";
-         std::cout << "]  ";
-         if (fitOpt.fIntegral) {
-            std::cout << "x2 = [ ";
-            for (unsigned int j = 0; j < func.NDim(); ++j)
-               std::cout << data.BinUpEdge(i)[j] << " , ";
-            std::cout << "] ";
-         }
-         std::cout << "  y = " << y << " fval = " << fval << std::endl;
-      }
-#endif
-
-      // EvalLog protects against 0 values of fval but don't want to add in the -log sum
-      // negative values of fval
-      fval = std::max(fval, 0.0);
-
-      double nloglike = 0; // negative loglikelihood
-      if (useW2) {
-         // apply weight correction . Effective weight is error^2/ y
-         // and expected events in bins is fval/weight
-         // can apply correction only when y is not zero otherwise weight is undefined
-         // (in case of weighted likelihood I don't care about the constant term due to
-         // the saturated model)
-
-         // use for the empty bins the global weight
-         double weight = 1.0;
-         if (y != 0) {
-            double error = data.Error(i);
-            weight = (error * error) / y; // this is the bin effective weight
-            nloglike += weight * y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
-         } else {
-            // for empty bin use the average weight  computed from the total data weight
-            weight = data.SumOfError2() / data.SumOfContent();
-         }
-         if (extended) {
-            nloglike += weight * (fval - y);
-         }
-
-      } else {
-         // standard case no weights or iWeight=1
-         // this is needed for Poisson likelihood (which are extened and not for multinomial)
-         // the formula below  include constant term due to likelihood of saturated model (f(x) = y)
-         // (same formula as in Baker-Cousins paper, page 439 except a factor of 2
-         if (extended)
-            nloglike = fval - y;
-
-         if (y > 0) {
-            nloglike += y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
-         }
-      }
-      return nloglike;
-   };
-
-#ifdef R__USE_IMT
-   auto redFunction = [](const std::vector<double> &objs) {
-      return std::accumulate(objs.begin(), objs.end(), double{});
-   };
-#else
-   (void)nChunks;
-
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("FitUtil::EvaluatePoissonLogL", "Multithread execution policy requires IMT, which is disabled. Changing "
-                                              "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   double res{};
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      for (unsigned int i = 0; i < n; ++i) {
-         res += mapFunction(i);
-      }
-#ifdef R__USE_IMT
-   } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size());
-      res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
-#endif
-   } else {
-      Error("FitUtil::EvaluatePoissonLogL", "Execution policy unknown. Avalaible choices:\n "
-                                            "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                            "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-   }
-
-#ifdef DEBUG
-   std::cout << "Loglikelihood  = " << res << std::endl;
-#endif
-
-   return res;
-}
-
-// Evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
-// and its gradient
-double PoissonLogL<double>::EvalBinPdf(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
-                                       unsigned int i, double *g)
-{
-   double y = 0;
-   const double *x1 = data.GetPoint(i, y);
-
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-   double fval = 0;
-   const double *x2 = 0;
-   // calculate the bin volume
-   double binVolume = 1;
-   std::vector<double> xc;
-   if (useBinVolume) {
-      unsigned int ndim = data.NDim();
-      xc.resize(ndim);
-      x2 = data.BinUpEdge(i);
-      for (unsigned int j = 0; j < ndim; ++j) {
-         binVolume *= std::abs(x2[j] - x1[j]);
-         xc[j] = 0.5 * (x2[j] + x1[j]);
-      }
-      // normalize the bin volume using a reference value
-      binVolume /= data.RefVolume();
-   }
-
-   const double *x = (useBinVolume) ? &xc.front() : x1;
-
-   if (!useBinIntegral) {
-      fval = func(x, p);
-   } else {
-      // calculate integral normalized (divided by bin volume)
-      x2 = data.BinUpEdge(i);
-      fval = igEval(x1, x2);
-   }
-   if (useBinVolume)
-      fval *= binVolume;
-
-   // logPdf for Poisson: ignore constant term depending on N
-   fval = std::max(fval, 0.0); // avoid negative or too small values
-   double logPdf = -fval;
-   if (y > 0.0) {
-      // include also constants due to saturate model (see Baker-Cousins paper)
-      logPdf += y * ROOT::Math::Util::EvalLog(fval / y) + y;
-   }
-   // need to return the pdf contribution (not the log)
-
-   // double pdfval =  std::exp(logPdf);
-
-   // if (g == 0) return pdfval;
-   if (g == 0)
-      return logPdf;
-
-   unsigned int npar = func.NPar();
-   const IGradModelFunction *gfunc = dynamic_cast<const IGradModelFunction *>(&func);
-
-   // gradient  calculation
-   if (gfunc != 0) {
-      // case function provides gradient
-      if (!useBinIntegral)
-         gfunc->ParameterGradient(x, p, g);
-      else {
-         // needs to calculate the integral for each partial derivative
-         CalculateGradientIntegral(*gfunc, x1, x2, p, g);
-      }
-
-   } else {
-      SimpleGradientCalculator gc(func.NPar(), func);
-      if (!useBinIntegral)
-         gc.ParameterGradient(x, p, fval, g);
-      else {
-         // needs to calculate the integral for each partial derivative
-         CalculateGradientIntegral(gc, x1, x2, p, g);
-      }
-   }
-   // correct g[] do be derivative of poisson term
-   for (unsigned int k = 0; k < npar; ++k) {
-      // apply bin volume correction
-      if (useBinVolume)
-         g[k] *= binVolume;
-
-      // correct for Poisson term
-      if (fval > 0)
-         g[k] *= (y / fval - 1.); //* pdfval;
-      else if (y > 0) {
-         const double kdmax1 = std::sqrt(std::numeric_limits<double>::max());
-         g[k] *= kdmax1;
-      } else // y == 0 cannot have  negative y
-         g[k] *= -1;
-   }
-
-#ifdef DEBUG
-   std::cout << "x = " << x[0] << " logPdf = " << logPdf << " grad";
-   for (unsigned int ipar = 0; ipar < npar; ++ipar)
-      std::cout << g[ipar] << "\t";
-   std::cout << std::endl;
-#endif
-
-   //   return pdfval;
-   return logPdf;
-}
-
-// Evaluate the gradient of the Poisson log likelihood function
-void PoissonLogL<double>::EvalGradient(const IModelFunctionTempl<double> &f, const BinData &data, const double *p,
-                                       double *grad, unsigned int &, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                                       unsigned nChunks)
-{
-   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
-   assert(fg != nullptr); // must be called by a grad function
-
-   const IGradModelFunction &func = *fg;
-
-#ifdef USE_PARAMCACHE
-   (const_cast<IGradModelFunction &>(func)).SetParameters(p);
-#endif
-
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-
-   double wrefVolume = 1.0;
-   if (useBinVolume && fitOpt.fNormBinVolume)
-      wrefVolume /= data.RefVolume();
-
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-
-   unsigned int npar = func.NPar();
-   unsigned initialNPoints = data.Size();
-
-   auto mapFunction = [&](const unsigned int i) {
-      // set all vector values to zero
-      std::vector<double> gradFunc(npar);
-      std::vector<double> pointContribution(npar);
-
-      const auto x1 = data.GetCoordComponent(i, 0);
-      const auto y = data.Value(i);
-      auto invError = data.Error(i);
-
-      invError = (invError != 0.0) ? 1.0 / invError : 1;
-
-      double fval = 0;
-
-      const double *x = nullptr;
-      std::vector<double> xc;
-
-      unsigned ndim = data.NDim();
-      double binVolume = 1.0;
-      if (useBinVolume) {
-         const double *x2 = data.BinUpEdge(i);
-
-         xc.resize(ndim);
-         for (unsigned int j = 0; j < ndim; ++j) {
-            auto x1_j = *data.GetCoordComponent(i, j);
-            binVolume *= std::abs(x2[j] - x1_j);
-            xc[j] = 0.5 * (x2[j] + x1_j);
-         }
-
-         x = xc.data();
-
-         // normalize the bin volume using a reference value
-         binVolume *= wrefVolume;
-      } else if (ndim > 1) {
-         xc.resize(ndim);
-         xc[0] = *x1;
-         for (unsigned int j = 1; j < ndim; ++j)
-            xc[j] = *data.GetCoordComponent(i, j);
-         x = xc.data();
-      } else {
-         x = x1;
-      }
-
-      if (!useBinIntegral) {
-         fval = func(x, p);
-         func.ParameterGradient(x, p, &gradFunc[0]);
-      } else {
-         // calculate integral (normalized by bin volume)
-         // need to set function and parameters here in case loop is parallelized
-         auto x2 = data.BinUpEdge(i);
-         fval = igEval(x, x2);
-         CalculateGradientIntegral(func, x, x2, p, &gradFunc[0]);
-      }
-      if (useBinVolume)
-         fval *= binVolume;
-
-#ifdef DEBUG
-      {
-         R__LOCKGUARD(gROOTMutex);
-         if (i < 5 || (i > data.Size() - 5)) {
-            if (data.NDim() > 1)
-               std::cout << i << "  x " << x[0] << " y " << x[1] << " func " << fval << " gradient " << gradFunc[0]
-                         << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
-            else
-               std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  "
-                         << gradFunc[3] << std::endl;
-         }
-      }
-#endif
-
-      // correct the gradient
-      for (unsigned int ipar = 0; ipar < npar; ++ipar) {
-
-         // correct gradient for bin volumes
-         if (useBinVolume)
-            gradFunc[ipar] *= binVolume;
-
-         // df/dp * (1.  - y/f )
-         if (fval > 0)
-            pointContribution[ipar] = gradFunc[ipar] * (1. - y / fval);
-         else if (gradFunc[ipar] != 0) {
-            const double kdmax1 = std::sqrt(std::numeric_limits<double>::max());
-            const double kdmax2 = std::numeric_limits<double>::max() / (4 * initialNPoints);
-            double gg = kdmax1 * gradFunc[ipar];
-            if (gg > 0)
-               gg = std::min(gg, kdmax2);
-            else
-               gg = std::max(gg, -kdmax2);
-            pointContribution[ipar] = -gg;
-         }
-      }
-
-      return pointContribution;
-   };
-
-   // Vertically reduce the set of vectors by summing its equally-indexed components
-   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
-      std::vector<double> result(npar);
-
-      for (auto const &pointContribution : pointContributions) {
-         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-            result[parameterIndex] += pointContribution[parameterIndex];
-      }
-
-      return result;
-   };
-
-   std::vector<double> g(npar);
-
-#ifndef R__USE_IMT
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("FitUtil::EvaluatePoissonLogLGradient",
-              "Multithread execution policy requires IMT, which is disabled. Changing "
-              "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      std::vector<std::vector<double>> allGradients(initialNPoints);
-      for (unsigned int i = 0; i < initialNPoints; ++i) {
-         allGradients[i] = mapFunction(i);
-      }
-      g = redFunction(allGradients);
-   }
-#ifdef R__USE_IMT
-   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
-      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
-   }
-#endif
-   else {
-      Error("FitUtil::EvaluatePoissonLogLGradient",
-            "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
-   }
-
-#ifndef R__USE_IMT
-   // to fix compiler warning
-   (void)nChunks;
-#endif
-
-   // copy result
-   std::copy(g.begin(), g.end(), grad);
-
-#ifdef DEBUG
-   std::cout << "***** Final gradient : ";
-   for (unsigned int ii = 0; ii < npar; ++ii)
-      std::cout << grad[ii] << "   ";
-   std::cout << "\n";
-#endif
-}
-
-} // end namespace FitUtil
-
-} // end namespace Fit
-
-} // end namespace ROOT
diff --git a/math/mathcore/src/FitUtil.cxx b/math/mathcore/src/FitUtil.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..2fc483f9e5bfdc09545f756d96d31754e2180080
--- /dev/null
+++ b/math/mathcore/src/FitUtil.cxx
@@ -0,0 +1,1756 @@
+// @(#)root/mathcore:$Id$
+// Author: L. Moneta Tue Nov 28 10:52:47 2006
+
+/**********************************************************************
+ *                                                                    *
+ * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
+ *                                                                    *
+ *                                                                    *
+ **********************************************************************/
+
+// Implementation file for class FitUtil
+
+#include "Fit/FitUtil.h"
+
+#include "Fit/BinData.h"
+#include "Fit/UnBinData.h"
+
+#include "Math/IFunctionfwd.h"
+#include "Math/IParamFunction.h"
+#include "Math/Integrator.h"
+#include "Math/IntegratorMultiDim.h"
+#include "Math/WrappedFunction.h"
+#include "Math/OneDimFunctionAdapter.h"
+#include "Math/RichardsonDerivator.h"
+
+#include "Math/Error.h"
+#include "Math/Util.h"  // for safe log(x)
+
+#include <limits>
+#include <cmath>
+#include <cassert>
+#include <algorithm>
+#include <numeric>
+//#include <memory>
+
+#include "TROOT.h"
+
+//#define DEBUG
+#ifdef DEBUG
+#define NSAMPLE 10
+#include <iostream>
+#endif
+
+//  need to implement integral option
+
+namespace ROOT {
+
+   namespace Fit {
+
+      namespace FitUtil {
+
+         // derivative with respect of the parameter to be integrated
+         template<class GradFunc = IGradModelFunction>
+         struct ParamDerivFunc {
+            ParamDerivFunc(const GradFunc & f) : fFunc(f), fIpar(0) {}
+            void SetDerivComponent(unsigned int ipar) { fIpar = ipar; }
+            double operator() (const double *x, const double *p) const {
+               return fFunc.ParameterDerivative( x, p, fIpar );
+            }
+            unsigned int NDim() const { return fFunc.NDim(); }
+            const GradFunc & fFunc;
+            unsigned int fIpar;
+         };
+
+// simple gradient calculator using the 2 points rule
+
+         class SimpleGradientCalculator {
+
+         public:
+            // construct from function and gradient dimension gdim
+            // gdim = npar for parameter gradient
+            // gdim = ndim for coordinate gradients
+            // construct (the param values will be passed later)
+            // one can choose between 2 points rule (1 extra evaluation) istrat=1
+            // or two point rule (2 extra evaluation)
+            // (found 2 points rule does not work correctly - minuit2FitBench fails)
+            SimpleGradientCalculator(int gdim, const IModelFunction & func,double eps = 2.E-8, int istrat = 1) :
+               fEps(eps),
+               fPrecision(1.E-8 ), // sqrt(epsilon)
+               fStrategy(istrat),
+               fN(gdim ),
+               fFunc(func),
+               fVec(std::vector<double>(gdim) ) // this can be probably optimized
+            {}
+
+            // internal method to calculate single partial derivative
+            // assume cached vector fVec is already set
+            double DoParameterDerivative(const double *x, const double *p, double f0, int k) const {
+               double p0 = p[k];
+               double h = std::max( fEps* std::abs(p0), 8.0*fPrecision*(std::abs(p0) + fPrecision) );
+               fVec[k] += h;
+               double deriv = 0;
+               // t.b.d : treat case of infinities
+               //if (fval > - std::numeric_limits<double>::max() && fval < std::numeric_limits<double>::max() )
+               double f1 = fFunc(x, &fVec.front() );
+               if (fStrategy > 1) {
+                  fVec[k] = p0 - h;
+                  double f2 = fFunc(x, &fVec.front() );
+                  deriv = 0.5 * ( f2 - f1 )/h;
+               }
+               else
+                  deriv = ( f1 - f0 )/h;
+
+               fVec[k] = p[k]; // restore original p value
+               return deriv;
+            }
+            // number of dimension in x (needed when calculating the integrals)
+            unsigned int NDim() const {
+               return fFunc.NDim();
+            }
+            // number of parameters (needed for grad ccalculation)
+            unsigned int NPar() const {
+               return fFunc.NPar();
+            }
+
+            double ParameterDerivative(const double *x, const double *p, int ipar) const {
+               // fVec are the cached parameter values
+               std::copy(p, p+fN, fVec.begin());
+               double f0 = fFunc(x, p);
+               return DoParameterDerivative(x,p,f0,ipar);
+            }
+
+            // calculate all gradient at point (x,p) knnowing already value f0 (we gain a function eval.)
+            void ParameterGradient(const double * x, const double * p, double f0, double * g) {
+               // fVec are the cached parameter values
+               std::copy(p, p+fN, fVec.begin());
+               for (unsigned int k = 0; k < fN; ++k) {
+                  g[k] = DoParameterDerivative(x,p,f0,k);
+               }
+            }
+
+            // calculate gradient w.r coordinate values
+            void Gradient(const double * x, const double * p, double f0, double * g) {
+               // fVec are the cached coordinate values
+               std::copy(x, x+fN, fVec.begin());
+               for (unsigned int k = 0; k < fN; ++k) {
+                  double x0 = x[k];
+                  double h = std::max( fEps* std::abs(x0), 8.0*fPrecision*(std::abs(x0) + fPrecision) );
+                  fVec[k] += h;
+                  // t.b.d : treat case of infinities
+                  //if (fval > - std::numeric_limits<double>::max() && fval < std::numeric_limits<double>::max() )
+                  double f1 = fFunc( &fVec.front(), p );
+                  if (fStrategy > 1) {
+                     fVec[k] = x0 - h;
+                     double f2 = fFunc( &fVec.front(), p  );
+                     g[k] = 0.5 * ( f2 - f1 )/h;
+                  }
+                  else
+                     g[k] = ( f1 - f0 )/h;
+
+                  fVec[k] = x[k]; // restore original x value
+               }
+            }
+
+         private:
+
+            double fEps;
+            double fPrecision;
+            int fStrategy; // strategy in calculation ( =1 use 2 point rule( 1 extra func) , = 2 use r point rule)
+            unsigned int fN; // gradient dimension
+            const IModelFunction & fFunc;
+            mutable std::vector<double> fVec; // cached coordinates (or parameter values in case of gradientpar)
+         };
+
+
+         // function to avoid infinities or nan
+         double CorrectValue(double rval) {
+            // avoid infinities or nan in  rval
+            if (rval > - std::numeric_limits<double>::max() && rval < std::numeric_limits<double>::max() )
+               return rval;
+            else if (rval < 0)
+               // case -inf
+               return -std::numeric_limits<double>::max();
+            else
+               // case + inf or nan
+               return  + std::numeric_limits<double>::max();
+         }
+
+         // Check if the value is a finite number. The argument rval is updated if it is infinite or NaN,
+         // setting it to the maximum finite value (preserving the sign).
+         bool CheckInfNaNValue(double &rval)
+         {
+            if (rval > - std::numeric_limits<double>::max() && rval < std::numeric_limits<double>::max() )
+               return true;
+            else if (rval < 0) {
+               // case -inf
+               rval =  -std::numeric_limits<double>::max();
+               return false;
+            }
+            else {
+               // case + inf or nan
+               rval =  + std::numeric_limits<double>::max();
+               return false;
+            }
+         }
+
+
+         // calculation of the integral of the gradient functions
+         // for a function providing derivative w.r.t parameters
+         // x1 and x2 defines the integration interval , p the parameters
+         template <class GFunc>
+         void CalculateGradientIntegral(const GFunc & gfunc,
+                                        const double *x1, const double * x2, const double * p, double *g) {
+
+            // needs to calculate the integral for each partial derivative
+            ParamDerivFunc<GFunc> pfunc( gfunc);
+            IntegralEvaluator<ParamDerivFunc<GFunc> > igDerEval( pfunc, p, true);
+            // loop on the parameters
+            unsigned int npar = gfunc.NPar();
+            for (unsigned int k = 0; k < npar; ++k ) {
+               pfunc.SetDerivComponent(k);
+               g[k] = igDerEval( x1, x2 );
+            }
+         }
+
+
+
+      } // end namespace  FitUtil
+
+
+
+//___________________________________________________________________________________________________________________________
+// for chi2 functions
+//___________________________________________________________________________________________________________________________
+
+      double FitUtil::EvaluateChi2(const IModelFunction &func, const BinData &data, const double *p, unsigned int &,
+                                   ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
+      {
+         // evaluate the chi2 given a  function reference  , the data and returns the value and also in nPoints
+         // the actual number of used points
+         // normal chi2 using only error on values (from fitting histogram)
+         // optionally the integral of function in the bin is used
+
+         unsigned int n = data.Size();
+
+      // set parameters of the function to cache integral value
+#ifdef USE_PARAMCACHE
+   (const_cast<IModelFunction &>(func)).SetParameters(p);
+#endif
+   // do not cache parameter values (it is not thread safe)
+   //func.SetParameters(p);
+
+
+   // get fit option and check case if using integral of bins
+   const DataOptions & fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+   bool useExpErrors = (fitOpt.fExpErrors);
+   bool isWeighted = data.IsWeighted();
+
+#ifdef DEBUG
+   std::cout << "\n\nFit data size = " << n << std::endl;
+   std::cout << "evaluate chi2 using function " << &func << "  " << p << std::endl;
+   std::cout << "use empty bins  " << fitOpt.fUseEmpty << std::endl;
+   std::cout << "use integral    " << fitOpt.fIntegral << std::endl;
+   std::cout << "use all error=1 " << fitOpt.fErrors1 << std::endl;
+   if (isWeighted)   std::cout << "Weighted data set - sumw =  " << data.SumOfContent() << "  sumw2 = " << data.SumOfError2() << std::endl;
+#endif
+
+#ifdef USE_PARAMCACHE
+   IntegralEvaluator<> igEval( func, 0, useBinIntegral);
+#else
+   IntegralEvaluator<> igEval( func, p, useBinIntegral);
+#endif
+   double maxResValue = std::numeric_limits<double>::max() /n;
+   double wrefVolume = 1.0;
+   if (useBinVolume) {
+      if (fitOpt.fNormBinVolume) wrefVolume /= data.RefVolume();
+   }
+
+   (const_cast<IModelFunction &>(func)).SetParameters(p);
+
+   auto mapFunction = [&](const unsigned i){
+
+      double chi2{};
+      double fval{};
+
+      const auto x1 = data.GetCoordComponent(i, 0);
+      const auto y = data.Value(i);
+      auto invError = data.InvError(i);
+
+      //invError = (invError!= 0.0) ? 1.0/invError :1;
+
+      const double * x = nullptr;
+      std::vector<double> xc;
+      double binVolume = 1.0;
+      if (useBinVolume) {
+         unsigned int ndim = data.NDim();
+         const double * x2 = data.BinUpEdge(i);
+         xc.resize(data.NDim());
+         for (unsigned int j = 0; j < ndim; ++j) {
+            auto xx = *data.GetCoordComponent(i, j);
+            binVolume *= std::abs(x2[j]- xx);
+            xc[j] = 0.5*(x2[j]+ xx);
+         }
+         x = xc.data();
+         // normalize the bin volume using a reference value
+         binVolume *= wrefVolume;
+      } else if(data.NDim() > 1) {
+         xc.resize(data.NDim());
+         xc[0] = *x1;
+         for (unsigned int j = 1; j < data.NDim(); ++j)
+            xc[j] = *data.GetCoordComponent(i, j);
+         x = xc.data();
+      } else {
+            x = x1;
+      }
+
+
+      if (!useBinIntegral) {
+#ifdef USE_PARAMCACHE
+         fval = func ( x );
+#else
+         fval = func ( x, p );
+#endif
+      }
+      else {
+         // calculate integral normalized by bin volume
+         // need to set function and parameters here in case loop is parallelized
+         fval = igEval( x, data.BinUpEdge(i)) ;
+      }
+      // normalize result if requested according to bin volume
+      if (useBinVolume) fval *= binVolume;
+
+      // expected errors
+      if (useExpErrors) {
+         double invWeight  = 1.0; 
+         if (isWeighted) {
+            // we need first to check if a weight factor needs to be applied
+            // weight = sumw2/sumw = error**2/content
+            //invWeight = y * invError * invError;
+            // we use always the global weight and not the observed one in the bin
+            // for empty bins use global weight (if it is weighted data.SumError2() is not zero)
+            invWeight = data.SumOfContent()/ data.SumOfError2();
+            //if (invError > 0) invWeight = y * invError * invError;
+         }
+         
+         //  if (invError == 0) invWeight = (data.SumOfError2() > 0) ? data.SumOfContent()/ data.SumOfError2() : 1.0;
+         // compute expected error  as f(x) / weight
+         double invError2 = (fval > 0) ? invWeight / fval : 0.0;
+         invError = std::sqrt(invError2);
+         //std::cout << "using Pearson chi2 " << x[0] << "  " << 1./invError2 << "  " << fval << std::endl;
+      }
+
+//#define DEBUG
+#ifdef DEBUG
+      std::cout << x[0] << "  " << y << "  " << 1./invError << " params : ";
+      for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
+         std::cout << p[ipar] << "\t";
+      std::cout << "\tfval = " << fval << " bin volume " << binVolume << " ref " << wrefVolume << std::endl;
+#endif
+//#undef DEBUG
+
+      if (invError > 0) {
+
+         double tmp = ( y -fval )* invError;
+         double resval = tmp * tmp;
+
+
+         // avoid inifinity or nan in chi2 values due to wrong function values
+         if ( resval < maxResValue )
+            chi2 += resval;
+         else {
+            //nRejected++;
+            chi2 += maxResValue;
+         }
+      }
+      return chi2;
+  };
+
+#ifdef R__USE_IMT
+  auto redFunction = [](const std::vector<double> & objs){
+                          return std::accumulate(objs.begin(), objs.end(), double{});
+  };
+#else
+  (void)nChunks;
+
+  // If IMT is disabled, force the execution policy to the serial case
+  if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+     Warning("FitUtil::EvaluateChi2", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                      "to ROOT::Fit::ExecutionPolicy::kSerial.");
+     executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+  }
+#endif
+
+  double res{};
+  if(executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial){
+    for (unsigned int i=0; i<n; ++i) {
+      res += mapFunction(i);
+    }
+#ifdef R__USE_IMT
+  } else if(executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+    ROOT::TThreadExecutor pool;
+    auto chunks = nChunks !=0? nChunks: setAutomaticChunking(data.Size());
+    res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
+#endif
+//   } else if(executionPolicy == ROOT::Fit::kMultitProcess){
+    // ROOT::TProcessExecutor pool;
+    // res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+  } else{
+    Error("FitUtil::EvaluateChi2","Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+  }
+
+   return res;
+}
+
+
+//___________________________________________________________________________________________________________________________
+
+double FitUtil::EvaluateChi2Effective(const IModelFunction & func, const BinData & data, const double * p, unsigned int & nPoints) {
+   // evaluate the chi2 given a  function reference  , the data and returns the value and also in nPoints
+   // the actual number of used points
+   // method using the error in the coordinates
+   // integral of bin does not make sense in this case
+
+   unsigned int n = data.Size();
+
+#ifdef DEBUG
+   std::cout << "\n\nFit data size = " << n << std::endl;
+   std::cout << "evaluate effective chi2 using function " << &func << "  " << p << std::endl;
+#endif
+
+   assert(data.HaveCoordErrors()  || data.HaveAsymErrors());
+
+   double chi2 = 0;
+   //int nRejected = 0;
+
+
+   //func.SetParameters(p);
+
+   unsigned int ndim = func.NDim();
+
+   // use Richardson derivator
+   ROOT::Math::RichardsonDerivator derivator;
+
+   double maxResValue = std::numeric_limits<double>::max() /n;
+
+
+
+   for (unsigned int i = 0; i < n; ++ i) {
+
+
+      double y = 0;
+      const double * x = data.GetPoint(i,y);
+
+      double fval = func( x, p );
+
+      double delta_y_func = y - fval;
+
+
+      double ey = 0;
+      const double * ex = 0;
+      if (!data.HaveAsymErrors() )
+         ex = data.GetPointError(i, ey);
+      else {
+         double eylow, eyhigh = 0;
+         ex = data.GetPointError(i, eylow, eyhigh);
+         if ( delta_y_func < 0)
+            ey = eyhigh; // function is higher than points
+         else
+            ey = eylow;
+      }
+      double e2 = ey * ey;
+      // before calculating the gradient check that all error in x are not zero
+      unsigned int j = 0;
+      while ( j < ndim && ex[j] == 0.)  { j++; }
+      // if j is less ndim some elements are not zero
+      if (j < ndim) {
+         // need an adapter from a multi-dim function to a one-dimensional
+         ROOT::Math::OneDimMultiFunctionAdapter<const IModelFunction &> f1D(func,x,0,p);
+         // select optimal step size  (use 10--2 by default as was done in TF1:
+         double kEps = 0.01;
+         double kPrecision = 1.E-8;
+         for (unsigned int icoord = 0; icoord < ndim; ++icoord) {
+            // calculate derivative for each coordinate
+            if (ex[icoord] > 0) {
+               //gradCalc.Gradient(x, p, fval, &grad[0]);
+               f1D.SetCoord(icoord);
+               // optimal spep size (take ex[] as scale for the points and 1% of it
+               double x0= x[icoord];
+               double h = std::max( kEps* std::abs(ex[icoord]), 8.0*kPrecision*(std::abs(x0) + kPrecision) );
+               double deriv = derivator.Derivative1(f1D, x[icoord], h);
+               double edx = ex[icoord] * deriv;
+               e2 += edx * edx;
+#ifdef DEBUG
+               std::cout << "error for coord " << icoord << " = " << ex[icoord] << " deriv " << deriv << std::endl;
+#endif
+            }
+         }
+      }
+      double w2 = (e2 > 0) ? 1.0/e2 : 0;
+      double resval = w2 * ( y - fval ) *  ( y - fval);
+
+#ifdef DEBUG
+      std::cout << x[0] << "  " << y << " ex " << ex[0] << " ey  " << ey << " params : ";
+      for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
+         std::cout << p[ipar] << "\t";
+      std::cout << "\tfval = " << fval << "\tresval = " << resval << std::endl;
+#endif
+
+      // avoid (infinity and nan ) in the chi2 sum
+      // eventually add possibility of excluding some points (like singularity)
+      if ( resval < maxResValue )
+         chi2 += resval;
+      else
+         chi2 += maxResValue;
+      //nRejected++;
+
+   }
+
+   // reset the number of fitting data points
+   nPoints = n;  // no points are rejected
+   //if (nRejected != 0)  nPoints = n - nRejected;
+
+#ifdef DEBUG
+   std::cout << "chi2 = " << chi2 << " n = " << nPoints  << std::endl;
+#endif
+
+   return chi2;
+
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+/// evaluate the chi2 contribution (residual term) only for data with no coord-errors
+/// This function is used in the specialized least square algorithms like FUMILI or L.M.
+/// if we have error on the coordinates the method is not yet implemented
+///  integral option is also not yet implemented
+///  one can use in that case normal chi2 method
+
+double FitUtil::EvaluateChi2Residual(const IModelFunction & func, const BinData & data, const double * p, unsigned int i, double * g) {
+   if (data.GetErrorType() == BinData::kCoordError && data.Opt().fCoordErrors ) {
+      MATH_ERROR_MSG("FitUtil::EvaluateChi2Residual","Error on the coordinates are not used in calculating Chi2 residual");
+      return 0; // it will assert otherwise later in GetPoint
+   }
+
+
+   //func.SetParameters(p);
+
+   double y, invError = 0;
+
+   const DataOptions & fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+   bool useExpErrors = (fitOpt.fExpErrors);
+
+   const double * x1 = data.GetPoint(i,y, invError);
+
+   IntegralEvaluator<> igEval( func, p, useBinIntegral);
+   double fval = 0;
+   unsigned int ndim = data.NDim();
+   double binVolume = 1.0;
+   const double * x2 = 0;
+   if (useBinVolume || useBinIntegral) x2 = data.BinUpEdge(i);
+
+   double * xc = 0;
+
+   if (useBinVolume) {
+      xc = new double[ndim];
+      for (unsigned int j = 0; j < ndim; ++j) {
+         binVolume *= std::abs( x2[j]-x1[j] );
+         xc[j] = 0.5*(x2[j]+ x1[j]);
+      }
+      // normalize the bin volume using a reference value
+      binVolume /= data.RefVolume();
+   }
+
+   const double * x = (useBinVolume) ? xc : x1;
+
+   if (!useBinIntegral) {
+      fval = func ( x, p );
+   }
+   else {
+      // calculate integral (normalized by bin volume)
+      // need to set function and parameters here in case loop is parallelized
+      fval = igEval( x1, x2) ;
+   }
+   // normalize result if requested according to bin volume
+   if (useBinVolume) fval *= binVolume;
+
+   // expected errors
+   if (useExpErrors) {
+      // we need first to check if a weight factor needs to be applied
+      // weight = sumw2/sumw = error**2/content
+      //NOTE: assume histogram is not weighted
+      // don't know how to do with bins with weight = 0
+      //double invWeight = y * invError * invError;
+      // if (invError == 0) invWeight = (data.SumOfError2() > 0) ? data.SumOfContent()/ data.SumOfError2() : 1.0;
+      // compute expected error  as f(x) / weight
+      double invError2 = (fval > 0) ? 1.0 / fval : 0.0;
+      invError = std::sqrt(invError2);
+   }
+
+
+   double resval =   ( y -fval )* invError;
+
+   // avoid infinities or nan in  resval
+   resval = CorrectValue(resval);
+
+   // estimate gradient
+   if (g != 0) {
+
+      unsigned int npar = func.NPar();
+      const IGradModelFunction * gfunc = dynamic_cast<const IGradModelFunction *>( &func);
+
+      if (gfunc != 0) {
+         //case function provides gradient
+         if (!useBinIntegral ) {
+            gfunc->ParameterGradient(  x , p, g );
+         }
+         else {
+            // needs to calculate the integral for each partial derivative
+            CalculateGradientIntegral( *gfunc, x1, x2, p, g);
+         }
+      }
+      else {
+         SimpleGradientCalculator  gc( npar, func);
+         if (!useBinIntegral )
+            gc.ParameterGradient(x, p, fval, g);
+         else {
+            // needs to calculate the integral for each partial derivative
+            CalculateGradientIntegral( gc, x1, x2, p, g);
+         }
+      }
+      // mutiply by - 1 * weight
+      for (unsigned int k = 0; k < npar; ++k) {
+         g[k] *= - invError;
+         if (useBinVolume) g[k] *= binVolume;
+      }
+   }
+
+   if (useBinVolume) delete [] xc;
+
+   return resval;
+
+}
+
+void FitUtil::EvaluateChi2Gradient(const IModelFunction &f, const BinData &data, const double *p, double *grad,
+                                   unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
+{
+   // evaluate the gradient of the chi2 function
+   // this function is used when the model function knows how to calculate the derivative and we can
+   // avoid that the minimizer re-computes them
+   //
+   // case of chi2 effective (errors on coordinate) is not supported
+
+   if (data.HaveCoordErrors()) {
+      MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
+                     "Error on the coordinates are not used in calculating Chi2 gradient");
+      return; // it will assert otherwise later in GetPoint
+   }
+
+   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
+   assert(fg != nullptr); // must be called by a gradient function
+
+   const IGradModelFunction &func = *fg;
+
+#ifdef DEBUG
+   std::cout << "\n\nFit data size = " << n << std::endl;
+   std::cout << "evaluate chi2 using function gradient " << &func << "  " << p << std::endl;
+#endif
+
+   const DataOptions &fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+
+   double wrefVolume = 1.0;
+   if (useBinVolume) {
+      if (fitOpt.fNormBinVolume) wrefVolume /= data.RefVolume();
+   }
+
+   IntegralEvaluator<> igEval(func, p, useBinIntegral);
+
+   unsigned int npar = func.NPar();
+   unsigned initialNPoints = data.Size();
+
+   std::vector<bool> isPointRejected(initialNPoints);
+
+   auto mapFunction = [&](const unsigned int i) {
+      // set all vector values to zero
+      std::vector<double> gradFunc(npar);
+      std::vector<double> pointContribution(npar);
+
+      const auto x1 = data.GetCoordComponent(i, 0);
+      const auto y = data.Value(i);
+      auto invError = data.Error(i);
+
+      invError = (invError != 0.0) ? 1.0 / invError : 1;
+
+      double fval = 0;
+
+      const double *x = nullptr;
+      std::vector<double> xc;
+
+      unsigned int ndim = data.NDim();
+      double binVolume = 1;
+      if (useBinVolume) {
+         const double *x2 = data.BinUpEdge(i);
+
+         xc.resize(ndim);
+         for (unsigned int j = 0; j < ndim; ++j) {
+            auto x1_j = *data.GetCoordComponent(i, j);
+            binVolume *= std::abs(x2[j] - x1_j);
+            xc[j] = 0.5 * (x2[j] + x1_j);
+         }
+
+         x = xc.data();
+
+         // normalize the bin volume using a reference value
+         binVolume *= wrefVolume;
+      } else if (ndim > 1) {
+         xc.resize(ndim);
+         xc[0] = *x1;
+         for (unsigned int j = 1; j < ndim; ++j)
+            xc[j] = *data.GetCoordComponent(i, j);
+         x = xc.data();
+      } else {
+         x = x1;
+      }
+
+      if (!useBinIntegral) {
+         fval = func(x, p);
+         func.ParameterGradient(x, p, &gradFunc[0]);
+      } else {
+         auto x2 = data.BinUpEdge(i);
+         // calculate normalized integral and gradient (divided by bin volume)
+         // need to set function and parameters here in case loop is parallelized
+         fval = igEval(x, x2);
+         CalculateGradientIntegral(func, x, x2, p, &gradFunc[0]);
+      }
+      if (useBinVolume)
+         fval *= binVolume;
+
+#ifdef DEBUG
+      std::cout << x[0] << "  " << y << "  " << 1. / invError << " params : ";
+      for (unsigned int ipar = 0; ipar < npar; ++ipar)
+         std::cout << p[ipar] << "\t";
+      std::cout << "\tfval = " << fval << std::endl;
+#endif
+      if (!CheckInfNaNValue(fval)) {
+         isPointRejected[i] = true;
+         // Return a zero contribution to all partial derivatives on behalf of the current point
+         return pointContribution;
+      }
+
+      // loop on the parameters
+      unsigned int ipar = 0;
+      for (; ipar < npar; ++ipar) {
+
+         // correct gradient for bin volumes
+         if (useBinVolume)
+            gradFunc[ipar] *= binVolume;
+
+         // avoid singularity in the function (infinity and nan ) in the chi2 sum
+         // eventually add possibility of excluding some points (like singularity)
+         double dfval = gradFunc[ipar];
+         if (!CheckInfNaNValue(dfval)) {
+            break; // exit loop on parameters
+         }
+
+         // calculate derivative point contribution
+         pointContribution[ipar] = -2.0 * (y - fval) * invError * invError * gradFunc[ipar];
+      }
+
+      if (ipar < npar) {
+         // case loop was broken for an overflow in the gradient calculation
+         isPointRejected[i] = true;
+      }
+
+      return pointContribution;
+   };
+
+   // Vertically reduce the set of vectors by summing its equally-indexed components
+   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
+      std::vector<double> result(npar);
+
+      for (auto const &pointContribution : pointContributions) {
+         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+            result[parameterIndex] += pointContribution[parameterIndex];
+      }
+
+      return result;
+   };
+
+   std::vector<double> g(npar);
+
+#ifndef R__USE_IMT
+   // If IMT is disabled, force the execution policy to the serial case
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      Warning("FitUtil::EvaluateChi2Gradient", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                               "to ROOT::Fit::ExecutionPolicy::kSerial.");
+      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+   }
+#endif
+
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+      std::vector<std::vector<double>> allGradients(initialNPoints);
+      for (unsigned int i = 0; i < initialNPoints; ++i) {
+         allGradients[i] = mapFunction(i);
+      }
+      g = redFunction(allGradients);
+   }
+#ifdef R__USE_IMT
+   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      ROOT::TThreadExecutor pool;
+      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
+      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
+   }
+#endif
+   // else if(executionPolicy == ROOT::Fit::kMultiprocess){
+   //    ROOT::TProcessExecutor pool;
+   //    g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+   // }
+   else {
+      Error("FitUtil::EvaluateChi2Gradient",
+            "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
+   }
+
+#ifndef R__USE_IMT
+   //to fix compiler warning
+   (void)nChunks;
+#endif
+
+   // correct the number of points
+   nPoints = initialNPoints;
+
+   if (std::any_of(isPointRejected.begin(), isPointRejected.end(), [](bool point) { return point; })) {
+      unsigned nRejected = std::accumulate(isPointRejected.begin(), isPointRejected.end(), 0);
+      assert(nRejected <= initialNPoints);
+      nPoints = initialNPoints - nRejected;
+
+      if (nPoints < npar)
+         MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
+                        "Error - too many points rejected for overflow in gradient calculation");
+   }
+
+   // copy result
+   std::copy(g.begin(), g.end(), grad);
+}
+
+//______________________________________________________________________________________________________
+//
+//  Log Likelihood functions
+//_______________________________________________________________________________________________________
+
+// utility function used by the likelihoods
+
+// for LogLikelihood functions
+
+double FitUtil::EvaluatePdf(const IModelFunction & func, const UnBinData & data, const double * p, unsigned int i, double * g) {
+   // evaluate the pdf contribution to the generic logl function in case of bin data
+   // return actually the log of the pdf and its derivatives
+
+
+   //func.SetParameters(p);
+
+
+   const double * x = data.Coords(i);
+   double fval = func ( x, p );
+   double logPdf = ROOT::Math::Util::EvalLog(fval);
+   //return
+   if (g == 0) return logPdf;
+
+   const IGradModelFunction * gfunc = dynamic_cast<const IGradModelFunction *>( &func);
+
+   // gradient  calculation
+   if (gfunc != 0) {
+      //case function provides gradient
+      gfunc->ParameterGradient(  x , p, g );
+   }
+   else {
+      // estimate gradieant numerically with simple 2 point rule
+      // should probably calculate gradient of log(pdf) is more stable numerically
+      SimpleGradientCalculator gc(func.NPar(), func);
+      gc.ParameterGradient(x, p, fval, g );
+   }
+   // divide gradient by function value since returning the logs
+   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar) {
+      g[ipar] /= fval; // this should be checked against infinities
+   }
+
+#ifdef DEBUG
+   std::cout << x[i] << "\t";
+   std::cout << "\tpar = [ " << func.NPar() << " ] =  ";
+   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
+      std::cout << p[ipar] << "\t";
+   std::cout << "\tfval = " << fval;
+   std::cout << "\tgrad = [ ";
+   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
+      std::cout << g[ipar] << "\t";
+   std::cout << " ] "   << std::endl;
+#endif
+
+
+   return logPdf;
+}
+
+double FitUtil::EvaluateLogL(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p,
+                             int iWeight, bool extended, unsigned int &nPoints,
+                             ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
+{
+   // evaluate the LogLikelihood
+
+   unsigned int n = data.Size();
+
+   //unsigned int nRejected = 0;
+
+   bool normalizeFunc = false;
+
+   // set parameters of the function to cache integral value
+#ifdef USE_PARAMCACHE
+   (const_cast<IModelFunctionTempl<double> &>(func)).SetParameters(p);
+#endif
+
+   nPoints = data.Size();  // npoints
+
+#ifdef R__USE_IMT
+         // in case parameter needs to be propagated to user function use trick to set parameters by calling one time the function
+         // this will be done in sequential mode and parameters can be set in a thread safe manner
+         if (!normalizeFunc) {
+            if (data.NDim() == 1) {
+               const double * x = data.GetCoordComponent(0,0);
+               func( x, p);
+            }
+            else {
+               std::vector<double> x(data.NDim());
+               for (unsigned int j = 0; j < data.NDim(); ++j)
+                  x[j] = *data.GetCoordComponent(0, j);
+               func( x.data(), p);
+            }
+         }
+#endif
+
+         double norm = 1.0;
+         if (normalizeFunc) {
+            // compute integral of the function
+            std::vector<double> xmin(data.NDim());
+            std::vector<double> xmax(data.NDim());
+            IntegralEvaluator<> igEval(func, p, true);
+            // compute integral in the ranges where is defined
+            if (data.Range().Size() > 0) {
+               norm = 0;
+               for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
+                  data.Range().GetRange(&xmin[0], &xmax[0], ir);
+                  norm += igEval.Integral(xmin.data(), xmax.data());
+               }
+            } else {
+               // use (-inf +inf)
+               data.Range().GetRange(&xmin[0], &xmax[0]);
+               // check if funcition is zero at +- inf
+               if (func(xmin.data(), p) != 0 || func(xmax.data(), p) != 0) {
+                  MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood",
+                                 "A range has not been set and the function is not zero at +/- inf");
+                  return 0;
+               }
+               norm = igEval.Integral(&xmin[0], &xmax[0]);
+            }
+         }
+
+         // needed to compue effective global weight in case of extended likelihood
+
+         auto mapFunction = [&](const unsigned i) {
+            double W = 0;
+            double W2 = 0;
+            double fval = 0;
+
+            if (data.NDim() > 1) {
+               std::vector<double> x(data.NDim());
+               for (unsigned int j = 0; j < data.NDim(); ++j)
+                  x[j] = *data.GetCoordComponent(i, j);
+#ifdef USE_PARAMCACHE
+               fval = func(x.data());
+#else
+               fval = func(x.data(), p);
+#endif
+
+               // one -dim case
+            } else {
+               const auto x = data.GetCoordComponent(i, 0);
+#ifdef USE_PARAMCACHE
+               fval = func(x);
+#else
+               fval = func(x, p);
+#endif
+            }
+
+            if (normalizeFunc)
+               fval = fval * (1 / norm);
+
+            // function EvalLog protects against negative or too small values of fval
+            double logval = ROOT::Math::Util::EvalLog(fval);
+            if (iWeight > 0) {
+               double weight = data.Weight(i);
+               logval *= weight;
+               if (iWeight == 2) {
+                  logval *= weight; // use square of weights in likelihood
+                  if (!extended) {
+                     // needed sum of weights and sum of weight square if likelkihood is extended
+                     W = weight;
+                     W2 = weight * weight;
+                  }
+               }
+            }
+            return LikelihoodAux<double>(logval, W, W2);
+         };
+
+#ifdef R__USE_IMT
+  // auto redFunction = [](const std::vector<LikelihoodAux<double>> & objs){
+  //          return std::accumulate(objs.begin(), objs.end(), LikelihoodAux<double>(0.0,0.0,0.0),
+  //                      [](const LikelihoodAux<double> &l1, const LikelihoodAux<double> &l2){
+  //                          return l1+l2;
+  //                 });
+  // };
+  // do not use std::accumulate to be sure to maintain always the same order
+  auto redFunction = [](const std::vector<LikelihoodAux<double>> & objs){
+     auto l0 =  LikelihoodAux<double>(0.0,0.0,0.0);
+     for ( auto & l : objs ) {
+        l0 = l0 + l;
+     }
+     return l0;
+  };
+#else
+  (void)nChunks;
+
+  // If IMT is disabled, force the execution policy to the serial case
+  if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+     Warning("FitUtil::EvaluateLogL", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                      "to ROOT::Fit::ExecutionPolicy::kSerial.");
+     executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+  }
+#endif
+
+  double logl{};
+  double sumW{};
+  double sumW2{};
+  if(executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial){
+    for (unsigned int i=0; i<n; ++i) {
+      auto resArray = mapFunction(i);
+      logl+=resArray.logvalue;
+      sumW+=resArray.weight;
+      sumW2+=resArray.weight2;
+    }
+#ifdef R__USE_IMT
+  } else if(executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+    ROOT::TThreadExecutor pool;
+    auto chunks = nChunks !=0? nChunks: setAutomaticChunking(data.Size());
+    auto resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
+    logl=resArray.logvalue;
+    sumW=resArray.weight;
+    sumW2=resArray.weight2;
+#endif
+//   } else if(executionPolicy == ROOT::Fit::kMultitProcess){
+    // ROOT::TProcessExecutor pool;
+    // res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+  } else{
+    Error("FitUtil::EvaluateLogL","Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+  }
+
+  if (extended) {
+      // add Poisson extended term
+      double extendedTerm = 0; // extended term in likelihood
+      double nuTot = 0;
+      // nuTot is integral of function in the range
+      // if function has been normalized integral has been already computed
+      if (!normalizeFunc) {
+         IntegralEvaluator<> igEval( func, p, true);
+         std::vector<double> xmin(data.NDim());
+         std::vector<double> xmax(data.NDim());
+
+         // compute integral in the ranges where is defined
+         if (data.Range().Size() > 0 ) {
+            nuTot = 0;
+            for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
+               data.Range().GetRange(&xmin[0],&xmax[0],ir);
+               nuTot += igEval.Integral(xmin.data(),xmax.data());
+            }
+         } else {
+            // use (-inf +inf)
+            data.Range().GetRange(&xmin[0],&xmax[0]);
+            // check if funcition is zero at +- inf
+            if (func(xmin.data(), p) != 0 || func(xmax.data(), p) != 0) {
+               MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood","A range has not been set and the function is not zero at +/- inf");
+               return 0;
+            }
+            nuTot = igEval.Integral(&xmin[0],&xmax[0]);
+         }
+
+         // force to be last parameter value
+         //nutot = p[func.NDim()-1];
+         if (iWeight != 2)
+            extendedTerm = - nuTot;  // no need to add in this case n log(nu) since is already computed before
+         else {
+            // case use weight square in likelihood : compute total effective weight = sw2/sw
+            // ignore for the moment case when sumW is zero
+            extendedTerm = - (sumW2 / sumW) * nuTot;
+         }
+
+      }
+      else {
+         nuTot = norm;
+         extendedTerm = - nuTot + double(n) *  ROOT::Math::Util::EvalLog( nuTot);
+         // in case of weights need to use here sum of weights (to be done)
+      }
+      logl += extendedTerm;
+
+   }
+
+#ifdef DEBUG  
+   std::cout << "Evaluated log L for parameters (";
+   for (unsigned int ip = 0; ip < func.NPar(); ++ip)
+      std::cout << " " << p[ip];
+   std::cout << ")  fval = " << -logl << std::endl;
+#endif
+
+   return -logl;
+}
+
+void FitUtil::EvaluateLogLGradient(const IModelFunction &f, const UnBinData &data, const double *p, double *grad,
+                                   unsigned int &, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
+{
+   // evaluate the gradient of the log likelihood function
+
+   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
+   assert(fg != nullptr); // must be called by a grad function
+
+   const IGradModelFunction &func = *fg;
+
+   unsigned int npar = func.NPar();
+   unsigned initialNPoints = data.Size();
+
+   (const_cast<IGradModelFunction &>(func)).SetParameters(p);
+
+#ifdef DEBUG  
+    std::cout << "\n===> Evaluate Gradient for parameters ";
+         for (unsigned int ip = 0; ip < npar; ++ip)
+            std::cout << "  " << p[ip];
+         std::cout << "\n";
+#endif
+
+   const double kdmax1 = std::sqrt(std::numeric_limits<double>::max());
+   const double kdmax2 = std::numeric_limits<double>::max() / (4 * initialNPoints);
+
+   auto mapFunction = [&](const unsigned int i) {
+      std::vector<double> gradFunc(npar);
+      std::vector<double> pointContribution(npar);
+
+
+      const double * x = nullptr;
+      std::vector<double> xc;
+      if (data.NDim() > 1) {
+         xc.resize(data.NDim() );
+         for (unsigned int j = 0; j < data.NDim(); ++j)
+            xc[j] = *data.GetCoordComponent(i, j);
+         x = xc.data(); 
+      } else {
+         x = data.GetCoordComponent(i, 0);
+      }
+
+      double fval = func(x, p);
+      func.ParameterGradient(x, p, &gradFunc[0]);
+      
+#ifdef DEBUG
+      {
+         R__LOCKGUARD(gROOTMutex);
+         if (i < 5 || (i > data.Size()-5) ) {
+            if (data.NDim() > 1) std::cout << i << "  x " << x[0] << " y " << x[1] << " func " << fval
+                                           << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+            else std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+         }
+      }
+#endif
+
+      for (unsigned int kpar = 0; kpar < npar; ++kpar) {
+         if (fval > 0)
+            pointContribution[kpar] = -1. / fval * gradFunc[kpar];
+         else if (gradFunc[kpar] != 0) {
+            double gg = kdmax1 * gradFunc[kpar];
+            if (gg > 0)
+               gg = std::min(gg, kdmax2);
+            else
+               gg = std::max(gg, -kdmax2);
+            pointContribution[kpar] = -gg;
+         }
+         // if func derivative is zero term is also zero so do not add in g[kpar]
+      }
+
+      return pointContribution;
+   };
+
+   // Vertically reduce the set of vectors by summing its equally-indexed components
+   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
+      std::vector<double> result(npar);
+
+      for (auto const &pointContribution : pointContributions) {
+         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+            result[parameterIndex] += pointContribution[parameterIndex];
+      }
+
+      return result;
+   };
+
+   std::vector<double> g(npar);
+
+#ifndef R__USE_IMT
+   // If IMT is disabled, force the execution policy to the serial case
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      Warning("FitUtil::EvaluateLogLGradient", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                               "to ROOT::Fit::ExecutionPolicy::kSerial.");
+      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+   }
+#endif
+
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+      std::vector<std::vector<double>> allGradients(initialNPoints);
+      for (unsigned int i = 0; i < initialNPoints; ++i) {
+         allGradients[i] = mapFunction(i);
+      }
+      g = redFunction(allGradients);
+   }
+#ifdef R__USE_IMT
+   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      ROOT::TThreadExecutor pool;
+      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
+      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
+   }
+#endif
+
+   // else if(executionPolicy == ROOT::Fit::ExecutionPolicy::kMultiprocess){
+   //    ROOT::TProcessExecutor pool;
+   //    g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+   // }
+   else {
+      Error("FitUtil::EvaluateLogLGradient", "Execution policy unknown. Avalaible choices:\n "
+                                             "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
+                                             "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+   }
+
+#ifndef R__USE_IMT
+   // to fix compiler warning
+   (void)nChunks;
+#endif
+
+   // copy result
+   std::copy(g.begin(), g.end(), grad);
+
+#ifdef DEBUG
+   std::cout << "FitUtil.cxx : Final gradient ";
+   for (unsigned int param = 0; param < npar; param++) {
+      std::cout << "  " << grad[param];
+   }
+   std::cout << "\n";
+#endif
+}
+//_________________________________________________________________________________________________
+// for binned log likelihood functions
+////////////////////////////////////////////////////////////////////////////////
+/// evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
+/// and its gradient
+
+double FitUtil::EvaluatePoissonBinPdf(const IModelFunction & func, const BinData & data, const double * p, unsigned int i, double * g ) {
+   double y = 0;
+   const double * x1 = data.GetPoint(i,y);
+
+   const DataOptions & fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+
+   IntegralEvaluator<> igEval( func, p, useBinIntegral);
+   double fval = 0;
+   const double * x2 = 0;
+   // calculate the bin volume
+   double binVolume = 1;
+   std::vector<double> xc;
+   if (useBinVolume) {
+      unsigned int ndim = data.NDim();
+      xc.resize(ndim);
+      x2 = data.BinUpEdge(i);
+      for (unsigned int j = 0; j < ndim; ++j) {
+         binVolume *= std::abs( x2[j]-x1[j] );
+         xc[j] = 0.5*(x2[j]+ x1[j]);
+      }
+      // normalize the bin volume using a reference value
+      binVolume /= data.RefVolume();
+   }
+
+   const double * x = (useBinVolume) ? &xc.front() : x1;
+
+   if (!useBinIntegral ) {
+      fval = func ( x, p );
+   }
+   else {
+      // calculate integral normalized (divided by bin volume)
+      x2 =  data.BinUpEdge(i);
+      fval = igEval( x1, x2 ) ;
+   }
+   if (useBinVolume) fval *= binVolume;
+
+   // logPdf for Poisson: ignore constant term depending on N
+   fval = std::max(fval, 0.0);  // avoid negative or too small values
+   double logPdf =  - fval;
+   if (y > 0.0) {
+      // include also constants due to saturate model (see Baker-Cousins paper)
+      logPdf += y * ROOT::Math::Util::EvalLog( fval / y) + y;
+   }
+   // need to return the pdf contribution (not the log)
+
+   //double pdfval =  std::exp(logPdf);
+
+  //if (g == 0) return pdfval;
+   if (g == 0) return logPdf;
+
+   unsigned int npar = func.NPar();
+   const IGradModelFunction * gfunc = dynamic_cast<const IGradModelFunction *>( &func);
+
+   // gradient  calculation
+   if (gfunc != 0) {
+      //case function provides gradient
+      if (!useBinIntegral )
+         gfunc->ParameterGradient(  x , p, g );
+      else {
+         // needs to calculate the integral for each partial derivative
+         CalculateGradientIntegral( *gfunc, x1, x2, p, g);
+      }
+
+   }
+   else {
+      SimpleGradientCalculator  gc(func.NPar(), func);
+      if (!useBinIntegral )
+         gc.ParameterGradient(x, p, fval, g);
+      else {
+        // needs to calculate the integral for each partial derivative
+         CalculateGradientIntegral( gc, x1, x2, p, g);
+      }
+   }
+   // correct g[] do be derivative of poisson term
+   for (unsigned int k = 0; k < npar; ++k) {
+      // apply bin volume correction
+      if (useBinVolume) g[k] *= binVolume;
+
+      // correct for Poisson term
+      if ( fval > 0)
+         g[k] *= ( y/fval - 1.) ;//* pdfval;
+      else if (y > 0) {
+         const double kdmax1 = std::sqrt( std::numeric_limits<double>::max() );
+         g[k] *= kdmax1;
+      }
+      else   // y == 0 cannot have  negative y
+         g[k] *= -1;
+   }
+
+
+#ifdef DEBUG
+   std::cout << "x = " << x[0] << " logPdf = " << logPdf << " grad";
+   for (unsigned int ipar = 0; ipar < npar; ++ipar)
+      std::cout << g[ipar] << "\t";
+   std::cout << std::endl;
+#endif
+
+//   return pdfval;
+   return logPdf;
+}
+
+double FitUtil::EvaluatePoissonLogL(const IModelFunction &func, const BinData &data, const double *p, int iWeight,
+                                    bool extended, unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy,
+                                    unsigned nChunks)
+{
+   // evaluate the Poisson Log Likelihood
+   // for binned likelihood fits
+   // this is Sum ( f(x_i)  -  y_i * log( f (x_i) ) )
+   // add as well constant term for saturated model to make it like a Chi2/2
+   // by default is etended. If extended is false the fit is not extended and
+   // the global poisson term is removed (i.e is a binomial fit)
+   // (remember that in this case one needs to have a function with a fixed normalization
+   // like in a non extended unbinned fit)
+   //
+   // if use Weight use a weighted dataset
+   // iWeight = 1 ==> logL = Sum( w f(x_i) )
+   // case of iWeight==1 is actually identical to weight==0
+   // iWeight = 2 ==> logL = Sum( w*w * f(x_i) )
+   //
+   // nPoints returns the points where bin content is not zero
+
+
+   unsigned int n = data.Size();
+
+#ifdef USE_PARAMCACHE
+   (const_cast<IModelFunction &>(func)).SetParameters(p);
+#endif
+
+   nPoints = data.Size();  // npoints
+
+
+   // get fit option and check case of using integral of bins
+   const DataOptions &fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+   bool useW2 = (iWeight == 2);
+
+   // normalize if needed by a reference volume value
+   double wrefVolume = 1.0;
+   if (useBinVolume) {
+      if (fitOpt.fNormBinVolume) wrefVolume /= data.RefVolume();
+   }
+
+#ifdef DEBUG
+   std::cout << "Evaluate PoissonLogL for params = [ ";
+   for (unsigned int j = 0; j < func.NPar(); ++j) std::cout << p[j] << " , ";
+   std::cout << "]  - data size = " << n << " useBinIntegral " << useBinIntegral << " useBinVolume "
+             << useBinVolume << " useW2 " << useW2 << " wrefVolume = " << wrefVolume << std::endl;
+#endif
+
+#ifdef USE_PARAMCACHE
+   IntegralEvaluator<> igEval(func, 0, useBinIntegral);
+#else
+   IntegralEvaluator<> igEval(func, p, useBinIntegral);
+#endif
+
+   auto mapFunction = [&](const unsigned i) {
+      auto x1 = data.GetCoordComponent(i, 0);
+      auto y = *data.ValuePtr(i);
+
+      const double *x = nullptr;
+      std::vector<double> xc;
+      double fval = 0;
+      double binVolume = 1.0;
+
+      if (useBinVolume) {
+         unsigned int ndim = data.NDim();
+         const double *x2 = data.BinUpEdge(i);
+         xc.resize(data.NDim());
+         for (unsigned int j = 0; j < ndim; ++j) {
+            auto xx = *data.GetCoordComponent(i, j);
+            binVolume *= std::abs(x2[j] - xx);
+            xc[j] = 0.5 * (x2[j] + xx);
+         }
+         x = xc.data();
+         // normalize the bin volume using a reference value
+         binVolume *= wrefVolume;
+      } else if (data.NDim() > 1) {
+         xc.resize(data.NDim());
+         xc[0] = *x1;
+         for (unsigned int j = 1; j < data.NDim(); ++j) {
+            xc[j] = *data.GetCoordComponent(i, j);
+         }
+         x = xc.data();
+      } else {
+         x = x1;
+      }
+
+      if (!useBinIntegral) {
+#ifdef USE_PARAMCACHE
+         fval = func(x);
+#else
+         fval = func(x, p);
+#endif
+      } else {
+         // calculate integral (normalized by bin volume)
+         // need to set function and parameters here in case loop is parallelized
+         fval = igEval(x, data.BinUpEdge(i));
+      }
+      if (useBinVolume) fval *= binVolume;
+
+
+
+#ifdef DEBUG
+      int NSAMPLE = 100;
+      if (i % NSAMPLE == 0) {
+         std::cout << "evt " << i << " x = [ ";
+         for (unsigned int j = 0; j < func.NDim(); ++j) std::cout << x[j] << " , ";
+         std::cout << "]  ";
+         if (fitOpt.fIntegral) {
+            std::cout << "x2 = [ ";
+            for (unsigned int j = 0; j < func.NDim(); ++j) std::cout << data.BinUpEdge(i)[j] << " , ";
+            std::cout << "] ";
+         }
+         std::cout << "  y = " << y << " fval = " << fval << std::endl;
+      }
+#endif
+
+
+      // EvalLog protects against 0 values of fval but don't want to add in the -log sum
+      // negative values of fval
+      fval = std::max(fval, 0.0);
+
+      double nloglike = 0; // negative loglikelihood
+      if (useW2) {
+         // apply weight correction . Effective weight is error^2/ y
+         // and expected events in bins is fval/weight
+         // can apply correction only when y is not zero otherwise weight is undefined
+         // (in case of weighted likelihood I don't care about the constant term due to
+         // the saturated model)
+ 
+         // use for the empty bins the global weight
+         double weight = 1.0;
+         if (y != 0) {
+            double error = data.Error(i);
+            weight = (error * error) / y; // this is the bin effective weight
+            nloglike += weight * y * ( ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval) );
+         }
+         else {
+            // for empty bin use the average weight  computed from the total data weight
+            weight = data.SumOfError2()/ data.SumOfContent();
+         }
+         if (extended) {
+            nloglike += weight  *  ( fval - y);
+         }
+
+      } else {
+         // standard case no weights or iWeight=1
+         // this is needed for Poisson likelihood (which are extened and not for multinomial)
+         // the formula below  include constant term due to likelihood of saturated model (f(x) = y)
+         // (same formula as in Baker-Cousins paper, page 439 except a factor of 2
+         if (extended) nloglike = fval - y;
+
+         if (y >  0) {
+            nloglike += y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
+         }
+      }
+      return nloglike;
+   };
+
+#ifdef R__USE_IMT
+   auto redFunction = [](const std::vector<double> &objs) {
+      return std::accumulate(objs.begin(), objs.end(), double{});
+   };
+#else
+   (void)nChunks;
+
+   // If IMT is disabled, force the execution policy to the serial case
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      Warning("FitUtil::EvaluatePoissonLogL", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                              "to ROOT::Fit::ExecutionPolicy::kSerial.");
+      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+   }
+#endif
+
+   double res{};
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+      for (unsigned int i = 0; i < n; ++i) {
+         res += mapFunction(i);
+      }
+#ifdef R__USE_IMT
+   } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      ROOT::TThreadExecutor pool;
+      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size());
+      res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
+#endif
+      //   } else if(executionPolicy == ROOT::Fit::kMultitProcess){
+      // ROOT::TProcessExecutor pool;
+      // res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+   } else {
+      Error("FitUtil::EvaluatePoissonLogL",
+            "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+   }
+
+#ifdef DEBUG
+   std::cout << "Loglikelihood  = " << res << std::endl;
+#endif
+
+   return res;
+}
+
+void FitUtil::EvaluatePoissonLogLGradient(const IModelFunction &f, const BinData &data, const double *p, double *grad,
+                                          unsigned int &, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
+{
+   // evaluate the gradient of the Poisson log likelihood function
+
+   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
+   assert(fg != nullptr); // must be called by a grad function
+
+   const IGradModelFunction &func = *fg;
+
+#ifdef USE_PARAMCACHE
+   (const_cast<IGradModelFunction &>(func)).SetParameters(p);
+#endif
+   
+   const DataOptions &fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+
+   double wrefVolume = 1.0;
+   if (useBinVolume && fitOpt.fNormBinVolume)
+      wrefVolume /= data.RefVolume();
+
+   IntegralEvaluator<> igEval(func, p, useBinIntegral);
+
+   unsigned int npar = func.NPar();
+   unsigned initialNPoints = data.Size();
+
+   auto mapFunction = [&](const unsigned int i) {
+      // set all vector values to zero
+      std::vector<double> gradFunc(npar);
+      std::vector<double> pointContribution(npar);
+
+      const auto x1 = data.GetCoordComponent(i, 0);
+      const auto y = data.Value(i);
+      auto invError = data.Error(i);
+
+      invError = (invError != 0.0) ? 1.0 / invError : 1;
+
+      double fval = 0;
+
+      const double *x = nullptr;
+      std::vector<double> xc;
+
+      unsigned ndim = data.NDim();
+      double binVolume = 1.0;
+      if (useBinVolume) {
+         const double *x2 = data.BinUpEdge(i);
+
+         xc.resize(ndim);
+         for (unsigned int j = 0; j < ndim; ++j) {
+            auto x1_j = *data.GetCoordComponent(i, j);
+            binVolume *= std::abs(x2[j] - x1_j);
+            xc[j] = 0.5 * (x2[j] + x1_j);
+         }
+
+         x = xc.data();
+
+         // normalize the bin volume using a reference value
+         binVolume *= wrefVolume;
+      } else if (ndim > 1) {
+         xc.resize(ndim);
+         xc[0] = *x1;
+         for (unsigned int j = 1; j < ndim; ++j)
+            xc[j] = *data.GetCoordComponent(i, j);
+         x = xc.data();
+      } else {
+         x = x1;
+      }
+
+      if (!useBinIntegral) {
+         fval = func(x, p);
+         func.ParameterGradient(x, p, &gradFunc[0]);
+      } else {
+         // calculate integral (normalized by bin volume)
+         // need to set function and parameters here in case loop is parallelized
+         auto x2 = data.BinUpEdge(i);
+         fval = igEval(x, x2);
+         CalculateGradientIntegral(func, x, x2, p, &gradFunc[0]);
+      }
+      if (useBinVolume)
+         fval *= binVolume;
+
+#ifdef DEBUG
+      {
+         R__LOCKGUARD(gROOTMutex);
+         if (i < 5 || (i > data.Size()-5) ) {
+            if (data.NDim() > 1) std::cout << i << "  x " << x[0] << " y " << x[1] << " func " << fval
+                                           << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+            else std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+         }
+      }
+#endif
+
+      // correct the gradient
+      for (unsigned int ipar = 0; ipar < npar; ++ipar) {
+
+         // correct gradient for bin volumes
+         if (useBinVolume)
+            gradFunc[ipar] *= binVolume;
+
+         // df/dp * (1.  - y/f )
+         if (fval > 0)
+            pointContribution[ipar] = gradFunc[ipar] * (1. - y / fval);
+         else if (gradFunc[ipar] != 0) {
+            const double kdmax1 = std::sqrt(std::numeric_limits<double>::max());
+            const double kdmax2 = std::numeric_limits<double>::max() / (4 * initialNPoints);
+            double gg = kdmax1 * gradFunc[ipar];
+            if (gg > 0)
+               gg = std::min(gg, kdmax2);
+            else
+               gg = std::max(gg, -kdmax2);
+            pointContribution[ipar] = -gg;
+         }
+      }
+
+
+      return pointContribution;
+   };
+
+   // Vertically reduce the set of vectors by summing its equally-indexed components
+   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
+      std::vector<double> result(npar);
+
+      for (auto const &pointContribution : pointContributions) {
+         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+            result[parameterIndex] += pointContribution[parameterIndex];
+      }
+
+      return result;
+   };
+
+   std::vector<double> g(npar);
+
+#ifndef R__USE_IMT
+   // If IMT is disabled, force the execution policy to the serial case
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      Warning("FitUtil::EvaluatePoissonLogLGradient",
+              "Multithread execution policy requires IMT, which is disabled. Changing "
+              "to ROOT::Fit::ExecutionPolicy::kSerial.");
+      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+   }
+#endif
+
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+      std::vector<std::vector<double>> allGradients(initialNPoints);
+      for (unsigned int i = 0; i < initialNPoints; ++i) {
+         allGradients[i] = mapFunction(i);
+      }
+      g = redFunction(allGradients);
+   }
+#ifdef R__USE_IMT
+   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      ROOT::TThreadExecutor pool;
+      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
+      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
+   }
+#endif
+
+   // else if(executionPolicy == ROOT::Fit::kMultiprocess){
+   //    ROOT::TProcessExecutor pool;
+   //    g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+   // }
+   else {
+      Error("FitUtil::EvaluatePoissonLogLGradient",
+            "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
+   }
+
+#ifndef R__USE_IMT
+   //to fix compiler warning
+   (void)nChunks;
+#endif
+
+   // copy result
+   std::copy(g.begin(), g.end(), grad);
+
+#ifdef DEBUG
+   std::cout << "***** Final gradient : ";
+   for (unsigned int ii = 0; ii< npar; ++ii) std::cout << grad[ii] << "   ";
+   std::cout << "\n";
+#endif
+   
+}
+
+
+unsigned FitUtil::setAutomaticChunking(unsigned nEvents){
+      auto ncpu  = ROOT::GetImplicitMTPoolSize();
+      if (nEvents/ncpu < 1000) return ncpu;
+      return nEvents/1000;
+      //return ((nEvents/ncpu + 1) % 1000) *40 ; //arbitrary formula
+}
+
+}
+
+} // end namespace ROOT