From 72fdb8477b71cd53d26cf491d7f9d40c36bc4866 Mon Sep 17 00:00:00 2001
From: Danilo Piparo <danilo.piparo@cern.ch>
Date: Wed, 26 Sep 2018 15:30:05 +0200
Subject: [PATCH] Revert commits from d30691 to c04d4c

since ROOT stopped compiling.

Revert "Remove dead code I know I can remove"

This reverts commit c04d4cf62a3f035df627033e7ca0d020bd12e9c9.

Revert "[Math/Fit] Update documentation"

This reverts commit 76f58d3ab1150f7eb8846d071ba1de33b7166ce6.

Revert "Merge old PoissonLogL functions into new interfaces"

This reverts commit b1cd6e68ae470e4203b88cf00cb60c3d7e1b3432.

Revert "Merge old LogL functions into new interfaces"

This reverts commit 059092a75b34e395b348477b621863c44f159e58.

Revert "Merge old functions into new interfaces"

This reverts commit ccecadaf5ba068cdbfc4161bb8d3919cd078e938.

Revert "Move template specification definitions to source file"

This reverts commit 5192cf5bb6ef73e8696d0d327f6b916889d83ce6.

Revert "Fix undefined default initialization"

This reverts commit 92af804ea2fc4cb0f8261d8d4f01a811ce1be5a4.

Revert "Fix bug in gradient evaluation"

This reverts commit cbc475ca8c61d02f284f6385bfe8a5f9fdac55ba.

Revert "Apply clang-format"

This reverts commit 7007245d3bcb4cedc87d1ceba1e21e5cb18a90a8.

Revert "Improve loop performance"

This reverts commit 9ddf280f2ba676397f47ef3d7c3938d00a9aafe1.

Revert "Fix iterator type"

This reverts commit 271738b817f2c776f82bbf39f483bcd4b911974e.

Revert "Fix non initialization warning"

This reverts commit 5b576e20b8fe6c22f69d775c4d3e80eec106ebbe.

Revert "Discard unnecessary operations"

This reverts commit 2dd87289b932baccc6e382e17813a7d78f724a9a.

Revert "Add braces for consistency"

This reverts commit dcc37acf88d4ec50f6f52cc3397e3b7911ab6c25.

Revert "Update infinite check functions"

This reverts commit 06c127dad65197d7ee83f63ae23085372cc6cbb8.

Revert "Remove unnecessary specialization"

This reverts commit 7e767e98738c63a67233da51f90cf575460c3011.

Revert "Remove unnecessary cast"

This reverts commit c8020a0fa5dc65489b9efd3f0bbd7b321f26969c.

Revert "Fix comment"

This reverts commit 711185dcd81afecc5d8d8995e088515068d628e3.

Revert "Add missing include"

This reverts commit d6bfc08ae7343c8e9a12c40ed712bb4762956208.

Revert "Factor out objective functions from FitUtil"

This reverts commit c498e8c8bcde8af7f799f26fab5afecae4bb47c0.

Revert "Fix header comment"

This reverts commit d30691e4d9a67f7915b6706d882843fca6883176.
---
 hist/hist/inc/TF1.h                           |    3 +-
 math/mathcore/CMakeLists.txt                  |    2 +-
 math/mathcore/inc/Fit/Chi2FCN.h               |   12 +-
 math/mathcore/inc/Fit/EvaluateChi2.hxx        |  409 ----
 math/mathcore/inc/Fit/EvaluateLogL.hxx        |  543 -----
 math/mathcore/inc/Fit/EvaluatePoissonLogL.hxx |  393 ----
 math/mathcore/inc/Fit/FitUtil.h               | 1354 +++++++++++--
 math/mathcore/inc/Fit/LogLikelihoodFCN.h      |    9 +-
 math/mathcore/inc/Fit/PoissonLikelihoodFCN.h  |   20 +-
 math/mathcore/src/EvaluateChi2.cxx            |  647 ------
 math/mathcore/src/EvaluateLogL.cxx            |  452 -----
 math/mathcore/src/EvaluatePoissonLogL.cxx     |  541 -----
 math/mathcore/src/FitUtil.cxx                 | 1756 +++++++++++++++++
 13 files changed, 3011 insertions(+), 3130 deletions(-)
 delete mode 100644 math/mathcore/inc/Fit/EvaluateChi2.hxx
 delete mode 100644 math/mathcore/inc/Fit/EvaluateLogL.hxx
 delete mode 100644 math/mathcore/inc/Fit/EvaluatePoissonLogL.hxx
 delete mode 100644 math/mathcore/src/EvaluateChi2.cxx
 delete mode 100644 math/mathcore/src/EvaluateLogL.cxx
 delete mode 100644 math/mathcore/src/EvaluatePoissonLogL.cxx
 create mode 100644 math/mathcore/src/FitUtil.cxx

diff --git a/hist/hist/inc/TF1.h b/hist/hist/inc/TF1.h
index 5513533e9ee..2efdf396d71 100644
--- a/hist/hist/inc/TF1.h
+++ b/hist/hist/inc/TF1.h
@@ -789,8 +789,7 @@ inline T TF1::EvalParTempl(const T *data, const Double_t *params)
 }
 
 #ifdef R__HAS_VECCORE
-
-// Internal to TF1. Evaluates Vectorized TF1 on data of type Double_t
+// Internal to TF1. Evaluates Vectorized TF1 on data of type Double_v
 inline double TF1::EvalParVec(const Double_t *data, const Double_t *params)
 {
    assert(fType == EFType::kTemplVec);
diff --git a/math/mathcore/CMakeLists.txt b/math/mathcore/CMakeLists.txt
index ece31de6024..9499c36ac96 100644
--- a/math/mathcore/CMakeLists.txt
+++ b/math/mathcore/CMakeLists.txt
@@ -1,5 +1,5 @@
 ############################################################################
-# CMakeLists.txt file for building ROOT math/MathCore package
+# CMakeLists.txt file for building ROOT io/io package
 ############################################################################
 
 set(MATHCORE_HEADERS TRandom.h
diff --git a/math/mathcore/inc/Fit/Chi2FCN.h b/math/mathcore/inc/Fit/Chi2FCN.h
index 765da5e420c..d33d67fee6e 100644
--- a/math/mathcore/inc/Fit/Chi2FCN.h
+++ b/math/mathcore/inc/Fit/Chi2FCN.h
@@ -22,7 +22,8 @@
 
 #include "Fit/BinData.h"
 
-#include "Fit/EvaluateChi2.hxx"
+
+#include "Fit/FitUtil.h"
 
 #include <memory>
 
@@ -119,13 +120,14 @@ public:
    /// i-th chi-square residual
    virtual double DataElement(const double *x, unsigned int i, double *g) const {
       if (i==0) this->UpdateNCalls();
-      return FitUtil::Chi2<T>::EvalResidual(BaseFCN::ModelFunction(), BaseFCN::Data(), x, i, g);
+      return FitUtil::Evaluate<T>::EvalChi2Residual(BaseFCN::ModelFunction(), BaseFCN::Data(), x, i, g);
    }
 
    // need to be virtual to be instantiated
    virtual void Gradient(const double *x, double *g) const {
       // evaluate the chi2 gradient
-      FitUtil::Chi2<T>::EvalGradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g, fNEffPoints, fExecutionPolicy);
+      FitUtil::Evaluate<T>::EvalChi2Gradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g, fNEffPoints,
+                                             fExecutionPolicy);
    }
 
    /// get type of fit method function
@@ -145,9 +147,9 @@ private:
    virtual double DoEval (const double * x) const {
       this->UpdateNCalls();
       if (BaseFCN::Data().HaveCoordErrors() || BaseFCN::Data().HaveAsymErrors())
-         return FitUtil::Chi2<T>::EvalEffective(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fNEffPoints);
+         return FitUtil::Evaluate<T>::EvalChi2Effective(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fNEffPoints);
       else
-         return FitUtil::Chi2<T>::Eval(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fNEffPoints, fExecutionPolicy);
+         return FitUtil::Evaluate<T>::EvalChi2(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fNEffPoints, fExecutionPolicy);
    }
 
    // for derivatives
diff --git a/math/mathcore/inc/Fit/EvaluateChi2.hxx b/math/mathcore/inc/Fit/EvaluateChi2.hxx
deleted file mode 100644
index 84173014fb8..00000000000
--- a/math/mathcore/inc/Fit/EvaluateChi2.hxx
+++ /dev/null
@@ -1,409 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-// Header file for class FitUtil
-
-#ifndef ROOT_Fit_EvaluateChi2
-#define ROOT_Fit_EvaluateChi2
-
-#include "Math/IParamFunctionfwd.h"
-#include "Math/IParamFunction.h"
-
-#ifdef R__USE_IMT
-#include "ROOT/TThreadExecutor.hxx"
-#endif
-#include "ROOT/TSequentialExecutor.hxx"
-
-#include "Fit/BinData.h"
-#include "Fit/FitExecutionPolicy.h"
-#include "Fit/FitUtil.h"
-
-#include "TError.h"
-
-// using parameter cache is not thread safe but needed for normalizing the functions
-#define USE_PARAMCACHE
-
-namespace ROOT {
-
-namespace Fit {
-
-/**
-   namespace defining utility free functions using in Fit for evaluating the various fit method
-   functions (chi2, likelihood, etc..)  given the data and the model function
-
-   @ingroup FitMain
-*/
-namespace FitUtil {
-
-template <class T>
-struct Chi2 {
-#ifdef R__HAS_VECCORE
-
-   /**
-    Evaluate the Chi2 given a model function and the data at the point x.
-   */
-   static double Eval(const IModelFunctionTempl<T> &func, const BinData &data, const double *p, unsigned int &nPoints,
-                      ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
-   {
-      // normal chi2 using only error on values (from fitting histogram)
-      // optionally the integral of function in the bin is used
-
-
-      unsigned int n = data.Size();
-      nPoints = data.Size(); // npoints
-
-      // set parameters of the function to cache integral value
-#ifdef USE_PARAMCACHE
-      (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
-#endif
-      // do not cache parameter values (it is not thread safe)
-      // func.SetParameters(p);
-
-      // get fit option and check case if using integral of bins
-      const DataOptions &fitOpt = data.Opt();
-      if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
-         Error(
-            "Chi2<T>::EvaluateChi2",
-            "The vectorized implementation doesn't support Integrals, BinVolume or ExpErrors\n. Aborting operation.");
-
-      (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
-
-      double maxResValue = std::numeric_limits<double>::max() / n;
-      std::vector<double> ones{1, 1, 1, 1};
-      auto vecSize = vecCore::VectorSize<T>();
-
-      auto mapFunction = [&](unsigned int i) {
-         // in case of no error in y invError=1 is returned
-         T x1, y, invErrorVec;
-         vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
-         vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
-         const auto invError = data.ErrorPtr(i * vecSize);
-         auto invErrorptr = (invError != nullptr) ? invError : &ones.front();
-         vecCore::Load<T>(invErrorVec, invErrorptr);
-
-         const T *x;
-         std::vector<T> xc;
-         if (data.NDim() > 1) {
-            xc.resize(data.NDim());
-            xc[0] = x1;
-            for (unsigned int j = 1; j < data.NDim(); ++j)
-               vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
-            x = xc.data();
-         } else {
-            x = &x1;
-         }
-
-         T fval(0.);
-
-#ifdef USE_PARAMCACHE
-         fval = func(x);
-#else
-         fval = func(x, p);
-#endif
-
-         T tmp = (y - fval) * invErrorVec;
-         T chi2 = tmp * tmp;
-
-         // avoid inifinity or nan in chi2 values due to wrong function values
-         auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
-
-         vecCore::MaskedAssign<T>(chi2, m, maxResValue);
-
-         return chi2;
-      };
-
-      auto redFunction = [](const std::vector<T> &objs) { return std::accumulate(objs.begin(), objs.end(), T(0.)); };
-
-#ifndef R__USE_IMT
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("Chi2<T>::EvaluateChi2", "Multithread execution policy requires IMT, which is disabled. Changing "
-                                          "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      T res(0.);
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         ROOT::TSequentialExecutor pool;
-         res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction);
-#ifdef R__USE_IMT
-      } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
-         res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
-#endif
-      } else {
-         Error("Chi2<T>::EvaluateChi2",
-               "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-               "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-      }
-
-      // Last SIMD vector of elements (if padding needed)
-      if (data.Size() % vecSize != 0)
-         vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
-                               res + mapFunction(data.Size() / vecSize));
-
-      return vecCore::ReduceAdd(res);
-   }
-
-   static double EvalEffective(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int &)
-   {
-      Error("Chi2<T>::EvaluateChi2<T>::EvalEffective",
-            "The vectorized evaluation of the Chi2 with coordinate errors is still not supported");
-      return -1.;
-   }
-
-   /**
-       Evaluate the Chi2 gradient given a vectorized model function and the data at the point x.
-   */
-   static void EvalGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
-                            unsigned int &nPoints,
-                            ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
-                            unsigned nChunks = 0)
-   {
-      // evaluate the gradient of the chi2 function
-      // this function is used when the model function knows how to calculate the derivative and we can
-      // avoid that the minimizer re-computes them
-      //
-      // case of chi2 effective (errors on coordinate) is not supported
-
-      if (data.HaveCoordErrors()) {
-         MATH_ERROR_MSG("Chi2<T>::EvaluateChi2Gradient",
-                        "Error on the coordinates are not used in calculating Chi2 gradient");
-         return; // it will assert otherwise later in GetPoint
-      }
-
-      const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
-      assert(fg != nullptr); // must be called by a gradient function
-
-      const IGradModelFunctionTempl<T> &func = *fg;
-
-      const DataOptions &fitOpt = data.Opt();
-      if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
-         Error("Chi2<T>::EvaluateChi2Gradient", "The vectorized implementation doesn't support Integrals,"
-                                                "BinVolume or ExpErrors\n. Aborting operation.");
-
-      unsigned int npar = func.NPar();
-      auto vecSize = vecCore::VectorSize<T>();
-      unsigned initialNPoints = data.Size();
-      unsigned numVectors = initialNPoints / vecSize;
-
-      // numVectors + 1 because of the padded data (call to mapFunction with i = numVectors after the main loop)
-      std::vector<vecCore::Mask<T>> validPointsMasks(numVectors + 1);
-
-      auto mapFunction = [&](const unsigned int i) {
-         // set all vector values to zero
-         std::vector<T> gradFunc(npar);
-         std::vector<T> pointContributionVec(npar);
-
-         T x1, y, invError;
-
-         vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
-         vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
-         const auto invErrorPtr = data.ErrorPtr(i * vecSize);
-
-         if (invErrorPtr == nullptr)
-            invError = 1;
-         else
-            vecCore::Load<T>(invError, invErrorPtr);
-
-         // TODO: Check error options and invert if needed
-
-         T fval = 0;
-
-         const T *x = nullptr;
-
-         unsigned int ndim = data.NDim();
-         // need to declare vector outside if statement
-         // otherwise pointer will be invalid
-         std::vector<T> xc;
-         if (ndim > 1) {
-            xc.resize(ndim);
-            xc[0] = x1;
-            for (unsigned int j = 1; j < ndim; ++j)
-               vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
-            x = xc.data();
-         } else {
-            x = &x1;
-         }
-
-         fval = func(x, p);
-         func.ParameterGradient(x, p, &gradFunc[0]);
-
-         validPointsMasks[i] = isFinite(fval);
-         if (vecCore::MaskEmpty(validPointsMasks[i])) {
-            // Return a zero contribution to all partial derivatives on behalf of the current points
-            return pointContributionVec;
-         }
-
-         // loop on the parameters
-         for (unsigned int ipar = 0; ipar < npar; ++ipar) {
-            // avoid singularity in the function (infinity and nan ) in the chi2 sum
-            // eventually add possibility of excluding some points (like singularity)
-            validPointsMasks[i] = isFinite(gradFunc[ipar]);
-
-            if (vecCore::MaskEmpty(validPointsMasks[i])) {
-               break; // exit loop on parameters
-            }
-
-            // calculate derivative point contribution (only for valid points)
-            vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[i],
-                                  -2.0 * (y - fval) * invError * invError * gradFunc[ipar]);
-         }
-
-         return pointContributionVec;
-      };
-
-      // Reduce the set of vectors by summing its equally-indexed components
-      auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
-         std::vector<T> result(npar);
-
-         for (auto const &pointContributionVec : partialResults) {
-            for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-               result[parameterIndex] += pointContributionVec[parameterIndex];
-         }
-
-         return result;
-      };
-
-      std::vector<T> gVec(npar);
-      std::vector<double> g(npar);
-
-#ifndef R__USE_IMT
-      // to fix compiler warning
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("Chi2<T>::EvaluateChi2Gradient",
-                 "Multithread execution policy requires IMT, which is disabled. Changing "
-                 "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         ROOT::TSequentialExecutor pool;
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
-      }
-#ifdef R__USE_IMT
-      else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
-      }
-#endif
-      else {
-         Error("Chi2<T>::EvaluateChi2Gradient",
-               "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
-      }
-
-      // Compute the contribution from the remaining points
-      unsigned int remainingPoints = initialNPoints % vecSize;
-      if (remainingPoints > 0) {
-         auto remainingPointsContribution = mapFunction(numVectors);
-         // Add the contribution from the valid remaining points and store the result in the output variable
-         auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
-         for (unsigned int param = 0; param < npar; param++) {
-            vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
-         }
-      }
-      // reduce final gradient result from T to double
-      for (unsigned int param = 0; param < npar; param++) {
-         grad[param] = vecCore::ReduceAdd(gVec[param]);
-      }
-
-      // correct the number of points
-      nPoints = initialNPoints;
-
-      if (std::any_of(validPointsMasks.begin(), validPointsMasks.end(),
-                      [](vecCore::Mask<T> validPoints) { return !vecCore::MaskFull(validPoints); })) {
-         T nRejected_v(0);
-         T zeros(0.);
-         for (auto mask : validPointsMasks) {
-            T partial(1.);
-            vecCore::MaskedAssign(partial, mask, zeros);
-            nRejected_v += partial;
-         }
-
-         auto nRejected = vecCore::ReduceAdd(nRejected_v);
-
-         assert(nRejected <= initialNPoints);
-         nPoints = initialNPoints - nRejected;
-
-         if (nPoints < npar) {
-            MATH_ERROR_MSG("Chi2<T>::EvaluateChi2Gradient",
-                           "Too many points rejected for overflow in gradient calculation");
-         }
-      }
-   }
-
-   static double EvalResidual(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int, double *)
-   {
-      Error("Chi2<T>::EvaluateChi2<T>::EvalResidual",
-            "The vectorized evaluation of the Chi2 with the ith residual is still not supported");
-      return -1.;
-   }
-
-   // Compute a mask to filter out infinite numbers and NaN values.
-   // The argument rval is updated so infinite numbers and NaN values are replaced by
-   // maximum finite values (preserving the original sign).
-   static vecCore::Mask<T> isFinite(T &rval)
-   {
-      return rval > -vecCore::NumericLimits<T>::Max() && rval < vecCore::NumericLimits<T>::Max();
-   }
-};
-
-template <>
-struct Chi2<double> {
-#endif
-
-   /**
-    Evaluate the Chi2 given a model function and the data at the point x.
-   */
-   static double Eval(const IModelFunction &func, const BinData &data, const double *p, unsigned int &nPoints,
-                      ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0);
-
-   /**
-    Evaluate the effective Chi2 given a model function and the data at the point x.
-    The effective chi2 uses the errors on the coordinates : W = 1/(sigma_y**2 + ( sigma_x_i * df/dx_i )**2 )
-   */
-   static double EvalEffective(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, unsigned int &nPoints);
-
-   /**
-    Evaluate the Chi2 gradient given a model function and the data at the point x.
-   */
-   static void EvalGradient(const IModelFunctionTempl<double> &f, const BinData &data, const double *p, double *grad,
-                            unsigned int &nPoints,
-                            ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
-                            unsigned nChunks = 0);
-
-   // methods required by dedicate minimizer like Fumili
-
-   /**
-    Evaluate the residual contribution to the Chi2 given a model function and the BinPoint data
-    and if the pointer g is not null evaluate also the gradient of the residual.
-    If the function provides parameter derivatives they are used otherwise a simple derivative calculation
-    is used
-   */
-   static double EvalResidual(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
-                              unsigned int i, double *g = 0);
-};
-
-} // end namespace FitUtil
-
-} // end namespace Fit
-
-} // end namespace ROOT
-
-#endif /* ROOT_Fit_FitUtil */
diff --git a/math/mathcore/inc/Fit/EvaluateLogL.hxx b/math/mathcore/inc/Fit/EvaluateLogL.hxx
deleted file mode 100644
index c7c3f832b67..00000000000
--- a/math/mathcore/inc/Fit/EvaluateLogL.hxx
+++ /dev/null
@@ -1,543 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-#ifndef ROOT_Fit_EvaluateLogL
-#define ROOT_Fit_EvaluateLogL
-
-#include "Math/IParamFunctionfwd.h"
-#include "Math/IParamFunction.h"
-
-#ifdef R__USE_IMT
-#include "ROOT/TThreadExecutor.hxx"
-#endif
-#include "ROOT/TSequentialExecutor.hxx"
-
-#include "Fit/BinData.h"
-#include "Fit/UnBinData.h"
-#include "Fit/FitExecutionPolicy.h"
-#include "Fit/FitUtil.h"
-
-#include "Math/Integrator.h"
-#include "Math/IntegratorMultiDim.h"
-
-#include "TError.h"
-
-// using parameter cache is not thread safe but needed for normalizing the functions
-#define USE_PARAMCACHE
-
-namespace ROOT {
-
-namespace Fit {
-
-/**
-   namespace defining utility free functions using in Fit for evaluating the various fit method
-   functions (chi2, likelihood, etc..)  given the data and the model function
-
-   @ingroup FitMain
-*/
-namespace FitUtil {
-
-// internal class defining
-template <class T>
-class LikelihoodAux {
-public:
-   LikelihoodAux(T logv = 0., T w = 0., T w2 = 0.) : logvalue(logv), weight(w), weight2(w2) {}
-
-   LikelihoodAux operator+(const LikelihoodAux &l) const
-   {
-      return LikelihoodAux<T>(logvalue + l.logvalue, weight + l.weight, weight2 + l.weight2);
-   }
-
-   LikelihoodAux &operator+=(const LikelihoodAux &l)
-   {
-      logvalue += l.logvalue;
-      weight += l.weight;
-      weight2 += l.weight2;
-      return *this;
-   }
-
-   T logvalue;
-   T weight;
-   T weight2;
-};
-
-/**
-  Evaluate the pdf contribution to the LogL given a model function and the BinPoint data.
-  If the pointer g is not null evaluate also the gradient of the pdf.
-  If the function provides parameter derivatives they are used otherwise a simple derivative calculation
-  is used
-*/
-double EvaluatePdf(const IModelFunction &func, const UnBinData &data, const double *x, unsigned int ipoint, double *g = 0);
-
-#ifdef R__HAS_VECCORE
-/**
-  Evaluate the pdf contribution to the LogL given a model function and the BinPoint data.
-  If the pointer g is not null evaluate also the gradient of the pdf.
-  If the function provides parameter derivatives they are used otherwise a simple derivative calculation
-  is used
-*/
-template <class NotCompileIfScalarBackend = std::enable_if<!(std::is_same<double, ROOT::Double_v>::value)>>
-double EvaluatePdf(const IModelFunctionTempl<ROOT::Double_v> &func, const UnBinData &data, const double *p,
-                   unsigned int i, double *)
-{
-   // Evaluate the pdf contribution to the generic logl function in case of bin data
-   // return actually the log of the pdf and its derivatives
-   // func.SetParameters(p);
-   const auto x = vecCore::FromPtr<ROOT::Double_v>(data.GetCoordComponent(i, 0));
-   auto fval = func(&x, p);
-   auto logPdf = ROOT::Math::Util::EvalLog(fval);
-   return vecCore::Get<ROOT::Double_v>(logPdf, 0);
-}
-#endif
-
-template <class T>
-struct LogL {
-#ifdef R__HAS_VECCORE
-
-   /**
-    Evaluate the LogL given a vectorized model function and the data at the point x.
-   */
-   static double Eval(const IModelFunctionTempl<T> &func, const UnBinData &data, const double *const p, int iWeight,
-                      bool extended, unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy,
-                      unsigned nChunks = 0)
-   {
-      // evaluate the LogLikelihood
-      unsigned int n = data.Size();
-      nPoints = data.Size(); // npoints
-
-      // unsigned int nRejected = 0;
-      bool normalizeFunc = false;
-
-      // set parameters of the function to cache integral value
-#ifdef USE_PARAMCACHE
-      (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
-#endif
-
-#ifdef R__USE_IMT
-      // in case parameter needs to be propagated to user function use trick to set parameters by calling one time the
-      // function this will be done in sequential mode and parameters can be set in a thread safe manner
-      if (!normalizeFunc) {
-         if (data.NDim() == 1) {
-            T x;
-            vecCore::Load<T>(x, data.GetCoordComponent(0, 0));
-            func(&x, p);
-         } else {
-            std::vector<T> x(data.NDim());
-            for (unsigned int j = 0; j < data.NDim(); ++j)
-               vecCore::Load<T>(x[j], data.GetCoordComponent(0, j));
-            func(x.data(), p);
-         }
-      }
-#endif
-
-      // this is needed if function must be normalized
-      double norm = 1.0;
-      if (normalizeFunc) {
-         // compute integral of the function
-         std::vector<double> xmin(data.NDim());
-         std::vector<double> xmax(data.NDim());
-         IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
-         // compute integral in the ranges where is defined
-         if (data.Range().Size() > 0) {
-            norm = 0;
-            for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
-               data.Range().GetRange(&xmin[0], &xmax[0], ir);
-               norm += igEval.Integral(xmin.data(), xmax.data());
-            }
-         } else {
-            // use (-inf +inf)
-            data.Range().GetRange(&xmin[0], &xmax[0]);
-            // check if funcition is zero at +- inf
-            T xmin_v, xmax_v;
-            vecCore::Load<T>(xmin_v, xmin.data());
-            vecCore::Load<T>(xmax_v, xmax.data());
-            if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
-               MATH_ERROR_MSG("FitUtil::LogL<T>::Eval",
-                              "A range has not been set and the function is not zero at +/- inf");
-               return 0;
-            }
-            norm = igEval.Integral(&xmin[0], &xmax[0]);
-         }
-      }
-
-      // needed to compute effective global weight in case of extended likelihood
-
-      auto vecSize = vecCore::VectorSize<T>();
-      unsigned int numVectors = n / vecSize;
-
-      auto mapFunction = [&, p](const unsigned i) {
-         T W(0.);
-         T W2(0.);
-         T fval(0.);
-
-         (void)p; /* avoid unused lambda capture warning if PARAMCACHE is disabled */
-
-         T x1;
-         vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
-         const T *x = nullptr;
-         unsigned int ndim = data.NDim();
-         std::vector<T> xc;
-         if (ndim > 1) {
-            xc.resize(ndim);
-            xc[0] = x1;
-            for (unsigned int j = 1; j < ndim; ++j)
-               vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
-            x = xc.data();
-         } else {
-            x = &x1;
-         }
-
-#ifdef USE_PARAMCACHE
-         fval = func(x);
-#else
-         fval = func(x, p);
-#endif
-
-#ifdef DEBUG_FITUTIL
-         if (i < 5 || (i > numVectors - 5)) {
-            if (ndim == 1)
-               std::cout << i << "  x " << x[0] << " fval = " << fval;
-            else
-               std::cout << i << "  x " << x[0] << " y " << x[1] << " fval = " << fval;
-         }
-#endif
-
-         if (normalizeFunc)
-            fval = fval * (1 / norm);
-
-         // function EvalLog protects against negative or too small values of fval
-         auto logval = ROOT::Math::Util::EvalLog(fval);
-         if (iWeight > 0) {
-            T weight(0.);
-            if (data.WeightsPtr(i) == nullptr)
-               weight = 1;
-            else
-               vecCore::Load<T>(weight, data.WeightsPtr(i * vecSize));
-            logval *= weight;
-            if (iWeight == 2) {
-               logval *= weight; // use square of weights in likelihood
-               if (!extended) {
-                  // needed sum of weights and sum of weight square if likelkihood is extended
-                  W = weight;
-                  W2 = weight * weight;
-               }
-            }
-         }
-#ifdef DEBUG_FITUTIL
-         if (i < 5 || (i > numVectors - 5)) {
-            std::cout << "   " << fval << "  logfval " << logval << std::endl;
-         }
-#endif
-
-         return LikelihoodAux<T>(logval, W, W2);
-      };
-
-      auto redFunction = [](const std::vector<LikelihoodAux<T>> &objs) {
-         return std::accumulate(objs.begin(), objs.end(), LikelihoodAux<T>(),
-                                [](const LikelihoodAux<T> &l1, const LikelihoodAux<T> &l2) { return l1 + l2; });
-      };
-
-#ifndef R__USE_IMT
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("FitUtil::LogL<T>::Eval", "Multithread execution policy requires IMT, which is disabled. Changing "
-                                          "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      T logl_v(0.);
-      T sumW_v(0.);
-      T sumW2_v(0.);
-      ROOT::Fit::FitUtil::LikelihoodAux<T> resArray;
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         ROOT::TSequentialExecutor pool;
-         resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction);
-#ifdef R__USE_IMT
-      } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
-         resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
-#endif
-      } else {
-         Error("FitUtil::LogL<T>::Eval",
-               "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-               "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-      }
-
-      logl_v = resArray.logvalue;
-      sumW_v = resArray.weight;
-      sumW2_v = resArray.weight2;
-
-      // Compute the contribution from the remaining points ( Last padded SIMD vector of elements )
-      unsigned int remainingPoints = n % vecSize;
-      if (remainingPoints > 0) {
-         auto remainingPointsContribution = mapFunction(numVectors);
-         // Add the contribution from the valid remaining points and store the result in the output variable
-         auto remainingMask = ::vecCore::Int2Mask<T>(remainingPoints);
-         vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
-         vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
-         vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
-      }
-
-      // reduce vector type to double.
-      double logl = vecCore::ReduceAdd(logl_v);
-      double sumW = vecCore::ReduceAdd(sumW_v);
-      double sumW2 = vecCore::ReduceAdd(sumW2_v);
-
-      if (extended) {
-         // add Poisson extended term
-         double extendedTerm = 0; // extended term in likelihood
-         double nuTot = 0;
-         // nuTot is integral of function in the range
-         // if function has been normalized integral has been already computed
-         if (!normalizeFunc) {
-            IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
-            std::vector<double> xmin(data.NDim());
-            std::vector<double> xmax(data.NDim());
-
-            // compute integral in the ranges where is defined
-            if (data.Range().Size() > 0) {
-               nuTot = 0;
-               for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
-                  data.Range().GetRange(&xmin[0], &xmax[0], ir);
-                  nuTot += igEval.Integral(xmin.data(), xmax.data());
-               }
-            } else {
-               // use (-inf +inf)
-               data.Range().GetRange(&xmin[0], &xmax[0]);
-               // check if funcition is zero at +- inf
-               T xmin_v, xmax_v;
-               vecCore::Load<T>(xmin_v, xmin.data());
-               vecCore::Load<T>(xmax_v, xmax.data());
-               if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
-                  MATH_ERROR_MSG("FitUtil::LogL<T>::Eval",
-                                 "A range has not been set and the function is not zero at +/- inf");
-                  return 0;
-               }
-               nuTot = igEval.Integral(&xmin[0], &xmax[0]);
-            }
-
-            // force to be last parameter value
-            // nutot = p[func.NDim()-1];
-            if (iWeight != 2)
-               extendedTerm = -nuTot; // no need to add in this case n log(nu) since is already computed before
-            else {
-               // case use weight square in likelihood : compute total effective weight = sw2/sw
-               // ignore for the moment case when sumW is zero
-               extendedTerm = -(sumW2 / sumW) * nuTot;
-            }
-
-         } else {
-            nuTot = norm;
-            extendedTerm = -nuTot + double(n) * ROOT::Math::Util::EvalLog(nuTot);
-            // in case of weights need to use here sum of weights (to be done)
-         }
-
-         logl += extendedTerm;
-      }
-
-#ifdef DEBUG_FITUTIL
-      std::cout << "Evaluated log L for parameters (";
-      for (unsigned int ip = 0; ip < func.NPar(); ++ip)
-         std::cout << " " << p[ip];
-      std::cout << ")  nll = " << -logl << std::endl;
-#endif
-
-      return -logl;
-   }
-   
-   /**
-    Evaluate the LogL gradient given a model function and the data at the point x.
-   */
-   static void EvalGradient(const IModelFunctionTempl<T> &f, const UnBinData &data, const double *p, double *grad, unsigned int &,
-                ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial, unsigned nChunks = 0)
-   {
-      // evaluate the gradient of the log likelihood function
-
-      const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
-      assert(fg != nullptr); // must be called by a grad function
-
-      const IGradModelFunctionTempl<T> &func = *fg;
-
-      unsigned int npar = func.NPar();
-      auto vecSize = vecCore::VectorSize<T>();
-      unsigned initialNPoints = data.Size();
-      unsigned numVectors = initialNPoints / vecSize;
-
-#ifdef DEBUG_FITUTIL
-      std::cout << "\n===> Evaluate Gradient for parameters ";
-      for (unsigned int ip = 0; ip < npar; ++ip)
-         std::cout << "  " << p[ip];
-      std::cout << "\n";
-#endif
-
-      (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
-
-      const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
-      const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
-
-      auto mapFunction = [&](const unsigned int i) {
-         std::vector<T> gradFunc(npar);
-         std::vector<T> pointContributionVec(npar);
-
-         T x1;
-         vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
-
-         const T *x = nullptr;
-
-         unsigned int ndim = data.NDim();
-         std::vector<T> xc(ndim);
-         if (ndim > 1) {
-            xc.resize(ndim);
-            xc[0] = x1;
-            for (unsigned int j = 1; j < ndim; ++j)
-               vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
-            x = xc.data();
-         } else {
-            x = &x1;
-         }
-
-         T fval = func(x, p);
-         func.ParameterGradient(x, p, &gradFunc[0]);
-
-#ifdef DEBUG_FITUTIL
-         if (i < 5 || (i > numVectors - 5)) {
-            if (ndim > 1)
-               std::cout << i << "  x " << x[0] << " y " << x[1] << " gradient " << gradFunc[0] << "  " << gradFunc[1]
-                         << "  " << gradFunc[3] << std::endl;
-            else
-               std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  "
-                         << gradFunc[3] << std::endl;
-         }
-#endif
-
-         vecCore::Mask<T> positiveValues = fval > 0;
-
-         for (unsigned int kpar = 0; kpar < npar; ++kpar) {
-            if (!vecCore::MaskEmpty(positiveValues))
-               vecCore::MaskedAssign<T>(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]);
-
-            vecCore::Mask<T> nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0;
-            if (!vecCore::MaskEmpty(nonZeroGradientValues)) {
-               T gg = kdmax1 * gradFunc[kpar];
-               pointContributionVec[kpar] = vecCore::Blend(
-                  nonZeroGradientValues && gg > 0, -vecCore::math::Min(gg, kdmax2), -vecCore::math::Max(gg, -kdmax2));
-            }
-            // if func derivative is zero term is also zero so do not add in g[kpar]
-         }
-
-         return pointContributionVec;
-      };
-
-      // Vertically reduce the set of vectors by summing its equally-indexed components
-      auto redFunction = [&](const std::vector<std::vector<T>> &pointContributions) {
-         std::vector<T> result(npar);
-
-         for (auto const &pointContributionVec : pointContributions) {
-            for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-               result[parameterIndex] += pointContributionVec[parameterIndex];
-         }
-
-         return result;
-      };
-
-      std::vector<T> gVec(npar);
-      std::vector<double> g(npar);
-
-#ifndef R__USE_IMT
-      // to fix compiler warning
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("FitUtil::LogL<T>::EvalGradient",
-                 "Multithread execution policy requires IMT, which is disabled. Changing "
-                 "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         ROOT::TSequentialExecutor pool;
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
-      }
-#ifdef R__USE_IMT
-      else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
-      }
-#endif
-      else {
-         Error("FitUtil::LogL<T>::EvalGradient", "Execution policy unknown. Avalaible choices:\n "
-                                                "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                                "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-      }
-
-      // Compute the contribution from the remaining points
-      unsigned int remainingPoints = initialNPoints % vecSize;
-      if (remainingPoints > 0) {
-         auto remainingPointsContribution = mapFunction(numVectors);
-         // Add the contribution from the valid remaining points and store the result in the output variable
-         auto remainingMask = ::vecCore::Int2Mask<T>(initialNPoints % vecSize);
-         for (unsigned int param = 0; param < npar; param++) {
-            vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
-         }
-      }
-      // reduce final gradient result from T to double
-      for (unsigned int param = 0; param < npar; param++) {
-         grad[param] = vecCore::ReduceAdd(gVec[param]);
-      }
-
-#ifdef DEBUG_FITUTIL
-      std::cout << "Final gradient ";
-      for (unsigned int param = 0; param < npar; param++) {
-         std::cout << "  " << grad[param];
-      }
-      std::cout << "\n";
-#endif
-   }
-};
-
-template <>
-struct LogL<double> {
-#endif
-
-   /**
-    Evaluate the LogL given a model function and the data at the point x.
-   */
-   static double Eval(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p, int iWeight,
-                      bool extended, unsigned int &nPoints, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                      unsigned nChunks = 0);
-
-   /**
-    Evaluate the LogL gradient given a model function and the data at the point x.
-   */
-   static void EvalGradient(const IModelFunctionTempl<double> &f, const UnBinData &data, const double *p, double *grad,
-                            unsigned int &nPoints,
-                            ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
-                            unsigned nChunks = 0);
-};
-
-} // end namespace FitUtil
-
-} // end namespace Fit
-
-} // end namespace ROOT
-
-#if defined(R__HAS_VECCORE) && defined(R__HAS_VC)
-// Fixes alignment for structures of SIMD structures
-Vc_DECLARE_ALLOCATOR(ROOT::Fit::FitUtil::LikelihoodAux<ROOT::Double_v>);
-#endif
-
-#endif /* ROOT_Fit_EvaluateLogL */
diff --git a/math/mathcore/inc/Fit/EvaluatePoissonLogL.hxx b/math/mathcore/inc/Fit/EvaluatePoissonLogL.hxx
deleted file mode 100644
index 63f693a5b5c..00000000000
--- a/math/mathcore/inc/Fit/EvaluatePoissonLogL.hxx
+++ /dev/null
@@ -1,393 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-#ifndef ROOT_Fit_EvaluatePoissonLogL
-#define ROOT_Fit_EvaluatePoissonLogL
-
-#include "Math/IParamFunctionfwd.h"
-#include "Math/IParamFunction.h"
-
-#ifdef R__USE_IMT
-#include "ROOT/TThreadExecutor.hxx"
-#endif
-#include "ROOT/TSequentialExecutor.hxx"
-
-#include "Fit/BinData.h"
-#include "Fit/FitExecutionPolicy.h"
-#include "Fit/FitUtil.h"
-
-#include "Math/Integrator.h"
-#include "Math/IntegratorMultiDim.h"
-
-#include "TError.h"
-
-// using parameter cache is not thread safe but needed for normalizing the functions
-#define USE_PARAMCACHE
-
-namespace ROOT {
-
-namespace Fit {
-
-/**
-   namespace defining utility free functions using in Fit for evaluating the various fit method
-   functions (chi2, likelihood, etc..)  given the data and the model function
-
-   @ingroup FitMain
-*/
-namespace FitUtil {
-
-template <class T>
-struct PoissonLogL {
-#ifdef R__HAS_VECCORE
-
-   /**
-    Evaluate the Poisson LogL given a vectorized model function and the data at the point x.
-    Return also nPoints as the effective number of used points in the LogL evaluation
-
-    By default is extended, pass extedend=false if want to be not extended (MultiNomial)
-   */
-   static double Eval(const IModelFunctionTempl<T> &func, const BinData &data, const double *p, int iWeight,
-                      bool extended, unsigned int, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
-   {
-      // evaluate the Poisson Log Likelihood
-      // for binned likelihood fits
-      // this is Sum ( f(x_i)  -  y_i * log( f (x_i) ) )
-      // add as well constant term for saturated model to make it like a Chi2/2
-      // by default is etended. If extended is false the fit is not extended and
-      // the global poisson term is removed (i.e is a binomial fit)
-      // (remember that in this case one needs to have a function with a fixed normalization
-      // like in a non extended binned fit)
-      //
-      // if use Weight use a weighted dataset
-      // iWeight = 1 ==> logL = Sum( w f(x_i) )
-      // case of iWeight==1 is actually identical to weight==0
-      // iWeight = 2 ==> logL = Sum( w*w * f(x_i) )
-      //
-
-#ifdef USE_PARAMCACHE
-      (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
-#endif
-      auto vecSize = vecCore::VectorSize<T>();
-      // get fit option and check case of using integral of bins
-      const DataOptions &fitOpt = data.Opt();
-      if (fitOpt.fExpErrors || fitOpt.fIntegral)
-         Error("FitUtil::PoissonLogL<T>::Eval",
-               "The vectorized implementation doesn't support Integrals or BinVolume\n. Aborting operation.");
-      bool useW2 = (iWeight == 2);
-
-      auto mapFunction = [&](unsigned int i) {
-         T y;
-         vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
-         T fval(0.);
-
-         if (data.NDim() > 1) {
-            std::vector<T> x(data.NDim());
-            for (unsigned int j = 0; j < data.NDim(); ++j)
-               vecCore::Load<T>(x[j], data.GetCoordComponent(i * vecSize, j));
-#ifdef USE_PARAMCACHE
-            fval = func(x.data());
-#else
-            fval = func(x.data(), p);
-#endif
-            // one -dim case
-         } else {
-            T x;
-            vecCore::Load<T>(x, data.GetCoordComponent(i * vecSize, 0));
-#ifdef USE_PARAMCACHE
-            fval = func(&x);
-#else
-            fval = func(&x, p);
-#endif
-         }
-
-         // EvalLog protects against 0 values of fval but don't want to add in the -log sum
-         // negative values of fval
-         vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
-
-         T nloglike(0.); // negative loglikelihood
-
-         if (useW2) {
-            // apply weight correction . Effective weight is error^2/ y
-            // and expected events in bins is fval/weight
-            // can apply correction only when y is not zero otherwise weight is undefined
-            // (in case of weighted likelihood I don't care about the constant term due to
-            // the saturated model)
-            assert(data.GetErrorType() != ROOT::Fit::BinData::ErrorType::kNoError);
-            T error = 0.0;
-            vecCore::Load<T>(error, data.ErrorPtr(i * vecSize));
-            // for empty bin use the average weight  computed from the total data weight
-            auto m = vecCore::Mask_v<T>(y != 0.0);
-            auto weight = vecCore::Blend(m, (error * error) / y, T(data.SumOfError2() / data.SumOfContent()));
-            if (extended) {
-               nloglike = weight * (fval - y);
-            }
-            vecCore::MaskedAssign<T>(nloglike, y != 0,
-                                     nloglike +
-                                        weight * y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
-
-         } else {
-            // standard case no weights or iWeight=1
-            // this is needed for Poisson likelihood (which are extened and not for multinomial)
-            // the formula below  include constant term due to likelihood of saturated model (f(x) = y)
-            // (same formula as in Baker-Cousins paper, page 439 except a factor of 2
-            if (extended)
-               nloglike = fval - y;
-
-            vecCore::MaskedAssign<T>(nloglike, y > 0,
-                                     nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
-         }
-
-         return nloglike;
-      };
-
-#ifdef R__USE_IMT
-      auto redFunction = [](const std::vector<T> &objs) { return std::accumulate(objs.begin(), objs.end(), T(0.)); };
-#else
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("FitUtil::PoissonLogL<T>::Eval",
-                 "Multithread execution policy requires IMT, which is disabled. Changing "
-                 "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      T res(0.);
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         for (unsigned int i = 0; i < (data.Size() / vecSize); i++) {
-            res += mapFunction(i);
-         }
-#ifdef R__USE_IMT
-      } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
-         res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
-#endif
-      } else {
-         Error("FitUtil::PoissonLogL<T>::Eval",
-               "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-               "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-      }
-
-      // Last padded SIMD vector of elements
-      if (data.Size() % vecSize != 0)
-         vecCore::MaskedAssign(res, ::vecCore::Int2Mask<T>(data.Size() % vecSize),
-                               res + mapFunction(data.Size() / vecSize));
-
-      return vecCore::ReduceAdd(res);
-   }
-
-   static double EvalBinPdf(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int, double *)
-   {
-      Error("FitUtil::PoissonLogL<T>::EvalBinPdf",
-            "The vectorized evaluation of the BinnedLikelihood fit evaluated point by point is still not supported");
-      return -1.;
-   }
-
-   /**
-    Evaluate the Poisson LogL given a vectorized model function and the data at the point x.
-   */
-   static void
-   EvalGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad, unsigned int &,
-                ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial, unsigned nChunks = 0)
-   {
-      // evaluate the gradient of the Poisson log likelihood function
-
-      const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
-      assert(fg != nullptr); // must be called by a grad function
-
-      const IGradModelFunctionTempl<T> &func = *fg;
-
-      (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
-
-      const DataOptions &fitOpt = data.Opt();
-      if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
-         Error("FitUtil::PoissonLogL<T>::EvalGradient", "The vectorized implementation doesn't support Integrals,"
-                                                       "BinVolume or ExpErrors\n. Aborting operation.");
-
-      unsigned int npar = func.NPar();
-      auto vecSize = vecCore::VectorSize<T>();
-      unsigned initialNPoints = data.Size();
-      unsigned numVectors = initialNPoints / vecSize;
-
-      auto mapFunction = [&](const unsigned int i) {
-         // set all vector values to zero
-         std::vector<T> gradFunc(npar);
-         std::vector<T> pointContributionVec(npar);
-
-         T x1, y;
-
-         vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
-         vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
-
-         T fval = 0;
-
-         const T *x = nullptr;
-
-         unsigned ndim = data.NDim();
-         std::vector<T> xc;
-         if (ndim > 1) {
-            xc.resize(ndim);
-            xc[0] = x1;
-            for (unsigned int j = 1; j < ndim; ++j)
-               vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
-            x = xc.data();
-         } else {
-            x = &x1;
-         }
-
-         fval = func(x, p);
-         func.ParameterGradient(x, p, &gradFunc[0]);
-
-         // correct the gradient
-         for (unsigned int ipar = 0; ipar < npar; ++ipar) {
-            vecCore::Mask<T> positiveValuesMask = fval > 0;
-
-            // df/dp * (1.  - y/f )
-            vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. - y / fval));
-
-            vecCore::Mask<T> validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0;
-
-            if (!vecCore::MaskEmpty(validNegativeValuesMask)) {
-               const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
-               const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
-               T gg = kdmax1 * gradFunc[ipar];
-               pointContributionVec[ipar] =
-                  -vecCore::Blend(gg > 0, vecCore::math::Min(gg, kdmax2), vecCore::math::Max(gg, -kdmax2));
-            }
-         }
-
-#ifdef DEBUG_FITUTIL
-         {
-            R__LOCKGUARD(gROOTMutex);
-            if (i < 5 || (i > data.Size() - 5)) {
-               if (data.NDim() > 1)
-                  std::cout << i << "  x " << x[0] << " y " << x[1];
-               else
-                  std::cout << i << "  x " << x[0];
-               std::cout << " func " << fval << " gradient ";
-               for (unsigned int ii = 0; ii < npar; ++ii)
-                  std::cout << "  " << pointContributionVec[ii];
-               std::cout << "\n";
-            }
-         }
-#endif
-
-         return pointContributionVec;
-      };
-
-      // Vertically reduce the set of vectors by summing its equally-indexed components
-      auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
-         std::vector<T> result(npar);
-
-         for (auto const &pointContributionVec : partialResults) {
-            for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-               result[parameterIndex] += pointContributionVec[parameterIndex];
-         }
-
-         return result;
-      };
-
-      std::vector<T> gVec(npar);
-
-#ifndef R__USE_IMT
-      // to fix compiler warning
-      (void)nChunks;
-
-      // If IMT is disabled, force the execution policy to the serial case
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         Warning("FitUtil::PoissonLogL<T>::EvalGradient",
-                 "Multithread execution policy requires IMT, which is disabled. Changing "
-                 "to ROOT::Fit::ExecutionPolicy::kSerial.");
-         executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-      }
-#endif
-
-      if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-         ROOT::TSequentialExecutor pool;
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
-      }
-#ifdef R__USE_IMT
-      else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-         ROOT::TThreadExecutor pool;
-         auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
-         gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
-      }
-#endif
-      else {
-         Error("FitUtil::PoissonLogL<T>:::EvalGradient", "Execution policy unknown. Avalaible choices:\n "
-                                                       "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                                       "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-      }
-
-      // Compute the contribution from the remaining points
-      unsigned int remainingPoints = initialNPoints % vecSize;
-      if (remainingPoints > 0) {
-         auto remainingPointsContribution = mapFunction(numVectors);
-         // Add the contribution from the valid remaining points and store the result in the output variable
-         auto remainingMask = ::vecCore::Int2Mask<T>(remainingPoints);
-         for (unsigned int param = 0; param < npar; param++) {
-            vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
-         }
-      }
-      // reduce final gradient result from T to double
-      for (unsigned int param = 0; param < npar; param++) {
-         grad[param] = vecCore::ReduceAdd(gVec[param]);
-      }
-
-#ifdef DEBUG_FITUTIL
-      std::cout << "***** Final gradient : ";
-      for (unsigned int ii = 0; ii < npar; ++ii)
-         std::cout << grad[ii] << "   ";
-      std::cout << "\n";
-#endif
-   }
-};
-
-template <>
-struct PoissonLogL<double> {
-#endif
-
-   /**
-    Evaluate the Poisson LogL given a model function and the data at the point x.
-    Return also nPoints as the effective number of used points in the LogL evaluation.
-
-    By default is extended, pass extedend=false if want to be not extended (MultiNomial)
-   */
-   static double Eval(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, int iWeight,
-                      bool extended, unsigned int &nPoints, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                      unsigned nChunks = 0);
-
-   /**
-    Evaluate the pdf contribution to the Poisson LogL given a model function and the BinPoint data.
-    If the pointer g is not null evaluate also the gradient of the Poisson pdf.
-    If the function provides parameter derivatives they are used. Otherwise, a simple derivative calculation
-    is used.
-   */
-   static double EvalBinPdf(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, unsigned int i, double *g);
-
-   /**
-    Evaluate the Poisson LogL given a model function and the data at the point x.
-   */
-   static void EvalGradient(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, double *g,
-                            unsigned int &,
-                            ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
-                            unsigned nChunks = 0);
-};
-
-} // end namespace FitUtil
-
-} // end namespace Fit
-
-} // end namespace ROOT
-
-#endif /* ROOT_Fit_EvaluateLogL */
diff --git a/math/mathcore/inc/Fit/FitUtil.h b/math/mathcore/inc/Fit/FitUtil.h
index 5ce43b1ef56..93eda887ba1 100644
--- a/math/mathcore/inc/Fit/FitUtil.h
+++ b/math/mathcore/inc/Fit/FitUtil.h
@@ -16,29 +16,32 @@
 #include "Math/IParamFunctionfwd.h"
 #include "Math/IParamFunction.h"
 
-#include "TROOT.h"
-
 #ifdef R__USE_IMT
 #include "ROOT/TThreadExecutor.hxx"
 #endif
 #include "ROOT/TSequentialExecutor.hxx"
 
+#include "Fit/BinData.h"
+#include "Fit/UnBinData.h"
 #include "Fit/FitExecutionPolicy.h"
 
 #include "Math/Integrator.h"
 #include "Math/IntegratorMultiDim.h"
 
 #include "TError.h"
+#include "TSystem.h"
 
 // using parameter cache is not thread safe but needed for normalizing the functions
 #define USE_PARAMCACHE
 
+//#define DEBUG_FITUTIL
+
 #ifdef R__HAS_VECCORE
 namespace vecCore {
 template <class T>
 vecCore::Mask<T> Int2Mask(unsigned i)
 {
-   T x{};
+   T x;
    for (unsigned j = 0; j < vecCore::VectorSize<T>(); j++)
       vecCore::Set<T>(x, j, j);
    return vecCore::Mask<T>(x < T(i));
@@ -67,6 +70,53 @@ namespace FitUtil {
   template <class T>
   using IModelFunctionTempl = ROOT::Math::IParamMultiFunctionTempl<T>;
 
+  // internal class defining
+  template <class T>
+  class LikelihoodAux {
+  public:
+     LikelihoodAux(T logv = {}, T w = {}, T w2 = {}) : logvalue(logv), weight(w), weight2(w2) {}
+
+     LikelihoodAux operator+(const LikelihoodAux &l) const
+     {
+        return LikelihoodAux<T>(logvalue + l.logvalue, weight + l.weight, weight2 + l.weight2);
+     }
+
+     LikelihoodAux &operator+=(const LikelihoodAux &l)
+     {
+        logvalue += l.logvalue;
+        weight += l.weight;
+        weight2 += l.weight2;
+        return *this;
+     }
+
+     T logvalue;
+     T weight;
+     T weight2;
+  };
+
+  template <>
+  class LikelihoodAux<double> {
+  public:
+     LikelihoodAux(double logv = 0.0, double w = 0.0, double w2 = 0.0) : logvalue(logv), weight(w), weight2(w2){};
+
+     LikelihoodAux operator+(const LikelihoodAux &l) const
+     {
+        return LikelihoodAux<double>(logvalue + l.logvalue, weight + l.weight, weight2 + l.weight2);
+     }
+
+     LikelihoodAux &operator+=(const LikelihoodAux &l)
+     {
+        logvalue += l.logvalue;
+        weight += l.weight;
+        weight2 += l.weight2;
+        return *this;
+     }
+
+     double logvalue;
+     double weight;
+     double weight2;
+  };
+
   // internal class to evaluate the function or the integral
   // and cached internal integration details
   // if useIntegral is false no allocation is done
@@ -111,9 +161,8 @@ namespace FitUtil {
                                                                             const>(*this, &IntegralEvaluator::FN, fDim);
            fIgNDim = new ROOT::Math::IntegratorMultiDim();
            fIgNDim->SetFunction(*fFuncNDim);
-        } else {
+        } else
            assert(fDim > 0);
-        }
      }
 
      void SetParameters(const double *p)
@@ -163,9 +212,8 @@ namespace FitUtil {
            return fIgNDim->Integral(x1, x2) / dV;
            //                   std::cout << " do integral btw x " << x1[0] << "  " << x2[0] << " y " << x1[1] << "  "
            //                   << x2[1] << " dV = " << dV << " result = " << result << std::endl; return result;
-        } else {
+        } else
            assert(1.); // should never be here
-        }
         return 0;
      }
 
@@ -182,7 +230,8 @@ namespace FitUtil {
         if (fDim == 1) {
            ROOT::Double_v xx;
            vecCore::Load<ROOT::Double_v>(xx, x);
-           auto res = (*f)(&xx, p);
+           const double *p0 = p;
+           auto res = (*f)(&xx, (const double *)p0);
            return vecCore::Get<ROOT::Double_v>(res, 0);
         } else {
            std::vector<ROOT::Double_v> xx(fDim);
@@ -210,159 +259,1212 @@ namespace FitUtil {
      ROOT::Math::IMultiGenFunction *fFuncNDim;
   };
 
-  inline unsigned setAutomaticChunking(unsigned nEvents)
-  {
-     auto ncpu = ROOT::GetImplicitMTPoolSize();
-     if (nEvents / ncpu < 1000)
-        return ncpu;
-     return nEvents / 1000;
-     //   return ((nEvents/ncpu + 1) % 1000) *40 ; //arbitrary formula
+  /** Chi2 Functions */
+
+  /**
+      evaluate the Chi2 given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the Chi2 evaluation
+  */
+  double EvaluateChi2(const IModelFunction &func, const BinData &data, const double *x, unsigned int &nPoints,
+                      ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0);
+
+  /**
+      evaluate the effective Chi2 given a model function and the data at the point x.
+      The effective chi2 uses the errors on the coordinates : W = 1/(sigma_y**2 + ( sigma_x_i * df/dx_i )**2 )
+      return also nPoints as the effective number of used points in the Chi2 evaluation
+  */
+  double EvaluateChi2Effective(const IModelFunction &func, const BinData &data, const double *x, unsigned int &nPoints);
+
+  /**
+      evaluate the Chi2 gradient given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the Chi2 evaluation
+  */
+  void EvaluateChi2Gradient(const IModelFunction &func, const BinData &data, const double *x, double *grad,
+                            unsigned int &nPoints,
+                            ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                            unsigned nChunks = 0);
+
+  /**
+      evaluate the LogL given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the LogL evaluation
+  */
+  double EvaluateLogL(const IModelFunction &func, const UnBinData &data, const double *p, int iWeight, bool extended,
+                      unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0);
+
+  /**
+      evaluate the LogL gradient given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the LogL evaluation
+  */
+  void EvaluateLogLGradient(const IModelFunction &func, const UnBinData &data, const double *x, double *grad,
+                            unsigned int &nPoints,
+                            ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                            unsigned nChunks = 0);
+
+  // #ifdef R__HAS_VECCORE
+  //    template <class NotCompileIfScalarBackend = std::enable_if<!(std::is_same<double, ROOT::Double_v>::value)>>
+  //    void EvaluateLogLGradient(const IModelFunctionTempl<ROOT::Double_v> &, const UnBinData &, const double *, double
+  //    *, unsigned int & ) {}
+  // #endif
+
+  /**
+      evaluate the Poisson LogL given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the LogL evaluation
+      By default is extended, pass extedend to false if want to be not extended (MultiNomial)
+  */
+  double EvaluatePoissonLogL(const IModelFunction &func, const BinData &data, const double *x, int iWeight,
+                             bool extended, unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy,
+                             unsigned nChunks = 0);
+
+  /**
+      evaluate the Poisson LogL given a model function and the data at the point x.
+      return also nPoints as the effective number of used points in the LogL evaluation
+  */
+  void EvaluatePoissonLogLGradient(const IModelFunction &func, const BinData &data, const double *x, double *grad,
+                                   unsigned int &nPoints,
+                                   ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                                   unsigned nChunks = 0);
+
+  // methods required by dedicate minimizer like Fumili
+
+  /**
+      evaluate the residual contribution to the Chi2 given a model function and the BinPoint data
+      and if the pointer g is not null evaluate also the gradient of the residual.
+      If the function provides parameter derivatives they are used otherwise a simple derivative calculation
+      is used
+  */
+  double EvaluateChi2Residual(const IModelFunction &func, const BinData &data, const double *x, unsigned int ipoint,
+                              double *g = 0);
+
+  /**
+      evaluate the pdf contribution to the LogL given a model function and the BinPoint data.
+      If the pointer g is not null evaluate also the gradient of the pdf.
+      If the function provides parameter derivatives they are used otherwise a simple derivative calculation
+      is used
+  */
+  double
+  EvaluatePdf(const IModelFunction &func, const UnBinData &data, const double *x, unsigned int ipoint, double *g = 0);
+
+#ifdef R__HAS_VECCORE
+   template <class NotCompileIfScalarBackend = std::enable_if<!(std::is_same<double, ROOT::Double_v>::value)>>
+   double EvaluatePdf(const IModelFunctionTempl<ROOT::Double_v> &func, const UnBinData &data, const double *p, unsigned int i, double *) {
+      // evaluate the pdf contribution to the generic logl function in case of bin data
+      // return actually the log of the pdf and its derivatives
+      // func.SetParameters(p);
+      const auto x = vecCore::FromPtr<ROOT::Double_v>(data.GetCoordComponent(i, 0));
+      auto fval = func(&x, p);
+      auto logPdf = ROOT::Math::Util::EvalLog(fval);
+      return vecCore::Get<ROOT::Double_v>(logPdf, 0);
    }
+#endif
 
-   // derivative with respect of the parameter to be integrated
-   template <class GradFunc = IGradModelFunction>
-   struct ParamDerivFunc {
-      ParamDerivFunc(const GradFunc &f) : fFunc(f), fIpar(0) {}
-      void SetDerivComponent(unsigned int ipar) { fIpar = ipar; }
-      double operator()(const double *x, const double *p) const { return fFunc.ParameterDerivative(x, p, fIpar); }
-      unsigned int NDim() const { return fFunc.NDim(); }
-      const GradFunc &fFunc;
-      unsigned int fIpar;
-   };
+   /**
+       evaluate the pdf contribution to the Poisson LogL given a model function and the BinPoint data.
+       If the pointer g is not null evaluate also the gradient of the Poisson pdf.
+       If the function provides parameter derivatives they are used otherwise a simple derivative calculation
+       is used
+   */
+   double EvaluatePoissonBinPdf(const IModelFunction & func, const BinData & data, const double * x, unsigned int ipoint, double * g = 0);
+
+   unsigned setAutomaticChunking(unsigned nEvents);
 
-   // simple gradient calculator using the 2 points rule
-
-   class SimpleGradientCalculator {
-
-   public:
-      // construct from function and gradient dimension gdim
-      // gdim = npar for parameter gradient
-      // gdim = ndim for coordinate gradients
-      // construct (the param values will be passed later)
-      // one can choose between 2 points rule (1 extra evaluation) istrat=1
-      // or two point rule (2 extra evaluation)
-      // (found 2 points rule does not work correctly - minuit2FitBench fails)
-      SimpleGradientCalculator(int gdim, const IModelFunction &func, double eps = 2.E-8, int istrat = 1)
-         : fEps(eps), fPrecision(1.E-8),                                             // sqrt(epsilon)
-           fStrategy(istrat), fN(gdim), fFunc(func), fVec(std::vector<double>(gdim)) // this can be probably optimized
+   template<class T>
+   struct Evaluate {
+#ifdef R__HAS_VECCORE
+      static double EvalChi2(const IModelFunctionTempl<T> &func, const BinData &data, const double *p,
+                             unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
       {
+         // evaluate the chi2 given a  vectorized function reference  , the data and returns the value and also in nPoints
+         // the actual number of used points
+         // normal chi2 using only error on values (from fitting histogram)
+         // optionally the integral of function in the bin is used
+
+         //Info("EvalChi2","Using vecorized implementation %d",(int) data.Opt().fIntegral);
+
+         unsigned int n = data.Size();
+         nPoints = data.Size();  // npoints
+
+         // set parameters of the function to cache integral value
+#ifdef USE_PARAMCACHE
+         (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
+#endif
+         // do not cache parameter values (it is not thread safe)
+         //func.SetParameters(p);
+
+
+         // get fit option and check case if using integral of bins
+         const DataOptions &fitOpt = data.Opt();
+         if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
+            Error("FitUtil::EvaluateChi2", "The vectorized implementation doesn't support Integrals, BinVolume or ExpErrors\n. Aborting operation.");
+
+         (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
+
+         double maxResValue = std::numeric_limits<double>::max() / n;
+         std::vector<double> ones{1, 1, 1, 1};
+         auto vecSize = vecCore::VectorSize<T>();
+
+         auto mapFunction = [&](unsigned int i) {
+            // in case of no error in y invError=1 is returned
+            T x1, y, invErrorVec;
+            vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
+            vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
+            const auto invError = data.ErrorPtr(i * vecSize);
+            auto invErrorptr = (invError != nullptr) ? invError : &ones.front();
+            vecCore::Load<T>(invErrorVec, invErrorptr);
+
+            const T *x;
+            std::vector<T> xc;
+            if(data.NDim() > 1) {
+               xc.resize(data.NDim());
+               xc[0] = x1;
+               for (unsigned int j = 1; j < data.NDim(); ++j)
+                  vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
+               x = xc.data();
+            } else {
+               x = &x1;
+            }
+
+            T fval{};
+
+#ifdef USE_PARAMCACHE
+            fval = func(x);
+#else
+            fval = func(x, p);
+#endif
+
+            T tmp = (y - fval) * invErrorVec;
+            T chi2 = tmp * tmp;
+
+
+            // avoid inifinity or nan in chi2 values due to wrong function values
+            auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
+
+            vecCore::MaskedAssign<T>(chi2, m, maxResValue);
+
+            return chi2;
+         };
+
+         auto redFunction = [](const std::vector<T> &objs) {
+            return std::accumulate(objs.begin(), objs.end(), T{});
+         };
+
+#ifndef R__USE_IMT
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::EvaluateChi2", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                             "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         T res{};
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            ROOT::TSequentialExecutor pool;
+            res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size()/vecSize), redFunction);
+#ifdef R__USE_IMT
+         } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
+            res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
+#endif
+         } else {
+            Error("FitUtil::EvaluateChi2", "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+         }
+
+         // Last SIMD vector of elements (if padding needed)
+         if (data.Size() % vecSize != 0) 
+            vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
+                                  res + mapFunction(data.Size() / vecSize));
+
+         return vecCore::ReduceAdd(res);
+      }
+
+      static double EvalLogL(const IModelFunctionTempl<T> &func, const UnBinData &data, const double *const p,
+                             int iWeight, bool extended, unsigned int &nPoints,
+                             ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
+      {
+         // evaluate the LogLikelihood
+         unsigned int n = data.Size();
+         nPoints = data.Size();  // npoints
+
+         //unsigned int nRejected = 0;
+         bool normalizeFunc = false;
+
+         // set parameters of the function to cache integral value
+#ifdef USE_PARAMCACHE
+         (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
+#endif
+
+#ifdef R__USE_IMT
+         // in case parameter needs to be propagated to user function use trick to set parameters by calling one time the function
+         // this will be done in sequential mode and parameters can be set in a thread safe manner
+         if (!normalizeFunc) {
+            if (data.NDim() == 1) {
+               T x;
+               vecCore::Load<T>(x, data.GetCoordComponent(0, 0));
+               func( &x, p);
+            }
+            else {
+               std::vector<T> x(data.NDim());
+               for (unsigned int j = 0; j < data.NDim(); ++j)
+                  vecCore::Load<T>(x[j], data.GetCoordComponent(0, j));
+               func( x.data(), p);
+            }
+         }
+#endif
+
+         // this is needed if function must be normalized
+         double norm = 1.0;
+         if (normalizeFunc) {
+            // compute integral of the function
+            std::vector<double> xmin(data.NDim());
+            std::vector<double> xmax(data.NDim());
+            IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
+            // compute integral in the ranges where is defined
+            if (data.Range().Size() > 0) {
+               norm = 0;
+               for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
+                  data.Range().GetRange(&xmin[0], &xmax[0], ir);
+                  norm += igEval.Integral(xmin.data(), xmax.data());
+               }
+            } else {
+               // use (-inf +inf)
+               data.Range().GetRange(&xmin[0], &xmax[0]);
+               // check if funcition is zero at +- inf
+               T xmin_v, xmax_v;
+               vecCore::Load<T>(xmin_v, xmin.data());
+               vecCore::Load<T>(xmax_v, xmax.data());
+               if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
+                  MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
+                  return 0;
+               }
+               norm = igEval.Integral(&xmin[0], &xmax[0]);
+            }
+         }
+
+         // needed to compute effective global weight in case of extended likelihood
+
+         auto vecSize = vecCore::VectorSize<T>();
+         unsigned int numVectors = n / vecSize;
+
+         auto mapFunction = [ &, p](const unsigned i) {
+            T W{};
+            T W2{};
+            T fval{};
+
+            (void)p; /* avoid unused lambda capture warning if PARAMCACHE is disabled */
+
+            T x1;
+            vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
+            const T *x = nullptr;
+            unsigned int ndim = data.NDim();
+            std::vector<T> xc;
+            if (ndim > 1) {
+               xc.resize(ndim);
+               xc[0] = x1;
+               for (unsigned int j = 1; j < ndim; ++j)
+                  vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
+               x = xc.data();
+            } else {
+               x = &x1;
+            }
+
+#ifdef USE_PARAMCACHE
+            fval = func(x);
+#else
+            fval = func(x, p);
+#endif
+
+#ifdef DEBUG_FITUTIL
+            if (i < 5 || (i > numVectors-5) ) {
+               if (ndim == 1) std::cout << i << "  x " << x[0]  << " fval = " << fval; 
+               else std::cout << i << "  x " << x[0] << " y " << x[1] << " fval = " << fval; 
+            }
+#endif
+
+            if (normalizeFunc) fval = fval * (1 / norm);
+
+            // function EvalLog protects against negative or too small values of fval
+            auto logval =  ROOT::Math::Util::EvalLog(fval);
+            if (iWeight > 0) {
+               T weight{};
+               if (data.WeightsPtr(i) == nullptr)
+                  weight = 1;
+               else
+                  vecCore::Load<T>(weight, data.WeightsPtr(i*vecSize));
+               logval *= weight;
+               if (iWeight == 2) {
+                  logval *= weight; // use square of weights in likelihood
+                  if (!extended) {
+                     // needed sum of weights and sum of weight square if likelkihood is extended
+                     W  = weight;
+                     W2 = weight * weight;
+                  }
+               }
+            }
+#ifdef DEBUG_FITUTIL
+            if (i < 5 || (i > numVectors-5)  )  {
+                 std::cout << "   " << fval << "  logfval " << logval << std::endl;
+            }
+#endif
+
+            return LikelihoodAux<T>(logval, W, W2);
+         };
+
+         auto redFunction = [](const std::vector<LikelihoodAux<T>> &objs) {
+            return std::accumulate(objs.begin(), objs.end(), LikelihoodAux<T>(),
+            [](const LikelihoodAux<T> &l1, const LikelihoodAux<T> &l2) {
+               return l1 + l2;
+            });
+         };
+
+#ifndef R__USE_IMT
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::EvaluateLogL", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                             "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         T logl_v{};
+         T sumW_v{};
+         T sumW2_v{};
+         ROOT::Fit::FitUtil::LikelihoodAux<T> resArray;
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            ROOT::TSequentialExecutor pool;
+            resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction);
+#ifdef R__USE_IMT
+         } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking( numVectors);
+            resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
+#endif
+         } else {
+            Error("FitUtil::EvaluateLogL", "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+         }
+
+         logl_v = resArray.logvalue;
+         sumW_v = resArray.weight;
+         sumW2_v = resArray.weight2;
+
+         // Compute the contribution from the remaining points ( Last padded SIMD vector of elements )
+         unsigned int remainingPoints = n % vecSize;
+         if (remainingPoints > 0) {
+            auto remainingPointsContribution = mapFunction(numVectors);
+            // Add the contribution from the valid remaining points and store the result in the output variable
+            auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
+            vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
+            vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
+            vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
+         }
+
+
+         //reduce vector type to double.
+         double logl  = vecCore::ReduceAdd(logl_v);
+         double sumW  = vecCore::ReduceAdd(sumW_v);
+         double sumW2 = vecCore::ReduceAdd(sumW2_v);
+
+         if (extended) {
+            // add Poisson extended term
+            double extendedTerm = 0; // extended term in likelihood
+            double nuTot = 0;
+            // nuTot is integral of function in the range
+            // if function has been normalized integral has been already computed
+            if (!normalizeFunc) {
+               IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
+               std::vector<double> xmin(data.NDim());
+               std::vector<double> xmax(data.NDim());
+
+               // compute integral in the ranges where is defined
+               if (data.Range().Size() > 0) {
+                  nuTot = 0;
+                  for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
+                     data.Range().GetRange(&xmin[0], &xmax[0], ir);
+                     nuTot += igEval.Integral(xmin.data(), xmax.data());
+                  }
+               } else {
+                  // use (-inf +inf)
+                  data.Range().GetRange(&xmin[0], &xmax[0]);
+                  // check if funcition is zero at +- inf
+                  T xmin_v, xmax_v;
+                  vecCore::Load<T>(xmin_v, xmin.data());
+                  vecCore::Load<T>(xmax_v, xmax.data());
+                  if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
+                     MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
+                     return 0;
+                  }
+                  nuTot = igEval.Integral(&xmin[0], &xmax[0]);
+               }
+
+               // force to be last parameter value
+               //nutot = p[func.NDim()-1];
+               if (iWeight != 2)
+                  extendedTerm = - nuTot;  // no need to add in this case n log(nu) since is already computed before
+               else {
+                  // case use weight square in likelihood : compute total effective weight = sw2/sw
+                  // ignore for the moment case when sumW is zero
+                  extendedTerm = - (sumW2 / sumW) * nuTot;
+               }
+
+            } else {
+               nuTot = norm;
+               extendedTerm = - nuTot + double(n) *  ROOT::Math::Util::EvalLog(nuTot);
+               // in case of weights need to use here sum of weights (to be done)
+            }
+
+            logl += extendedTerm;
+         }
+
+#ifdef DEBUG_FITUTIL
+         std::cout << "Evaluated log L for parameters (";
+         for (unsigned int ip = 0; ip < func.NPar(); ++ip)
+            std::cout << " " << p[ip];
+         std::cout << ")  nll = " << -logl << std::endl;
+#endif
+         
+         return -logl;
+
       }
 
-      // internal method to calculate single partial derivative
-      // assume cached vector fVec is already set
-      double DoParameterDerivative(const double *x, const double *p, double f0, int k) const
+      static double EvalPoissonLogL(const IModelFunctionTempl<T> &func, const BinData &data, const double *p,
+                                    int iWeight, bool extended, unsigned int,
+                                    ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
       {
-         double p0 = p[k];
-         double h = std::max(fEps * std::abs(p0), 8.0 * fPrecision * (std::abs(p0) + fPrecision));
-         fVec[k] += h;
-         double deriv = 0;
-         // t.b.d : treat case of infinities
-         // if (fval > - std::numeric_limits<double>::max() && fval < std::numeric_limits<double>::max() )
-         double f1 = fFunc(x, &fVec.front());
-         if (fStrategy > 1) {
-            fVec[k] = p0 - h;
-            double f2 = fFunc(x, &fVec.front());
-            deriv = 0.5 * (f2 - f1) / h;
+         // evaluate the Poisson Log Likelihood
+         // for binned likelihood fits
+         // this is Sum ( f(x_i)  -  y_i * log( f (x_i) ) )
+         // add as well constant term for saturated model to make it like a Chi2/2
+         // by default is etended. If extended is false the fit is not extended and
+         // the global poisson term is removed (i.e is a binomial fit)
+         // (remember that in this case one needs to have a function with a fixed normalization
+         // like in a non extended binned fit)
+         //
+         // if use Weight use a weighted dataset
+         // iWeight = 1 ==> logL = Sum( w f(x_i) )
+         // case of iWeight==1 is actually identical to weight==0
+         // iWeight = 2 ==> logL = Sum( w*w * f(x_i) )
+         //
+
+#ifdef USE_PARAMCACHE
+         (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
+#endif
+         auto vecSize = vecCore::VectorSize<T>();
+         // get fit option and check case of using integral of bins
+         const DataOptions &fitOpt = data.Opt();
+         if (fitOpt.fExpErrors || fitOpt.fIntegral)
+            Error("FitUtil::EvaluateChi2",
+                  "The vectorized implementation doesn't support Integrals or BinVolume\n. Aborting operation.");
+         bool useW2 = (iWeight == 2);
+
+         auto mapFunction = [&](unsigned int i) {
+            T y;
+            vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
+            T fval{};
+
+            if (data.NDim() > 1) {
+               std::vector<T> x(data.NDim());
+               for (unsigned int j = 0; j < data.NDim(); ++j)
+                  vecCore::Load<T>(x[j], data.GetCoordComponent(i * vecSize, j));
+#ifdef USE_PARAMCACHE
+               fval = func(x.data());
+#else
+               fval = func(x.data(), p);
+#endif
+               // one -dim case
+            } else {
+               T x;
+               vecCore::Load<T>(x, data.GetCoordComponent(i * vecSize, 0));
+#ifdef USE_PARAMCACHE
+               fval = func(&x);
+#else
+               fval = func(&x, p);
+#endif
+            }
+
+            // EvalLog protects against 0 values of fval but don't want to add in the -log sum
+            // negative values of fval
+            vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
+
+            T nloglike{}; // negative loglikelihood
+
+            if (useW2) {
+               // apply weight correction . Effective weight is error^2/ y
+               // and expected events in bins is fval/weight
+               // can apply correction only when y is not zero otherwise weight is undefined
+               // (in case of weighted likelihood I don't care about the constant term due to
+               // the saturated model)
+               assert (data.GetErrorType() != ROOT::Fit::BinData::ErrorType::kNoError);
+               T error = 0.0;
+               vecCore::Load<T>(error, data.ErrorPtr(i * vecSize));
+               // for empty bin use the average weight  computed from the total data weight
+               auto m = vecCore::Mask_v<T>(y != 0.0);
+               auto weight = vecCore::Blend(m,(error * error) / y, T(data.SumOfError2()/ data.SumOfContent()) );
+               if (extended) {
+                  nloglike =  weight * ( fval - y);
+               }
+               vecCore::MaskedAssign<T>(nloglike, y != 0, nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) -  ROOT::Math::Util::EvalLog(fval)) );
+
+            } else {
+               // standard case no weights or iWeight=1
+               // this is needed for Poisson likelihood (which are extened and not for multinomial)
+               // the formula below  include constant term due to likelihood of saturated model (f(x) = y)
+               // (same formula as in Baker-Cousins paper, page 439 except a factor of 2
+               if (extended) nloglike = fval - y;
+
+               vecCore::MaskedAssign<T>(
+                  nloglike, y > 0, nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
+            }
+
+            return nloglike;
+         };
+
+#ifdef R__USE_IMT
+         auto redFunction = [](const std::vector<T> &objs) { return std::accumulate(objs.begin(), objs.end(), T{}); };
+#else
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::Evaluate<T>::EvalPoissonLogL",
+                    "Multithread execution policy requires IMT, which is disabled. Changing "
+                    "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         T res{};
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            for (unsigned int i = 0; i < (data.Size() / vecSize); i++) {
+               res += mapFunction(i);
+            }
+#ifdef R__USE_IMT
+         } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
+            res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
+#endif
          } else {
-            deriv = (f1 - f0) / h;
+            Error(
+               "FitUtil::Evaluate<T>::EvalPoissonLogL",
+               "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
          }
 
-         fVec[k] = p[k]; // restore original p value
-         return deriv;
+         // Last padded SIMD vector of elements
+         if (data.Size() % vecSize != 0) 
+            vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
+                                  res + mapFunction(data.Size() / vecSize));
+
+         return vecCore::ReduceAdd(res);
       }
-      // number of dimension in x (needed when calculating the integrals)
-      unsigned int NDim() const { return fFunc.NDim(); }
-      // number of parameters (needed for grad ccalculation)
-      unsigned int NPar() const { return fFunc.NPar(); }
 
-      double ParameterDerivative(const double *x, const double *p, int ipar) const
+      static double EvalChi2Effective(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int &)
       {
-         // fVec are the cached parameter values
-         std::copy(p, p + fN, fVec.begin());
-         double f0 = fFunc(x, p);
-         return DoParameterDerivative(x, p, f0, ipar);
+         Error("FitUtil::Evaluate<T>::EvalChi2Effective", "The vectorized evaluation of the Chi2 with coordinate errors is still not supported");
+         return -1.;
       }
 
-      // calculate all gradient at point (x,p) knnowing already value f0 (we gain a function eval.)
-      void ParameterGradient(const double *x, const double *p, double f0, double *g)
+      // Compute a mask to filter out infinite numbers and NaN values.
+      // The argument rval is updated so infinite numbers and NaN values are replaced by
+      // maximum finite values (preserving the original sign).
+      static vecCore::Mask<T> CheckInfNaNValues(T &rval)
       {
-         // fVec are the cached parameter values
-         std::copy(p, p + fN, fVec.begin());
-         for (unsigned int k = 0; k < fN; ++k) {
-            g[k] = DoParameterDerivative(x, p, f0, k);
+         auto mask = rval > -vecCore::NumericLimits<T>::Max() && rval < vecCore::NumericLimits<T>::Max();
+
+         // Case +inf or nan
+         vecCore::MaskedAssign(rval, !mask, +vecCore::NumericLimits<T>::Max());
+
+         // Case -inf
+         vecCore::MaskedAssign(rval, !mask && rval < 0, -vecCore::NumericLimits<T>::Max());
+
+         return mask;
+      }
+
+      static void EvalChi2Gradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
+                                   unsigned int &nPoints,
+                                   ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                                   unsigned nChunks = 0)
+      {
+         // evaluate the gradient of the chi2 function
+         // this function is used when the model function knows how to calculate the derivative and we can
+         // avoid that the minimizer re-computes them
+         //
+         // case of chi2 effective (errors on coordinate) is not supported
+
+         if (data.HaveCoordErrors()) {
+            MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
+                           "Error on the coordinates are not used in calculating Chi2 gradient");
+            return; // it will assert otherwise later in GetPoint
+         }
+
+         const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
+         assert(fg != nullptr); // must be called by a gradient function
+
+         const IGradModelFunctionTempl<T> &func = *fg;
+
+         const DataOptions &fitOpt = data.Opt();
+         if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
+            Error("FitUtil::EvaluateChi2Gradient", "The vectorized implementation doesn't support Integrals,"
+                                                   "BinVolume or ExpErrors\n. Aborting operation.");
+
+         unsigned int npar = func.NPar();
+         auto vecSize = vecCore::VectorSize<T>();
+         unsigned initialNPoints = data.Size();
+         unsigned numVectors = initialNPoints / vecSize;
+
+         // numVectors + 1 because of the padded data (call to mapFunction with i = numVectors after the main loop)
+         std::vector<vecCore::Mask<T>> validPointsMasks(numVectors + 1);
+
+         auto mapFunction = [&](const unsigned int i) {
+            // set all vector values to zero
+            std::vector<T> gradFunc(npar);
+            std::vector<T> pointContributionVec(npar);
+
+            T x1, y, invError;
+
+            vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
+            vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
+            const auto invErrorPtr = data.ErrorPtr(i * vecSize);
+
+            if (invErrorPtr == nullptr)
+               invError = 1;
+            else
+               vecCore::Load<T>(invError, invErrorPtr);
+
+            // TODO: Check error options and invert if needed
+
+            T fval = 0;
+
+            const T *x = nullptr;
+
+            unsigned int ndim = data.NDim();
+            // need to declare vector outside if statement
+            // otherwise pointer will be invalid
+            std::vector<T> xc;
+            if (ndim > 1) {
+               xc.resize(ndim);
+               xc[0] = x1;
+               for (unsigned int j = 1; j < ndim; ++j)
+                  vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
+               x = xc.data();
+            } else {
+               x = &x1;
+            }
+
+            fval = func(x, p);
+            func.ParameterGradient(x, p, &gradFunc[0]);
+
+            validPointsMasks[i] = CheckInfNaNValues(fval);
+            if (vecCore::MaskEmpty(validPointsMasks[i])) {
+               // Return a zero contribution to all partial derivatives on behalf of the current points
+               return pointContributionVec;
+            }
+
+            // loop on the parameters
+            for (unsigned int ipar = 0; ipar < npar; ++ipar) {
+               // avoid singularity in the function (infinity and nan ) in the chi2 sum
+               // eventually add possibility of excluding some points (like singularity)
+               validPointsMasks[i] = CheckInfNaNValues(gradFunc[ipar]);
+
+               if (vecCore::MaskEmpty(validPointsMasks[i])) {
+                  break; // exit loop on parameters
+               }
+
+               // calculate derivative point contribution (only for valid points)
+               vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[i],
+                                     -2.0 * (y - fval) * invError * invError * gradFunc[ipar]);
+            }
+
+            return pointContributionVec;
+         };
+
+         // Reduce the set of vectors by summing its equally-indexed components
+         auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
+            std::vector<T> result(npar);
+
+            for (auto const &pointContributionVec : partialResults) {
+               for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+                  result[parameterIndex] += pointContributionVec[parameterIndex];
+            }
+
+            return result;
+         };
+
+         std::vector<T> gVec(npar);
+         std::vector<double> g(npar);
+
+#ifndef R__USE_IMT
+         // to fix compiler warning
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::EvaluateChi2Gradient",
+                    "Multithread execution policy requires IMT, which is disabled. Changing "
+                    "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            ROOT::TSequentialExecutor pool;
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
+         }
+#ifdef R__USE_IMT
+         else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
+         }
+#endif
+         else {
+            Error(
+               "FitUtil::EvaluateChi2Gradient",
+               "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
+         }
+
+         // Compute the contribution from the remaining points
+         unsigned int remainingPoints = initialNPoints % vecSize;
+         if (remainingPoints > 0) {
+            auto remainingPointsContribution = mapFunction(numVectors);
+            // Add the contribution from the valid remaining points and store the result in the output variable
+            auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
+            for (unsigned int param = 0; param < npar; param++) {
+               vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
+            }
+         }
+         // reduce final gradient result from T to double
+         for (unsigned int param = 0; param < npar; param++) {
+            grad[param] = vecCore::ReduceAdd(gVec[param]);
+         }
+
+         // correct the number of points
+         nPoints = initialNPoints;
+
+         if (std::any_of(validPointsMasks.begin(), validPointsMasks.end(),
+                         [](vecCore::Mask<T> validPoints) { return !vecCore::MaskFull(validPoints); })) {
+            unsigned nRejected = 0;
+
+            for (const auto &mask : validPointsMasks) {
+               for (unsigned int i = 0; i < vecSize; i++) {
+                  nRejected += !vecCore::Get(mask, i);
+               }
+            }
+
+            assert(nRejected <= initialNPoints);
+            nPoints = initialNPoints - nRejected;
+
+            if (nPoints < npar) {
+               MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
+                              "Too many points rejected for overflow in gradient calculation");
+           }
          }
       }
 
-      // calculate gradient w.r coordinate values
-      void Gradient(const double *x, const double *p, double f0, double *g)
+      static double EvalChi2Residual(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int, double *)
       {
-         // fVec are the cached coordinate values
-         std::copy(x, x + fN, fVec.begin());
-         for (unsigned int k = 0; k < fN; ++k) {
-            double x0 = x[k];
-            double h = std::max(fEps * std::abs(x0), 8.0 * fPrecision * (std::abs(x0) + fPrecision));
-            fVec[k] += h;
-            // t.b.d : treat case of infinities
-            // if (fval > - std::numeric_limits<double>::max() && fval < std::numeric_limits<double>::max() )
-            double f1 = fFunc(&fVec.front(), p);
-            if (fStrategy > 1) {
-               fVec[k] = x0 - h;
-               double f2 = fFunc(&fVec.front(), p);
-               g[k] = 0.5 * (f2 - f1) / h;
+         Error("FitUtil::Evaluate<T>::EvalChi2Residual", "The vectorized evaluation of the Chi2 with the ith residual is still not supported");
+         return -1.;
+      }
+
+      /// evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
+      /// and its gradient
+      static double EvalPoissonBinPdf(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int , double * ) {
+         Error("FitUtil::Evaluate<T>::EvaluatePoissonBinPdf", "The vectorized evaluation of the BinnedLikelihood fit evaluated point by point is still not supported");
+         return -1.;
+      }
+
+      static void
+      EvalPoissonLogLGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
+                              unsigned int &,
+                              ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                              unsigned nChunks = 0)
+      {
+         // evaluate the gradient of the Poisson log likelihood function
+
+         const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
+         assert(fg != nullptr); // must be called by a grad function
+
+         const IGradModelFunctionTempl<T> &func = *fg;
+
+         (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
+
+
+         const DataOptions &fitOpt = data.Opt();
+         if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
+            Error("FitUtil::EvaluatePoissonLogLGradient", "The vectorized implementation doesn't support Integrals,"
+                                                          "BinVolume or ExpErrors\n. Aborting operation.");
+
+         unsigned int npar = func.NPar();
+         auto vecSize = vecCore::VectorSize<T>();
+         unsigned initialNPoints = data.Size();
+         unsigned numVectors = initialNPoints / vecSize;
+
+         auto mapFunction = [&](const unsigned int i) {
+            // set all vector values to zero
+            std::vector<T> gradFunc(npar);
+            std::vector<T> pointContributionVec(npar);
+
+            T x1, y;
+
+            vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
+            vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
+
+            T fval = 0;
+
+            const T *x = nullptr;
+
+            unsigned ndim = data.NDim();
+            std::vector<T> xc;
+            if (ndim > 1) {
+               xc.resize(ndim);
+               xc[0] = x1;
+               for (unsigned int j = 1; j < ndim; ++j)
+                  vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
+               x = xc.data();
             } else {
-               g[k] = (f1 - f0) / h;
+               x = &x1;
             }
 
-            fVec[k] = x[k]; // restore original x value
+            fval = func(x, p);
+            func.ParameterGradient(x, p, &gradFunc[0]);
+
+            // correct the gradient
+            for (unsigned int ipar = 0; ipar < npar; ++ipar) {
+               vecCore::Mask<T> positiveValuesMask = fval > 0;
+
+               // df/dp * (1.  - y/f )
+               vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. - y / fval));
+
+               vecCore::Mask<T> validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0;
+
+               if (!vecCore::MaskEmpty(validNegativeValuesMask)) {
+                  const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
+                  const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
+                  T gg = kdmax1 * gradFunc[ipar];
+                  pointContributionVec[ipar] = -vecCore::Blend(gg > 0, vecCore::math::Min(gg, kdmax2), vecCore::math::Max(gg, -kdmax2));
+               }
+            }
+
+#ifdef DEBUG_FITUTIL
+      {
+         R__LOCKGUARD(gROOTMutex);
+         if (i < 5 || (i > data.Size()-5) ) {
+            if (data.NDim() > 1) std::cout << i << "  x " << x[0] << " y " << x[1];
+            else std::cout << i << "  x " << x[0];
+            std::cout << " func " << fval  << " gradient ";
+            for (unsigned int ii = 0; ii < npar; ++ii) std::cout << "  " << pointContributionVec[ii];
+            std::cout << "\n";
+         }
+      }
+#endif
+
+            return pointContributionVec;
+         };
+
+         // Vertically reduce the set of vectors by summing its equally-indexed components
+         auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
+            std::vector<T> result(npar);
+
+            for (auto const &pointContributionVec : partialResults) {
+               for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+                  result[parameterIndex] += pointContributionVec[parameterIndex];
+            }
+
+            return result;
+         };
+
+         std::vector<T> gVec(npar);
+
+#ifndef R__USE_IMT
+         // to fix compiler warning
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::EvaluatePoissonLogLGradient",
+                    "Multithread execution policy requires IMT, which is disabled. Changing "
+                    "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            ROOT::TSequentialExecutor pool;
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
+         }
+#ifdef R__USE_IMT
+         else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
+         }
+#endif
+         else {
+            Error("FitUtil::EvaluatePoissonLogLGradient", "Execution policy unknown. Avalaible choices:\n "
+                                                          "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
+                                                          "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+         }
+
+
+         // Compute the contribution from the remaining points
+         unsigned int remainingPoints = initialNPoints % vecSize;
+         if (remainingPoints > 0) {
+            auto remainingPointsContribution = mapFunction(numVectors);
+            // Add the contribution from the valid remaining points and store the result in the output variable
+            auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
+            for (unsigned int param = 0; param < npar; param++) {
+               vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
+            }
+         }
+         // reduce final gradient result from T to double
+         for (unsigned int param = 0; param < npar; param++) {
+            grad[param] = vecCore::ReduceAdd(gVec[param]);
          }
+
+#ifdef DEBUG_FITUTIL
+         std::cout << "***** Final gradient : ";
+         for (unsigned int ii = 0; ii< npar; ++ii) std::cout << grad[ii] << "   ";
+         std::cout << "\n";
+#endif  
+
       }
 
-   private:
-      double fEps;
-      double fPrecision;
-      int fStrategy;   // strategy in calculation ( =1 use 2 point rule( 1 extra func) , = 2 use r point rule)
-      unsigned int fN; // gradient dimension
-      const IModelFunction &fFunc;
-      mutable std::vector<double> fVec; // cached coordinates (or parameter values in case of gradientpar)
+      static void EvalLogLGradient(const IModelFunctionTempl<T> &f, const UnBinData &data, const double *p,
+                                   double *grad, unsigned int &,
+                                   ROOT::Fit::ExecutionPolicy executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial,
+                                   unsigned nChunks = 0)
+      {
+         // evaluate the gradient of the log likelihood function
+
+         const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
+         assert(fg != nullptr); // must be called by a grad function
+
+         const IGradModelFunctionTempl<T> &func = *fg;
+
+
+         unsigned int npar = func.NPar();
+         auto vecSize = vecCore::VectorSize<T>();
+         unsigned initialNPoints = data.Size();
+         unsigned numVectors = initialNPoints / vecSize;
+
+#ifdef DEBUG_FITUTIL 
+         std::cout << "\n===> Evaluate Gradient for parameters ";
+         for (unsigned int ip = 0; ip < npar; ++ip)
+            std::cout << "  " << p[ip];
+         std::cout << "\n";
+#endif 
+
+         (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
+
+         const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
+         const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
+
+         auto mapFunction = [&](const unsigned int i) {
+            std::vector<T> gradFunc(npar);
+            std::vector<T> pointContributionVec(npar);
+
+            T x1;
+            vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
+
+            const T *x = nullptr;
+
+            unsigned int ndim = data.NDim();
+            std::vector<T> xc(ndim);
+            if (ndim > 1) {
+               xc.resize(ndim);
+               xc[0] = x1;
+               for (unsigned int j = 1; j < ndim; ++j)
+                  vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
+               x = xc.data();
+            } else {
+               x = &x1;
+            }
+
+
+            T fval = func(x, p);
+            func.ParameterGradient(x, p, &gradFunc[0]);
+
+#ifdef DEBUG_FITUTIL            
+            if (i < 5 || (i > numVectors-5) ) {
+               if (ndim > 1) std::cout << i << "  x " << x[0] << " y " << x[1] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+               else std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+            }
+#endif            
+
+            vecCore::Mask<T> positiveValues = fval > 0;
+
+            for (unsigned int kpar = 0; kpar < npar; ++kpar) {
+               if (!vecCore::MaskEmpty(positiveValues))
+                  vecCore::MaskedAssign<T>(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]);
+
+               vecCore::Mask<T> nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0;
+               if (!vecCore::MaskEmpty(nonZeroGradientValues)) {
+                  T gg = kdmax1 * gradFunc[kpar];
+                  pointContributionVec[kpar] =
+                     vecCore::Blend(nonZeroGradientValues && gg > 0, -vecCore::math::Min(gg, kdmax2),
+                                    -vecCore::math::Max(gg, -kdmax2));
+               }
+               // if func derivative is zero term is also zero so do not add in g[kpar]
+            }
+
+            return pointContributionVec;
+         };
+
+         // Vertically reduce the set of vectors by summing its equally-indexed components
+         auto redFunction = [&](const std::vector<std::vector<T>> &pointContributions) {
+            std::vector<T> result(npar);
+
+            for (auto const &pointContributionVec : pointContributions) {
+               for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+                  result[parameterIndex] += pointContributionVec[parameterIndex];
+            }
+
+            return result;
+         };
+
+         std::vector<T> gVec(npar);
+         std::vector<double> g(npar);
+
+#ifndef R__USE_IMT
+         // to fix compiler warning
+         (void)nChunks;
+
+         // If IMT is disabled, force the execution policy to the serial case
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            Warning("FitUtil::EvaluateLogLGradient",
+                    "Multithread execution policy requires IMT, which is disabled. Changing "
+                    "to ROOT::Fit::ExecutionPolicy::kSerial.");
+            executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+         }
+#endif
+
+         if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+            ROOT::TSequentialExecutor pool;
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
+         }
+#ifdef R__USE_IMT
+         else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+            ROOT::TThreadExecutor pool;
+            auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
+            gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
+         }
+#endif
+         else {
+            Error("FitUtil::EvaluateLogLGradient", "Execution policy unknown. Avalaible choices:\n "
+                                                   "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
+                                                   "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+         }
+
+         // Compute the contribution from the remaining points
+         unsigned int remainingPoints = initialNPoints % vecSize;
+         if (remainingPoints > 0) {
+            auto remainingPointsContribution = mapFunction(numVectors);
+            // Add the contribution from the valid remaining points and store the result in the output variable
+            auto remainingMask = vecCore::Int2Mask<T>(initialNPoints % vecSize);
+            for (unsigned int param = 0; param < npar; param++) {
+               vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
+            }
+         }
+         // reduce final gradient result from T to double
+         for (unsigned int param = 0; param < npar; param++) {
+            grad[param] = vecCore::ReduceAdd(gVec[param]);
+         }
+
+#ifdef DEBUG_FITUTIL
+         std::cout << "Final gradient ";
+         for (unsigned int param = 0; param < npar; param++) {
+            std::cout << "  " << grad[param];
+         }
+         std::cout << "\n";
+#endif
+      }
    };
 
-   // function to avoid infinities or nan
-   inline double CorrectValue(double rval)
-   {
-      // avoid infinities or nan in  rval
-      if (rval > -std::numeric_limits<double>::max() && rval < std::numeric_limits<double>::max())
-         return rval;
-      else if (rval < 0)
-         // case -inf
-         return -std::numeric_limits<double>::max();
-      else
-         // case + inf or nan
-         return +std::numeric_limits<double>::max();
-   }
+   template<>
+   struct Evaluate<double>{
+#endif
+
+      static double EvalChi2(const IModelFunction &func, const BinData &data, const double *p, unsigned int &nPoints,
+                             ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
+      {
+         // evaluate the chi2 given a  function reference, the data and returns the value and also in nPoints
+         // the actual number of used points
+         // normal chi2 using only error on values (from fitting histogram)
+         // optionally the integral of function in the bin is used
 
-   // calculation of the integral of the gradient functions
-   // for a function providing derivative w.r.t parameters
-   // x1 and x2 defines the integration interval , p the parameters
-   template <class GFunc>
-   void CalculateGradientIntegral(const GFunc &gfunc, const double *x1, const double *x2, const double *p, double *g)
-   {
-
-      // needs to calculate the integral for each partial derivative
-      ParamDerivFunc<GFunc> pfunc(gfunc);
-      IntegralEvaluator<ParamDerivFunc<GFunc>> igDerEval(pfunc, p, true);
-      // loop on the parameters
-      unsigned int npar = gfunc.NPar();
-      for (unsigned int k = 0; k < npar; ++k) {
-         pfunc.SetDerivComponent(k);
-         g[k] = igDerEval(x1, x2);
+
+         //Info("EvalChi2","Using non-vecorized implementation %d",(int) data.Opt().fIntegral);
+
+         return FitUtil::EvaluateChi2(func, data, p, nPoints, executionPolicy, nChunks);
       }
-   }
+
+      static double EvalLogL(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p,
+                             int iWeight, bool extended, unsigned int &nPoints,
+                             ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
+      {
+         return FitUtil::EvaluateLogL(func, data, p, iWeight, extended, nPoints, executionPolicy, nChunks);
+      }
+
+      static double EvalPoissonLogL(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
+                                    int iWeight, bool extended, unsigned int &nPoints,
+                                    ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks = 0)
+      {
+         return FitUtil::EvaluatePoissonLogL(func, data, p, iWeight, extended, nPoints, executionPolicy, nChunks);
+      }
+
+      static double EvalChi2Effective(const IModelFunctionTempl<double> &func, const BinData & data, const double * p, unsigned int &nPoints)
+      {
+         return FitUtil::EvaluateChi2Effective(func, data, p, nPoints);
+      }
+      static void EvalChi2Gradient(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
+                                   double *g, unsigned int &nPoints,
+                                   ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
+                                   unsigned nChunks = 0)
+      {
+         FitUtil::EvaluateChi2Gradient(func, data, p, g, nPoints, executionPolicy, nChunks);
+      }
+      static double EvalChi2Residual(const IModelFunctionTempl<double> &func, const BinData & data, const double * p, unsigned int i, double *g = 0)
+      {
+         return FitUtil::EvaluateChi2Residual(func, data, p, i, g);
+      }
+
+      /// evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
+      /// and its gradient
+      static double EvalPoissonBinPdf(const IModelFunctionTempl<double> &func, const BinData & data, const double *p, unsigned int i, double *g ) {
+         return FitUtil::EvaluatePoissonBinPdf(func, data, p, i, g);
+      }
+
+      static void
+      EvalPoissonLogLGradient(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, double *g,
+                              unsigned int &nPoints,
+                              ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
+                              unsigned nChunks = 0)
+      {
+         FitUtil::EvaluatePoissonLogLGradient(func, data, p, g, nPoints, executionPolicy, nChunks);
+      }
+
+      static void EvalLogLGradient(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p,
+                                   double *g, unsigned int &nPoints,
+                                   ::ROOT::Fit::ExecutionPolicy executionPolicy = ::ROOT::Fit::ExecutionPolicy::kSerial,
+                                   unsigned nChunks = 0)
+      {
+         FitUtil::EvaluateLogLGradient(func, data, p, g, nPoints, executionPolicy, nChunks);
+      }
+   };
 
 } // end namespace FitUtil
 
-} // end namespace Fit
+   } // end namespace Fit
 
 } // end namespace ROOT
 
+#if defined (R__HAS_VECCORE) && defined(R__HAS_VC)
+//Fixes alignment for structures of SIMD structures
+Vc_DECLARE_ALLOCATOR(ROOT::Fit::FitUtil::LikelihoodAux<ROOT::Double_v>);
+#endif
 
 #endif /* ROOT_Fit_FitUtil */
diff --git a/math/mathcore/inc/Fit/LogLikelihoodFCN.h b/math/mathcore/inc/Fit/LogLikelihoodFCN.h
index b76750a7ef4..810029d806c 100644
--- a/math/mathcore/inc/Fit/LogLikelihoodFCN.h
+++ b/math/mathcore/inc/Fit/LogLikelihoodFCN.h
@@ -19,7 +19,7 @@
 
 #include "Fit/UnBinData.h"
 
-#include "Fit/EvaluateLogL.hxx"
+#include "Fit/FitUtil.h"
 
 #include <memory>
 
@@ -126,8 +126,8 @@ public:
    // need to be virtual to be instantited
    virtual void Gradient(const double *x, double *g) const {
       // evaluate the chi2 gradient
-      FitUtil::LogL<typename BaseFCN::T>::EvalGradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g, fNEffPoints,
-                                                       fExecutionPolicy);
+      FitUtil::Evaluate<typename BaseFCN::T>::EvalLogLGradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g,
+                                                               fNEffPoints, fExecutionPolicy);
    }
 
    /// get type of fit method function
@@ -154,8 +154,7 @@ private:
     */
    virtual double DoEval (const double * x) const {
       this->UpdateNCalls();
-      return FitUtil::LogL<T>::Eval(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fWeight, fIsExtended, fNEffPoints,
-                                    fExecutionPolicy);
+      return FitUtil::Evaluate<T>::EvalLogL(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fWeight, fIsExtended, fNEffPoints, fExecutionPolicy);
    }
 
    // for derivatives
diff --git a/math/mathcore/inc/Fit/PoissonLikelihoodFCN.h b/math/mathcore/inc/Fit/PoissonLikelihoodFCN.h
index d232dfdd25f..0a09ddbad57 100644
--- a/math/mathcore/inc/Fit/PoissonLikelihoodFCN.h
+++ b/math/mathcore/inc/Fit/PoissonLikelihoodFCN.h
@@ -19,10 +19,18 @@
 
 #include "Fit/BinData.h"
 
-#include "Fit/EvaluatePoissonLogL.hxx"
+#include "Fit/FitUtil.h"
+
 
 #include <memory>
 
+//#define PARALLEL
+// #ifdef PARALLEL
+// #ifndef ROOT_Fit_FitUtilParallel
+// #include "Fit/FitUtilParallel.h"
+// #endif
+// #endif
+
 namespace ROOT {
 
    namespace Fit {
@@ -114,15 +122,15 @@ public:
    /// i-th likelihood element and its gradient
    virtual double DataElement(const double * x, unsigned int i, double * g) const {
       if (i==0) this->UpdateNCalls();
-      return FitUtil::PoissonLogL<typename BaseFCN::T>::EvalBinPdf(BaseFCN::ModelFunction(), BaseFCN::Data(), x, i, g);
+      return FitUtil::Evaluate<typename BaseFCN::T>::EvalPoissonBinPdf(BaseFCN::ModelFunction(), BaseFCN::Data(), x, i, g);
    }
 
    /// evaluate gradient
    virtual void Gradient(const double *x, double *g) const
    {
       // evaluate the Poisson gradient
-      FitUtil::PoissonLogL<typename BaseFCN::T>::EvalGradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g,
-                                                              fNEffPoints, fExecutionPolicy);
+      FitUtil::Evaluate<typename BaseFCN::T>::EvalPoissonLogLGradient(BaseFCN::ModelFunction(), BaseFCN::Data(), x, g,
+                                                                      fNEffPoints, fExecutionPolicy);
    }
 
    /// get type of fit method function
@@ -155,8 +163,8 @@ private:
     */
    virtual double DoEval (const double * x) const {
       this->UpdateNCalls();
-      return FitUtil::PoissonLogL<T>::Eval(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fWeight, fIsExtended,
-                                           fNEffPoints, fExecutionPolicy);
+      return FitUtil::Evaluate<T>::EvalPoissonLogL(BaseFCN::ModelFunction(), BaseFCN::Data(), x, fWeight, fIsExtended,
+                                                   fNEffPoints, fExecutionPolicy);
    }
 
    // for derivatives
diff --git a/math/mathcore/src/EvaluateChi2.cxx b/math/mathcore/src/EvaluateChi2.cxx
deleted file mode 100644
index 6d0fbad08e2..00000000000
--- a/math/mathcore/src/EvaluateChi2.cxx
+++ /dev/null
@@ -1,647 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-// Implementation file for class FitUtil
-
-#include "Fit/EvaluateChi2.hxx"
-
-#include "Fit/BinData.h"
-
-#include "Math/IFunctionfwd.h"
-#include "Math/IParamFunction.h"
-#include "Math/WrappedFunction.h"
-#include "Math/OneDimFunctionAdapter.h"
-#include "Math/RichardsonDerivator.h"
-
-#include "Math/Error.h"
-#include "Math/Util.h" // for safe log(x)
-
-#include <limits>
-#include <cmath>
-#include <cassert>
-#include <algorithm>
-#include <numeric>
-
-#include "TROOT.h"
-
-//#define DEBUG
-#ifdef DEBUG
-#define NSAMPLE 10
-#include <iostream>
-#endif
-
-namespace ROOT {
-
-namespace Fit {
-
-namespace FitUtil {
-
-
-// Evaluates the Chi2 given a function reference and the data and returns the value.
-//
-// Optionally the integral of function in the bin is used.
-double Chi2<double>::Eval(const IModelFunction &func, const BinData &data, const double *p, unsigned int &,
-                          ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
-{
-   unsigned int n = data.Size();
-
-// set parameters of the function to cache integral value
-#ifdef USE_PARAMCACHE
-   (const_cast<IModelFunction &>(func)).SetParameters(p);
-#endif
-   // do not cache parameter values (it is not thread safe)
-   // func.SetParameters(p);
-
-   // get fit option and check case if using integral of bins
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-   bool useExpErrors = (fitOpt.fExpErrors);
-   bool isWeighted = data.IsWeighted();
-
-#ifdef DEBUG
-   std::cout << "\n\nFit data size = " << n << std::endl;
-   std::cout << "evaluate chi2 using function " << &func << "  " << p << std::endl;
-   std::cout << "use empty bins  " << fitOpt.fUseEmpty << std::endl;
-   std::cout << "use integral    " << fitOpt.fIntegral << std::endl;
-   std::cout << "use all error=1 " << fitOpt.fErrors1 << std::endl;
-   if (isWeighted)
-      std::cout << "Weighted data set - sumw =  " << data.SumOfContent() << "  sumw2 = " << data.SumOfError2()
-                << std::endl;
-#endif
-
-#ifdef USE_PARAMCACHE
-   IntegralEvaluator<> igEval(func, 0, useBinIntegral);
-#else
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-#endif
-   double maxResValue = std::numeric_limits<double>::max() / n;
-   double wrefVolume = 1.0;
-   if (useBinVolume) {
-      if (fitOpt.fNormBinVolume)
-         wrefVolume /= data.RefVolume();
-   }
-
-   (const_cast<IModelFunction &>(func)).SetParameters(p);
-
-   auto mapFunction = [&](const unsigned i) {
-      double chi2{};
-      double fval{};
-
-      const auto x1 = data.GetCoordComponent(i, 0);
-      const auto y = data.Value(i);
-      auto invError = data.InvError(i);
-
-      const double *x = nullptr;
-      std::vector<double> xc;
-      double binVolume = 1.0;
-      if (useBinVolume) {
-         unsigned int ndim = data.NDim();
-         const double *x2 = data.BinUpEdge(i);
-         xc.resize(data.NDim());
-         for (unsigned int j = 0; j < ndim; ++j) {
-            auto xx = *data.GetCoordComponent(i, j);
-            binVolume *= std::abs(x2[j] - xx);
-            xc[j] = 0.5 * (x2[j] + xx);
-         }
-         x = xc.data();
-         // normalize the bin volume using a reference value
-         binVolume *= wrefVolume;
-      } else if (data.NDim() > 1) {
-         xc.resize(data.NDim());
-         xc[0] = *x1;
-         for (unsigned int j = 1; j < data.NDim(); ++j)
-            xc[j] = *data.GetCoordComponent(i, j);
-         x = xc.data();
-      } else {
-         x = x1;
-      }
-
-      if (!useBinIntegral) {
-#ifdef USE_PARAMCACHE
-         fval = func(x);
-#else
-         fval = func(x, p);
-#endif
-      } else {
-         // calculate integral normalized by bin volume
-         // need to set function and parameters here in case loop is parallelized
-         fval = igEval(x, data.BinUpEdge(i));
-      }
-      // normalize result if requested according to bin volume
-      if (useBinVolume)
-         fval *= binVolume;
-
-      // expected errors
-      if (useExpErrors) {
-         double invWeight = 1.0;
-         if (isWeighted) {
-            // we need first to check if a weight factor needs to be applied
-            // weight = sumw2/sumw = error**2/content
-            // invWeight = y * invError * invError;
-            // we use always the global weight and not the observed one in the bin
-            // for empty bins use global weight (if it is weighted data.SumError2() is not zero)
-            invWeight = data.SumOfContent() / data.SumOfError2();
-            // if (invError > 0) invWeight = y * invError * invError;
-         }
-
-         //  if (invError == 0) invWeight = (data.SumOfError2() > 0) ? data.SumOfContent()/ data.SumOfError2() : 1.0;
-         // compute expected error  as f(x) / weight
-         double invError2 = (fval > 0) ? invWeight / fval : 0.0;
-         invError = std::sqrt(invError2);
-         // std::cout << "using Pearson chi2 " << x[0] << "  " << 1./invError2 << "  " << fval << std::endl;
-      }
-
-//#define DEBUG
-#ifdef DEBUG
-      std::cout << x[0] << "  " << y << "  " << 1. / invError << " params : ";
-      for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
-         std::cout << p[ipar] << "\t";
-      std::cout << "\tfval = " << fval << " bin volume " << binVolume << " ref " << wrefVolume << std::endl;
-#endif
-      //#undef DEBUG
-
-      if (invError > 0) {
-
-         double tmp = (y - fval) * invError;
-         double resval = tmp * tmp;
-
-         // avoid inifinity or nan in chi2 values due to wrong function values
-         if (resval < maxResValue)
-            chi2 += resval;
-         else {
-            chi2 += maxResValue;
-         }
-      }
-      return chi2;
-   };
-
-#ifdef R__USE_IMT
-   auto redFunction = [](const std::vector<double> &objs) {
-      return std::accumulate(objs.begin(), objs.end(), double{});
-   };
-#else
-   (void)nChunks;
-
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("Chi2<double>::EvaluateChi2", "Multithread execution policy requires IMT, which is disabled. Changing "
-                                            "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   double res{};
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      for (unsigned int i = 0; i < n; ++i) {
-         res += mapFunction(i);
-      }
-#ifdef R__USE_IMT
-   } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size());
-      res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
-#endif
-   } else {
-      Error("Chi2<double>::EvaluateChi2", "Execution policy unknown. Avalaible choices:\n "
-                                          "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                          "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-   }
-
-   return res;
-}
-
-// Evaluates the Chi2 given a function reference and the data. Returns the value and,
-// in nPoints, the actual number of used points.
-//
-// This method uses the error in the coordinates.
-// The integral of bin does not make sense in this case.
-double Chi2<double>::EvalEffective(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
-                                   unsigned int &nPoints)
-{
-   unsigned int n = data.Size();
-
-#ifdef DEBUG
-   std::cout << "\n\nFit data size = " << n << std::endl;
-   std::cout << "evaluate effective chi2 using function " << &func << "  " << p << std::endl;
-#endif
-
-   assert(data.HaveCoordErrors() || data.HaveAsymErrors());
-
-   double chi2 = 0;
-
-   // func.SetParameters(p);
-
-   unsigned int ndim = func.NDim();
-
-   // use Richardson derivator
-   ROOT::Math::RichardsonDerivator derivator;
-
-   double maxResValue = std::numeric_limits<double>::max() / n;
-
-   for (unsigned int i = 0; i < n; ++i) {
-
-      double y = 0;
-      const double *x = data.GetPoint(i, y);
-
-      double fval = func(x, p);
-
-      double delta_y_func = y - fval;
-
-      double ey = 0;
-      const double *ex = 0;
-      if (!data.HaveAsymErrors())
-         ex = data.GetPointError(i, ey);
-      else {
-         double eylow, eyhigh = 0;
-         ex = data.GetPointError(i, eylow, eyhigh);
-         if (delta_y_func < 0)
-            ey = eyhigh; // function is higher than points
-         else
-            ey = eylow;
-      }
-      double e2 = ey * ey;
-      // before calculating the gradient check that all error in x are not zero
-      unsigned int j = 0;
-      while (j < ndim && ex[j] == 0.) {
-         j++;
-      }
-      // if j is less ndim some elements are not zero
-      if (j < ndim) {
-         // need an adapter from a multi-dim function to a one-dimensional
-         ROOT::Math::OneDimMultiFunctionAdapter<const IModelFunction &> f1D(func, x, 0, p);
-         // select optimal step size  (use 10--2 by default as was done in TF1:
-         double kEps = 0.01;
-         double kPrecision = 1.E-8;
-         for (unsigned int icoord = 0; icoord < ndim; ++icoord) {
-            // calculate derivative for each coordinate
-            if (ex[icoord] > 0) {
-               // gradCalc.Gradient(x, p, fval, &grad[0]);
-               f1D.SetCoord(icoord);
-               // optimal spep size (take ex[] as scale for the points and 1% of it
-               double x0 = x[icoord];
-               double h = std::max(kEps * std::abs(ex[icoord]), 8.0 * kPrecision * (std::abs(x0) + kPrecision));
-               double deriv = derivator.Derivative1(f1D, x[icoord], h);
-               double edx = ex[icoord] * deriv;
-               e2 += edx * edx;
-#ifdef DEBUG
-               std::cout << "error for coord " << icoord << " = " << ex[icoord] << " deriv " << deriv << std::endl;
-#endif
-            }
-         }
-      }
-      double w2 = (e2 > 0) ? 1.0 / e2 : 0;
-      double resval = w2 * (y - fval) * (y - fval);
-
-#ifdef DEBUG
-      std::cout << x[0] << "  " << y << " ex " << ex[0] << " ey  " << ey << " params : ";
-      for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
-         std::cout << p[ipar] << "\t";
-      std::cout << "\tfval = " << fval << "\tresval = " << resval << std::endl;
-#endif
-
-      // avoid (infinity and nan ) in the chi2 sum
-      // eventually add possibility of excluding some points (like singularity)
-      if (resval < maxResValue)
-         chi2 += resval;
-      else
-         chi2 += maxResValue;
-   }
-
-   // reset the number of fitting data points
-   nPoints = n; // no points are rejected
-
-#ifdef DEBUG
-   std::cout << "chi2 = " << chi2 << " n = " << nPoints << std::endl;
-#endif
-
-   return chi2;
-}
-
-// Evaluates the gradient of the Chi2 function.
-// This function is used when the model function knows how to calculate the derivative and we can
-// avoid that the minimizer re-computes them.
-//
-// The case of Chi2 effective (errors on coordinate) is not supported
-void Chi2<double>::EvalGradient(const IModelFunctionTempl<double> &f, const BinData &data, const double *p,
-                                double *grad, unsigned int &nPoints, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                                unsigned nChunks)
-{
-
-   if (data.HaveCoordErrors()) {
-      MATH_ERROR_MSG("Chi2<double>::EvaluateChi2Gradient",
-                     "Error on the coordinates are not used in calculating Chi2 gradient");
-      return; // it will assert otherwise later in GetPoint
-   }
-
-   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
-   assert(fg != nullptr); // must be called by a gradient function
-
-   const IGradModelFunction &func = *fg;
-
-#ifdef DEBUG
-   std::cout << "\n\nFit data size = " << n << std::endl;
-   std::cout << "evaluate chi2 using function gradient " << &func << "  " << p << std::endl;
-#endif
-
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-
-   double wrefVolume = 1.0;
-   if (useBinVolume) {
-      if (fitOpt.fNormBinVolume)
-         wrefVolume /= data.RefVolume();
-   }
-
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-
-   unsigned int npar = func.NPar();
-   unsigned initialNPoints = data.Size();
-
-   std::vector<bool> isPointRejected(initialNPoints);
-
-   auto mapFunction = [&](const unsigned int i) {
-      // set all vector values to zero
-      std::vector<double> gradFunc(npar);
-      std::vector<double> pointContribution(npar);
-
-      const auto x1 = data.GetCoordComponent(i, 0);
-      const auto y = data.Value(i);
-      auto invError = data.Error(i);
-
-      invError = (invError != 0.0) ? 1.0 / invError : 1;
-
-      double fval = 0;
-
-      const double *x = nullptr;
-      std::vector<double> xc;
-
-      unsigned int ndim = data.NDim();
-      double binVolume = 1;
-      if (useBinVolume) {
-         const double *x2 = data.BinUpEdge(i);
-
-         xc.resize(ndim);
-         for (unsigned int j = 0; j < ndim; ++j) {
-            auto x1_j = *data.GetCoordComponent(i, j);
-            binVolume *= std::abs(x2[j] - x1_j);
-            xc[j] = 0.5 * (x2[j] + x1_j);
-         }
-
-         x = xc.data();
-
-         // normalize the bin volume using a reference value
-         binVolume *= wrefVolume;
-      } else if (ndim > 1) {
-         xc.resize(ndim);
-         xc[0] = *x1;
-         for (unsigned int j = 1; j < ndim; ++j)
-            xc[j] = *data.GetCoordComponent(i, j);
-         x = xc.data();
-      } else {
-         x = x1;
-      }
-
-      if (!useBinIntegral) {
-         fval = func(x, p);
-         func.ParameterGradient(x, p, &gradFunc[0]);
-      } else {
-         auto x2 = data.BinUpEdge(i);
-         // calculate normalized integral and gradient (divided by bin volume)
-         // need to set function and parameters here in case loop is parallelized
-         fval = igEval(x, x2);
-         CalculateGradientIntegral(func, x, x2, p, &gradFunc[0]);
-      }
-      if (useBinVolume)
-         fval *= binVolume;
-
-#ifdef DEBUG
-      std::cout << x[0] << "  " << y << "  " << 1. / invError << " params : ";
-      for (unsigned int ipar = 0; ipar < npar; ++ipar)
-         std::cout << p[ipar] << "\t";
-      std::cout << "\tfval = " << fval << std::endl;
-#endif
-      if (!std::isfinite(fval)) {
-         isPointRejected[i] = true;
-         // Return a zero contribution to all partial derivatives on behalf of the current point
-         return pointContribution;
-      }
-
-      // loop on the parameters
-      unsigned int ipar = 0;
-      for (; ipar < npar; ++ipar) {
-
-         // correct gradient for bin volumes
-         if (useBinVolume)
-            gradFunc[ipar] *= binVolume;
-
-         // avoid singularity in the function (infinity and nan ) in the chi2 sum
-         // eventually add possibility of excluding some points (like singularity)
-         double dfval = gradFunc[ipar];
-         if (!std::isfinite(dfval)) {
-            break; // exit loop on parameters
-         }
-
-         // calculate derivative point contribution
-         pointContribution[ipar] = -2.0 * (y - fval) * invError * invError * gradFunc[ipar];
-      }
-
-      if (ipar < npar) {
-         // case loop was broken for an overflow in the gradient calculation
-         isPointRejected[i] = true;
-      }
-
-      return pointContribution;
-   };
-
-   // Vertically reduce the set of vectors by summing its equally-indexed components
-   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
-      std::vector<double> result(npar);
-
-      for (auto const &pointContribution : pointContributions) {
-         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-            result[parameterIndex] += pointContribution[parameterIndex];
-      }
-
-      return result;
-   };
-
-   std::vector<double> g(npar);
-
-#ifndef R__USE_IMT
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("Chi2<double>::EvaluateChi2Gradient",
-              "Multithread execution policy requires IMT, which is disabled. Changing "
-              "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      std::vector<std::vector<double>> allGradients(initialNPoints);
-      for (unsigned int i = 0; i < initialNPoints; ++i) {
-         allGradients[i] = mapFunction(i);
-      }
-      g = redFunction(allGradients);
-   }
-#ifdef R__USE_IMT
-   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
-      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
-   }
-#endif
-   else {
-      Error("Chi2<double>::EvaluateChi2Gradient",
-            "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
-   }
-
-#ifndef R__USE_IMT
-   // to fix compiler warning
-   (void)nChunks;
-#endif
-
-   // correct the number of points
-   nPoints = initialNPoints;
-
-   if (std::any_of(isPointRejected.begin(), isPointRejected.end(), [](bool point) { return point; })) {
-      unsigned nRejected = std::accumulate(isPointRejected.begin(), isPointRejected.end(), 0);
-      assert(nRejected <= initialNPoints);
-      nPoints = initialNPoints - nRejected;
-
-      if (nPoints < npar)
-         MATH_ERROR_MSG("Chi2<double>::EvaluateChi2Gradient",
-                        "Error - too many points rejected for overflow in gradient calculation");
-   }
-
-   // copy result
-   std::copy(g.begin(), g.end(), grad);
-}
-
-// evaluate the chi2 contribution (residual term) only for data with no coord-errors
-// This function is used in the specialized least square algorithms like FUMILI or L.M.
-// if we have error on the coordinates the method is not yet implemented
-//  integral option is also not yet implemented
-//  one can use in that case normal chi2 method
-double Chi2<double>::EvalResidual(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
-                                  unsigned int i, double *g)
-{
-   if (data.GetErrorType() == BinData::kCoordError && data.Opt().fCoordErrors) {
-      MATH_ERROR_MSG("Chi2<double>::EvaluateChi2Residual",
-                     "Error on the coordinates are not used in calculating Chi2 residual");
-      return 0; // it will assert otherwise later in GetPoint
-   }
-
-   // func.SetParameters(p);
-
-   double y, invError = 0;
-
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-   bool useExpErrors = (fitOpt.fExpErrors);
-
-   const double *x1 = data.GetPoint(i, y, invError);
-
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-   double fval = 0;
-   unsigned int ndim = data.NDim();
-   double binVolume = 1.0;
-   const double *x2 = 0;
-   if (useBinVolume || useBinIntegral)
-      x2 = data.BinUpEdge(i);
-
-   double *xc = 0;
-
-   if (useBinVolume) {
-      xc = new double[ndim];
-      for (unsigned int j = 0; j < ndim; ++j) {
-         binVolume *= std::abs(x2[j] - x1[j]);
-         xc[j] = 0.5 * (x2[j] + x1[j]);
-      }
-      // normalize the bin volume using a reference value
-      binVolume /= data.RefVolume();
-   }
-
-   const double *x = (useBinVolume) ? xc : x1;
-
-   if (!useBinIntegral) {
-      fval = func(x, p);
-   } else {
-      // calculate integral (normalized by bin volume)
-      // need to set function and parameters here in case loop is parallelized
-      fval = igEval(x1, x2);
-   }
-   // normalize result if requested according to bin volume
-   if (useBinVolume)
-      fval *= binVolume;
-
-   // expected errors
-   if (useExpErrors) {
-      // we need first to check if a weight factor needs to be applied
-      // weight = sumw2/sumw = error**2/content
-      // NOTE: assume histogram is not weighted
-      // don't know how to do with bins with weight = 0
-      // double invWeight = y * invError * invError;
-      // if (invError == 0) invWeight = (data.SumOfError2() > 0) ? data.SumOfContent()/ data.SumOfError2() : 1.0;
-      // compute expected error  as f(x) / weight
-      double invError2 = (fval > 0) ? 1.0 / fval : 0.0;
-      invError = std::sqrt(invError2);
-   }
-
-   double resval = (y - fval) * invError;
-
-   // avoid infinities or nan in  resval
-   resval = CorrectValue(resval);
-
-   // estimate gradient
-   if (g != 0) {
-
-      unsigned int npar = func.NPar();
-      const IGradModelFunction *gfunc = dynamic_cast<const IGradModelFunction *>(&func);
-
-      if (gfunc != 0) {
-         // case function provides gradient
-         if (!useBinIntegral) {
-            gfunc->ParameterGradient(x, p, g);
-         } else {
-            // needs to calculate the integral for each partial derivative
-            CalculateGradientIntegral(*gfunc, x1, x2, p, g);
-         }
-      } else {
-         SimpleGradientCalculator gc(npar, func);
-         if (!useBinIntegral)
-            gc.ParameterGradient(x, p, fval, g);
-         else {
-            // needs to calculate the integral for each partial derivative
-            CalculateGradientIntegral(gc, x1, x2, p, g);
-         }
-      }
-      // mutiply by - 1 * weight
-      for (unsigned int k = 0; k < npar; ++k) {
-         g[k] *= -invError;
-         if (useBinVolume)
-            g[k] *= binVolume;
-      }
-   }
-
-   if (useBinVolume)
-      delete[] xc;
-
-   return resval;
-}
-
-} // namespace FitUtil
-
-} // namespace Fit
-
-} // end namespace ROOT
diff --git a/math/mathcore/src/EvaluateLogL.cxx b/math/mathcore/src/EvaluateLogL.cxx
deleted file mode 100644
index fa78dad06c5..00000000000
--- a/math/mathcore/src/EvaluateLogL.cxx
+++ /dev/null
@@ -1,452 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-// Implementation file for class FitUtil
-
-#include "Fit/BinData.h"
-#include "Fit/UnBinData.h"
-#include "Fit/EvaluateLogL.hxx"
-#include "Math/IFunctionfwd.h"
-#include "Math/IParamFunction.h"
-#include "Math/Integrator.h"
-#include "Math/IntegratorMultiDim.h"
-#include "Math/WrappedFunction.h"
-#include "Math/OneDimFunctionAdapter.h"
-#include "Math/RichardsonDerivator.h"
-
-#include "Math/Error.h"
-#include "Math/Util.h" // for safe log(x)
-
-#include <limits>
-#include <cmath>
-#include <cassert>
-#include <algorithm>
-#include <numeric>
-//#include <memory>
-
-#include "TROOT.h"
-
-//#define DEBUG
-#ifdef DEBUG
-#define NSAMPLE 10
-#include <iostream>
-#endif
-
-//  need to implement integral option
-
-namespace ROOT {
-
-namespace Fit {
-
-namespace FitUtil {
-
-//______________________________________________________________________________________________________
-//
-//  Log Likelihood functions
-//_______________________________________________________________________________________________________
-
-// utility function used by the likelihoods
-
-double EvaluatePdf(const IModelFunction &func, const UnBinData &data, const double *p, unsigned int i, double *g)
-{
-   // evaluate the pdf contribution to the generic logl function in case of bin data
-   // return actually the log of the pdf and its derivatives
-
-   // func.SetParameters(p);
-
-   const double *x = data.Coords(i);
-   double fval = func(x, p);
-   double logPdf = ROOT::Math::Util::EvalLog(fval);
-   // return
-   if (g == 0)
-      return logPdf;
-
-   const IGradModelFunction *gfunc = dynamic_cast<const IGradModelFunction *>(&func);
-
-   // gradient  calculation
-   if (gfunc != 0) {
-      // case function provides gradient
-      gfunc->ParameterGradient(x, p, g);
-   } else {
-      // estimate gradieant numerically with simple 2 point rule
-      // should probably calculate gradient of log(pdf) is more stable numerically
-      SimpleGradientCalculator gc(func.NPar(), func);
-      gc.ParameterGradient(x, p, fval, g);
-   }
-   // divide gradient by function value since returning the logs
-   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar) {
-      g[ipar] /= fval; // this should be checked against infinities
-   }
-
-#ifdef DEBUG
-   std::cout << x[i] << "\t";
-   std::cout << "\tpar = [ " << func.NPar() << " ] =  ";
-   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
-      std::cout << p[ipar] << "\t";
-   std::cout << "\tfval = " << fval;
-   std::cout << "\tgrad = [ ";
-   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
-      std::cout << g[ipar] << "\t";
-   std::cout << " ] " << std::endl;
-#endif
-
-   return logPdf;
-}
-
-double LogL<double>::Eval(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p, int iWeight,
-                          bool extended, unsigned int &nPoints, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                          unsigned nChunks)
-{
-   // evaluate the LogLikelihood
-
-   unsigned int n = data.Size();
-
-   // unsigned int nRejected = 0;
-
-   bool normalizeFunc = false;
-
-// set parameters of the function to cache integral value
-#ifdef USE_PARAMCACHE
-   (const_cast<IModelFunctionTempl<double> &>(func)).SetParameters(p);
-#endif
-
-   nPoints = data.Size(); // npoints
-
-#ifdef R__USE_IMT
-   // in case parameter needs to be propagated to user function use trick to set parameters by calling one time the
-   // function
-   // this will be done in sequential mode and parameters can be set in a thread safe manner
-   if (!normalizeFunc) {
-      if (data.NDim() == 1) {
-         const double *x = data.GetCoordComponent(0, 0);
-         func(x, p);
-      } else {
-         std::vector<double> x(data.NDim());
-         for (unsigned int j = 0; j < data.NDim(); ++j)
-            x[j] = *data.GetCoordComponent(0, j);
-         func(x.data(), p);
-      }
-   }
-#endif
-
-   double norm = 1.0;
-   if (normalizeFunc) {
-      // compute integral of the function
-      std::vector<double> xmin(data.NDim());
-      std::vector<double> xmax(data.NDim());
-      IntegralEvaluator<> igEval(func, p, true);
-      // compute integral in the ranges where is defined
-      if (data.Range().Size() > 0) {
-         norm = 0;
-         for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
-            data.Range().GetRange(&xmin[0], &xmax[0], ir);
-            norm += igEval.Integral(xmin.data(), xmax.data());
-         }
-      } else {
-         // use (-inf +inf)
-         data.Range().GetRange(&xmin[0], &xmax[0]);
-         // check if funcition is zero at +- inf
-         if (func(xmin.data(), p) != 0 || func(xmax.data(), p) != 0) {
-            MATH_ERROR_MSG("FitUtil::LogL<double>::Eval",
-                           "A range has not been set and the function is not zero at +/- inf");
-            return 0;
-         }
-         norm = igEval.Integral(&xmin[0], &xmax[0]);
-      }
-   }
-
-   // needed to compue effective global weight in case of extended likelihood
-
-   auto mapFunction = [&](const unsigned i) {
-      double W = 0;
-      double W2 = 0;
-      double fval = 0;
-
-      if (data.NDim() > 1) {
-         std::vector<double> x(data.NDim());
-         for (unsigned int j = 0; j < data.NDim(); ++j)
-            x[j] = *data.GetCoordComponent(i, j);
-#ifdef USE_PARAMCACHE
-         fval = func(x.data());
-#else
-         fval = func(x.data(), p);
-#endif
-
-         // one -dim case
-      } else {
-         const auto x = data.GetCoordComponent(i, 0);
-#ifdef USE_PARAMCACHE
-         fval = func(x);
-#else
-         fval = func(x, p);
-#endif
-      }
-
-      if (normalizeFunc)
-         fval = fval * (1 / norm);
-
-      // function EvalLog protects against negative or too small values of fval
-      double logval = ROOT::Math::Util::EvalLog(fval);
-      if (iWeight > 0) {
-         double weight = data.Weight(i);
-         logval *= weight;
-         if (iWeight == 2) {
-            logval *= weight; // use square of weights in likelihood
-            if (!extended) {
-               // needed sum of weights and sum of weight square if likelkihood is extended
-               W = weight;
-               W2 = weight * weight;
-            }
-         }
-      }
-      return LikelihoodAux<double>(logval, W, W2);
-   };
-
-#ifdef R__USE_IMT
-   auto redFunction = [](const std::vector<LikelihoodAux<double>> &objs) {
-      return std::accumulate(objs.begin(), objs.end(), LikelihoodAux<double>(0.0, 0.0, 0.0),
-                             [](const LikelihoodAux<double> &l1, const LikelihoodAux<double> &l2) { return l1 + l2; });
-   };
-
-#else
-   (void)nChunks;
-
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("FitUtil::LogL<double>::Eval", "Multithread execution policy requires IMT, which is disabled. Changing "
-                                             "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   double logl{};
-   double sumW{};
-   double sumW2{};
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      for (unsigned int i = 0; i < n; ++i) {
-         auto resArray = mapFunction(i);
-         logl += resArray.logvalue;
-         sumW += resArray.weight;
-         sumW2 += resArray.weight2;
-      }
-#ifdef R__USE_IMT
-   } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size());
-      auto resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
-      logl = resArray.logvalue;
-      sumW = resArray.weight;
-      sumW2 = resArray.weight2;
-#endif
-   } else {
-      Error("FitUtil::LogL<double>::Eval", "Execution policy unknown. Avalaible choices:\n "
-                                           "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                           "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-   }
-
-   if (extended) {
-      // add Poisson extended term
-      double extendedTerm = 0; // extended term in likelihood
-      double nuTot = 0;
-      // nuTot is integral of function in the range
-      // if function has been normalized integral has been already computed
-      if (!normalizeFunc) {
-         IntegralEvaluator<> igEval(func, p, true);
-         std::vector<double> xmin(data.NDim());
-         std::vector<double> xmax(data.NDim());
-
-         // compute integral in the ranges where is defined
-         if (data.Range().Size() > 0) {
-            nuTot = 0;
-            for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
-               data.Range().GetRange(&xmin[0], &xmax[0], ir);
-               nuTot += igEval.Integral(xmin.data(), xmax.data());
-            }
-         } else {
-            // use (-inf +inf)
-            data.Range().GetRange(&xmin[0], &xmax[0]);
-            // check if funcition is zero at +- inf
-            if (func(xmin.data(), p) != 0 || func(xmax.data(), p) != 0) {
-               MATH_ERROR_MSG("FitUtil::LogL<double>::Eval",
-                              "A range has not been set and the function is not zero at +/- inf");
-               return 0;
-            }
-            nuTot = igEval.Integral(&xmin[0], &xmax[0]);
-         }
-
-         // force to be last parameter value
-         // nutot = p[func.NDim()-1];
-         if (iWeight != 2)
-            extendedTerm = -nuTot; // no need to add in this case n log(nu) since is already computed before
-         else {
-            // case use weight square in likelihood : compute total effective weight = sw2/sw
-            // ignore for the moment case when sumW is zero
-            extendedTerm = -(sumW2 / sumW) * nuTot;
-         }
-
-      } else {
-         nuTot = norm;
-         extendedTerm = -nuTot + double(n) * ROOT::Math::Util::EvalLog(nuTot);
-         // in case of weights need to use here sum of weights (to be done)
-      }
-      logl += extendedTerm;
-   }
-
-#ifdef DEBUG
-   std::cout << "Evaluated log L for parameters (";
-   for (unsigned int ip = 0; ip < func.NPar(); ++ip)
-      std::cout << " " << p[ip];
-   std::cout << ")  fval = " << -logl << std::endl;
-#endif
-
-   return -logl;
-}
-
-void LogL<double>::EvalGradient(const IModelFunctionTempl<double> &f, const UnBinData &data, const double *p,
-                                double *grad, unsigned int &, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                                unsigned nChunks)
-{
-   // evaluate the gradient of the log likelihood function
-
-   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
-   assert(fg != nullptr); // must be called by a grad function
-
-   const IGradModelFunction &func = *fg;
-
-   unsigned int npar = func.NPar();
-   unsigned initialNPoints = data.Size();
-
-   (const_cast<IGradModelFunction &>(func)).SetParameters(p);
-
-#ifdef DEBUG
-   std::cout << "\n===> Evaluate Gradient for parameters ";
-   for (unsigned int ip = 0; ip < npar; ++ip)
-      std::cout << "  " << p[ip];
-   std::cout << "\n";
-#endif
-
-   const double kdmax1 = std::sqrt(std::numeric_limits<double>::max());
-   const double kdmax2 = std::numeric_limits<double>::max() / (4 * initialNPoints);
-
-   auto mapFunction = [&](const unsigned int i) {
-      std::vector<double> gradFunc(npar);
-      std::vector<double> pointContribution(npar);
-
-      const double *x = nullptr;
-      std::vector<double> xc;
-      if (data.NDim() > 1) {
-         xc.resize(data.NDim());
-         for (unsigned int j = 0; j < data.NDim(); ++j)
-            xc[j] = *data.GetCoordComponent(i, j);
-         x = xc.data();
-      } else {
-         x = data.GetCoordComponent(i, 0);
-      }
-
-      double fval = func(x, p);
-      func.ParameterGradient(x, p, &gradFunc[0]);
-
-#ifdef DEBUG
-      {
-         R__LOCKGUARD(gROOTMutex);
-         if (i < 5 || (i > data.Size() - 5)) {
-            if (data.NDim() > 1)
-               std::cout << i << "  x " << x[0] << " y " << x[1] << " func " << fval << " gradient " << gradFunc[0]
-                         << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
-            else
-               std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  "
-                         << gradFunc[3] << std::endl;
-         }
-      }
-#endif
-
-      for (unsigned int kpar = 0; kpar < npar; ++kpar) {
-         if (fval > 0)
-            pointContribution[kpar] = -1. / fval * gradFunc[kpar];
-         else if (gradFunc[kpar] != 0) {
-            double gg = kdmax1 * gradFunc[kpar];
-            if (gg > 0)
-               gg = std::min(gg, kdmax2);
-            else
-               gg = std::max(gg, -kdmax2);
-            pointContribution[kpar] = -gg;
-         }
-         // if func derivative is zero term is also zero so do not add in g[kpar]
-      }
-
-      return pointContribution;
-   };
-
-   // Vertically reduce the set of vectors by summing its equally-indexed components
-   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
-      std::vector<double> result(npar);
-
-      for (auto const &pointContribution : pointContributions) {
-         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-            result[parameterIndex] += pointContribution[parameterIndex];
-      }
-
-      return result;
-   };
-
-   std::vector<double> g(npar);
-
-#ifndef R__USE_IMT
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("FitUtil::LogL<double>::EvalGradient",
-              "Multithread execution policy requires IMT, which is disabled. Changing "
-              "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      std::vector<std::vector<double>> allGradients(initialNPoints);
-      for (unsigned int i = 0; i < initialNPoints; ++i) {
-         allGradients[i] = mapFunction(i);
-      }
-      g = redFunction(allGradients);
-   }
-#ifdef R__USE_IMT
-   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
-      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
-   }
-#endif
-   else {
-      Error("FitUtil::LogL<double>::EvalGradient", "Execution policy unknown. Avalaible choices:\n "
-                                                   "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                                   "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-   }
-
-#ifndef R__USE_IMT
-   // to fix compiler warning
-   (void)nChunks;
-#endif
-
-   // copy result
-   std::copy(g.begin(), g.end(), grad);
-
-#ifdef DEBUG
-   std::cout << "FitUtil.cxx : Final gradient ";
-   for (unsigned int param = 0; param < npar; param++) {
-      std::cout << "  " << grad[param];
-   }
-   std::cout << "\n";
-#endif
-}
-
-} // end namespace FitUtil
-
-} // end namespace Fit
-
-} // end namespace ROOT
diff --git a/math/mathcore/src/EvaluatePoissonLogL.cxx b/math/mathcore/src/EvaluatePoissonLogL.cxx
deleted file mode 100644
index b49a2284426..00000000000
--- a/math/mathcore/src/EvaluatePoissonLogL.cxx
+++ /dev/null
@@ -1,541 +0,0 @@
-// @(#)root/mathcore:$Id$
-// Author: L. Moneta Tue Nov 28 10:52:47 2006
-
-/**********************************************************************
- *                                                                    *
- * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
- *                                                                    *
- *                                                                    *
- **********************************************************************/
-
-// Implementation file for class FitUtil
-
-#include "Fit/BinData.h"
-#include "Fit/UnBinData.h"
-#include "Fit/EvaluatePoissonLogL.hxx"
-#include "Math/IFunctionfwd.h"
-#include "Math/IParamFunction.h"
-#include "Math/Integrator.h"
-#include "Math/IntegratorMultiDim.h"
-#include "Math/WrappedFunction.h"
-#include "Math/OneDimFunctionAdapter.h"
-#include "Math/RichardsonDerivator.h"
-
-#include "Math/Error.h"
-#include "Math/Util.h" // for safe log(x)
-
-#include <limits>
-#include <cmath>
-#include <cassert>
-#include <algorithm>
-#include <numeric>
-//#include <memory>
-
-#include "TROOT.h"
-
-//#define DEBUG
-#ifdef DEBUG
-#define NSAMPLE 10
-#include <iostream>
-#endif
-
-//  need to implement integral option
-
-namespace ROOT {
-
-namespace Fit {
-
-namespace FitUtil {
-
-//______________________________________________________________________________________________________
-//
-//  Poisson Log Likelihood functions
-//_______________________________________________________________________________________________________
-
-
-// evaluate the Poisson Log Likelihood
-// for binned likelihood fits
-// this is Sum ( f(x_i)  -  y_i * log( f (x_i) ) )
-// add as well constant term for saturated model to make it like a Chi2/2
-// by default is etended. If extended is false the fit is not extended and
-// the global poisson term is removed (i.e is a binomial fit)
-// (remember that in this case one needs to have a function with a fixed normalization
-// like in a non extended unbinned fit)
-//
-// if use Weight use a weighted dataset
-// iWeight = 1 ==> logL = Sum( w f(x_i) )
-// case of iWeight==1 is actually identical to weight==0
-// iWeight = 2 ==> logL = Sum( w*w * f(x_i) )
-double PoissonLogL<double>::Eval(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
-                                 int iWeight, bool extended, unsigned int &nPoints,
-                                 ::ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
-{
-   unsigned int n = data.Size();
-
-#ifdef USE_PARAMCACHE
-   (const_cast<IModelFunction &>(func)).SetParameters(p);
-#endif
-
-   nPoints = data.Size(); // npoints
-
-   // get fit option and check case of using integral of bins
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-   bool useW2 = (iWeight == 2);
-
-   // normalize if needed by a reference volume value
-   double wrefVolume = 1.0;
-   if (useBinVolume) {
-      if (fitOpt.fNormBinVolume)
-         wrefVolume /= data.RefVolume();
-   }
-
-#ifdef DEBUG
-   std::cout << "Evaluate PoissonLogL for params = [ ";
-   for (unsigned int j = 0; j < func.NPar(); ++j)
-      std::cout << p[j] << " , ";
-   std::cout << "]  - data size = " << n << " useBinIntegral " << useBinIntegral << " useBinVolume " << useBinVolume
-             << " useW2 " << useW2 << " wrefVolume = " << wrefVolume << std::endl;
-#endif
-
-#ifdef USE_PARAMCACHE
-   IntegralEvaluator<> igEval(func, 0, useBinIntegral);
-#else
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-#endif
-
-   auto mapFunction = [&](const unsigned i) {
-      auto x1 = data.GetCoordComponent(i, 0);
-      auto y = *data.ValuePtr(i);
-
-      const double *x = nullptr;
-      std::vector<double> xc;
-      double fval = 0;
-      double binVolume = 1.0;
-
-      if (useBinVolume) {
-         unsigned int ndim = data.NDim();
-         const double *x2 = data.BinUpEdge(i);
-         xc.resize(data.NDim());
-         for (unsigned int j = 0; j < ndim; ++j) {
-            auto xx = *data.GetCoordComponent(i, j);
-            binVolume *= std::abs(x2[j] - xx);
-            xc[j] = 0.5 * (x2[j] + xx);
-         }
-         x = xc.data();
-         // normalize the bin volume using a reference value
-         binVolume *= wrefVolume;
-      } else if (data.NDim() > 1) {
-         xc.resize(data.NDim());
-         xc[0] = *x1;
-         for (unsigned int j = 1; j < data.NDim(); ++j) {
-            xc[j] = *data.GetCoordComponent(i, j);
-         }
-         x = xc.data();
-      } else {
-         x = x1;
-      }
-
-      if (!useBinIntegral) {
-#ifdef USE_PARAMCACHE
-         fval = func(x);
-#else
-         fval = func(x, p);
-#endif
-      } else {
-         // calculate integral (normalized by bin volume)
-         // need to set function and parameters here in case loop is parallelized
-         fval = igEval(x, data.BinUpEdge(i));
-      }
-      if (useBinVolume)
-         fval *= binVolume;
-
-#ifdef DEBUG
-      int NSAMPLE = 100;
-      if (i % NSAMPLE == 0) {
-         std::cout << "evt " << i << " x = [ ";
-         for (unsigned int j = 0; j < func.NDim(); ++j)
-            std::cout << x[j] << " , ";
-         std::cout << "]  ";
-         if (fitOpt.fIntegral) {
-            std::cout << "x2 = [ ";
-            for (unsigned int j = 0; j < func.NDim(); ++j)
-               std::cout << data.BinUpEdge(i)[j] << " , ";
-            std::cout << "] ";
-         }
-         std::cout << "  y = " << y << " fval = " << fval << std::endl;
-      }
-#endif
-
-      // EvalLog protects against 0 values of fval but don't want to add in the -log sum
-      // negative values of fval
-      fval = std::max(fval, 0.0);
-
-      double nloglike = 0; // negative loglikelihood
-      if (useW2) {
-         // apply weight correction . Effective weight is error^2/ y
-         // and expected events in bins is fval/weight
-         // can apply correction only when y is not zero otherwise weight is undefined
-         // (in case of weighted likelihood I don't care about the constant term due to
-         // the saturated model)
-
-         // use for the empty bins the global weight
-         double weight = 1.0;
-         if (y != 0) {
-            double error = data.Error(i);
-            weight = (error * error) / y; // this is the bin effective weight
-            nloglike += weight * y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
-         } else {
-            // for empty bin use the average weight  computed from the total data weight
-            weight = data.SumOfError2() / data.SumOfContent();
-         }
-         if (extended) {
-            nloglike += weight * (fval - y);
-         }
-
-      } else {
-         // standard case no weights or iWeight=1
-         // this is needed for Poisson likelihood (which are extened and not for multinomial)
-         // the formula below  include constant term due to likelihood of saturated model (f(x) = y)
-         // (same formula as in Baker-Cousins paper, page 439 except a factor of 2
-         if (extended)
-            nloglike = fval - y;
-
-         if (y > 0) {
-            nloglike += y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
-         }
-      }
-      return nloglike;
-   };
-
-#ifdef R__USE_IMT
-   auto redFunction = [](const std::vector<double> &objs) {
-      return std::accumulate(objs.begin(), objs.end(), double{});
-   };
-#else
-   (void)nChunks;
-
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("FitUtil::EvaluatePoissonLogL", "Multithread execution policy requires IMT, which is disabled. Changing "
-                                              "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   double res{};
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      for (unsigned int i = 0; i < n; ++i) {
-         res += mapFunction(i);
-      }
-#ifdef R__USE_IMT
-   } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size());
-      res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
-#endif
-   } else {
-      Error("FitUtil::EvaluatePoissonLogL", "Execution policy unknown. Avalaible choices:\n "
-                                            "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
-                                            "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
-   }
-
-#ifdef DEBUG
-   std::cout << "Loglikelihood  = " << res << std::endl;
-#endif
-
-   return res;
-}
-
-// Evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
-// and its gradient
-double PoissonLogL<double>::EvalBinPdf(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
-                                       unsigned int i, double *g)
-{
-   double y = 0;
-   const double *x1 = data.GetPoint(i, y);
-
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-   double fval = 0;
-   const double *x2 = 0;
-   // calculate the bin volume
-   double binVolume = 1;
-   std::vector<double> xc;
-   if (useBinVolume) {
-      unsigned int ndim = data.NDim();
-      xc.resize(ndim);
-      x2 = data.BinUpEdge(i);
-      for (unsigned int j = 0; j < ndim; ++j) {
-         binVolume *= std::abs(x2[j] - x1[j]);
-         xc[j] = 0.5 * (x2[j] + x1[j]);
-      }
-      // normalize the bin volume using a reference value
-      binVolume /= data.RefVolume();
-   }
-
-   const double *x = (useBinVolume) ? &xc.front() : x1;
-
-   if (!useBinIntegral) {
-      fval = func(x, p);
-   } else {
-      // calculate integral normalized (divided by bin volume)
-      x2 = data.BinUpEdge(i);
-      fval = igEval(x1, x2);
-   }
-   if (useBinVolume)
-      fval *= binVolume;
-
-   // logPdf for Poisson: ignore constant term depending on N
-   fval = std::max(fval, 0.0); // avoid negative or too small values
-   double logPdf = -fval;
-   if (y > 0.0) {
-      // include also constants due to saturate model (see Baker-Cousins paper)
-      logPdf += y * ROOT::Math::Util::EvalLog(fval / y) + y;
-   }
-   // need to return the pdf contribution (not the log)
-
-   // double pdfval =  std::exp(logPdf);
-
-   // if (g == 0) return pdfval;
-   if (g == 0)
-      return logPdf;
-
-   unsigned int npar = func.NPar();
-   const IGradModelFunction *gfunc = dynamic_cast<const IGradModelFunction *>(&func);
-
-   // gradient  calculation
-   if (gfunc != 0) {
-      // case function provides gradient
-      if (!useBinIntegral)
-         gfunc->ParameterGradient(x, p, g);
-      else {
-         // needs to calculate the integral for each partial derivative
-         CalculateGradientIntegral(*gfunc, x1, x2, p, g);
-      }
-
-   } else {
-      SimpleGradientCalculator gc(func.NPar(), func);
-      if (!useBinIntegral)
-         gc.ParameterGradient(x, p, fval, g);
-      else {
-         // needs to calculate the integral for each partial derivative
-         CalculateGradientIntegral(gc, x1, x2, p, g);
-      }
-   }
-   // correct g[] do be derivative of poisson term
-   for (unsigned int k = 0; k < npar; ++k) {
-      // apply bin volume correction
-      if (useBinVolume)
-         g[k] *= binVolume;
-
-      // correct for Poisson term
-      if (fval > 0)
-         g[k] *= (y / fval - 1.); //* pdfval;
-      else if (y > 0) {
-         const double kdmax1 = std::sqrt(std::numeric_limits<double>::max());
-         g[k] *= kdmax1;
-      } else // y == 0 cannot have  negative y
-         g[k] *= -1;
-   }
-
-#ifdef DEBUG
-   std::cout << "x = " << x[0] << " logPdf = " << logPdf << " grad";
-   for (unsigned int ipar = 0; ipar < npar; ++ipar)
-      std::cout << g[ipar] << "\t";
-   std::cout << std::endl;
-#endif
-
-   //   return pdfval;
-   return logPdf;
-}
-
-// Evaluate the gradient of the Poisson log likelihood function
-void PoissonLogL<double>::EvalGradient(const IModelFunctionTempl<double> &f, const BinData &data, const double *p,
-                                       double *grad, unsigned int &, ::ROOT::Fit::ExecutionPolicy executionPolicy,
-                                       unsigned nChunks)
-{
-   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
-   assert(fg != nullptr); // must be called by a grad function
-
-   const IGradModelFunction &func = *fg;
-
-#ifdef USE_PARAMCACHE
-   (const_cast<IGradModelFunction &>(func)).SetParameters(p);
-#endif
-
-   const DataOptions &fitOpt = data.Opt();
-   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
-   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
-
-   double wrefVolume = 1.0;
-   if (useBinVolume && fitOpt.fNormBinVolume)
-      wrefVolume /= data.RefVolume();
-
-   IntegralEvaluator<> igEval(func, p, useBinIntegral);
-
-   unsigned int npar = func.NPar();
-   unsigned initialNPoints = data.Size();
-
-   auto mapFunction = [&](const unsigned int i) {
-      // set all vector values to zero
-      std::vector<double> gradFunc(npar);
-      std::vector<double> pointContribution(npar);
-
-      const auto x1 = data.GetCoordComponent(i, 0);
-      const auto y = data.Value(i);
-      auto invError = data.Error(i);
-
-      invError = (invError != 0.0) ? 1.0 / invError : 1;
-
-      double fval = 0;
-
-      const double *x = nullptr;
-      std::vector<double> xc;
-
-      unsigned ndim = data.NDim();
-      double binVolume = 1.0;
-      if (useBinVolume) {
-         const double *x2 = data.BinUpEdge(i);
-
-         xc.resize(ndim);
-         for (unsigned int j = 0; j < ndim; ++j) {
-            auto x1_j = *data.GetCoordComponent(i, j);
-            binVolume *= std::abs(x2[j] - x1_j);
-            xc[j] = 0.5 * (x2[j] + x1_j);
-         }
-
-         x = xc.data();
-
-         // normalize the bin volume using a reference value
-         binVolume *= wrefVolume;
-      } else if (ndim > 1) {
-         xc.resize(ndim);
-         xc[0] = *x1;
-         for (unsigned int j = 1; j < ndim; ++j)
-            xc[j] = *data.GetCoordComponent(i, j);
-         x = xc.data();
-      } else {
-         x = x1;
-      }
-
-      if (!useBinIntegral) {
-         fval = func(x, p);
-         func.ParameterGradient(x, p, &gradFunc[0]);
-      } else {
-         // calculate integral (normalized by bin volume)
-         // need to set function and parameters here in case loop is parallelized
-         auto x2 = data.BinUpEdge(i);
-         fval = igEval(x, x2);
-         CalculateGradientIntegral(func, x, x2, p, &gradFunc[0]);
-      }
-      if (useBinVolume)
-         fval *= binVolume;
-
-#ifdef DEBUG
-      {
-         R__LOCKGUARD(gROOTMutex);
-         if (i < 5 || (i > data.Size() - 5)) {
-            if (data.NDim() > 1)
-               std::cout << i << "  x " << x[0] << " y " << x[1] << " func " << fval << " gradient " << gradFunc[0]
-                         << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
-            else
-               std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  "
-                         << gradFunc[3] << std::endl;
-         }
-      }
-#endif
-
-      // correct the gradient
-      for (unsigned int ipar = 0; ipar < npar; ++ipar) {
-
-         // correct gradient for bin volumes
-         if (useBinVolume)
-            gradFunc[ipar] *= binVolume;
-
-         // df/dp * (1.  - y/f )
-         if (fval > 0)
-            pointContribution[ipar] = gradFunc[ipar] * (1. - y / fval);
-         else if (gradFunc[ipar] != 0) {
-            const double kdmax1 = std::sqrt(std::numeric_limits<double>::max());
-            const double kdmax2 = std::numeric_limits<double>::max() / (4 * initialNPoints);
-            double gg = kdmax1 * gradFunc[ipar];
-            if (gg > 0)
-               gg = std::min(gg, kdmax2);
-            else
-               gg = std::max(gg, -kdmax2);
-            pointContribution[ipar] = -gg;
-         }
-      }
-
-      return pointContribution;
-   };
-
-   // Vertically reduce the set of vectors by summing its equally-indexed components
-   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
-      std::vector<double> result(npar);
-
-      for (auto const &pointContribution : pointContributions) {
-         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
-            result[parameterIndex] += pointContribution[parameterIndex];
-      }
-
-      return result;
-   };
-
-   std::vector<double> g(npar);
-
-#ifndef R__USE_IMT
-   // If IMT is disabled, force the execution policy to the serial case
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      Warning("FitUtil::EvaluatePoissonLogLGradient",
-              "Multithread execution policy requires IMT, which is disabled. Changing "
-              "to ROOT::Fit::ExecutionPolicy::kSerial.");
-      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
-   }
-#endif
-
-   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
-      std::vector<std::vector<double>> allGradients(initialNPoints);
-      for (unsigned int i = 0; i < initialNPoints; ++i) {
-         allGradients[i] = mapFunction(i);
-      }
-      g = redFunction(allGradients);
-   }
-#ifdef R__USE_IMT
-   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
-      ROOT::TThreadExecutor pool;
-      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
-      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
-   }
-#endif
-   else {
-      Error("FitUtil::EvaluatePoissonLogLGradient",
-            "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
-   }
-
-#ifndef R__USE_IMT
-   // to fix compiler warning
-   (void)nChunks;
-#endif
-
-   // copy result
-   std::copy(g.begin(), g.end(), grad);
-
-#ifdef DEBUG
-   std::cout << "***** Final gradient : ";
-   for (unsigned int ii = 0; ii < npar; ++ii)
-      std::cout << grad[ii] << "   ";
-   std::cout << "\n";
-#endif
-}
-
-} // end namespace FitUtil
-
-} // end namespace Fit
-
-} // end namespace ROOT
diff --git a/math/mathcore/src/FitUtil.cxx b/math/mathcore/src/FitUtil.cxx
new file mode 100644
index 00000000000..2fc483f9e5b
--- /dev/null
+++ b/math/mathcore/src/FitUtil.cxx
@@ -0,0 +1,1756 @@
+// @(#)root/mathcore:$Id$
+// Author: L. Moneta Tue Nov 28 10:52:47 2006
+
+/**********************************************************************
+ *                                                                    *
+ * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
+ *                                                                    *
+ *                                                                    *
+ **********************************************************************/
+
+// Implementation file for class FitUtil
+
+#include "Fit/FitUtil.h"
+
+#include "Fit/BinData.h"
+#include "Fit/UnBinData.h"
+
+#include "Math/IFunctionfwd.h"
+#include "Math/IParamFunction.h"
+#include "Math/Integrator.h"
+#include "Math/IntegratorMultiDim.h"
+#include "Math/WrappedFunction.h"
+#include "Math/OneDimFunctionAdapter.h"
+#include "Math/RichardsonDerivator.h"
+
+#include "Math/Error.h"
+#include "Math/Util.h"  // for safe log(x)
+
+#include <limits>
+#include <cmath>
+#include <cassert>
+#include <algorithm>
+#include <numeric>
+//#include <memory>
+
+#include "TROOT.h"
+
+//#define DEBUG
+#ifdef DEBUG
+#define NSAMPLE 10
+#include <iostream>
+#endif
+
+//  need to implement integral option
+
+namespace ROOT {
+
+   namespace Fit {
+
+      namespace FitUtil {
+
+         // derivative with respect of the parameter to be integrated
+         template<class GradFunc = IGradModelFunction>
+         struct ParamDerivFunc {
+            ParamDerivFunc(const GradFunc & f) : fFunc(f), fIpar(0) {}
+            void SetDerivComponent(unsigned int ipar) { fIpar = ipar; }
+            double operator() (const double *x, const double *p) const {
+               return fFunc.ParameterDerivative( x, p, fIpar );
+            }
+            unsigned int NDim() const { return fFunc.NDim(); }
+            const GradFunc & fFunc;
+            unsigned int fIpar;
+         };
+
+// simple gradient calculator using the 2 points rule
+
+         class SimpleGradientCalculator {
+
+         public:
+            // construct from function and gradient dimension gdim
+            // gdim = npar for parameter gradient
+            // gdim = ndim for coordinate gradients
+            // construct (the param values will be passed later)
+            // one can choose between 2 points rule (1 extra evaluation) istrat=1
+            // or two point rule (2 extra evaluation)
+            // (found 2 points rule does not work correctly - minuit2FitBench fails)
+            SimpleGradientCalculator(int gdim, const IModelFunction & func,double eps = 2.E-8, int istrat = 1) :
+               fEps(eps),
+               fPrecision(1.E-8 ), // sqrt(epsilon)
+               fStrategy(istrat),
+               fN(gdim ),
+               fFunc(func),
+               fVec(std::vector<double>(gdim) ) // this can be probably optimized
+            {}
+
+            // internal method to calculate single partial derivative
+            // assume cached vector fVec is already set
+            double DoParameterDerivative(const double *x, const double *p, double f0, int k) const {
+               double p0 = p[k];
+               double h = std::max( fEps* std::abs(p0), 8.0*fPrecision*(std::abs(p0) + fPrecision) );
+               fVec[k] += h;
+               double deriv = 0;
+               // t.b.d : treat case of infinities
+               //if (fval > - std::numeric_limits<double>::max() && fval < std::numeric_limits<double>::max() )
+               double f1 = fFunc(x, &fVec.front() );
+               if (fStrategy > 1) {
+                  fVec[k] = p0 - h;
+                  double f2 = fFunc(x, &fVec.front() );
+                  deriv = 0.5 * ( f2 - f1 )/h;
+               }
+               else
+                  deriv = ( f1 - f0 )/h;
+
+               fVec[k] = p[k]; // restore original p value
+               return deriv;
+            }
+            // number of dimension in x (needed when calculating the integrals)
+            unsigned int NDim() const {
+               return fFunc.NDim();
+            }
+            // number of parameters (needed for grad ccalculation)
+            unsigned int NPar() const {
+               return fFunc.NPar();
+            }
+
+            double ParameterDerivative(const double *x, const double *p, int ipar) const {
+               // fVec are the cached parameter values
+               std::copy(p, p+fN, fVec.begin());
+               double f0 = fFunc(x, p);
+               return DoParameterDerivative(x,p,f0,ipar);
+            }
+
+            // calculate all gradient at point (x,p) knnowing already value f0 (we gain a function eval.)
+            void ParameterGradient(const double * x, const double * p, double f0, double * g) {
+               // fVec are the cached parameter values
+               std::copy(p, p+fN, fVec.begin());
+               for (unsigned int k = 0; k < fN; ++k) {
+                  g[k] = DoParameterDerivative(x,p,f0,k);
+               }
+            }
+
+            // calculate gradient w.r coordinate values
+            void Gradient(const double * x, const double * p, double f0, double * g) {
+               // fVec are the cached coordinate values
+               std::copy(x, x+fN, fVec.begin());
+               for (unsigned int k = 0; k < fN; ++k) {
+                  double x0 = x[k];
+                  double h = std::max( fEps* std::abs(x0), 8.0*fPrecision*(std::abs(x0) + fPrecision) );
+                  fVec[k] += h;
+                  // t.b.d : treat case of infinities
+                  //if (fval > - std::numeric_limits<double>::max() && fval < std::numeric_limits<double>::max() )
+                  double f1 = fFunc( &fVec.front(), p );
+                  if (fStrategy > 1) {
+                     fVec[k] = x0 - h;
+                     double f2 = fFunc( &fVec.front(), p  );
+                     g[k] = 0.5 * ( f2 - f1 )/h;
+                  }
+                  else
+                     g[k] = ( f1 - f0 )/h;
+
+                  fVec[k] = x[k]; // restore original x value
+               }
+            }
+
+         private:
+
+            double fEps;
+            double fPrecision;
+            int fStrategy; // strategy in calculation ( =1 use 2 point rule( 1 extra func) , = 2 use r point rule)
+            unsigned int fN; // gradient dimension
+            const IModelFunction & fFunc;
+            mutable std::vector<double> fVec; // cached coordinates (or parameter values in case of gradientpar)
+         };
+
+
+         // function to avoid infinities or nan
+         double CorrectValue(double rval) {
+            // avoid infinities or nan in  rval
+            if (rval > - std::numeric_limits<double>::max() && rval < std::numeric_limits<double>::max() )
+               return rval;
+            else if (rval < 0)
+               // case -inf
+               return -std::numeric_limits<double>::max();
+            else
+               // case + inf or nan
+               return  + std::numeric_limits<double>::max();
+         }
+
+         // Check if the value is a finite number. The argument rval is updated if it is infinite or NaN,
+         // setting it to the maximum finite value (preserving the sign).
+         bool CheckInfNaNValue(double &rval)
+         {
+            if (rval > - std::numeric_limits<double>::max() && rval < std::numeric_limits<double>::max() )
+               return true;
+            else if (rval < 0) {
+               // case -inf
+               rval =  -std::numeric_limits<double>::max();
+               return false;
+            }
+            else {
+               // case + inf or nan
+               rval =  + std::numeric_limits<double>::max();
+               return false;
+            }
+         }
+
+
+         // calculation of the integral of the gradient functions
+         // for a function providing derivative w.r.t parameters
+         // x1 and x2 defines the integration interval , p the parameters
+         template <class GFunc>
+         void CalculateGradientIntegral(const GFunc & gfunc,
+                                        const double *x1, const double * x2, const double * p, double *g) {
+
+            // needs to calculate the integral for each partial derivative
+            ParamDerivFunc<GFunc> pfunc( gfunc);
+            IntegralEvaluator<ParamDerivFunc<GFunc> > igDerEval( pfunc, p, true);
+            // loop on the parameters
+            unsigned int npar = gfunc.NPar();
+            for (unsigned int k = 0; k < npar; ++k ) {
+               pfunc.SetDerivComponent(k);
+               g[k] = igDerEval( x1, x2 );
+            }
+         }
+
+
+
+      } // end namespace  FitUtil
+
+
+
+//___________________________________________________________________________________________________________________________
+// for chi2 functions
+//___________________________________________________________________________________________________________________________
+
+      double FitUtil::EvaluateChi2(const IModelFunction &func, const BinData &data, const double *p, unsigned int &,
+                                   ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
+      {
+         // evaluate the chi2 given a  function reference  , the data and returns the value and also in nPoints
+         // the actual number of used points
+         // normal chi2 using only error on values (from fitting histogram)
+         // optionally the integral of function in the bin is used
+
+         unsigned int n = data.Size();
+
+      // set parameters of the function to cache integral value
+#ifdef USE_PARAMCACHE
+   (const_cast<IModelFunction &>(func)).SetParameters(p);
+#endif
+   // do not cache parameter values (it is not thread safe)
+   //func.SetParameters(p);
+
+
+   // get fit option and check case if using integral of bins
+   const DataOptions & fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+   bool useExpErrors = (fitOpt.fExpErrors);
+   bool isWeighted = data.IsWeighted();
+
+#ifdef DEBUG
+   std::cout << "\n\nFit data size = " << n << std::endl;
+   std::cout << "evaluate chi2 using function " << &func << "  " << p << std::endl;
+   std::cout << "use empty bins  " << fitOpt.fUseEmpty << std::endl;
+   std::cout << "use integral    " << fitOpt.fIntegral << std::endl;
+   std::cout << "use all error=1 " << fitOpt.fErrors1 << std::endl;
+   if (isWeighted)   std::cout << "Weighted data set - sumw =  " << data.SumOfContent() << "  sumw2 = " << data.SumOfError2() << std::endl;
+#endif
+
+#ifdef USE_PARAMCACHE
+   IntegralEvaluator<> igEval( func, 0, useBinIntegral);
+#else
+   IntegralEvaluator<> igEval( func, p, useBinIntegral);
+#endif
+   double maxResValue = std::numeric_limits<double>::max() /n;
+   double wrefVolume = 1.0;
+   if (useBinVolume) {
+      if (fitOpt.fNormBinVolume) wrefVolume /= data.RefVolume();
+   }
+
+   (const_cast<IModelFunction &>(func)).SetParameters(p);
+
+   auto mapFunction = [&](const unsigned i){
+
+      double chi2{};
+      double fval{};
+
+      const auto x1 = data.GetCoordComponent(i, 0);
+      const auto y = data.Value(i);
+      auto invError = data.InvError(i);
+
+      //invError = (invError!= 0.0) ? 1.0/invError :1;
+
+      const double * x = nullptr;
+      std::vector<double> xc;
+      double binVolume = 1.0;
+      if (useBinVolume) {
+         unsigned int ndim = data.NDim();
+         const double * x2 = data.BinUpEdge(i);
+         xc.resize(data.NDim());
+         for (unsigned int j = 0; j < ndim; ++j) {
+            auto xx = *data.GetCoordComponent(i, j);
+            binVolume *= std::abs(x2[j]- xx);
+            xc[j] = 0.5*(x2[j]+ xx);
+         }
+         x = xc.data();
+         // normalize the bin volume using a reference value
+         binVolume *= wrefVolume;
+      } else if(data.NDim() > 1) {
+         xc.resize(data.NDim());
+         xc[0] = *x1;
+         for (unsigned int j = 1; j < data.NDim(); ++j)
+            xc[j] = *data.GetCoordComponent(i, j);
+         x = xc.data();
+      } else {
+            x = x1;
+      }
+
+
+      if (!useBinIntegral) {
+#ifdef USE_PARAMCACHE
+         fval = func ( x );
+#else
+         fval = func ( x, p );
+#endif
+      }
+      else {
+         // calculate integral normalized by bin volume
+         // need to set function and parameters here in case loop is parallelized
+         fval = igEval( x, data.BinUpEdge(i)) ;
+      }
+      // normalize result if requested according to bin volume
+      if (useBinVolume) fval *= binVolume;
+
+      // expected errors
+      if (useExpErrors) {
+         double invWeight  = 1.0; 
+         if (isWeighted) {
+            // we need first to check if a weight factor needs to be applied
+            // weight = sumw2/sumw = error**2/content
+            //invWeight = y * invError * invError;
+            // we use always the global weight and not the observed one in the bin
+            // for empty bins use global weight (if it is weighted data.SumError2() is not zero)
+            invWeight = data.SumOfContent()/ data.SumOfError2();
+            //if (invError > 0) invWeight = y * invError * invError;
+         }
+         
+         //  if (invError == 0) invWeight = (data.SumOfError2() > 0) ? data.SumOfContent()/ data.SumOfError2() : 1.0;
+         // compute expected error  as f(x) / weight
+         double invError2 = (fval > 0) ? invWeight / fval : 0.0;
+         invError = std::sqrt(invError2);
+         //std::cout << "using Pearson chi2 " << x[0] << "  " << 1./invError2 << "  " << fval << std::endl;
+      }
+
+//#define DEBUG
+#ifdef DEBUG
+      std::cout << x[0] << "  " << y << "  " << 1./invError << " params : ";
+      for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
+         std::cout << p[ipar] << "\t";
+      std::cout << "\tfval = " << fval << " bin volume " << binVolume << " ref " << wrefVolume << std::endl;
+#endif
+//#undef DEBUG
+
+      if (invError > 0) {
+
+         double tmp = ( y -fval )* invError;
+         double resval = tmp * tmp;
+
+
+         // avoid inifinity or nan in chi2 values due to wrong function values
+         if ( resval < maxResValue )
+            chi2 += resval;
+         else {
+            //nRejected++;
+            chi2 += maxResValue;
+         }
+      }
+      return chi2;
+  };
+
+#ifdef R__USE_IMT
+  auto redFunction = [](const std::vector<double> & objs){
+                          return std::accumulate(objs.begin(), objs.end(), double{});
+  };
+#else
+  (void)nChunks;
+
+  // If IMT is disabled, force the execution policy to the serial case
+  if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+     Warning("FitUtil::EvaluateChi2", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                      "to ROOT::Fit::ExecutionPolicy::kSerial.");
+     executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+  }
+#endif
+
+  double res{};
+  if(executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial){
+    for (unsigned int i=0; i<n; ++i) {
+      res += mapFunction(i);
+    }
+#ifdef R__USE_IMT
+  } else if(executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+    ROOT::TThreadExecutor pool;
+    auto chunks = nChunks !=0? nChunks: setAutomaticChunking(data.Size());
+    res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
+#endif
+//   } else if(executionPolicy == ROOT::Fit::kMultitProcess){
+    // ROOT::TProcessExecutor pool;
+    // res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+  } else{
+    Error("FitUtil::EvaluateChi2","Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+  }
+
+   return res;
+}
+
+
+//___________________________________________________________________________________________________________________________
+
+double FitUtil::EvaluateChi2Effective(const IModelFunction & func, const BinData & data, const double * p, unsigned int & nPoints) {
+   // evaluate the chi2 given a  function reference  , the data and returns the value and also in nPoints
+   // the actual number of used points
+   // method using the error in the coordinates
+   // integral of bin does not make sense in this case
+
+   unsigned int n = data.Size();
+
+#ifdef DEBUG
+   std::cout << "\n\nFit data size = " << n << std::endl;
+   std::cout << "evaluate effective chi2 using function " << &func << "  " << p << std::endl;
+#endif
+
+   assert(data.HaveCoordErrors()  || data.HaveAsymErrors());
+
+   double chi2 = 0;
+   //int nRejected = 0;
+
+
+   //func.SetParameters(p);
+
+   unsigned int ndim = func.NDim();
+
+   // use Richardson derivator
+   ROOT::Math::RichardsonDerivator derivator;
+
+   double maxResValue = std::numeric_limits<double>::max() /n;
+
+
+
+   for (unsigned int i = 0; i < n; ++ i) {
+
+
+      double y = 0;
+      const double * x = data.GetPoint(i,y);
+
+      double fval = func( x, p );
+
+      double delta_y_func = y - fval;
+
+
+      double ey = 0;
+      const double * ex = 0;
+      if (!data.HaveAsymErrors() )
+         ex = data.GetPointError(i, ey);
+      else {
+         double eylow, eyhigh = 0;
+         ex = data.GetPointError(i, eylow, eyhigh);
+         if ( delta_y_func < 0)
+            ey = eyhigh; // function is higher than points
+         else
+            ey = eylow;
+      }
+      double e2 = ey * ey;
+      // before calculating the gradient check that all error in x are not zero
+      unsigned int j = 0;
+      while ( j < ndim && ex[j] == 0.)  { j++; }
+      // if j is less ndim some elements are not zero
+      if (j < ndim) {
+         // need an adapter from a multi-dim function to a one-dimensional
+         ROOT::Math::OneDimMultiFunctionAdapter<const IModelFunction &> f1D(func,x,0,p);
+         // select optimal step size  (use 10--2 by default as was done in TF1:
+         double kEps = 0.01;
+         double kPrecision = 1.E-8;
+         for (unsigned int icoord = 0; icoord < ndim; ++icoord) {
+            // calculate derivative for each coordinate
+            if (ex[icoord] > 0) {
+               //gradCalc.Gradient(x, p, fval, &grad[0]);
+               f1D.SetCoord(icoord);
+               // optimal spep size (take ex[] as scale for the points and 1% of it
+               double x0= x[icoord];
+               double h = std::max( kEps* std::abs(ex[icoord]), 8.0*kPrecision*(std::abs(x0) + kPrecision) );
+               double deriv = derivator.Derivative1(f1D, x[icoord], h);
+               double edx = ex[icoord] * deriv;
+               e2 += edx * edx;
+#ifdef DEBUG
+               std::cout << "error for coord " << icoord << " = " << ex[icoord] << " deriv " << deriv << std::endl;
+#endif
+            }
+         }
+      }
+      double w2 = (e2 > 0) ? 1.0/e2 : 0;
+      double resval = w2 * ( y - fval ) *  ( y - fval);
+
+#ifdef DEBUG
+      std::cout << x[0] << "  " << y << " ex " << ex[0] << " ey  " << ey << " params : ";
+      for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
+         std::cout << p[ipar] << "\t";
+      std::cout << "\tfval = " << fval << "\tresval = " << resval << std::endl;
+#endif
+
+      // avoid (infinity and nan ) in the chi2 sum
+      // eventually add possibility of excluding some points (like singularity)
+      if ( resval < maxResValue )
+         chi2 += resval;
+      else
+         chi2 += maxResValue;
+      //nRejected++;
+
+   }
+
+   // reset the number of fitting data points
+   nPoints = n;  // no points are rejected
+   //if (nRejected != 0)  nPoints = n - nRejected;
+
+#ifdef DEBUG
+   std::cout << "chi2 = " << chi2 << " n = " << nPoints  << std::endl;
+#endif
+
+   return chi2;
+
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+/// evaluate the chi2 contribution (residual term) only for data with no coord-errors
+/// This function is used in the specialized least square algorithms like FUMILI or L.M.
+/// if we have error on the coordinates the method is not yet implemented
+///  integral option is also not yet implemented
+///  one can use in that case normal chi2 method
+
+double FitUtil::EvaluateChi2Residual(const IModelFunction & func, const BinData & data, const double * p, unsigned int i, double * g) {
+   if (data.GetErrorType() == BinData::kCoordError && data.Opt().fCoordErrors ) {
+      MATH_ERROR_MSG("FitUtil::EvaluateChi2Residual","Error on the coordinates are not used in calculating Chi2 residual");
+      return 0; // it will assert otherwise later in GetPoint
+   }
+
+
+   //func.SetParameters(p);
+
+   double y, invError = 0;
+
+   const DataOptions & fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+   bool useExpErrors = (fitOpt.fExpErrors);
+
+   const double * x1 = data.GetPoint(i,y, invError);
+
+   IntegralEvaluator<> igEval( func, p, useBinIntegral);
+   double fval = 0;
+   unsigned int ndim = data.NDim();
+   double binVolume = 1.0;
+   const double * x2 = 0;
+   if (useBinVolume || useBinIntegral) x2 = data.BinUpEdge(i);
+
+   double * xc = 0;
+
+   if (useBinVolume) {
+      xc = new double[ndim];
+      for (unsigned int j = 0; j < ndim; ++j) {
+         binVolume *= std::abs( x2[j]-x1[j] );
+         xc[j] = 0.5*(x2[j]+ x1[j]);
+      }
+      // normalize the bin volume using a reference value
+      binVolume /= data.RefVolume();
+   }
+
+   const double * x = (useBinVolume) ? xc : x1;
+
+   if (!useBinIntegral) {
+      fval = func ( x, p );
+   }
+   else {
+      // calculate integral (normalized by bin volume)
+      // need to set function and parameters here in case loop is parallelized
+      fval = igEval( x1, x2) ;
+   }
+   // normalize result if requested according to bin volume
+   if (useBinVolume) fval *= binVolume;
+
+   // expected errors
+   if (useExpErrors) {
+      // we need first to check if a weight factor needs to be applied
+      // weight = sumw2/sumw = error**2/content
+      //NOTE: assume histogram is not weighted
+      // don't know how to do with bins with weight = 0
+      //double invWeight = y * invError * invError;
+      // if (invError == 0) invWeight = (data.SumOfError2() > 0) ? data.SumOfContent()/ data.SumOfError2() : 1.0;
+      // compute expected error  as f(x) / weight
+      double invError2 = (fval > 0) ? 1.0 / fval : 0.0;
+      invError = std::sqrt(invError2);
+   }
+
+
+   double resval =   ( y -fval )* invError;
+
+   // avoid infinities or nan in  resval
+   resval = CorrectValue(resval);
+
+   // estimate gradient
+   if (g != 0) {
+
+      unsigned int npar = func.NPar();
+      const IGradModelFunction * gfunc = dynamic_cast<const IGradModelFunction *>( &func);
+
+      if (gfunc != 0) {
+         //case function provides gradient
+         if (!useBinIntegral ) {
+            gfunc->ParameterGradient(  x , p, g );
+         }
+         else {
+            // needs to calculate the integral for each partial derivative
+            CalculateGradientIntegral( *gfunc, x1, x2, p, g);
+         }
+      }
+      else {
+         SimpleGradientCalculator  gc( npar, func);
+         if (!useBinIntegral )
+            gc.ParameterGradient(x, p, fval, g);
+         else {
+            // needs to calculate the integral for each partial derivative
+            CalculateGradientIntegral( gc, x1, x2, p, g);
+         }
+      }
+      // mutiply by - 1 * weight
+      for (unsigned int k = 0; k < npar; ++k) {
+         g[k] *= - invError;
+         if (useBinVolume) g[k] *= binVolume;
+      }
+   }
+
+   if (useBinVolume) delete [] xc;
+
+   return resval;
+
+}
+
+void FitUtil::EvaluateChi2Gradient(const IModelFunction &f, const BinData &data, const double *p, double *grad,
+                                   unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
+{
+   // evaluate the gradient of the chi2 function
+   // this function is used when the model function knows how to calculate the derivative and we can
+   // avoid that the minimizer re-computes them
+   //
+   // case of chi2 effective (errors on coordinate) is not supported
+
+   if (data.HaveCoordErrors()) {
+      MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
+                     "Error on the coordinates are not used in calculating Chi2 gradient");
+      return; // it will assert otherwise later in GetPoint
+   }
+
+   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
+   assert(fg != nullptr); // must be called by a gradient function
+
+   const IGradModelFunction &func = *fg;
+
+#ifdef DEBUG
+   std::cout << "\n\nFit data size = " << n << std::endl;
+   std::cout << "evaluate chi2 using function gradient " << &func << "  " << p << std::endl;
+#endif
+
+   const DataOptions &fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+
+   double wrefVolume = 1.0;
+   if (useBinVolume) {
+      if (fitOpt.fNormBinVolume) wrefVolume /= data.RefVolume();
+   }
+
+   IntegralEvaluator<> igEval(func, p, useBinIntegral);
+
+   unsigned int npar = func.NPar();
+   unsigned initialNPoints = data.Size();
+
+   std::vector<bool> isPointRejected(initialNPoints);
+
+   auto mapFunction = [&](const unsigned int i) {
+      // set all vector values to zero
+      std::vector<double> gradFunc(npar);
+      std::vector<double> pointContribution(npar);
+
+      const auto x1 = data.GetCoordComponent(i, 0);
+      const auto y = data.Value(i);
+      auto invError = data.Error(i);
+
+      invError = (invError != 0.0) ? 1.0 / invError : 1;
+
+      double fval = 0;
+
+      const double *x = nullptr;
+      std::vector<double> xc;
+
+      unsigned int ndim = data.NDim();
+      double binVolume = 1;
+      if (useBinVolume) {
+         const double *x2 = data.BinUpEdge(i);
+
+         xc.resize(ndim);
+         for (unsigned int j = 0; j < ndim; ++j) {
+            auto x1_j = *data.GetCoordComponent(i, j);
+            binVolume *= std::abs(x2[j] - x1_j);
+            xc[j] = 0.5 * (x2[j] + x1_j);
+         }
+
+         x = xc.data();
+
+         // normalize the bin volume using a reference value
+         binVolume *= wrefVolume;
+      } else if (ndim > 1) {
+         xc.resize(ndim);
+         xc[0] = *x1;
+         for (unsigned int j = 1; j < ndim; ++j)
+            xc[j] = *data.GetCoordComponent(i, j);
+         x = xc.data();
+      } else {
+         x = x1;
+      }
+
+      if (!useBinIntegral) {
+         fval = func(x, p);
+         func.ParameterGradient(x, p, &gradFunc[0]);
+      } else {
+         auto x2 = data.BinUpEdge(i);
+         // calculate normalized integral and gradient (divided by bin volume)
+         // need to set function and parameters here in case loop is parallelized
+         fval = igEval(x, x2);
+         CalculateGradientIntegral(func, x, x2, p, &gradFunc[0]);
+      }
+      if (useBinVolume)
+         fval *= binVolume;
+
+#ifdef DEBUG
+      std::cout << x[0] << "  " << y << "  " << 1. / invError << " params : ";
+      for (unsigned int ipar = 0; ipar < npar; ++ipar)
+         std::cout << p[ipar] << "\t";
+      std::cout << "\tfval = " << fval << std::endl;
+#endif
+      if (!CheckInfNaNValue(fval)) {
+         isPointRejected[i] = true;
+         // Return a zero contribution to all partial derivatives on behalf of the current point
+         return pointContribution;
+      }
+
+      // loop on the parameters
+      unsigned int ipar = 0;
+      for (; ipar < npar; ++ipar) {
+
+         // correct gradient for bin volumes
+         if (useBinVolume)
+            gradFunc[ipar] *= binVolume;
+
+         // avoid singularity in the function (infinity and nan ) in the chi2 sum
+         // eventually add possibility of excluding some points (like singularity)
+         double dfval = gradFunc[ipar];
+         if (!CheckInfNaNValue(dfval)) {
+            break; // exit loop on parameters
+         }
+
+         // calculate derivative point contribution
+         pointContribution[ipar] = -2.0 * (y - fval) * invError * invError * gradFunc[ipar];
+      }
+
+      if (ipar < npar) {
+         // case loop was broken for an overflow in the gradient calculation
+         isPointRejected[i] = true;
+      }
+
+      return pointContribution;
+   };
+
+   // Vertically reduce the set of vectors by summing its equally-indexed components
+   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
+      std::vector<double> result(npar);
+
+      for (auto const &pointContribution : pointContributions) {
+         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+            result[parameterIndex] += pointContribution[parameterIndex];
+      }
+
+      return result;
+   };
+
+   std::vector<double> g(npar);
+
+#ifndef R__USE_IMT
+   // If IMT is disabled, force the execution policy to the serial case
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      Warning("FitUtil::EvaluateChi2Gradient", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                               "to ROOT::Fit::ExecutionPolicy::kSerial.");
+      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+   }
+#endif
+
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+      std::vector<std::vector<double>> allGradients(initialNPoints);
+      for (unsigned int i = 0; i < initialNPoints; ++i) {
+         allGradients[i] = mapFunction(i);
+      }
+      g = redFunction(allGradients);
+   }
+#ifdef R__USE_IMT
+   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      ROOT::TThreadExecutor pool;
+      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
+      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
+   }
+#endif
+   // else if(executionPolicy == ROOT::Fit::kMultiprocess){
+   //    ROOT::TProcessExecutor pool;
+   //    g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+   // }
+   else {
+      Error("FitUtil::EvaluateChi2Gradient",
+            "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
+   }
+
+#ifndef R__USE_IMT
+   //to fix compiler warning
+   (void)nChunks;
+#endif
+
+   // correct the number of points
+   nPoints = initialNPoints;
+
+   if (std::any_of(isPointRejected.begin(), isPointRejected.end(), [](bool point) { return point; })) {
+      unsigned nRejected = std::accumulate(isPointRejected.begin(), isPointRejected.end(), 0);
+      assert(nRejected <= initialNPoints);
+      nPoints = initialNPoints - nRejected;
+
+      if (nPoints < npar)
+         MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
+                        "Error - too many points rejected for overflow in gradient calculation");
+   }
+
+   // copy result
+   std::copy(g.begin(), g.end(), grad);
+}
+
+//______________________________________________________________________________________________________
+//
+//  Log Likelihood functions
+//_______________________________________________________________________________________________________
+
+// utility function used by the likelihoods
+
+// for LogLikelihood functions
+
+double FitUtil::EvaluatePdf(const IModelFunction & func, const UnBinData & data, const double * p, unsigned int i, double * g) {
+   // evaluate the pdf contribution to the generic logl function in case of bin data
+   // return actually the log of the pdf and its derivatives
+
+
+   //func.SetParameters(p);
+
+
+   const double * x = data.Coords(i);
+   double fval = func ( x, p );
+   double logPdf = ROOT::Math::Util::EvalLog(fval);
+   //return
+   if (g == 0) return logPdf;
+
+   const IGradModelFunction * gfunc = dynamic_cast<const IGradModelFunction *>( &func);
+
+   // gradient  calculation
+   if (gfunc != 0) {
+      //case function provides gradient
+      gfunc->ParameterGradient(  x , p, g );
+   }
+   else {
+      // estimate gradieant numerically with simple 2 point rule
+      // should probably calculate gradient of log(pdf) is more stable numerically
+      SimpleGradientCalculator gc(func.NPar(), func);
+      gc.ParameterGradient(x, p, fval, g );
+   }
+   // divide gradient by function value since returning the logs
+   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar) {
+      g[ipar] /= fval; // this should be checked against infinities
+   }
+
+#ifdef DEBUG
+   std::cout << x[i] << "\t";
+   std::cout << "\tpar = [ " << func.NPar() << " ] =  ";
+   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
+      std::cout << p[ipar] << "\t";
+   std::cout << "\tfval = " << fval;
+   std::cout << "\tgrad = [ ";
+   for (unsigned int ipar = 0; ipar < func.NPar(); ++ipar)
+      std::cout << g[ipar] << "\t";
+   std::cout << " ] "   << std::endl;
+#endif
+
+
+   return logPdf;
+}
+
+double FitUtil::EvaluateLogL(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p,
+                             int iWeight, bool extended, unsigned int &nPoints,
+                             ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
+{
+   // evaluate the LogLikelihood
+
+   unsigned int n = data.Size();
+
+   //unsigned int nRejected = 0;
+
+   bool normalizeFunc = false;
+
+   // set parameters of the function to cache integral value
+#ifdef USE_PARAMCACHE
+   (const_cast<IModelFunctionTempl<double> &>(func)).SetParameters(p);
+#endif
+
+   nPoints = data.Size();  // npoints
+
+#ifdef R__USE_IMT
+         // in case parameter needs to be propagated to user function use trick to set parameters by calling one time the function
+         // this will be done in sequential mode and parameters can be set in a thread safe manner
+         if (!normalizeFunc) {
+            if (data.NDim() == 1) {
+               const double * x = data.GetCoordComponent(0,0);
+               func( x, p);
+            }
+            else {
+               std::vector<double> x(data.NDim());
+               for (unsigned int j = 0; j < data.NDim(); ++j)
+                  x[j] = *data.GetCoordComponent(0, j);
+               func( x.data(), p);
+            }
+         }
+#endif
+
+         double norm = 1.0;
+         if (normalizeFunc) {
+            // compute integral of the function
+            std::vector<double> xmin(data.NDim());
+            std::vector<double> xmax(data.NDim());
+            IntegralEvaluator<> igEval(func, p, true);
+            // compute integral in the ranges where is defined
+            if (data.Range().Size() > 0) {
+               norm = 0;
+               for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
+                  data.Range().GetRange(&xmin[0], &xmax[0], ir);
+                  norm += igEval.Integral(xmin.data(), xmax.data());
+               }
+            } else {
+               // use (-inf +inf)
+               data.Range().GetRange(&xmin[0], &xmax[0]);
+               // check if funcition is zero at +- inf
+               if (func(xmin.data(), p) != 0 || func(xmax.data(), p) != 0) {
+                  MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood",
+                                 "A range has not been set and the function is not zero at +/- inf");
+                  return 0;
+               }
+               norm = igEval.Integral(&xmin[0], &xmax[0]);
+            }
+         }
+
+         // needed to compue effective global weight in case of extended likelihood
+
+         auto mapFunction = [&](const unsigned i) {
+            double W = 0;
+            double W2 = 0;
+            double fval = 0;
+
+            if (data.NDim() > 1) {
+               std::vector<double> x(data.NDim());
+               for (unsigned int j = 0; j < data.NDim(); ++j)
+                  x[j] = *data.GetCoordComponent(i, j);
+#ifdef USE_PARAMCACHE
+               fval = func(x.data());
+#else
+               fval = func(x.data(), p);
+#endif
+
+               // one -dim case
+            } else {
+               const auto x = data.GetCoordComponent(i, 0);
+#ifdef USE_PARAMCACHE
+               fval = func(x);
+#else
+               fval = func(x, p);
+#endif
+            }
+
+            if (normalizeFunc)
+               fval = fval * (1 / norm);
+
+            // function EvalLog protects against negative or too small values of fval
+            double logval = ROOT::Math::Util::EvalLog(fval);
+            if (iWeight > 0) {
+               double weight = data.Weight(i);
+               logval *= weight;
+               if (iWeight == 2) {
+                  logval *= weight; // use square of weights in likelihood
+                  if (!extended) {
+                     // needed sum of weights and sum of weight square if likelkihood is extended
+                     W = weight;
+                     W2 = weight * weight;
+                  }
+               }
+            }
+            return LikelihoodAux<double>(logval, W, W2);
+         };
+
+#ifdef R__USE_IMT
+  // auto redFunction = [](const std::vector<LikelihoodAux<double>> & objs){
+  //          return std::accumulate(objs.begin(), objs.end(), LikelihoodAux<double>(0.0,0.0,0.0),
+  //                      [](const LikelihoodAux<double> &l1, const LikelihoodAux<double> &l2){
+  //                          return l1+l2;
+  //                 });
+  // };
+  // do not use std::accumulate to be sure to maintain always the same order
+  auto redFunction = [](const std::vector<LikelihoodAux<double>> & objs){
+     auto l0 =  LikelihoodAux<double>(0.0,0.0,0.0);
+     for ( auto & l : objs ) {
+        l0 = l0 + l;
+     }
+     return l0;
+  };
+#else
+  (void)nChunks;
+
+  // If IMT is disabled, force the execution policy to the serial case
+  if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+     Warning("FitUtil::EvaluateLogL", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                      "to ROOT::Fit::ExecutionPolicy::kSerial.");
+     executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+  }
+#endif
+
+  double logl{};
+  double sumW{};
+  double sumW2{};
+  if(executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial){
+    for (unsigned int i=0; i<n; ++i) {
+      auto resArray = mapFunction(i);
+      logl+=resArray.logvalue;
+      sumW+=resArray.weight;
+      sumW2+=resArray.weight2;
+    }
+#ifdef R__USE_IMT
+  } else if(executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+    ROOT::TThreadExecutor pool;
+    auto chunks = nChunks !=0? nChunks: setAutomaticChunking(data.Size());
+    auto resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
+    logl=resArray.logvalue;
+    sumW=resArray.weight;
+    sumW2=resArray.weight2;
+#endif
+//   } else if(executionPolicy == ROOT::Fit::kMultitProcess){
+    // ROOT::TProcessExecutor pool;
+    // res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+  } else{
+    Error("FitUtil::EvaluateLogL","Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+  }
+
+  if (extended) {
+      // add Poisson extended term
+      double extendedTerm = 0; // extended term in likelihood
+      double nuTot = 0;
+      // nuTot is integral of function in the range
+      // if function has been normalized integral has been already computed
+      if (!normalizeFunc) {
+         IntegralEvaluator<> igEval( func, p, true);
+         std::vector<double> xmin(data.NDim());
+         std::vector<double> xmax(data.NDim());
+
+         // compute integral in the ranges where is defined
+         if (data.Range().Size() > 0 ) {
+            nuTot = 0;
+            for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
+               data.Range().GetRange(&xmin[0],&xmax[0],ir);
+               nuTot += igEval.Integral(xmin.data(),xmax.data());
+            }
+         } else {
+            // use (-inf +inf)
+            data.Range().GetRange(&xmin[0],&xmax[0]);
+            // check if funcition is zero at +- inf
+            if (func(xmin.data(), p) != 0 || func(xmax.data(), p) != 0) {
+               MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood","A range has not been set and the function is not zero at +/- inf");
+               return 0;
+            }
+            nuTot = igEval.Integral(&xmin[0],&xmax[0]);
+         }
+
+         // force to be last parameter value
+         //nutot = p[func.NDim()-1];
+         if (iWeight != 2)
+            extendedTerm = - nuTot;  // no need to add in this case n log(nu) since is already computed before
+         else {
+            // case use weight square in likelihood : compute total effective weight = sw2/sw
+            // ignore for the moment case when sumW is zero
+            extendedTerm = - (sumW2 / sumW) * nuTot;
+         }
+
+      }
+      else {
+         nuTot = norm;
+         extendedTerm = - nuTot + double(n) *  ROOT::Math::Util::EvalLog( nuTot);
+         // in case of weights need to use here sum of weights (to be done)
+      }
+      logl += extendedTerm;
+
+   }
+
+#ifdef DEBUG  
+   std::cout << "Evaluated log L for parameters (";
+   for (unsigned int ip = 0; ip < func.NPar(); ++ip)
+      std::cout << " " << p[ip];
+   std::cout << ")  fval = " << -logl << std::endl;
+#endif
+
+   return -logl;
+}
+
+void FitUtil::EvaluateLogLGradient(const IModelFunction &f, const UnBinData &data, const double *p, double *grad,
+                                   unsigned int &, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
+{
+   // evaluate the gradient of the log likelihood function
+
+   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
+   assert(fg != nullptr); // must be called by a grad function
+
+   const IGradModelFunction &func = *fg;
+
+   unsigned int npar = func.NPar();
+   unsigned initialNPoints = data.Size();
+
+   (const_cast<IGradModelFunction &>(func)).SetParameters(p);
+
+#ifdef DEBUG  
+    std::cout << "\n===> Evaluate Gradient for parameters ";
+         for (unsigned int ip = 0; ip < npar; ++ip)
+            std::cout << "  " << p[ip];
+         std::cout << "\n";
+#endif
+
+   const double kdmax1 = std::sqrt(std::numeric_limits<double>::max());
+   const double kdmax2 = std::numeric_limits<double>::max() / (4 * initialNPoints);
+
+   auto mapFunction = [&](const unsigned int i) {
+      std::vector<double> gradFunc(npar);
+      std::vector<double> pointContribution(npar);
+
+
+      const double * x = nullptr;
+      std::vector<double> xc;
+      if (data.NDim() > 1) {
+         xc.resize(data.NDim() );
+         for (unsigned int j = 0; j < data.NDim(); ++j)
+            xc[j] = *data.GetCoordComponent(i, j);
+         x = xc.data(); 
+      } else {
+         x = data.GetCoordComponent(i, 0);
+      }
+
+      double fval = func(x, p);
+      func.ParameterGradient(x, p, &gradFunc[0]);
+      
+#ifdef DEBUG
+      {
+         R__LOCKGUARD(gROOTMutex);
+         if (i < 5 || (i > data.Size()-5) ) {
+            if (data.NDim() > 1) std::cout << i << "  x " << x[0] << " y " << x[1] << " func " << fval
+                                           << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+            else std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+         }
+      }
+#endif
+
+      for (unsigned int kpar = 0; kpar < npar; ++kpar) {
+         if (fval > 0)
+            pointContribution[kpar] = -1. / fval * gradFunc[kpar];
+         else if (gradFunc[kpar] != 0) {
+            double gg = kdmax1 * gradFunc[kpar];
+            if (gg > 0)
+               gg = std::min(gg, kdmax2);
+            else
+               gg = std::max(gg, -kdmax2);
+            pointContribution[kpar] = -gg;
+         }
+         // if func derivative is zero term is also zero so do not add in g[kpar]
+      }
+
+      return pointContribution;
+   };
+
+   // Vertically reduce the set of vectors by summing its equally-indexed components
+   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
+      std::vector<double> result(npar);
+
+      for (auto const &pointContribution : pointContributions) {
+         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+            result[parameterIndex] += pointContribution[parameterIndex];
+      }
+
+      return result;
+   };
+
+   std::vector<double> g(npar);
+
+#ifndef R__USE_IMT
+   // If IMT is disabled, force the execution policy to the serial case
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      Warning("FitUtil::EvaluateLogLGradient", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                               "to ROOT::Fit::ExecutionPolicy::kSerial.");
+      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+   }
+#endif
+
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+      std::vector<std::vector<double>> allGradients(initialNPoints);
+      for (unsigned int i = 0; i < initialNPoints; ++i) {
+         allGradients[i] = mapFunction(i);
+      }
+      g = redFunction(allGradients);
+   }
+#ifdef R__USE_IMT
+   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      ROOT::TThreadExecutor pool;
+      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
+      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
+   }
+#endif
+
+   // else if(executionPolicy == ROOT::Fit::ExecutionPolicy::kMultiprocess){
+   //    ROOT::TProcessExecutor pool;
+   //    g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+   // }
+   else {
+      Error("FitUtil::EvaluateLogLGradient", "Execution policy unknown. Avalaible choices:\n "
+                                             "ROOT::Fit::ExecutionPolicy::kSerial (default)\n "
+                                             "ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+   }
+
+#ifndef R__USE_IMT
+   // to fix compiler warning
+   (void)nChunks;
+#endif
+
+   // copy result
+   std::copy(g.begin(), g.end(), grad);
+
+#ifdef DEBUG
+   std::cout << "FitUtil.cxx : Final gradient ";
+   for (unsigned int param = 0; param < npar; param++) {
+      std::cout << "  " << grad[param];
+   }
+   std::cout << "\n";
+#endif
+}
+//_________________________________________________________________________________________________
+// for binned log likelihood functions
+////////////////////////////////////////////////////////////////////////////////
+/// evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
+/// and its gradient
+
+double FitUtil::EvaluatePoissonBinPdf(const IModelFunction & func, const BinData & data, const double * p, unsigned int i, double * g ) {
+   double y = 0;
+   const double * x1 = data.GetPoint(i,y);
+
+   const DataOptions & fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+
+   IntegralEvaluator<> igEval( func, p, useBinIntegral);
+   double fval = 0;
+   const double * x2 = 0;
+   // calculate the bin volume
+   double binVolume = 1;
+   std::vector<double> xc;
+   if (useBinVolume) {
+      unsigned int ndim = data.NDim();
+      xc.resize(ndim);
+      x2 = data.BinUpEdge(i);
+      for (unsigned int j = 0; j < ndim; ++j) {
+         binVolume *= std::abs( x2[j]-x1[j] );
+         xc[j] = 0.5*(x2[j]+ x1[j]);
+      }
+      // normalize the bin volume using a reference value
+      binVolume /= data.RefVolume();
+   }
+
+   const double * x = (useBinVolume) ? &xc.front() : x1;
+
+   if (!useBinIntegral ) {
+      fval = func ( x, p );
+   }
+   else {
+      // calculate integral normalized (divided by bin volume)
+      x2 =  data.BinUpEdge(i);
+      fval = igEval( x1, x2 ) ;
+   }
+   if (useBinVolume) fval *= binVolume;
+
+   // logPdf for Poisson: ignore constant term depending on N
+   fval = std::max(fval, 0.0);  // avoid negative or too small values
+   double logPdf =  - fval;
+   if (y > 0.0) {
+      // include also constants due to saturate model (see Baker-Cousins paper)
+      logPdf += y * ROOT::Math::Util::EvalLog( fval / y) + y;
+   }
+   // need to return the pdf contribution (not the log)
+
+   //double pdfval =  std::exp(logPdf);
+
+  //if (g == 0) return pdfval;
+   if (g == 0) return logPdf;
+
+   unsigned int npar = func.NPar();
+   const IGradModelFunction * gfunc = dynamic_cast<const IGradModelFunction *>( &func);
+
+   // gradient  calculation
+   if (gfunc != 0) {
+      //case function provides gradient
+      if (!useBinIntegral )
+         gfunc->ParameterGradient(  x , p, g );
+      else {
+         // needs to calculate the integral for each partial derivative
+         CalculateGradientIntegral( *gfunc, x1, x2, p, g);
+      }
+
+   }
+   else {
+      SimpleGradientCalculator  gc(func.NPar(), func);
+      if (!useBinIntegral )
+         gc.ParameterGradient(x, p, fval, g);
+      else {
+        // needs to calculate the integral for each partial derivative
+         CalculateGradientIntegral( gc, x1, x2, p, g);
+      }
+   }
+   // correct g[] do be derivative of poisson term
+   for (unsigned int k = 0; k < npar; ++k) {
+      // apply bin volume correction
+      if (useBinVolume) g[k] *= binVolume;
+
+      // correct for Poisson term
+      if ( fval > 0)
+         g[k] *= ( y/fval - 1.) ;//* pdfval;
+      else if (y > 0) {
+         const double kdmax1 = std::sqrt( std::numeric_limits<double>::max() );
+         g[k] *= kdmax1;
+      }
+      else   // y == 0 cannot have  negative y
+         g[k] *= -1;
+   }
+
+
+#ifdef DEBUG
+   std::cout << "x = " << x[0] << " logPdf = " << logPdf << " grad";
+   for (unsigned int ipar = 0; ipar < npar; ++ipar)
+      std::cout << g[ipar] << "\t";
+   std::cout << std::endl;
+#endif
+
+//   return pdfval;
+   return logPdf;
+}
+
+double FitUtil::EvaluatePoissonLogL(const IModelFunction &func, const BinData &data, const double *p, int iWeight,
+                                    bool extended, unsigned int &nPoints, ROOT::Fit::ExecutionPolicy executionPolicy,
+                                    unsigned nChunks)
+{
+   // evaluate the Poisson Log Likelihood
+   // for binned likelihood fits
+   // this is Sum ( f(x_i)  -  y_i * log( f (x_i) ) )
+   // add as well constant term for saturated model to make it like a Chi2/2
+   // by default is etended. If extended is false the fit is not extended and
+   // the global poisson term is removed (i.e is a binomial fit)
+   // (remember that in this case one needs to have a function with a fixed normalization
+   // like in a non extended unbinned fit)
+   //
+   // if use Weight use a weighted dataset
+   // iWeight = 1 ==> logL = Sum( w f(x_i) )
+   // case of iWeight==1 is actually identical to weight==0
+   // iWeight = 2 ==> logL = Sum( w*w * f(x_i) )
+   //
+   // nPoints returns the points where bin content is not zero
+
+
+   unsigned int n = data.Size();
+
+#ifdef USE_PARAMCACHE
+   (const_cast<IModelFunction &>(func)).SetParameters(p);
+#endif
+
+   nPoints = data.Size();  // npoints
+
+
+   // get fit option and check case of using integral of bins
+   const DataOptions &fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+   bool useW2 = (iWeight == 2);
+
+   // normalize if needed by a reference volume value
+   double wrefVolume = 1.0;
+   if (useBinVolume) {
+      if (fitOpt.fNormBinVolume) wrefVolume /= data.RefVolume();
+   }
+
+#ifdef DEBUG
+   std::cout << "Evaluate PoissonLogL for params = [ ";
+   for (unsigned int j = 0; j < func.NPar(); ++j) std::cout << p[j] << " , ";
+   std::cout << "]  - data size = " << n << " useBinIntegral " << useBinIntegral << " useBinVolume "
+             << useBinVolume << " useW2 " << useW2 << " wrefVolume = " << wrefVolume << std::endl;
+#endif
+
+#ifdef USE_PARAMCACHE
+   IntegralEvaluator<> igEval(func, 0, useBinIntegral);
+#else
+   IntegralEvaluator<> igEval(func, p, useBinIntegral);
+#endif
+
+   auto mapFunction = [&](const unsigned i) {
+      auto x1 = data.GetCoordComponent(i, 0);
+      auto y = *data.ValuePtr(i);
+
+      const double *x = nullptr;
+      std::vector<double> xc;
+      double fval = 0;
+      double binVolume = 1.0;
+
+      if (useBinVolume) {
+         unsigned int ndim = data.NDim();
+         const double *x2 = data.BinUpEdge(i);
+         xc.resize(data.NDim());
+         for (unsigned int j = 0; j < ndim; ++j) {
+            auto xx = *data.GetCoordComponent(i, j);
+            binVolume *= std::abs(x2[j] - xx);
+            xc[j] = 0.5 * (x2[j] + xx);
+         }
+         x = xc.data();
+         // normalize the bin volume using a reference value
+         binVolume *= wrefVolume;
+      } else if (data.NDim() > 1) {
+         xc.resize(data.NDim());
+         xc[0] = *x1;
+         for (unsigned int j = 1; j < data.NDim(); ++j) {
+            xc[j] = *data.GetCoordComponent(i, j);
+         }
+         x = xc.data();
+      } else {
+         x = x1;
+      }
+
+      if (!useBinIntegral) {
+#ifdef USE_PARAMCACHE
+         fval = func(x);
+#else
+         fval = func(x, p);
+#endif
+      } else {
+         // calculate integral (normalized by bin volume)
+         // need to set function and parameters here in case loop is parallelized
+         fval = igEval(x, data.BinUpEdge(i));
+      }
+      if (useBinVolume) fval *= binVolume;
+
+
+
+#ifdef DEBUG
+      int NSAMPLE = 100;
+      if (i % NSAMPLE == 0) {
+         std::cout << "evt " << i << " x = [ ";
+         for (unsigned int j = 0; j < func.NDim(); ++j) std::cout << x[j] << " , ";
+         std::cout << "]  ";
+         if (fitOpt.fIntegral) {
+            std::cout << "x2 = [ ";
+            for (unsigned int j = 0; j < func.NDim(); ++j) std::cout << data.BinUpEdge(i)[j] << " , ";
+            std::cout << "] ";
+         }
+         std::cout << "  y = " << y << " fval = " << fval << std::endl;
+      }
+#endif
+
+
+      // EvalLog protects against 0 values of fval but don't want to add in the -log sum
+      // negative values of fval
+      fval = std::max(fval, 0.0);
+
+      double nloglike = 0; // negative loglikelihood
+      if (useW2) {
+         // apply weight correction . Effective weight is error^2/ y
+         // and expected events in bins is fval/weight
+         // can apply correction only when y is not zero otherwise weight is undefined
+         // (in case of weighted likelihood I don't care about the constant term due to
+         // the saturated model)
+ 
+         // use for the empty bins the global weight
+         double weight = 1.0;
+         if (y != 0) {
+            double error = data.Error(i);
+            weight = (error * error) / y; // this is the bin effective weight
+            nloglike += weight * y * ( ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval) );
+         }
+         else {
+            // for empty bin use the average weight  computed from the total data weight
+            weight = data.SumOfError2()/ data.SumOfContent();
+         }
+         if (extended) {
+            nloglike += weight  *  ( fval - y);
+         }
+
+      } else {
+         // standard case no weights or iWeight=1
+         // this is needed for Poisson likelihood (which are extened and not for multinomial)
+         // the formula below  include constant term due to likelihood of saturated model (f(x) = y)
+         // (same formula as in Baker-Cousins paper, page 439 except a factor of 2
+         if (extended) nloglike = fval - y;
+
+         if (y >  0) {
+            nloglike += y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval));
+         }
+      }
+      return nloglike;
+   };
+
+#ifdef R__USE_IMT
+   auto redFunction = [](const std::vector<double> &objs) {
+      return std::accumulate(objs.begin(), objs.end(), double{});
+   };
+#else
+   (void)nChunks;
+
+   // If IMT is disabled, force the execution policy to the serial case
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      Warning("FitUtil::EvaluatePoissonLogL", "Multithread execution policy requires IMT, which is disabled. Changing "
+                                              "to ROOT::Fit::ExecutionPolicy::kSerial.");
+      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+   }
+#endif
+
+   double res{};
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+      for (unsigned int i = 0; i < n; ++i) {
+         res += mapFunction(i);
+      }
+#ifdef R__USE_IMT
+   } else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      ROOT::TThreadExecutor pool;
+      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size());
+      res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction, chunks);
+#endif
+      //   } else if(executionPolicy == ROOT::Fit::kMultitProcess){
+      // ROOT::TProcessExecutor pool;
+      // res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+   } else {
+      Error("FitUtil::EvaluatePoissonLogL",
+            "Execution policy unknown. Avalaible choices:\n ROOT::Fit::ExecutionPolicy::kSerial (default)\n ROOT::Fit::ExecutionPolicy::kMultithread (requires IMT)\n");
+   }
+
+#ifdef DEBUG
+   std::cout << "Loglikelihood  = " << res << std::endl;
+#endif
+
+   return res;
+}
+
+void FitUtil::EvaluatePoissonLogLGradient(const IModelFunction &f, const BinData &data, const double *p, double *grad,
+                                          unsigned int &, ROOT::Fit::ExecutionPolicy executionPolicy, unsigned nChunks)
+{
+   // evaluate the gradient of the Poisson log likelihood function
+
+   const IGradModelFunction *fg = dynamic_cast<const IGradModelFunction *>(&f);
+   assert(fg != nullptr); // must be called by a grad function
+
+   const IGradModelFunction &func = *fg;
+
+#ifdef USE_PARAMCACHE
+   (const_cast<IGradModelFunction &>(func)).SetParameters(p);
+#endif
+   
+   const DataOptions &fitOpt = data.Opt();
+   bool useBinIntegral = fitOpt.fIntegral && data.HasBinEdges();
+   bool useBinVolume = (fitOpt.fBinVolume && data.HasBinEdges());
+
+   double wrefVolume = 1.0;
+   if (useBinVolume && fitOpt.fNormBinVolume)
+      wrefVolume /= data.RefVolume();
+
+   IntegralEvaluator<> igEval(func, p, useBinIntegral);
+
+   unsigned int npar = func.NPar();
+   unsigned initialNPoints = data.Size();
+
+   auto mapFunction = [&](const unsigned int i) {
+      // set all vector values to zero
+      std::vector<double> gradFunc(npar);
+      std::vector<double> pointContribution(npar);
+
+      const auto x1 = data.GetCoordComponent(i, 0);
+      const auto y = data.Value(i);
+      auto invError = data.Error(i);
+
+      invError = (invError != 0.0) ? 1.0 / invError : 1;
+
+      double fval = 0;
+
+      const double *x = nullptr;
+      std::vector<double> xc;
+
+      unsigned ndim = data.NDim();
+      double binVolume = 1.0;
+      if (useBinVolume) {
+         const double *x2 = data.BinUpEdge(i);
+
+         xc.resize(ndim);
+         for (unsigned int j = 0; j < ndim; ++j) {
+            auto x1_j = *data.GetCoordComponent(i, j);
+            binVolume *= std::abs(x2[j] - x1_j);
+            xc[j] = 0.5 * (x2[j] + x1_j);
+         }
+
+         x = xc.data();
+
+         // normalize the bin volume using a reference value
+         binVolume *= wrefVolume;
+      } else if (ndim > 1) {
+         xc.resize(ndim);
+         xc[0] = *x1;
+         for (unsigned int j = 1; j < ndim; ++j)
+            xc[j] = *data.GetCoordComponent(i, j);
+         x = xc.data();
+      } else {
+         x = x1;
+      }
+
+      if (!useBinIntegral) {
+         fval = func(x, p);
+         func.ParameterGradient(x, p, &gradFunc[0]);
+      } else {
+         // calculate integral (normalized by bin volume)
+         // need to set function and parameters here in case loop is parallelized
+         auto x2 = data.BinUpEdge(i);
+         fval = igEval(x, x2);
+         CalculateGradientIntegral(func, x, x2, p, &gradFunc[0]);
+      }
+      if (useBinVolume)
+         fval *= binVolume;
+
+#ifdef DEBUG
+      {
+         R__LOCKGUARD(gROOTMutex);
+         if (i < 5 || (i > data.Size()-5) ) {
+            if (data.NDim() > 1) std::cout << i << "  x " << x[0] << " y " << x[1] << " func " << fval
+                                           << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+            else std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
+         }
+      }
+#endif
+
+      // correct the gradient
+      for (unsigned int ipar = 0; ipar < npar; ++ipar) {
+
+         // correct gradient for bin volumes
+         if (useBinVolume)
+            gradFunc[ipar] *= binVolume;
+
+         // df/dp * (1.  - y/f )
+         if (fval > 0)
+            pointContribution[ipar] = gradFunc[ipar] * (1. - y / fval);
+         else if (gradFunc[ipar] != 0) {
+            const double kdmax1 = std::sqrt(std::numeric_limits<double>::max());
+            const double kdmax2 = std::numeric_limits<double>::max() / (4 * initialNPoints);
+            double gg = kdmax1 * gradFunc[ipar];
+            if (gg > 0)
+               gg = std::min(gg, kdmax2);
+            else
+               gg = std::max(gg, -kdmax2);
+            pointContribution[ipar] = -gg;
+         }
+      }
+
+
+      return pointContribution;
+   };
+
+   // Vertically reduce the set of vectors by summing its equally-indexed components
+   auto redFunction = [&](const std::vector<std::vector<double>> &pointContributions) {
+      std::vector<double> result(npar);
+
+      for (auto const &pointContribution : pointContributions) {
+         for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
+            result[parameterIndex] += pointContribution[parameterIndex];
+      }
+
+      return result;
+   };
+
+   std::vector<double> g(npar);
+
+#ifndef R__USE_IMT
+   // If IMT is disabled, force the execution policy to the serial case
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      Warning("FitUtil::EvaluatePoissonLogLGradient",
+              "Multithread execution policy requires IMT, which is disabled. Changing "
+              "to ROOT::Fit::ExecutionPolicy::kSerial.");
+      executionPolicy = ROOT::Fit::ExecutionPolicy::kSerial;
+   }
+#endif
+
+   if (executionPolicy == ROOT::Fit::ExecutionPolicy::kSerial) {
+      std::vector<std::vector<double>> allGradients(initialNPoints);
+      for (unsigned int i = 0; i < initialNPoints; ++i) {
+         allGradients[i] = mapFunction(i);
+      }
+      g = redFunction(allGradients);
+   }
+#ifdef R__USE_IMT
+   else if (executionPolicy == ROOT::Fit::ExecutionPolicy::kMultithread) {
+      ROOT::TThreadExecutor pool;
+      auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(initialNPoints);
+      g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, initialNPoints), redFunction, chunks);
+   }
+#endif
+
+   // else if(executionPolicy == ROOT::Fit::kMultiprocess){
+   //    ROOT::TProcessExecutor pool;
+   //    g = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, n), redFunction);
+   // }
+   else {
+      Error("FitUtil::EvaluatePoissonLogLGradient",
+            "Execution policy unknown. Avalaible choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
+   }
+
+#ifndef R__USE_IMT
+   //to fix compiler warning
+   (void)nChunks;
+#endif
+
+   // copy result
+   std::copy(g.begin(), g.end(), grad);
+
+#ifdef DEBUG
+   std::cout << "***** Final gradient : ";
+   for (unsigned int ii = 0; ii< npar; ++ii) std::cout << grad[ii] << "   ";
+   std::cout << "\n";
+#endif
+   
+}
+
+
+unsigned FitUtil::setAutomaticChunking(unsigned nEvents){
+      auto ncpu  = ROOT::GetImplicitMTPoolSize();
+      if (nEvents/ncpu < 1000) return ncpu;
+      return nEvents/1000;
+      //return ((nEvents/ncpu + 1) % 1000) *40 ; //arbitrary formula
+}
+
+}
+
+} // end namespace ROOT
-- 
GitLab