diff --git a/cmake/modules/FindVdt.cmake b/cmake/modules/FindVdt.cmake new file mode 100644 index 0000000000000000000000000000000000000000..f60872ac1bdb85d651657487a980e6551726251c --- /dev/null +++ b/cmake/modules/FindVdt.cmake @@ -0,0 +1,20 @@ +# - Locate vdt library +# Defines: +# +# VDT_FOUND +# VDT_INCLUDE_DIRS +# VDT_LIBRARIES + +find_path(VDT_INCLUDE_DIR NAMES vdt/vdtMath.h HINTS ${VDT_DIR}/include /usr/include/vdt) +find_library(VDT_LIBRARY NAMES vdt HINTS ${VDT_DIR}/lib/ $ENV{VDT_DIR}/lib/ /usr/lib) + +set(VDT_INCLUDE_DIRS ${VDT_INCLUDE_DIR}) +set(VDT_LIBRARIES ${VDT_LIBRARY}) + + +# handle the QUIETLY and REQUIRED arguments and set VDT_FOUND to TRUE if +# all listed variables are TRUE +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(VDT DEFAULT_MSG VDT_INCLUDE_DIR VDT_LIBRARY) + +mark_as_advanced(VDT_FOUND VDT_INCLUDE_DIRS VDT_LIBRARIES) diff --git a/cmake/modules/RootBuildOptions.cmake b/cmake/modules/RootBuildOptions.cmake index 9ee5592d2c1ad2f715677ab3f0356294cb81010d..03bdc79c370e77f30b5dec6bb6025af957e9550b 100644 --- a/cmake/modules/RootBuildOptions.cmake +++ b/cmake/modules/RootBuildOptions.cmake @@ -83,6 +83,7 @@ ROOT_BUILD_OPTION(builtin_cfitsio OFF "Build the FITSIO library internally (down ROOT_BUILD_OPTION(builtin_xrootd OFF "Build the XROOTD internally (downloading tarfile from the Web)") ROOT_BUILD_OPTION(builtin_llvm ON "Build the LLVM internally") ROOT_BUILD_OPTION(builtin_tbb OFF "Build the TBB internally") +ROOT_BUILD_OPTION(builtin_vdt ON "Build the VDT package internally") ROOT_BUILD_OPTION(builtin_vc OFF "Build the Vc package internally") ROOT_BUILD_OPTION(cxx11 ON "Build using C++11 compatible mode, requires gcc > 4.7.x or clang") ROOT_BUILD_OPTION(cxx14 OFF "Build using C++14 compatible mode, requires gcc > 4.9.x or clang") @@ -155,7 +156,7 @@ ROOT_BUILD_OPTION(tmva ON "Build TMVA multi variate analysis library") ROOT_BUILD_OPTION(unuran OFF "UNURAN - package for generating non-uniform random numbers") ROOT_BUILD_OPTION(vecgeom OFF "VecGeom is a vectorized geometry library enhancing the performance of geometry navigation.") ROOT_BUILD_OPTION(vc OFF "Vc adds a few new types for portable and intuitive SIMD programming") -ROOT_BUILD_OPTION(vdt ON "VDT adds a set of fast and vectorisable mathematical functions") +ROOT_BUILD_OPTION(vdt OFF "VDT adds a set of fast and vectorisable mathematical functions") ROOT_BUILD_OPTION(winrtdebug OFF "Link against the Windows debug runtime library") ROOT_BUILD_OPTION(xft ON "Xft support (X11 antialiased fonts)") ROOT_BUILD_OPTION(xml ON "XML parser interface") @@ -193,6 +194,7 @@ if(all) set(table_defvalue ON) set(unuran_defvalue ON) set(vc_defvalue ON) + set(vdt_defvalue ON) endif() #---VC does not support yet Arm and PPC processors---------------------------------------------- diff --git a/cmake/modules/SearchInstalledSoftware.cmake b/cmake/modules/SearchInstalledSoftware.cmake index f8657ebf513a1fd5a747ce9410bdbcb732dd75ed..3c4454d385b7d43683c0fc0a5193bd041d35ab63 100644 --- a/cmake/modules/SearchInstalledSoftware.cmake +++ b/cmake/modules/SearchInstalledSoftware.cmake @@ -1260,6 +1260,37 @@ if(vc OR builtin_vc) endif() endif() +#---Check for Vdt-------------------------------------------------------------------- +if(vdt OR builtin_vdt) + if(NOT builtin_vdt) + message(STATUS "Looking for VDT") + find_package(Vdt 0.3.9 CONFIG QUIET) + if(NOT Vdt_FOUND) + if(fail-on-missing) + message(FATAL_ERROR "VDT not found. Ensure that the installation of VDT is in the CMAKE_PREFIX_PATH") + else() + message(STATUS "VDT not found. Ensure that the installation of VDT is in the CMAKE_PREFIX_PATH") + message(STATUS " Alternatively, you can also enable the option 'builtin_vdt' to build the VDT libraries internally") + message(STATUS " For the time being switching OFF 'vdt' option") + set(vdt OFF CACHE BOOL "" FORCE) + endif() + endif() + set(Vdt_INCLUDE_DIRS ${Vdt_INCLUDE_DIR}) + endif() + if(builtin_vdt) + set(vdt_version 0.3.9) + ExternalProject_Add( + VDT + URL ${repository_tarfiles}/vdt-${vdt_version}.tar.gz + INSTALL_DIR ${CMAKE_BINARY_DIR} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> + LOG_DOWNLOAD 1 LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1 + ) + set(Vdt_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/include) + set(Vdt_LIBRARIES ${CMAKE_BINARY_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(vdt ON CACHE BOOL "" FORCE) + endif() +endif() #---Check for CUDA and BLAS --------------------------------------------------------- if(tmva AND cuda) diff --git a/math/CMakeLists.txt b/math/CMakeLists.txt index e480fc6be0d689fdeac558cefee2eef8be62e882..8e84d69bb8860312c6909b190b567d6cff69f92e 100644 --- a/math/CMakeLists.txt +++ b/math/CMakeLists.txt @@ -26,10 +26,6 @@ if(fftw3) add_subdirectory(fftw) endif() -if(vdt) - add_subdirectory(vdt) -endif() - if(r) add_subdirectory(rtools) endif() diff --git a/math/vdt/CMakeLists.cmake b/math/vdt/CMakeLists.cmake deleted file mode 100644 index 0dfc22d3bca98aaffe2746f1601c81d87cc40bde..0000000000000000000000000000000000000000 --- a/math/vdt/CMakeLists.cmake +++ /dev/null @@ -1,2 +0,0 @@ -include(cmake/AddTargetProperty.cmake) -ROOT_INSTALL_HEADERS(include/) diff --git a/math/vdt/CMakeLists.txt b/math/vdt/CMakeLists.txt deleted file mode 100644 index 6041f6b84b99ce75b2697248ae6331d1d6cf9aff..0000000000000000000000000000000000000000 --- a/math/vdt/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -ROOT_INSTALL_HEADERS(include/) - -ROOT_ADD_TEST_SUBDIRECTORY(tests) diff --git a/math/vdt/Licence.md b/math/vdt/Licence.md deleted file mode 100644 index 8946a896fd011760b8e53965456796cacdbd722f..0000000000000000000000000000000000000000 --- a/math/vdt/Licence.md +++ /dev/null @@ -1,12 +0,0 @@ -VDT is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Lesser Public License for more details. - -You should have received a copy of the GNU Lesser Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. diff --git a/math/vdt/Module.mk b/math/vdt/Module.mk deleted file mode 100644 index 29225f67fb66a2a39504add1fa075143d5a0cb5e..0000000000000000000000000000000000000000 --- a/math/vdt/Module.mk +++ /dev/null @@ -1,48 +0,0 @@ -# Module.mk for VDT -# -# Author: Danilo Piparo - -MODNAME := vdt -MODDIR := $(ROOT_SRCDIR)/math/$(MODNAME) -MODDIRI := $(MODDIR)/include -MODDIRS := $(MODDIR)/include - -VDTH := include/vdt/asin.h\ - include/vdt/atan2.h\ - include/vdt/atan.h\ - include/vdt/cos.h\ - include/vdt/exp.h\ - include/vdt/inv.h\ - include/vdt/log.h\ - include/vdt/sincos.h\ - include/vdt/sin.h\ - include/vdt/sqrt.h\ - include/vdt/tan.h\ - include/vdt/vdtcore_common.h\ - include/vdt/vdtMath.h - -# We create a stamp file as vdt is the only module without a library. -ALLHDRS += $(patsubst $(MODDIRI)/vdt/%.h, include/%.h, $(VDTH) ) - -##### local rules ##### -.PHONY: all-$(MODNAME) clean-$(MODNAME) distclean-$(MODNAME) \ - test-$(MODNAME) - -include/vdt/%.h: $(MODDIRI)/vdt/%.h - @(if [ ! -d "include/vdt" ]; then \ - mkdir -p include/vdt; \ - fi) - cp $< $@ - -all-$(MODNAME): - $(noop) - -clean-$(MODNAME): - $(noop) - -clean:: clean-$(MODNAME) - -distclean-$(MODNAME): clean-$(MODNAME) - @rm -rf include/vdt - -distclean:: distclean-$(MODNAME) diff --git a/math/vdt/ReadMe.md b/math/vdt/ReadMe.md deleted file mode 100644 index b18d7bbd854007a9a48e415c45e199b2e40858c4..0000000000000000000000000000000000000000 --- a/math/vdt/ReadMe.md +++ /dev/null @@ -1,45 +0,0 @@ -The VDT mathematical library -============================ - -What is VDT? ------------- - -**VDT is a library of mathematical functions**, implemented in double and single -precision. The implementation is **fast** and with the aid of modern compilers -(e.g. gcc 4.7) vectorisable. - -VDT exploits also Pade polynomials. A lot of ideas were inspired by the **cephes -math library** (by Stephen L. Moshier, moshier@na-net.ornl.gov) as well as -portions of actual code. The Cephes library can be found here: -http://www.netlib.org/cephes - -Implemented functions - * log - * exp - * sincos - * sin - * cos - * tan - * asin - * acos - * atan - * atan2 - * inverse sqrt - * inverse (faster than division, based on isqrt) - - -Copyright Danilo Piparo, Vincenzo Innocente, Thomas Hauth (CERN) 2012-14 - -VDT is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Lesser Public License for more details. - -You should have received a copy of the GNU Lesser Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. - diff --git a/math/vdt/ReleaseNotes.txt b/math/vdt/ReleaseNotes.txt deleted file mode 100644 index 2a92be5a3f5e8e80f92032530a6037d25892f902..0000000000000000000000000000000000000000 --- a/math/vdt/ReleaseNotes.txt +++ /dev/null @@ -1,64 +0,0 @@ -These are the release notes before the integration of VDT in ROOT. - -V0.3.6 -o Fixed suppport for Clang on Mac -o Simpler compiler detection system - -V0.3.5 -o Added support for Clang in CMake - -V0.3.4 -o Added regex support to perfbenchmark to profile only groups of functions -o Removed fpe rising in atan2, both single and double precision, when x -argument was 0. - -V0.3.3 -o Minor Bugfixes - -V0.3.2 -o Added the possibility to prepare the lib for preload. The CMake command -is -DPRELOAD=1. If set, symbols identical to the Libm ones but containing -a vdt call will be generated in the lib. If preloaded, vdt calls will be -performed instead of libm ones. - -V0.3.1 -o Use NEON instructions on ARM (-DNEON=1 cmake option) - -V0.3.0 -o Constants - * Add ULL to unsigned long long constants -o CMake - * Removed -pedantic - * Add possibility to disable SSE for ARM or other archs - * More readable constructs -o Can now add debugging symbols to build with -D DEBUG=1 . -o randomPool: - * Removed long doubles - * Size is now a uint64_t - * Refactored: added abstract interface to random generator - * Added 2D random generator and test -o Removed redundancy of includes in diagnostic files -o Removed fake program deleteme.cpp -o fcnResponse,fcnComparison: - * Added interface - * Added 2D version - * Added test -o printFuncDiff: added T(T,T) and void(uint32_t,T*,T*,T*) versions -o Atan2: - * Added tests for response. - * Integrated in the accuracy tests - * Double and Single precision - - -V0.2.3 -o Fixed the installation of atan2.h - -v0.2.2 -o Removed typos in the documentation (thanks to A. Neumann for spotting this!) -o Fixed CMake to make the install working also on OSx - -v0.2.1 -o Shared library built by default instead of static one. -o make install now supported. To specify the install dir use the variable -DCMAKE_INSTALL_PREFIX= whith cmake. -o Vector signatures are now void (const unsigned int, T const *, T*) -o Petulant compilation flags (picky with warnings) diff --git a/math/vdt/include/vdt/asin.h b/math/vdt/include/vdt/asin.h deleted file mode 100644 index c07d10f09a9a270190763455b968864729ff403a..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/asin.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * aasin.h - * The basic idea is to exploit Pade' polynomials. - * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier - * moshier@na-net.ornl.gov) as well as actual code. - * The Cephes library can be found here: http://www.netlib.org/cephes/ - * - * Created on: Jun 23, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -/* - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef ASIN_H_ -#define ASIN_H_ - -#include "vdtcore_common.h" - -namespace vdt{ - -namespace details{ - -const double RX1asin = 2.967721961301243206100E-3; -const double RX2asin = -5.634242780008963776856E-1; -const double RX3asin = 6.968710824104713396794E0; -const double RX4asin = -2.556901049652824852289E1; -const double RX5asin = 2.853665548261061424989E1; - -const double SX1asin = -2.194779531642920639778E1; -const double SX2asin = 1.470656354026814941758E2; -const double SX3asin = -3.838770957603691357202E2; -const double SX4asin = 3.424398657913078477438E2; - -const double PX1asin = 4.253011369004428248960E-3; -const double PX2asin = -6.019598008014123785661E-1; -const double PX3asin = 5.444622390564711410273E0; -const double PX4asin = -1.626247967210700244449E1; -const double PX5asin = 1.956261983317594739197E1; -const double PX6asin = -8.198089802484824371615E0; - -const double QX1asin = -1.474091372988853791896E1; -const double QX2asin = 7.049610280856842141659E1; -const double QX3asin = -1.471791292232726029859E2; -const double QX4asin = 1.395105614657485689735E2; -const double QX5asin = -4.918853881490881290097E1; - -inline double getRX(const double x){ - double rx = RX1asin; - rx*= x; - rx+= RX2asin; - rx*= x; - rx+= RX3asin; - rx*= x; - rx+= RX4asin; - rx*= x; - rx+= RX5asin; - return rx; -} -inline double getSX(const double x){ - double sx = x; - sx+= SX1asin; - sx*= x; - sx+= SX2asin; - sx*= x; - sx+= SX3asin; - sx*= x; - sx+= SX4asin; - return sx; -} - -inline double getPX(const double x){ - double px = PX1asin; - px*= x; - px+= PX2asin; - px*= x; - px+= PX3asin; - px*= x; - px+= PX4asin; - px*= x; - px+= PX5asin; - px*= x; - px+= PX6asin; - return px; -} - -inline double getQX(const double x){ - double qx = x; - qx+= QX1asin; - qx*= x; - qx+= QX2asin; - qx*= x; - qx+= QX3asin; - qx*= x; - qx+= QX4asin; - qx*= x; - qx+= QX5asin; - return qx; - } -} - -} - -namespace vdt{ - -// asin double precision -------------------------------------------------------- -/// Double Precision asin -inline double fast_asin(double x){ - - const uint64_t sign_mask = details::getSignMask(x); - x = std::fabs(x); - const double a = x; - - - double zz = 1.0 - a; - double px = details::getRX(zz); - double qx = details::getSX(zz); - - const double p = zz * px/qx; - - zz = std::sqrt(zz+zz); - double z = details::PIO4 - zz; - zz = zz * p - details::MOREBITS; - z -= zz; - z += details::PIO4; - - if( a < 0.625 ){ - zz = a * a; - px = details::getPX(zz); - qx = details::getQX(zz); - z = zz*px/qx; - z = a * z + a; - } - - - // Linear approx, not sooo needed but seable. Price is cheap though - double res = a < 1e-8? a : z ; - // Restore Sign - return details::dpORuint64(res,sign_mask); - -} - -//------------------------------------------------------------------------------ -/// Single Precision asin -inline float fast_asinf(float x){ - - - uint32_t flag=0; - - const uint32_t sign_mask = details::getSignMask(x); - const float a = std::fabs(x); - - float z; - if( a > 0.5f ) - { - z = 0.5f * (1.0f - a); - x = sqrtf( z ); - flag = 1; - } - else - { - x = a; - z = x * x; - } - - z = (((( 4.2163199048E-2f * z - + 2.4181311049E-2f) * z - + 4.5470025998E-2f) * z - + 7.4953002686E-2f) * z - + 1.6666752422E-1f) * z * x - + x; - -// if( flag != 0 ) -// { -// z = z + z; -// z = PIO2F - z; -// } - - // No branch with the two coefficients - - float tmp = z + z; - tmp = details::PIO2F - tmp; - - // Linear approx, not sooo needed but seable. Price is cheap though - float res = a < 1e-4f? a : tmp * flag + (1-flag) * z ; - - // Restore Sign - return details::spORuint32(res,sign_mask); - -} - -//------------------------------------------------------------------------------ -// The cos is in this file as well - -inline double fast_acos( double x ){return details::PIO2 - fast_asin(x);} - -//------------------------------------------------------------------------------ - -inline float fast_acosf( float x ){return details::PIO2F - fast_asinf(x);} - -//------------------------------------------------------------------------------ - -// // Vector signatures -// -// void asinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_asinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void asinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_asinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// -// void acosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_acosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void acosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_acosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); - -} //vdt namespace - -#endif /* ASIN_H_ */ diff --git a/math/vdt/include/vdt/atan.h b/math/vdt/include/vdt/atan.h deleted file mode 100644 index 5aa4a972b7657a63102db97ab91166259a7c2750..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/atan.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * atan.h - * The basic idea is to exploit Pade polynomials. - * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier - * moshier@na-net.ornl.gov) as well as actual code. - * The Cephes library can be found here: http://www.netlib.org/cephes/ - * - * Created on: Jun 23, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -/* - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef ATAN_H_ -#define ATAN_H_ - -#include "vdtcore_common.h" - -namespace vdt{ - -namespace details{ -const double T3PO8 = 2.41421356237309504880; -const double MOREBITSO2 = MOREBITS * 0.5; - -inline double get_atan_px(const double x2){ - - const double PX1atan = -8.750608600031904122785E-1; - const double PX2atan = -1.615753718733365076637E1; - const double PX3atan = -7.500855792314704667340E1; - const double PX4atan = -1.228866684490136173410E2; - const double PX5atan = -6.485021904942025371773E1; - - double px = PX1atan; - px *= x2; - px += PX2atan; - px *= x2; - px += PX3atan; - px *= x2; - px += PX4atan; - px *= x2; - px += PX5atan; - - return px; -} - - -inline double get_atan_qx(const double x2){ - const double QX1atan = 2.485846490142306297962E1; - const double QX2atan = 1.650270098316988542046E2; - const double QX3atan = 4.328810604912902668951E2; - const double QX4atan = 4.853903996359136964868E2; - const double QX5atan = 1.945506571482613964425E2; - - double qx=x2; - qx += QX1atan; - qx *=x2; - qx += QX2atan; - qx *=x2; - qx += QX3atan; - qx *=x2; - qx += QX4atan; - qx *=x2; - qx += QX5atan; - - return qx; -} - -} - - - -/// Fast Atan implementation double precision -inline double fast_atan(double x){ - - /* make argument positive and save the sign */ - const uint64_t sign_mask = details::getSignMask(x); - x=std::fabs(x); - - /* range reduction */ - const double originalx=x; - - double y = details::PIO4; - double factor = details::MOREBITSO2; - x = (x-1.0) / (x+1.0); - - if( originalx > details::T3PO8 ) { - y = details::PIO2; - factor = details::MOREBITS; - x = -1.0 / originalx ; - } - if ( originalx <= 0.66 ) { - y = 0.; - factor = 0.; - x = originalx; - } - - const double x2 = x * x; - - const double px = details::get_atan_px(x2); - const double qx = details::get_atan_qx(x2); - - //double res = y +x * x2 * px / qx + x +factor; - - const double poq=px / qx; - - double res = x * x2 * poq + x; - res+=y; - - res+=factor; - - return details::dpORuint64(res,sign_mask); -} - -//------------------------------------------------------------------------------ -/// Fast Atan implementation single precision -inline float fast_atanf( float xx ) { - - const uint32_t sign_mask = details::getSignMask(xx); - - float x= std::fabs(xx); - const float x0=x; - float y=0.0f; - - /* range reduction */ - if( x0 > 0.4142135623730950f ){ // * tan pi/8 - x = (x0-1.0f)/(x0+1.0f); - y = details::PIO4F; - } - if( x0 > 2.414213562373095f ){ // tan 3pi/8 - x = -( 1.0f/x0 ); - y = details::PIO2F; - } - - - const float x2 = x * x; - y += - ((( 8.05374449538e-2f * x2 - - 1.38776856032E-1f) * x2 - + 1.99777106478E-1f) * x2 - - 3.33329491539E-1f) * x2 * x - + x; - - return details::spORuint32(y,sign_mask); -} - -//------------------------------------------------------------------------------ -// // Vector signatures -// -// void atanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_atanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void atanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_atanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); - -}// end of vdt - -#endif // end of atan diff --git a/math/vdt/include/vdt/atan2.h b/math/vdt/include/vdt/atan2.h deleted file mode 100644 index 068aecbb3fab3e2ca324e1ac70869444f94d5f8a..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/atan2.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * atan2.h - * The basic idea is to exploit Pade polynomials. - * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier - * moshier@na-net.ornl.gov) as well as actual code. - * The Cephes library can be found here: http://www.netlib.org/cephes/ - * - * Created on: Sept 20, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -/* - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef ATAN2_H_ -#define ATAN2_H_ - -#include "vdtcore_common.h" -#include "atan.h" - -namespace vdt{ - -inline double fast_atan2( double y, double x ) { - // move in first octant - double xx = std::fabs(x); - double yy = std::fabs(y); - double tmp (0.0); - if (yy>xx) { - tmp = yy; - yy=xx; xx=tmp; - tmp=1.; - } - - // To avoid the fpe, we protect against /0. - const double oneIfXXZero = (xx==0.); - double t=yy/(xx+oneIfXXZero); - double z=t; - - double s = details::PIO4; - double factor = details::MOREBITSO2; - t = (t-1.0) / (t+1.0); - - if( z > details::T3PO8 ) { - s = details::PIO2; - factor = details::MOREBITS; - t = -1.0 / z ; - } - if ( z <= 0.66 ) { - s = 0.; - factor = 0.; - t = z; - } - - const double t2 = t * t; - - const double px = details::get_atan_px(t2); - const double qx = details::get_atan_qx(t2); - - //double res = y +x * x2 * px / qx + x +factor; - - const double poq=px / qx; - - double ret = t * t2 * poq + t; - ret+=s; - - ret+=factor; - - // Here we put the result to 0 if xx was 0, if not nothing happens! - ret*= (1.-oneIfXXZero); - - // move back in place - if (y==0) ret=0.0; - if (tmp!=0) ret = details::PIO2 - ret; - if (x<0) ret = details::PI - ret; - if (y<0) ret = -ret; - - - return ret; -} - -inline float fast_atan2f( float y, float x ) { - - // move in first octant - float xx = std::fabs(x); - float yy = std::fabs(y); - float tmp (0.0f); - if (yy>xx) { - tmp = yy; - yy=xx; xx=tmp; - tmp =1.f; - } - - // To avoid the fpe, we protect against /0. - const float oneIfXXZero = (xx==0.f); - - float t=yy/(xx/*+oneIfXXZero*/); - float z=t; - if( t > 0.4142135623730950f ) // * tan pi/8 - z = (t-1.0f)/(t+1.0f); - - //printf("%e %e %e %e\n",yy,xx,t,z); - float z2 = z * z; - - float ret =(((( 8.05374449538e-2f * z2 - - 1.38776856032E-1f) * z2 - + 1.99777106478E-1f) * z2 - - 3.33329491539E-1f) * z2 * z - + z ); - - // Here we put the result to 0 if xx was 0, if not nothing happens! - ret*= (1.f - oneIfXXZero); - - // move back in place - if (y==0.f) ret=0.f; - if( t > 0.4142135623730950f ) ret += details::PIO4F; - if (tmp!=0) ret = details::PIO2F - ret; - if (x<0.f) ret = details::PIF - ret; - if (y<0.f) ret = -ret; - - return ret; - -} - - - -//------------------------------------------------------------------------------ -// // Vector signatures -// -// void atan2v(const uint32_t size, double const * __restrict__ iarray, double const * __restrict__ iarray2, double* __restrict__ oarray); -// void fast_atan2v(const uint32_t size, double const * __restrict__ iarray, double const * __restrict__ iarray2, double* __restrict__ oarray); -// void atan2fv(const uint32_t size, float const * __restrict__ iarray, float const * __restrict__ iarray2, float* __restrict__ oarray); -// void fast_atan2fv(const uint32_t size, float const * __restrict__ iarray, float const * __restrict__ iarray2, float* __restrict__ oarray); - -} // end namespace vdt - - -#endif diff --git a/math/vdt/include/vdt/cos.h b/math/vdt/include/vdt/cos.h deleted file mode 100644 index 7641cd18e2f52fe2e05ecfeec770e3e3f65e4c35..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/cos.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * cos.h - * The basic idea is to exploit Pade polynomials. - * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier - * moshier@na-net.ornl.gov) as well as actual code. - * The Cephes library can be found here: http://www.netlib.org/cephes/ - * - * Created on: Jun 23, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -#ifndef COS_H_ -#define COS_H_ - -#include "sincos.h" - -namespace vdt{ - -// Cos double precision -------------------------------------------------------- - -/// Double precision cosine: just call sincos. -inline double fast_cos(double x){double s,c;fast_sincos(x,s,c);return c;} - -//------------------------------------------------------------------------------ - -inline float fast_cosf(float x){float s,c;fast_sincosf(x,s,c);return c;} - -//------------------------------------------------------------------------------ -// void cosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_cosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void cosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_cosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); - -} //vdt namespace - -#endif /* COS_H_ */ diff --git a/math/vdt/include/vdt/exp.h b/math/vdt/include/vdt/exp.h deleted file mode 100644 index 4fb630351903f928617030812a2afdb781cec20b..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/exp.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * exp.h - * The basic idea is to exploit Pade polynomials. - * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier - * moshier@na-net.ornl.gov) as well as actual code. - * The Cephes library can be found here: http://www.netlib.org/cephes/ - * - * Created on: Jun 23, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -/* - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef _VDT_EXP_ -#define _VDT_EXP_ - -#include "vdtcore_common.h" -#include <limits> - -namespace vdt{ - -namespace details{ - - const double EXP_LIMIT = 708; - - const double PX1exp = 1.26177193074810590878E-4; - const double PX2exp = 3.02994407707441961300E-2; - const double PX3exp = 9.99999999999999999910E-1; - const double QX1exp = 3.00198505138664455042E-6; - const double QX2exp = 2.52448340349684104192E-3; - const double QX3exp = 2.27265548208155028766E-1; - const double QX4exp = 2.00000000000000000009E0; - - const double LOG2E = 1.4426950408889634073599; // 1/log(2) - - const float MAXLOGF = 88.72283905206835f; - const float MINLOGF = -88.f; - - const float C1F = 0.693359375f; - const float C2F = -2.12194440e-4f; - - const float PX1expf = 1.9875691500E-4f; - const float PX2expf =1.3981999507E-3f; - const float PX3expf =8.3334519073E-3f; - const float PX4expf =4.1665795894E-2f; - const float PX5expf =1.6666665459E-1f; - const float PX6expf =5.0000001201E-1f; - - const float LOG2EF = 1.44269504088896341f; - -} - -// Exp double precision -------------------------------------------------------- - - -/// Exponential Function double precision -inline double fast_exp(double initial_x){ - - double x = initial_x; - double px=details::fpfloor(details::LOG2E * x +0.5); - - const int32_t n = int32_t(px); - - x -= px * 6.93145751953125E-1; - x -= px * 1.42860682030941723212E-6; - - const double xx = x * x; - - // px = x * P(x**2). - px = details::PX1exp; - px *= xx; - px += details::PX2exp; - px *= xx; - px += details::PX3exp; - px *= x; - - // Evaluate Q(x**2). - double qx = details::QX1exp; - qx *= xx; - qx += details::QX2exp; - qx *= xx; - qx += details::QX3exp; - qx *= xx; - qx += details::QX4exp; - - // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) ) - x = px / (qx - px); - x = 1.0 + 2.0 * x; - - // Build 2^n in double. - x *= details::uint642dp(( ((uint64_t)n) +1023)<<52); - - if (initial_x > details::EXP_LIMIT) - x = std::numeric_limits<double>::infinity(); - if (initial_x < -details::EXP_LIMIT) - x = 0.; - - return x; - -} - -// Exp single precision -------------------------------------------------------- - -/// Exponential Function single precision -inline float fast_expf(float initial_x) { - - float x = initial_x; - - float z = details::fpfloor( details::LOG2EF * x +0.5f ); /* floor() truncates toward -infinity. */ - - x -= z * details::C1F; - x -= z * details::C2F; - const int32_t n = int32_t ( z ); - - const float x2 = x * x; - - z = x*details::PX1expf; - z += details::PX2expf; - z *= x; - z += details::PX3expf; - z *= x; - z += details::PX4expf; - z *= x; - z += details::PX5expf; - z *= x; - z += details::PX6expf; - z *= x2; - z += x + 1.0f; - - /* multiply by power of 2 */ - z *= details::uint322sp((n+0x7f)<<23); - - if (initial_x > details::MAXLOGF) z=std::numeric_limits<float>::infinity(); - if (initial_x < details::MINLOGF) z=0.f; - - return z; - - } - -//------------------------------------------------------------------------------ - -// void expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); - -} // end namespace vdt - -#endif diff --git a/math/vdt/include/vdt/inv.h b/math/vdt/include/vdt/inv.h deleted file mode 100644 index 10ea24f821b0344fa74b3e5c639a81d89d808d0e..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/inv.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * inv.h - * An experiment: implement division with the square fo the approximate - * inverse square root. - * In other words one transforms a shift, multiplications and sums into a - * sqrt. - * - * Created on: Jun 24, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - * - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef INV_H_ -#define INV_H_ - -#include "vdtcore_common.h" -#include "sqrt.h" -#include <cmath> -#include <limits> - -namespace vdt{ - -//------------------------------------------------------------------------------ - -/// General implementation of the inversion -inline double fast_inv_general(double x, const uint32_t isqrt_iterations) { - const uint64_t sign_mask = details::getSignMask(x); - const double sqrt_one_over_x = fast_isqrt_general(std::fabs(x), - isqrt_iterations); - return sqrt_one_over_x*(details::dpORuint64(sqrt_one_over_x , sign_mask )); -} - -//------------------------------------------------------------------------------ - -/// Four iterations inversion -inline double fast_inv(double x) {return fast_inv_general(x,4);} - -//------------------------------------------------------------------------------ - -/// Three iterations -inline double fast_approx_inv(double x) {return fast_inv_general(x,3);} - -//------------------------------------------------------------------------------ - -/// For comparisons -inline double inv (double x) {return 1./x;} - -//------------------------------------------------------------------------------ -// Single precision - - - -/// General implementation of the inversion -inline float fast_invf_general(float x, const uint32_t isqrt_iterations) { - const uint32_t sign_mask = details::getSignMask(x); - const float sqrt_one_over_x = fast_isqrtf_general(std::fabs(x), - isqrt_iterations); - return sqrt_one_over_x*(details::spORuint32(sqrt_one_over_x , sign_mask )); -} - -//------------------------------------------------------------------------------ - -/// Two iterations -inline float fast_invf(float x) {return fast_invf_general(x,2);} - -//------------------------------------------------------------------------------ - -/// One iterations -inline float fast_approx_invf(float x) {return fast_invf_general(x,1);} - -//------------------------------------------------------------------------------ - -/// For comparisons -inline float invf (float x) {return 1.f/x;} - -//------------------------------------------------------------------------------ - -// void invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_approx_invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_approx_invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); - -} // end namespace vdt - -#endif /* INV_H_ */ diff --git a/math/vdt/include/vdt/log.h b/math/vdt/include/vdt/log.h deleted file mode 100644 index 571fee87355bd0bc517a0c0e9e25bbde9668fcb2..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/log.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * log.h - * The basic idea is to exploit Pade polynomials. - * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier - * moshier@na-net.ornl.gov) as well as actual code. - * The Cephes library can be found here: http://www.netlib.org/cephes/ - * - * Created on: Jun 23, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -/* - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef LOG_H_ -#define LOG_H_ - -#include "vdtcore_common.h" -#include <limits> - -namespace vdt{ - -// local namespace for the constants/functions which are necessary only here -namespace details{ - -const double LOG_UPPER_LIMIT = 1e307; -const double LOG_LOWER_LIMIT = 0; - -const double SQRTH = 0.70710678118654752440; - -inline double get_log_px(const double x){ - const double PX1log = 1.01875663804580931796E-4; - const double PX2log = 4.97494994976747001425E-1; - const double PX3log = 4.70579119878881725854E0; - const double PX4log = 1.44989225341610930846E1; - const double PX5log = 1.79368678507819816313E1; - const double PX6log = 7.70838733755885391666E0; - - double px = PX1log; - px *= x; - px += PX2log; - px *= x; - px += PX3log; - px *= x; - px += PX4log; - px *= x; - px += PX5log; - px *= x; - px += PX6log; - return px; - -} - -inline double get_log_qx(const double x){ - const double QX1log = 1.12873587189167450590E1; - const double QX2log = 4.52279145837532221105E1; - const double QX3log = 8.29875266912776603211E1; - const double QX4log = 7.11544750618563894466E1; - const double QX5log = 2.31251620126765340583E1; - - double qx = x; - qx += QX1log; - qx *=x; - qx += QX2log; - qx *=x; - qx += QX3log; - qx *=x; - qx += QX4log; - qx *=x; - qx += QX5log; - return qx; -} - -} - -// Log double precision -------------------------------------------------------- -inline double fast_log(double x){ - - const double original_x = x; - - /* separate mantissa from exponent */ - double fe; - x = details::getMantExponent(x,fe); - - // blending - x > details::SQRTH? fe+=1. : x+=x ; - x -= 1.0; - - /* rational form */ - double px = details::get_log_px(x); - - //for the final formula - const double x2 = x*x; - px *= x; - px *= x2; - - const double qx = details::get_log_qx(x); - - double res = px / qx ; - - res -= fe * 2.121944400546905827679e-4; - res -= 0.5 * x2 ; - - res = x + res; - res += fe * 0.693359375; - - if (original_x > details::LOG_UPPER_LIMIT) - res = std::numeric_limits<double>::infinity(); - if (original_x < details::LOG_LOWER_LIMIT) // THIS IS NAN! - res = - std::numeric_limits<double>::quiet_NaN(); - - return res; - -} - -// Log single precision -------------------------------------------------------- - - - -namespace details{ - -const float LOGF_UPPER_LIMIT = MAXNUMF; -const float LOGF_LOWER_LIMIT = 0; - -const float PX1logf = 7.0376836292E-2f; -const float PX2logf = -1.1514610310E-1f; -const float PX3logf = 1.1676998740E-1f; -const float PX4logf = -1.2420140846E-1f; -const float PX5logf = 1.4249322787E-1f; -const float PX6logf = -1.6668057665E-1f; -const float PX7logf = 2.0000714765E-1f; -const float PX8logf = -2.4999993993E-1f; -const float PX9logf = 3.3333331174E-1f; - -inline float get_log_poly(const float x){ - float y = x*PX1logf; - y += PX2logf; - y *= x; - y += PX3logf; - y *= x; - y += PX4logf; - y *= x; - y += PX5logf; - y *= x; - y += PX6logf; - y *= x; - y += PX7logf; - y *= x; - y += PX8logf; - y *= x; - y += PX9logf; - return y; -} - -const float SQRTHF = 0.707106781186547524f; - -} - -// Log single precision -------------------------------------------------------- -inline float fast_logf( float x ) { - - const float original_x = x; - - float fe; - x = details::getMantExponentf( x, fe); - - x > details::SQRTHF? fe+=1.f : x+=x ; - x -= 1.0f; - - const float x2 = x*x; - - float res = details::get_log_poly(x); - res *= x2*x; - - res += -2.12194440e-4f * fe; - res += -0.5f * x2; - - res= x + res; - - res += 0.693359375f * fe; - - if (original_x > details::LOGF_UPPER_LIMIT) - res = std::numeric_limits<float>::infinity(); - if (original_x < details::LOGF_LOWER_LIMIT) - res = -std::numeric_limits<float>::quiet_NaN(); - - return res; -} - - -//------------------------------------------------------------------------------ - -// void logv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_logv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void logfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_logfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); - -} //vdt namespace - -#endif /* LOG_H_ */ diff --git a/math/vdt/include/vdt/sin.h b/math/vdt/include/vdt/sin.h deleted file mode 100644 index a73541c133d0fd9eb73a011393be4a0c7f6ba94f..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/sin.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * cos.h - * The basic idea is to exploit Pade polynomials. - * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier - * moshier@na-net.ornl.gov) as well as actual code. - * The Cephes library can be found here: http://www.netlib.org/cephes/ - * - * Created on: Jun 23, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -/* - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef SIN_H_ -#define SIN_H_ - -#include "sincos.h" - -namespace vdt{ - -// Sin double precision -------------------------------------------------------- - -/// Double precision sine: just call sincos. -inline double fast_sin(double x){double s,c;fast_sincos(x,s,c);return s;} - -//------------------------------------------------------------------------------ - -inline float fast_sinf(float x){float s,c;fast_sincosf(x,s,c);return s;} - -//------------------------------------------------------------------------------ -// void sinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_sinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void sinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_sinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); - - -} //vdt namespace - -#endif /* SIN_H_ */ diff --git a/math/vdt/include/vdt/sincos.h b/math/vdt/include/vdt/sincos.h deleted file mode 100644 index b54a01839f82835fe0aabf47b2fa8e85f0907a5c..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/sincos.h +++ /dev/null @@ -1,238 +0,0 @@ -/* - * sincos_common.h - * The basic idea is to exploit Pade polynomials. - * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier - * moshier@na-net.ornl.gov) as well as actual code. - * The Cephes library can be found here: http://www.netlib.org/cephes/ - * - * Created on: Jun 23, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -/* - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include "vdtcore_common.h" -#include <cmath> -#include <limits> - -#ifndef SINCOS_COMMON_H_ -#define SINCOS_COMMON_H_ - -namespace vdt{ - -namespace details{ - -// double precision constants - -const double DP1sc = 7.85398125648498535156E-1; -const double DP2sc = 3.77489470793079817668E-8; -const double DP3sc = 2.69515142907905952645E-15; - -const double C1sin = 1.58962301576546568060E-10; -const double C2sin =-2.50507477628578072866E-8; -const double C3sin = 2.75573136213857245213E-6; -const double C4sin =-1.98412698295895385996E-4; -const double C5sin = 8.33333333332211858878E-3; -const double C6sin =-1.66666666666666307295E-1; - -const double C1cos =-1.13585365213876817300E-11; -const double C2cos = 2.08757008419747316778E-9; -const double C3cos =-2.75573141792967388112E-7; -const double C4cos = 2.48015872888517045348E-5; -const double C5cos =-1.38888888888730564116E-3; -const double C6cos = 4.16666666666665929218E-2; - -const double DP1 = 7.853981554508209228515625E-1; -const double DP2 = 7.94662735614792836714E-9; -const double DP3 = 3.06161699786838294307E-17; - -// single precision constants - -const float DP1F = 0.78515625; -const float DP2F = 2.4187564849853515625e-4; -const float DP3F = 3.77489497744594108e-8; - -const float T24M1 = 16777215.; - -//------------------------------------------------------------------------------ - -inline double get_sin_px(const double x){ - double px=C1sin; - px *= x; - px += C2sin; - px *= x; - px += C3sin; - px *= x; - px += C4sin; - px *= x; - px += C5sin; - px *= x; - px += C6sin; - return px; -} - -//------------------------------------------------------------------------------ - -inline double get_cos_px(const double x){ - double px=C1cos; - px *= x; - px += C2cos; - px *= x; - px += C3cos; - px *= x; - px += C4cos; - px *= x; - px += C5cos; - px *= x; - px += C6cos; - return px; -} - - -//------------------------------------------------------------------------------ -/// Reduce to 0 to 45 -inline double reduce2quadrant(double x, int32_t& quad) { - - x = fabs(x); - quad = int (ONEOPIO4 * x); // always positive, so (int) == std::floor - quad = (quad+1) & (~1); - const double y = double (quad); - // Extended precision modular arithmetic - return ((x - y * DP1) - y * DP2) - y * DP3; - } - -//------------------------------------------------------------------------------ -/// Sincos only for -45deg < x < 45deg -inline void fast_sincos_m45_45( const double z, double & s, double &c ) { - - double zz = z * z; - s = z + z * zz * get_sin_px(zz); - c = 1.0 - zz * .5 + zz * zz * get_cos_px(zz); - } - - -//------------------------------------------------------------------------------ - -} // End namespace details - -/// Double precision sincos -inline void fast_sincos( const double xx, double & s, double &c ) { - // I have to use doubles to make it vectorise... - - int j; - double x = details::reduce2quadrant(xx,j); - const double signS = (j&4); - - j-=2; - - const double signC = (j&4); - const double poly = j&2; - - details::fast_sincos_m45_45(x,s,c); - - //swap - if( poly==0 ) { - const double tmp = c; - c=s; - s=tmp; - } - - if(signC == 0.) - c = -c; - if(signS != 0.) - s = -s; - if (xx < 0.) - s = -s; - - } - - -// Single precision functions - -namespace details { -//------------------------------------------------------------------------------ -/// Reduce to 0 to 45 -inline float reduce2quadrant(float x, int & quad) { - /* make argument positive */ - x = fabs(x); - - quad = int (ONEOPIO4F * x); /* integer part of x/PIO4 */ - - quad = (quad+1) & (~1); - const float y = float(quad); - // quad &=4; - // Extended precision modular arithmetic - return ((x - y * DP1F) - y * DP2F) - y * DP3F; - } - - -//------------------------------------------------------------------------------ - - - -/// Sincos only for -45deg < x < 45deg -inline void fast_sincosf_m45_45( const float x, float & s, float &c ) { - - float z = x * x; - - s = (((-1.9515295891E-4f * z - + 8.3321608736E-3f) * z - - 1.6666654611E-1f) * z * x) - + x; - - c = (( 2.443315711809948E-005f * z - - 1.388731625493765E-003f) * z - + 4.166664568298827E-002f) * z * z - - 0.5f * z + 1.0f; - } - -//------------------------------------------------------------------------------ - -} // end details namespace - -/// Single precision sincos -inline void fast_sincosf( const float xx, float & s, float &c ) { - - - int j; - const float x = details::reduce2quadrant(xx,j); - int signS = (j&4); - - j-=2; - - const int signC = (j&4); - const int poly = j&2; - - float ls,lc; - details::fast_sincosf_m45_45(x,ls,lc); - - //swap - if( poly==0 ) { - const float tmp = lc; - lc=ls; ls=tmp; - } - - if(signC == 0) lc = -lc; - if(signS != 0) ls = -ls; - if (xx<0) ls = -ls; - c=lc; - s=ls; - } - - -} // end namespace vdt - -#endif diff --git a/math/vdt/include/vdt/sqrt.h b/math/vdt/include/vdt/sqrt.h deleted file mode 100644 index 67dfcc4b0ba31fdbfa2b7d2c7cd79603986722ae..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/sqrt.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * sqrt.h - * Implementations born on the Quake 3 fast inverse square root - * function. - * http://en.wikipedia.org/wiki/Fast_inverse_square_root - * - * Created on: Jun 24, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -/* - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef SQRT_H_ -#define SQRT_H_ - -#include "vdtcore_common.h" - -namespace vdt{ - -//------------------------------------------------------------------------------ - - -/// Sqrt implmentation from Quake3 -inline double fast_isqrt_general(double x, const uint32_t ISQRT_ITERATIONS) { - - const double threehalfs = 1.5; - const double x2 = x * 0.5; - double y = x; - uint64_t i = details::dp2uint64(y); - // Evil! - i = 0x5fe6eb50c7aa19f9ULL - ( i >> 1 ); - y = details::uint642dp(i); - for (uint32_t j=0;j<ISQRT_ITERATIONS;++j) - y *= threehalfs - ( x2 * y * y ) ; - - return y; -} - -//------------------------------------------------------------------------------ - -/// Four iterations -inline double fast_isqrt(double x) {return fast_isqrt_general(x,4);} - -/// Three iterations -inline double fast_approx_isqrt(double x) {return fast_isqrt_general(x,3);} - -//------------------------------------------------------------------------------ - -/// For comparisons -inline double isqrt (double x) {return 1./std::sqrt(x);} - -//------------------------------------------------------------------------------ - -/// Sqrt implmentation from Quake3 -inline float fast_isqrtf_general(float x, const uint32_t ISQRT_ITERATIONS) { - - const float threehalfs = 1.5f; - const float x2 = x * 0.5f; - float y = x; - uint32_t i = details::sp2uint32(y); - i = 0x5f3759df - ( i >> 1 ); - y = details::uint322sp(i); - for (uint32_t j=0;j<ISQRT_ITERATIONS;++j) - y *= ( threehalfs - ( x2 * y * y ) ); - - return y; -} - -//------------------------------------------------------------------------------ - -/// Two iterations -inline float fast_isqrtf(float x) {return fast_isqrtf_general(x,2);} - -/// One (!) iterations -inline float fast_approx_isqrtf(float x) {return fast_isqrtf_general(x,1);} - -//------------------------------------------------------------------------------ - -/// For comparisons -inline float isqrtf (float x) {return 1.f/std::sqrt(x);} - -//------------------------------------------------------------------------------ - -// void isqrtv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_isqrtv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_approx_isqrtv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void isqrtfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_isqrtfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_approx_isqrtfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); - -} // end namespace vdt - -#endif /* SQRT_H_ */ diff --git a/math/vdt/include/vdt/tan.h b/math/vdt/include/vdt/tan.h deleted file mode 100644 index d617ce9f26edb5668becb44a046c717bd14b5c6e..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/tan.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * tan.h - * The basic idea is to exploit Pade polynomials. - * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier - * moshier@na-net.ornl.gov) as well as actual code. - * The Cephes library can be found here: http://www.netlib.org/cephes/ - * - * Created on: Jun 23, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -/* - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef TAN_H_ -#define TAN_H_ - -#include "vdtcore_common.h" -#include "sincos.h" - -namespace vdt{ - - -namespace details{ - -const double PX1tan=-1.30936939181383777646E4; -const double PX2tan=1.15351664838587416140E6; -const double PX3tan=-1.79565251976484877988E7; - -const double QX1tan = 1.36812963470692954678E4; -const double QX2tan = -1.32089234440210967447E6; -const double QX3tan = 2.50083801823357915839E7; -const double QX4tan = -5.38695755929454629881E7; - -const double DP1tan = 7.853981554508209228515625E-1; -const double DP2tan = 7.94662735614792836714E-9; -const double DP3tan = 3.06161699786838294307E-17; - -const float DP1Ftan = 0.78515625; -const float DP2Ftan = 2.4187564849853515625e-4; -const float DP3Ftan = 3.77489497744594108e-8; - - -//------------------------------------------------------------------------------ -/// Reduce to -45 to 45 -inline double reduce2quadranttan(double x, int32_t& quad) { - - x = fabs(x); - quad = int( ONEOPIO4 * x ); // always positive, so (int) == std::floor - quad = (quad+1) & (~1); - const double y = quad; - // Extended precision modular arithmetic - return ((x - y * DP1tan) - y * DP2tan) - y * DP3tan; - } - -//------------------------------------------------------------------------------ -/// Reduce to -45 to 45 -inline float reduce2quadranttan(float x, int32_t& quad) { - - x = fabs(x); - quad = int( ONEOPIO4F * x ); // always positive, so (int) == std::floor - quad = (quad+1) & (~1); - const float y = quad; - // Extended precision modular arithmetic - return ((x - y * DP1Ftan) - y * DP2Ftan) - y * DP3Ftan; - } - -} - -//------------------------------------------------------------------------------ -/// Double precision tangent implementation -inline double fast_tan(double x){ - - const uint64_t sign_mask = details::getSignMask(x); - - int32_t quad =0; - const double z=details::reduce2quadranttan(x,quad); - - const double zz = z * z; - - double res=z; - - if( zz > 1.0e-14 ){ - double px = details::PX1tan; - px *= zz; - px += details::PX2tan; - px *= zz; - px += details::PX3tan; - - double qx=zz; - qx += details::QX1tan; - qx *=zz; - qx += details::QX2tan; - qx *=zz; - qx += details::QX3tan; - qx *=zz; - qx += details::QX4tan; - - res = z + z * zz * px / qx; - } - - // A no branching way to say: if j&2 res = -1/res. You can!!! - quad &=2; - quad >>=1; - const int32_t alt = quad^1; - // Avoid fpe generated by 1/0 if res is 0 - const double zeroIfXNonZero = (x==0.); - res += zeroIfXNonZero; - res = quad * (-1./res) + alt * res; // one coeff is one and one is 0! - - // Again, return 0 if res==0, the correct result otherwhise - return details::dpXORuint64(res,sign_mask) * (1.-zeroIfXNonZero); - -} - -// Single precision ------------------------------------------------------------ - -inline float fast_tanf(float x){ - const uint32_t sign_mask = details::getSignMask(x); - - int32_t quad =0; - const float z=details::reduce2quadranttan(x,quad); - - const float zz = z * z; - - float res=z; - - if( zz > 1.0e-14f ){ - res = - ((((( 9.38540185543E-3f * zz - + 3.11992232697E-3f) * zz - + 2.44301354525E-2f) * zz - + 5.34112807005E-2f) * zz - + 1.33387994085E-1f) * zz - + 3.33331568548E-1f) * zz * z - + z; - } - - // A no branching way to say: if j&2 res = -1/res. You can!!! - quad &=2; - quad >>=1; - const int32_t alt = quad^1; - // Avoid fpe generated by 1/0 if res is 0 - const float zeroIfXNonZero = (x==0.f); - res += zeroIfXNonZero; - res = quad * (-1.f/res) + alt * res; // one coeff is one and one is 0! - - return details::spXORuint32(res,sign_mask) * (1.f-zeroIfXNonZero); - -} - -//------------------------------------------------------------------------------ - -// void tanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void fast_tanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); -// void tanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); -// void fast_tanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); - -//------------------------------------------------------------------------------ - -} //vdt namespace - - -#endif /* COS_H_ */ diff --git a/math/vdt/include/vdt/vdtMath.h b/math/vdt/include/vdt/vdtMath.h deleted file mode 100644 index ede572440c9e8ed1ebf38f7787b37e85a5106a2d..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/vdtMath.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _VDT_MATH_H_ -#define _VDT_MATH_H_ - -// Include all the VDT fucntions - -#include "sin.h" -#include "asin.h" - -#include "cos.h" - -#include "tan.h" -#include "atan.h" -#include "atan2.h" - -#include "exp.h" -#include "log.h" - -#include "sqrt.h" - -#include "inv.h" - -#endif diff --git a/math/vdt/include/vdt/vdtcore_common.h b/math/vdt/include/vdt/vdtcore_common.h deleted file mode 100644 index dd8169a9fe7066aefec31b8284292501326bfe6d..0000000000000000000000000000000000000000 --- a/math/vdt/include/vdt/vdtcore_common.h +++ /dev/null @@ -1,246 +0,0 @@ -/* - * vdtcore_common.h - * Common functions for the vdt routines. - * The basic idea is to exploit Pade polynomials. - * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier - * moshier@na-net.ornl.gov) as well as actual code for the exp, log, sin, cos, - * tan, asin, acos and atan functions. The Cephes library can be found here: - * http://www.netlib.org/cephes/ - * - * Created on: Jun 23, 2012 - * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente - */ - -/* - * VDT is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef VDTCOMMON_H_ -#define VDTCOMMON_H_ - -#include "inttypes.h" -#include <cmath> - -namespace vdt{ - -namespace details{ - -// Constants -const double TWOPI = 2.*M_PI; -const double PI = M_PI; -const double PIO2 = M_PI_2; -const double PIO4 = M_PI_4; -const double ONEOPIO4 = 4./M_PI; - -const float TWOPIF = 2.*M_PI; -const float PIF = M_PI; -const float PIO2F = M_PI_2; -const float PIO4F = M_PI_4; -const float ONEOPIO4F = 4./M_PI; - -const double MOREBITS = 6.123233995736765886130E-17; - - -const float MAXNUMF = 3.4028234663852885981170418348451692544e38f; - -//------------------------------------------------------------------------------ - -/// Used to switch between different type of interpretations of the data (64 bits) -union ieee754{ - ieee754 () {}; - ieee754 (double thed) {d=thed;}; - ieee754 (uint64_t thell) {ll=thell;}; - ieee754 (float thef) {f[0]=thef;}; - ieee754 (uint32_t thei) {i[0]=thei;}; - double d; - float f[2]; - uint32_t i[2]; - uint64_t ll; - uint16_t s[4]; -}; - -//------------------------------------------------------------------------------ - -/// Converts an unsigned long long to a double -inline double uint642dp(uint64_t ll) { - ieee754 tmp; - tmp.ll=ll; - return tmp.d; -} - -//------------------------------------------------------------------------------ - -/// Converts a double to an unsigned long long -inline uint64_t dp2uint64(double x) { - ieee754 tmp; - tmp.d=x; - return tmp.ll; -} - -//------------------------------------------------------------------------------ -/// Makes an AND of a double and a unsigned long long -inline double dpANDuint64(const double x, const uint64_t i ){ - return uint642dp(dp2uint64(x) & i); -} -//------------------------------------------------------------------------------ -/// Makes an OR of a double and a unsigned long long -inline double dpORuint64(const double x, const uint64_t i ){ - return uint642dp(dp2uint64(x) | i); -} - -/// Makes a XOR of a double and a unsigned long long -inline double dpXORuint64(const double x, const uint64_t i ){ - return uint642dp(dp2uint64(x) ^ i); -} - -//------------------------------------------------------------------------------ -inline uint64_t getSignMask(const double x){ - const uint64_t mask=0x8000000000000000ULL; - return dp2uint64(x) & mask; -} - -//------------------------------------------------------------------------------ -/// Converts an int to a float -inline float uint322sp(int x) { - ieee754 tmp; - tmp.i[0]=x; - return tmp.f[0]; - } - -//------------------------------------------------------------------------------ -/// Converts a float to an int -inline uint32_t sp2uint32(float x) { - ieee754 tmp; - tmp.f[0]=x; - return tmp.i[0]; - } - -//------------------------------------------------------------------------------ -/// Makes an AND of a float and a unsigned long -inline float spANDuint32(const float x, const uint32_t i ){ - return uint322sp(sp2uint32(x) & i); -} -//------------------------------------------------------------------------------ -/// Makes an OR of a float and a unsigned long -inline float spORuint32(const float x, const uint32_t i ){ - return uint322sp(sp2uint32(x) | i); -} - -//------------------------------------------------------------------------------ -/// Makes an OR of a float and a unsigned long -inline float spXORuint32(const float x, const uint32_t i ){ - return uint322sp(sp2uint32(x) ^ i); -} -//------------------------------------------------------------------------------ -/// Get the sign mask -inline uint32_t getSignMask(const float x){ - const uint32_t mask=0x80000000; - return sp2uint32(x) & mask; -} - -//------------------------------------------------------------------------------ -/// Like frexp but vectorising and the exponent is a double. -inline double getMantExponent(const double x, double & fe){ - - uint64_t n = dp2uint64(x); - - // Shift to the right up to the beginning of the exponent. - // Then with a mask, cut off the sign bit - uint64_t le = (n >> 52); - - // chop the head of the number: an int contains more than 11 bits (32) - int32_t e = le; // This is important since sums on uint64_t do not vectorise - fe = e-1023 ; - - // This puts to 11 zeroes the exponent - n &=0x800FFFFFFFFFFFFFULL; - // build a mask which is 0.5, i.e. an exponent equal to 1022 - // which means *2, see the above +1. - const uint64_t p05 = 0x3FE0000000000000ULL; //dp2uint64(0.5); - n |= p05; - - return uint642dp(n); -} - -//------------------------------------------------------------------------------ -/// Like frexp but vectorising and the exponent is a float. -inline float getMantExponentf(const float x, float & fe){ - - uint32_t n = sp2uint32(x); - int32_t e = (n >> 23)-127; - fe = e; - - // fractional part - const uint32_t p05f = 0x3f000000; // //sp2uint32(0.5); - n &= 0x807fffff;// ~0x7f800000; - n |= p05f; - - return uint322sp(n); - -} - -//------------------------------------------------------------------------------ -/// Converts a fp to an int -inline uint32_t fp2uint(float x) { - return sp2uint32(x); - } -/// Converts a fp to an int -inline uint64_t fp2uint(double x) { - return dp2uint64(x); - } -/// Converts an int to fp -inline float int2fp(uint32_t i) { - return uint322sp(i); - } -/// Converts an int to fp -inline double int2fp(uint64_t i) { - return uint642dp(i); - } - -//------------------------------------------------------------------------------ -/** - * A vectorisable floor implementation, not only triggered by fast-math. - * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 - * compliant for argument -0.0 -**/ -inline double fpfloor(const double x){ - // no problem since exp is defined between -708 and 708. Int is enough for it! - int32_t ret = int32_t (x); - ret-=(sp2uint32(x)>>31); - return ret; - -} -//------------------------------------------------------------------------------ -/** - * A vectorisable floor implementation, not only triggered by fast-math. - * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 - * compliant for argument -0.0 -**/ -inline float fpfloor(const float x){ - int32_t ret = int32_t (x); - ret-=(sp2uint32(x)>>31); - return ret; - -} - -//------------------------------------------------------------------------------ - - - - -} - -} // end of namespace vdt - -#endif /* VDTCOMMON_H_ */ diff --git a/math/vdt/tests/CMakeLists.txt b/math/vdt/tests/CMakeLists.txt deleted file mode 100644 index b66b180e00a23073c747064eebba625f06a2bfd7..0000000000000000000000000000000000000000 --- a/math/vdt/tests/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -set (Libraries Core Hist Gpad MathCore) -set (testname stressVdt) - - -if( CMAKE_COMPILER_IS_GNUCXX AND GCC_MAJOR GREATER 3 AND GCC_MINOR GREATER 6 ) - set (ModernGcc True) -endif() - -if( "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" AND CLANG_MAJOR GREATER 2 AND CLANG_MINOR GREATER 3 ) - set (ModernClang True) - # __extern_always_inline is not understood by clang as of version 3.5 - set (alwaysInlineClang "-D__extern_always_inline=inline") -endif() - -if( ModernGcc ) - set(additional_compile_flags "-O3 -ffast-math") -elseif(ModernClang) - set(additional_compile_flags "-O3 -funsafe-math-optimizations -fno-trapping-math -ffinite-math-only ${alwaysInlineClang} ") -endif() - -ROOT_EXECUTABLE(${testname} ${testname}.cxx LIBRARIES ${Libraries} ADDITIONAL_COMPILE_FLAGS ${additional_compile_flags} ) - -ROOT_ADD_TEST(${testname} COMMAND ${testname} FAILREGEX "too") # for "too slow" and "too inaccurate" :) diff --git a/math/vdt/tests/stressVdt.cxx b/math/vdt/tests/stressVdt.cxx deleted file mode 100644 index 1874adc3d9eb75f817f218f03809e3d7bd3217d1..0000000000000000000000000000000000000000 --- a/math/vdt/tests/stressVdt.cxx +++ /dev/null @@ -1,471 +0,0 @@ -/// Simple program to benchmark vdt accuracy and cpu performance. -#include <vector> -#include <iostream> -#include <cmath> //for log2 -#include <assert.h> -#include <limits> -#include <iostream> -#include <iomanip> -#include <map> -#include <string> - -#include "vdt/vdtMath.h" - -#include "TStopwatch.h" -#include "TRandom3.h" -#include "TH1F.h" -#include "TCanvas.h" -#include "TError.h" - -const double RANGE=3000.; -const uint32_t SIZE= 16777216; - -//------------------------------------------------------------------------------ -// Not good for floating point, but just to get the bit difference -template <class T> -uint64_t fp2uint (T /*x*/) -{ - T::not_implemented; // "Static assert" in C++03 - return 0; -} - -template <> -uint64_t fp2uint<double> (double x) -{ - return vdt::details::dp2uint64(x); -} - -template <> -uint64_t fp2uint<float> (float x) -{ - return vdt::details::sp2uint32(x); -} - -//------------------------------------------------------------------------------ -/// Returns most significative different bit -template <class T> -inline uint32_t diffbit(const T a,const T b ) -{ - uint64_t ia = fp2uint<T>(a); - uint64_t ib = fp2uint<T>(b); - uint64_t c = ia>ib? ia-ib : ib-ia; - return log2(c)+1; -} - -//------------------------------------------------------------------------------ -// This allows to vectorise on very modern compilers (>=gcc 4.7) -// Note how the templating mechanism allows the compiler to *inline* the -// function. It is much more efficient than a void ptr or std::function! -template <typename T, typename F> -inline void calculateValues(F mathFunc, - const std::vector<T>& inputVector, - std::vector<T>& outputVector) -{ - - const uint32_t size = inputVector.size(); - - for (unsigned int i=0;i<size;++i){ - outputVector[i]=mathFunc(inputVector[i]); - } - -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T> -void compareOutputs(const std::vector<T>& inputVector1, - const std::vector<T>& inputVector2, - std::vector<uint32_t>& outputVector) -{ - assert(inputVector1.size()==inputVector2.size() && - inputVector1.size()==outputVector.size()); - - const uint32_t size = inputVector1.size(); - - for (unsigned int i=0;i<size;++i) - outputVector[i]=diffbit(inputVector1[i],inputVector2[i]); -} - -//------------------------------------------------------------------------------ - -enum rangeType {kReal, - kExp, - kExpf, - kRealPlus, - km1p1}; - -template <typename T> -void fillRandom(std::vector<T>& randomV, - const rangeType theRangeType) -{ - // Yeah, well, maybe it can be done better. But this is not a tutorial about - // random generation! - const uint32_t size=randomV.size(); - static TRandom3 rndmGenerator(123); - T* arr = &(randomV[0]); - rndmGenerator.RndmArray(size,arr); - if (kReal == theRangeType ) for (uint32_t i=0;i<size;++i) randomV[i]=(randomV[i]-0.5)*2*RANGE; - if (kExp == theRangeType ) for (uint32_t i=0;i<size;++i) randomV[i]=(randomV[i]-0.5)*2*705.; - if (kExpf == theRangeType ) for (uint32_t i=0;i<size;++i) randomV[i]=(randomV[i]-0.5)*2*85.; - if (kRealPlus == theRangeType ) for (uint32_t i=0;i<size;++i) randomV[i]=randomV[i]*RANGE+0.000001; - if (km1p1 == theRangeType ) for (uint32_t i=0;i<size;++i) randomV[i]=(randomV[i]-0.5)*2; - -} - -//------------------------------------------------------------------------------ - -template<typename T> -void treatBinDiffHisto(TH1F& histo, - const std::vector<T>& VDTVals, - const std::vector<T>& SystemVals) -{ - const uint32_t size = VDTVals.size(); - std::vector<uint32_t> diff(size); - compareOutputs(VDTVals,SystemVals,diff); - uint32_t theDiff=0; - for (uint32_t i =0;i<size;++i){ - theDiff=diff[i]; - histo.Fill(theDiff); - } -} - -//////////////////////////////////////////////////////////////////////////////// - -template <typename T, typename F> -inline double measureTiming(F mathFunc, - const std::vector<T>& inputVector, - std::vector<T>& outputVector) -{ - TStopwatch timer; - timer.Start(); - calculateValues<T>(mathFunc,inputVector,outputVector); - timer.Stop(); - return timer.RealTime(); -} - -//------------------------------------------------------------------------------ - - /* - Reference values on a Intel(R) Core(TM) i7 CPU 950 @ 3.07GHz - gcc 4.8.2 -Ofast - - Expf 0.110623 0.821863 7.4294 3.35636 6 - Sinf 0.151893 9.83798 64.7692 0.235055 9 - Cosf 0.11277 9.87508 87.5683 0.234271 8 - Tanf 0.167273 9.84792 58.8733 0.519784 9 - Atanf 0.0855272 0.529288 6.18854 0.366963 2 - Logf 0.114023 0.465541 4.08287 0.270999 2 - Isqrtf 0.0328619 0.275043 8.36965 4.35237 7 - Asinf 0.0958891 0.415733 4.33556 0.60167 3 - Acosf 0.099067 0.470179 4.74607 0.480427 10 - Exp 0.204585 0.64904 3.17247 0.137142 2 - Sin 0.327537 1.5099 4.60986 0.253579 2 - Cos 0.299601 1.5038 5.01933 0.253664 2 - Tan 0.276369 2.13009 7.7074 0.351065 5 - Atan 0.355532 0.902413 2.5382 0.326134 2 - Log 0.244172 1.25513 5.14034 0.385112 2 - Isqrt 0.167836 0.283752 1.69065 0.453692 2 - Asin 0.40379 0.869315 2.15289 0.318644 2 - Acos 0.392566 0.864706 2.2027 0.391922 11 - */ - - // The reference values: speedup and accuracy. Some contingency is given -struct staticInitHelper{ - std::map<std::string, std::pair<float,uint32_t> > referenceValues; - - staticInitHelper() - { -#if defined(R__B64) - const int sincosTolerance = 4; - const int tanTolerance = 5; -#else - const int sincosTolerance = 19; - const int tanTolerance = 19; -#endif - referenceValues["Expf"] = std::make_pair(1.f,8); - referenceValues["Sinf"] = std::make_pair(1.f,11); - referenceValues["Cosf"] = std::make_pair(1.f,10); - referenceValues["Tanf"] = std::make_pair(1.f,11); - referenceValues["Atanf"] = std::make_pair(1.f,4); - referenceValues["Logf"] = std::make_pair(1.f,4); - referenceValues["Isqrtf"]= std::make_pair(1.f,9); - referenceValues["Asinf"] = std::make_pair(1.f,5); - referenceValues["Acosf"] = std::make_pair(1.f,12); - referenceValues["Exp"] = std::make_pair(1.f,4); - referenceValues["Sin"] = std::make_pair(1.f,sincosTolerance); - referenceValues["Cos"] = std::make_pair(1.f,sincosTolerance); - referenceValues["Tan"] = std::make_pair(1.f,tanTolerance); - referenceValues["Atan"] = std::make_pair(1.f,4); - referenceValues["Log"] = std::make_pair(1.f,4); - referenceValues["Isqrt"] = std::make_pair(.4f,4); // Fix fluctuation on x86_64-slc5-gcc47 - referenceValues["Asin"] = std::make_pair(1.f,4); - referenceValues["Acos"] = std::make_pair(1.f,13); - } -} gbl; - -template <typename T, typename F1, typename F2> -inline void compareFunctions(const std::string& label, - F1 vdtFunc, - F2 systemFunc, - const std::vector<T>& inputVector, - std::vector<T>& outputVectorVDT, - std::vector<T>& outputVectorSystem, - float& speedup, - uint32_t& maxdiffBit, - TH1F& histo) -{ - double timeVdt = measureTiming<T>(vdtFunc,inputVector,outputVectorVDT); - double timeSystem = measureTiming<T>(systemFunc,inputVector,outputVectorSystem); - std::string name(label); - std::string title(label); - title+=";Diff. Bit;#"; - histo.Reset(); - histo.SetName(label.c_str()); - histo.SetTitle(title.c_str()); - treatBinDiffHisto(histo,outputVectorVDT,outputVectorSystem); - double meandiffBit = histo.GetMean(); - maxdiffBit = 0; - const uint32_t xmax=histo.GetXaxis()->GetXmax(); - - for (uint32_t i=1;i<=xmax;i++){ - if ( histo.GetBinContent(i) > 0.f ) - maxdiffBit=i-1; - } - - speedup = timeSystem/timeVdt ; - - std::cout << std::setw(8) - << label << std::setw(10) - << timeVdt << std::setw(10) - << timeSystem << std::setw(15) - << speedup << std::setw(15) - << meandiffBit << std::setw(15) - << maxdiffBit << std::endl; - - // Draw it - TCanvas c; - c.cd(); - histo.SetLineColor(kBlue); - histo.SetLineWidth(2); - histo.Draw("hist"); - c.SetLogy(); - name+=".png"; - Int_t oldErrorIgnoreLevel = gErrorIgnoreLevel; // we know we are printing.. - gErrorIgnoreLevel=1001; - c.Print(name.c_str()); - gErrorIgnoreLevel=oldErrorIgnoreLevel; -} - -// -void checkFunction(const std::string& label,float /*speedup*/, uint32_t maxdiffBit) -{ -// Remove check on the speed as routinely this program is ran on virtual build nodes -// and several factors may cause fluctuations in the result. -// if (gbl.referenceValues[label].first > speedup) -// std::cerr << "Note " << label << " was slower than the system library.\n"; - if (gbl.referenceValues[label].second < maxdiffBit) - std::cerr << "WARNING " << label << " is too inaccurate!\n"; -} - -//------------------------------------------------------------------------------ -// Some helpers -inline float isqrtf(float x) {return 1.f/sqrt(x);}; -inline double isqrt(double x) {return 1./sqrt(x);}; - -//------------------------------------------------------------------------------ -// Artificially chunck the work to help the compiler manage everything. -// If all the content is moved to a single function, the vectorization breaks. -// Same holds for the check of speed and accuracy. Technically it could be part -// of compareFunctions. -void spStep1() -{ - std::vector<float> VDTVals(SIZE); - std::vector<float> SystemVals(SIZE); - std::vector<float> realNumbers(SIZE); - TH1F histo("bitDiffHisto","willbechanged",32,0,32); - - float speedup; - uint32_t maxdiffBit; - - fillRandom(realNumbers,kExpf); - compareFunctions<float>("Expf", vdt::fast_expf, expf, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Expf",speedup, maxdiffBit); - - fillRandom(realNumbers,kReal); - compareFunctions<float>("Sinf", vdt::fast_sinf, sinf, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Sinf",speedup, maxdiffBit); - compareFunctions<float>("Cosf", vdt::fast_cosf, cosf, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Cosf",speedup, maxdiffBit); - compareFunctions<float>("Tanf", vdt::fast_tanf, tanf, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Tanf",speedup, maxdiffBit); - compareFunctions<float>("Atanf", vdt::fast_atanf, atanf, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Atanf",speedup, maxdiffBit); -} - -void spStep2() -{ - std::vector<float> VDTVals(SIZE); - std::vector<float> SystemVals(SIZE); - std::vector<float> realNumbers(SIZE); - TH1F histo("bitDiffHisto","willbechanged",32,0,32); - - float speedup; - uint32_t maxdiffBit; - - fillRandom(realNumbers,kRealPlus); - compareFunctions<float>("Logf", vdt::fast_logf, logf, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Logf",speedup, maxdiffBit); - compareFunctions<float>("Isqrtf", vdt::fast_isqrtf, isqrtf, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Isqrtf",speedup, maxdiffBit); -} - -void spStep3() -{ - std::vector<float> VDTVals(SIZE); - std::vector<float> SystemVals(SIZE); - std::vector<float> realNumbers(SIZE); - TH1F histo("bitDiffHisto","willbechanged",32,0,32); - - float speedup; - uint32_t maxdiffBit; - - fillRandom(realNumbers,km1p1); - compareFunctions<float>("Asinf", vdt::fast_asinf, asinf, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Asinf",speedup, maxdiffBit); - compareFunctions<float>("Acosf", vdt::fast_acosf, acosf, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Acosf",speedup, maxdiffBit); -} - -void dpStep1() -{ - std::vector<double> VDTVals(SIZE); - std::vector<double> SystemVals(SIZE); - std::vector<double> realNumbers(SIZE); - TH1F histo("bitDiffHisto","willbechanged",64,0,64); - - float speedup; - uint32_t maxdiffBit; - - fillRandom(realNumbers,kExp); - compareFunctions<double>("Exp", vdt::fast_exp, exp, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Exp",speedup, maxdiffBit); - fillRandom(realNumbers,kReal); - compareFunctions<double>("Sin", vdt::fast_sin, sin, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Sin",speedup, maxdiffBit); - compareFunctions<double>("Cos", vdt::fast_cos, cos, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Cos",speedup, maxdiffBit); - compareFunctions<double>("Tan", vdt::fast_tan, tan, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Tan",speedup, maxdiffBit); - compareFunctions<double>("Atan", vdt::fast_atan, atan, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Atan",speedup, maxdiffBit); -} - -void dpStep2() -{ - std::vector<double> VDTVals(SIZE); - std::vector<double> SystemVals(SIZE); - std::vector<double> realNumbers(SIZE); - TH1F histo("bitDiffHisto","willbechanged",64,0,64); - - float speedup; - uint32_t maxdiffBit; - - fillRandom(realNumbers,kRealPlus); - compareFunctions<double>("Log", vdt::fast_log, log, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Log",speedup, maxdiffBit); - compareFunctions<double>("Isqrt", vdt::fast_isqrt, isqrt, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Isqrt",speedup, maxdiffBit); -} - -void dpStep3() -{ - std::vector<double> VDTVals(SIZE); - std::vector<double> SystemVals(SIZE); - std::vector<double> realNumbers(SIZE); - TH1F histo("bitDiffHisto","willbechanged",64,0,64); - - float speedup; - uint32_t maxdiffBit; - - fillRandom(realNumbers,km1p1); - compareFunctions<double>("Asin", vdt::fast_asin, asin, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Asin",speedup, maxdiffBit); - compareFunctions<double>("Acos", vdt::fast_acos, acos, realNumbers, VDTVals, SystemVals, speedup, maxdiffBit, histo); - checkFunction("Acos",speedup, maxdiffBit); -} -//------------------------------------------------------------------------------ - -int main(){ - - std::cout << "Test performed on " << SIZE << " random numbers\n" - << std::setw(8) - << "Name" << std::setw(10) - << "VDT (s)" << std::setw(10) - << "Sys (s)" << std::setw(15) - << "Speedup" << std::setw(15) - << "<diff Bit>" << std::setw(15) - << "max(diff Bit)" << std::endl; - - // Single precision ---- - spStep1(); - spStep2(); - spStep3(); - - // Double precision ---- - dpStep1(); - dpStep2(); - dpStep3(); - - return 0; - -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -