From 2b8973d3efbc3807e8fa1c176bbdfefc96f49e13 Mon Sep 17 00:00:00 2001 From: Matthias Kretz <kretz@kde.org> Date: Tue, 25 Mar 2014 17:06:43 +0100 Subject: [PATCH] =?UTF-8?q?Merge=20from=20Matthias=20github=20branch=20lat?= =?UTF-8?q?est=20change=20for=20Vc=200.7=20=E2=80=9Cupdate=20to=20latest?= =?UTF-8?q?=20Vc=200.7=20branch=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- math/vc/cmake/VcMacros.cmake | 7 ++- math/vc/include/Vc/Allocator | 13 ++++- math/vc/include/Vc/common/memory.h | 58 ++++++++++++++++++---- math/vc/include/Vc/common/memorybase.h | 29 +++++++++++ math/vc/include/Vc/global.h | 2 + math/vc/tests/CMakeLists.txt | 33 +++++++----- math/vc/tests/const.h | 8 +-- math/vc/tests/implicit_type_conversion.cpp | 2 +- math/vc/tests/memory.cpp | 17 +++++++ math/vc/tests/stlcontainer.cpp | 9 ++++ 10 files changed, 143 insertions(+), 35 deletions(-) diff --git a/math/vc/cmake/VcMacros.cmake b/math/vc/cmake/VcMacros.cmake index 47d71b3d28b..ab9077cb6d9 100644 --- a/math/vc/cmake/VcMacros.cmake +++ b/math/vc/cmake/VcMacros.cmake @@ -376,10 +376,9 @@ macro(vc_set_preferred_compiler_flags) vc_add_compiler_flag(Vc_DEFINITIONS "-Wno-local-type-template-args") vc_add_compiler_flag(Vc_DEFINITIONS "-Wno-unnamed-type-template-args") - # disable this warning appearing from version 3.4 (5.1) - vc_add_compiler_flag(Vc_DEFINITIONS "-Wno-unused-function") - - AddCompilerFlag(-stdlib=libc++) + if(NOT DEFINED Vc_INSIDE_ROOT) # ROOT has to set this up + AddCompilerFlag(-stdlib=libc++) + endif() endif() if(NOT Vc_COMPILER_IS_MSVC) diff --git a/math/vc/include/Vc/Allocator b/math/vc/include/Vc/Allocator index 0342f18d6f3..06158ec63cb 100644 --- a/math/vc/include/Vc/Allocator +++ b/math/vc/include/Vc/Allocator @@ -23,11 +23,13 @@ #include <new> #include <cstddef> #include <cstdlib> -#include "common/macros.h" #ifdef VC_CXX11 #include <utility> #endif +#include "global.h" +#include "common/macros.h" + namespace ROOT { namespace Vc { @@ -125,7 +127,14 @@ namespace std \ NaturalAlignment = sizeof(void *) > Vc_ALIGNOF(long double) ? sizeof(void *) : (Vc_ALIGNOF(long double) > Vc_ALIGNOF(long long) ? Vc_ALIGNOF(long double) : Vc_ALIGNOF(long long)), #endif - Alignment = Vc_ALIGNOF(T), +#ifdef VC_IMPL_AVX + SimdAlignment = 32, +#elif defined VC_IMPL_SSE + SimdAlignment = 16, +#else + SimdAlignment = 1, +#endif + Alignment = Vc_ALIGNOF(T) > SimdAlignment ? Vc_ALIGNOF(T) : SimdAlignment, /* The number of extra bytes allocated must be large enough to put a pointer right * before the adjusted address. This pointer stores the original address, which is * required to call ::operator delete in deallocate. diff --git a/math/vc/include/Vc/common/memory.h b/math/vc/include/Vc/common/memory.h index 755819b6a30..49cc9418767 100644 --- a/math/vc/include/Vc/common/memory.h +++ b/math/vc/include/Vc/common/memory.h @@ -180,9 +180,15 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli template<typename Parent, typename RM> Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 2, RM> &rhs) { assert(vectorsCount() == rhs.vectorsCount()); - std::memcpy(m_mem, rhs.m_mem, vectorsCount() * sizeof(V)); + Internal::copyVectors(*this, rhs); return *this; } + + Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) { + Internal::copyVectors(*this, rhs); + return *this; + } + /** * Initialize all data with the given vector. * @@ -325,13 +331,37 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli */ _VC_CONSTEXPR size_t vectorsCount() const { return VectorsCount; } - template<typename Parent, typename RM> - Vc_ALWAYS_INLINE Memory<V> &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) { +#ifdef VC_CXX11 + Vc_ALWAYS_INLINE Memory() = default; +#else + Vc_ALWAYS_INLINE Memory() {} +#endif + + inline Memory(const Memory &rhs) + { + Internal::copyVectors(*this, rhs); + } + + template <size_t S> inline Memory(const Memory<V, S> &rhs) + { + assert(vectorsCount() == rhs.vectorsCount()); + Internal::copyVectors(*this, rhs); + } + + inline Memory &operator=(const Memory &rhs) + { + Internal::copyVectors(*this, rhs); + return *this; + } + + template <size_t S> inline Memory &operator=(const Memory<V, S> &rhs) + { assert(vectorsCount() == rhs.vectorsCount()); - std::memcpy(m_mem, rhs.m_mem, entriesCount() * sizeof(EntryType)); + Internal::copyVectors(*this, rhs); return *this; } - Vc_ALWAYS_INLINE Memory<V> &operator=(const EntryType *rhs) { + + Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) { std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType)); return *this; } @@ -438,7 +468,7 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli m_vectorsCount(rhs.vectorsCount()), m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size)) { - std::memcpy(m_mem, rhs.m_mem, entriesCount() * sizeof(EntryType)); + Internal::copyVectors(*this, rhs); } /** @@ -448,12 +478,12 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli * * \param rhs The Memory object to copy from. */ - Vc_ALWAYS_INLINE Memory(const Memory<V, 0u> &rhs) + Vc_ALWAYS_INLINE Memory(const Memory &rhs) : m_entriesCount(rhs.entriesCount()), m_vectorsCount(rhs.vectorsCount()), m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size)) { - std::memcpy(m_mem, rhs.m_mem, entriesCount() * sizeof(EntryType)); + Internal::copyVectors(*this, rhs); } /** @@ -495,9 +525,15 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli * \note this function requires the vectorsCount() of both Memory objects to be equal. */ template<typename Parent, typename RM> - Vc_ALWAYS_INLINE Memory<V> &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) { + Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) { + assert(vectorsCount() == rhs.vectorsCount()); + Internal::copyVectors(*this, rhs); + return *this; + } + + Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) { assert(vectorsCount() == rhs.vectorsCount()); - std::memcpy(m_mem, rhs.m_mem, entriesCount() * sizeof(EntryType)); + Internal::copyVectors(*this, rhs); return *this; } @@ -510,7 +546,7 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli * * \note this function requires that there are entriesCount() many values accessible from \p rhs. */ - Vc_ALWAYS_INLINE Memory<V> &operator=(const EntryType *rhs) { + Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) { std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType)); return *this; } diff --git a/math/vc/include/Vc/common/memorybase.h b/math/vc/include/Vc/common/memorybase.h index a8e886cc362..620010acb50 100644 --- a/math/vc/include/Vc/common/memorybase.h +++ b/math/vc/include/Vc/common/memorybase.h @@ -566,6 +566,35 @@ template<typename V, typename Parent, int Dimension, typename RowMemory> class M } }; +namespace Internal +{ +template <typename V, + typename ParentL, + typename ParentR, + int Dimension, + typename RowMemoryL, + typename RowMemoryR> +inline void copyVectors(MemoryBase<V, ParentL, Dimension, RowMemoryL> &dst, + const MemoryBase<V, ParentR, Dimension, RowMemoryR> &src) +{ + const size_t vectorsCount = dst.vectorsCount(); + size_t i = 3; + for (; i < vectorsCount; i += 4) { + const V tmp3 = src.vector(i - 3); + const V tmp2 = src.vector(i - 2); + const V tmp1 = src.vector(i - 1); + const V tmp0 = src.vector(i - 0); + dst.vector(i - 3) = tmp3; + dst.vector(i - 2) = tmp2; + dst.vector(i - 1) = tmp1; + dst.vector(i - 0) = tmp0; + } + for (i -= 3; i < vectorsCount; ++i) { + dst.vector(i) = src.vector(i); + } +} +} // namespace Internal + } // namespace Vc } // namespace ROOT diff --git a/math/vc/include/Vc/global.h b/math/vc/include/Vc/global.h index 4704d845d8a..82089a86213 100644 --- a/math/vc/include/Vc/global.h +++ b/math/vc/include/Vc/global.h @@ -62,6 +62,8 @@ // ::max_align_t was introduced with GCC 4.7. std::max_align_t took a bit longer. # define VC_HAVE_MAX_ALIGN_T 1 # endif +# elif defined(VC_ICC) +# define VC_HAVE_MAX_ALIGN_T 1 # elif !defined(VC_CLANG) // Clang doesn't provide max_align_t at all # define VC_HAVE_STD_MAX_ALIGN_T 1 diff --git a/math/vc/tests/CMakeLists.txt b/math/vc/tests/CMakeLists.txt index 1178d1563f9..e5645fed3b3 100644 --- a/math/vc/tests/CMakeLists.txt +++ b/math/vc/tests/CMakeLists.txt @@ -6,6 +6,13 @@ if(Vc_COMPILER_IS_MSVC) AddCompilerFlag("/wd4723") # Disable warning "potential divide by 0" (suppress doesn't work) endif() +if(DEFINED Vc_INSIDE_ROOT) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "") # Reset the ROOT default executable destination + set(Vc_TEST_TARGET_PREFIX "vc-") +else() + set(Vc_TEST_TARGET_PREFIX "") +endif() + set(CXX11_FLAG) set(_cxx11_flags "-std=c++11" "-std=c++0x") if(Vc_COMPILER_IS_GCC AND WIN32) @@ -61,8 +68,8 @@ macro(vc_add_test _name) add_target_property(${_target} COMPILE_FLAGS "-DVC_IMPL=Scalar ${_extra_flags}") add_target_property(${_target} LABELS "Scalar") add_dependencies(Scalar ${_target}) - add_test(${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}") - set_property(TEST ${_target} PROPERTY LABELS "Scalar") + add_test(${Vc_TEST_TARGET_PREFIX}${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}") + set_property(TEST ${Vc_TEST_TARGET_PREFIX}${_target} PROPERTY LABELS "Scalar") vc_add_run_target(${_target}) endif() @@ -86,8 +93,8 @@ macro(vc_add_test _name) add_target_property(${_target} COMPILE_FLAGS "${DVC_IMPL} ${_extra_flags}") add_target_property(${_target} LABELS "SSE") add_dependencies(SSE ${_target}) - add_test(${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}") - set_property(TEST ${_target} PROPERTY LABELS "SSE") + add_test(${Vc_TEST_TARGET_PREFIX}${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}") + set_property(TEST ${Vc_TEST_TARGET_PREFIX}${_target} PROPERTY LABELS "SSE") vc_add_run_target(${_target}) endif() endif() @@ -112,8 +119,8 @@ macro(vc_add_test _name) add_target_property(${_target} COMPILE_FLAGS "${DVC_IMPL} ${_extra_flags}") add_target_property(${_target} LABELS "AVX") add_dependencies(AVX ${_target}) - add_test(${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}") - set_property(TEST ${_target} PROPERTY LABELS "AVX") + add_test(${Vc_TEST_TARGET_PREFIX}${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}") + set_property(TEST ${Vc_TEST_TARGET_PREFIX}${_target} PROPERTY LABELS "AVX") vc_add_run_target(${_target}) endif() endif(USE_AVX) @@ -155,8 +162,8 @@ if(USE_SSE2 AND NOT Vc_SSE_INTRINSICS_BROKEN) add_target_property(sse2_blend COMPILE_FLAGS "-DVC_IMPL=SSE2") add_target_property(sse2_blend LABELS "SSE") add_dependencies(SSE sse2_blend) - add_test(sse2_blend "${CMAKE_CURRENT_BINARY_DIR}/sse2_blend") - set_property(TEST sse2_blend PROPERTY LABELS "SSE") + add_test(${Vc_TEST_TARGET_PREFIX}sse2_blend "${CMAKE_CURRENT_BINARY_DIR}/sse2_blend") + set_property(TEST ${Vc_TEST_TARGET_PREFIX}sse2_blend PROPERTY LABELS "SSE") target_link_libraries(sse2_blend Vc) if(USE_SSE4_1) @@ -164,8 +171,8 @@ if(USE_SSE2 AND NOT Vc_SSE_INTRINSICS_BROKEN) add_target_property(sse4_blend COMPILE_FLAGS "-DVC_IMPL=SSE4_1") add_target_property(sse4_blend LABELS "SSE") add_dependencies(SSE sse4_blend) - add_test(sse4_blend "${CMAKE_CURRENT_BINARY_DIR}/sse4_blend") - set_property(TEST sse4_blend PROPERTY LABELS "SSE") + add_test(${Vc_TEST_TARGET_PREFIX}sse4_blend "${CMAKE_CURRENT_BINARY_DIR}/sse4_blend") + set_property(TEST ${Vc_TEST_TARGET_PREFIX}sse4_blend PROPERTY LABELS "SSE") target_link_libraries(sse4_blend Vc) endif() endif() @@ -175,8 +182,8 @@ add_executable(supportfunctions supportfunctions.cpp) target_link_libraries(supportfunctions Vc) add_target_property(supportfunctions LABELS "other") add_dependencies(other supportfunctions) -add_test(supportfunctions "${CMAKE_CURRENT_BINARY_DIR}/supportfunctions") -set_property(TEST supportfunctions PROPERTY LABELS "other") +add_test(${Vc_TEST_TARGET_PREFIX}supportfunctions "${CMAKE_CURRENT_BINARY_DIR}/supportfunctions") +set_property(TEST ${Vc_TEST_TARGET_PREFIX}supportfunctions PROPERTY LABELS "other") vc_add_run_target(supportfunctions) get_property(_incdirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES) @@ -223,7 +230,7 @@ if(TEST_OPERATOR_FAILURES) foreach(type_a ${A} ${B}) foreach(op "^" "==" "*") # "/" "+" "-" "&" "|" "!=" "<=" ">=" "<" ">") set(name "implicit_type_conversion_failures_${type_a}_${op}_${type_b}_${impl}") - add_test(NAME "${name}" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + add_test(NAME "${Vc_TEST_TARGET_PREFIX}${name}" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND ${CMAKE_CXX_COMPILER} ${_flags} ${_implFlags} ${incdirs} -o "${name}.tmp" diff --git a/math/vc/tests/const.h b/math/vc/tests/const.h index 10a5ec65110..b89e2fffc72 100644 --- a/math/vc/tests/const.h +++ b/math/vc/tests/const.h @@ -17,12 +17,12 @@ }}}*/ -#ifndef VC_COMMON_CONST_H -#define VC_COMMON_CONST_H +#ifndef VC_TESTS_CONST_H_ +#define VC_TESTS_CONST_H_ #include <Vc/common/macros.h> -namespace ROOT { +namespace ROOT { namespace Vc { template<typename T> struct Math; @@ -63,4 +63,4 @@ namespace Vc #include <Vc/common/undomacros.h> -#endif // VC_COMMON_CONST_H +#endif // VC_TESTS_CONST_H_ diff --git a/math/vc/tests/implicit_type_conversion.cpp b/math/vc/tests/implicit_type_conversion.cpp index 24948c01121..e778268d1e1 100644 --- a/math/vc/tests/implicit_type_conversion.cpp +++ b/math/vc/tests/implicit_type_conversion.cpp @@ -33,7 +33,7 @@ typedef unsigned long long ulonglong; #define _TYPE_TEST_ERR(a, b) #else #if defined(VC_GCC) && VC_GCC == 0x40801 -#warning "Skipping tests involving operator& because of a bug in GCC 4.8.1 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57532)" +// Skipping tests involving operator& because of a bug in GCC 4.8.1 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57532) #define _TYPE_TEST(a, b, c) \ COMPARE(typeid(a() * b()), typeid(c)); \ COMPARE(typeid(a() / b()), typeid(c)); \ diff --git a/math/vc/tests/memory.cpp b/math/vc/tests/memory.cpp index 0fa648def1d..a8a4f79539c 100644 --- a/math/vc/tests/memory.cpp +++ b/math/vc/tests/memory.cpp @@ -284,6 +284,22 @@ template<typename V> void testCCtor() } } +template<typename V> void testCopyAssignment() +{ + Memory<V, 99> m1; + m1.setZero(); + + Memory<V, 99> m2(m1); + for (size_t i = 0; i < m2.entriesCount(); ++i) { + COMPARE(m2[i], 0); + m2[i] += 1; + } + m1 = m2; + for (size_t i = 0; i < m2.entriesCount(); ++i) { + COMPARE(m1[i], 1); + } +} + int main() { testAllTypes(testEntries); @@ -293,6 +309,7 @@ int main() testAllTypes(testVectorReorganization); testAllTypes(memoryOperators); testAllTypes(testCCtor); + testAllTypes(testCopyAssignment); return 0; } diff --git a/math/vc/tests/stlcontainer.cpp b/math/vc/tests/stlcontainer.cpp index 97e5587ec30..d6fa0fbaede 100644 --- a/math/vc/tests/stlcontainer.cpp +++ b/math/vc/tests/stlcontainer.cpp @@ -25,6 +25,8 @@ #include <vector> #include "unittest.h" +#include "Vc/common/macros.h" + template<typename Vec> size_t alignmentMask() { if (Vec::Size == 1) { @@ -55,6 +57,13 @@ template<typename V> void stdVectorAlignment() std::vector<V> v3(v); std::vector<SomeStruct<V>, Vc::Allocator<SomeStruct<V> > > v4(v2); + + typedef typename V::EntryType T; + for (int i = 1; i < 100; ++i) { + std::vector<T, Vc::Allocator<T> > v5(i); + const size_t expectedAlignment = Vc_ALIGNOF(V); + COMPARE((&v5[0] - static_cast<const T *>(0)) * sizeof(T) & (expectedAlignment - 1), 0); + } } int main(int argc, char **argv) -- GitLab