From 2b8973d3efbc3807e8fa1c176bbdfefc96f49e13 Mon Sep 17 00:00:00 2001
From: Matthias Kretz <kretz@kde.org>
Date: Tue, 25 Mar 2014 17:06:43 +0100
Subject: [PATCH] =?UTF-8?q?Merge=20from=20Matthias=20github=20branch=20lat?=
 =?UTF-8?q?est=20change=20for=20Vc=200.7=20=E2=80=9Cupdate=20to=20latest?=
 =?UTF-8?q?=20Vc=200.7=20branch=E2=80=9D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 math/vc/cmake/VcMacros.cmake               |  7 ++-
 math/vc/include/Vc/Allocator               | 13 ++++-
 math/vc/include/Vc/common/memory.h         | 58 ++++++++++++++++++----
 math/vc/include/Vc/common/memorybase.h     | 29 +++++++++++
 math/vc/include/Vc/global.h                |  2 +
 math/vc/tests/CMakeLists.txt               | 33 +++++++-----
 math/vc/tests/const.h                      |  8 +--
 math/vc/tests/implicit_type_conversion.cpp |  2 +-
 math/vc/tests/memory.cpp                   | 17 +++++++
 math/vc/tests/stlcontainer.cpp             |  9 ++++
 10 files changed, 143 insertions(+), 35 deletions(-)

diff --git a/math/vc/cmake/VcMacros.cmake b/math/vc/cmake/VcMacros.cmake
index 47d71b3d28b..ab9077cb6d9 100644
--- a/math/vc/cmake/VcMacros.cmake
+++ b/math/vc/cmake/VcMacros.cmake
@@ -376,10 +376,9 @@ macro(vc_set_preferred_compiler_flags)
       vc_add_compiler_flag(Vc_DEFINITIONS "-Wno-local-type-template-args")
       vc_add_compiler_flag(Vc_DEFINITIONS "-Wno-unnamed-type-template-args")
 
-      # disable this warning appearing from version 3.4 (5.1) 
-      vc_add_compiler_flag(Vc_DEFINITIONS "-Wno-unused-function")
-
-      AddCompilerFlag(-stdlib=libc++)
+      if(NOT DEFINED Vc_INSIDE_ROOT)  # ROOT has to set this up
+         AddCompilerFlag(-stdlib=libc++)
+      endif()
    endif()
 
    if(NOT Vc_COMPILER_IS_MSVC)
diff --git a/math/vc/include/Vc/Allocator b/math/vc/include/Vc/Allocator
index 0342f18d6f3..06158ec63cb 100644
--- a/math/vc/include/Vc/Allocator
+++ b/math/vc/include/Vc/Allocator
@@ -23,11 +23,13 @@
 #include <new>
 #include <cstddef>
 #include <cstdlib>
-#include "common/macros.h"
 #ifdef VC_CXX11
 #include <utility>
 #endif
 
+#include "global.h"
+#include "common/macros.h"
+
 namespace ROOT {
 namespace Vc
 {
@@ -125,7 +127,14 @@ namespace std \
             NaturalAlignment = sizeof(void *) > Vc_ALIGNOF(long double) ? sizeof(void *) :
                 (Vc_ALIGNOF(long double) > Vc_ALIGNOF(long long) ? Vc_ALIGNOF(long double) : Vc_ALIGNOF(long long)),
 #endif
-            Alignment = Vc_ALIGNOF(T),
+#ifdef VC_IMPL_AVX
+            SimdAlignment = 32,
+#elif defined VC_IMPL_SSE
+            SimdAlignment = 16,
+#else
+            SimdAlignment = 1,
+#endif
+            Alignment = Vc_ALIGNOF(T) > SimdAlignment ? Vc_ALIGNOF(T) : SimdAlignment,
             /* The number of extra bytes allocated must be large enough to put a pointer right
              * before the adjusted address. This pointer stores the original address, which is
              * required to call ::operator delete in deallocate.
diff --git a/math/vc/include/Vc/common/memory.h b/math/vc/include/Vc/common/memory.h
index 755819b6a30..49cc9418767 100644
--- a/math/vc/include/Vc/common/memory.h
+++ b/math/vc/include/Vc/common/memory.h
@@ -180,9 +180,15 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli
             template<typename Parent, typename RM>
             Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 2, RM> &rhs) {
                 assert(vectorsCount() == rhs.vectorsCount());
-                std::memcpy(m_mem, rhs.m_mem, vectorsCount() * sizeof(V));
+                Internal::copyVectors(*this, rhs);
                 return *this;
             }
+
+            Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {  // copy assignment from a Memory of the same type
+                Internal::copyVectors(*this, rhs);  // vector-wise copy; replaces the std::memcpy used by the sibling overload
+                return *this;
+            }
+
             /**
              * Initialize all data with the given vector.
              *
@@ -325,13 +331,37 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli
              */
             _VC_CONSTEXPR size_t vectorsCount() const { return VectorsCount; }
 
-            template<typename Parent, typename RM>
-            Vc_ALWAYS_INLINE Memory<V> &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) {
+#ifdef VC_CXX11
+            Vc_ALWAYS_INLINE Memory() = default;
+#else
+            Vc_ALWAYS_INLINE Memory() {}
+#endif
+
+            inline Memory(const Memory &rhs)  // copy constructor: duplicates the contents of rhs
+            {
+                Internal::copyVectors(*this, rhs);  // copies vectorsCount() vectors from rhs into this object
+            }
+
+            template <size_t S> inline Memory(const Memory<V, S> &rhs)  // construct from a Memory with a (possibly different) size parameter S
+            {
+                assert(vectorsCount() == rhs.vectorsCount());  // both objects must hold the same number of vectors
+                Internal::copyVectors(*this, rhs);  // copy count is taken from *this (see copyVectors)
+            }
+
+            inline Memory &operator=(const Memory &rhs)  // copy assignment from a Memory of the same type
+            {
+                Internal::copyVectors(*this, rhs);  // vector-wise copy of all vectors from rhs
+                return *this;
+            }
+
+            template <size_t S> inline Memory &operator=(const Memory<V, S> &rhs)
+            {
                 assert(vectorsCount() == rhs.vectorsCount());
-                std::memcpy(m_mem, rhs.m_mem, entriesCount() * sizeof(EntryType));
+                Internal::copyVectors(*this, rhs);
                 return *this;
             }
-            Vc_ALWAYS_INLINE Memory<V> &operator=(const EntryType *rhs) {
+
+            Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
                 std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
                 return *this;
             }
@@ -438,7 +468,7 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli
             m_vectorsCount(rhs.vectorsCount()),
             m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
         {
-            std::memcpy(m_mem, rhs.m_mem, entriesCount() * sizeof(EntryType));
+            Internal::copyVectors(*this, rhs);
         }
 
         /**
@@ -448,12 +478,12 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli
          *
          * \param rhs The Memory object to copy from.
          */
-        Vc_ALWAYS_INLINE Memory(const Memory<V, 0u> &rhs)
+        Vc_ALWAYS_INLINE Memory(const Memory &rhs)
             : m_entriesCount(rhs.entriesCount()),
             m_vectorsCount(rhs.vectorsCount()),
             m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
         {
-            std::memcpy(m_mem, rhs.m_mem, entriesCount() * sizeof(EntryType));
+            Internal::copyVectors(*this, rhs);
         }
 
         /**
@@ -495,9 +525,15 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli
          * \note this function requires the vectorsCount() of both Memory objects to be equal.
          */
         template<typename Parent, typename RM>
-        Vc_ALWAYS_INLINE Memory<V> &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) {
+        Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) {
+            assert(vectorsCount() == rhs.vectorsCount());
+            Internal::copyVectors(*this, rhs);
+            return *this;
+        }
+
+        Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
             assert(vectorsCount() == rhs.vectorsCount());
-            std::memcpy(m_mem, rhs.m_mem, entriesCount() * sizeof(EntryType));
+            Internal::copyVectors(*this, rhs);
             return *this;
         }
 
@@ -510,7 +546,7 @@ template<typename V, size_t Size1, size_t Size2> class Memory : public VectorAli
          *
          * \note this function requires that there are entriesCount() many values accessible from \p rhs.
          */
-        Vc_ALWAYS_INLINE Memory<V> &operator=(const EntryType *rhs) {
+        Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
             std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
             return *this;
         }
diff --git a/math/vc/include/Vc/common/memorybase.h b/math/vc/include/Vc/common/memorybase.h
index a8e886cc362..620010acb50 100644
--- a/math/vc/include/Vc/common/memorybase.h
+++ b/math/vc/include/Vc/common/memorybase.h
@@ -566,6 +566,35 @@ template<typename V, typename Parent, int Dimension, typename RowMemory> class M
         }
 };
 
+namespace Internal  // holds copyVectors, the helper the Memory copy constructors/assignments call
+{
+template <typename V,
+          typename ParentL,
+          typename ParentR,
+          int Dimension,
+          typename RowMemoryL,
+          typename RowMemoryR>
+inline void copyVectors(MemoryBase<V, ParentL, Dimension, RowMemoryL> &dst,  // copies dst.vectorsCount() vectors of type V from src into dst
+                        const MemoryBase<V, ParentR, Dimension, RowMemoryR> &src)  // src is only read; it may have a different Parent/RowMemory than dst
+{
+    const size_t vectorsCount = dst.vectorsCount();  // count comes from dst only; src.vectorsCount() is not consulted here
+    size_t i = 3;  // i is the index of the LAST vector in the current group of four
+    for (; i < vectorsCount; i += 4) {  // main loop, unrolled by four: runs while the full group [i-3, i] exists
+        const V tmp3 = src.vector(i - 3);  // read all four source vectors into temporaries first ...
+        const V tmp2 = src.vector(i - 2);
+        const V tmp1 = src.vector(i - 1);
+        const V tmp0 = src.vector(i - 0);
+        dst.vector(i - 3) = tmp3;  // ... then store them (presumably to separate loads from stores for scheduling - TODO confirm)
+        dst.vector(i - 2) = tmp2;
+        dst.vector(i - 1) = tmp1;
+        dst.vector(i - 0) = tmp0;
+    }
+    for (i -= 3; i < vectorsCount; ++i) {  // i -= 3 rewinds to the first vector not yet copied; handles the remaining 0-3 vectors
+        dst.vector(i) = src.vector(i);
+    }
+}
+} // namespace Internal
+
 } // namespace Vc
 } // namespace ROOT
 
diff --git a/math/vc/include/Vc/global.h b/math/vc/include/Vc/global.h
index 4704d845d8a..82089a86213 100644
--- a/math/vc/include/Vc/global.h
+++ b/math/vc/include/Vc/global.h
@@ -62,6 +62,8 @@
 //     ::max_align_t was introduced with GCC 4.7. std::max_align_t took a bit longer.
 #      define VC_HAVE_MAX_ALIGN_T 1
 #    endif
+#  elif defined(VC_ICC)
+#      define VC_HAVE_MAX_ALIGN_T 1
 #  elif !defined(VC_CLANG)
 //   Clang doesn't provide max_align_t at all
 #    define VC_HAVE_STD_MAX_ALIGN_T 1
diff --git a/math/vc/tests/CMakeLists.txt b/math/vc/tests/CMakeLists.txt
index 1178d1563f9..e5645fed3b3 100644
--- a/math/vc/tests/CMakeLists.txt
+++ b/math/vc/tests/CMakeLists.txt
@@ -6,6 +6,13 @@ if(Vc_COMPILER_IS_MSVC)
    AddCompilerFlag("/wd4723") # Disable warning "potential divide by 0" (suppress doesn't work)
 endif()
 
+if(DEFINED Vc_INSIDE_ROOT)
+   set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "")  # Reset the ROOT default executable destination
+   set(Vc_TEST_TARGET_PREFIX "vc-")
+else()
+   set(Vc_TEST_TARGET_PREFIX "")
+endif()
+
 set(CXX11_FLAG)
 set(_cxx11_flags "-std=c++11" "-std=c++0x")
 if(Vc_COMPILER_IS_GCC AND WIN32)
@@ -61,8 +68,8 @@ macro(vc_add_test _name)
          add_target_property(${_target} COMPILE_FLAGS "-DVC_IMPL=Scalar ${_extra_flags}")
          add_target_property(${_target} LABELS "Scalar")
          add_dependencies(Scalar ${_target})
-         add_test(${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}")
-         set_property(TEST ${_target} PROPERTY LABELS "Scalar")
+         add_test(${Vc_TEST_TARGET_PREFIX}${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}")
+         set_property(TEST ${Vc_TEST_TARGET_PREFIX}${_target} PROPERTY LABELS "Scalar")
          vc_add_run_target(${_target})
       endif()
 
@@ -86,8 +93,8 @@ macro(vc_add_test _name)
             add_target_property(${_target} COMPILE_FLAGS "${DVC_IMPL} ${_extra_flags}")
             add_target_property(${_target} LABELS "SSE")
             add_dependencies(SSE ${_target})
-            add_test(${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}")
-            set_property(TEST ${_target} PROPERTY LABELS "SSE")
+            add_test(${Vc_TEST_TARGET_PREFIX}${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}")
+            set_property(TEST ${Vc_TEST_TARGET_PREFIX}${_target} PROPERTY LABELS "SSE")
             vc_add_run_target(${_target})
          endif()
       endif()
@@ -112,8 +119,8 @@ macro(vc_add_test _name)
             add_target_property(${_target} COMPILE_FLAGS "${DVC_IMPL} ${_extra_flags}")
             add_target_property(${_target} LABELS "AVX")
             add_dependencies(AVX ${_target})
-            add_test(${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}")
-            set_property(TEST ${_target} PROPERTY LABELS "AVX")
+            add_test(${Vc_TEST_TARGET_PREFIX}${_target} "${CMAKE_CURRENT_BINARY_DIR}/${_target}")
+            set_property(TEST ${Vc_TEST_TARGET_PREFIX}${_target} PROPERTY LABELS "AVX")
             vc_add_run_target(${_target})
          endif()
       endif(USE_AVX)
@@ -155,8 +162,8 @@ if(USE_SSE2 AND NOT Vc_SSE_INTRINSICS_BROKEN)
       add_target_property(sse2_blend COMPILE_FLAGS "-DVC_IMPL=SSE2")
       add_target_property(sse2_blend LABELS "SSE")
       add_dependencies(SSE sse2_blend)
-      add_test(sse2_blend "${CMAKE_CURRENT_BINARY_DIR}/sse2_blend")
-      set_property(TEST sse2_blend PROPERTY LABELS "SSE")
+      add_test(${Vc_TEST_TARGET_PREFIX}sse2_blend "${CMAKE_CURRENT_BINARY_DIR}/sse2_blend")
+      set_property(TEST ${Vc_TEST_TARGET_PREFIX}sse2_blend PROPERTY LABELS "SSE")
       target_link_libraries(sse2_blend Vc)
 
       if(USE_SSE4_1)
@@ -164,8 +171,8 @@ if(USE_SSE2 AND NOT Vc_SSE_INTRINSICS_BROKEN)
          add_target_property(sse4_blend COMPILE_FLAGS "-DVC_IMPL=SSE4_1")
          add_target_property(sse4_blend LABELS "SSE")
          add_dependencies(SSE sse4_blend)
-         add_test(sse4_blend "${CMAKE_CURRENT_BINARY_DIR}/sse4_blend")
-         set_property(TEST sse4_blend PROPERTY LABELS "SSE")
+         add_test(${Vc_TEST_TARGET_PREFIX}sse4_blend "${CMAKE_CURRENT_BINARY_DIR}/sse4_blend")
+         set_property(TEST ${Vc_TEST_TARGET_PREFIX}sse4_blend PROPERTY LABELS "SSE")
          target_link_libraries(sse4_blend Vc)
       endif()
    endif()
@@ -175,8 +182,8 @@ add_executable(supportfunctions supportfunctions.cpp)
 target_link_libraries(supportfunctions Vc)
 add_target_property(supportfunctions LABELS "other")
 add_dependencies(other supportfunctions)
-add_test(supportfunctions "${CMAKE_CURRENT_BINARY_DIR}/supportfunctions")
-set_property(TEST supportfunctions PROPERTY LABELS "other")
+add_test(${Vc_TEST_TARGET_PREFIX}supportfunctions "${CMAKE_CURRENT_BINARY_DIR}/supportfunctions")
+set_property(TEST ${Vc_TEST_TARGET_PREFIX}supportfunctions PROPERTY LABELS "other")
 vc_add_run_target(supportfunctions)
 
 get_property(_incdirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
@@ -223,7 +230,7 @@ if(TEST_OPERATOR_FAILURES)
          foreach(type_a ${A} ${B})
             foreach(op "^" "==" "*") # "/" "+" "-" "&" "|" "!=" "<=" ">=" "<" ">")
                set(name "implicit_type_conversion_failures_${type_a}_${op}_${type_b}_${impl}")
-               add_test(NAME "${name}" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+               add_test(NAME "${Vc_TEST_TARGET_PREFIX}${name}" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
                   COMMAND
                   ${CMAKE_CXX_COMPILER} ${_flags} ${_implFlags}
                   ${incdirs} -o "${name}.tmp"
diff --git a/math/vc/tests/const.h b/math/vc/tests/const.h
index 10a5ec65110..b89e2fffc72 100644
--- a/math/vc/tests/const.h
+++ b/math/vc/tests/const.h
@@ -17,12 +17,12 @@
 
 }}}*/
 
-#ifndef VC_COMMON_CONST_H
-#define VC_COMMON_CONST_H
+#ifndef VC_TESTS_CONST_H_
+#define VC_TESTS_CONST_H_
 
 #include <Vc/common/macros.h>
 
-namespace ROOT { 
+namespace ROOT {
 namespace Vc
 {
     template<typename T> struct Math;
@@ -63,4 +63,4 @@ namespace Vc
 
 #include <Vc/common/undomacros.h>
 
-#endif // VC_COMMON_CONST_H
+#endif  // VC_TESTS_CONST_H_
diff --git a/math/vc/tests/implicit_type_conversion.cpp b/math/vc/tests/implicit_type_conversion.cpp
index 24948c01121..e778268d1e1 100644
--- a/math/vc/tests/implicit_type_conversion.cpp
+++ b/math/vc/tests/implicit_type_conversion.cpp
@@ -33,7 +33,7 @@ typedef unsigned long long ulonglong;
 #define _TYPE_TEST_ERR(a, b)
 #else
 #if defined(VC_GCC) && VC_GCC == 0x40801
-#warning "Skipping tests involving operator& because of a bug in GCC 4.8.1 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57532)"
+// Skipping tests involving operator& because of a bug in GCC 4.8.1 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57532)
 #define _TYPE_TEST(a, b, c) \
     COMPARE(typeid(a() * b()), typeid(c)); \
     COMPARE(typeid(a() / b()), typeid(c)); \
diff --git a/math/vc/tests/memory.cpp b/math/vc/tests/memory.cpp
index 0fa648def1d..a8a4f79539c 100644
--- a/math/vc/tests/memory.cpp
+++ b/math/vc/tests/memory.cpp
@@ -284,6 +284,22 @@ template<typename V> void testCCtor()
     }
 }
 
+template<typename V> void testCopyAssignment()  // checks copy construction and copy assignment of Memory<V, 99>
+{
+    Memory<V, 99> m1;
+    m1.setZero();
+
+    Memory<V, 99> m2(m1);  // copy-construct m2 from the zeroed m1
+    for (size_t i = 0; i < m2.entriesCount(); ++i) {
+        COMPARE(m2[i], 0);  // the copy must contain the zeros written to m1
+        m2[i] += 1;  // make m2 differ from m1 so the assignment below has an observable effect
+    }
+    m1 = m2;  // copy assignment between two objects of the same Memory type
+    for (size_t i = 0; i < m2.entriesCount(); ++i) {  // m1 and m2 have the same type, so m2's entry count also bounds m1
+        COMPARE(m1[i], 1);  // every entry of m1 must now equal the value stored in m2
+    }
+}
+
 int main()
 {
     testAllTypes(testEntries);
@@ -293,6 +309,7 @@ int main()
     testAllTypes(testVectorReorganization);
     testAllTypes(memoryOperators);
     testAllTypes(testCCtor);
+    testAllTypes(testCopyAssignment);
 
     return 0;
 }
diff --git a/math/vc/tests/stlcontainer.cpp b/math/vc/tests/stlcontainer.cpp
index 97e5587ec30..d6fa0fbaede 100644
--- a/math/vc/tests/stlcontainer.cpp
+++ b/math/vc/tests/stlcontainer.cpp
@@ -25,6 +25,8 @@
 #include <vector>
 #include "unittest.h"
 
+#include "Vc/common/macros.h"
+
 template<typename Vec> size_t alignmentMask()
 {
     if (Vec::Size == 1) {
@@ -55,6 +57,13 @@ template<typename V> void stdVectorAlignment()
 
     std::vector<V> v3(v);
     std::vector<SomeStruct<V>, Vc::Allocator<SomeStruct<V> > > v4(v2);
+
+    typedef typename V::EntryType T;
+    for (int i = 1; i < 100; ++i) {
+        std::vector<T, Vc::Allocator<T> > v5(i);
+        const size_t expectedAlignment = Vc_ALIGNOF(V);
+        COMPARE((&v5[0] - static_cast<const T *>(0)) * sizeof(T) & (expectedAlignment - 1), 0);
+    }
 }
 
 int main(int argc, char **argv)
-- 
GitLab