Helper functions for propagation and testing in both architectures (reference not supported)

73ca0b1d · Emmanouil Stergiadis · Lorenzo Moneta · db912fea · 73ca0b1d · 73ca0b1d
Commit 73ca0b1d authored 6 years ago by Emmanouil Stergiadis Committed by Lorenzo Moneta 6 years ago
--- a/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h
+++ b/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h
@@ -77,9 +77,7 @@ private:
   std::vector<Matrix_t> fDerivatives; ///< First fDerivatives of the activations of this layer.
-   std::vector<int> fForwardIndices;  ///< Vector of indices used for a fast Im2Col in forward pass
   std::vector<int> fBackwardIndices;  ///< Vector of indices used for a fast Im2Col in backward pass
   EActivationFunction fF; ///< Activation function of the layer.
   ERegularization fReg;   ///< The regularization method.
@@ -239,18 +237,12 @@ TConvLayer<Architecture_t>::~TConvLayer()
 template <typename Architecture_t>
 auto TConvLayer<Architecture_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
 {
+   R__ASSERT( input.size() > 0);
-   fForwardIndices.resize(this->GetNLocalViews() * this->GetNLocalViewPixels() );
+   Architecture_t::ConvLayerForward(this->GetOutput(), this->GetDerivatives(), input, this->GetWeightsAt(0),
+                                    this->GetBiasesAt(0), this->GetInputHeight(), this->GetInputWidth(),
-   R__ASSERT( input.size() > 0); 
+                                    this->GetInputDepth(), this->GetFilterHeight(), this->GetFilterWidth(),
-   Architecture_t::Im2colIndices(fForwardIndices, input[0], this->GetNLocalViews(), this->GetInputHeight(), this->GetInputWidth(), this->GetFilterHeight(),
+                                    this->GetDepth(), this->GetStrideRows(), this->GetStrideCols(),
-                             this->GetFilterWidth(), this->GetStrideRows(), this->GetStrideCols(),
+                                    this->GetPaddingHeight(), this->GetPaddingWidth(), this->GetActivationFunction());
-                             this->GetPaddingHeight(), this->GetPaddingWidth());
-   Architecture_t::ConvLayerForward(this->GetOutput(), this->GetDerivatives(), input, this->GetWeightsAt(0),  this->GetBiasesAt(0),
-                                    fF, fForwardIndices, this->GetNLocalViews(), this->GetNLocalViewPixels(),
-                                    this->GetDropoutProbability(), applyDropout ); 
 #if 0  
   // in printciple I could make the indices data member of the class

--- a/tmva/tmva/src/DNN/Architectures/Cpu/Arithmetic.cxx
+++ b/tmva/tmva/src/DNN/Architectures/Cpu/Arithmetic.cxx
@@ -110,6 +110,27 @@ void TCpu<Real_t>::Hadamard(TCpuMatrix<Real_t> &B,
 #endif
 }
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/// \brief Checks two matrices for element-wise equality.
+/// \tparam Real_t An architecture-specific floating point number type.
+/// \param A The first matrix.
+/// \param B The second matrix.
+/// \param epsilon Equality tolerance, needed to address floating point arithmetic.
+/// \return Whether the two matrices can be considered equal element-wise
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename Real_t>
+bool TCpu<Real_t>::AlmostEquals(const TCpuMatrix<Real_t> &A, const TCpuMatrix<Real_t> &B, double epsilon)
+{
+    const Real_t *dataA = A.GetRawDataPointer();
+    const Real_t *dataB = B.GetRawDataPointer();
+    size_t nElements =  A.GetNElements();
+    for(size_t i = 0; i < nElements; i++) {
+        if(fabs(dataA[i] - dataB[i]) > epsilon) return false;
+    }
+    return true;
+}
 //____________________________________________________________________________
 template<typename Real_t>
 void TCpu<Real_t>::SumColumns(TCpuMatrix<Real_t> &B,

--- a/tmva/tmva/src/DNN/Architectures/Cuda/Arithmetic.cu
+++ b/tmva/tmva/src/DNN/Architectures/Cuda/Arithmetic.cu
@@ -165,7 +165,7 @@ AFloat TCuda<AFloat>::Sum(const TCudaMatrix<AFloat> & A)
 //____________________________________________________________________________
 template<>
 void TCuda<float>::SumColumns(TCudaMatrix<float> & B,
-                               const TCudaMatrix<float> & A)
+                              const TCudaMatrix<float> & A)
 {
   int m, n;
   m = A.GetNrows();
@@ -208,6 +208,80 @@ void TCuda<double>::SumColumns(TCudaMatrix<double> & B,
   B.SetComputeStream(s);
 }
+template<>
+void TCuda<float>::SumRows(TCudaMatrix<float> & B,
+                           const TCudaMatrix<float> & A)
+{
+    int m, n;
+    m = A.GetNrows();
+    n = A.GetNcols();
+    float alpha = 1.0, beta = 0.0;
+    cudaStream_t s = A.GetComputeStream();
+    cublasSetStream(A.GetCublasHandle(), s);
+    // Compute B = alpha * (A * ones) + beta * B, i.e. B receives the sum of each row of A
+    cublasSgemv(A.GetCublasHandle(), CUBLAS_OP_N,
+                m, n, & alpha,
+                A.GetDataPointer(), m,             // *A, lda
+                TCudaMatrix<float>::GetOnes(), 1, // *x, incx
+                & beta, B.GetDataPointer(), 1);    // beta, *y, incy
+    B.SetComputeStream(s);
+}
+//____________________________________________________________________________
+template<>
+void TCuda<double>::SumRows(TCudaMatrix<double> & B,
+                            const TCudaMatrix<double> & A)
+{
+    int m, n;
+    m = A.GetNrows();
+    n = A.GetNcols();
+    double alpha = 1.0, beta = 0.0;
+    cudaStream_t s = A.GetComputeStream();
+    cublasSetStream(A.GetCublasHandle(), s);
+    // Compute B = alpha * (A * ones) + beta * B, i.e. B receives the sum of each row of A
+    cublasDgemv(A.GetCublasHandle(), CUBLAS_OP_N,
+                m, n, & alpha,
+                A.GetDataPointer(), m,             // *A, lda
+                TCudaMatrix<double>::GetOnes(), 1, // *x, incx
+                & beta, B.GetDataPointer(), 1);    // beta, *y, incy
+    B.SetComputeStream(s);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/// \brief Checks two matrices for element-wise equality.
+/// \tparam AFloat An architecture-specific floating point number type.
+/// \param A The first matrix.
+/// \param B The second matrix.
+/// \param epsilon Equality tolerance, needed to address floating point arithmetic.
+/// \return Whether the two matrices can be considered equal element-wise
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename AFloat>
+bool TCuda<AFloat>::AlmostEquals(const TCudaMatrix<AFloat> &A, const TCudaMatrix<AFloat> &B, double epsilon)
+{
+    dim3 blockDims = TDevice::BlockDims2D();
+    dim3 gridDims  = TDevice::GridDims2D(A);
+    cudaStream_t s = A.GetComputeStream();
+    bool * dResult = 0;
+    cudaMalloc((void**) &dResult, sizeof(bool));
+    cudaMemset(dResult, 1, sizeof(bool));
+    ::TMVA::DNN::Cuda::AlmostEquals<<<gridDims, blockDims, 0, s>>>(dResult, A.GetDataPointer(), B.GetDataPointer(),
+                                                                   epsilon, A.GetNrows(), A.GetNcols());
+    bool result;
+    cudaMemcpy(&result, dResult, sizeof(bool), cudaMemcpyDeviceToHost);
+    cudaFree(dResult);
+    return result;
+}
 //____________________________________________________________________________
 template<>
 void TCuda<float>::ScaleAdd(TCudaMatrix<float> & B,