Commit e5fdd643 authored by Lorenzo Moneta

Revert "Remove RNN tests"

This reverts commit 4ac3743a.
parent f39559f5
Showing 741 additions and 0 deletions
############################################################################
# CMakeLists.txt file for building TMVA/DNN/RNN tests.
# @author Saurav Shekhar
############################################################################
project(tmva-tests)
find_package(ROOT REQUIRED)
set(Libraries Core MathCore Matrix TMVA)
include_directories(${ROOT_INCLUDE_DIRS})
# RNN - BackPropagation Reference
ROOT_EXECUTABLE(testRecurrentBackpropagation TestRecurrentBackpropagation.cxx LIBRARIES ${Libraries})
ROOT_ADD_TEST(TMVA-DNN-RNN-Backpropagation COMMAND testRecurrentBackpropagation)
#
## RNN - Initialization Reference
#ROOT_EXECUTABLE(testRecurrentNetInit TestRecurrentNetInitialization.cxx LIBRARIES ${Libraries})
#ROOT_ADD_TEST(TMVA-DNN-RNN-Init COMMAND testRecurrentNetInit)
# RNN - Forward Reference
ROOT_EXECUTABLE(testForwardPass TestForwardPass.cxx LIBRARIES ${Libraries})
ROOT_ADD_TEST(TMVA-DNN-RNN-Forward COMMAND testForwardPass)
# RNN - Full Test Reference
ROOT_EXECUTABLE(testFullRNN TestFullRNN.cxx LIBRARIES ${Libraries})
ROOT_ADD_TEST(TMVA-DNN-RNN-FullRNN COMMAND testFullRNN)
# RNN - Loss Reference
#ROOT_EXECUTABLE(testRecurrentNetLoss TestRecurrentNetLoss.cxx LIBRARIES ${Libraries})
#ROOT_ADD_TEST(TMVA-DNN-RNN-Loss COMMAND testRecurrentNetLoss)
#
## RNN - Prediction Reference
#ROOT_EXECUTABLE(testRecurrentNetPred TestRecurrentNetPrediction.cxx LIBRARIES ${Libraries})
#ROOT_ADD_TEST(TMVA-DNN-RNN-Pred COMMAND testRecurrentNetPred)
#--- CUDA tests. ---------------------------
if (CUDA_FOUND)
SET(DNN_CUDA_LIBRARIES dnn_cuda ${CUDA_CUBLAS_LIBRARIES})
endif (CUDA_FOUND)
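# Note: DNN_CUDA_LIBRARIES is only defined here; no CUDA RNN test is registered in this file yet.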
#--- CPU tests. ----------------------------
if (BLAS_FOUND AND imt)
include_directories(SYSTEM ${TBB_INCLUDE_DIRS})
# DNN - Forward CPU
ROOT_EXECUTABLE(testForwardPassCpu TestForwardPassCpu.cxx LIBRARIES ${Libraries})
ROOT_ADD_TEST(TMVA-DNN-RNN-Forward-Cpu COMMAND testForwardPassCpu)
endif (BLAS_FOUND AND imt)
// @(#)root/tmva $Id$
// Author: Saurav Shekhar 22/06/17
/*************************************************************************
* Copyright (C) 2017, Saurav Shekhar *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
////////////////////////////////////////////////////////////////////
//Testing RNNLayer forward pass for Reference implementation //
////////////////////////////////////////////////////////////////////
#include <iostream>
#include "TMVA/DNN/Architectures/Reference.h"
#include "TestForwardPass.h"
//#include "gtest/gtest.h"
//#include "gmock/gmock.h"
using namespace TMVA::DNN;
using namespace TMVA::DNN::RNN;
//TEST(RNNTest, ForwardPass)
//{
// EXPECT_EQ(testForwardPass<TReference<double>>(3, 8, 100, 50), 0.0);
//}
int main() {
std::cout << "Testing RNN Forward pass\n";
// timesteps, batchsize, statesize, inputsize
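// Each call returns the maximum relative error between the layer output and a
// hand-computed recurrence (see TestForwardPass.h); values near zero indicate agreement.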
std::cout << testForwardPass<TReference<double>>(1, 2, 3, 2) << "\n";
std::cout << testForwardPass<TReference<double>>(1, 8, 100, 50) << "\n";
std::cout << testForwardPass<TReference<double>>(5, 9, 128, 64) << "\n";
return 0;
}
// @(#)root/tmva $Id$
// Author: Saurav Shekhar
/*************************************************************************
* Copyright (C) 2017, Saurav Shekhar *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
////////////////////////////////////////////////////////////////////
// Generic tests of the RNNLayer Forward pass //
////////////////////////////////////////////////////////////////////
#ifndef TMVA_TEST_DNN_TEST_RNN_TEST_FWDPASS_H
#define TMVA_TEST_DNN_TEST_RNN_TEST_FWDPASS_H
#include <iostream>
#include <vector>
#include "../Utility.h"
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/DeepNet.h"
using namespace TMVA::DNN;
using namespace TMVA::DNN::RNN;
template <typename Architecture>
auto printTensor1(const std::vector<typename Architecture::Matrix_t> &A, const std::string name = "matrix")
-> void
{
std::cout << name << "\n";
for (size_t l = 0; l < A.size(); ++l) {
for (size_t i = 0; i < (size_t) A[l].GetNrows(); ++i) {
for (size_t j = 0; j < (size_t) A[l].GetNcols(); ++j) {
std::cout << A[l](i, j) << " ";
}
std::cout << "\n";
}
std::cout << "********\n";
}
}
template <typename Architecture>
auto printMatrix1(const typename Architecture::Matrix_t &A, const std::string name = "matrix")
-> void
{
std::cout << name << "\n";
for (size_t i = 0; i < (size_t) A.GetNrows(); ++i) {
for (size_t j = 0; j < (size_t) A.GetNcols(); ++j) {
std::cout << A(i, j) << " ";
}
std::cout << "\n";
}
std::cout << "********\n";
}
/*! Generate a DeepNet, test forward pass */
//______________________________________________________________________________
template <typename Architecture>
auto testForwardPass(size_t timeSteps, size_t batchSize, size_t stateSize,
size_t inputSize)
-> Double_t
{
using Matrix_t = typename Architecture::Matrix_t;
using Tensor_t = std::vector<Matrix_t>;
using RNNLayer_t = TBasicRNNLayer<Architecture>;
using Net_t = TDeepNet<Architecture>;
std::vector<TMatrixT<Double_t>> XRef(timeSteps, TMatrixT<Double_t>(batchSize, inputSize)); // T x B x D
Tensor_t XArch, arr_XArch;
// Note: constructing arr_XArch as Tensor_t(batchSize, Matrix_t(timeSteps, inputSize)) does not
// work here: all elements of the vector end up referring to the same matrix.
for (size_t i = 0; i < batchSize; ++i) arr_XArch.emplace_back(timeSteps, inputSize); // B x T x D
for (size_t i = 0; i < timeSteps; ++i) {
randomMatrix(XRef[i]);
XArch.emplace_back(XRef[i]);
}
Architecture::Rearrange(arr_XArch, XArch); // B x T x D
Net_t rnn(batchSize, batchSize, timeSteps, inputSize, 0, 0, 0, ELossFunction::kMeanSquaredError, EInitialization::kGauss);
RNNLayer_t* layer = rnn.AddBasicRNNLayer(stateSize, inputSize, timeSteps);
layer->Initialize();
TMatrixT<Double_t> weightsInput = layer->GetWeightsInput(); // H x D
TMatrixT<Double_t> weightsState = layer->GetWeightsState(); // H x H
TMatrixT<Double_t> biases = layer->GetBiasesAt(0); // H x 1
TMatrixT<Double_t> state = layer->GetState(); // B x H
TMatrixT<Double_t> tmp(batchSize, stateSize);
rnn.Forward(arr_XArch);
Tensor_t outputArch = layer->GetOutput(); // B x T x H
Tensor_t arr_outputArch;
for (size_t t = 0; t < timeSteps; ++t) arr_outputArch.emplace_back(batchSize, stateSize); // T x B x H
Architecture::Rearrange(arr_outputArch, outputArch);
Double_t maximumError = 0.0;
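// Recompute the recurrence by hand, state_t = tanh(X_t * W_input^T + state_{t-1} * W_state^T + b),
// and compare it against the layer output at every time step.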
for (size_t t = 0; t < timeSteps; ++t) {
tmp.MultT(state, weightsState);
state.MultT(XRef[t], weightsInput);
state += tmp;
// adding bias
for (size_t i = 0; i < (size_t) state.GetNrows(); i++) {
for (size_t j = 0; j < (size_t) state.GetNcols(); j++) {
state(i,j) += biases(j,0);
}
}
// activation fn
applyMatrix(state, [](double x){return tanh(x);});
TMatrixT<Double_t> output = arr_outputArch[t];
Double_t error = maximumRelativeError(output, state);
std::cout << "Time " << t << " Error: " << error << "\n";
maximumError = std::max(error, maximumError);
}
return maximumError;
}
#endif
// @(#)root/tmva $Id$
// Author: Saurav Shekhar 01/08/17
/*************************************************************************
* Copyright (C) 2017, Saurav Shekhar *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
////////////////////////////////////////////////////////////////////
// Testing RNNLayer forward pass for the CPU implementation        //
////////////////////////////////////////////////////////////////////
#include <iostream>
#include "TMVA/DNN/Architectures/Cpu.h"
#include "TestForwardPass.h"
//#include "gtest/gtest.h"
//#include "gmock/gmock.h"
using namespace TMVA::DNN;
using namespace TMVA::DNN::RNN;
//TEST(RNNTest, ForwardPass)
//{
// EXPECT_EQ(testForwardPass<TReference<double>>(3, 8, 100, 50), 0.0);
//}
int main() {
using Scalar_t = Double_t;
std::cout << "Testing RNN Forward pass\n";
// timesteps, batchsize, statesize, inputsize
std::cout << testForwardPass<TCpu<Scalar_t>>(2, 2, 3, 2) << "\n";
std::cout << testForwardPass<TCpu<Scalar_t>>(1, 8, 100, 50) << "\n";
std::cout << testForwardPass<TCpu<Scalar_t>>(5, 9, 128, 64) << "\n";
return 0;
}
// @(#)root/tmva $Id$
// Author: Saurav Shekhar 02/08/17
/*************************************************************************
* Copyright (C) 2017, Saurav Shekhar *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
////////////////////////////////////////////////////////////////////
// Testing a full RNN: training the network to reproduce its input //
////////////////////////////////////////////////////////////////////
#include <iostream>
#include "TMVA/DNN/Architectures/Reference.h"
#include "TestFullRNN.h"
using namespace TMVA::DNN;
using namespace TMVA::DNN::RNN;
int main() {
std::cout << "Training RNN to identity first";
//testFullRNN(size_t batchSize, size_t stateSize, size_t inputSize, size_t outputSize)
// reconstruct 8 bit vector
// batchsize, statesize, inputsize, outputsize
testFullRNN<TReference<double>>(2, 3, 2, 2) ;
//testFullRNN<TReference<double>>(64, 10, 8, 8) ;
//testFullRNN<TReference<double>>(3, 8, 100, 50) ;
return 0;
}
// @(#)root/tmva $Id$
// Author: Saurav Shekhar
/*************************************************************************
* Copyright (C) 2017, Saurav Shekhar *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
////////////////////////////////////////////////////////////////////
// Generic test of a full RNN network (forward pass and training)  //
////////////////////////////////////////////////////////////////////
#ifndef TMVA_TEST_DNN_TEST_RNN_TEST_FULL
#define TMVA_TEST_DNN_TEST_RNN_TEST_FULL
#include <iostream>
#include <vector>
#include "../Utility.h"
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/DeepNet.h"
#include "TMVA/DNN/Net.h"
using namespace TMVA::DNN;
using namespace TMVA::DNN::RNN;
template <typename Architecture>
auto printTensor1(const std::vector<typename Architecture::Matrix_t> &A, const std::string name = "matrix")
-> void
{
std::cout << name << "\n";
for (size_t l = 0; l < A.size(); ++l) {
for (size_t i = 0; i < (size_t) A[l].GetNrows(); ++i) {
for (size_t j = 0; j < (size_t) A[l].GetNcols(); ++j) {
std::cout << A[l](i, j) << " ";
}
std::cout << "\n";
}
std::cout << "********\n";
}
}
template <typename Architecture>
auto printMatrix1(const typename Architecture::Matrix_t &A, const std::string name = "matrix")
-> void
{
std::cout << name << "\n";
for (size_t i = 0; i < (size_t) A.GetNrows(); ++i) {
for (size_t j = 0; j < (size_t) A.GetNcols(); ++j) {
std::cout << A(i, j) << " ";
}
std::cout << "\n";
}
std::cout << "********\n";
}
/* Generate a full recurrent neural net
* like a word generative model */
//______________________________________________________________________________
template <typename Architecture>
auto testFullRNN(size_t batchSize, size_t stateSize,
size_t inputSize, size_t outputSize)
-> void
{
using Matrix_t = typename Architecture::Matrix_t;
using Tensor_t = std::vector<Matrix_t>;
// using RNNLayer_t = TBasicRNNLayer<Architecture>;
// using FCLayer_t = TDenseLayer<Architecture>;
// using Reshape_t = TReshapeLayer<Architecture>;
using Net_t = TDeepNet<Architecture>;
using Scalar_t = typename Architecture::Scalar_t;
// Note: the dense layer only takes the first time step as input,
// so keep timeSteps = 1 for this test.
size_t timeSteps = 1;
std::vector<TMatrixT<Double_t>> XRef(batchSize, TMatrixT<Double_t>(timeSteps, inputSize)); // B x T x D
//TMatrixT<Double_t> YRef(batchSize, outputSize); // B x O (D = O)
Tensor_t XArch;
Matrix_t YArch(batchSize, outputSize); // B x O (D = O)
for (size_t i = 0; i < batchSize; ++i) {
randomMatrix(XRef[i]);
std::cerr << "Copying output into input\n";
XArch.emplace_back(XRef[i]);
for (size_t j = 0; j < outputSize; ++j) {
YArch(i, j) = XArch[i](0, j);
}
}
Net_t rnn(batchSize, batchSize, timeSteps, inputSize, 0, 0, 0, ELossFunction::kMeanSquaredError, EInitialization::kGauss);
// RNNLayer_t* layer = rnn.AddBasicRNNLayer(stateSize, inputSize, timeSteps, false);
// Reshape_t* reshape = rnn.AddReshapeLayer(1, 1, stateSize, true);
// FCLayer_t* classifier = rnn.AddDenseLayer(outputSize, EActivationFunction::kIdentity);
rnn.AddBasicRNNLayer(stateSize, inputSize, timeSteps, false);
rnn.AddReshapeLayer(1, 1, stateSize, true);
rnn.AddDenseLayer(outputSize, EActivationFunction::kIdentity);
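// Network layout: BasicRNN -> Reshape -> Dense with identity activation;
// trained below to reproduce its own input, i.e. to learn the identity map.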
Matrix_t W(batchSize, 1);
for (size_t i = 0; i < batchSize; ++i) W(i, 0) = 1.0;
rnn.Initialize();
size_t iter = 0;
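// Plain gradient descent: 50 iterations of forward pass, loss evaluation, backward pass,
// and a weight update with learning rate 0.1.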
while (iter++ < 50) {
rnn.Forward(XArch);
Scalar_t loss = rnn.Loss(YArch, W, false);
//if (iter % 20 == 0) {
//for (size_t i = 0; i < inputSize; ++i) std::cout << XRef[0](0, i) << " "; std::cout << "\n";
//for (size_t i = 0; i < inputSize; ++i) std::cout << rnn.GetLayers().back()->GetOutputAt(0)(0, i) << " "; std::cout << "\n";
//}
std::cout << "loss: " << loss << std::endl;
rnn.Backward(XArch, YArch, W);
rnn.Update(0.1);
}
}
#endif
// @(#)root/tmva $Id$
// Author: Saurav Shekhar 30/11/17
/*************************************************************************
* Copyright (C) 2017, Saurav Shekhar *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
////////////////////////////////////////////////////////////////////
// Testing RNNLayer backpropagation //
////////////////////////////////////////////////////////////////////
#include <iostream>
#include "TMVA/DNN/Architectures/Reference.h"
#include "TestRecurrentBackpropagation.h"
using namespace TMVA::DNN;
using namespace TMVA::DNN::RNN;
int main() {
std::cout << "Testing RNN backward pass\n";
// timesteps, batchsize, statesize, inputsize
testRecurrentBackpropagationWeights<TReference<double>>(2, 2, 1, 2, 1e-5);
testRecurrentBackpropagationBiases<TReference<double>>(1, 2, 3, 2, 1e-5);
testRecurrentBackpropagationWeights<TReference<double>>(2, 3, 4, 5, 1e-5);
return 0;
}
// @(#)root/tmva $Id$
// Author: Saurav Shekhar
/*************************************************************************
* Copyright (C) 2017, Saurav Shekhar *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
////////////////////////////////////////////////////////////////////
// Generic tests of the RNNLayer Backward pass //
////////////////////////////////////////////////////////////////////
#ifndef TMVA_TEST_DNN_TEST_RNN_TEST_BWDPASS_H
#define TMVA_TEST_DNN_TEST_RNN_TEST_BWDPASS_H
#include <iostream>
#include <vector>
#include "../Utility.h"
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/DeepNet.h"
using namespace TMVA::DNN;
using namespace TMVA::DNN::RNN;
template <typename Architecture>
auto printTensor(const std::vector<typename Architecture::Matrix_t> &A, const std::string name = "matrix")
-> void
{
std::cout << name << "\n";
for (size_t l = 0; l < A.size(); ++l) {
for (Int_t i = 0; i < A[l].GetNrows(); ++i) {
for (Int_t j = 0; j < A[l].GetNcols(); ++j) {
std::cout << A[l](i, j) << " ";
}
std::cout << "\n";
}
std::cout << "********\n";
}
}
template <typename Architecture>
auto printTensor(const typename Architecture::Matrix_t &A, const std::string name = "matrix")
-> void
{
std::cout << name << "\n";
for (Int_t i = 0; i < A.GetNrows(); ++i) {
for (Int_t j = 0; j < A.GetNcols(); ++j) {
std::cout << A(i, j) << " ";
}
std::cout << "\n";
}
std::cout << "********\n";
}
/*! Compute the loss of the net as a function of the weight at index (i,j) in
* layer l. dx is added as an offset to the current value of the weight. */
//______________________________________________________________________________
template <typename Architecture>
auto evaluate_net_weight(TDeepNet<Architecture> &net, std::vector<typename Architecture::Matrix_t> & X,
const typename Architecture::Matrix_t &Y, const typename Architecture::Matrix_t &W, size_t l,
size_t k, size_t i, size_t j, typename Architecture::Scalar_t dx) ->
typename Architecture::Scalar_t
{
using Scalar_t = typename Architecture::Scalar_t;
net.GetLayerAt(l)->GetWeightsAt(k).operator()(i,j) += dx;
Scalar_t res = net.Loss(X, Y, W, false, false);
net.GetLayerAt(l)->GetWeightsAt(k).operator()(i,j) -= dx;
return res;
}
/*! Compute the loss of the net as a function of the bias at index i in
 * layer l. dx is added as an offset to the current value of the bias. */
//______________________________________________________________________________
template <typename Architecture>
auto evaluate_net_bias(TDeepNet<Architecture> &net, std::vector<typename Architecture::Matrix_t> & X,
const typename Architecture::Matrix_t &Y, const typename Architecture::Matrix_t &W, size_t l,
size_t k, size_t i, typename Architecture::Scalar_t dx) -> typename Architecture::Scalar_t
{
using Scalar_t = typename Architecture::Scalar_t;
net.GetLayerAt(l)->GetBiasesAt(k).operator()(i,0) += dx;
Scalar_t res = net.Loss(X, Y, W, false, false);
net.GetLayerAt(l)->GetBiasesAt(k).operator()(i,0) -= dx;
return res;
}
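// A minimal, standalone illustration of the central-difference check used below
// (hypothetical example, independent of the test code): for L(w) = w^3 the estimate
// (L(w+dx) - L(w-dx)) / (2*dx) should match the analytic derivative 3*w^2.
inline double demoCentralDifference(double w = 2.0, double dx = 1e-5)
{
   auto L = [](double x) { return x * x * x; };
   double numeric  = (L(w + dx) - L(w - dx)) / (2.0 * dx);
   double analytic = 3.0 * w * w;
   return std::fabs(numeric - analytic) / analytic; // relative error, expected to be tiny
}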
/*! Generate a DeepNet, test backward pass */
//______________________________________________________________________________
template <typename Architecture>
auto testRecurrentBackpropagationWeights(size_t timeSteps, size_t batchSize, size_t stateSize,
size_t inputSize, typename Architecture::Scalar_t dx)
-> Double_t
{
using Matrix_t = typename Architecture::Matrix_t;
using Tensor_t = std::vector<Matrix_t>;
using RNNLayer_t = TBasicRNNLayer<Architecture>;
using Net_t = TDeepNet<Architecture>;
using Scalar_t = typename Architecture::Scalar_t;
std::vector<TMatrixT<Double_t>> XRef(batchSize, TMatrixT<Double_t>(timeSteps, inputSize)); // B x T x D
Tensor_t XArch;
//for (size_t i = 0; i < batchSize; ++i) XArch.emplace_back(timeSteps, inputSize); // B x T x D
for (size_t i = 0; i < batchSize; ++i) {
//randomMatrix(XRef[i]);
for (size_t l = 0; l < (size_t) XRef[i].GetNrows(); ++l) {
for (size_t m = 0; m < (size_t) XRef[i].GetNcols(); ++m) {
XRef[i](l, m) = i + l + m;
}
}
XArch.emplace_back(XRef[i]);
}
Matrix_t Y(batchSize, timeSteps * stateSize), weights(batchSize, 1);
//randomMatrix(Y);
for (size_t i = 0; i < (size_t) Y.GetNrows(); ++i) {
for (size_t j = 0; j < (size_t) Y.GetNcols(); ++j) {
Y(i, j) = (i + j)/2.0 - 0.75;
}
}
fillMatrix(weights, 1.0);
Net_t rnn(batchSize, batchSize, timeSteps, inputSize, 0, 0, 0, ELossFunction::kMeanSquaredError, EInitialization::kGauss);
RNNLayer_t* layer = rnn.AddBasicRNNLayer(stateSize, inputSize, timeSteps);
rnn.AddReshapeLayer(1, timeSteps, stateSize, true);
rnn.Initialize();
rnn.Forward(XArch);
rnn.Backward(XArch, Y, weights);
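// After Backward the analytic gradients are stored in the layer; they are compared below
// against numerical estimates obtained by shifting one parameter at a time.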
Scalar_t maximum_error = 0.0;
// Weights Input, k = 0
auto &Wi = layer->GetWeightsAt(0);
auto &dWi = layer->GetWeightGradientsAt(0);
for (Int_t i = 0; i < Wi.GetNrows(); ++i) {
for (Int_t j = 0; j < Wi.GetNcols(); ++j) {
auto f = [&rnn, &XArch, &Y, &weights, i, j](Scalar_t x) {
return evaluate_net_weight(rnn, XArch, Y, weights, 0, 0, i, j, x);
};
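// Central-difference estimate of dL/dW(i,j): the finite difference of the loss over
// [-dx, +dx], divided by 2*dx.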
Scalar_t dy = finiteDifference(f, dx) / (2.0 * dx);
Scalar_t dy_ref = dWi(i, j);
// Compute the relative error if the reference gradient is nonzero.
Scalar_t error;
if (std::fabs(dy_ref) > 1e-15) {
error = std::fabs((dy - dy_ref) / dy_ref);
} else {
error = std::fabs(dy - dy_ref);
}
maximum_error = std::max(error, maximum_error);
}
}
std::cout << "\rTesting weight input gradients: ";
std::cout << "maximum relative error: " << print_error(maximum_error) << std::endl;
// Weights State, k = 1
Scalar_t smaximum_error = 0.0;
auto &Ws = layer->GetWeightsAt(1);
auto &dWs = layer->GetWeightGradientsAt(1);
for (Int_t i = 0; i < Ws.GetNrows(); ++i) {
for (Int_t j = 0; j < Ws.GetNcols(); ++j) {
auto f = [&rnn, &XArch, &Y, &weights, i, j](Scalar_t x) {
return evaluate_net_weight(rnn, XArch, Y, weights, 0, 1, i, j, x);
};
Scalar_t dy = finiteDifference(f, dx) / (2.0 * dx);
Scalar_t dy_ref = dWs(i, j);
// Compute the relative error if the reference gradient is nonzero.
Scalar_t error;
if (std::fabs(dy_ref) > 1e-15) {
error = std::fabs((dy - dy_ref) / dy_ref);
} else {
error = std::fabs(dy - dy_ref);
}
smaximum_error = std::max(error, smaximum_error);
}
}
std::cout << "\rTesting weight state gradients: ";
std::cout << "maximum relative error: " << print_error(smaximum_error) << std::endl;
return std::max(maximum_error, smaximum_error);
}
/*! Generate a DeepNet, test backward pass */
//______________________________________________________________________________
template <typename Architecture>
auto testRecurrentBackpropagationBiases(size_t timeSteps, size_t batchSize, size_t stateSize,
size_t inputSize, typename Architecture::Scalar_t dx)
-> Double_t
{
using Matrix_t = typename Architecture::Matrix_t;
using Tensor_t = std::vector<Matrix_t>;
using RNNLayer_t = TBasicRNNLayer<Architecture>;
using Net_t = TDeepNet<Architecture>;
using Scalar_t = typename Architecture::Scalar_t;
std::vector<TMatrixT<Double_t>> XRef(batchSize, TMatrixT<Double_t>(timeSteps, inputSize)); // B x T x D
Tensor_t XArch;
//for (size_t i = 0; i < batchSize; ++i) XArch.emplace_back(timeSteps, inputSize); // B x T x D
for (size_t i = 0; i < batchSize; ++i) {
randomMatrix(XRef[i]);
XArch.emplace_back(XRef[i]);
}
Matrix_t Y(batchSize, stateSize), weights(batchSize, 1);
randomMatrix(Y);
fillMatrix(weights, 1.0);
Net_t rnn(batchSize, batchSize, timeSteps, inputSize, 0, 0, 0, ELossFunction::kMeanSquaredError, EInitialization::kGauss);
RNNLayer_t* layer = rnn.AddBasicRNNLayer(stateSize, inputSize, timeSteps);
rnn.AddReshapeLayer(1, timeSteps, stateSize, true);
rnn.Initialize();
rnn.Forward(XArch);
rnn.Backward(XArch, Y, weights);
Scalar_t maximum_error = 0.0;
auto &B = layer->GetBiasesAt(0);
auto &dB = layer->GetBiasGradientsAt(0);
for (Int_t i = 0; i < B.GetNrows(); ++i) {
auto f = [&rnn, &XArch, &Y, &weights, i](Scalar_t x) {
return evaluate_net_bias(rnn, XArch, Y, weights, 0, 0, i, x);
};
Scalar_t dy = finiteDifference(f, dx) / (2.0 * dx);
Scalar_t dy_ref = dB(i, 0);
// Compute the relative error if the reference gradient is nonzero.
Scalar_t error;
if (std::fabs(dy_ref) > 1e-15) {
error = std::fabs((dy - dy_ref) / dy_ref);
} else {
error = std::fabs(dy - dy_ref);
}
maximum_error = std::max(error, maximum_error);
}
std::cout << "\rTesting bias gradients: ";
std::cout << "maximum relative error: " << print_error(maximum_error) << std::endl;
return maximum_error;
}
#endif
// @(#)root/tmva $Id$
// Author: Saurav Shekhar 16/02/17
/*************************************************************************
* Copyright (C) 2017, Saurav Shekhar *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
////////////////////////////////////////////////////////////////////
// Testing RNNLayer backpropagation //
////////////////////////////////////////////////////////////////////
#include <iostream>
#include "TMVA/DNN/Architectures/Cpu.h"
#include "TestRecurrentBackpropagation.h"
using namespace TMVA::DNN;
using namespace TMVA::DNN::RNN;
int main() {
std::cout << "Testing RNN backward pass\n";
using Scalar_t = Double_t;
// timesteps, batchsize, statesize, inputsize
testRecurrentBackpropagationWeights<TCpu<Scalar_t>>(1, 2, 1, 2, 1e-5);
testRecurrentBackpropagationBiases<TCpu<Scalar_t>>(1, 2, 3, 2, 1e-5);
testRecurrentBackpropagationWeights<TCpu<Scalar_t>>(2, 3, 4, 5, 1e-5);
return 0;
}