Skip to content
Snippets Groups Projects
Commit d1dcfb8a authored by Enric Tejedor Saavedra's avatar Enric Tejedor Saavedra Committed by Danilo Piparo
Browse files

Gtest for CSV data source

parent 372b6474
No related branches found
No related tags found
No related merge requests found
......@@ -17,6 +17,9 @@ ROOT_ADD_GTEST(dataframe_utils dataframe/dataframe_utils.cxx LIBRARIES TreePlaye
ROOT_ADD_GTEST(datasource_noncopiable dataframe/datasource_noncopiable.cxx LIBRARIES TreePlayer)
ROOT_ADD_GTEST(datasource_root dataframe/datasource_root.cxx LIBRARIES TreePlayer)
ROOT_ADD_GTEST(datasource_trivial dataframe/datasource_trivial.cxx LIBRARIES TreePlayer)
configure_file(dataframe/TCsvDS_test_headers.csv . COPYONLY)
configure_file(dataframe/TCsvDS_test_noheaders.csv . COPYONLY)
ROOT_ADD_GTEST(datasource_csv dataframe/datasource_csv.cxx LIBRARIES TreePlayer)
ROOT_ADD_PYUNITTEST(dataframe_histograms dataframe/dataframe_histograms.py)
# ROOT_ADD_PYUNITTEST(dataframe_cache dataframe/dataframe_cache.py)
Name,Age,Height,Married
Harry,60,185.2,true
"Bob,Bob",50,180.0,true
"""Joe""",40,200.5,false
"Tom",30,170.,false
John ,1,.7,false
"Mary Ann" ,-1,.7,true
Harry,60,185.2,true
"Bob,Bob",55,180.0,true
"""Joe""",20,200.5,false
"Tom",30,170.,False
John ,1,.7,FALSE
"Mary Ann" ,-1,.7,TruE
#include <ROOT/TDataFrame.hxx>
#include <ROOT/TCsvDS.hxx>
#include <ROOT/TSeq.hxx>

#include <gtest/gtest.h>

#include <chrono>
#include <cstddef>
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <thread>
#include <utility>
#include <vector>
using namespace ROOT::Experimental;
using namespace ROOT::Experimental::TDF;
// Names of the CSV fixture files read by the tests below.
// constexpr makes the pointers themselves immutable and gives them internal
// linkage, so they cannot be reseated by accident or clash with other
// translation units.
constexpr auto fileName0 = "TCsvDS_test_headers.csv";   // opened with the default TCsvDS constructor arguments
constexpr auto fileName1 = "TCsvDS_test_noheaders.csv"; // opened with the second constructor argument set to false
// Checks the column names and the per-column type names reported by a TCsvDS
// built on the fixture that is opened with default constructor arguments.
TEST(TCsvDS, ColTypeNames)
{
   TCsvDS tds(fileName0);
   tds.SetNSlots(1);

   const auto &colNames = tds.GetColumnNames();
   // Fatal check: the indexed accesses below would read past the end of the
   // container if fewer columns than expected were found.
   ASSERT_EQ(4U, colNames.size());

   EXPECT_TRUE(tds.HasColumn("Name"));
   EXPECT_TRUE(tds.HasColumn("Age"));
   EXPECT_FALSE(tds.HasColumn("Address"));

   EXPECT_STREQ("Height", colNames[2].c_str());
   EXPECT_STREQ("Married", colNames[3].c_str());

   EXPECT_STREQ("std::string", tds.GetTypeName("Name").c_str());
   EXPECT_STREQ("int", tds.GetTypeName("Age").c_str());
   EXPECT_STREQ("double", tds.GetTypeName("Height").c_str());
   EXPECT_STREQ("bool", tds.GetTypeName("Married").c_str());
}
// Checks the column names reported when the data source is constructed with
// `false` as second argument: the test expects Col0 ... Col3.
TEST(TCsvDS, ColNamesNoHeaders)
{
   TCsvDS tds(fileName1, false);
   tds.SetNSlots(1);

   const auto &colNames = tds.GetColumnNames();
   // Fatal check: do not index into the container unless all four columns
   // are really there.
   ASSERT_EQ(4U, colNames.size());

   EXPECT_STREQ("Col0", colNames[0].c_str());
   EXPECT_STREQ("Col1", colNames[1].c_str());
   EXPECT_STREQ("Col2", colNames[2].c_str());
   EXPECT_STREQ("Col3", colNames[3].c_str());
}
// Checks that, with three slots, the entries of the fixture are split into
// the three ranges [0,2), [2,4) and [4,6).
TEST(TCsvDS, EntryRanges)
{
   TCsvDS tds(fileName0);
   tds.SetNSlots(3U);

   // Still dividing in equal parts...
   auto ranges = tds.GetEntryRanges();

   // Must be fatal: a non-fatal EXPECT would let the test continue and index
   // ranges[0..2] out of bounds if fewer ranges were returned.
   ASSERT_EQ(3U, ranges.size());

   EXPECT_EQ(0U, ranges[0].first);
   EXPECT_EQ(2U, ranges[0].second);
   EXPECT_EQ(2U, ranges[1].first);
   EXPECT_EQ(4U, ranges[1].second);
   EXPECT_EQ(4U, ranges[2].first);
   EXPECT_EQ(6U, ranges[2].second);
}
// Reads the "Age" column as int through the per-slot column readers, walking
// every entry range with its own slot, and compares each value with the
// expected one.
TEST(TCsvDS, ColumnReaders)
{
   TCsvDS tds(fileName0);
   const auto nSlots = 3U;
   tds.SetNSlots(nSlots);

   auto vals = tds.GetColumnReaders<int>("Age");
   auto ranges = tds.GetEntryRanges();
   const std::vector<int> ages = {60, 50, 40, 30, 1, -1};

   auto slot = 0U;
   for (auto &&range : ranges) {
      // One reader per range is needed; stop before indexing past the readers.
      ASSERT_LT(slot, vals.size());
      tds.InitSlot(slot, range.first);
      for (auto i : ROOT::TSeq<int>(range.first, range.second)) {
         // Guard the lookup in the expectation table against unexpected entry numbers.
         ASSERT_LT(static_cast<std::size_t>(i), ages.size());
         tds.SetEntry(slot, i);
         auto val = **vals[slot];
         EXPECT_EQ(ages[i], val);
      }
      slot++;
   }
}
// Reads the "Name" column as std::string through the per-slot column readers,
// walking every entry range with its own slot, and compares each value with
// the expected one.
TEST(TCsvDS, ColumnReadersString)
{
   TCsvDS tds(fileName0);
   const auto nSlots = 3U;
   tds.SetNSlots(nSlots);

   auto vals = tds.GetColumnReaders<std::string>("Name");
   auto ranges = tds.GetEntryRanges();
   const std::vector<std::string> names = { "Harry", "Bob,Bob", "\"Joe\"", "Tom", " John ", " Mary Ann " };

   auto slot = 0U;
   for (auto &&range : ranges) {
      // One reader per range is needed; stop before indexing past the readers.
      ASSERT_LT(slot, vals.size());
      tds.InitSlot(slot, range.first);
      for (auto i : ROOT::TSeq<int>(range.first, range.second)) {
         // Guard the lookup in the expectation table against unexpected entry numbers.
         ASSERT_LT(static_cast<std::size_t>(i), names.size());
         tds.SetEntry(slot, i);
         // Bind by reference: the value is only compared, no copy is needed.
         const auto &val = *static_cast<std::string *>(*vals[slot]);
         EXPECT_EQ(names[i], val);
      }
      slot++;
   }
}
#ifndef NDEBUG
// Death test: calling SetNSlots a second time on the same data source is
// expected to terminate the process with the message matched below.
// The surrounding preprocessor guard compiles this test only when NDEBUG is
// not defined.
TEST(TCsvDS, SetNSlotsTwice)
{
   const auto setSlotsTwice = [] {
      TCsvDS source(fileName0);
      source.SetNSlots(1);
      source.SetNSlots(1);
   };

   ASSERT_DEATH(setSlotsTwice(), "Setting the number of slots even if the number of slots is different from zero.");
}
#endif
#ifdef R__B64
// Builds a TDataFrame on top of the CSV data source and checks the entry
// count plus the maximum and minimum of the "Height" column read as double.
TEST(TCsvDS, FromATDF)
{
   std::unique_ptr<TDataSource> source(new TCsvDS(fileName0));
   TDataFrame frame(std::move(source));

   // Book all three results before dereferencing any of them.
   auto maxHeight = frame.Max<double>("Height");
   auto minHeight = frame.Min<double>("Height");
   auto nEntries = frame.Count();

   EXPECT_EQ(6U, *nEntries);
   EXPECT_DOUBLE_EQ(200.5, *maxHeight);
   EXPECT_DOUBLE_EQ(.7, *minHeight);
}
// Same data source, but driven through string expressions (filters and a
// defined column) and untyped Max/Min calls on the "Age" column.
TEST(TCsvDS, FromATDFWithJitting)
{
   std::unique_ptr<TDataSource> source(new TCsvDS(fileName0));
   TDataFrame frame(std::move(source));

   // Largest age strictly below 40.
   auto youngerThan40 = frame.Filter("Age<40");
   auto maxAge = youngerThan40.Max("Age");

   // Smallest age strictly above 30, read through a copy of the column.
   auto withAgeCopy = frame.Define("Age2", "Age");
   auto olderThan30 = withAgeCopy.Filter("Age2>30");
   auto minAge = olderThan30.Min("Age2");

   EXPECT_EQ(30, *maxAge);
   EXPECT_EQ(40, *minAge);
}
// Multi-threaded tests: DefineSlotCheckMT enables implicit MT and nothing below turns it off again.
#ifdef R__USE_IMT
// Enables implicit multi-threading with four slots and records, per slot, the
// hash of the id of the thread that ran DefineSlot's callable. The test then
// expects one distinct, non-zero hash for every slot.
TEST(TCsvDS, DefineSlotCheckMT)
{
   auto nSlots = 4U;
   ROOT::EnableImplicitMT(nSlots);

   std::hash<std::thread::id> hasher;
   using H_t = decltype(hasher(std::this_thread::get_id()));
   // Zero marks "slot never visited".
   std::vector<H_t> threadHashes(nSlots, 0);

   std::unique_ptr<TDataSource> source(new TCsvDS(fileName0));
   TDataFrame frame(std::move(source));

   auto recordThread = [&](unsigned int slot) {
      std::this_thread::sleep_for(std::chrono::milliseconds(100));
      threadHashes[slot] = hasher(std::this_thread::get_id());
      return 1.;
   };
   auto maxX = frame.DefineSlot("x", recordThread).Max("x");

   EXPECT_EQ(1, *maxX); // just in case

   std::set<H_t> distinctHashes(threadHashes.begin(), threadHashes.end());
   EXPECT_EQ(nSlots, distinctHashes.size());
   EXPECT_TRUE(distinctHashes.end() == distinctHashes.find(0));
}
// Same checks as the FromATDF test (entry count, max and min of "Height"),
// placed in the section of the file guarded by R__USE_IMT.
TEST(TCsvDS, FromATDFMT)
{
   std::unique_ptr<TDataSource> csvSource(new TCsvDS(fileName0));
   TDataFrame dataFrame(std::move(csvSource));

   // Book all three results before dereferencing any of them.
   auto heightMax = dataFrame.Max<double>("Height");
   auto heightMin = dataFrame.Min<double>("Height");
   auto entryCount = dataFrame.Count();

   EXPECT_EQ(6U, *entryCount);
   EXPECT_DOUBLE_EQ(200.5, *heightMax);
   EXPECT_DOUBLE_EQ(.7, *heightMin);
}
// Same checks as the FromATDFWithJitting test (string-expression filters and
// a defined column on "Age"), placed in the section guarded by R__USE_IMT.
TEST(TCsvDS, FromATDFWithJittingMT)
{
   std::unique_ptr<TDataSource> csvSource(new TCsvDS(fileName0));
   TDataFrame dataFrame(std::move(csvSource));

   // Largest age strictly below 40.
   auto below40 = dataFrame.Filter("Age<40");
   auto ageMax = below40.Max("Age");

   // Smallest age strictly above 30, read through a copy of the column.
   auto withAge2 = dataFrame.Define("Age2", "Age");
   auto above30 = withAge2.Filter("Age2>30");
   auto ageMin = above30.Min("Age2");

   EXPECT_EQ(30, *ageMax);
   EXPECT_EQ(40, *ageMin);
}
#endif // R__USE_IMT
#endif // R__B64
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment