#include #include #include #include #include "stats.hpp" #include "welford.hpp" #include #include using namespace fuc2; std::random_device RNG; std::mt19937 GENERATOR(RNG()); template T uniform(T from, T to) { std::uniform_int_distribution rand(from, to); return rand(GENERATOR); } template T uniform_real(T from, T to) { std::uniform_real_distribution rand(from, to); return rand(GENERATOR); } namespace stats_tests { std::vector generate_samples(size_t count) { std::vector samples; // generate random numbers for(size_t i = 0; i < count; i++) { double num = uniform_real(0.0, 100000.0); samples.push_back(num); } return samples; } void test_basic_stats() { Stats stats; auto samples = generate_samples(1000); // calculate it manually from the vector of numbers double sum = 0.0; double sumsq = 0.0; for(auto num : samples) { sum += num; sumsq += num * num; stats.sample(num); } double mean = sum / samples.size(); ALMOST_EQUAL(sum, stats.sum, 5); ALMOST_EQUAL(sumsq, stats.sumsq, 5); // calculate mean and stddev using Stats ALMOST_EQUAL(mean, stats.mean(), 5); double n = samples.size(); // currently using variance, not sample_variance for stddev double var = (sumsq - (sum * sum / n)) / n; ALMOST_EQUAL(var, stats.variance(), 5); double stddev = std::sqrt(var); ALMOST_EQUAL(stddev, stats.stddev(), 5); } void test_welford() { Stats stats; WelfordStats welf; for(size_t i = 0; i < 10000; i++) { double num = uniform_real(0.0, 1.0); stats.sample(num); welf.sample(num); } ALMOST_EQUAL(stats.mean(), welf.mean(), 5); ALMOST_EQUAL(stats.variance(), welf.variance(), 5); ALMOST_EQUAL(stats.stddev(), welf.stddev(), 5); } void test_timing_stats() { Stats stats; // need to find something to time... } double round_to(double value, int decimal_places) { double multiplier = std::pow(10.0, decimal_places); return std::round(value * multiplier) / multiplier; } void test_t_test() { Stats stats; Stats stats2; WelfordStats welf; WelfordStats welf2; std::string filename("samples.txt"); std::fstream s{filename, s.binary | s.trunc | s.out}; double skew_factor = 0.8; for(size_t i = 0; i < 10; i++) { double num = uniform_real(-10.0, 10.0); auto line = fmt::format("{} {}\n", num, num + skew_factor); s << line; stats.sample(num); stats2.sample(num + skew_factor); welf.sample(num); welf2.sample(num + skew_factor); } fmt::println("welford samples t-test:"); welf.t_test(welf2); fmt::println("naive algebra samples t-test:"); stats.t_test(stats2); } void failing_bad_t_test() { std::vector sample1{ -1.94269, 3.95854, 0.215857, -9.16792, 0.939992, 1.38189, 7.66882, -6.18898, -1.12191, -9.95097, }; std::vector sample2{ -0.942695, 4.95854, 1.21586, -8.16792, 1.93999, 2.38189, 8.66882, -5.18898, -0.121914, -8.95097, }; double expect_t =-0.39838; double expect_dof = 18; double expect_p_val =0.695; Stats stats1; Stats stats2; WelfordStats welf1; WelfordStats welf2; for(double num : sample1) { stats1.sample(num); welf1.sample(num); } for(double num : sample2) { stats2.sample(num); welf2.sample(num); } auto stats_test = stats1.t_test(stats2); auto welf_test = welf1.t_test(welf2); ALMOST_EQUAL(stats_test.t_stat, welf_test.t_stat, 5); ALMOST_EQUAL(stats_test.dof, welf_test.dof, 1); ALMOST_EQUAL(stats_test.p_val, welf_test.p_val, 5); } fuc2::Set TESTS{ .name="stats", .tests={ TEST(failing_bad_t_test), TEST(test_basic_stats), TEST(test_timing_stats), TEST(test_t_test), TEST(test_welford), } }; }