Mostly working naive vs. welford sampling and the t-test actually works now.

10 hours ago · 1d46682801
parent dc7277ba37
commit 1d46682801
6 changed files with 229 additions and 20 deletions
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 MIT License

-Copyright (c) <year> <copyright holders>
+Copyright (c) Zed A. Shaw

 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

--- a/cpp/Makefile
+++ b/cpp/Makefile
@ -1,3 +1,4 @@
+R_SCRIPT=C:/Users/lcthw/AppData/Local/Programs/R/R-4.6.0/bin/Rscript.exe
 all: build

 reset:
@ -19,7 +20,10 @@ debug_build:
 	meson compile -j 10 -C builddir

 test: build
-	./builddir/fuc2it
+	./builddir/fuc2it -v
+
+r_test:
+	$(R_SCRIPT) tests/confirm.r

 run: build test
 ifeq '$(OS)' 'Windows_NT'
--- a/cpp/src/stats.cpp
+++ b/cpp/src/stats.cpp
@ -1,7 +1,9 @@
 #include "stats.hpp"
+#include "welford.hpp"
 #include <fmt/core.h>
 #include <numeric>
 #include <numbers>
+#include <functional>

 void Stats::dump(std::string msg)
 {
@ -11,16 +13,26 @@ void Stats::dump(std::string msg)
            stddev());
 }

-// WARNING: got this code from google AI. PROBABLY BULLSHIT
-void Stats::t_test(Stats& other) {
-  double n1 = this->n;
-  double n2 = other.n;
+double simpson(double a, double b, uint32_t n, const std::function<double(double)> func) {
+    const double h = ( b - a ) / n;
+    double sum = 0.0;
+    for ( uint32_t i = 0; i < n; ++i ) {
+    	const double x = a + i * h;
+    	sum += ( func(x) + 4.0 * func(x + h / 2.0) + func(x + h) ) / 6.0;
+    }
+    return sum * h;
+}
+

-  double mean1 = this->mean();
-  double mean2 = other.mean();
+TTest internal_t_test(auto& first, auto& second) {
+  double n1 = first.n;
+  double n2 = second.n;

-  double var1 = this->variance();
-  double var2 = other.variance();
+  double mean1 = first.mean();
+  double mean2 = second.mean();
+
+  double var1 = first.sample_variance();
+  double var2 = second.sample_variance();

  // calculate t-stat
  double delta_mean = mean1 - mean2;
@ -30,12 +42,33 @@ void Stats::t_test(Stats& other) {
  // calculate degrees of freedom
  double num = std::pow((var1 / n1) + (var2 / n2), 2);
  double den = (std::pow(var1 / n1, 2) / (n1 - 1)) + (std::pow(var2 / n2, 2) / (n2 - 1));
-  double df = num / den;
+  double dof = num / den;
+
+  // welch calculation
+  double temp = first.sample_variance() / first.n + second.sample_variance() / second.n;
+  double welchs = (first.mean() - second.mean()) / std::sqrt(temp);

-  double x = df / (df + t_stat * t_stat);
+  double gamm = std::exp(
+      std::lgamma(dof / 2.0) + std::lgamma(0.5)
+        - std::lgamma(dof / 2.0 + 0.5));
+  double b = dof / ( welchs * welchs + dof);

-  double p_val = std::tgamma((df + 1.0) / 2.0) / (std::sqrt(df * std::numbers::pi) * std::tgamma(df / 2.0)) * std::pow(1.0 + (t_stat * t_stat) / df, -(df + 1.0) / 2.0);
+  auto func = [&](const double r) {
+    return std::pow(r, dof / 2.0 - 1.0) / std::sqrt(1.0 - r);
+  };
+
+  double p_val = simpson(0.0, b, 10000, func) / gamm;
+
+  return {
+    .t_stat = t_stat,
+    .dof = dof,
+    .p_val = p_val};
+}
+
+TTest Stats::t_test(Stats& other) {
+  return internal_t_test(*this, other);
+}

-  fmt::println("t_stat={}, df={}, p_val={}",
-      t_stat, df, p_val);
+TTest WelfordStats::t_test(WelfordStats& other) {
+  return internal_t_test(*this, other);
 }
--- a/cpp/src/stats.hpp
+++ b/cpp/src/stats.hpp
@ -2,6 +2,12 @@
 #include <cmath>
 #include <chrono>

+struct TTest {
+  double t_stat = 0.0;
+  double dof = 0.0;
+  double p_val = 0.0;
+};
+
 struct Stats {
  using TimeBullshit = std::chrono::time_point<std::chrono::high_resolution_clock>;

@ -28,6 +34,10 @@ struct Stats {
  }

  inline double variance() {
+    return (sumsq - (sum * sum / n)) / n;
+  }
+
+  inline double sample_variance() {
    return (sumsq - (sum * sum / n)) / (n - 1);
  }

@ -61,5 +71,5 @@ struct Stats {

  void dump(std::string msg="");

-  void t_test(Stats& other);
+  TTest t_test(Stats& other);
 };
--- a/cpp/tests/stats_tests.cpp
+++ b/cpp/tests/stats_tests.cpp
@ -2,19 +2,181 @@
 #include <deque>
 #include <string>
 #include <fuc2/testing.hpp>
+#include "stats.hpp"
+#include "welford.hpp"
+#include <fstream>
+#include <random>

 using namespace fuc2;

+std::random_device RNG;
+std::mt19937 GENERATOR(RNG());
+
+template<typename T>
+T uniform(T from, T to) {
+  std::uniform_int_distribution<T> rand(from, to);
+
+  return rand(GENERATOR);
+}
+
+template<typename T>
+T uniform_real(T from, T to) {
+  std::uniform_real_distribution<T> rand(from, to);
+
+  return rand(GENERATOR);
+}
+
 namespace stats_tests {

-  void test_stats_stuff() {
-    CHECK(1 == 1);
+  std::vector<double> generate_samples(size_t count) {
+    std::vector<double> samples;
+
+    // generate random numbers
+    for(size_t i = 0; i < count; i++) {
+      double num = uniform_real(0.0, 100000.0);
+      samples.push_back(num);
+    }
+
+    return samples;
+  }
+
+  void test_basic_stats() {
+    Stats stats;
+    auto samples = generate_samples(1000);
+
+    // calculate it manually from the vector of numbers
+    double sum = 0.0;
+    double sumsq = 0.0;
+    for(auto num : samples) {
+      sum += num;
+      sumsq += num * num;
+
+      stats.sample(num);
+    }
+
+    double mean = sum / samples.size();
+
+    ALMOST_EQUAL(sum, stats.sum, 5);
+    ALMOST_EQUAL(sumsq, stats.sumsq, 5);
+
+    // calculate mean and stddev using Stats
+    ALMOST_EQUAL(mean, stats.mean(), 5);
+
+    double n = samples.size();
+    // currently using variance, not sample_variance for stddev
+    double var = (sumsq - (sum * sum / n)) / n;
+    ALMOST_EQUAL(var, stats.variance(), 5);
+
+    double stddev = std::sqrt(var);
+    ALMOST_EQUAL(stddev, stats.stddev(), 5);
+  }
+
+  void test_welford() {
+    Stats stats;
+    WelfordStats welf;
+
+    for(size_t i = 0; i < 10000; i++) {
+      double num = uniform_real(0.0, 1.0);
+      stats.sample(num);
+      welf.sample(num);
+    }
+
+    ALMOST_EQUAL(stats.mean(), welf.mean(), 5);
+    ALMOST_EQUAL(stats.variance(), welf.variance(), 5);
+    ALMOST_EQUAL(stats.stddev(), welf.stddev(), 5);
+  }
+
+  void test_timing_stats() {
+    Stats stats;
+    // need to find something to time...
+  }
+
+  double round_to(double value, int decimal_places) {
+      double multiplier = std::pow(10.0, decimal_places);
+      return std::round(value * multiplier) / multiplier;
+  }
+
+  void test_t_test() {
+    Stats stats;
+    Stats stats2;
+    WelfordStats welf;
+    WelfordStats welf2;
+
+    std::string filename("samples.txt");
+    std::fstream s{filename, s.binary | s.trunc | s.out};
+    double skew_factor = 0.8;
+
+    for(size_t i = 0; i < 10; i++) {
+      double num = uniform_real(-10.0, 10.0);
+      auto line = fmt::format("{} {}\n", num, num + skew_factor);
+      s << line;
+
+      stats.sample(num);
+      stats2.sample(num + skew_factor);
+
+      welf.sample(num);
+      welf2.sample(num + skew_factor);
+    }
+
+    fmt::println("welford samples t-test:");
+    welf.t_test(welf2);
+
+    fmt::println("naive algebra samples t-test:");
+    stats.t_test(stats2);
+  }
+
+  void failing_bad_t_test() {
+    std::vector<double> sample1{
+      -1.94269, 3.95854,
+        0.215857, -9.16792,
+        0.939992, 1.38189,
+        7.66882, -6.18898,
+        -1.12191, -9.95097,
+    };
+
+    std::vector<double> sample2{
+      -0.942695, 4.95854,
+        1.21586, -8.16792,
+        1.93999, 2.38189,
+        8.66882, -5.18898,
+        -0.121914, -8.95097,
+    };
+
+    double expect_t =-0.39838;
+    double expect_dof = 18;
+    double expect_p_val =0.695;
+
+    Stats stats1;
+    Stats stats2;
+    WelfordStats welf1;
+    WelfordStats welf2;
+
+    for(double num : sample1) {
+      stats1.sample(num);
+      welf1.sample(num);
+    }
+
+    for(double num : sample2) {
+      stats2.sample(num);
+      welf2.sample(num);
+    }
+
+    auto stats_test = stats1.t_test(stats2);
+    auto welf_test = welf1.t_test(welf2);
+
+    ALMOST_EQUAL(stats_test.t_stat, welf_test.t_stat, 5);
+    ALMOST_EQUAL(stats_test.dof, welf_test.dof, 1);
+    ALMOST_EQUAL(stats_test.p_val, welf_test.p_val, 5);
  }

  fuc2::Set TESTS{
    .name="stats",
      .tests={
-        TEST(test_stats_stuff),
+        TEST(failing_bad_t_test),
+        TEST(test_basic_stats),
+        TEST(test_timing_stats),
+        TEST(test_t_test),
+        TEST(test_welford),
      }
  };
 }
--- a/cpp/wraps/fuc2.wrap
+++ b/cpp/wraps/fuc2.wrap
@ -1,5 +1,5 @@
 [wrap-git]
-directory=fuc2-0.1.0
+directory=fuc2-0.2.0
 url=https://lcthw.dev/cpp/fuc2.git
 revision=HEAD
 depth=1