|
|
@@ -0,0 +1,212 @@
|
|
|
+package com.lc.ibps.components.verification.regression;
|
|
|
+
|
|
|
+import org.apache.commons.math3.distribution.TDistribution;
|
|
|
+import org.apache.commons.math3.stat.StatUtils;
|
|
|
+import org.apache.commons.math3.stat.inference.ChiSquareTest;
|
|
|
+import org.apache.commons.math3.stat.inference.OneWayAnova;
|
|
|
+import org.apache.commons.math3.stat.inference.TTest;
|
|
|
+import org.apache.commons.math3.stat.inference.TestUtils;
|
|
|
+
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.Arrays;
|
|
|
+import java.util.List;
|
|
|
+
|
|
|
+public class StatisticsTest {
|
|
|
+ //一元线性回归测试
|
|
|
+ public static void test1() {
|
|
|
+
|
|
|
+ double[][] data = linearScatters();
|
|
|
+
|
|
|
+ SimpleLinearRegression re = new SimpleLinearRegression();
|
|
|
+
|
|
|
+ re.addData(data);
|
|
|
+
|
|
|
+ System.out.println("R方为"+ re.getRSquared());
|
|
|
+ System.out.println("调整R方为"+ re.getAdjRSquared());
|
|
|
+ System.out.println(re.getFunction());
|
|
|
+ System.out.println("标准误为:" + re.getStdErrors()[0]);
|
|
|
+
|
|
|
+ System.out.println("f值:" + re.getFValue() );
|
|
|
+ System.out.println("f检验P值:" + StatisticsUtil.getPValue(re.getFValue(), 1, data.length - 2));
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ public static double[][] linearScatters() {
|
|
|
+ List<double[]> data = new ArrayList<>();
|
|
|
+ for (double x = 0; x <= 10; x += 0.1) {
|
|
|
+ double y = 1.5 * x + 0.5;
|
|
|
+ y += Math.random() * 60 - 2; // 随机数
|
|
|
+ double[] xy = {x, y};
|
|
|
+ data.add(xy);
|
|
|
+ }
|
|
|
+ return data.stream().toArray(double[][]::new);
|
|
|
+ }
|
|
|
+//多项式回归测试
|
|
|
+ public static void test2() {
|
|
|
+ // 自变量数据
|
|
|
+// double[] xArray = new double[100];
|
|
|
+// double k = 0.0;
|
|
|
+// for (int i = 0; i < 100; i++) {
|
|
|
+// xArray[i] = k;
|
|
|
+// k += 0.1;
|
|
|
+// }
|
|
|
+//
|
|
|
+// // 因变量数据
|
|
|
+// double[] yArray = new double[100];
|
|
|
+//
|
|
|
+// for (int i = 0; i < xArray.length; i++) {
|
|
|
+// double x = xArray[i];
|
|
|
+// double x1 = x;
|
|
|
+// double x2 = x * x;
|
|
|
+// double x3 = x * x * x;
|
|
|
+// yArray[i] = 20 + 2 * x1 + 12 * x2 + 8 * x3 + Math.random() * x1 * 500;
|
|
|
+// }
|
|
|
+
|
|
|
+// w.add(0,-0.01);
|
|
|
+// w.add(4.91,5);
|
|
|
+// w.add(9.82,10.12);
|
|
|
+// w.add(14.74,14.65);
|
|
|
+// w.add(19.65,19.61);
|
|
|
+// w.add(24.56,24.06);
|
|
|
+ double[] xArray = {0,4.91,9.82,14.74,19.65,24.56};
|
|
|
+ double[] yArray = {-0.01,5,10.12,14.65,19.61,24.06};
|
|
|
+ final PolynomialRegression po = new PolynomialRegression();
|
|
|
+ po.addData(xArray, yArray, 1, 3);
|
|
|
+
|
|
|
+ System.out.println("R方为"+ po.getRSquared());
|
|
|
+ System.out.println("调整R方为"+ po.getAdjRSquared());
|
|
|
+ System.out.println(po.getFunction());
|
|
|
+ System.out.println("标准误为:" + Arrays.toString(po.getStdErrors()));
|
|
|
+
|
|
|
+ System.out.println("f值:" + po.getFValue() );
|
|
|
+ System.out.println("f检验P值:" + StatisticsUtil.getPValue(po.getFValue(), 3, yArray.length - 4));
|
|
|
+ TDistribution t = new TDistribution(16);
|
|
|
+// t.inverseCumulativeProbability(1 - p/2);
|
|
|
+ System.out.println((1-t.cumulativeProbability(0.5)));
|
|
|
+
|
|
|
+// 单样本t检验 (单样本 t 检验是一种用于检验一个样本均值是否与一个已知的总体均值显著不同的统计学方法。在单样本 t 检验中,我们需要计算一个 t 统计量,其基于样本均值、样本标准差和样本大小,然后使用 t 分布表来确定 p 值)
|
|
|
+ //////////////////////
|
|
|
+ // 模拟一个样本数据
|
|
|
+ double[] sampleData = {10.2, 8.1, 9.5, 11.2, 12.5, 10.8, 8.9, 9.6, 10.1, 11.0};
|
|
|
+
|
|
|
+// 假设总体均值为10
|
|
|
+ double populationMean = 10.0;
|
|
|
+
|
|
|
+// 创建TTest对象,并进行单样本t检验
|
|
|
+ TTest tTest = new TTest();
|
|
|
+ double pValue = tTest.tTest(populationMean, sampleData);
|
|
|
+
|
|
|
+// 输出检验结果
|
|
|
+ System.out.println("样本数据的平均值为:" + StatUtils.mean(sampleData));
|
|
|
+ System.out.println("样本数据的标准差为:" + Math.sqrt(StatUtils.variance(sampleData)));
|
|
|
+ System.out.println("总体均值为:" + populationMean);
|
|
|
+ System.out.println("p值为:" + pValue);
|
|
|
+ System.out.println("t: "+ TestUtils.t(populationMean, sampleData));
|
|
|
+ System.out.println("p: "+TestUtils.tTest(populationMean, sampleData));
|
|
|
+ System.out.println("显著性水平: "+TestUtils.tTest(populationMean, sampleData, 0.05));
|
|
|
+
|
|
|
+// 判断p值是否小于0.05,如果小于0.05则拒绝零假设
|
|
|
+ if (pValue < 0.05) {
|
|
|
+ System.out.println("拒绝零假设,样本均值与总体均值显著不同。");
|
|
|
+ } else {
|
|
|
+ System.out.println("不能拒绝零假设,样本均值与总体均值可能相同。");
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+// 5) 卡方检验(用于确定观察到的频数与期望的频数之间是否存在显著差异)
|
|
|
+
|
|
|
+ long[] observed = {10, 9, 11};
|
|
|
+ double[] expected = {10.1, 9.8, 10.3};
|
|
|
+// 设置显著性水平
|
|
|
+ double alpha = 0.05;
|
|
|
+
|
|
|
+ ChiSquareTest chiSquareTest = new ChiSquareTest();
|
|
|
+ boolean result = chiSquareTest.chiSquareTest(expected, observed, alpha);
|
|
|
+
|
|
|
+// 输出结果
|
|
|
+ if (result) {
|
|
|
+ System.out.println("拒绝零假设");
|
|
|
+ } else {
|
|
|
+ System.out.println("接受零假设");
|
|
|
+ }
|
|
|
+// 6)单因素方差分析检验ANOVA(用于比较多个组或处理之间的均值是否存在显著差异。它用于确定一个因素(独立变量)对一个连续的因变量是否有影响)
|
|
|
+
|
|
|
+// 定义三个组的数据
|
|
|
+ double[] group1 = {93.0, 103.0, 95.0, 101.0, 91.0, 105.0, 96.0, 94.0, 101.0};
|
|
|
+ double[] group2 = {99.0, 92.0, 102.0, 100.0, 102.0, 89.0};
|
|
|
+ double[] group3 = {110.0, 115.0, 111.0, 117.0, 128.0, 117.0 };
|
|
|
+
|
|
|
+// 将三个组的数据合并到一个二维数组中
|
|
|
+ ArrayList<double[]> classes = new ArrayList<>();
|
|
|
+ classes.add(group1);
|
|
|
+ classes.add(group2);
|
|
|
+ classes.add(group3);
|
|
|
+//double[][] data = {group1, group2, group3};
|
|
|
+
|
|
|
+// 进行单因素方差分析检验
|
|
|
+ OneWayAnova anova = new OneWayAnova();
|
|
|
+ double fValue = anova.anovaFValue(classes);
|
|
|
+
|
|
|
+// 打印F值和P值
|
|
|
+ System.out.println("F值:" + fValue);
|
|
|
+ System.out.println("P值:" + anova.anovaPValue(classes));
|
|
|
+
|
|
|
+//另外的方法
|
|
|
+ double fStatistic = TestUtils.oneWayAnovaFValue(classes); // F-value
|
|
|
+ double pValue2 = TestUtils.oneWayAnovaPValue(classes); // P-value
|
|
|
+ System.out.println("F: "+fStatistic);
|
|
|
+ System.out.println("P: "+pValue2);
|
|
|
+ }
|
|
|
+ //多元回归分析测试
|
|
|
+ public static void test3() {
|
|
|
+ double[][] x = randomX3();
|
|
|
+ double[] y = randomY3(x);
|
|
|
+
|
|
|
+ MultipleLinearRegression mul = new MultipleLinearRegression();
|
|
|
+ mul.addData(x, y, 2);
|
|
|
+
|
|
|
+ System.out.println(mul.getFunction());
|
|
|
+ System.out.println("标准误: " + mul.getStdErrors()[0]);
|
|
|
+ System.out.println("R方 : " + mul.getRSquared());
|
|
|
+ System.out.println("调整后R方 :" + mul.getAdjRSquared());
|
|
|
+ System.out.println("f值:" + mul.getFValue()) ;
|
|
|
+
|
|
|
+ System.out.println("f检验P值:" + StatisticsUtil.getPValue(mul.getFValue(), mul.getDfIndependent(), mul.getDfDependent() ));
|
|
|
+ }
|
|
|
+
|
|
|
+ public static double[][] randomX3() {
|
|
|
+ List<double[]> data = new ArrayList<>();
|
|
|
+ for (double i = 0; i < 10; i += 0.1) {
|
|
|
+ double x1 = i;
|
|
|
+ double x2 = Math.sqrt(i);
|
|
|
+ data.add(new double[]{x1, x2});
|
|
|
+ }
|
|
|
+ return data.stream().toArray(double[][]::new);
|
|
|
+ }
|
|
|
+
|
|
|
+ public static double[] randomY3(double[][] arr) {
|
|
|
+ if (arr != null && arr.length > 0) {
|
|
|
+ int len = arr.length;
|
|
|
+ double[] y = new double[len];
|
|
|
+ for (int i = 0; i < len; i++) {
|
|
|
+ double[] x = arr[i];
|
|
|
+ // 构造数据
|
|
|
+ y[i] = functionConstructorY3(x);
|
|
|
+ }
|
|
|
+ return y;
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static double functionConstructorY3(double[] x) {
|
|
|
+ double x1 = x[0];
|
|
|
+ double x2 = x[1];
|
|
|
+ return 20 + 2 * x1 + 3 * x2 + Math.random() * 30;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void main(String[] args){
|
|
|
+ test2();
|
|
|
+ }
|
|
|
+}
|