Statistical functions with Java

Below is a simple set of classes for performing your own statistical analysis. As noted, they are deliberately basic: there is no multiple regression analysis, etc.

/**
 * Basic descriptive statistics and simple (one-variable) linear regression on
 * {@code double} arrays.
 *
 * <p>In every two-array method, {@code array1} holds the dependent (y) values and
 * {@code array2} holds the independent (x) values; element {@code i} of each array
 * forms one (x, y) observation, so both arrays must have the same length.
 *
 * <p>Degenerate input is reported through IEEE-754 {@code NaN} rather than an
 * exception: for example the average of an empty array, the standard deviation of
 * fewer than two values, or a correlation where one of the arrays has zero variance.
 */
class myStats {

  /**
   * Computes the sample standard deviation (divisor {@code n - 1}).
   *
   * @param array the sample values
   * @return the sample standard deviation, or {@code NaN} when fewer than two values
   *     are supplied (the statistic is undefined there)
   */
  double standardDeviation(double[] array) {
    if (array.length < 2) {
      // With n - 1 <= 0 the formula is undefined; an empty array used to yield -0.0.
      return Double.NaN;
    }
    double mean = average(array);
    double sumOfSquares = 0;
    for (double value : array) {
      double deviation = value - mean;
      sumOfSquares = sumOfSquares + deviation * deviation;
    }
    return Math.sqrt(sumOfSquares / (array.length - 1));
  }

  /**
   * Computes the Pearson correlation coefficient of two equally long samples.
   *
   * @param array1 the first sample (y values)
   * @param array2 the second sample (x values)
   * @return the correlation coefficient; {@code NaN} when it is undefined, e.g. when
   *     either sample has zero variance
   * @throws IllegalArgumentException if the arrays differ in length
   */
  double correlation(double[] array1, double[] array2) {
    requireSameLength(array1, array2);
    double mean1 = average(array1);
    double mean2 = average(array2);
    double sumOfCrossProducts = 0;
    for (int i = 0; i < array1.length; i++) {
      sumOfCrossProducts = sumOfCrossProducts + (array1[i] - mean1) * (array2[i] - mean2);
    }
    double sd1 = standardDeviation(array1);
    double sd2 = standardDeviation(array2);
    return (sumOfCrossProducts / (sd1 * sd2)) / (array1.length - 1);
  }

  /**
   * Computes the slope of the least-squares line {@code y = slope * x + intercept},
   * using {@code slope = r * (sd_y / sd_x)}.
   *
   * @param array1 the dependent (y) values
   * @param array2 the independent (x) values
   * @return the regression slope; {@code NaN} when the correlation is undefined
   * @throws IllegalArgumentException if the arrays differ in length
   */
  double slope(double[] array1, double[] array2) {
    requireSameLength(array1, array2);
    double sd1 = standardDeviation(array1);
    double sd2 = standardDeviation(array2);
    return (sd1 / sd2) * correlation(array1, array2);
  }

  /**
   * Computes the y-intercept of the least-squares line, i.e.
   * {@code mean(y) - slope * mean(x)}.
   *
   * @param array1 the dependent (y) values
   * @param array2 the independent (x) values
   * @return the regression intercept
   * @throws IllegalArgumentException if the arrays differ in length
   */
  double yIntercept(double[] array1, double[] array2) {
    double sl = slope(array1, array2);
    double yAvg = average(array1);
    double xAvg = average(array2);
    return yAvg - (sl * xAvg);
  }

  /**
   * Computes the arithmetic mean.
   *
   * @param array the values to average
   * @return the mean, or {@code NaN} for an empty array (0 / 0)
   */
  double average(double[] array) {
    double total = 0;
    for (double value : array) {
      total = total + value;
    }
    return total / array.length;
  }

  /**
   * Computes the coefficient of determination (R squared) of the least-squares line:
   * one minus the ratio of the squared residuals to the squared deviations of y
   * from its mean.
   *
   * @param array1 the dependent (y) values
   * @param array2 the independent (x) values
   * @return R squared for the fitted line
   * @throws IllegalArgumentException if the arrays differ in length
   */
  double lineFit(double[] array1, double[] array2) {
    double yAvg = average(array1);
    double sl = slope(array1, array2);
    double yi = yIntercept(array1, array2);
    double squaredResidual = 0;
    double squaredYVariance = 0;
    for (int i = 0; i < array1.length; i++) {
      double deviation = array1[i] - yAvg;
      squaredYVariance = squaredYVariance + (deviation * deviation);
      double predicted = (array2[i] * sl) + yi;
      double residual = array1[i] - predicted;
      squaredResidual = squaredResidual + (residual * residual);
    }
    return 1 - (squaredResidual / squaredYVariance);
  }

  /** Rejects paired samples whose lengths differ, since observations are matched by index. */
  private static void requireSameLength(double[] array1, double[] array2) {
    if (array1.length != array2.length) {
      throw new IllegalArgumentException(
          "arrays must have the same length: " + array1.length + " != " + array2.length);
    }
  }
}

/**
 * Command-line demo: runs each {@code myStats} calculation on a small hard-coded
 * data set and prints one labelled result per line.
 */
class runMyStats {

  /** Prints a label immediately followed by the value. */
  private static void report(String label, double value) {
    System.out.println(label + value);
  }

  public static void main(String[] args) {
    // A perfectly linear alternative for experimenting:
    //   second = {3,6,9,12}, first = {15,30,45,60}
    final double[] second = {5, 10, 20, 8, 4, 6, 12, 15};
    final double[] first = {27, 46, 73, 40, 30, 28, 46, 59};

    final myStats stats = new myStats();

    report("AVG = ", stats.average(first));
    report("STDEV = ", stats.standardDeviation(first));
    report("Correlation = ", stats.correlation(first, second));
    report("Slope = ", stats.slope(first, second));
    report("YIntercept = ", stats.yIntercept(first, second));
    report("lineFit = ", stats.lineFit(first, second));
  }
}

2 comments for “Statistical functions with Java”

  1. April 27, 2011 at 12:16 PM

    Hi Steve,

    I am trying to create Java code that is able to calculate the median and the 2.5% and 97.5% percentiles from an array of values. I managed to write the code for the median, but so far I have not been able to find any details on Java functions to calculate percentiles. I did find various open Java libraries with statistical functions, including percentile calculations (e.g. a “getpercentile” function), but I would prefer not to use any external library.

    Thanks in advance for your help

    Regards

    Valentina

  2. April 27, 2011 at 2:26 PM

    Hi Valentina,

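    If you want to roll your own, the only way I can think of to do this is to calculate the size of the array, sort the array (java.util.Arrays.sort()), and then determine your percentiles based on the number of elements/samples that fall at or below a given percentile. For example, with the nearest-rank method the p-th percentile of n sorted values is the element at index ceil(p / 100 * n) - 1.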

    I hope that helps.

    Steve

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.