Source for org.jfree.data.statistics.Statistics

   1: /* ===========================================================
   2:  * JFreeChart : a free chart library for the Java(tm) platform
   3:  * ===========================================================
   4:  *
   5:  * (C) Copyright 2000-2007, by Object Refinery Limited and Contributors.
   6:  *
   7:  * Project Info:  http://www.jfree.org/jfreechart/index.html
   8:  *
   9:  * This library is free software; you can redistribute it and/or modify it 
  10:  * under the terms of the GNU Lesser General Public License as published by 
  11:  * the Free Software Foundation; either version 2.1 of the License, or 
  12:  * (at your option) any later version.
  13:  *
  14:  * This library is distributed in the hope that it will be useful, but 
  15:  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  16:  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 
  17:  * License for more details.
  18:  *
  19:  * You should have received a copy of the GNU Lesser General Public
  20:  * License along with this library; if not, write to the Free Software
  21:  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, 
  22:  * USA.  
  23:  *
  24:  * [Java is a trademark or registered trademark of Sun Microsystems, Inc. 
  25:  * in the United States and other countries.]
  26:  *
  27:  * ---------------
  28:  * Statistics.java
  29:  * ---------------
  30:  * (C) Copyright 2000-2007, by Matthew Wright and Contributors.
  31:  *
  32:  * Original Author:  Matthew Wright;
  33:  * Contributor(s):   David Gilbert (for Object Refinery Limited);
  34:  *
  35:  * Changes (from 08-Nov-2001)
  36:  * --------------------------
  37:  * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
  38:  *               Moved from JFreeChart to package com.jrefinery.data.* in 
  39:  *               JCommon class library (DG);
  40:  * 24-Jun-2002 : Removed unnecessary local variable (DG);
  41:  * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
  42:  * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
  43:  * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
  44:  * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0 
  45:  *               release (DG);
  46:  *
  47:  */
  48: 
  49: package org.jfree.data.statistics;
  50: 
  51: import java.util.ArrayList;
  52: import java.util.Collection;
  53: import java.util.Collections;
  54: import java.util.Iterator;
  55: import java.util.List;
  56: 
  57: /**
  58:  * A utility class that provides some common statistical functions.
  59:  */
  60: public abstract class Statistics {
  61: 
  62:     /**
  63:      * Returns the mean of an array of numbers.  This is equivalent to calling
  64:      * <code>calculateMean(values, true)</code>.
  65:      *
  66:      * @param values  the values (<code>null</code> not permitted).
  67:      *
  68:      * @return The mean.
  69:      */
  70:     public static double calculateMean(Number[] values) {
  71:         return calculateMean(values, true);
  72:     }
  73:     
  74:     /**
  75:      * Returns the mean of an array of numbers.
  76:      *
  77:      * @param values  the values (<code>null</code> not permitted).
  78:      * @param includeNullAndNaN  a flag that controls whether or not 
  79:      *     <code>null</code> and <code>Double.NaN</code> values are included
  80:      *     in the calculation (if either is present in the array, the result is 
  81:      *     {@link Double#NaN}).
  82:      *
  83:      * @return The mean.
  84:      * 
  85:      * @since 1.0.3
  86:      */
  87:     public static double calculateMean(Number[] values, 
  88:             boolean includeNullAndNaN) {
  89:         
  90:         if (values == null) {
  91:             throw new IllegalArgumentException("Null 'values' argument.");
  92:         }
  93:         double sum = 0.0;
  94:         double current;
  95:         int counter = 0;
  96:         for (int i = 0; i < values.length; i++) {
  97:             // treat nulls the same as NaNs
  98:             if (values[i] != null) {
  99:                 current = values[i].doubleValue();    
 100:             }
 101:             else {
 102:                 current = Double.NaN;
 103:             }
 104:             // calculate the sum and count
 105:             if (includeNullAndNaN || !Double.isNaN(current)) {
 106:                 sum = sum + current;
 107:                 counter++;
 108:             }
 109:         }
 110:         double result = (sum / counter);
 111:         return result;
 112:     }
 113: 
 114:     /**
 115:      * Returns the mean of a collection of <code>Number</code> objects.
 116:      * 
 117:      * @param values  the values (<code>null</code> not permitted).
 118:      * 
 119:      * @return The mean.
 120:      */
 121:     public static double calculateMean(Collection values) {
 122:         return calculateMean(values, true);
 123:     }
 124:     
 125:     /**
 126:      * Returns the mean of a collection of <code>Number</code> objects.
 127:      * 
 128:      * @param values  the values (<code>null</code> not permitted).
 129:      * @param includeNullAndNaN  a flag that controls whether or not 
 130:      *     <code>null</code> and <code>Double.NaN</code> values are included
 131:      *     in the calculation (if either is present in the array, the result is 
 132:      *     {@link Double#NaN}).
 133:      * 
 134:      * @return The mean.
 135:      * 
 136:      * @since 1.0.3
 137:      */
 138:     public static double calculateMean(Collection values, 
 139:             boolean includeNullAndNaN) {
 140:         
 141:         if (values == null) {
 142:             throw new IllegalArgumentException("Null 'values' argument.");
 143:         }
 144:         int count = 0;
 145:         double total = 0.0;
 146:         Iterator iterator = values.iterator();
 147:         while (iterator.hasNext()) {
 148:             Object object = iterator.next();
 149:             if (object == null) {
 150:                 if (includeNullAndNaN) {
 151:                     return Double.NaN;
 152:                 }
 153:             }
 154:             else {
 155:                 if (object instanceof Number) {
 156:                     Number number = (Number) object;
 157:                     double value = number.doubleValue();
 158:                     if (Double.isNaN(value)) {
 159:                         if (includeNullAndNaN) {
 160:                             return Double.NaN;
 161:                         }
 162:                     }
 163:                     else {
 164:                         total = total + number.doubleValue();
 165:                         count = count + 1;
 166:                     }
 167:                 }
 168:             }
 169:         }      
 170:         return total / count;
 171:     }
 172: 
 173:     /**
 174:      * Calculates the median for a list of values (<code>Number</code> objects).
 175:      * The list of values will be copied, and the copy sorted, before 
 176:      * calculating the median.  To avoid this step (if your list of values
 177:      * is already sorted), use the {@link #calculateMedian(List, boolean)} 
 178:      * method.
 179:      * 
 180:      * @param values  the values (<code>null</code> permitted).
 181:      * 
 182:      * @return The median.
 183:      */
 184:     public static double calculateMedian(List values) {
 185:         return calculateMedian(values, true);
 186:     }
 187:     
 188:     /**
 189:      * Calculates the median for a list of values (<code>Number</code> objects).
 190:      * If <code>copyAndSort</code> is <code>false</code>, the list is assumed
 191:      * to be presorted in ascending order by value.
 192:      * 
 193:      * @param values  the values (<code>null</code> permitted).
 194:      * @param copyAndSort  a flag that controls whether the list of values is
 195:      *                     copied and sorted.
 196:      * 
 197:      * @return The median.
 198:      */
 199:     public static double calculateMedian(List values, boolean copyAndSort) {
 200:         
 201:         double result = Double.NaN;
 202:         if (values != null) {
 203:             if (copyAndSort) {
 204:                 int itemCount = values.size();
 205:                 List copy = new ArrayList(itemCount);
 206:                 for (int i = 0; i < itemCount; i++) {
 207:                     copy.add(i, values.get(i));   
 208:                 }
 209:                 Collections.sort(copy);
 210:                 values = copy;
 211:             }
 212:             int count = values.size();
 213:             if (count > 0) {
 214:                 if (count % 2 == 1) {
 215:                     if (count > 1) {
 216:                         Number value = (Number) values.get((count - 1) / 2);
 217:                         result = value.doubleValue();
 218:                     }
 219:                     else {
 220:                         Number value = (Number) values.get(0);
 221:                         result = value.doubleValue();
 222:                     }
 223:                 }
 224:                 else {
 225:                     Number value1 = (Number) values.get(count / 2 - 1);
 226:                     Number value2 = (Number) values.get(count / 2);
 227:                     result = (value1.doubleValue() + value2.doubleValue()) 
 228:                              / 2.0;
 229:                 }
 230:             }
 231:         }
 232:         return result;
 233:     }
 234:     
 235:     /**
 236:      * Calculates the median for a sublist within a list of values 
 237:      * (<code>Number</code> objects).
 238:      * 
 239:      * @param values  the values, in any order (<code>null</code> not 
 240:      *                permitted).
 241:      * @param start  the start index.
 242:      * @param end  the end index.
 243:      * 
 244:      * @return The median.
 245:      */
 246:     public static double calculateMedian(List values, int start, int end) {
 247:         return calculateMedian(values, start, end, true);
 248:     }
 249: 
 250:     /**
 251:      * Calculates the median for a sublist within a list of values 
 252:      * (<code>Number</code> objects).  The entire list will be sorted if the 
 253:      * <code>ascending</code< argument is <code>false</code>.
 254:      * 
 255:      * @param values  the values (<code>null</code> not permitted).
 256:      * @param start  the start index.
 257:      * @param end  the end index.
 258:      * @param copyAndSort  a flag that that controls whether the list of values 
 259:      *                     is copied and sorted.
 260:      * 
 261:      * @return The median.
 262:      */
 263:     public static double calculateMedian(List values, int start, int end,
 264:                                          boolean copyAndSort) {
 265:         
 266:         double result = Double.NaN;
 267:         if (copyAndSort) {
 268:             List working = new ArrayList(end - start + 1);
 269:             for (int i = start; i <= end; i++) {
 270:                 working.add(values.get(i));  
 271:             }
 272:             Collections.sort(working); 
 273:             result = calculateMedian(working, false);
 274:         }
 275:         else {
 276:             int count = end - start + 1;
 277:             if (count > 0) {
 278:                 if (count % 2 == 1) {
 279:                     if (count > 1) {
 280:                         Number value 
 281:                             = (Number) values.get(start + (count - 1) / 2);
 282:                         result = value.doubleValue();
 283:                     }
 284:                     else {
 285:                         Number value = (Number) values.get(start);
 286:                         result = value.doubleValue();
 287:                     }
 288:                 }
 289:                 else {
 290:                     Number value1 = (Number) values.get(start + count / 2 - 1);
 291:                     Number value2 = (Number) values.get(start + count / 2);
 292:                     result 
 293:                         = (value1.doubleValue() + value2.doubleValue()) / 2.0;
 294:                 }
 295:             }
 296:         }
 297:         return result;    
 298:         
 299:     }
 300:     
 301:     /**
 302:      * Returns the standard deviation of a set of numbers.
 303:      *
 304:      * @param data  the data (<code>null</code> or zero length array not 
 305:      *     permitted).
 306:      *
 307:      * @return The standard deviation of a set of numbers.
 308:      */
 309:     public static double getStdDev(Number[] data) {
 310:         if (data == null) {
 311:             throw new IllegalArgumentException("Null 'data' array.");
 312:         }
 313:         if (data.length == 0) {
 314:             throw new IllegalArgumentException("Zero length 'data' array.");
 315:         }
 316:         double avg = calculateMean(data);
 317:         double sum = 0.0;
 318: 
 319:         for (int counter = 0; counter < data.length; counter++) {
 320:             double diff = data[counter].doubleValue() - avg;
 321:             sum = sum + diff * diff;
 322:         }
 323:         return Math.sqrt(sum / (data.length - 1));
 324:     }
 325: 
 326:     /**
 327:      * Fits a straight line to a set of (x, y) data, returning the slope and
 328:      * intercept.
 329:      *
 330:      * @param xData  the x-data (<code>null</code> not permitted).
 331:      * @param yData  the y-data (<code>null</code> not permitted).
 332:      *
 333:      * @return A double array with the intercept in [0] and the slope in [1].
 334:      */
 335:     public static double[] getLinearFit(Number[] xData, Number[] yData) {
 336: 
 337:         if (xData == null) { 
 338:             throw new IllegalArgumentException("Null 'xData' argument.");
 339:         }
 340:         if (yData == null) { 
 341:             throw new IllegalArgumentException("Null 'yData' argument.");
 342:         }
 343:         if (xData.length != yData.length) {
 344:             throw new IllegalArgumentException(
 345:                 "Statistics.getLinearFit(): array lengths must be equal.");
 346:         }
 347: 
 348:         double[] result = new double[2];
 349:         // slope
 350:         result[1] = getSlope(xData, yData);
 351:         // intercept
 352:         result[0] = calculateMean(yData) - result[1] * calculateMean(xData);
 353: 
 354:         return result;
 355: 
 356:     }
 357: 
 358:     /**
 359:      * Finds the slope of a regression line using least squares.
 360:      *
 361:      * @param xData  the x-values (<code>null</code> not permitted).
 362:      * @param yData  the y-values (<code>null</code> not permitted).
 363:      *
 364:      * @return The slope.
 365:      */
 366:     public static double getSlope(Number[] xData, Number[] yData) {
 367: 
 368:         if (xData == null) { 
 369:             throw new IllegalArgumentException("Null 'xData' argument.");
 370:         }
 371:         if (yData == null) { 
 372:             throw new IllegalArgumentException("Null 'yData' argument.");
 373:         }
 374:         if (xData.length != yData.length) {
 375:             throw new IllegalArgumentException("Array lengths must be equal.");
 376:         }
 377: 
 378:         // ********* stat function for linear slope ********
 379:         // y = a + bx
 380:         // a = ybar - b * xbar
 381:         //     sum(x * y) - (sum (x) * sum(y)) / n
 382:         // b = ------------------------------------
 383:         //     sum (x^2) - (sum(x)^2 / n
 384:         // *************************************************
 385: 
 386:         // sum of x, x^2, x * y, y
 387:         double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
 388:         int counter;
 389:         for (counter = 0; counter < xData.length; counter++) {
 390:             sx = sx + xData[counter].doubleValue();
 391:             sxx = sxx + Math.pow(xData[counter].doubleValue(), 2);
 392:             sxy = sxy + yData[counter].doubleValue() 
 393:                       * xData[counter].doubleValue();
 394:             sy = sy + yData[counter].doubleValue();
 395:         }
 396:         return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter);
 397: 
 398:     }
 399: 
 400:     /**
 401:      * Calculates the correlation between two datasets.  Both arrays should 
 402:      * contain the same number of items.  Null values are treated as zero.
 403:      * <P>
 404:      * Information about the correlation calculation was obtained from:
 405:      * 
 406:      * http://trochim.human.cornell.edu/kb/statcorr.htm
 407:      * 
 408:      * @param data1  the first dataset.
 409:      * @param data2  the second dataset.
 410:      * 
 411:      * @return The correlation.
 412:      */
 413:     public static double getCorrelation(Number[] data1, Number[] data2) {
 414:         if (data1 == null) {
 415:             throw new IllegalArgumentException("Null 'data1' argument.");
 416:         }
 417:         if (data2 == null) {
 418:             throw new IllegalArgumentException("Null 'data2' argument.");
 419:         }
 420:         if (data1.length != data2.length) {
 421:             throw new IllegalArgumentException(
 422:                 "'data1' and 'data2' arrays must have same length."
 423:             );   
 424:         }
 425:         int n = data1.length;
 426:         double sumX = 0.0;
 427:         double sumY = 0.0;
 428:         double sumX2 = 0.0;
 429:         double sumY2 = 0.0;
 430:         double sumXY = 0.0;
 431:         for (int i = 0; i < n; i++) {
 432:             double x = 0.0;
 433:             if (data1[i] != null) {
 434:                 x = data1[i].doubleValue();   
 435:             }
 436:             double y = 0.0;
 437:             if (data2[i] != null) {
 438:                 y = data2[i].doubleValue();   
 439:             }
 440:             sumX = sumX + x;
 441:             sumY = sumY + y;
 442:             sumXY = sumXY + (x * y);
 443:             sumX2 = sumX2 + (x * x);
 444:             sumY2 = sumY2 + (y * y);
 445:         }
 446:         return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX) 
 447:                 * (n * sumY2 - sumY * sumY), 0.5);      
 448:     }
 449: 
 450:     /**
 451:      * Returns a data set for a moving average on the data set passed in.
 452:      *
 453:      * @param xData  an array of the x data.
 454:      * @param yData  an array of the y data.
 455:      * @param period  the number of data points to average
 456:      *
 457:      * @return A double[][] the length of the data set in the first dimension,
 458:      *         with two doubles for x and y in the second dimension
 459:      */
 460:     public static double[][] getMovingAverage(Number[] xData, 
 461:                                               Number[] yData, 
 462:                                               int period) {
 463: 
 464:         // check arguments...
 465:         if (xData.length != yData.length) {
 466:             throw new IllegalArgumentException("Array lengths must be equal.");
 467:         }
 468: 
 469:         if (period > xData.length) {
 470:             throw new IllegalArgumentException(
 471:                 "Period can't be longer than dataset."
 472:             );
 473:         }
 474: 
 475:         double[][] result = new double[xData.length - period][2];
 476:         for (int i = 0; i < result.length; i++) {
 477:             result[i][0] = xData[i + period].doubleValue();
 478:             // holds the moving average sum
 479:             double sum = 0.0;
 480:             for (int j = 0; j < period; j++) {
 481:                 sum += yData[i + j].doubleValue();
 482:             }
 483:             sum = sum / period;
 484:             result[i][1] = sum;
 485:         }
 486:         return result;
 487: 
 488:     }
 489: 
 490: }