001 /* ===========================================================
002 * JFreeChart : a free chart library for the Java(tm) platform
003 * ===========================================================
004 *
005 * (C) Copyright 2000-2007, by Object Refinery Limited and Contributors.
006 *
007 * Project Info: http://www.jfree.org/jfreechart/index.html
008 *
009 * This library is free software; you can redistribute it and/or modify it
010 * under the terms of the GNU Lesser General Public License as published by
011 * the Free Software Foundation; either version 2.1 of the License, or
012 * (at your option) any later version.
013 *
014 * This library is distributed in the hope that it will be useful, but
015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017 * License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this library; if not, write to the Free Software
021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
022 * USA.
023 *
024 * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
025 * in the United States and other countries.]
026 *
027 * ---------------
028 * Statistics.java
029 * ---------------
030 * (C) Copyright 2000-2007, by Matthew Wright and Contributors.
031 *
032 * Original Author: Matthew Wright;
033 * Contributor(s): David Gilbert (for Object Refinery Limited);
034 *
035 * Changes (from 08-Nov-2001)
036 * --------------------------
037 * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
038 * Moved from JFreeChart to package com.jrefinery.data.* in
039 * JCommon class library (DG);
040 * 24-Jun-2002 : Removed unnecessary local variable (DG);
041 * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
042 * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
043 * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
044 * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0
045 * release (DG);
046 *
047 */
048
049 package org.jfree.data.statistics;
050
051 import java.util.ArrayList;
052 import java.util.Collection;
053 import java.util.Collections;
054 import java.util.Iterator;
055 import java.util.List;
056
/**
 * A utility class that provides some common statistical functions (mean,
 * median, standard deviation, linear regression, correlation and moving
 * average).  All methods are static and operate on <code>Number</code>
 * objects, converting them with <code>doubleValue()</code>.
 */
public abstract class Statistics {

    /**
     * Orders <code>Number</code> objects by their <code>doubleValue()</code>.
     * Sorting with this comparator (rather than by natural ordering) lets the
     * median methods handle lists that mix <code>Number</code> subclasses or
     * contain numbers that are not <code>Comparable</code>.  The fully
     * qualified type name is used so that no additional import is required.
     */
    private static final java.util.Comparator BY_DOUBLE_VALUE
            = new java.util.Comparator() {
        public int compare(Object first, Object second) {
            return Double.compare(((Number) first).doubleValue(),
                    ((Number) second).doubleValue());
        }
    };

    /**
     * Returns the mean of an array of numbers.  This is equivalent to calling
     * <code>calculateMean(values, true)</code>.
     *
     * @param values  the values (<code>null</code> not permitted).
     *
     * @return The mean.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMean(Number[] values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of an array of numbers.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param includeNullAndNaN  a flag that controls whether or not
     *     <code>null</code> and <code>Double.NaN</code> values are included
     *     in the calculation (if either is present in the array, the result
     *     is {@link Double#NaN}).
     *
     * @return The mean, or {@link Double#NaN} if no values contribute to the
     *     calculation (for example, an empty array).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     *
     * @since 1.0.3
     */
    public static double calculateMean(Number[] values,
                                       boolean includeNullAndNaN) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        double sum = 0.0;
        int counter = 0;
        for (int i = 0; i < values.length; i++) {
            // a null entry is treated exactly like a NaN entry
            double current = (values[i] != null)
                    ? values[i].doubleValue() : Double.NaN;
            // a NaN that is included poisons the sum, giving a NaN mean
            if (includeNullAndNaN || !Double.isNaN(current)) {
                sum = sum + current;
                counter++;
            }
        }
        // 0.0 / 0 evaluates to NaN when nothing was counted
        return sum / counter;
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects.  This
     * is equivalent to calling <code>calculateMean(values, true)</code>.
     *
     * @param values  the values (<code>null</code> not permitted).
     *
     * @return The mean.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMean(Collection values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects.
     * Items in the collection that are neither <code>null</code> nor
     * instances of <code>Number</code> are ignored.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param includeNullAndNaN  a flag that controls whether or not
     *     <code>null</code> and <code>Double.NaN</code> values are included
     *     in the calculation (if either is present in the collection, the
     *     result is {@link Double#NaN}).
     *
     * @return The mean, or {@link Double#NaN} if no values contribute to the
     *     calculation (for example, an empty collection).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     *
     * @since 1.0.3
     */
    public static double calculateMean(Collection values,
                                       boolean includeNullAndNaN) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        int count = 0;
        double total = 0.0;
        Iterator iterator = values.iterator();
        while (iterator.hasNext()) {
            Object object = iterator.next();
            if (object != null && !(object instanceof Number)) {
                continue;  // non-numeric items do not take part
            }
            // a null entry is treated exactly like a NaN entry
            double value = (object == null)
                    ? Double.NaN : ((Number) object).doubleValue();
            if (Double.isNaN(value)) {
                if (includeNullAndNaN) {
                    return Double.NaN;
                }
            }
            else {
                total = total + value;
                count = count + 1;
            }
        }
        // 0.0 / 0 evaluates to NaN when nothing was counted
        return total / count;
    }

    /**
     * Calculates the median for a list of values (<code>Number</code>
     * objects).  The list of values will be copied, and the copy sorted,
     * before calculating the median.  To avoid this step (if your list of
     * values is already sorted), use the
     * {@link #calculateMedian(List, boolean)} method.
     *
     * @param values  the values (<code>null</code> permitted).
     *
     * @return The median, or {@link Double#NaN} if the list is
     *     <code>null</code> or empty.
     */
    public static double calculateMedian(List values) {
        return calculateMedian(values, true);
    }

    /**
     * Calculates the median for a list of values (<code>Number</code>
     * objects).  If <code>copyAndSort</code> is <code>false</code>, the list
     * is assumed to be presorted in ascending order by value.  The supplied
     * list is never modified.
     *
     * @param values  the values (<code>null</code> permitted).
     * @param copyAndSort  a flag that controls whether the list of values is
     *     copied and sorted (by <code>doubleValue()</code>).
     *
     * @return The median, or {@link Double#NaN} if the list is
     *     <code>null</code> or empty.
     */
    public static double calculateMedian(List values, boolean copyAndSort) {

        if (values == null) {
            return Double.NaN;
        }
        List working = values;
        if (copyAndSort) {
            // sort a private copy so the caller's list keeps its order
            working = new ArrayList(values);
            Collections.sort(working, BY_DOUBLE_VALUE);
        }
        return medianOfSortedRange(working, 0, working.size());
    }

    /**
     * Calculates the median for a sublist within a list of values
     * (<code>Number</code> objects).  This is equivalent to calling
     * <code>calculateMedian(values, start, end, true)</code>.
     *
     * @param values  the values, in any order (<code>null</code> not
     *     permitted).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     *
     * @return The median, or {@link Double#NaN} if the range is empty.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMedian(List values, int start, int end) {
        return calculateMedian(values, start, end, true);
    }

    /**
     * Calculates the median for a sublist within a list of values
     * (<code>Number</code> objects).  If <code>copyAndSort</code> is
     * <code>true</code>, the items in the range are copied and the copy is
     * sorted; otherwise the items in the range are assumed to be presorted
     * in ascending order by value.  The supplied list is never modified.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     * @param copyAndSort  a flag that controls whether the items in the
     *     range are copied and sorted (by <code>doubleValue()</code>).
     *
     * @return The median, or {@link Double#NaN} if the range is empty
     *     (<code>end</code> less than <code>start</code>).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *         <code>null</code>.
     */
    public static double calculateMedian(List values, int start, int end,
                                         boolean copyAndSort) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        int count = end - start + 1;
        if (count <= 0) {
            // empty or inverted range: same answer whichever path is taken
            return Double.NaN;
        }
        if (copyAndSort) {
            List working = new ArrayList(values.subList(start, end + 1));
            Collections.sort(working, BY_DOUBLE_VALUE);
            return medianOfSortedRange(working, 0, count);
        }
        return medianOfSortedRange(values, start, count);
    }

    /**
     * Returns the median of <code>count</code> consecutive items, beginning
     * at index <code>start</code>, of a list that is already sorted in
     * ascending order.
     *
     * @param sorted  the sorted values (<code>Number</code> objects).
     * @param start  the index of the first item in the range.
     * @param count  the number of items in the range.
     *
     * @return The middle value (odd count), the mean of the two middle
     *     values (even count), or {@link Double#NaN} if the count is not
     *     positive.
     */
    private static double medianOfSortedRange(List sorted, int start,
                                              int count) {
        if (count <= 0) {
            return Double.NaN;
        }
        int upperIndex = start + count / 2;
        if (count % 2 == 1) {
            // odd number of items: a single middle item
            return ((Number) sorted.get(upperIndex)).doubleValue();
        }
        // even number of items: average the two items around the middle
        double lower = ((Number) sorted.get(upperIndex - 1)).doubleValue();
        double upper = ((Number) sorted.get(upperIndex)).doubleValue();
        return (lower + upper) / 2.0;
    }

    /**
     * Returns the standard deviation of a set of numbers.  The sum of
     * squared differences from the mean is divided by
     * <code>data.length - 1</code>, so an array with a single item yields
     * {@link Double#NaN}.
     *
     * @param data  the data (<code>null</code> or zero length array not
     *     permitted, and the array must not contain <code>null</code>
     *     items).
     *
     * @return The standard deviation of a set of numbers.
     *
     * @throws IllegalArgumentException if <code>data</code> is
     *         <code>null</code> or has zero length.
     */
    public static double getStdDev(Number[] data) {
        if (data == null) {
            throw new IllegalArgumentException("Null 'data' array.");
        }
        if (data.length == 0) {
            throw new IllegalArgumentException("Zero length 'data' array.");
        }
        double avg = calculateMean(data);
        double sumOfSquares = 0.0;
        for (int i = 0; i < data.length; i++) {
            double diff = data[i].doubleValue() - avg;
            sumOfSquares = sumOfSquares + diff * diff;
        }
        return Math.sqrt(sumOfSquares / (data.length - 1));
    }

    /**
     * Fits a straight line to a set of (x, y) data, returning the slope and
     * intercept.
     *
     * @param xData  the x-data (<code>null</code> not permitted).
     * @param yData  the y-data (<code>null</code> not permitted).
     *
     * @return A double array with the intercept in [0] and the slope in [1].
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *         or the array lengths differ.
     */
    public static double[] getLinearFit(Number[] xData, Number[] yData) {

        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                "Statistics.getLinearFit(): array lengths must be equal.");
        }

        double slope = getSlope(xData, yData);
        // the fitted line passes through (mean(x), mean(y))
        double intercept = calculateMean(yData) - slope * calculateMean(xData);
        return new double[] {intercept, slope};
    }

    /**
     * Finds the slope of a regression line using least squares.
     *
     * @param xData  the x-values (<code>null</code> not permitted).
     * @param yData  the y-values (<code>null</code> not permitted).
     *
     * @return The slope.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *         or the array lengths differ.
     */
    public static double getSlope(Number[] xData, Number[] yData) {

        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }

        // ********* stat function for linear slope ********
        // y = a + bx
        // a = ybar - b * xbar
        //     sum(x * y) - (sum(x) * sum(y)) / n
        // b = ------------------------------------
        //     sum(x^2) - (sum(x))^2 / n
        // *************************************************

        int n = xData.length;
        // sums of x, x^2, x * y and y
        double sx = 0.0;
        double sxx = 0.0;
        double sxy = 0.0;
        double sy = 0.0;
        for (int i = 0; i < n; i++) {
            double x = xData[i].doubleValue();
            double y = yData[i].doubleValue();
            sx = sx + x;
            sxx = sxx + x * x;
            sxy = sxy + y * x;
            sy = sy + y;
        }
        return (sxy - (sx * sy) / n) / (sxx - (sx * sx) / n);
    }

    /**
     * Calculates the correlation between two datasets.  Both arrays should
     * contain the same number of items.  Null values are treated as zero.
     * <P>
     * Information about the correlation calculation was obtained from:
     *
     * http://trochim.human.cornell.edu/kb/statcorr.htm
     *
     * @param data1  the first dataset (<code>null</code> not permitted).
     * @param data2  the second dataset (<code>null</code> not permitted).
     *
     * @return The correlation.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *         or the array lengths differ.
     */
    public static double getCorrelation(Number[] data1, Number[] data2) {
        if (data1 == null) {
            throw new IllegalArgumentException("Null 'data1' argument.");
        }
        if (data2 == null) {
            throw new IllegalArgumentException("Null 'data2' argument.");
        }
        if (data1.length != data2.length) {
            throw new IllegalArgumentException(
                "'data1' and 'data2' arrays must have same length."
            );
        }
        int n = data1.length;
        double sumX = 0.0;
        double sumY = 0.0;
        double sumX2 = 0.0;
        double sumY2 = 0.0;
        double sumXY = 0.0;
        for (int i = 0; i < n; i++) {
            // null items contribute a value of zero
            double x = (data1[i] != null) ? data1[i].doubleValue() : 0.0;
            double y = (data2[i] != null) ? data2[i].doubleValue() : 0.0;
            sumX = sumX + x;
            sumY = sumY + y;
            sumXY = sumXY + (x * y);
            sumX2 = sumX2 + (x * x);
            sumY2 = sumY2 + (y * y);
        }
        double numerator = n * sumXY - sumX * sumY;
        double denominator = Math.sqrt((n * sumX2 - sumX * sumX)
                * (n * sumY2 - sumY * sumY));
        return numerator / denominator;
    }

    /**
     * Returns a data set for a moving average on the data set passed in.
     * The result has <code>xData.length - period</code> rows.  Row
     * <code>i</code> holds the x-value <code>xData[i + period]</code> in
     * [0] and the mean of the <code>period</code> y-values starting at
     * <code>yData[i]</code> in [1].
     *
     * @param xData  an array of the x data (<code>null</code> not
     *     permitted).
     * @param yData  an array of the y data (<code>null</code> not
     *     permitted).
     * @param period  the number of data points to average (must be at least
     *     1 and no greater than the length of the data arrays).
     *
     * @return A double[][] with one row per moving average value, with two
     *     doubles for x and y in the second dimension.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>,
     *         the array lengths differ, or <code>period</code> is less than
     *         1 or greater than the array length.
     */
    public static double[][] getMovingAverage(Number[] xData,
                                              Number[] yData,
                                              int period) {

        // check arguments...
        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }
        if (period < 1) {
            // zero would divide by zero, a negative value would index
            // outside the arrays
            throw new IllegalArgumentException("Period must be at least 1.");
        }
        if (period > xData.length) {
            throw new IllegalArgumentException(
                "Period can't be longer than dataset."
            );
        }

        double[][] result = new double[xData.length - period][2];
        for (int i = 0; i < result.length; i++) {
            result[i][0] = xData[i + period].doubleValue();
            // accumulate the y-values in the window [i, i + period - 1]
            double sum = 0.0;
            for (int j = 0; j < period; j++) {
                sum += yData[i + j].doubleValue();
            }
            result[i][1] = sum / period;
        }
        return result;
    }

}