[76] | 1 | (function(){science.stats = {}; |
---|
| 2 | // Bandwidth selectors for Gaussian kernels. |
---|
| 3 | // Based on R's implementations in `stats.bw`. |
---|
| 4 | science.stats.bandwidth = { |
---|
| 5 | |
---|
| 6 | // Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall. |
---|
| 7 | nrd0: function(x) { |
---|
| 8 | var hi = Math.sqrt(science.stats.variance(x)); |
---|
| 9 | if (!(lo = Math.min(hi, science.stats.iqr(x) / 1.34))) |
---|
| 10 | (lo = hi) || (lo = Math.abs(x[1])) || (lo = 1); |
---|
| 11 | return .9 * lo * Math.pow(x.length, -.2); |
---|
| 12 | }, |
---|
| 13 | |
---|
| 14 | // Scott, D. W. (1992) Multivariate Density Estimation: Theory, Practice, and |
---|
| 15 | // Visualization. Wiley. |
---|
| 16 | nrd: function(x) { |
---|
| 17 | var h = science.stats.iqr(x) / 1.34; |
---|
| 18 | return 1.06 * Math.min(Math.sqrt(science.stats.variance(x)), h) |
---|
| 19 | * Math.pow(x.length, -1/5); |
---|
| 20 | } |
---|
| 21 | }; |
---|
| 22 | // See <http://en.wikipedia.org/wiki/Kernel_(statistics)>. |
---|
| 23 | science.stats.kernel = { |
---|
| 24 | uniform: function(u) { |
---|
| 25 | if (u <= 1 && u >= -1) return .5; |
---|
| 26 | return 0; |
---|
| 27 | }, |
---|
| 28 | triangular: function(u) { |
---|
| 29 | if (u <= 1 && u >= -1) return 1 - Math.abs(u); |
---|
| 30 | return 0; |
---|
| 31 | }, |
---|
| 32 | epanechnikov: function(u) { |
---|
| 33 | if (u <= 1 && u >= -1) return .75 * (1 - u * u); |
---|
| 34 | return 0; |
---|
| 35 | }, |
---|
| 36 | quartic: function(u) { |
---|
| 37 | if (u <= 1 && u >= -1) { |
---|
| 38 | var tmp = 1 - u * u; |
---|
| 39 | return (15 / 16) * tmp * tmp; |
---|
| 40 | } |
---|
| 41 | return 0; |
---|
| 42 | }, |
---|
| 43 | triweight: function(u) { |
---|
| 44 | if (u <= 1 && u >= -1) { |
---|
| 45 | var tmp = 1 - u * u; |
---|
| 46 | return (35 / 32) * tmp * tmp * tmp; |
---|
| 47 | } |
---|
| 48 | return 0; |
---|
| 49 | }, |
---|
| 50 | gaussian: function(u) { |
---|
| 51 | return 1 / Math.sqrt(2 * Math.PI) * Math.exp(-.5 * u * u); |
---|
| 52 | }, |
---|
| 53 | cosine: function(u) { |
---|
| 54 | if (u <= 1 && u >= -1) return Math.PI / 4 * Math.cos(Math.PI / 2 * u); |
---|
| 55 | return 0; |
---|
| 56 | } |
---|
| 57 | }; |
---|
| 58 | // http://exploringdata.net/den_trac.htm |
---|
| 59 | science.stats.kde = function() { |
---|
| 60 | var kernel = science.stats.kernel.gaussian, |
---|
| 61 | sample = [], |
---|
| 62 | bandwidth = science.stats.bandwidth.nrd; |
---|
| 63 | |
---|
| 64 | function kde(points, i) { |
---|
| 65 | var bw = bandwidth.call(this, sample); |
---|
| 66 | return points.map(function(x) { |
---|
| 67 | var i = -1, |
---|
| 68 | y = 0, |
---|
| 69 | n = sample.length; |
---|
| 70 | while (++i < n) { |
---|
| 71 | y += kernel((x - sample[i]) / bw); |
---|
| 72 | } |
---|
| 73 | return [x, y / bw / n]; |
---|
| 74 | }); |
---|
| 75 | } |
---|
| 76 | |
---|
| 77 | kde.kernel = function(x) { |
---|
| 78 | if (!arguments.length) return kernel; |
---|
| 79 | kernel = x; |
---|
| 80 | return kde; |
---|
| 81 | }; |
---|
| 82 | |
---|
| 83 | kde.sample = function(x) { |
---|
| 84 | if (!arguments.length) return sample; |
---|
| 85 | sample = x; |
---|
| 86 | return kde; |
---|
| 87 | }; |
---|
| 88 | |
---|
| 89 | kde.bandwidth = function(x) { |
---|
| 90 | if (!arguments.length) return bandwidth; |
---|
| 91 | bandwidth = science.functor(x); |
---|
| 92 | return kde; |
---|
| 93 | }; |
---|
| 94 | |
---|
| 95 | return kde; |
---|
| 96 | }; |
---|
| 97 | science.stats.iqr = function(x) { |
---|
| 98 | var quartiles = science.stats.quantiles(x, [.25, .75]); |
---|
| 99 | return quartiles[1] - quartiles[0]; |
---|
| 100 | }; |
---|
| 101 | // Welford's algorithm. |
---|
| 102 | science.stats.mean = function(x) { |
---|
| 103 | var n = x.length; |
---|
| 104 | if (n === 0) return NaN; |
---|
| 105 | var m = 0, |
---|
| 106 | i = -1; |
---|
| 107 | while (++i < n) m += (x[i] - m) / (i + 1); |
---|
| 108 | return m; |
---|
| 109 | }; |
---|
| 110 | science.stats.median = function(x) { |
---|
| 111 | return science.stats.quantiles(x, [.5])[0]; |
---|
| 112 | }; |
---|
| 113 | science.stats.mode = function(x) { |
---|
| 114 | x = x.slice().sort(science.ascending); |
---|
| 115 | var mode, |
---|
| 116 | n = x.length, |
---|
| 117 | i = -1, |
---|
| 118 | l = i, |
---|
| 119 | last = null, |
---|
| 120 | max = 0, |
---|
| 121 | tmp, |
---|
| 122 | v; |
---|
| 123 | while (++i < n) { |
---|
| 124 | if ((v = x[i]) !== last) { |
---|
| 125 | if ((tmp = i - l) > max) { |
---|
| 126 | max = tmp; |
---|
| 127 | mode = last; |
---|
| 128 | } |
---|
| 129 | last = v; |
---|
| 130 | l = i; |
---|
| 131 | } |
---|
| 132 | } |
---|
| 133 | return mode; |
---|
| 134 | }; |
---|
| 135 | // Uses R's quantile algorithm type=7. |
---|
| 136 | science.stats.quantiles = function(d, quantiles) { |
---|
| 137 | d = d.slice().sort(science.ascending); |
---|
| 138 | var n_1 = d.length - 1; |
---|
| 139 | return quantiles.map(function(q) { |
---|
| 140 | if (q === 0) return d[0]; |
---|
| 141 | else if (q === 1) return d[n_1]; |
---|
| 142 | |
---|
| 143 | var index = 1 + q * n_1, |
---|
| 144 | lo = Math.floor(index), |
---|
| 145 | h = index - lo, |
---|
| 146 | a = d[lo - 1]; |
---|
| 147 | |
---|
| 148 | return h === 0 ? a : a + h * (d[lo] - a); |
---|
| 149 | }); |
---|
| 150 | }; |
---|
| 151 | // Unbiased estimate of a sample's variance. |
---|
| 152 | // Also known as the sample variance, where the denominator is n - 1. |
---|
| 153 | science.stats.variance = function(x) { |
---|
| 154 | var n = x.length; |
---|
| 155 | if (n < 1) return NaN; |
---|
| 156 | if (n === 1) return 0; |
---|
| 157 | var mean = science.stats.mean(x), |
---|
| 158 | i = -1, |
---|
| 159 | s = 0; |
---|
| 160 | while (++i < n) { |
---|
| 161 | var v = x[i] - mean; |
---|
| 162 | s += v * v; |
---|
| 163 | } |
---|
| 164 | return s / (n - 1); |
---|
| 165 | }; |
---|
| 166 | })() |
---|