1 | (function(){science.stats = {}; |
---|
2 | // Bandwidth selectors for Gaussian kernels. |
---|
3 | // Based on R's implementations in `stats.bw`. |
---|
4 | science.stats.bandwidth = { |
---|
5 | |
---|
6 | // Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall. |
---|
7 | nrd0: function(x) { |
---|
8 | var hi = Math.sqrt(science.stats.variance(x)); |
---|
9 | if (!(lo = Math.min(hi, science.stats.iqr(x) / 1.34))) |
---|
10 | (lo = hi) || (lo = Math.abs(x[1])) || (lo = 1); |
---|
11 | return .9 * lo * Math.pow(x.length, -.2); |
---|
12 | }, |
---|
13 | |
---|
14 | // Scott, D. W. (1992) Multivariate Density Estimation: Theory, Practice, and |
---|
15 | // Visualization. Wiley. |
---|
16 | nrd: function(x) { |
---|
17 | var h = science.stats.iqr(x) / 1.34; |
---|
18 | return 1.06 * Math.min(Math.sqrt(science.stats.variance(x)), h) |
---|
19 | * Math.pow(x.length, -1/5); |
---|
20 | } |
---|
21 | }; |
---|
22 | // See <http://en.wikipedia.org/wiki/Kernel_(statistics)>. |
---|
23 | science.stats.kernel = { |
---|
24 | uniform: function(u) { |
---|
25 | if (u <= 1 && u >= -1) return .5; |
---|
26 | return 0; |
---|
27 | }, |
---|
28 | triangular: function(u) { |
---|
29 | if (u <= 1 && u >= -1) return 1 - Math.abs(u); |
---|
30 | return 0; |
---|
31 | }, |
---|
32 | epanechnikov: function(u) { |
---|
33 | if (u <= 1 && u >= -1) return .75 * (1 - u * u); |
---|
34 | return 0; |
---|
35 | }, |
---|
36 | quartic: function(u) { |
---|
37 | if (u <= 1 && u >= -1) { |
---|
38 | var tmp = 1 - u * u; |
---|
39 | return (15 / 16) * tmp * tmp; |
---|
40 | } |
---|
41 | return 0; |
---|
42 | }, |
---|
43 | triweight: function(u) { |
---|
44 | if (u <= 1 && u >= -1) { |
---|
45 | var tmp = 1 - u * u; |
---|
46 | return (35 / 32) * tmp * tmp * tmp; |
---|
47 | } |
---|
48 | return 0; |
---|
49 | }, |
---|
50 | gaussian: function(u) { |
---|
51 | return 1 / Math.sqrt(2 * Math.PI) * Math.exp(-.5 * u * u); |
---|
52 | }, |
---|
53 | cosine: function(u) { |
---|
54 | if (u <= 1 && u >= -1) return Math.PI / 4 * Math.cos(Math.PI / 2 * u); |
---|
55 | return 0; |
---|
56 | } |
---|
57 | }; |
---|
58 | // http://exploringdata.net/den_trac.htm |
---|
59 | science.stats.kde = function() { |
---|
60 | var kernel = science.stats.kernel.gaussian, |
---|
61 | sample = [], |
---|
62 | bandwidth = science.stats.bandwidth.nrd; |
---|
63 | |
---|
64 | function kde(points, i) { |
---|
65 | var bw = bandwidth.call(this, sample); |
---|
66 | return points.map(function(x) { |
---|
67 | var i = -1, |
---|
68 | y = 0, |
---|
69 | n = sample.length; |
---|
70 | while (++i < n) { |
---|
71 | y += kernel((x - sample[i]) / bw); |
---|
72 | } |
---|
73 | return [x, y / bw / n]; |
---|
74 | }); |
---|
75 | } |
---|
76 | |
---|
77 | kde.kernel = function(x) { |
---|
78 | if (!arguments.length) return kernel; |
---|
79 | kernel = x; |
---|
80 | return kde; |
---|
81 | }; |
---|
82 | |
---|
83 | kde.sample = function(x) { |
---|
84 | if (!arguments.length) return sample; |
---|
85 | sample = x; |
---|
86 | return kde; |
---|
87 | }; |
---|
88 | |
---|
89 | kde.bandwidth = function(x) { |
---|
90 | if (!arguments.length) return bandwidth; |
---|
91 | bandwidth = science.functor(x); |
---|
92 | return kde; |
---|
93 | }; |
---|
94 | |
---|
95 | return kde; |
---|
96 | }; |
---|
97 | science.stats.iqr = function(x) { |
---|
98 | var quartiles = science.stats.quantiles(x, [.25, .75]); |
---|
99 | return quartiles[1] - quartiles[0]; |
---|
100 | }; |
---|
101 | // Welford's algorithm. |
---|
102 | science.stats.mean = function(x) { |
---|
103 | var n = x.length; |
---|
104 | if (n === 0) return NaN; |
---|
105 | var m = 0, |
---|
106 | i = -1; |
---|
107 | while (++i < n) m += (x[i] - m) / (i + 1); |
---|
108 | return m; |
---|
109 | }; |
---|
110 | science.stats.median = function(x) { |
---|
111 | return science.stats.quantiles(x, [.5])[0]; |
---|
112 | }; |
---|
113 | science.stats.mode = function(x) { |
---|
114 | x = x.slice().sort(science.ascending); |
---|
115 | var mode, |
---|
116 | n = x.length, |
---|
117 | i = -1, |
---|
118 | l = i, |
---|
119 | last = null, |
---|
120 | max = 0, |
---|
121 | tmp, |
---|
122 | v; |
---|
123 | while (++i < n) { |
---|
124 | if ((v = x[i]) !== last) { |
---|
125 | if ((tmp = i - l) > max) { |
---|
126 | max = tmp; |
---|
127 | mode = last; |
---|
128 | } |
---|
129 | last = v; |
---|
130 | l = i; |
---|
131 | } |
---|
132 | } |
---|
133 | return mode; |
---|
134 | }; |
---|
135 | // Uses R's quantile algorithm type=7. |
---|
136 | science.stats.quantiles = function(d, quantiles) { |
---|
137 | d = d.slice().sort(science.ascending); |
---|
138 | var n_1 = d.length - 1; |
---|
139 | return quantiles.map(function(q) { |
---|
140 | if (q === 0) return d[0]; |
---|
141 | else if (q === 1) return d[n_1]; |
---|
142 | |
---|
143 | var index = 1 + q * n_1, |
---|
144 | lo = Math.floor(index), |
---|
145 | h = index - lo, |
---|
146 | a = d[lo - 1]; |
---|
147 | |
---|
148 | return h === 0 ? a : a + h * (d[lo] - a); |
---|
149 | }); |
---|
150 | }; |
---|
151 | // Unbiased estimate of a sample's variance. |
---|
152 | // Also known as the sample variance, where the denominator is n - 1. |
---|
153 | science.stats.variance = function(x) { |
---|
154 | var n = x.length; |
---|
155 | if (n < 1) return NaN; |
---|
156 | if (n === 1) return 0; |
---|
157 | var mean = science.stats.mean(x), |
---|
158 | i = -1, |
---|
159 | s = 0; |
---|
160 | while (++i < n) { |
---|
161 | var v = x[i] - mean; |
---|
162 | s += v * v; |
---|
163 | } |
---|
164 | return s / (n - 1); |
---|
165 | }; |
---|
166 | })() |
---|