1 | // |
---|
2 | // BioArray Software Environment (BASE) - homepage http://base.thep.lu.se/ |
---|
3 | // Copyright (C) 2002,2003 Björn Samuelsson |
---|
4 | // |
---|
5 | // This file is part of BASE. |
---|
6 | // |
---|
7 | // BASE is free software; you can redistribute it and/or |
---|
8 | // modify it under the terms of the GNU General Public License |
---|
9 | // as published by the Free Software Foundation; either version 2 |
---|
10 | // of the License, or (at your option) any later version. |
---|
11 | // |
---|
12 | // BASE is distributed in the hope that it will be useful, |
---|
13 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
15 | // GNU General Public License for more details. |
---|
16 | // |
---|
17 | // You should have received a copy of the GNU General Public License |
---|
18 | // along with this program; if not, write to the Free Software |
---|
19 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
---|
20 | // |
---|
21 | #include <iostream> |
---|
22 | #include <base_lowess.h> |
---|
23 | |
---|
24 | namespace BASE |
---|
25 | { |
---|
26 | |
---|
27 | const double lowess::yw_range_factor = 6.; |
---|
28 | |
---|
29 | static inline double cube(double a) |
---|
30 | { |
---|
31 | return a * a * a; |
---|
32 | } |
---|
33 | |
---|
34 | double lowess::cubic_weight_func(double a) |
---|
35 | { |
---|
36 | if(fabs(a) < 1.) |
---|
37 | return cube(1. - cube(fabs(a))); |
---|
38 | else |
---|
39 | return 0.; |
---|
40 | } |
---|
41 | |
---|
42 | static inline double sqr(double a) |
---|
43 | { |
---|
44 | return a * a; |
---|
45 | } |
---|
46 | |
---|
47 | double lowess::quadratic_weight_func(double a) |
---|
48 | { |
---|
49 | if(fabs(a) < 1.) |
---|
50 | return sqr(1. - sqr(a)); |
---|
51 | else |
---|
52 | return 0.; |
---|
53 | } |
---|
54 | |
---|
55 | |
---|
56 | bool lowess::line_fit( |
---|
57 | double *x_beg, int npoints, double *y_beg, double &k, double &m, |
---|
58 | double *w_beg) |
---|
59 | { |
---|
60 | const int np = npoints; |
---|
61 | if(np <= 0) |
---|
62 | { |
---|
63 | std::cerr << "line_fit called with npoints <= 0\n"; |
---|
64 | exit(1); |
---|
65 | } |
---|
66 | |
---|
67 | double x_sum = 0., y_sum = 0., xx_sum = 0., xy_sum = 0., w_sum = 0.; |
---|
68 | |
---|
69 | if(w_beg) |
---|
70 | { |
---|
71 | for(int i = 0; i < np; i++) |
---|
72 | { |
---|
73 | double w = w_beg[i]; |
---|
74 | x_sum += x_beg[i] * w; |
---|
75 | y_sum += y_beg[i] * w; |
---|
76 | xx_sum += x_beg[i] * x_beg[i] * w; |
---|
77 | xy_sum += x_beg[i] * y_beg[i] * w; |
---|
78 | w_sum += w; |
---|
79 | } |
---|
80 | } |
---|
81 | else |
---|
82 | { |
---|
83 | for(int i = 0; i < np; i++) |
---|
84 | { |
---|
85 | x_sum += x_beg[i]; |
---|
86 | y_sum += y_beg[i]; |
---|
87 | xx_sum += x_beg[i] * x_beg[i]; |
---|
88 | xy_sum += x_beg[i] * y_beg[i]; |
---|
89 | } |
---|
90 | w_sum = np; |
---|
91 | } |
---|
92 | |
---|
93 | if(w_sum <= 0.) |
---|
94 | return false; |
---|
95 | |
---|
96 | double denom = w_sum*xx_sum - x_sum*x_sum; |
---|
97 | if(denom != 0.) |
---|
98 | { |
---|
99 | k = (w_sum*xy_sum - x_sum*y_sum) / denom; |
---|
100 | m = (y_sum - k*x_sum) / w_sum; |
---|
101 | } |
---|
102 | else |
---|
103 | { |
---|
104 | k = 0.; |
---|
105 | m = y_sum / w_sum; |
---|
106 | } |
---|
107 | return true; |
---|
108 | } |
---|
109 | |
---|
110 | // fit for one point |
---|
111 | // x must be sorted in ascending order |
---|
112 | bool lowess::p_lowess( |
---|
113 | double *x_beg, int npoints, double *y_beg, |
---|
114 | const double &p_x, double &p_y, double *temp_w_beg, |
---|
115 | double *yw_beg, weight_func x_weight_func) |
---|
116 | { |
---|
117 | const int np = npoints; |
---|
118 | if(np <= 0) |
---|
119 | { |
---|
120 | std::cerr << "p_lowess called with npoints <= 0\n"; |
---|
121 | exit(1); |
---|
122 | } |
---|
123 | |
---|
124 | double inv_radius = 1. / std::max(p_x - *x_beg, x_beg[np - 1] - p_x); |
---|
125 | if(yw_beg) |
---|
126 | { |
---|
127 | for(int i = 0; i < np; i++) |
---|
128 | { |
---|
129 | temp_w_beg[i] = (*x_weight_func)((x_beg[i] - p_x) * inv_radius) * |
---|
130 | yw_beg[i]; |
---|
131 | } |
---|
132 | } |
---|
133 | else |
---|
134 | { |
---|
135 | for(int i = 0; i < np; i++) |
---|
136 | temp_w_beg[i] = x_weight_func((x_beg[i] - p_x) * inv_radius); |
---|
137 | } |
---|
138 | |
---|
139 | double k, m; |
---|
140 | bool tmp = line_fit(x_beg, npoints, y_beg, k, m, temp_w_beg); |
---|
141 | |
---|
142 | p_y = k * p_x + m; |
---|
143 | return tmp; |
---|
144 | } |
---|
145 | |
---|
146 | // robust fit of line segments |
---|
147 | // x must be sorted in ascending order |
---|
148 | void lowess::calc( |
---|
149 | double *x_beg, int npoints, double *y_beg, |
---|
150 | double *y_fit_beg, char *point_fitted_beg, |
---|
151 | const double &frac, const int n_iter, const double &delta, |
---|
152 | weight_func x_weight_func, weight_func y_weight_func) |
---|
153 | { |
---|
154 | const int np_window = std::min(npoints, int(frac*npoints + 0.5)); |
---|
155 | if(npoints == 0) |
---|
156 | return; |
---|
157 | if(npoints < 0) |
---|
158 | { |
---|
159 | std::cerr << "lowess called with npoints < 0\n"; |
---|
160 | exit(1); |
---|
161 | } |
---|
162 | |
---|
163 | if(np_window <= 2) |
---|
164 | { |
---|
165 | std::copy(y_beg, y_beg + npoints, y_fit_beg); |
---|
166 | std::fill(point_fitted_beg, point_fitted_beg + npoints, true); |
---|
167 | return; |
---|
168 | } |
---|
169 | |
---|
170 | double w_beg[npoints]; |
---|
171 | double temp_beg[npoints]; |
---|
172 | |
---|
173 | for(int iter = 0; iter <= n_iter; iter++) |
---|
174 | { |
---|
175 | int window_i = 0; |
---|
176 | int i, prev_i; |
---|
177 | int misses = 0, pcalcs = 0; |
---|
178 | |
---|
179 | for(i = 0, prev_i = -1; prev_i < npoints - 1;) |
---|
180 | { |
---|
181 | //center window |
---|
182 | while(window_i + np_window < npoints && |
---|
183 | x_beg[i] - x_beg[window_i] > |
---|
184 | x_beg[window_i + np_window - 1] - x_beg[i]) |
---|
185 | { |
---|
186 | window_i++; |
---|
187 | } |
---|
188 | |
---|
189 | //fit one point |
---|
190 | pcalcs++; |
---|
191 | bool fitted = p_lowess(x_beg + window_i, np_window, |
---|
192 | y_beg + window_i, x_beg[i], y_fit_beg[i], temp_beg, |
---|
193 | iter ? w_beg + window_i : 0, x_weight_func); |
---|
194 | |
---|
195 | if(!fitted) |
---|
196 | { |
---|
197 | misses++; |
---|
198 | if(i < npoints - 1) |
---|
199 | ; |
---|
200 | else if(prev_i < 0) |
---|
201 | { |
---|
202 | std::cerr << "Not a single point fitted - this is an odd error.\n" |
---|
203 | "Please report it as a bug.\n"; |
---|
204 | exit(1); |
---|
205 | } |
---|
206 | else |
---|
207 | { |
---|
208 | for(int j = prev_i + 1; j < i; j++) |
---|
209 | y_fit_beg[j] = y_fit_beg[prev_i]; |
---|
210 | break; |
---|
211 | } |
---|
212 | } |
---|
213 | else |
---|
214 | { |
---|
215 | //interpolate skipped points |
---|
216 | if(prev_i + 1 < i) |
---|
217 | { |
---|
218 | double d = prev_i < 0 ? 0 : x_beg[i] - x_beg[prev_i]; |
---|
219 | if(d == 0.) |
---|
220 | { |
---|
221 | for(int j = prev_i + 1; j < i; j++) |
---|
222 | y_fit_beg[j] = y_fit_beg[i]; |
---|
223 | } |
---|
224 | else |
---|
225 | { |
---|
226 | for(int j = prev_i + 1; j < i; j++) |
---|
227 | { |
---|
228 | double a = (x_beg[j] - x_beg[prev_i])/d; |
---|
229 | y_fit_beg[j] = a*y_fit_beg[i] + (1.-a)*y_fit_beg[prev_i]; |
---|
230 | } |
---|
231 | } |
---|
232 | } |
---|
233 | |
---|
234 | for(int j = prev_i + 1; j < i; j++) |
---|
235 | point_fitted_beg[j] = false; |
---|
236 | point_fitted_beg[i] = true; |
---|
237 | |
---|
238 | prev_i = i; |
---|
239 | } |
---|
240 | |
---|
241 | double cut = x_beg[i] + delta; |
---|
242 | for(i++; i < npoints && x_beg[i] <= cut; i++) |
---|
243 | { |
---|
244 | if(x_beg[i] - x_beg[i-1] < 0) |
---|
245 | { |
---|
246 | std::cerr << "x-values not sorted in lowess\n"; |
---|
247 | exit(1); |
---|
248 | } |
---|
249 | } |
---|
250 | if(i >= npoints) |
---|
251 | i = npoints - 1; |
---|
252 | } |
---|
253 | |
---|
254 | if(misses) |
---|
255 | { |
---|
256 | std::cerr << "Warning: sum of weights was 0 in line_fit " << misses |
---|
257 | << " times (of " << pcalcs << ") in iteration " << iter << ".\n" |
---|
258 | "This happens if all points within the sliding window are\n" |
---|
259 | "more than 6 medians from the locally fitted function.\n"; |
---|
260 | } |
---|
261 | |
---|
262 | for(int i = 0; i < npoints; i++) |
---|
263 | temp_beg[i] = fabs(y_fit_beg[i] - y_beg[i]); |
---|
264 | |
---|
265 | std::pair<double*, double*> p = median(temp_beg, temp_beg + npoints); |
---|
266 | double inv_yw_range = 1. / |
---|
267 | (yw_range_factor * 0.5 * (*p.first + *p.second)); |
---|
268 | for(int i = 0; i < npoints; i++) |
---|
269 | w_beg[i] = y_weight_func((y_fit_beg[i] - y_beg[i]) * inv_yw_range); |
---|
270 | } |
---|
271 | } |
---|
272 | |
---|
273 | } |
---|
274 | |
---|