1 | //
|
---|
2 | // BioArray Software Environment (BASE) - homepage http://base.thep.lu.se/
|
---|
3 | // Copyright (C) 2002,2003 Björn Samuelsson
|
---|
4 | //
|
---|
5 | // This file is part of BASE.
|
---|
6 | //
|
---|
7 | // BASE is free software; you can redistribute it and/or
|
---|
8 | // modify it under the terms of the GNU General Public License
|
---|
9 | // as published by the Free Software Foundation; either version 2
|
---|
10 | // of the License, or (at your option) any later version.
|
---|
11 | //
|
---|
12 | // BASE is distributed in the hope that it will be useful,
|
---|
13 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | // GNU General Public License for more details.
|
---|
16 | //
|
---|
17 | // You should have received a copy of the GNU General Public License
|
---|
18 | // along with this program; if not, write to the Free Software
|
---|
19 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
---|
20 | //
|
---|
21 | #include <iostream>
|
---|
22 | #include <base_lowess.h>
|
---|
23 |
|
---|
24 | namespace BASE
|
---|
25 | {
|
---|
26 |
|
---|
27 | const double lowess::yw_range_factor = 6.;
|
---|
28 |
|
---|
29 | static inline double cube(double a)
|
---|
30 | {
|
---|
31 | return a * a * a;
|
---|
32 | }
|
---|
33 |
|
---|
34 | double lowess::cubic_weight_func(double a)
|
---|
35 | {
|
---|
36 | if(fabs(a) < 1.)
|
---|
37 | return cube(1. - cube(fabs(a)));
|
---|
38 | else
|
---|
39 | return 0.;
|
---|
40 | }
|
---|
41 |
|
---|
42 | static inline double sqr(double a)
|
---|
43 | {
|
---|
44 | return a * a;
|
---|
45 | }
|
---|
46 |
|
---|
47 | double lowess::quadratic_weight_func(double a)
|
---|
48 | {
|
---|
49 | if(fabs(a) < 1.)
|
---|
50 | return sqr(1. - sqr(a));
|
---|
51 | else
|
---|
52 | return 0.;
|
---|
53 | }
|
---|
54 |
|
---|
55 |
|
---|
56 | bool lowess::line_fit(
|
---|
57 | double *x_beg, int npoints, double *y_beg, double &k, double &m,
|
---|
58 | double *w_beg)
|
---|
59 | {
|
---|
60 | const int np = npoints;
|
---|
61 | if(np <= 0)
|
---|
62 | {
|
---|
63 | std::cerr << "line_fit called with npoints <= 0\n";
|
---|
64 | exit(1);
|
---|
65 | }
|
---|
66 |
|
---|
67 | double x_sum = 0., y_sum = 0., xx_sum = 0., xy_sum = 0., w_sum = 0.;
|
---|
68 |
|
---|
69 | if(w_beg)
|
---|
70 | {
|
---|
71 | for(int i = 0; i < np; i++)
|
---|
72 | {
|
---|
73 | double w = w_beg[i];
|
---|
74 | x_sum += x_beg[i] * w;
|
---|
75 | y_sum += y_beg[i] * w;
|
---|
76 | xx_sum += x_beg[i] * x_beg[i] * w;
|
---|
77 | xy_sum += x_beg[i] * y_beg[i] * w;
|
---|
78 | w_sum += w;
|
---|
79 | }
|
---|
80 | }
|
---|
81 | else
|
---|
82 | {
|
---|
83 | for(int i = 0; i < np; i++)
|
---|
84 | {
|
---|
85 | x_sum += x_beg[i];
|
---|
86 | y_sum += y_beg[i];
|
---|
87 | xx_sum += x_beg[i] * x_beg[i];
|
---|
88 | xy_sum += x_beg[i] * y_beg[i];
|
---|
89 | }
|
---|
90 | w_sum = np;
|
---|
91 | }
|
---|
92 |
|
---|
93 | if(w_sum <= 0.)
|
---|
94 | return false;
|
---|
95 |
|
---|
96 | double denom = w_sum*xx_sum - x_sum*x_sum;
|
---|
97 | if(denom != 0.)
|
---|
98 | {
|
---|
99 | k = (w_sum*xy_sum - x_sum*y_sum) / denom;
|
---|
100 | m = (y_sum - k*x_sum) / w_sum;
|
---|
101 | }
|
---|
102 | else
|
---|
103 | {
|
---|
104 | k = 0.;
|
---|
105 | m = y_sum / w_sum;
|
---|
106 | }
|
---|
107 | return true;
|
---|
108 | }
|
---|
109 |
|
---|
110 | // fit for one point
|
---|
111 | // x must be sorted in ascending order
|
---|
112 | bool lowess::p_lowess(
|
---|
113 | double *x_beg, int npoints, double *y_beg,
|
---|
114 | const double &p_x, double &p_y, double *temp_w_beg,
|
---|
115 | double *yw_beg, weight_func x_weight_func)
|
---|
116 | {
|
---|
117 | const int np = npoints;
|
---|
118 | if(np <= 0)
|
---|
119 | {
|
---|
120 | std::cerr << "p_lowess called with npoints <= 0\n";
|
---|
121 | exit(1);
|
---|
122 | }
|
---|
123 |
|
---|
124 | double inv_radius = 1. / std::max(p_x - *x_beg, x_beg[np - 1] - p_x);
|
---|
125 | if(yw_beg)
|
---|
126 | {
|
---|
127 | for(int i = 0; i < np; i++)
|
---|
128 | {
|
---|
129 | temp_w_beg[i] = (*x_weight_func)((x_beg[i] - p_x) * inv_radius) *
|
---|
130 | yw_beg[i];
|
---|
131 | }
|
---|
132 | }
|
---|
133 | else
|
---|
134 | {
|
---|
135 | for(int i = 0; i < np; i++)
|
---|
136 | temp_w_beg[i] = x_weight_func((x_beg[i] - p_x) * inv_radius);
|
---|
137 | }
|
---|
138 |
|
---|
139 | double k, m;
|
---|
140 | bool tmp = line_fit(x_beg, npoints, y_beg, k, m, temp_w_beg);
|
---|
141 |
|
---|
142 | p_y = k * p_x + m;
|
---|
143 | return tmp;
|
---|
144 | }
|
---|
145 |
|
---|
146 | // robust fit of line segments
|
---|
147 | // x must be sorted in ascending order
|
---|
148 | void lowess::calc(
|
---|
149 | double *x_beg, int npoints, double *y_beg,
|
---|
150 | double *y_fit_beg, char *point_fitted_beg,
|
---|
151 | const double &frac, const int n_iter, const double &delta,
|
---|
152 | weight_func x_weight_func, weight_func y_weight_func)
|
---|
153 | {
|
---|
154 | const int np_window = std::min(npoints, int(frac*npoints + 0.5));
|
---|
155 | if(npoints == 0)
|
---|
156 | return;
|
---|
157 | if(npoints < 0)
|
---|
158 | {
|
---|
159 | std::cerr << "lowess called with npoints < 0\n";
|
---|
160 | exit(1);
|
---|
161 | }
|
---|
162 |
|
---|
163 | if(np_window <= 2)
|
---|
164 | {
|
---|
165 | std::copy(y_beg, y_beg + npoints, y_fit_beg);
|
---|
166 | std::fill(point_fitted_beg, point_fitted_beg + npoints, true);
|
---|
167 | return;
|
---|
168 | }
|
---|
169 |
|
---|
170 | double w_beg[npoints];
|
---|
171 | double temp_beg[npoints];
|
---|
172 |
|
---|
173 | for(int iter = 0; iter <= n_iter; iter++)
|
---|
174 | {
|
---|
175 | int window_i = 0;
|
---|
176 | int i, prev_i;
|
---|
177 | int misses = 0, pcalcs = 0;
|
---|
178 |
|
---|
179 | for(i = 0, prev_i = -1; prev_i < npoints - 1;)
|
---|
180 | {
|
---|
181 | //center window
|
---|
182 | while(window_i + np_window < npoints &&
|
---|
183 | x_beg[i] - x_beg[window_i] >
|
---|
184 | x_beg[window_i + np_window - 1] - x_beg[i])
|
---|
185 | {
|
---|
186 | window_i++;
|
---|
187 | }
|
---|
188 |
|
---|
189 | //fit one point
|
---|
190 | pcalcs++;
|
---|
191 | bool fitted = p_lowess(x_beg + window_i, np_window,
|
---|
192 | y_beg + window_i, x_beg[i], y_fit_beg[i], temp_beg,
|
---|
193 | iter ? w_beg + window_i : 0, x_weight_func);
|
---|
194 |
|
---|
195 | if(!fitted)
|
---|
196 | {
|
---|
197 | misses++;
|
---|
198 | if(i < npoints - 1)
|
---|
199 | ;
|
---|
200 | else if(prev_i < 0)
|
---|
201 | {
|
---|
202 | std::cerr << "Not a single point fitted - this is an odd error.\n"
|
---|
203 | "Please report it as a bug.\n";
|
---|
204 | exit(1);
|
---|
205 | }
|
---|
206 | else
|
---|
207 | {
|
---|
208 | for(int j = prev_i + 1; j < i; j++)
|
---|
209 | y_fit_beg[j] = y_fit_beg[prev_i];
|
---|
210 | break;
|
---|
211 | }
|
---|
212 | }
|
---|
213 | else
|
---|
214 | {
|
---|
215 | //interpolate skipped points
|
---|
216 | if(prev_i + 1 < i)
|
---|
217 | {
|
---|
218 | double d = prev_i < 0 ? 0 : x_beg[i] - x_beg[prev_i];
|
---|
219 | if(d == 0.)
|
---|
220 | {
|
---|
221 | for(int j = prev_i + 1; j < i; j++)
|
---|
222 | y_fit_beg[j] = y_fit_beg[i];
|
---|
223 | }
|
---|
224 | else
|
---|
225 | {
|
---|
226 | for(int j = prev_i + 1; j < i; j++)
|
---|
227 | {
|
---|
228 | double a = (x_beg[j] - x_beg[prev_i])/d;
|
---|
229 | y_fit_beg[j] = a*y_fit_beg[i] + (1.-a)*y_fit_beg[prev_i];
|
---|
230 | }
|
---|
231 | }
|
---|
232 | }
|
---|
233 |
|
---|
234 | for(int j = prev_i + 1; j < i; j++)
|
---|
235 | point_fitted_beg[j] = false;
|
---|
236 | point_fitted_beg[i] = true;
|
---|
237 |
|
---|
238 | prev_i = i;
|
---|
239 | }
|
---|
240 |
|
---|
241 | double cut = x_beg[i] + delta;
|
---|
242 | for(i++; i < npoints && x_beg[i] <= cut; i++)
|
---|
243 | {
|
---|
244 | if(x_beg[i] - x_beg[i-1] < 0)
|
---|
245 | {
|
---|
246 | std::cerr << "x-values not sorted in lowess\n";
|
---|
247 | exit(1);
|
---|
248 | }
|
---|
249 | }
|
---|
250 | if(i >= npoints)
|
---|
251 | i = npoints - 1;
|
---|
252 | }
|
---|
253 |
|
---|
254 | if(misses)
|
---|
255 | {
|
---|
256 | std::cerr << "Warning: sum of weights was 0 in line_fit " << misses
|
---|
257 | << " times (of " << pcalcs << ") in iteration " << iter << ".\n"
|
---|
258 | "This happens if all points within the sliding window are\n"
|
---|
259 | "more than 6 medians from the locally fitted function.\n";
|
---|
260 | }
|
---|
261 |
|
---|
262 | for(int i = 0; i < npoints; i++)
|
---|
263 | temp_beg[i] = fabs(y_fit_beg[i] - y_beg[i]);
|
---|
264 |
|
---|
265 | std::pair<double*, double*> p = median(temp_beg, temp_beg + npoints);
|
---|
266 | double inv_yw_range = 1. /
|
---|
267 | (yw_range_factor * 0.5 * (*p.first + *p.second));
|
---|
268 | for(int i = 0; i < npoints; i++)
|
---|
269 | w_beg[i] = y_weight_func((y_fit_beg[i] - y_beg[i]) * inv_yw_range);
|
---|
270 | }
|
---|
271 | }
|
---|
272 |
|
---|
273 | }
|
---|
274 |
|
---|