Ticket #837: base_lowess.cc

File base_lowess.cc, 5.9 KB (added by Nicklas Nordborg, 17 years ago)

BASE 1 implementation of lowess

Line 
1//
2// BioArray Software Environment (BASE) - homepage http://base.thep.lu.se/
3// Copyright (C) 2002,2003 Björn Samuelsson
4//
5// This file is part of BASE.
6//
7// BASE is free software; you can redistribute it and/or
8// modify it under the terms of the GNU General Public License
9// as published by the Free Software Foundation; either version 2
10// of the License, or (at your option) any later version.
11//
12// BASE is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15// GNU General Public License for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with this program; if not, write to the Free Software
19// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20//
21#include <iostream>
22#include <base_lowess.h>
23
24namespace BASE
25{
26
27const double lowess::yw_range_factor = 6.;
28
29static inline double cube(double a)
30{
31 return a * a * a;
32}
33
34double lowess::cubic_weight_func(double a)
35{
36 if(fabs(a) < 1.)
37 return cube(1. - cube(fabs(a)));
38 else
39 return 0.;
40}
41
42static inline double sqr(double a)
43{
44 return a * a;
45}
46
47double lowess::quadratic_weight_func(double a)
48{
49 if(fabs(a) < 1.)
50 return sqr(1. - sqr(a));
51 else
52 return 0.;
53}
54
55
56bool lowess::line_fit(
57 double *x_beg, int npoints, double *y_beg, double &k, double &m,
58 double *w_beg)
59{
60 const int np = npoints;
61 if(np <= 0)
62 {
63 std::cerr << "line_fit called with npoints <= 0\n";
64 exit(1);
65 }
66
67 double x_sum = 0., y_sum = 0., xx_sum = 0., xy_sum = 0., w_sum = 0.;
68
69 if(w_beg)
70 {
71 for(int i = 0; i < np; i++)
72 {
73 double w = w_beg[i];
74 x_sum += x_beg[i] * w;
75 y_sum += y_beg[i] * w;
76 xx_sum += x_beg[i] * x_beg[i] * w;
77 xy_sum += x_beg[i] * y_beg[i] * w;
78 w_sum += w;
79 }
80 }
81 else
82 {
83 for(int i = 0; i < np; i++)
84 {
85 x_sum += x_beg[i];
86 y_sum += y_beg[i];
87 xx_sum += x_beg[i] * x_beg[i];
88 xy_sum += x_beg[i] * y_beg[i];
89 }
90 w_sum = np;
91 }
92
93 if(w_sum <= 0.)
94 return false;
95
96 double denom = w_sum*xx_sum - x_sum*x_sum;
97 if(denom != 0.)
98 {
99 k = (w_sum*xy_sum - x_sum*y_sum) / denom;
100 m = (y_sum - k*x_sum) / w_sum;
101 }
102 else
103 {
104 k = 0.;
105 m = y_sum / w_sum;
106 }
107 return true;
108}
109
110// fit for one point
111// x must be sorted in ascending order
112bool lowess::p_lowess(
113 double *x_beg, int npoints, double *y_beg,
114 const double &p_x, double &p_y, double *temp_w_beg,
115 double *yw_beg, weight_func x_weight_func)
116{
117 const int np = npoints;
118 if(np <= 0)
119 {
120 std::cerr << "p_lowess called with npoints <= 0\n";
121 exit(1);
122 }
123
124 double inv_radius = 1. / std::max(p_x - *x_beg, x_beg[np - 1] - p_x);
125 if(yw_beg)
126 {
127 for(int i = 0; i < np; i++)
128 {
129 temp_w_beg[i] = (*x_weight_func)((x_beg[i] - p_x) * inv_radius) *
130 yw_beg[i];
131 }
132 }
133 else
134 {
135 for(int i = 0; i < np; i++)
136 temp_w_beg[i] = x_weight_func((x_beg[i] - p_x) * inv_radius);
137 }
138
139 double k, m;
140 bool tmp = line_fit(x_beg, npoints, y_beg, k, m, temp_w_beg);
141
142 p_y = k * p_x + m;
143 return tmp;
144}
145
146// robust fit of line segments
147// x must be sorted in ascending order
148void lowess::calc(
149 double *x_beg, int npoints, double *y_beg,
150 double *y_fit_beg, char *point_fitted_beg,
151 const double &frac, const int n_iter, const double &delta,
152 weight_func x_weight_func, weight_func y_weight_func)
153{
154 const int np_window = std::min(npoints, int(frac*npoints + 0.5));
155 if(npoints == 0)
156 return;
157 if(npoints < 0)
158 {
159 std::cerr << "lowess called with npoints < 0\n";
160 exit(1);
161 }
162
163 if(np_window <= 2)
164 {
165 std::copy(y_beg, y_beg + npoints, y_fit_beg);
166 std::fill(point_fitted_beg, point_fitted_beg + npoints, true);
167 return;
168 }
169
170 double w_beg[npoints];
171 double temp_beg[npoints];
172
173 for(int iter = 0; iter <= n_iter; iter++)
174 {
175 int window_i = 0;
176 int i, prev_i;
177 int misses = 0, pcalcs = 0;
178
179 for(i = 0, prev_i = -1; prev_i < npoints - 1;)
180 {
181 //center window
182 while(window_i + np_window < npoints &&
183 x_beg[i] - x_beg[window_i] >
184 x_beg[window_i + np_window - 1] - x_beg[i])
185 {
186 window_i++;
187 }
188
189 //fit one point
190 pcalcs++;
191 bool fitted = p_lowess(x_beg + window_i, np_window,
192 y_beg + window_i, x_beg[i], y_fit_beg[i], temp_beg,
193 iter ? w_beg + window_i : 0, x_weight_func);
194
195 if(!fitted)
196 {
197 misses++;
198 if(i < npoints - 1)
199 ;
200 else if(prev_i < 0)
201 {
202 std::cerr << "Not a single point fitted - this is an odd error.\n"
203 "Please report it as a bug.\n";
204 exit(1);
205 }
206 else
207 {
208 for(int j = prev_i + 1; j < i; j++)
209 y_fit_beg[j] = y_fit_beg[prev_i];
210 break;
211 }
212 }
213 else
214 {
215 //interpolate skipped points
216 if(prev_i + 1 < i)
217 {
218 double d = prev_i < 0 ? 0 : x_beg[i] - x_beg[prev_i];
219 if(d == 0.)
220 {
221 for(int j = prev_i + 1; j < i; j++)
222 y_fit_beg[j] = y_fit_beg[i];
223 }
224 else
225 {
226 for(int j = prev_i + 1; j < i; j++)
227 {
228 double a = (x_beg[j] - x_beg[prev_i])/d;
229 y_fit_beg[j] = a*y_fit_beg[i] + (1.-a)*y_fit_beg[prev_i];
230 }
231 }
232 }
233
234 for(int j = prev_i + 1; j < i; j++)
235 point_fitted_beg[j] = false;
236 point_fitted_beg[i] = true;
237
238 prev_i = i;
239 }
240
241 double cut = x_beg[i] + delta;
242 for(i++; i < npoints && x_beg[i] <= cut; i++)
243 {
244 if(x_beg[i] - x_beg[i-1] < 0)
245 {
246 std::cerr << "x-values not sorted in lowess\n";
247 exit(1);
248 }
249 }
250 if(i >= npoints)
251 i = npoints - 1;
252 }
253
254 if(misses)
255 {
256 std::cerr << "Warning: sum of weights was 0 in line_fit " << misses
257 << " times (of " << pcalcs << ") in iteration " << iter << ".\n"
258 "This happens if all points within the sliding window are\n"
259 "more than 6 medians from the locally fitted function.\n";
260 }
261
262 for(int i = 0; i < npoints; i++)
263 temp_beg[i] = fabs(y_fit_beg[i] - y_beg[i]);
264
265 std::pair<double*, double*> p = median(temp_beg, temp_beg + npoints);
266 double inv_yw_range = 1. /
267 (yw_range_factor * 0.5 * (*p.first + *p.second));
268 for(int i = 0; i < npoints; i++)
269 w_beg[i] = y_weight_func((y_fit_beg[i] - y_beg[i]) * inv_yw_range);
270 }
271}
272
273}
274