Context Navigation

Back to Ticket #837

Ticket #837: base_lowess.cc

File base_lowess.cc, 5.9 KB (added by Nicklas Nordborg, 17 years ago)
BASE 1 implementation of lowess

Line
1	//
2	// BioArray Software Environment (BASE) - homepage http://base.thep.lu.se/
3	// Copyright (C) 2002,2003 Björn Samuelsson
4	//
5	// This file is part of BASE.
6	//
7	// BASE is free software; you can redistribute it and/or
8	// modify it under the terms of the GNU General Public License
9	// as published by the Free Software Foundation; either version 2
10	// of the License, or (at your option) any later version.
11	//
12	// BASE is distributed in the hope that it will be useful,
13	// but WITHOUT ANY WARRANTY; without even the implied warranty of
14	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	// GNU General Public License for more details.
16	//
17	// You should have received a copy of the GNU General Public License
18	// along with this program; if not, write to the Free Software
19	// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20	//
21	#include <iostream>
22	#include <base_lowess.h>
23
24	namespace BASE
25	{
26
27	const double lowess::yw_range_factor = 6.;
28
29	static inline double cube(double a)
30	{
31	return a * a * a;
32	}
33
34	double lowess::cubic_weight_func(double a)
35	{
36	if(fabs(a) < 1.)
37	return cube(1. - cube(fabs(a)));
38	else
39	return 0.;
40	}
41
42	static inline double sqr(double a)
43	{
44	return a * a;
45	}
46
47	double lowess::quadratic_weight_func(double a)
48	{
49	if(fabs(a) < 1.)
50	return sqr(1. - sqr(a));
51	else
52	return 0.;
53	}
54
55
56	bool lowess::line_fit(
57	double x_beg, int npoints, double y_beg, double &k, double &m,
58	double *w_beg)
59	{
60	const int np = npoints;
61	if(np <= 0)
62	{
63	std::cerr << "line_fit called with npoints <= 0\n";
64	exit(1);
65	}
66
67	double x_sum = 0., y_sum = 0., xx_sum = 0., xy_sum = 0., w_sum = 0.;
68
69	if(w_beg)
70	{
71	for(int i = 0; i < np; i++)
72	{
73	double w = w_beg[i];
74	x_sum += x_beg[i] * w;
75	y_sum += y_beg[i] * w;
76	xx_sum += x_beg[i] * x_beg[i] * w;
77	xy_sum += x_beg[i] * y_beg[i] * w;
78	w_sum += w;
79	}
80	}
81	else
82	{
83	for(int i = 0; i < np; i++)
84	{
85	x_sum += x_beg[i];
86	y_sum += y_beg[i];
87	xx_sum += x_beg[i] * x_beg[i];
88	xy_sum += x_beg[i] * y_beg[i];
89	}
90	w_sum = np;
91	}
92
93	if(w_sum <= 0.)
94	return false;
95
96	double denom = w_sumxx_sum - x_sumx_sum;
97	if(denom != 0.)
98	{
99	k = (w_sumxy_sum - x_sumy_sum) / denom;
100	m = (y_sum - k*x_sum) / w_sum;
101	}
102	else
103	{
104	k = 0.;
105	m = y_sum / w_sum;
106	}
107	return true;
108	}
109
110	// fit for one point
111	// x must be sorted in ascending order
112	bool lowess::p_lowess(
113	double x_beg, int npoints, double y_beg,
114	const double &p_x, double &p_y, double *temp_w_beg,
115	double *yw_beg, weight_func x_weight_func)
116	{
117	const int np = npoints;
118	if(np <= 0)
119	{
120	std::cerr << "p_lowess called with npoints <= 0\n";
121	exit(1);
122	}
123
124	double inv_radius = 1. / std::max(p_x - *x_beg, x_beg[np - 1] - p_x);
125	if(yw_beg)
126	{
127	for(int i = 0; i < np; i++)
128	{
129	temp_w_beg[i] = (x_weight_func)((x_beg[i] - p_x) inv_radius) *
130	yw_beg[i];
131	}
132	}
133	else
134	{
135	for(int i = 0; i < np; i++)
136	temp_w_beg[i] = x_weight_func((x_beg[i] - p_x) * inv_radius);
137	}
138
139	double k, m;
140	bool tmp = line_fit(x_beg, npoints, y_beg, k, m, temp_w_beg);
141
142	p_y = k * p_x + m;
143	return tmp;
144	}
145
146	// robust fit of line segments
147	// x must be sorted in ascending order
148	void lowess::calc(
149	double x_beg, int npoints, double y_beg,
150	double y_fit_beg, char point_fitted_beg,
151	const double &frac, const int n_iter, const double &delta,
152	weight_func x_weight_func, weight_func y_weight_func)
153	{
154	const int np_window = std::min(npoints, int(frac*npoints + 0.5));
155	if(npoints == 0)
156	return;
157	if(npoints < 0)
158	{
159	std::cerr << "lowess called with npoints < 0\n";
160	exit(1);
161	}
162
163	if(np_window <= 2)
164	{
165	std::copy(y_beg, y_beg + npoints, y_fit_beg);
166	std::fill(point_fitted_beg, point_fitted_beg + npoints, true);
167	return;
168	}
169
170	double w_beg[npoints];
171	double temp_beg[npoints];
172
173	for(int iter = 0; iter <= n_iter; iter++)
174	{
175	int window_i = 0;
176	int i, prev_i;
177	int misses = 0, pcalcs = 0;
178
179	for(i = 0, prev_i = -1; prev_i < npoints - 1;)
180	{
181	//center window
182	while(window_i + np_window < npoints &&
183	x_beg[i] - x_beg[window_i] >
184	x_beg[window_i + np_window - 1] - x_beg[i])
185	{
186	window_i++;
187	}
188
189	//fit one point
190	pcalcs++;
191	bool fitted = p_lowess(x_beg + window_i, np_window,
192	y_beg + window_i, x_beg[i], y_fit_beg[i], temp_beg,
193	iter ? w_beg + window_i : 0, x_weight_func);
194
195	if(!fitted)
196	{
197	misses++;
198	if(i < npoints - 1)
199	;
200	else if(prev_i < 0)
201	{
202	std::cerr << "Not a single point fitted - this is an odd error.\n"
203	"Please report it as a bug.\n";
204	exit(1);
205	}
206	else
207	{
208	for(int j = prev_i + 1; j < i; j++)
209	y_fit_beg[j] = y_fit_beg[prev_i];
210	break;
211	}
212	}
213	else
214	{
215	//interpolate skipped points
216	if(prev_i + 1 < i)
217	{
218	double d = prev_i < 0 ? 0 : x_beg[i] - x_beg[prev_i];
219	if(d == 0.)
220	{
221	for(int j = prev_i + 1; j < i; j++)
222	y_fit_beg[j] = y_fit_beg[i];
223	}
224	else
225	{
226	for(int j = prev_i + 1; j < i; j++)
227	{
228	double a = (x_beg[j] - x_beg[prev_i])/d;
229	y_fit_beg[j] = ay_fit_beg[i] + (1.-a)y_fit_beg[prev_i];
230	}
231	}
232	}
233
234	for(int j = prev_i + 1; j < i; j++)
235	point_fitted_beg[j] = false;
236	point_fitted_beg[i] = true;
237
238	prev_i = i;
239	}
240
241	double cut = x_beg[i] + delta;
242	for(i++; i < npoints && x_beg[i] <= cut; i++)
243	{
244	if(x_beg[i] - x_beg[i-1] < 0)
245	{
246	std::cerr << "x-values not sorted in lowess\n";
247	exit(1);
248	}
249	}
250	if(i >= npoints)
251	i = npoints - 1;
252	}
253
254	if(misses)
255	{
256	std::cerr << "Warning: sum of weights was 0 in line_fit " << misses
257	<< " times (of " << pcalcs << ") in iteration " << iter << ".\n"
258	"This happens if all points within the sliding window are\n"
259	"more than 6 medians from the locally fitted function.\n";
260	}
261
262	for(int i = 0; i < npoints; i++)
263	temp_beg[i] = fabs(y_fit_beg[i] - y_beg[i]);
264
265	std::pair<double, double> p = median(temp_beg, temp_beg + npoints);
266	double inv_yw_range = 1. /
267	(yw_range_factor * 0.5 * (p.first + p.second));
268	for(int i = 0; i < npoints; i++)
269	w_beg[i] = y_weight_func((y_fit_beg[i] - y_beg[i]) * inv_yw_range);
270	}
271	}
272
273	}
274

Download in other formats:

Original Format