Ticket #837: base_lowess.cc

File base_lowess.cc, 5.9 KB (added by Nicklas Nordborg, 15 years ago)

BASE 1 implementation of lowess

Line 
1//
2// BioArray Software Environment (BASE) - homepage http://base.thep.lu.se/
3// Copyright (C) 2002,2003 Björn Samuelsson
4//
5// This file is part of BASE.
6//
7// BASE is free software; you can redistribute it and/or
8// modify it under the terms of the GNU General Public License
9// as published by the Free Software Foundation; either version 2
10// of the License, or (at your option) any later version.
11//
12// BASE is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15// GNU General Public License for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with this program; if not, write to the Free Software
19// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
20//
21#include <iostream>
22#include <base_lowess.h>
23
24namespace BASE
25{
26
27const double lowess::yw_range_factor = 6.;
28
29static inline double cube(double a)
30{
31  return a * a * a;
32}
33
34double lowess::cubic_weight_func(double a)
35{
36  if(fabs(a) < 1.)
37    return cube(1. - cube(fabs(a)));
38  else
39    return 0.;
40}
41
42static inline double sqr(double a)
43{
44  return a * a;
45}
46
47double lowess::quadratic_weight_func(double a)
48{
49  if(fabs(a) < 1.)
50    return sqr(1. - sqr(a));
51  else
52    return 0.;
53}
54
55
56bool lowess::line_fit(
57  double *x_beg, int npoints, double *y_beg, double &k, double &m,
58  double *w_beg)
59{
60  const int np = npoints;
61  if(np <= 0)
62  {
63    std::cerr << "line_fit called with npoints <= 0\n";
64    exit(1);
65  }
66 
67  double x_sum = 0., y_sum = 0., xx_sum = 0., xy_sum = 0., w_sum = 0.;
68
69  if(w_beg)
70  {
71    for(int i = 0; i < np; i++)
72    {
73      double w = w_beg[i];
74      x_sum += x_beg[i] * w;
75      y_sum += y_beg[i] * w;
76      xx_sum += x_beg[i] * x_beg[i] * w;
77      xy_sum += x_beg[i] * y_beg[i] * w;
78      w_sum += w;
79    }
80  }
81  else
82  {
83    for(int i = 0; i < np; i++)
84    {
85      x_sum += x_beg[i];
86      y_sum += y_beg[i];
87      xx_sum += x_beg[i] * x_beg[i];
88      xy_sum += x_beg[i] * y_beg[i];
89    }
90    w_sum = np;
91  }
92 
93  if(w_sum <= 0.)
94    return false;
95
96  double denom = w_sum*xx_sum - x_sum*x_sum;
97  if(denom != 0.)
98  {
99    k = (w_sum*xy_sum - x_sum*y_sum) / denom;
100    m = (y_sum - k*x_sum) / w_sum;
101  }
102  else
103  {
104    k = 0.;
105    m = y_sum / w_sum;
106  }
107  return true;
108}
109
110// fit for one point
111// x must be sorted in ascending order
112bool lowess::p_lowess(
113  double *x_beg, int npoints, double *y_beg,
114  const double &p_x, double &p_y, double *temp_w_beg,
115  double *yw_beg, weight_func x_weight_func)
116{
117  const int np = npoints;
118  if(np <= 0)
119  {
120    std::cerr << "p_lowess called with npoints <= 0\n";
121    exit(1);
122  }
123
124  double inv_radius = 1. / std::max(p_x - *x_beg, x_beg[np - 1] - p_x);
125  if(yw_beg)
126  {
127    for(int i = 0; i < np; i++)
128    {
129      temp_w_beg[i] = (*x_weight_func)((x_beg[i] - p_x) * inv_radius) *
130        yw_beg[i];
131    }
132  }
133  else
134  {
135    for(int i = 0; i < np; i++)
136      temp_w_beg[i] = x_weight_func((x_beg[i] - p_x) * inv_radius);
137  }
138
139  double k, m;
140  bool tmp = line_fit(x_beg, npoints, y_beg, k, m, temp_w_beg);
141
142  p_y = k * p_x + m;
143  return tmp;
144}
145
146// robust fit of line segments
147// x must be sorted in ascending order
148void lowess::calc(
149  double *x_beg, int npoints, double *y_beg,
150  double *y_fit_beg, char *point_fitted_beg,
151  const double &frac, const int n_iter, const double &delta,
152  weight_func x_weight_func, weight_func y_weight_func)
153{
154  const int np_window = std::min(npoints, int(frac*npoints + 0.5));
155  if(npoints == 0)
156    return;
157  if(npoints < 0)
158  {
159    std::cerr << "lowess called with npoints < 0\n";
160    exit(1);
161  }
162
163  if(np_window <= 2)
164  {
165    std::copy(y_beg, y_beg + npoints, y_fit_beg);
166    std::fill(point_fitted_beg, point_fitted_beg + npoints, true);
167    return;
168  }
169
170  double w_beg[npoints];
171  double temp_beg[npoints];
172
173  for(int iter = 0; iter <= n_iter; iter++)
174  {
175    int window_i = 0;
176    int i, prev_i;
177    int misses = 0, pcalcs = 0;
178 
179    for(i = 0, prev_i = -1; prev_i < npoints - 1;)
180    {
181      //center window
182      while(window_i + np_window < npoints && 
183        x_beg[i] - x_beg[window_i] > 
184        x_beg[window_i + np_window - 1] - x_beg[i])
185      {
186        window_i++;
187      }
188
189      //fit one point
190      pcalcs++;
191      bool fitted = p_lowess(x_beg + window_i, np_window,
192        y_beg + window_i, x_beg[i], y_fit_beg[i], temp_beg,
193        iter ? w_beg + window_i : 0, x_weight_func);
194
195      if(!fitted) 
196      {
197        misses++;
198        if(i < npoints - 1)
199          ;
200        else if(prev_i < 0)
201        {
202          std::cerr << "Not a single point fitted - this is an odd error.\n"
203            "Please report it as a bug.\n";
204          exit(1);
205        }
206        else
207        {
208          for(int j = prev_i + 1; j < i; j++)
209            y_fit_beg[j] = y_fit_beg[prev_i];
210          break;
211        }
212      }
213      else
214      {
215        //interpolate skipped points
216        if(prev_i + 1 < i)
217        {
218          double d = prev_i < 0 ? 0 : x_beg[i] - x_beg[prev_i];
219          if(d == 0.)
220          {
221            for(int j = prev_i + 1; j < i; j++)
222              y_fit_beg[j] = y_fit_beg[i];
223          }
224          else
225          {
226            for(int j = prev_i + 1; j < i; j++)
227            {
228              double a = (x_beg[j] - x_beg[prev_i])/d;
229              y_fit_beg[j] = a*y_fit_beg[i] + (1.-a)*y_fit_beg[prev_i];
230            }
231          }
232        }
233
234        for(int j = prev_i + 1; j < i; j++)
235          point_fitted_beg[j] = false;
236        point_fitted_beg[i] = true;
237
238        prev_i = i;
239      }
240
241      double cut = x_beg[i] + delta;
242      for(i++; i < npoints && x_beg[i] <= cut; i++)
243      {
244        if(x_beg[i] - x_beg[i-1] < 0)
245        {
246          std::cerr << "x-values not sorted in lowess\n";
247          exit(1);
248        }
249      }
250      if(i >= npoints)
251        i = npoints - 1;
252    }
253
254    if(misses)
255    {
256      std::cerr << "Warning: sum of weights was 0 in line_fit " << misses
257        << " times (of " << pcalcs << ") in iteration " << iter << ".\n"
258        "This happens if all points within the sliding window are\n"
259        "more than 6 medians from the locally fitted function.\n";
260    }
261   
262    for(int i = 0; i < npoints; i++)
263      temp_beg[i] = fabs(y_fit_beg[i] - y_beg[i]);
264
265    std::pair<double*, double*> p = median(temp_beg, temp_beg + npoints);
266    double inv_yw_range = 1. /
267      (yw_range_factor * 0.5 * (*p.first + *p.second));
268    for(int i = 0; i < npoints; i++)
269      w_beg[i] = y_weight_func((y_fit_beg[i] - y_beg[i]) * inv_yw_range);
270  }
271}
272
273}
274