1#ifndef WEIGHTEDLOWESS_WINDOW_HPP
2#define WEIGHTEDLOWESS_WINDOW_HPP
9#include "sanisizer/sanisizer.hpp"
43template<
typename Data_>
44Data_ derive_delta(
const std::size_t num_anchors,
const std::size_t num_points,
const Data_* x) {
45 const auto points_m1 = num_points - 1;
46 auto diffs = sanisizer::create<std::vector<Data_> >(points_m1);
47 for (
decltype(I(points_m1)) i = 0; i < points_m1; ++i) {
48 diffs[i] = x[i + 1] - x[i];
51 std::sort(diffs.begin(), diffs.end());
52 for (
decltype(I(points_m1)) i = 1; i < points_m1; ++i) {
53 diffs[i] += diffs[i-1];
56 Data_ lowest_delta = diffs.back();
57 if (num_anchors > 1) {
58 auto max_skips = sanisizer::min(num_anchors - 1, points_m1);
59 for (
decltype(I(max_skips)) nskips = 0; nskips < max_skips; ++nskips) {
60 Data_ candidate_delta = diffs[points_m1 - nskips - 1] / (num_anchors - nskips);
61 lowest_delta = std::min(candidate_delta, lowest_delta);
77template<
typename Data_>
78void find_anchors(
const std::size_t num_points,
const Data_* x, Data_ delta, std::vector<std::size_t>& anchors) {
82 std::size_t last_pt = 0;
83 const std::size_t points_m1 = num_points - 1;
84 for (
decltype(I(points_m1)) pt = 1; pt < points_m1; ++pt) {
85 if (x[pt] - x[last_pt] > delta) {
86 anchors.push_back(pt);
91 anchors.push_back(points_m1);
95template<
typename Data_>
97 std::size_t left, right;
111template<
typename Data_>
112std::vector<Window<Data_> > find_limits(
113 const std::vector<std::size_t>& anchors,
114 const Data_ span_weight,
115 const std::size_t num_points,
117 const Data_* weights,
118 const Data_ min_width,
121 const auto nanchors = anchors.size();
122 auto limits = sanisizer::create<std::vector<Window<Data_> > >(nanchors);
123 const auto half_min_width = min_width / 2;
124 const auto points_m1 = num_points - 1;
126 parallelize(nthreads, nanchors, [&](
const int,
const decltype(I(nanchors)) start,
const decltype(I(nanchors)) length) {
127 for (
decltype(I(start)) s = start, end = start + length; s < end; ++s) {
128 const auto curpt = anchors[s];
129 const auto curx = x[curpt];
130 auto left = curpt, right = curpt;
131 Data_ curw = (weights == NULL ? 1 : weights[curpt]);
135 if (curpt > 0 && curpt < points_m1) {
136 auto next_ldist = curx - x[left - 1];
137 auto next_rdist = x[right + 1] - curx;
139 while (curw < span_weight) {
140 if (next_ldist < next_rdist) {
142 curw += (weights == NULL ? 1 : weights[left]);
146 next_ldist = curx - x[left - 1];
148 }
else if (next_ldist > next_rdist) {
150 curw += (weights == NULL ? 1 : weights[right]);
151 if (right == points_m1) {
154 next_rdist = x[right + 1] - curx;
162 curw += (weights == NULL ? 2 : weights[left] + weights[right]);
163 if (left == 0 || right == points_m1) {
166 next_ldist = curx - x[left - 1];
167 next_rdist = x[right + 1] - curx;
173 while (left > 0 && curw < span_weight) {
175 curw += (weights == NULL ? 1 : weights[left]);
178 while (right < points_m1 && curw < span_weight) {
180 curw += (weights == NULL ? 1 : weights[right]);
184 while (left > 0 && x[left] == x[left - 1]) {
188 while (right < points_m1 && x[right] == x[right + 1]) {
195 auto mdist = std::max(curx - x[left], x[right] - curx);
196 if (mdist < half_min_width) {
197 left = std::lower_bound(x, x + left, curx - half_min_width) - x;
206 right = std::upper_bound(x + right + 1, x + num_points, curx + half_min_width) - x;
209 mdist = std::max(curx - x[left], x[right] - curx);
212 limits[s].left = left;
213 limits[s].right = right;
214 limits[s].distance = mdist;
233template<
typename Data_>
238 std::vector<std::size_t> anchors;
239 const Data_* freq_weights = NULL;
240 Data_ total_weight = 0;
241 std::vector<internal::Window<Data_> > limits;
266template<
typename Data_>
271 if (!std::is_sorted(x, x + num_points)) {
272 throw std::runtime_error(
"'x' should be sorted");
276 auto& anchors = output.anchors;
278 sanisizer::resize(anchors, num_points);
279 std::iota(anchors.begin(), anchors.end(), 0);
280 }
else if (opt.
delta < 0) {
281 Data_ eff_delta = internal::derive_delta(opt.
anchors, num_points, x);
282 internal::find_anchors(num_points, x, eff_delta, anchors);
284 internal::find_anchors(num_points, x, opt.
delta, anchors);
289 output.total_weight = (output.freq_weights != NULL ? std::accumulate(output.freq_weights, output.freq_weights + num_points,
static_cast<Data_
>(0)) : num_points);
291 output.limits = internal::find_limits(anchors, span_weight, num_points, x, output.freq_weights, opt.
minimum_width, opt.
num_threads);
Namespace for LOWESS functions.
Definition compute.hpp:18
PrecomputedWindows< Data_ > define_windows(const std::size_t num_points, const Data_ *x, const Options< Data_ > &opt)
Definition window.hpp:267
void parallelize(const int num_workers, const Task_ num_tasks, Run_ run_task_range)
Definition parallelize.hpp:28
Definitions for parallelization.
Options for compute().
Definition Options.hpp:17
Data_ * weights
Definition Options.hpp:78
bool frequency_weights
Definition Options.hpp:85
Data_ delta
Definition Options.hpp:71
std::size_t anchors
Definition Options.hpp:55
Data_ span
Definition Options.hpp:28
Data_ minimum_width
Definition Options.hpp:42
bool span_as_proportion
Definition Options.hpp:35
int num_threads
Definition Options.hpp:92
Precomputed windows for LOWESS smoothing.
Definition window.hpp:234