diff --git a/TheilSen.m b/TheilSen.m index 8588c2838759e312e118f0649070950da580b94a..44b232b1529121076e5380e906c62493840fbcb7 100644 --- a/TheilSen.m +++ b/TheilSen.m @@ -1,4 +1,4 @@ -function [b0, b1] = TheilSen(X, y) +function coef = TheilSen(X, y) % THEILSEN performs Theil-Sen robust, simple linear regression(s) of X on y. % % For convenience, the input X may contain more than one predictor variable. @@ -13,12 +13,13 @@ function [b0, b1] = TheilSen(X, y) % y: A column vector containing the observations of the response variable. % % OUTPUT -% b0: Estimated offset(s) for each predictor variable in X with respect to y -% (as independent simple regression offsets per predictor). -% b1: Estimated slope(s) for each predictor variable in X with respect to y -% (as independent simple regression slopes per predictor). -% -% Vectors b0 and b1 contain as many entries as column vectors in X. +% coef: Estimated regression coefficients for each predictor column in X, with +% respect to the response variable y. Each column in coef corresponds +% to one predictor in X, i.e. it has as many columns as X does. +% The first row, i.e. coef(:, 1), contains the estimated offset(s). +% The second row, i.e. coef(:, 2), contains the estimated slope(s). +% (This output format is chosen to avoid confusion, e.g. with previous +% versions of this code.) % % EXAMPLE % See accompanying file example.m. @@ -81,13 +82,12 @@ end Cprm = reshape( permute(C, [1, 3, 2]), [], size(C, 2), 1 ); % estimate slope as the median of all pairwise slopes (per predictor column) -b1 = median(Cprm, 1, 'omitnan'); +b1s = median(Cprm, 1, 'omitnan'); % estimate offset as the median of all pairwise offsets (per predictor column) -if nargout == 2 % only compute if requested/used - b0 = median(bsxfun(@minus, y(:), ... - bsxfun(@times, b1, X(:, 1:end))), ... - 'omitnan'); -end +b0s = median(bsxfun(@minus, y(:), ... + bsxfun(@times, b1s, X(:, 1:end))), ... + 'omitnan'); +coef = [b0s; b1s]; end \ No newline at end of file diff --git a/example.m b/example.m index f383b58e254792204a49c44735e3be8939c7d034..d7808509dd81d23498b73783db789439880e7871 100644 --- a/example.m +++ b/example.m @@ -30,8 +30,7 @@ data_y(outlr_idx) = outlr_y; est_ls = [ones(N_total, 1), data_x] \ data_y; % Estimate Theil-Sen parameters. -[b0, b1] = TheilSen(data_x, data_y); -est_ts = [b0, b1]; +est_ts = TheilSen(data_x, data_y); % Plot everything and add comparison of estimates to title. figure() @@ -39,8 +38,8 @@ plims = [min(data_x), max(data_x)]'; normal_idx = setdiff(1:N_total, outlr_idx); plot(data_x(normal_idx), data_y(normal_idx), 'ko', ... data_x(outlr_idx), data_y(outlr_idx), 'rx', ... - plims, plims * est_ls(2) + est_ls(1), '-r', ... - plims, plims * est_ts(2) + est_ts(1), 'b-', ... + plims, est_ls(1) + plims * est_ls(2) , '-r', ... + plims, est_ts(1) + plims * est_ts(2), 'b-', ... 'linewidth', 2) legend('Normal data', 'Outlier', 'Least Squares', 'Theil-Sen', 'location', 'NW') title(sprintf('True: [%.3g, %.3g], LS: [%.3g, %.3g], TS: [%.3g, %.3g]', ... @@ -48,19 +47,19 @@ title(sprintf('True: [%.3g, %.3g], LS: [%.3g, %.3g], TS: [%.3g, %.3g]', ... %% Demonstrate use of multiple predictor variables in X. % NOTE: You must execute cell above first. -% create 2 different predictor columns +% create 3 different predictor columns data_x1 = data_x; -data_x2 = data_x1 * -2 + 10 + randn(size(data_x1)) * SDx_usual; -X = [data_x1, data_x2]; -[b0, b1] = TheilSen(X, data_y); +data_x2 = +10 - 2 * data_x1 + randn(size(data_x1)) * SDx_usual; +data_x3 = -50 + 5 * data_x1 + randn(size(data_x1)) * SDx_usual; +X = [data_x1, data_x2, data_x3]; +est_ts = TheilSen(X, data_y); % plot both simple regressions; note mainly the difference between x-axes num_pred = size(X, 2); figure() for pp = 1:num_pred - est_ts = [b0(pp), b1(pp)]; subplot(1, num_pred, pp) plims = [min(X(:, pp)), max(X(:, pp))]'; plot(X(:, pp), data_y, 'k^', ... - plims, plims * est_ts(2) + est_ts(1), 'b-', 'linewidth', 2) + plims, est_ts(1, pp) + plims * est_ts(2, pp), 'b-', 'linewidth', 2) end \ No newline at end of file