Skip to content
Snippets Groups Projects
Commit 415ab86e authored by Keyser, Johannes's avatar Keyser, Johannes
Browse files

Fix and avoid confusion about output parameters.

- Force output of offsets to avoid confusion.
- Make order of coefficients consistent everywhere.
- Match documentation and example calls.
parent 75234a97
No related branches found
No related tags found
No related merge requests found
function [b0, b1] = TheilSen(X, y) function coef = TheilSen(X, y)
% THEILSEN performs Theil-Sen robust, simple linear regression(s) of X on y. % THEILSEN performs Theil-Sen robust, simple linear regression(s) of X on y.
% %
% For convenience, the input X may contain more than one predictor variable. % For convenience, the input X may contain more than one predictor variable.
...@@ -13,12 +13,13 @@ function [b0, b1] = TheilSen(X, y) ...@@ -13,12 +13,13 @@ function [b0, b1] = TheilSen(X, y)
% y: A column vector containing the observations of the response variable. % y: A column vector containing the observations of the response variable.
% %
% OUTPUT % OUTPUT
% b0: Estimated offset(s) for each predictor variable in X with respect to y % coef: Estimated regression coefficients for each predictor column in X, with
% (as independent simple regression offsets per predictor). % respect to the response variable y. Each column in coef corresponds
% b1: Estimated slope(s) for each predictor variable in X with respect to y % to one predictor in X, i.e. it has as many columns as X does.
% (as independent simple regression slopes per predictor). % The first row, i.e. coef(:, 1), contains the estimated offset(s).
% % The second row, i.e. coef(:, 2), contains the estimated slope(s).
% Vectors b0 and b1 contain as many entries as column vectors in X. % (This output format is chosen to avoid confusion, e.g. with previous
% versions of this code.)
% %
% EXAMPLE % EXAMPLE
% See accompanying file example.m. % See accompanying file example.m.
...@@ -81,13 +82,12 @@ end ...@@ -81,13 +82,12 @@ end
Cprm = reshape( permute(C, [1, 3, 2]), [], size(C, 2), 1 ); Cprm = reshape( permute(C, [1, 3, 2]), [], size(C, 2), 1 );
% estimate slope as the median of all pairwise slopes (per predictor column) % estimate slope as the median of all pairwise slopes (per predictor column)
b1 = median(Cprm, 1, 'omitnan'); b1s = median(Cprm, 1, 'omitnan');
% estimate offset as the median of all pairwise offsets (per predictor column) % estimate offset as the median of all pairwise offsets (per predictor column)
if nargout == 2 % only compute if requested/used b0s = median(bsxfun(@minus, y(:), ...
b0 = median(bsxfun(@minus, y(:), ... bsxfun(@times, b1s, X(:, 1:end))), ...
bsxfun(@times, b1, X(:, 1:end))), ...
'omitnan'); 'omitnan');
end
coef = [b0s; b1s];
end end
\ No newline at end of file
...@@ -30,8 +30,7 @@ data_y(outlr_idx) = outlr_y; ...@@ -30,8 +30,7 @@ data_y(outlr_idx) = outlr_y;
est_ls = [ones(N_total, 1), data_x] \ data_y; est_ls = [ones(N_total, 1), data_x] \ data_y;
% Estimate Theil-Sen parameters. % Estimate Theil-Sen parameters.
[b0, b1] = TheilSen(data_x, data_y); est_ts = TheilSen(data_x, data_y);
est_ts = [b0, b1];
% Plot everything and add comparison of estimates to title. % Plot everything and add comparison of estimates to title.
figure() figure()
...@@ -39,8 +38,8 @@ plims = [min(data_x), max(data_x)]'; ...@@ -39,8 +38,8 @@ plims = [min(data_x), max(data_x)]';
normal_idx = setdiff(1:N_total, outlr_idx); normal_idx = setdiff(1:N_total, outlr_idx);
plot(data_x(normal_idx), data_y(normal_idx), 'ko', ... plot(data_x(normal_idx), data_y(normal_idx), 'ko', ...
data_x(outlr_idx), data_y(outlr_idx), 'rx', ... data_x(outlr_idx), data_y(outlr_idx), 'rx', ...
plims, plims * est_ls(2) + est_ls(1), '-r', ... plims, est_ls(1) + plims * est_ls(2) , '-r', ...
plims, plims * est_ts(2) + est_ts(1), 'b-', ... plims, est_ts(1) + plims * est_ts(2), 'b-', ...
'linewidth', 2) 'linewidth', 2)
legend('Normal data', 'Outlier', 'Least Squares', 'Theil-Sen', 'location', 'NW') legend('Normal data', 'Outlier', 'Least Squares', 'Theil-Sen', 'location', 'NW')
title(sprintf('True: [%.3g, %.3g], LS: [%.3g, %.3g], TS: [%.3g, %.3g]', ... title(sprintf('True: [%.3g, %.3g], LS: [%.3g, %.3g], TS: [%.3g, %.3g]', ...
...@@ -48,19 +47,19 @@ title(sprintf('True: [%.3g, %.3g], LS: [%.3g, %.3g], TS: [%.3g, %.3g]', ... ...@@ -48,19 +47,19 @@ title(sprintf('True: [%.3g, %.3g], LS: [%.3g, %.3g], TS: [%.3g, %.3g]', ...
%% Demonstrate use of multiple predictor variables in X. %% Demonstrate use of multiple predictor variables in X.
% NOTE: You must execute cell above first. % NOTE: You must execute cell above first.
% create 2 different predictor columns % create 3 different predictor columns
data_x1 = data_x; data_x1 = data_x;
data_x2 = data_x1 * -2 + 10 + randn(size(data_x1)) * SDx_usual; data_x2 = +10 - 2 * data_x1 + randn(size(data_x1)) * SDx_usual;
X = [data_x1, data_x2]; data_x3 = -50 + 5 * data_x1 + randn(size(data_x1)) * SDx_usual;
[b0, b1] = TheilSen(X, data_y); X = [data_x1, data_x2, data_x3];
est_ts = TheilSen(X, data_y);
% plot both simple regressions; note mainly the difference between x-axes % plot both simple regressions; note mainly the difference between x-axes
num_pred = size(X, 2); num_pred = size(X, 2);
figure() figure()
for pp = 1:num_pred for pp = 1:num_pred
est_ts = [b0(pp), b1(pp)];
subplot(1, num_pred, pp) subplot(1, num_pred, pp)
plims = [min(X(:, pp)), max(X(:, pp))]'; plims = [min(X(:, pp)), max(X(:, pp))]';
plot(X(:, pp), data_y, 'k^', ... plot(X(:, pp), data_y, 'k^', ...
plims, plims * est_ts(2) + est_ts(1), 'b-', 'linewidth', 2) plims, est_ts(1, pp) + plims * est_ts(2, pp), 'b-', 'linewidth', 2)
end end
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment