From 415ab86e12158f35772ff953a193114362ea2404 Mon Sep 17 00:00:00 2001
From: Johannes Keyser <johannes.keyser@sport.uni-giessen.de>
Date: Thu, 24 Mar 2022 18:41:05 +0100
Subject: [PATCH] Fix and avoid confusion about output parameters.

- Force output of offsets to avoid confusion.
- Make order of coefficients consistent everywhere.
- Match documentation and example calls.
---
 TheilSen.m | 26 +++++++++++++-------------
 example.m  | 19 +++++++++----------
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/TheilSen.m b/TheilSen.m
index 8588c28..44b232b 100644
--- a/TheilSen.m
+++ b/TheilSen.m
@@ -1,4 +1,4 @@
-function [b0, b1] = TheilSen(X, y)
+function coef = TheilSen(X, y)
 % THEILSEN performs Theil-Sen robust, simple linear regression(s) of X on y.
 % 
 % For convenience, the input X may contain more than one predictor variable.
@@ -13,12 +13,13 @@ function [b0, b1] = TheilSen(X, y)
 %   y: A column vector containing the observations of the response variable.
 %
 % OUTPUT
-%   b0: Estimated offset(s) for each predictor variable in X with respect to y
-%       (as independent simple regression offsets per predictor).
-%   b1: Estimated slope(s) for each predictor variable in X with respect to y
-%       (as independent simple regression slopes per predictor).
-%
-%   Vectors b0 and b1 contain as many entries as column vectors in X.
+%   coef: Estimated regression coefficients for each predictor column in X, with
+%         respect to the response variable y. Each column in coef corresponds
+%         to one predictor in X, i.e. it has as many columns as X does.
+%         The first row, i.e. coef(:, 1), contains the estimated offset(s).
+%         The second row, i.e. coef(:, 2), contains the estimated slope(s).
+%         (This output format is chosen to avoid confusion, e.g. with previous
+%          versions of this code.)
 %
 % EXAMPLE
 %   See accompanying file example.m.
@@ -81,13 +82,12 @@ end
 Cprm = reshape( permute(C, [1, 3, 2]), [], size(C, 2), 1 );
 
 % estimate slope as the median of all pairwise slopes (per predictor column)
-b1 = median(Cprm, 1, 'omitnan');
+b1s = median(Cprm, 1, 'omitnan');
 
 % estimate offset as the median of all pairwise offsets (per predictor column)
-if nargout == 2  % only compute if requested/used
-    b0 = median(bsxfun(@minus, y(:), ...
-                bsxfun(@times, b1, X(:, 1:end))), ...
-                'omitnan');
-end
+b0s = median(bsxfun(@minus, y(:), ...
+             bsxfun(@times, b1s, X(:, 1:end))), ...
+            'omitnan');
 
+coef = [b0s; b1s];
 end
\ No newline at end of file
diff --git a/example.m b/example.m
index f383b58..d780850 100644
--- a/example.m
+++ b/example.m
@@ -30,8 +30,7 @@ data_y(outlr_idx) = outlr_y;
 est_ls = [ones(N_total, 1), data_x] \ data_y;
 
 % Estimate Theil-Sen parameters.
-[b0, b1] = TheilSen(data_x, data_y);
-est_ts = [b0, b1];
+est_ts = TheilSen(data_x, data_y);
 
 % Plot everything and add comparison of estimates to title.
 figure()
@@ -39,8 +38,8 @@ plims = [min(data_x), max(data_x)]';
 normal_idx = setdiff(1:N_total, outlr_idx);
 plot(data_x(normal_idx), data_y(normal_idx), 'ko', ...
      data_x(outlr_idx), data_y(outlr_idx), 'rx', ...
-     plims, plims * est_ls(2) + est_ls(1), '-r', ...
-     plims, plims * est_ts(2) + est_ts(1), 'b-', ...
+     plims, est_ls(1) + plims * est_ls(2) , '-r', ...
+     plims, est_ts(1) + plims * est_ts(2), 'b-', ...
      'linewidth', 2)
 legend('Normal data', 'Outlier', 'Least Squares', 'Theil-Sen', 'location', 'NW')
 title(sprintf('True: [%.3g, %.3g], LS: [%.3g, %.3g], TS: [%.3g, %.3g]', ...
@@ -48,19 +47,19 @@ title(sprintf('True: [%.3g, %.3g], LS: [%.3g, %.3g], TS: [%.3g, %.3g]', ...
 
 %% Demonstrate use of multiple predictor variables in X.
 % NOTE: You must execute cell above first.
-% create 2 different predictor columns
+% create 3 different predictor columns
 data_x1 = data_x;
-data_x2 = data_x1 * -2 + 10 + randn(size(data_x1)) * SDx_usual;
-X = [data_x1, data_x2];
-[b0, b1] = TheilSen(X, data_y);
+data_x2 = +10 - 2 * data_x1 + randn(size(data_x1)) * SDx_usual;
+data_x3 = -50 + 5 * data_x1 + randn(size(data_x1)) * SDx_usual;
+X = [data_x1, data_x2, data_x3];
+est_ts = TheilSen(X, data_y);
 
 % plot both simple regressions; note mainly the difference between x-axes
 num_pred = size(X, 2);
 figure()
 for pp = 1:num_pred
-    est_ts = [b0(pp), b1(pp)];
     subplot(1, num_pred, pp)
     plims = [min(X(:, pp)), max(X(:, pp))]';
     plot(X(:, pp), data_y, 'k^', ...
-    plims, plims * est_ts(2) + est_ts(1), 'b-', 'linewidth', 2)
+    plims, est_ts(1, pp) + plims * est_ts(2, pp), 'b-', 'linewidth', 2)
 end
\ No newline at end of file
-- 
GitLab