From a3ca14ffc10e78cd73e537f00167ae91a10eab02 Mon Sep 17 00:00:00 2001 From: Johannes Keyser <johannes.keyser@sport.uni-giessen.de> Date: Wed, 23 Mar 2022 19:44:49 +0100 Subject: [PATCH] More and better phrased documentation. --- README.md | 17 +++++++---------- example.m | 12 ++++++------ 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 42792ba..b1271a8 100644 --- a/README.md +++ b/README.md @@ -14,18 +14,18 @@ FIXME: Mention main idea (data point pairs), and resulting complexity. ### No dependency on Matlab toolboxes This code is based on [Theil-Sen Robust Linear Regression](https://mathworks.com/matlabcentral/fileexchange/48294-theil-sen-robust-linear-regression), version 1.2.0.0, by [Zachary Danziger](https://mathworks.com/matlabcentral/profile/authors/1044524). -It was modified mainly by re-defining its call to the `nanmedian()` function, which creates a dependency on the (commercially licensed) [Statistics Toolbox](https://mathworks.com/products/statistics.html). +A key modification was to use `median(X, 'omitnan')` instead of `nanmedian(X)` to avoid dependency on the (commercially licensed) [Statistics Toolbox](https://mathworks.com/products/statistics.html). -There are several other implementations of the Theil-Sen estimator on Mathworks's [File Exchange](https://mathworks.com/matlabcentral/fileexchange). -Unfortunately all were found to depend on the Statistics Toolbox, [except one](https://mathworks.com/matlabcentral/fileexchange/43135-regression-utilities), which was judged to be slower and less versatile. +(Note that there are several other implementations of the Theil-Sen estimator on Mathworks's [File Exchange](https://mathworks.com/matlabcentral/fileexchange). +Unfortunately all were found to depend on the Statistics Toolbox, [except one](https://mathworks.com/matlabcentral/fileexchange/43135-regression-utilities), which was judged to be slower and less versatile.) ## Installation -Clone/copy the files to your computer, and add the folder to Matlab's path variable. +Copy the files to your computer, and add the folder to Matlab's path variable. ## Usage -TODO: Add examples, and show some expected output. +Please refer to the comments in the header lines of [TheilSen.m](TheilSen.m). ### Example @@ -39,17 +39,14 @@ Note how a few "unlucky" outliers can bias the least-squares estimate, but have - Add more documentation. - [ ] Mention original use case. - - [ ] Add usage example and plot. -- [ ] Add a drop-in for `nanmean()` to become independent of Statistics Toolbox. - [ ] Change input interface to match Matlab's `robustfit()`. - [ ] Add more checks for input sanity. - ## Changelog - October 2014 by Z. Danziger: Original version. - September 2015 by Z. Danziger: Updated help, speed increase for 2D case -- March 2022 by J. Keyser: Adjusted formatting, added documentation, +- March 2022 by J. Keyser: Adjusted formatting, added documentation, improved example and added plot, replace `nanmedian(X)` with `median(X, 'omitnan')`, ... ## Contributing and project status @@ -64,4 +61,4 @@ Feature requests should be directed to the original author. ## License -[Simplified BSD License](https://en.wikipedia.org/wiki/BSD_licenses#2-clause_license_(%22Simplified_BSD_License%22_or_%22FreeBSD_License%22)), see [license.txt](license.txt). +[BSD 2-clause simplified license](https://en.wikipedia.org/wiki/BSD_licenses#2-clause_license_(%22Simplified_BSD_License%22_or_%22FreeBSD_License%22)), see [license.txt](license.txt). diff --git a/example.m b/example.m index 1d88b18..b6c4cfb 100644 --- a/example.m +++ b/example.m @@ -5,19 +5,19 @@ clearvars * N_total = 20; N_outlr = round(0.2 * N_total); -% Simulate a linear data set +% Simulate a linear data set. true_b0 = -2; true_b1 = 10; data_x = linspace(0, 1, N_total)'; data_y = true_b0 + true_b1 .* data_x; -% add some "usual" Gaussian noise to both dimensions +% Add some "usual" Gaussian noise to both dimensions. SDx_usual = 0.1; SDy_usual = 2.0; data_x = data_x + randn(size(data_x)) * SDx_usual; data_y = data_y + randn(size(data_y)) * SDy_usual; -% simulate "outliers" as signal-dependent, positive noise in y dimension +% Simulate "outliers" as signal-dependent, positive noise in y dimension. SDy_outlr = 20 * SDy_usual; outlr_idx = randperm(N_total, N_outlr); % randomly select data points outlr_y = data_y(outlr_idx); % start with original value @@ -26,14 +26,14 @@ outlr_y = outlr_y + abs(randn(N_outlr, 1) * SDy_outlr); % over-dispersion outlr_y = outlr_y .* data_x(outlr_idx); % scale by x magnitude data_y(outlr_idx) = outlr_y; -% estimate least squares parameters +% Estimate least squares parameters. est_ls = [ones(N_total, 1), data_x] \ data_y; -% estimate Theil-Sen parameters +% Estimate Theil-Sen parameters. [m, b] = TheilSen([data_x, data_y]); est_ts = [b, m]; -% plot everything +% Plot everything and add comparison of estimates to title. figure() plims = [min(data_x), max(data_x)]'; normal_idx = setdiff(1:N_total, outlr_idx); -- GitLab