From 8ba5d1edef7fd81672b6a8239e10c329d930a603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20G=C3=A4rber?= <florian.gaerber@uni-hamburg.de> Date: Tue, 27 Aug 2024 13:06:20 +0200 Subject: [PATCH] feat: Add round_digits parameter to MAA Refs: #15 --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ R/MeanAdjustedAgreement.R | 20 +++++++++++--------- R/meanAdjAgree.R | 10 ++++++---- man/MeanAdjustedAgreement.Rd | 29 ++++++++++++++--------------- man/mean.index.Rd | 2 +- man/meanAdjAgree.Rd | 19 ++++++++++--------- 7 files changed, 47 insertions(+), 39 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 41b6714..cd7ad58 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: RFSurrogates Title: Surrogate Minimal Depth Variable Importance -Version: 0.4.1 +Version: 0.4.2 Authors@R: c( person("Stephan", "Seifert", , "stephan.seifert@uni-hamburg.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-2567-5728")), diff --git a/NEWS.md b/NEWS.md index d05eb8e..476b500 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# RFSurrogates 0.4.2 + +- Added new optional parameter to `MeanAdjustedAgreement` and `meanAdjAgree`: `round_digits` defaulting to 2. This offers the same behaviour as before, but allows rounding to different amounts of decimal places if desired. + # RFSurrogates 0.4.1 - Fix `SurrogateMinimalDepth`'s result `$selected` returning modified names (#13) diff --git a/R/MeanAdjustedAgreement.R b/R/MeanAdjustedAgreement.R index 23bddf0..49ba341 100644 --- a/R/MeanAdjustedAgreement.R +++ b/R/MeanAdjustedAgreement.R @@ -11,6 +11,7 @@ #' @param candidates Vector of variable names that **are candidates to be related to the variables**. (Default: All variables used to create the random forest.) #' @param related (Default: TRUE) Whether related variables should be identified. #' @param num.threads (Default: 1) Number of threads used for determination of relations. +#' @param round_digits (Default: 2) Round mean adjusted agreement to this many digits. #' #' @return A `MeanAdjustedAgreement` list object: #' * `RFS`: The original [RandomForestSurrogates()] object. @@ -43,13 +44,13 @@ #' @keywords varrel #' @export MeanAdjustedAgreement <- function( - RFS, - t = 5, - variables = RFS$ranger$forest$independent.variable.names, - candidates = RFS$ranger$forest$independent.variable.names, - related = TRUE, - num.threads = 1 -) { + RFS, + t = 5, + variables = RFS$ranger$forest$independent.variable.names, + candidates = RFS$ranger$forest$independent.variable.names, + related = TRUE, + num.threads = 1, + round_digits = 2) { if (!inherits(RFS, "RandomForestSurrogates")) { stop("`RFS` must be a `RandomForestSurrogates` object.") } @@ -73,10 +74,11 @@ MeanAdjustedAgreement <- function( t = t, s.a = s$s.a, select.var = related, - num.threads = num.threads + num.threads = num.threads, + round_digits = round_digits ) - results = list( + results <- list( RFS = RFS, relations = maa$surr.res, threshold = maa$threshold diff --git a/R/meanAdjAgree.R b/R/meanAdjAgree.R index e5f3743..cf96a06 100644 --- a/R/meanAdjAgree.R +++ b/R/meanAdjAgree.R @@ -10,6 +10,7 @@ #' @param s.a average number of surrogate variables (ideally calculated by count.surrogates function). #' @param select.var set False if only relations should be calculated and no related variables should be selected. #' @param num.threads number of threads used for parallel execution. Default is number of CPUs available. +#' @param round_digits (Default: 2) Round mean adjusted agreement to this many digits in [mean.index]. #' #' @returns A list containing: #' * `variables`: the variables to which relations are investigated @@ -18,7 +19,7 @@ #' * `surr.var`: binary matrix showing if the variables are related (1) or non-related (0) with variables in rows and candidates in columns. #' #' @export -meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, select.var, num.threads = NULL) { +meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, select.var, num.threads = NULL, round_digits = 2) { num.trees <- length(trees) index.variables <- match(variables, allvariables) index.candidates <- match(candidates, allvariables) @@ -39,7 +40,8 @@ meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, sel 1:length(index.variables), mean.index, list.res, - index.variables + index.variables, + round_digits = round_digits )), ncol = length(candidates), nrow = length(variables), byrow = TRUE ) @@ -64,9 +66,9 @@ meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, sel #' This is an internal function #' #' @keywords internal -mean.index <- function(i, list.res, index.variables) { +mean.index <- function(i, list.res, index.variables, round_digits = 2) { list <- list.res[which(names(list.res) == index.variables[i])] - mean.list <- round(Reduce("+", list) / length(list), 2) + mean.list <- round(Reduce("+", list) / length(list), digits = round_digits) if (length(mean.list) > 0) { return(mean.list) } else { diff --git a/man/MeanAdjustedAgreement.Rd b/man/MeanAdjustedAgreement.Rd index b342cb9..fb081e0 100644 --- a/man/MeanAdjustedAgreement.Rd +++ b/man/MeanAdjustedAgreement.Rd @@ -10,34 +10,33 @@ MeanAdjustedAgreement( variables = RFS$ranger$forest$independent.variable.names, candidates = RFS$ranger$forest$independent.variable.names, related = TRUE, - num.threads = 1 + num.threads = 1, + round_digits = 2 ) } \arguments{ -\item{RFS}{A \code{\link[=RandomForestSurrogates]{RandomForestSurrogates()}} object.} +\item{RFS}{A [RandomForestSurrogates()] object.} \item{t}{(Default: 5) Used to calculate threshold.} -\item{variables}{Vector of variable names for \strong{which related variables should be searched}. (Default: All variables used to create the random forest.)} +\item{variables}{Vector of variable names for **which related variables should be searched**. (Default: All variables used to create the random forest.)} -\item{candidates}{Vector of variable names that \strong{are candidates to be related to the variables}. (Default: All variables used to create the random forest.)} +\item{candidates}{Vector of variable names that **are candidates to be related to the variables**. (Default: All variables used to create the random forest.)} \item{related}{(Default: TRUE) Whether related variables should be identified.} \item{num.threads}{(Default: 1) Number of threads used for determination of relations.} + +\item{round_digits}{(Default: 2) Round mean adjusted agreement to this many digits.} } \value{ -A \code{MeanAdjustedAgreement} list object: -\itemize{ -\item \code{RFS}: The original \code{\link[=RandomForestSurrogates]{RandomForestSurrogates()}} object. -\item \code{relations}: Matrix with mean adjusted agreement values -\itemize{ -\item Rows: \code{variables}. -\item Columns: \code{candidates}. -} -\item \code{threshold}: the threshold used to select related variables. -\item \code{related}: A list of vectors for each \code{variable} containing related \code{candidates}. Only present if \code{related = TRUE} (Default). -} +A `MeanAdjustedAgreement` list object: + * `RFS`: The original [RandomForestSurrogates()] object. + * `relations`: Matrix with mean adjusted agreement values + * Rows: `variables`. + * Columns: `candidates`. + * `threshold`: the threshold used to select related variables. + * `related`: A list of vectors for each `variable` containing related `candidates`. Only present if `related = TRUE` (Default). } \description{ This function uses the mean adjusted agreement to select variables that are related to a defined variable using a threshold T. diff --git a/man/mean.index.Rd b/man/mean.index.Rd index 098eaf1..9d2cf26 100644 --- a/man/mean.index.Rd +++ b/man/mean.index.Rd @@ -4,7 +4,7 @@ \alias{mean.index} \title{mean.index} \usage{ -\method{mean}{index}(i, list.res, index.variables) +\method{mean}{index}(i, list.res, index.variables, round_digits = 2) } \description{ This is an internal function diff --git a/man/meanAdjAgree.Rd b/man/meanAdjAgree.Rd index cb1f03c..61b1c39 100644 --- a/man/meanAdjAgree.Rd +++ b/man/meanAdjAgree.Rd @@ -12,11 +12,12 @@ meanAdjAgree( t, s.a, select.var, - num.threads = NULL + num.threads = NULL, + round_digits = 2 ) } \arguments{ -\item{trees}{list of trees created by \code{\link[=getTreeranger]{getTreeranger()}}, \code{\link[=addLayer]{addLayer()}} and \code{\link[=addSurrogates]{addSurrogates()}}.} +\item{trees}{list of trees created by [getTreeranger()], [addLayer()] and [addSurrogates()].} \item{variables}{vector of variable names.} @@ -24,22 +25,22 @@ meanAdjAgree( \item{candidates}{vector of variable names (strings) that are candidates to be related to the variables (has to be contained in allvariables)} -\item{t}{variable to calculate threshold. Used if \code{select.var = TRUE}.} +\item{t}{variable to calculate threshold. Used if `select.var = TRUE`.} \item{s.a}{average number of surrogate variables (ideally calculated by count.surrogates function).} \item{select.var}{set False if only relations should be calculated and no related variables should be selected.} \item{num.threads}{number of threads used for parallel execution. Default is number of CPUs available.} + +\item{round_digits}{(Default: 2) Round mean adjusted agreement to this many digits in [mean.index].} } \value{ A list containing: -\itemize{ -\item \code{variables}: the variables to which relations are investigated -\item \code{surr.res}: matrix with mean adjusted agreement values and variables investigated in rows and candidate variables in columns -\item \code{threshold}: the threshold used to create surr.var from surr.res -\item \code{surr.var}: binary matrix showing if the variables are related (1) or non-related (0) with variables in rows and candidates in columns. -} + * `variables`: the variables to which relations are investigated + * `surr.res`: matrix with mean adjusted agreement values and variables investigated in rows and candidate variables in columns + * `threshold`: the threshold used to create surr.var from surr.res + * `surr.var`: binary matrix showing if the variables are related (1) or non-related (0) with variables in rows and candidates in columns. } \description{ This is the main function of var.relations function. -- GitLab