diff --git a/DESCRIPTION b/DESCRIPTION index 41b67146660aab9c52adabf2af8e59cfbd6f56db..cd7ad58972a23f3a4b89f9858b9f21932325f3a9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: RFSurrogates Title: Surrogate Minimal Depth Variable Importance -Version: 0.4.1 +Version: 0.4.2 Authors@R: c( person("Stephan", "Seifert", , "stephan.seifert@uni-hamburg.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-2567-5728")), diff --git a/NEWS.md b/NEWS.md index d05eb8e89a214fe53e3008695ab5dc4d3aa7c6f6..476b50041ac551e99961d8fcfd9174e38a77af8f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# RFSurrogates 0.4.2 + +- Added a new optional parameter `round_digits` (default: 2) to `MeanAdjustedAgreement` and `meanAdjAgree`. The default preserves the previous behaviour; other values round the mean adjusted agreement to a different number of decimal places. + # RFSurrogates 0.4.1 - Fix `SurrogateMinimalDepth`'s result `$selected` returning modified names (#13) diff --git a/R/MeanAdjustedAgreement.R b/R/MeanAdjustedAgreement.R index 23bddf006ba9f31408958d873bdea1e1729ea027..49ba3419cf0cac0e312d845e790f85b6d7efbf9f 100644 --- a/R/MeanAdjustedAgreement.R +++ b/R/MeanAdjustedAgreement.R @@ -11,6 +11,7 @@ #' @param candidates Vector of variable names that **are candidates to be related to the variables**. (Default: All variables used to create the random forest.) #' @param related (Default: TRUE) Whether related variables should be identified. #' @param num.threads (Default: 1) Number of threads used for determination of relations. +#' @param round_digits (Default: 2) Round mean adjusted agreement to this many digits. #' #' @return A `MeanAdjustedAgreement` list object: #' * `RFS`: The original [RandomForestSurrogates()] object. 
@@ -43,13 +44,13 @@ #' @keywords varrel #' @export MeanAdjustedAgreement <- function( - RFS, - t = 5, - variables = RFS$ranger$forest$independent.variable.names, - candidates = RFS$ranger$forest$independent.variable.names, - related = TRUE, - num.threads = 1 -) { + RFS, + t = 5, + variables = RFS$ranger$forest$independent.variable.names, + candidates = RFS$ranger$forest$independent.variable.names, + related = TRUE, + num.threads = 1, + round_digits = 2) { if (!inherits(RFS, "RandomForestSurrogates")) { stop("`RFS` must be a `RandomForestSurrogates` object.") } @@ -73,10 +74,11 @@ MeanAdjustedAgreement <- function( t = t, s.a = s$s.a, select.var = related, - num.threads = num.threads + num.threads = num.threads, + round_digits = round_digits ) - results = list( + results <- list( RFS = RFS, relations = maa$surr.res, threshold = maa$threshold diff --git a/R/meanAdjAgree.R b/R/meanAdjAgree.R index e5f3743f54ee7f2f1f37d078b24b24466bb0c977..cf96a06cfb54282cc79ce65489b9996a2be3687f 100644 --- a/R/meanAdjAgree.R +++ b/R/meanAdjAgree.R @@ -10,6 +10,7 @@ #' @param s.a average number of surrogate variables (ideally calculated by count.surrogates function). #' @param select.var set False if only relations should be calculated and no related variables should be selected. #' @param num.threads number of threads used for parallel execution. Default is number of CPUs available. +#' @param round_digits (Default: 2) Round mean adjusted agreement to this many digits in [mean.index]. #' #' @returns A list containing: #' * `variables`: the variables to which relations are investigated @@ -18,7 +19,7 @@ #' * `surr.var`: binary matrix showing if the variables are related (1) or non-related (0) with variables in rows and candidates in columns. 
#' #' @export -meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, select.var, num.threads = NULL) { +meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, select.var, num.threads = NULL, round_digits = 2) { num.trees <- length(trees) index.variables <- match(variables, allvariables) index.candidates <- match(candidates, allvariables) @@ -39,7 +40,8 @@ meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, sel 1:length(index.variables), mean.index, list.res, - index.variables + index.variables, + round_digits = round_digits )), ncol = length(candidates), nrow = length(variables), byrow = TRUE ) @@ -64,9 +66,9 @@ meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, sel #' This is an internal function #' #' @keywords internal -mean.index <- function(i, list.res, index.variables) { +mean.index <- function(i, list.res, index.variables, round_digits = 2) { list <- list.res[which(names(list.res) == index.variables[i])] - mean.list <- round(Reduce("+", list) / length(list), 2) + mean.list <- round(Reduce("+", list) / length(list), digits = round_digits) if (length(mean.list) > 0) { return(mean.list) } else { diff --git a/man/MeanAdjustedAgreement.Rd b/man/MeanAdjustedAgreement.Rd index b342cb91871e8f5c7a0699e4cbb569fb2578838b..fb081e07a3977c30945f2b8ca1400569c20f5863 100644 --- a/man/MeanAdjustedAgreement.Rd +++ b/man/MeanAdjustedAgreement.Rd @@ -10,34 +10,33 @@ MeanAdjustedAgreement( variables = RFS$ranger$forest$independent.variable.names, candidates = RFS$ranger$forest$independent.variable.names, related = TRUE, - num.threads = 1 + num.threads = 1, + round_digits = 2 ) } \arguments{ -\item{RFS}{A \code{\link[=RandomForestSurrogates]{RandomForestSurrogates()}} object.} +\item{RFS}{A [RandomForestSurrogates()] object.} \item{t}{(Default: 5) Used to calculate threshold.} -\item{variables}{Vector of variable names for \strong{which related variables should be searched}. 
(Default: All variables used to create the random forest.)} +\item{variables}{Vector of variable names for **which related variables should be searched**. (Default: All variables used to create the random forest.)} -\item{candidates}{Vector of variable names that \strong{are candidates to be related to the variables}. (Default: All variables used to create the random forest.)} +\item{candidates}{Vector of variable names that **are candidates to be related to the variables**. (Default: All variables used to create the random forest.)} \item{related}{(Default: TRUE) Whether related variables should be identified.} \item{num.threads}{(Default: 1) Number of threads used for determination of relations.} + +\item{round_digits}{(Default: 2) Round mean adjusted agreement to this many digits.} } \value{ -A \code{MeanAdjustedAgreement} list object: -\itemize{ -\item \code{RFS}: The original \code{\link[=RandomForestSurrogates]{RandomForestSurrogates()}} object. -\item \code{relations}: Matrix with mean adjusted agreement values -\itemize{ -\item Rows: \code{variables}. -\item Columns: \code{candidates}. -} -\item \code{threshold}: the threshold used to select related variables. -\item \code{related}: A list of vectors for each \code{variable} containing related \code{candidates}. Only present if \code{related = TRUE} (Default). -} +A `MeanAdjustedAgreement` list object: + * `RFS`: The original [RandomForestSurrogates()] object. + * `relations`: Matrix with mean adjusted agreement values + * Rows: `variables`. + * Columns: `candidates`. + * `threshold`: the threshold used to select related variables. + * `related`: A list of vectors for each `variable` containing related `candidates`. Only present if `related = TRUE` (Default). } \description{ This function uses the mean adjusted agreement to select variables that are related to a defined variable using a threshold T. 
diff --git a/man/mean.index.Rd b/man/mean.index.Rd index 098eaf14d6e22585b79c7ebfe471425702a7b508..9d2cf260dc6cec3e4a2b9393571cf9fde75af53e 100644 --- a/man/mean.index.Rd +++ b/man/mean.index.Rd @@ -4,7 +4,7 @@ \alias{mean.index} \title{mean.index} \usage{ -\method{mean}{index}(i, list.res, index.variables) +\method{mean}{index}(i, list.res, index.variables, round_digits = 2) } \description{ This is an internal function diff --git a/man/meanAdjAgree.Rd b/man/meanAdjAgree.Rd index cb1f03cfdb1eaac340c8496b59c324e94aa7889b..61b1c3902a799b33fd13fe0f292a2356a5abe054 100644 --- a/man/meanAdjAgree.Rd +++ b/man/meanAdjAgree.Rd @@ -12,11 +12,12 @@ meanAdjAgree( t, s.a, select.var, - num.threads = NULL + num.threads = NULL, + round_digits = 2 ) } \arguments{ -\item{trees}{list of trees created by \code{\link[=getTreeranger]{getTreeranger()}}, \code{\link[=addLayer]{addLayer()}} and \code{\link[=addSurrogates]{addSurrogates()}}.} +\item{trees}{list of trees created by [getTreeranger()], [addLayer()] and [addSurrogates()].} \item{variables}{vector of variable names.} @@ -24,22 +25,22 @@ meanAdjAgree( \item{candidates}{vector of variable names (strings) that are candidates to be related to the variables (has to be contained in allvariables)} -\item{t}{variable to calculate threshold. Used if \code{select.var = TRUE}.} +\item{t}{variable to calculate threshold. Used if `select.var = TRUE`.} \item{s.a}{average number of surrogate variables (ideally calculated by count.surrogates function).} \item{select.var}{set False if only relations should be calculated and no related variables should be selected.} \item{num.threads}{number of threads used for parallel execution. 
Default is number of CPUs available.} + +\item{round_digits}{(Default: 2) Round mean adjusted agreement to this many digits in [mean.index].} } \value{ A list containing: -\itemize{ -\item \code{variables}: the variables to which relations are investigated -\item \code{surr.res}: matrix with mean adjusted agreement values and variables investigated in rows and candidate variables in columns -\item \code{threshold}: the threshold used to create surr.var from surr.res -\item \code{surr.var}: binary matrix showing if the variables are related (1) or non-related (0) with variables in rows and candidates in columns. -} + * `variables`: the variables to which relations are investigated + * `surr.res`: matrix with mean adjusted agreement values and variables investigated in rows and candidate variables in columns + * `threshold`: the threshold used to create surr.var from surr.res + * `surr.var`: binary matrix showing if the variables are related (1) or non-related (0) with variables in rows and candidates in columns. } \description{ This is the main function of var.relations function.