Skip to content
Snippets Groups Projects
Unverified Commit a5f375fb authored by Gärber, Florian's avatar Gärber, Florian Committed by GitHub
Browse files

feat: Add round_digits parameter to MAA (#16)

This addresses the first step of #15: With its default value, the new parameter
preserves the previous behaviour, but it provides flexibility for those who want additional digits.
parents d55825b5 8ba5d1ed
No related branches found
No related tags found
No related merge requests found
Type: Package
Package: RFSurrogates
Title: Surrogate Minimal Depth Variable Importance
Version: 0.4.1
Version: 0.4.2
Authors@R: c(
person("Stephan", "Seifert", , "stephan.seifert@uni-hamburg.de", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-2567-5728")),
......
# RFSurrogates 0.4.2
- Added a new optional parameter `round_digits` (default: 2) to `MeanAdjustedAgreement` and `meanAdjAgree`. The default preserves the previous behaviour, while other values allow rounding to a different number of decimal places if desired.
# RFSurrogates 0.4.1
- Fix `SurrogateMinimalDepth`'s result `$selected` returning modified names (#13)
......
......@@ -11,6 +11,7 @@
#' @param candidates Vector of variable names that **are candidates to be related to the variables**. (Default: All variables used to create the random forest.)
#' @param related (Default: TRUE) Whether related variables should be identified.
#' @param num.threads (Default: 1) Number of threads used for determination of relations.
#' @param round_digits (Default: 2) Round mean adjusted agreement to this many digits.
#'
#' @return A `MeanAdjustedAgreement` list object:
#' * `RFS`: The original [RandomForestSurrogates()] object.
......@@ -43,13 +44,13 @@
#' @keywords varrel
#' @export
MeanAdjustedAgreement <- function(
RFS,
t = 5,
variables = RFS$ranger$forest$independent.variable.names,
candidates = RFS$ranger$forest$independent.variable.names,
related = TRUE,
num.threads = 1
) {
RFS,
t = 5,
variables = RFS$ranger$forest$independent.variable.names,
candidates = RFS$ranger$forest$independent.variable.names,
related = TRUE,
num.threads = 1,
round_digits = 2) {
if (!inherits(RFS, "RandomForestSurrogates")) {
stop("`RFS` must be a `RandomForestSurrogates` object.")
}
......@@ -73,10 +74,11 @@ MeanAdjustedAgreement <- function(
t = t,
s.a = s$s.a,
select.var = related,
num.threads = num.threads
num.threads = num.threads,
round_digits = round_digits
)
results = list(
results <- list(
RFS = RFS,
relations = maa$surr.res,
threshold = maa$threshold
......
......@@ -10,6 +10,7 @@
#' @param s.a average number of surrogate variables (ideally calculated by count.surrogates function).
#' @param select.var set to `FALSE` if only relations should be calculated and no related variables should be selected.
#' @param num.threads number of threads used for parallel execution. Default is number of CPUs available.
#' @param round_digits (Default: 2) Round mean adjusted agreement to this many digits in [mean.index].
#'
#' @returns A list containing:
#' * `variables`: the variables to which relations are investigated
......@@ -18,7 +19,7 @@
#' * `surr.var`: binary matrix showing if the variables are related (1) or non-related (0) with variables in rows and candidates in columns.
#'
#' @export
meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, select.var, num.threads = NULL) {
meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, select.var, num.threads = NULL, round_digits = 2) {
num.trees <- length(trees)
index.variables <- match(variables, allvariables)
index.candidates <- match(candidates, allvariables)
......@@ -39,7 +40,8 @@ meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, sel
1:length(index.variables),
mean.index,
list.res,
index.variables
index.variables,
round_digits = round_digits
)),
ncol = length(candidates), nrow = length(variables), byrow = TRUE
)
......@@ -64,9 +66,9 @@ meanAdjAgree <- function(trees, variables, allvariables, candidates, t, s.a, sel
#' This is an internal function
#'
#' @keywords internal
mean.index <- function(i, list.res, index.variables) {
mean.index <- function(i, list.res, index.variables, round_digits = 2) {
list <- list.res[which(names(list.res) == index.variables[i])]
mean.list <- round(Reduce("+", list) / length(list), 2)
mean.list <- round(Reduce("+", list) / length(list), digits = round_digits)
if (length(mean.list) > 0) {
return(mean.list)
} else {
......
......@@ -10,34 +10,33 @@ MeanAdjustedAgreement(
variables = RFS$ranger$forest$independent.variable.names,
candidates = RFS$ranger$forest$independent.variable.names,
related = TRUE,
num.threads = 1
num.threads = 1,
round_digits = 2
)
}
\arguments{
\item{RFS}{A \code{\link[=RandomForestSurrogates]{RandomForestSurrogates()}} object.}
\item{RFS}{A [RandomForestSurrogates()] object.}
\item{t}{(Default: 5) Used to calculate threshold.}
\item{variables}{Vector of variable names for \strong{which related variables should be searched}. (Default: All variables used to create the random forest.)}
\item{variables}{Vector of variable names for **which related variables should be searched**. (Default: All variables used to create the random forest.)}
\item{candidates}{Vector of variable names that \strong{are candidates to be related to the variables}. (Default: All variables used to create the random forest.)}
\item{candidates}{Vector of variable names that **are candidates to be related to the variables**. (Default: All variables used to create the random forest.)}
\item{related}{(Default: TRUE) Whether related variables should be identified.}
\item{num.threads}{(Default: 1) Number of threads used for determination of relations.}
\item{round_digits}{(Default: 2) Round mean adjusted agreement to this many digits.}
}
\value{
A \code{MeanAdjustedAgreement} list object:
\itemize{
\item \code{RFS}: The original \code{\link[=RandomForestSurrogates]{RandomForestSurrogates()}} object.
\item \code{relations}: Matrix with mean adjusted agreement values
\itemize{
\item Rows: \code{variables}.
\item Columns: \code{candidates}.
}
\item \code{threshold}: the threshold used to select related variables.
\item \code{related}: A list of vectors for each \code{variable} containing related \code{candidates}. Only present if \code{related = TRUE} (Default).
}
A `MeanAdjustedAgreement` list object:
* `RFS`: The original [RandomForestSurrogates()] object.
* `relations`: Matrix with mean adjusted agreement values
* Rows: `variables`.
* Columns: `candidates`.
* `threshold`: the threshold used to select related variables.
* `related`: A list of vectors for each `variable` containing related `candidates`. Only present if `related = TRUE` (Default).
}
\description{
This function uses the mean adjusted agreement to select variables that are related to a defined variable using a threshold T.
......
......@@ -4,7 +4,7 @@
\alias{mean.index}
\title{mean.index}
\usage{
\method{mean}{index}(i, list.res, index.variables)
\method{mean}{index}(i, list.res, index.variables, round_digits = 2)
}
\description{
This is an internal function
......
......@@ -12,11 +12,12 @@ meanAdjAgree(
t,
s.a,
select.var,
num.threads = NULL
num.threads = NULL,
round_digits = 2
)
}
\arguments{
\item{trees}{list of trees created by \code{\link[=getTreeranger]{getTreeranger()}}, \code{\link[=addLayer]{addLayer()}} and \code{\link[=addSurrogates]{addSurrogates()}}.}
\item{trees}{list of trees created by [getTreeranger()], [addLayer()] and [addSurrogates()].}
\item{variables}{vector of variable names.}
......@@ -24,22 +25,22 @@ meanAdjAgree(
\item{candidates}{vector of variable names (strings) that are candidates to be related to the variables (has to be contained in allvariables)}
\item{t}{variable to calculate threshold. Used if \code{select.var = TRUE}.}
\item{t}{variable to calculate threshold. Used if `select.var = TRUE`.}
\item{s.a}{average number of surrogate variables (ideally calculated by count.surrogates function).}
\item{select.var}{set to \code{FALSE} if only relations should be calculated and no related variables should be selected.}
\item{num.threads}{number of threads used for parallel execution. Default is number of CPUs available.}
\item{round_digits}{(Default: 2) Round mean adjusted agreement to this many digits in [mean.index].}
}
\value{
A list containing:
\itemize{
\item \code{variables}: the variables to which relations are investigated
\item \code{surr.res}: matrix with mean adjusted agreement values and variables investigated in rows and candidate variables in columns
\item \code{threshold}: the threshold used to create surr.var from surr.res
\item \code{surr.var}: binary matrix showing if the variables are related (1) or non-related (0) with variables in rows and candidates in columns.
}
* `variables`: the variables to which relations are investigated
* `surr.res`: matrix with mean adjusted agreement values and variables investigated in rows and candidate variables in columns
* `threshold`: the threshold used to create surr.var from surr.res
* `surr.var`: binary matrix showing if the variables are related (1) or non-related (0) with variables in rows and candidates in columns.
}
\description{
This is the main function of the var.relations function.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment