Skip to content
Snippets Groups Projects
Commit 23b125a0 authored by Otto, Dr. Saskia's avatar Otto, Dr. Saskia
Browse files

added functions

parent 6fd48f38
Branches
No related tags found
No related merge requests found
#' Calculate the variance inflation factors
#'
#' Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
#' eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
#' voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
#' clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
#'
#' @name calc_vif
#' @param x matrix or data frame containing only numerical variables.
#' @return
#' The function returns a matrix with 3 columns:
#' \describe{
#' \item{\code{Rsquared}}{}
#' \item{\code{Tolerance}}{}
#' \item{\code{VIF}}{}
#' }
#' @export
#' @examples
#' x <- data.frame(a = 1:20, b = 1:20*2 + rnorm(20) )
#' calc_vif(x)
calc_vif <- function(x) {
result <- matrix(NA, nrow = ncol(x), ncol = 3)
rownames(result) <- colnames(x)
colnames(result) <- c("Rsquared", "Tolerance", "VIF")
for (v in 1:ncol(x)) {
v.name <- colnames(x)[v]
other.v.names <- colnames(x)[-v]
mod.formula <- as.formula(paste(v.name, "~", paste(other.v.names, collapse = "+")))
mod <- lm(mod.formula, data = x)
R2 <- summary(mod) $ r.squared
result[v, "Rsquared"] <- R2
result[v, "Tolerance"] <- 1 - R2
result[v, "VIF"] <- 1 / (1 - R2)
}
return(result)
}
#' Create multiple plots to explore gaps in the time series
#'
#' This function creates an image plot where available values for the different variables and years
#' are indicated in gray and missing values in white. At the right side and bottom, two barplots
#' are added showing the frequency of available variables per year and the available years per
#' variable.
#'
#' @name explore_na
#' @param x matrix or data frame containing time series of multiple variables.
#' @param time vector of time units that will be used for the x ax
#' @param cex_x double; the magnification to be used for the x labels relative to the
#' current setting of cex.
#' @param cex_y double; the magnification to be used for the y labels relative to the
#' current setting of cex.
#' @param madj_y double; adjustment of left margin relative to the
#' current setting for longer or short variable names in the image plot and right barplot.
#' @param hadj_y double; horizontal adjustment of the y labels in the image plot.
#' @param respect logical; this argument controls whether a unit column-width is the
#' same physical measurement on the device as a unit row-height (default is TRUE).
#'
#' @export
#' @author Saskia A. Otto
#' @examples
#' mat <- matrix(1:200, ncol = 10)
#' mat[sample(1:200, sample(10:80, 1))] <- NA
#' colnames(mat) <- paste0("variable", 1:10)
#' explore_na(x = mat, time = 1981:2000)
#' colnames(mat) <- LETTERS[1:10]
#' explore_na(x = mat, time = 1981:2000, madj_y = -1.5, hadj_y = 2, respect = FALSE)
explore_na <- function(x, time = NULL, cex_x = 1, cex_y = 1,
madj_y = 0, hadj_y = 1, respect = TRUE,...){
if(!is.null(time)) {
rownames(x) <- time
}
x[!is.na(x)] <- 1
x[is.na(x)] <- 0
x <- t(as.matrix(x))
ylabel <- rownames(x)
xlabel <- colnames(x)
# Reverse Y axis
reverse <- nrow(x) : 1
ylabel <- ylabel[reverse]
x <- x[reverse,]
# Data Map
layout(matrix(c(1,1,2,1,1,2,3,3,4), ncol=3, byrow = TRUE), widths = c(2,2,1.5),
heights = c(2,1,1.5), respect = respect)
# graphics::layout(matrix(c(1,1,1,1), ncol = 2), widths = c(3.5,3.5), heights = c(2,2),
# respect = respect)
# layout.show(plot_layout)
par(mar = c(3, 7+madj_y, 1.5, .5))
image(1:length(xlabel), 1:length(ylabel), t(x), col = c("white", "grey60"),
ylab = "", xlab = "", axes = FALSE, zlim = c(0,1))
axis(side = 1, at = 1:length(xlabel), labels = xlabel,
las = 2, cex.axis = cex_x, padj = 0.5)
axis(side = 2, at = 1:length(ylabel), labels = ylabel,
las = 1, cex.axis = cex_y, hadj = hadj_y)
box()
# barplot variables
par(mar = c(3, 7+madj_y, 1.5, 1.5))
barplot(apply(x>0, 1, sum), xlab = "", las = 2, horiz = TRUE,
cex.axis = cex_x, space = 0, axis.lty = 1, col = "#8cb2b2", yaxs = "i")
title(xlab="Number of years", line=2, cex.lab=1.2)
#barplot stations
par(mar = c(4, 5+madj_y, 1.5, .5))
barplot(apply(x>0, 2, sum), ylab = "",
las = 2, cex.axis = cex_y, space = 0, col = "#db7170", xaxs = "i")
title(ylab="Number of variables", line=2.5, cex.lab=1.2)
}
#' Imputation of missing values by local mean substitution
#'
#' The function substitutes missing values in time series with a mean of the \emph{n} previous and following years
#' (or less depending on the position of NAs, whether it is at the beginning or end of the time series, and
#' the presence of further NAs in the selected time period).
#'
#' @name impute
#' @param x numeric vector containing NAs.
#' @param n integer; the number of previous and following values to be included in the mean.
#' The default is 2 so the mean is based on a 5yr-period (including the year of the missing
#' value).
#'
#' @return
#' The function returns the same numeric input vector, but with replaced missing values.
#' @export
#' @author Saskia A. Otto
#' @examples
#' x <- c(NA, 1, 4, 2, 5, NA, 9, NA, 12, 11, NA)
#' impute(x)
impute <- function(x, n = 2) {
### Data input validation
if (!is.numeric(x)) {
stop("x needs to be numeric.")
}
if (!any(is.na(x))) {
return(x)
}
# --------------------------
out <- x
loc_na <- which(is.na(x))
for(i in loc_na) {
choose <- (i-n):(i+n)
# Remove negative indices
choose <- choose[choose>0]
out[i] <- mean(x[choose], na.rm = TRUE)
}
return(out)
}
#' Compute (partial) autocorrelation functions and test for significance
#'
#' @param x numeric; an evenly spaced time vector which should be tested for temporal
#' autocorrelation. Temporal gaps should be included as NAs.
#' @return
#' The function returns a list with 2 components:
#' \describe{
#' \item{\code{tac}}{TRUE if any lag in the (partial) autocorrelation functions
#' is significantly correlated (if correlation value > 0.4), else FALSE.}
#' \item{\code{max_lag}}{The maximal lag that is correlated.}
#' }
#' @export
#' @examples
#' test_tac(x = 1:20)
#' test_tac(x = rnorm(20))
#'
test_tac <- function(x) {
# Get acf values
acf_val <- as.vector(stats::acf(x,
na.action = stats::na.pass, plot = FALSE)$acf)
# Get pacf values
pacf_val <- as.vector(stats::pacf(x, na.action = stats::na.pass,
plot = FALSE)$acf)
# Is there temporal autocorrelation? TRUE = tac occurs
tac <- any((abs(acf_val[2:6]) > 0.4) & (abs(pacf_val[1:5]) >
0.4))
lags <- which(abs(pacf_val[1:5]) > 0.4)
if (length(lags) == 0) {
max_lag <- 0
} else {
max_lag <- max(lags)
}
# Create output list
res <- list(tac = tac, max_lag = max_lag)
return(res)
}
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#' 2e-14 outside that range are accepted and moved to the nearby endpoint.) #' 2e-14 outside that range are accepted and moved to the nearby endpoint.)
#' @param quantile_type an integer between 1 and 9 selecting one of the nine quantile #' @param quantile_type an integer between 1 and 9 selecting one of the nine quantile
#' algorithms detailed below to be used. Default is 7 (see also \code{\link{quantile}}). #' algorithms detailed below to be used. Default is 7 (see also \code{\link{quantile}}).
#' @param intervals logical; number of evenly-spaced intervals. Default is 5. #' @param intervals logical; number of evenly spaced intervals. Default is 5.
#' @param cols a character vector with colors for each quantile or interval. #' @param cols a character vector with colors for each quantile or interval.
#' @param main a title. #' @param main a title.
#' @param xlab The x axis title. Default is none. #' @param xlab The x axis title. Default is none.
...@@ -141,7 +141,7 @@ trafficlight <- function(x, time = NULL, sort_5yrmean = TRUE, sort_vec = NULL, ...@@ -141,7 +141,7 @@ trafficlight <- function(x, time = NULL, sort_5yrmean = TRUE, sort_vec = NULL,
legend_txt <- c(legend_txt, "missing value") legend_txt <- c(legend_txt, "missing value")
} }
} else { } else {
stop(" If you want to use evenly-spaced intervals, provide an integer for the number of intervals!") stop(" If you want to use evenly spaced intervals, provide an integer for the number of intervals!")
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment