Skip to content
Snippets Groups Projects
Commit 2e3b4840 authored by Otto, Dr. Saskia's avatar Otto, Dr. Saskia
Browse files

added (for now) the 2 state space approaches from INDperfrom

parent c583275e
No related branches found
No related tags found
No related merge requests found
......@@ -17,7 +17,12 @@ Depends:
Imports:
stats,
graphics,
magrittr
magrittr,
tripack,
grDevices,
ggplot2,
dplyr,
rlang
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
......@@ -4,7 +4,10 @@ export("%>%")
export(calc_vif)
export(explore_na)
export(impute)
export(mafa)
export(plot_statespace_ch)
export(plot_statespace_ed)
export(statespace_ch)
export(statespace_ed)
export(test_tac)
export(trafficlight)
import(graphics)
......
......@@ -66,12 +66,12 @@ explore_na <- function(x, time = NULL, cex_x = 1, cex_y = 1,
par(mar = c(3, 7+madj_y, 1.5, 1.5))
barplot(apply(x>0, 1, sum), xlab = "", las = 2, horiz = TRUE,
cex.axis = cex_x, space = 0, axis.lty = 1, col = "#8cb2b2", yaxs = "i")
title(xlab="Number of years", line=2, cex.lab=1.2)
title(xlab="# of years", line=2, cex.lab=1)
#barplot stations
par(mar = c(4, 7+madj_y, 1.5, .5))
barplot(apply(x>0, 2, sum), ylab = "",
las = 2, cex.axis = cex_y, space = 0, col = "#db7170", xaxs = "i")
title(ylab="Number of variables", line=2.5, cex.lab=1.2)
title(ylab="# of variables", line=2.5, cex.lab=1)
}
#' Min/Max Autocorrelation Factors Analysis (MAFA) in time (or 1D)
#'
#' @name mafa
#' @param x matrix or data frame containing time series of multiple variables.
#' @param time vector of time units that will be used for the output.
#' @param nreal integer; the number of realizations that should be performed.
#' @param nmafs the number of retained MAFs (default is 3).
#' @param seed integer; a number for the random number generator state.
#' @param contr_var integer; the number of variables with highest contribution to show in the loading plots.
#'
#' @details
#' MAFA is a multivariate statistical method, that allows the set of initial variables to be
#' decomposed into factors. The autocorrelation of which decreases, or the variogram increases,
#' when going from the first factors to the last ones. Hence the very first factors extract
#' the part of the variables which is the most continuous in time.
#'
#' The function for this code is adopted from Wollez et al. 2009 (\url{https://doi.org/10.1051/alr/2009020})
#'
#'
#' @export
#' @seealso \url{https://doi.org/10.1051/alr/2009020}
#' @examples
#' df <- data.frame(variable1variabl = 1:20, variable2= rnorm(20, 100, 30),
#' variable3 = 1:20 + rnorm(20))
#' mafa(x = df, time = 1981:2000)
#' df <- matrix(rnorm(1000), ncol = 50)
#' colnames(df) <- paste0(letters[1:25], rep(1:2,each = 25))
#' mafa(df, contr_var = 10)
#'
mafa <- function(x, time = NULL, nreal = 1000, nmafs = 3, seed = NULL, contr_var = NULL){
form <- stats::as.formula(paste("~", paste(colnames(x), collapse = "+"), sep = ""))
vars <- colnames(x) # Ind
nv <- length(vars) # jInd
zn <- 1:nv # Ind.n
res_noise <- vector("list",nv)
res <- vector("list",2)
for(i in 1:nv) {
res_noise[[i]] <- res
}
if (is.null(time)) time <- 1:nrow(x)
if (is.null(contr_var)) {
contr_var <- ncol(x)
} else {
if (contr_var > ncol(x)) contr_var <- ncol(x)
}
# set the random seed
if (!is.null(seed)) set.seed(seed)
# loop: nreal realizations is performed
for(j in 1:nreal){
# work on centred and normed indices
# add of a normal random noise with mean 0 and
# variance 0.1*(nb.indices/(nb.years-1))
xsc <- scale(x)
nij <- dim(xsc)
noise <- matrix(stats::rnorm(prod(nij),0,sqrt(0.1*(nij[2]/(nij[1]-1)))),nij[1],nij[2])
new_x <- as.data.frame(xsc + noise)
# Compute MAFs
resmaf <- maf(form, new_x)
# MAFs are ordered according to a reference
# the reference is the first realisation
lng <- length(resmaf$x[1,])
if(j==1) test <- resmaf
for(k in 1:lng){
# the reference is ordered so as to have 50% of the MAFs values as positive
if(sum(test$x[,k]>=0)/length(test$x[,k])<0.5){
test$x[,k] <- test$x[,k] * -1
test$rotation[,k] <- test$rotation[,k] * -1
}
# the following realisations will be order so as to have 50% of the MAFs values
# with the same sign than the reference
if(sum((test$x[,k]>=0)==(resmaf$x[,k]>=0),na.rm=T)/length(test$x[,k])<0.5){
resmaf$x[,k] <- resmaf$x[,k] * -1
resmaf$rotation[,k] <- resmaf$rotation[,k] * -1
}
}
# save results in outputs
for(k in 1:lng){
res_noise[[k]][[1]] <- rbind(res_noise[[k]][[1]], as.numeric(resmaf$x[,k]))
res_noise[[k]][[2]] <- rbind(res_noise[[k]][[2]], as.numeric(resmaf$rotation[,k]))
}
}
# Compute and save
# - median loadings of the nreal previous realisations
# - median MAF = median loadings * centred and normed indices
for(j in 1:lng){
med.rot <- apply(res_noise[[j]][[2]],2, stats::median)
resmaf$rotation[,j] <- med.rot
rot <- matrix(rep(med.rot,nij[1]),nij[1],byrow=T)
xsum <- apply(rot * xsc,1,sum)
resmaf$rotation[,j] <- resmaf$rotation[,j]/stats::sd(xsum)
resmaf$x[,j] <- xsum/stats::sd(xsum)
}
# Return 3 plots for each MAF
op <- par(mfrow = c(nmafs,3), mar = c(2.5,2.5,2.5,1))
for(j in 1:nmafs){
# MFA Trend
maf_x <- as.numeric(resmaf$x[,j])
barplot(aperm(matrix(maf_x)), names.arg = matrix(time), space = 0, col = "steelblue2",
cex.names = 1.1,main = paste("median MAF ", j, sep = ""),
ylim = c(-3.5,3.5), axis.lty = 1, xaxs = "i")
# Variogram
maf_vg <- calc_reg_1dvario(maf_x)
plot(maf_vg, type= "o", main= "", ylim = c(0, max(maf_vg[ ,2])))
abline(h = 1, lty = 2)
title("Variogram")
# Loadings
maf_rot <- resmaf$rotation[,j]
# choose top contr_var
choose <- rev(order(abs(maf_rot)))[1:contr_var]
maf_rot <- maf_rot[choose]
vars_j <- vars[choose]
# sort
ord <- order(maf_rot)
maf_rot <- maf_rot[ord]
vars_j <- vars_j[ord]
nvc <- length(vars_j)
mr_pos <- which(maf_rot > 0)
mr_neg <- which(maf_rot < 0)
fac <- 2
x_min <- max(abs(maf_rot))*-fac
x_max <- max(abs(maf_rot))*fac
plot(0, 0, xlim = c(x_min, x_max), ylim = c(0, nvc),
type = "n", xlab = "", ylab = "", yaxt = "n", main = "Loadings")
segments(x0 = rep(0, nvc), x1 = maf_rot, y0 = seq(0.5, nvc-.5, 1), y1 = seq(0.5, nvc-.5, 1))
abline(v = 0)
Col <- rep(1, nvc)
Col[seq(1, nvc, 1)[abs(maf_rot) == max(abs(maf_rot))]] <- 3
if (length(mr_pos) > 0) {
text(maf_rot[mr_pos], seq(0.5, nvc-.5, 1)[mr_pos], vars_j[mr_pos], cex = .9, col = Col[mr_pos], pos = 4)
}
if (length(mr_neg) > 0) {
text(maf_rot[mr_neg], seq(0.5, nvc-.5, 1)[mr_neg], vars_j[mr_neg], cex = .9, col = Col[mr_neg], pos = 2)
}
}
par(op)
# Return numeric results
out <- list(
scores = resmaf$x,
loadings = resmaf$rotation
)
return(out)
}
maf <- function(formula,data) {
if(length(data[,1])==1) stop("Cannot be run with only one sample.")
# PCA of original variables
bidpr<-stats::prcomp(formula,data,scale=TRUE)
bidn<-sum(bidpr$sdev>1e-10)
if(bidn<2) stop("stop: strictly less than two PCs")
bidpr$sdev<-bidpr$sdev[1:bidn]
bidpr$rotation<-bidpr$rotation[,1:bidn]
bidpr$x<-bidpr$x[,1:bidn]
# normalization of PCs
bidim<-dim(bidpr$x)
bidprsdev<-matrix(rep(bidpr$sdev,each=bidim[1]),ncol=bidim[2])
bidprx<-bidpr$x/bidprsdev
bidim<-dim(bidpr$rotation)
bidprsdev<-matrix(rep(bidpr$sdev,each=bidim[1]),ncol=bidim[2])
bidpr$rotation<-bidpr$rotation/bidprsdev
# PCA of increments of normalized PCs
biddtab<- diff(bidprx)
biddpr<-stats::prcomp(biddtab,center=F,scale=F)
biddnmaf<-sum(biddpr$sdev>1e-10)
if(biddnmaf<2) stop("stop: strictly less than two MAFs")
biddpr$sdev<-biddpr$sdev[1:biddnmaf]
biddpr$rotation<-biddpr$rotation[,1:biddnmaf]
biddpr$x<-biddpr$x[,1:biddnmaf]
biddx<-biddpr$x
# store complete transformation as a PCA output from original data
maf<-bidpr
maf$sdev <- biddpr$sdev
biddim<-dim(biddpr$x)
maf$rotation<-bidpr$rotation%*%biddpr$rotation
maf$x<-bidprx%*%biddpr$rotation
# reverse the order of MAFs
# in order to begin with lowest eigenvalues i.e. lowest variograms
maf$sdev<-.5*(maf$sdev^2)*(biddim[1]-1)/biddim[1]
names(maf$sdev)<-rev(names(maf$sdev))
maf$sdev<-rev(maf$sdev)
lng<-length(dimnames(maf$rotation)[[2]])
maf$rotation <- maf$rotation[,seq(lng,1)]
dimnames(maf$rotation)[[2]]<-paste("MAF",1:lng,sep="")
lng<-length(dimnames(maf$x)[[2]])
maf$x<-maf$x[,seq(lng,1)]
dimnames(maf$x)[[2]]<-paste("MAF",1:lng,sep="")
maf
}
calc_reg_1dvario <- function(x) {
lng<-length(x)
res<-numeric(0)
for(h in 1:(lng-1))
res<-c(res,0.5*mean(diff(x,lag=h)^2,na.rm=T))
res<-cbind(1:(lng-1),res)
res
}
calc_irreg_1dvario <- function(t, x) {
d<-abs(outer(t,t,"-"))
h<-sort(unique(d[lower.tri(d)]))
res<-numeric(0)
for (k in 1:length(h)) {
d2x<-outer(x,x,"-")^2
res<-c(res,0.5*mean(d2x[d==h[k]],na.rm=T)) }
res<-cbind(h,res)
res
}
#' Convex hull plot
#'
#' \code{plot_statespace_ch} generates a scatter plot of all observed combinations
#' in the 2-dimensional indicator space including the convex hull from defined
#' reference conditions and the current period.
#'
#' @param x An object from the \code{\link{statespace_ch}} function.
#' @param col_ch_ref Color of reference period (for points, path, and labels).
#' @param col_ch_cur Color of current period (for points, path, and labels).
#' @param size_time Text size of the time labels (both periods).
#'
#' @return The function returns a \code{\link[ggplot2]{ggplot}} object.
#'
#' @author Saskia A. Otto
#'
#' @export
#' @examples
#' x <- rnorm(25)
#' y <- rnorm(25)
#' ch <- statespace_ch(x, y, time = 1981:2005, period_ref = 1981:1986, period_current = 1995:2005)
#' plot_statespace_ch(ch)
plot_statespace_ch <- function(x, col_ch_ref = "red",
col_ch_cur = "blue", size_time = 4) {
# Data input validation --------
if (missing(x)) {
stop("Argument x is missing.")
}
if (!is.list(x)) {
stop("x has to be a list (output of statespace_ch() function)!")
}
# -------------------------
# Get indices of selected periods
index_ref <- match(x$period_ref, x$time)
index_current <- match(x$period_current, x$time)
# Data preparation for plots
time_char <- as.character(x$time)
xy_full <- x$xy
xy_ref <- xy_full %>% dplyr::slice(index_ref)
xy_cur <- xy_full %>% dplyr::slice(index_current)
xrange <- c((min(x$xy$x, na.rm = TRUE) - abs(min(x$xy$x,
na.rm = TRUE) * 0.025)), (max(x$xy$x, na.rm = TRUE) +
abs(max(x$xy$x, na.rm = TRUE)) * 0.025))
yrange <- c((min(x$xy$y, na.rm = TRUE) - abs(min(x$xy$y,
na.rm = TRUE) * 0.025)), (max(x$xy$y, na.rm = TRUE) +
abs(max(x$xy$y, na.rm = TRUE)) * 0.025))
xnudge <- diff(xrange)/50
ynudge <- diff(yrange)/50
# Proportion shown as annotation
in_prop <- round((sum(x$inside_ch_ref)/length(x$inside_ch_ref) *
100), 0)
# Plot -----------------------------------
# Set general layout theme
ggplot2::theme_set(ggplot2::theme_bw())
# Scatter plot
chplot <- ggplot2::ggplot(xy_full,
ggplot2::aes(x = !!rlang::sym("x"), y = !!rlang::sym("y"))) +
ggplot2::geom_point(shape = 1, col = "black") +
ggplot2::xlab("x") + ggplot2::ylab("y") + ggplot2::xlim(xrange) +
ggplot2::ylim(yrange)
# Add convex hull of reference period
chplot <- chplot +
ggplot2::geom_point(data = xy_ref, shape = 19, col = col_ch_ref) +
ggplot2::geom_path(data = xy_ref[x$ch_ref,], col = col_ch_ref) +
ggplot2::geom_text(data = xy_ref, label = time_char[index_ref],
col = col_ch_ref, nudge_x = xnudge, nudge_y = ynudge,
size = size_time)
# Add convex hull of current period
chplot <- chplot +
ggplot2::geom_point(data = xy_cur, shape = 19, col = col_ch_cur) +
ggplot2::geom_path(data = xy_cur[x$ch_cur, ], col = col_ch_cur) +
ggplot2::geom_text(data = xy_cur, label = time_char[index_current],
col = col_ch_cur, nudge_x = xnudge, nudge_y = ynudge,
size = size_time)
# General layout
chplot <- chplot +
ggplot2::ggtitle(paste0("Proportion of recent time period within reference space: ",
in_prop, " %")) +
ggplot2::theme(panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
plot.title = ggplot2::element_text(lineheight = 0.8,
face = "plain", size = 16), axis.text = ggplot2::element_text(size = 12),
axis.title = ggplot2::element_text(size = 14))
### end of function ###
return(chplot)
}
#' Time series plot of Euclidean distance
#'
#' \code{plot_statespace_ed} generates a time series plot of the Euclidean
#' distance in indicator state space from a defined reference conditions.
#'
#' @param x The output tibble from the \code{\link{statespace_ed}} function.
#'
#' @return The function returns a \code{\link[ggplot2]{ggplot}} object.
#'
#' @author Saskia A. Otto
#'
#' @export
#' @examples
#' x <- as.data.frame(matrix(rnorm(250), ncol = 10))
#' ed <- statespace_ed(x, time = 1976:2000, ref_time = 1976)
#' plot_statespace_ed(ed)
plot_statespace_ed <- function(x) {
# Data input validation ---------
if (missing(x)) {
stop("Argument x is missing.")
}
# x <- INDperform::check_input_tbl(x, tbl_name = "x", parent_func = "statespace_ed()")
# -------------------------------
# Set general layout theme
ggplot2::theme_set(ggplot2::theme_bw())
edplot <- ggplot2::ggplot(x,
ggplot2::aes(x = !!rlang::sym("time"), y = !!rlang::sym("ed"))) +
ggplot2::geom_smooth(col = "firebrick3",
fill = "cadetblue", alpha = 0.2) +
ggplot2::geom_line(col = "black", size = 0.5) +
ggplot2::geom_point(shape = 16, col = "black", size = 1.5) +
ggplot2::ylab(paste0("Euclidean distance s from reference point (",
x$time[which(x$ref_time == TRUE)], ")")) +
ggplot2::xlab("") + # General layout
ggplot2::theme(panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()) +
ggplot2::theme(axis.text = ggplot2::element_text(size = 12),
axis.title = ggplot2::element_text(size = 14))
return(edplot)
}
#' Convex hull in 2-dimensional space
#'
#' \code{statespace_ch} calculates the convex hull in 2-dimensional space, e.g.
#' for two selected indicators, using the \code{\link[tripack]{tri.mesh}}
#' function.
#'
#' @param x The coordinates of points in the first dimension
#' (e.g. indicator 1 or PC1 scores from a PCA).
#' @param y The coordinates of points in the second dimension
#' (e.g. indicator 2 or the PC2 scores from a PCA).
#' @param time A vector containing the actual time series.
#' @param period_ref Vector of time units (e.g. years) used as reference
#' period (minimum of 3 time units required).
#' @param period_current Vector of time units (e.g. years) used as current
#' period to compare with the reference period (minimum of 3 time units
#' required).
#'
#' @details
#' \code{statespace_ch} implements a second state space approach to assess
#' the development of a suite of ecological state indicators
#' (Otto \emph{et al.} 2018, Tett \emph{et al.} 2008). While unidimensional
#' approaches such as the Euclidean distance (see \code{\link{statespace_ed}})
#' feature the disadvantage of defining one particular year or time step as
#' reference condition, this approach accounts for inter-annual variation by
#' defining a reference domain in state space based on several years: more
#' recent observations might be characterized as either within or outside this
#' domain.
#'
#' The reference domain can be described by a convex hull, which is a multivariate
#' measure derived from computational geometry representing the smallest convex set
#' containing all the points in Euclidean plane or in Euclidean space
#' (de Berg \emph{et al.}, 2008). While the convex hull can be calculated for
#' high-dimensional data, reducing the space to two dimensions allows for an easier
#' visualization and interpretation. Therefore, the \code{statespace_ch} function only
#' calculates the convex hull for two dimensions, i.e. for two indicators or principal
#' axes obtained by multivariate analysis such as a Principal Component Analysis
#' (PCA).
#'
#' @return
#' The function returns a list with the following elements
#' \describe{
#' \item{\code{ch_ref}}{A vector of the position of the convex hull of the
#' reference period.}
#' \item{\code{ch_cur}}{A vector of the position of the convex hull of the
#' current period.}
#' \item{\code{inside_ch_ref}}{A logical vector indicating whether each
#' year (time step) of the current period lies inside (TRUE) or outside
#' (FALSE) the state space domain of the reference period.}
#' \item{\code{xy}}{A data frame of the x and y coordinates.}
#' \item{\code{time}}{A vector of the full time series.}
#' \item{\code{period_ref}}{A vector of years (time steps) defined as
#' the reference period.}
#' \item{\code{period_current}}{A vector of years (time steps) defined as
#' the current period.}
#' }
#'
#'
#' @references
#' de Berg, M., Cheong, O., van Kreveld, M., Overmars, M. (2008) Computational
#' Geometry - Algorithms and Applications. Springer Berlin Heidelberg, 386pp.
#'
#' Otto, S.A., Kadin, M., Casini, M., Torres, M.A., Blenckner, T. (2018)
#' A quantitative framework for selecting and validating food web indicators.
#' \emph{Ecological Indicators}, 84: 619-631,
#' doi: https://doi.org/10.1016/j.ecolind.2017.05.045
#'
#' Tett, P., Carreira, C., Mills, D.K., van Leeuwen, S., Foden, J., Bresnan, E.,
#' Gowen, R.J. (2008) Use of a Phytoplankton Community Index to assess the
#' health of coastal waters. \emph{ICES Journal of Marine Science} 65, 1475-1482.
#'
#' @seealso \code{\link[tripack]{tri.mesh}} for the computation of the convex hull.
#' @author Saskia A. Otto
#'
#' @export
#' @examples
#' x <- rnorm(25)
#' y <- rnorm(25)
#' statespace_ch(x, y, time = 1981:2005, period_ref = 1981:1986, period_current = 1995:2005)
statespace_ch <- function(x, y, time, period_ref, period_current) {
# Data input validation -----------------------
# Testing for required package installation
if (!requireNamespace("tripack", quietly = TRUE)) {
stop("The package `tripack` is needed for this function to work. Please install it.",
call. = FALSE)
}
if (missing(x)) {
stop("Argument x is missing.")
}
if (missing(y)) {
stop("Argument y is missing.")
}
if (missing(time)) {
stop("Argument time is missing.")
}
if (missing(period_ref)) {
stop("Argument period_ref is missing.")
}
if (missing(period_current)) {
stop("Argument period_current is missing.")
}
# Check input vectors
# x <- check_input_vec(x, "x")
# y <- check_input_vec(y, "y")
# time <- INDperform::check_input_vec(time, "argument time")
# (I added explicitly 'argument' to NOT test for regularity in time steps!)
if (length(x) != length(y) | length(x) != length(time)) {
stop("One of the x, y, or time vectors has a different length!")
}
# Testing input of both periods
if (any(period_ref %in% time == FALSE) | any(period_current %in%
time == FALSE)) {
stop("At least one of the defined periods is (to some extent) outside the given time series.")
}
if (length(period_ref) <= 2 | length(period_current) <=
2) {
stop("At least one of the defined periods has not a minimum of 3 time units.")
}
# Check if periods contain any NA
if (any(is.na(x[match(period_ref, time)])) | any(is.na(y[match(period_ref,
time)]))) {
stop(paste0("One of your indicators (x and/or y) has missing values in the reference ",
"period. Please select another period or fill the missing values (e.g. with the mean, ",
"median or interpolate)."))
}
if (any(is.na(x[match(period_current, time)])) |
any(is.na(y[match(period_current, time)]))) {
stop(paste0("One of your indicators (x and/or y) has missing values in the current ",
"period. Please select another period or fill the missing values (e.g. with the mean, ",
"median or interpolate)."))
}
# --------------------------------------------
index_ref <- match(period_ref, time)
index_current <- match(period_current, time)
# Convex hull computation
tr <- tripack::tri.mesh(x = x[index_ref], y = y[index_ref])
inside_ch_ref <- tripack::in.convex.hull(tr, x = x[index_current],
y = y[index_current])
# Helper function convexhull (returns convex hull
# around data points)
convexhull <- function(xcoord, ycoord) {
# xcoords = x ycoord = y
hpts <- grDevices::chull(x = xcoord, y = ycoord)
hpts <- c(hpts, hpts[1])
return(hpts)
}
# Get position of convex hull
ch_ref <- convexhull(xcoord = x[index_ref], ycoord = y[index_ref])
ch_cur <- convexhull(xcoord = x[index_current],
ycoord = y[index_current])
# Export list for plot
out <- list(ch_ref = ch_ref, ch_cur = ch_cur, inside_ch_ref = inside_ch_ref,
xy = data.frame(x, y), time = time, period_ref = period_ref,
period_current = period_current)
### end of function ###
return(out)
}
#' Euclidean distance in state space
#'
#' \code{statespace_ed} generates a time series of Euclidean distances from
#' defined reference conditions to assess the development of a suite of
#' ecological state indicators.
#'
#' @param x A data frame, tibble, matrix or vector of selected indicator(s).
#' @param time A vector containing the actual time series.
#' @param ref_time The reference time (single point in time, e.g. specific
#' year) on which to base the Euclidean distance. Default is set to the
#' first time point.
#' @param na_rm A logical value indicating whether rows with NA values should
#' be stripped before the computation proceeds. Default is set to TRUE. If set
#' to FALSE and data contains NAs, the function returns NA.
#'
#' @details
#' This function implements an approach adopted from Tett \emph{et al.} (2013) to
#' assess changes in the ecosystem state by studying trajectories in state space.
#' State space is defined here as the n-dimensional space of possible locations of
#' state variables or indicators. For a robust suite of indicators the unidimensional
#' Euclidean distance between each year (or any other time step) and a reference
#' year in state space is calculated. That means, the function calculates the square
#' root of the sum of squared distances between each standardized indicator value
#' in a specific year and its reference value, which is defined by the user.
#'
#' @return
#' \code{statespace_ed} returns a data frame with the time vector \code{time}, the
#' Euclidean distance \code{ed}, and a logical vector \code{ref_time} indicating
#' the time step defined as reference.
#'
#' @references
#' Tett, P., Gowen, R.J., Painting, S.J., Elliott, M., Forster, R., Mills, D.K.,
#' Bresnan, E., Capuzzo, E., Fernandes, T.F., Foden, J., Geider, R.J., Gilpin, L.C.,
#' Huxham, M., McQuatters-Gollop, A.L., Malcolm, S.J., Saux-Picart, S., Platt, T.,
#' Racault, M.F., Sathyendranath, S., van der Molen, J., Wilkinson, M. (2013)
#' Framework for understanding marine ecosystem health. \emph{Marine Ecology
#' Progress Series.} 494, 1-27.
#'
#' @author Saskia A. Otto
#'
#' @export
#'
#' @examples
#' x <- as.data.frame(matrix(rnorm(250), ncol = 10))
#' statespace_ed(x, time = 1976:2000, ref_time = 1976)
#'
statespace_ed <- function(x, time, ref_time = NULL, na_rm = TRUE) {
# Data input validation --------------------
if (missing(x)) {
stop("Argument x is missing.")
}
if (missing(time)) {
stop("Argument time is missing.")
}
# Check for correct input format
if (any(class(x) == "list")) {
stop("x cannot be a list.")
}
#time <- INDperform::check_input_vec(time, "argument time")
# (I added explicitly 'argument' to NOT test for regularity in time steps!)
if (na_rm == FALSE) {
return(NA)
} else {
y <- data.frame(time = time)
keep <- stats::complete.cases(x)
x <- x[keep, ]
time <- time[keep]
if (is.null(ref_time)) {
id <- 1
} else {
# Check if reference time still included and get new position
if (!ref_time %in% time) {
stop("The defined reference time is outside the time series OR by removing rows with NAs (if argument 'na.rm' set to TRUE) the reference time got also removed. Please choose another reference time or impute missing values.")
} else {
id <- which(time == ref_time)
}
}
}
# --------------------------------
# Standardize indicator time series
x_stand <- scale(x) # without year and sd
# Calculate the n-dimensional Euclidean distance
# between each time unit and the reference time
# (can be starting point)
df <- x_stand
for (i in 1:nrow(x_stand)) {
df[i, ] <- df[i, ] - x_stand[id, ]
}
calc_euc <- function(x) sqrt(sum(x^2))
ed <- apply(df, FUN = calc_euc, MARGIN = 1)
out <- data.frame(time = time, ed = ed, ref_time = FALSE)
out$ref_time[id] <- TRUE
out <- suppressMessages( dplyr::left_join(y, out) )
out$ref_time[is.na(out$ref_time)] <- FALSE
### END OF FUNCTION ###
return(out)
}
......@@ -69,7 +69,7 @@
trafficlight <- function(x, time = NULL, sort_5yrmean = TRUE, sort_vec = NULL,
method = "quantiles", probs = seq(0, 1, 0.2), quantile_type = 7, intervals = 5,
cols = c("green3", "greenyellow", "yellow", "gold", "red"),
cols = c("red","gold", "yellow", "greenyellow", "green3"),
main = "", xlab = "", ylab = "", adj_xlab = NULL, adj_ylab = NULL,
hadj_x = 0.5, vadj_x = -0.7, hadj_y = -0.7, vadj_y = 0.3,
madj_bottom = 0, madj_left = 0, madj_top = 0, madj_right = 0,
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mafa.R
\name{mafa}
\alias{mafa}
\title{Min/Max Autocorrelation Factors Analysis (MAFA) in time (or 1D)}
\usage{
mafa(x, time = NULL, nreal = 1000, nmafs = 3, seed = NULL, contr_var = NULL)
}
\arguments{
\item{x}{matrix or data frame containing time series of multiple variables.}
\item{time}{vector of time units that will be used for the output.}
\item{nreal}{integer; the number of realizations that should be performed.}
\item{nmafs}{the number of retained MAFs (default is 3).}
\item{seed}{integer; a number for the random number generator state.}
\item{contr_var}{integer; the number of variables with highest contribution to show in the loading plots.}
}
\description{
Min/Max Autocorrelation Factors Analysis (MAFA) in time (or 1D)
}
\details{
MAFA is a multivariate statistical method, that allows the set of initial variables to be
decomposed into factors. The autocorrelation of which decreases, or the variogram increases,
when going from the first factors to the last ones. Hence the very first factors extract
the part of the variables which is the most continuous in time.
The function for this code is adopted from Wollez et al. 2009 (\url{https://doi.org/10.1051/alr/2009020})
}
\examples{
df <- data.frame(variable1variabl = 1:20, variable2= rnorm(20, 100, 30),
variable3 = 1:20 + rnorm(20))
mafa(x = df, time = 1981:2000)
df <- matrix(rnorm(1000), ncol = 50)
colnames(df) <- paste0(letters[1:25], rep(1:2,each = 25))
mafa(df, contr_var = 10)
}
\seealso{
\url{https://doi.org/10.1051/alr/2009020}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot_statespace_ch.R
\name{plot_statespace_ch}
\alias{plot_statespace_ch}
\title{Convex hull plot}
\usage{
plot_statespace_ch(x, col_ch_ref = "red", col_ch_cur = "blue", size_time = 4)
}
\arguments{
\item{x}{An object from the \code{\link{statespace_ch}} function.}
\item{col_ch_ref}{Color of reference period (for points, path, and labels).}
\item{col_ch_cur}{Color of current period (for points, path, and labels).}
\item{size_time}{Text size of the time labels (both periods).}
}
\value{
The function returns a \code{\link[ggplot2]{ggplot}} object.
}
\description{
\code{plot_statespace_ch} generates a scatter plot of all observed combinations
in the 2-dimensional indicator space including the convex hull from defined
reference conditions and the current period.
}
\examples{
x <- rnorm(25)
y <- rnorm(25)
ch <- statespace_ch(x, y, time = 1981:2005, period_ref = 1981:1986, period_current = 1995:2005)
plot_statespace_ch(ch)
}
\author{
Saskia A. Otto
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot_statespace_ed.R
\name{plot_statespace_ed}
\alias{plot_statespace_ed}
\title{Time series plot of Euclidean distance}
\usage{
plot_statespace_ed(x)
}
\arguments{
\item{x}{The output tibble from the \code{\link{statespace_ed}} function.}
}
\value{
The function returns a \code{\link[ggplot2]{ggplot}} object.
}
\description{
\code{plot_statespace_ed} generates a time series plot of the Euclidean
distance in indicator state space from a defined reference conditions.
}
\examples{
x <- as.data.frame(matrix(rnorm(250), ncol = 10))
ed <- statespace_ed(x, time = 1976:2000, ref_time = 1976)
plot_statespace_ed(ed)
}
\author{
Saskia A. Otto
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/statespace_ch.R
\name{statespace_ch}
\alias{statespace_ch}
\title{Convex hull in 2-dimensional space}
\usage{
statespace_ch(x, y, time, period_ref, period_current)
}
\arguments{
\item{x}{The coordinates of points in the first dimension
(e.g. indicator 1 or PC1 scores from a PCA).}
\item{y}{The coordinates of points in the second dimension
(e.g. indicator 2 or the PC2 scores from a PCA).}
\item{time}{A vector containing the actual time series.}
\item{period_ref}{Vector of time units (e.g. years) used as reference
period (minimum of 3 time units required).}
\item{period_current}{Vector of time units (e.g. years) used as current
period to compare with the reference period (minimum of 3 time units
required).}
}
\value{
The function returns a list with the following elements
\describe{
\item{\code{ch_ref}}{A vector of the position of the convex hull of the
reference period.}
\item{\code{ch_cur}}{A vector of the position of the convex hull of the
current period.}
\item{\code{inside_ch_ref}}{A logical vector indicating whether each
year (time step) of the current period lies inside (TRUE) or outside
(FALSE) the state space domain of the reference period.}
\item{\code{xy}}{A data frame of the x and y coordinates.}
\item{\code{time}}{A vector of the full time series.}
\item{\code{period_ref}}{A vector of years (time steps) defined as
the reference period.}
\item{\code{period_current}}{A vector of years (time steps) defined as
the current period.}
}
}
\description{
\code{statespace_ch} calculates the convex hull in 2-dimensional space, e.g.
for two selected indicators, using the \code{\link[tripack]{tri.mesh}}
function.
}
\details{
\code{statespace_ch} implements a second state space approach to assess
the development of a suite of ecological state indicators
(Otto \emph{et al.} 2018, Tett \emph{et al.} 2008). While unidimensional
approaches such as the Euclidean distance (see \code{\link{statespace_ed}})
feature the disadvantage of defining one particular year or time step as
reference condition, this approach accounts for inter-annual variation by
defining a reference domain in state space based on several years: more
recent observations might be characterized as either within or outside this
domain.
The reference domain can be described by a convex hull, which is a multivariate
measure derived from computational geometry representing the smallest convex set
containing all the points in Euclidean plane or in Euclidean space
(de Berg \emph{et al.}, 2008). While the convex hull can be calculated for
high-dimensional data, reducing the space to two dimensions allows for an easier
visualization and interpretation. Therefore, the \code{statespace_ch} function only
calculates the convex hull for two dimensions, i.e. for two indicators or principal
axes obtained by multivariate analysis such as a Principal Component Analysis
(PCA).
}
\examples{
x <- rnorm(25)
y <- rnorm(25)
statespace_ch(x, y, time = 1981:2005, period_ref = 1981:1986, period_current = 1995:2005)
}
\references{
de Berg, M., Cheong, O., van Kreveld, M., Overmars, M. (2008) Computational
Geometry - Algorithms and Applications. Springer Berlin Heidelberg, 386pp.
Otto, S.A., Kadin, M., Casini, M., Torres, M.A., Blenckner, T. (2018)
A quantitative framework for selecting and validating food web indicators.
\emph{Ecological Indicators}, 84: 619-631,
doi: https://doi.org/10.1016/j.ecolind.2017.05.045
Tett, P., Carreira, C., Mills, D.K., van Leeuwen, S., Foden, J., Bresnan, E.,
Gowen, R.J. (2008) Use of a Phytoplankton Community Index to assess the
health of coastal waters. \emph{ICES Journal of Marine Science} 65, 1475-1482.
}
\seealso{
\code{\link[tripack]{tri.mesh}} for the computation of the convex hull.
}
\author{
Saskia A. Otto
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/statespace_ed.R
\name{statespace_ed}
\alias{statespace_ed}
\title{Euclidean distance in state space}
\usage{
statespace_ed(x, time, ref_time = NULL, na_rm = TRUE)
}
\arguments{
\item{x}{A data frame, tibble, matrix or vector of selected indicator(s).}
\item{time}{A vector containing the actual time series.}
\item{ref_time}{The reference time (single point in time, e.g. specific
year) on which to base the Euclidean distance. Default is set to the
first time point.}
\item{na_rm}{A logical value indicating whether rows with NA values should
be stripped before the computation proceeds. Default is set to TRUE. If set
to FALSE and data contains NAs, the function returns NA.}
}
\value{
\code{statespace_ed} returns a data frame with the time vector \code{time}, the
Euclidean distance \code{ed}, and a logical vector \code{ref_time} indicating
the time step defined as reference.
}
\description{
\code{statespace_ed} generates a time series of Euclidean distances from
defined reference conditions to assess the development of a suite of
ecological state indicators.
}
\details{
This function implements an approach adopted from Tett \emph{et al.} (2013) to
assess changes in the ecosystem state by studying trajectories in state space.
State space is defined here as the n-dimensional space of possible locations of
state variables or indicators. For a robust suite of indicators the unidimensional
Euclidean distance between each year (or any other time step) and a reference
year in state space is calculated. That means, the function calculates the square
root of the sum of squared distances between each standardized indicator value
in a specific year and its reference value, which is defined by the user.
}
\examples{
x <- as.data.frame(matrix(rnorm(250), ncol = 10))
statespace_ed(x, time = 1976:2000, ref_time = 1976)
}
\references{
Tett, P., Gowen, R.J., Painting, S.J., Elliott, M., Forster, R., Mills, D.K.,
Bresnan, E., Capuzzo, E., Fernandes, T.F., Foden, J., Geider, R.J., Gilpin, L.C.,
Huxham, M., McQuatters-Gollop, A.L., Malcolm, S.J., Saux-Picart, S., Platt, T.,
Racault, M.F., Sathyendranath, S., van der Molen, J., Wilkinson, M. (2013)
Framework for understanding marine ecosystem health. \emph{Marine Ecology
Progress Series.} 494, 1-27.
}
\author{
Saskia A. Otto
}
......@@ -13,7 +13,7 @@ trafficlight(
probs = seq(0, 1, 0.2),
quantile_type = 7,
intervals = 5,
cols = c("green3", "greenyellow", "yellow", "gold", "red"),
cols = c("red", "gold", "yellow", "greenyellow", "green3"),
main = "",
xlab = "",
ylab = "",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment