added (for now) the 2 state space approaches from INDperfrom

2e3b4840 · Otto, Dr. Saskia · c583275e · 2e3b4840 · 2e3b4840 · 2e3b4840
Commit 2e3b4840 authored Feb 23, 2021 by Otto, Dr. Saskia
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -17,7 +17,12 @@ Depends:
 Imports:
    stats,
    graphics,
-    magrittr
+    magrittr,
+    tripack,
+    grDevices,
+    ggplot2,
+    dplyr,
+    rlang
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.1.1
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,7 +4,10 @@ export("%>%")
 export(calc_vif)
 export(explore_na)
 export(impute)
-export(mafa)
+export(plot_statespace_ch)
+export(plot_statespace_ed)
+export(statespace_ch)
+export(statespace_ed)
 export(test_tac)
 export(trafficlight)
 import(graphics)

--- a/R/explore_na.R
+++ b/R/explore_na.R
@@ -66,12 +66,12 @@ explore_na <- function(x, time = NULL, cex_x = 1, cex_y = 1,
  par(mar = c(3, 7+madj_y, 1.5, 1.5))
  barplot(apply(x>0, 1, sum), xlab = "", las = 2, horiz = TRUE,
    cex.axis = cex_x, space = 0, axis.lty = 1, col = "#8cb2b2", yaxs = "i")
-  title(xlab="Number of years", line=2, cex.lab=1.2)
+  title(xlab="# of years", line=2, cex.lab=1)

  #barplot stations
  par(mar = c(4, 7+madj_y, 1.5, .5))
  barplot(apply(x>0, 2, sum), ylab = "",
    las = 2, cex.axis = cex_y, space = 0, col = "#db7170", xaxs = "i")
-  title(ylab="Number of variables", line=2.5, cex.lab=1.2)
+  title(ylab="# of variables", line=2.5, cex.lab=1)

 }
--- a/R/mafa.R
+++ b/R/mafa.R
-#' Min/Max Autocorrelation Factors Analysis (MAFA) in time (or 1D)
-#'
-#' @name mafa
-#' @param x matrix or data frame containing time series of multiple variables.
-#' @param time vector of time units that will be used for the output.
-#' @param nreal integer; the number of realizations that should be performed.
-#' @param nmafs the number of retained MAFs (default is 3).
-#' @param seed integer; a number for the random number generator state.
-#' @param contr_var integer; the number of variables with highest contribution to show in the loading plots.
-#'
-#' @details
-#' MAFA is a multivariate statistical method, that allows the set of initial variables to be
-#' decomposed into factors. The autocorrelation of which decreases, or the variogram increases,
-#' when going from the first factors to the last ones. Hence the very first factors extract
-#' the part of the variables which is the most continuous in time.
-#'
-#' The function for this code is adopted from Wollez et al. 2009 (\url{https://doi.org/10.1051/alr/2009020})
-#'
-#'
-#' @export
-#' @seealso \url{https://doi.org/10.1051/alr/2009020}
-#' @examples
-#' df <- data.frame(variable1variabl = 1:20, variable2= rnorm(20, 100, 30),
-#'  variable3 = 1:20 + rnorm(20))
-#' mafa(x = df, time = 1981:2000)
-#' df <- matrix(rnorm(1000), ncol = 50)
-#' colnames(df) <- paste0(letters[1:25], rep(1:2,each = 25))
-#' mafa(df, contr_var = 10)
-#'
-mafa <- function(x, time = NULL, nreal = 1000, nmafs = 3, seed = NULL, contr_var = NULL){
-
-  form <- stats::as.formula(paste("~", paste(colnames(x), collapse = "+"), sep = ""))
-  vars <- colnames(x) # Ind
-  nv <- length(vars)  # jInd
-  zn <- 1:nv # Ind.n
-
-  res_noise <- vector("list",nv)
-  res <- vector("list",2)
-  for(i in 1:nv) {
-    res_noise[[i]] <- res
-  }
-  if (is.null(time)) time <- 1:nrow(x)
-
-  if (is.null(contr_var)) {
-    contr_var <- ncol(x)
-  } else {
-    if (contr_var > ncol(x)) contr_var <- ncol(x)
-  }
-
-
-  # set the random seed
-  if (!is.null(seed)) set.seed(seed)
-
-  # loop: nreal realizations is performed
-  for(j in 1:nreal){
-
-    # work on centred and normed indices
-    # add of a normal random noise with mean 0 and
-    # variance 0.1*(nb.indices/(nb.years-1))
-    xsc <- scale(x)
-    nij <- dim(xsc)
-    noise <- matrix(stats::rnorm(prod(nij),0,sqrt(0.1*(nij[2]/(nij[1]-1)))),nij[1],nij[2])
-    new_x <- as.data.frame(xsc + noise)
-
-    # Compute MAFs
-    resmaf <- maf(form, new_x)
-
-    # MAFs are ordered according to a reference
-    # the reference is the first realisation
-    lng <- length(resmaf$x[1,])
-    if(j==1) test <- resmaf
-    for(k in 1:lng){
-      # the reference is ordered so as to have 50% of the MAFs values as positive
-      if(sum(test$x[,k]>=0)/length(test$x[,k])<0.5){
-        test$x[,k] <- test$x[,k] * -1
-        test$rotation[,k] <- test$rotation[,k] * -1
-      }
-      # the following realisations will be order so as to have 50% of the MAFs values
-      # with the same sign than the reference
-      if(sum((test$x[,k]>=0)==(resmaf$x[,k]>=0),na.rm=T)/length(test$x[,k])<0.5){
-        resmaf$x[,k] <- resmaf$x[,k] * -1
-        resmaf$rotation[,k] <- resmaf$rotation[,k] * -1
-      }
-    }
-
-    # save results in outputs
-    for(k in 1:lng){
-      res_noise[[k]][[1]] <- rbind(res_noise[[k]][[1]], as.numeric(resmaf$x[,k]))
-      res_noise[[k]][[2]] <- rbind(res_noise[[k]][[2]], as.numeric(resmaf$rotation[,k]))
-    }
-
-  }
-
-  # Compute and save
-  # - median loadings of the nreal previous realisations
-  # - median MAF = median loadings * centred and normed indices
-  for(j in 1:lng){
-    med.rot <- apply(res_noise[[j]][[2]],2, stats::median)
-    resmaf$rotation[,j] <- med.rot
-    rot <- matrix(rep(med.rot,nij[1]),nij[1],byrow=T)
-    xsum <- apply(rot * xsc,1,sum)
-    resmaf$rotation[,j] <- resmaf$rotation[,j]/stats::sd(xsum)
-    resmaf$x[,j] <- xsum/stats::sd(xsum)
-  }
-
-  # Return 3 plots for each MAF
-
-  op <- par(mfrow = c(nmafs,3), mar = c(2.5,2.5,2.5,1))
-  for(j in 1:nmafs){
-    # MFA Trend
-    maf_x <- as.numeric(resmaf$x[,j])
-    barplot(aperm(matrix(maf_x)), names.arg = matrix(time), space = 0, col = "steelblue2",
-      cex.names = 1.1,main = paste("median MAF ", j, sep = ""),
-      ylim = c(-3.5,3.5), axis.lty = 1,  xaxs = "i")
-
-    # Variogram
-    maf_vg <- calc_reg_1dvario(maf_x)
-    plot(maf_vg, type= "o", main= "", ylim = c(0, max(maf_vg[ ,2])))
-    abline(h = 1, lty = 2)
-    title("Variogram")
-
-    # Loadings
-    maf_rot <- resmaf$rotation[,j]
-    # choose top contr_var
-    choose <- rev(order(abs(maf_rot)))[1:contr_var]
-    maf_rot <- maf_rot[choose]
-    vars_j <- vars[choose]
-    # sort
-    ord <- order(maf_rot)
-    maf_rot <- maf_rot[ord]
-    vars_j <- vars_j[ord]
-    nvc <- length(vars_j)
-    mr_pos <- which(maf_rot > 0)
-    mr_neg <- which(maf_rot < 0)
-    fac <- 2
-    x_min <- max(abs(maf_rot))*-fac
-    x_max <- max(abs(maf_rot))*fac
-    plot(0, 0, xlim = c(x_min, x_max), ylim = c(0, nvc),
-      type = "n", xlab = "", ylab = "", yaxt = "n", main = "Loadings")
-    segments(x0 = rep(0, nvc), x1 = maf_rot, y0 = seq(0.5, nvc-.5, 1), y1 = seq(0.5, nvc-.5, 1))
-    abline(v = 0)
-    Col <- rep(1, nvc)
-    Col[seq(1, nvc, 1)[abs(maf_rot) == max(abs(maf_rot))]] <- 3
-    if (length(mr_pos) > 0) {
-      text(maf_rot[mr_pos], seq(0.5, nvc-.5, 1)[mr_pos], vars_j[mr_pos], cex = .9, col = Col[mr_pos], pos = 4)
-    }
-    if (length(mr_neg) > 0) {
-      text(maf_rot[mr_neg], seq(0.5, nvc-.5, 1)[mr_neg], vars_j[mr_neg], cex = .9, col = Col[mr_neg], pos = 2)
-    }
-
-
-  }
-    par(op)
-
-
-  # Return numeric results
-
-  out <- list(
-    scores = resmaf$x,
-    loadings = resmaf$rotation
-  )
-  return(out)
-
-
-}
-
-
-
-maf <- function(formula,data) {
-
-  if(length(data[,1])==1) stop("Cannot be run with only one sample.")
-
-  # PCA of original variables
-  bidpr<-stats::prcomp(formula,data,scale=TRUE)
-  bidn<-sum(bidpr$sdev>1e-10)
-  if(bidn<2) stop("stop: strictly less than two PCs")
-  bidpr$sdev<-bidpr$sdev[1:bidn]
-  bidpr$rotation<-bidpr$rotation[,1:bidn]
-  bidpr$x<-bidpr$x[,1:bidn]
-
-  # normalization of PCs
-  bidim<-dim(bidpr$x)
-  bidprsdev<-matrix(rep(bidpr$sdev,each=bidim[1]),ncol=bidim[2])
-  bidprx<-bidpr$x/bidprsdev
-  bidim<-dim(bidpr$rotation)
-  bidprsdev<-matrix(rep(bidpr$sdev,each=bidim[1]),ncol=bidim[2])
-  bidpr$rotation<-bidpr$rotation/bidprsdev
-
-  # PCA of increments of normalized PCs
-  biddtab<- diff(bidprx)
-  biddpr<-stats::prcomp(biddtab,center=F,scale=F)
-  biddnmaf<-sum(biddpr$sdev>1e-10)
-  if(biddnmaf<2) stop("stop: strictly less than two MAFs")
-  biddpr$sdev<-biddpr$sdev[1:biddnmaf]
-  biddpr$rotation<-biddpr$rotation[,1:biddnmaf]
-  biddpr$x<-biddpr$x[,1:biddnmaf]
-  biddx<-biddpr$x
-
-  # store complete transformation as a PCA output from original data
-  maf<-bidpr
-  maf$sdev <- biddpr$sdev
-  biddim<-dim(biddpr$x)
-  maf$rotation<-bidpr$rotation%*%biddpr$rotation
-  maf$x<-bidprx%*%biddpr$rotation
-
-  # reverse the order of MAFs
-  # in order to begin with lowest eigenvalues i.e. lowest variograms
-
-  maf$sdev<-.5*(maf$sdev^2)*(biddim[1]-1)/biddim[1]
-  names(maf$sdev)<-rev(names(maf$sdev))
-  maf$sdev<-rev(maf$sdev)
-  lng<-length(dimnames(maf$rotation)[[2]])
-  maf$rotation <- maf$rotation[,seq(lng,1)]
-  dimnames(maf$rotation)[[2]]<-paste("MAF",1:lng,sep="")
-  lng<-length(dimnames(maf$x)[[2]])
-  maf$x<-maf$x[,seq(lng,1)]
-  dimnames(maf$x)[[2]]<-paste("MAF",1:lng,sep="")
-  maf
-
-}
-
-
-
-
-calc_reg_1dvario <- function(x) {
-lng<-length(x)
-res<-numeric(0)
-    for(h in 1:(lng-1))
-    res<-c(res,0.5*mean(diff(x,lag=h)^2,na.rm=T))
-res<-cbind(1:(lng-1),res)
-res
-}
-
-calc_irreg_1dvario <- function(t, x) {
-  d<-abs(outer(t,t,"-"))
-  h<-sort(unique(d[lower.tri(d)]))
-  res<-numeric(0)
-  for (k in 1:length(h)) {
-    d2x<-outer(x,x,"-")^2
-    res<-c(res,0.5*mean(d2x[d==h[k]],na.rm=T)) }
-  res<-cbind(h,res)
-  res
-}
--- a/R/plot_statespace_ch.R
+++ b/R/plot_statespace_ch.R
+#' Convex hull plot
+#'
+#' \code{plot_statespace_ch} generates a scatter plot of all observed combinations
+#' in the 2-dimensional indicator space including the convex hull from defined
+#' reference conditions and the current period.
+#'
+#' @param x An object from the \code{\link{statespace_ch}} function.
+#' @param col_ch_ref Color of reference period (for points, path, and labels).
+#' @param col_ch_cur Color of current period (for points, path, and labels).
+#' @param size_time Text size of the time labels (both periods).
+#'
+#' @return The function returns a \code{\link[ggplot2]{ggplot}} object.
+#'
+#' @author Saskia A. Otto
+#'
+#' @export
+#' @examples
+#' x <- rnorm(25)
+#' y <- rnorm(25)
+#' ch <- statespace_ch(x, y, time = 1981:2005, period_ref = 1981:1986, period_current = 1995:2005)
+#' plot_statespace_ch(ch)
+plot_statespace_ch <- function(x, col_ch_ref = "red",
+  col_ch_cur = "blue", size_time = 4) {
+
+  # Data input validation --------
+  if (missing(x)) {
+    stop("Argument x is missing.")
+  }
+  if (!is.list(x)) {
+    stop("x has to be a list (output of statespace_ch() function)!")
+  }
+  # -------------------------
+
+  # Get indices of selected periods
+  index_ref <- match(x$period_ref, x$time)
+  index_current <- match(x$period_current, x$time)
+
+  # Data preparation for plots
+  time_char <- as.character(x$time)
+  xy_full <- x$xy
+  xy_ref <- xy_full %>% dplyr::slice(index_ref)
+  xy_cur <- xy_full %>% dplyr::slice(index_current)
+  xrange <- c((min(x$xy$x, na.rm = TRUE) - abs(min(x$xy$x,
+    na.rm = TRUE) * 0.025)), (max(x$xy$x, na.rm = TRUE) +
+    abs(max(x$xy$x, na.rm = TRUE)) * 0.025))
+  yrange <- c((min(x$xy$y, na.rm = TRUE) - abs(min(x$xy$y,
+    na.rm = TRUE) * 0.025)), (max(x$xy$y, na.rm = TRUE) +
+    abs(max(x$xy$y, na.rm = TRUE)) * 0.025))
+  xnudge <- diff(xrange)/50
+  ynudge <- diff(yrange)/50
+
+  # Proportion shown as annotation
+  in_prop <- round((sum(x$inside_ch_ref)/length(x$inside_ch_ref) *
+    100), 0)
+
+
+  # Plot -----------------------------------
+
+  # Set general layout theme
+  ggplot2::theme_set(ggplot2::theme_bw())
+
+  # Scatter plot
+  chplot <- ggplot2::ggplot(xy_full,
+    ggplot2::aes(x = !!rlang::sym("x"), y = !!rlang::sym("y"))) +
+    ggplot2::geom_point(shape = 1, col = "black") +
+    ggplot2::xlab("x") + ggplot2::ylab("y") + ggplot2::xlim(xrange) +
+    ggplot2::ylim(yrange)
+
+  # Add convex hull of reference period
+  chplot <- chplot +
+  	ggplot2::geom_point(data = xy_ref, shape = 19, col = col_ch_ref) +
+  	ggplot2::geom_path(data = xy_ref[x$ch_ref,], col = col_ch_ref) +
+  	ggplot2::geom_text(data = xy_ref, label = time_char[index_ref],
+    col = col_ch_ref, nudge_x = xnudge, nudge_y = ynudge,
+    size = size_time)
+
+  # Add convex hull of current period
+  chplot <- chplot +
+  	ggplot2::geom_point(data = xy_cur, shape = 19, col = col_ch_cur) +
+  	ggplot2::geom_path(data = xy_cur[x$ch_cur, ], col = col_ch_cur) +
+  	ggplot2::geom_text(data = xy_cur, label = time_char[index_current],
+    col = col_ch_cur, nudge_x = xnudge, nudge_y = ynudge,
+    size = size_time)
+
+  # General layout
+  chplot <- chplot +
+  	ggplot2::ggtitle(paste0("Proportion of recent time period within reference space: ",
+    in_prop, " %")) +
+  	ggplot2::theme(panel.grid.major = ggplot2::element_blank(),
+    panel.grid.minor = ggplot2::element_blank(),
+    plot.title = ggplot2::element_text(lineheight = 0.8,
+      face = "plain", size = 16), axis.text = ggplot2::element_text(size = 12),
+    axis.title = ggplot2::element_text(size = 14))
+
+  ### end of function ###
+  return(chplot)
+
+}
--- a/R/plot_statespace_ed.R
+++ b/R/plot_statespace_ed.R
+#' Time series plot of Euclidean distance
+#'
+#' \code{plot_statespace_ed} generates a time series plot of the Euclidean
+#' distance in indicator state space from a defined reference conditions.
+#'
+#' @param x The output tibble from the \code{\link{statespace_ed}} function.
+#'
+#' @return The function returns a \code{\link[ggplot2]{ggplot}} object.
+#'
+#' @author Saskia A. Otto
+#'
+#' @export
+#' @examples
+#' x <- as.data.frame(matrix(rnorm(250), ncol = 10))
+#' ed <- statespace_ed(x, time = 1976:2000, ref_time = 1976)
+#' plot_statespace_ed(ed)
+plot_statespace_ed <- function(x) {
+
+  # Data input validation ---------
+  if (missing(x)) {
+    stop("Argument x is missing.")
+  }
+  # x <- INDperform::check_input_tbl(x, tbl_name = "x", parent_func = "statespace_ed()")
+  # -------------------------------
+
+  # Set general layout theme
+  ggplot2::theme_set(ggplot2::theme_bw())
+
+  edplot <- ggplot2::ggplot(x,
+    ggplot2::aes(x = !!rlang::sym("time"), y = !!rlang::sym("ed"))) +
+    ggplot2::geom_smooth(col = "firebrick3",
+    fill = "cadetblue", alpha = 0.2) +
+  	ggplot2::geom_line(col = "black", size = 0.5) +
+  	ggplot2::geom_point(shape = 16, col = "black", size = 1.5) +
+  	ggplot2::ylab(paste0("Euclidean distance s from reference point (",
+    x$time[which(x$ref_time == TRUE)], ")")) +
+    ggplot2::xlab("") + # General layout
+  		ggplot2::theme(panel.grid.major = ggplot2::element_blank(),
+    panel.grid.minor = ggplot2::element_blank()) +
+    ggplot2::theme(axis.text = ggplot2::element_text(size = 12),
+      axis.title = ggplot2::element_text(size = 14))
+
+  return(edplot)
+}
--- a/R/statespace_ch.R
+++ b/R/statespace_ch.R
+#' Convex hull in 2-dimensional space
+#'
+#' \code{statespace_ch} calculates the convex hull in 2-dimensional space, e.g.
+#' for two selected indicators, using the \code{\link[tripack]{tri.mesh}}
+#' function.
+#'
+#' @param x The coordinates of points in the first dimension
+#'  (e.g. indicator 1 or PC1 scores from a PCA).
+#' @param y The coordinates of points in the second dimension
+#'  (e.g. indicator 2 or the PC2 scores from a PCA).
+#' @param time A vector containing the actual time series.
+#' @param period_ref Vector of time units (e.g. years) used as reference
+#'  period (minimum of 3 time units required).
+#' @param period_current Vector of time units (e.g. years) used as current
+#'  period to compare with the reference period (minimum of 3 time units
+#'  required).
+#'
+#' @details
+#' \code{statespace_ch} implements a second state space approach to assess
+#' the development of a suite of ecological state indicators
+#' (Otto \emph{et al.} 2018, Tett \emph{et al.} 2008). While unidimensional
+#' approaches such as the Euclidean distance (see \code{\link{statespace_ed}})
+#' feature the disadvantage of defining one particular year or time step as
+#' reference condition, this approach accounts for inter-annual variation by
+#' defining a reference domain in state space based on several years: more
+#' recent observations might be characterized as either within or outside this
+#' domain.
+#'
+#' The reference domain can be described by a convex hull, which is a multivariate
+#' measure derived from computational geometry representing the smallest convex set
+#' containing all the points in Euclidean plane or in Euclidean space
+#' (de Berg \emph{et al.}, 2008). While the convex hull can be calculated for
+#' high-dimensional data, reducing the space to two dimensions allows for an easier
+#' visualization and interpretation. Therefore, the \code{statespace_ch} function only
+#' calculates the convex hull for two dimensions, i.e. for two indicators or principal
+#' axes obtained by multivariate analysis such as a Principal Component Analysis
+#' (PCA).
+#'
+#' @return
+#' The function returns a list with the following elements
+#' \describe{
+#'   \item{\code{ch_ref}}{A vector of the position of the convex hull of the
+#'   reference period.}
+#'   \item{\code{ch_cur}}{A vector of the position of the convex hull of the
+#'   current period.}
+#'   \item{\code{inside_ch_ref}}{A logical vector indicating whether each
+#'   year (time step) of the current period lies inside (TRUE) or outside
+#'   (FALSE) the state space domain of the reference period.}
+#'   \item{\code{xy}}{A data frame of the x and y coordinates.}
+#'   \item{\code{time}}{A vector of the full time series.}
+#'   \item{\code{period_ref}}{A vector of years (time steps) defined as
+#'   the reference period.}
+#'   \item{\code{period_current}}{A vector of years (time steps) defined as
+#'   the current period.}
+#' }
+#'
+#'
+#' @references
+#' de Berg, M., Cheong, O., van Kreveld, M., Overmars, M. (2008) Computational
+#' Geometry - Algorithms and Applications. Springer Berlin Heidelberg, 386pp.
+#'
+#' Otto, S.A., Kadin, M., Casini, M., Torres, M.A., Blenckner, T. (2018)
+#' A quantitative framework for selecting and validating food web indicators.
+#' \emph{Ecological Indicators}, 84: 619-631,
+#' doi: https://doi.org/10.1016/j.ecolind.2017.05.045
+#'
+#' Tett, P., Carreira, C., Mills, D.K., van Leeuwen, S., Foden, J., Bresnan, E.,
+#' Gowen, R.J. (2008) Use of a Phytoplankton Community Index to assess the
+#' health of coastal waters. \emph{ICES Journal of Marine Science} 65, 1475-1482.
+#'
+#' @seealso \code{\link[tripack]{tri.mesh}} for the computation of the convex hull.
+#' @author Saskia A. Otto
+#'
+#' @export
+#' @examples
+#' x <- rnorm(25)
+#' y <- rnorm(25)
+#' statespace_ch(x, y, time = 1981:2005, period_ref = 1981:1986, period_current = 1995:2005)
+statespace_ch <- function(x, y, time, period_ref, period_current) {
+
+  # Data input validation -----------------------
+
+  # Testing for required package installation
+  if (!requireNamespace("tripack", quietly = TRUE)) {
+    stop("The package  `tripack` is needed for this function to work. Please install it.",
+      call. = FALSE)
+  }
+
+  if (missing(x)) {
+    stop("Argument x is missing.")
+  }
+  if (missing(y)) {
+    stop("Argument y is missing.")
+  }
+  if (missing(time)) {
+    stop("Argument time is missing.")
+  }
+  if (missing(period_ref)) {
+    stop("Argument period_ref is missing.")
+  }
+  if (missing(period_current)) {
+    stop("Argument period_current is missing.")
+  }
+
+  # Check input vectors
+  # x <- check_input_vec(x, "x")
+  # y <- check_input_vec(y, "y")
+  # time <- INDperform::check_input_vec(time, "argument time")
+	 # (I added explicitly 'argument' to NOT test for regularity in time steps!)
+
+  if (length(x) != length(y) | length(x) != length(time)) {
+    stop("One of the x, y, or time vectors has a different length!")
+  }
+
+  # Testing input of both periods
+  if (any(period_ref %in% time == FALSE) | any(period_current %in%
+    time == FALSE)) {
+    stop("At least one of the defined periods is (to some extent) outside the given time series.")
+  }
+
+  if (length(period_ref) <= 2 | length(period_current) <=
+    2) {
+    stop("At least one of the defined periods has not a minimum of 3 time units.")
+  }
+
+  # Check if periods contain any NA
+  if (any(is.na(x[match(period_ref, time)])) | any(is.na(y[match(period_ref,
+    time)]))) {
+    stop(paste0("One of your indicators (x and/or y) has missing values in the reference ",
+      "period. Please select another period or fill the missing values (e.g. with the mean, ",
+      "median or interpolate)."))
+  }
+  if (any(is.na(x[match(period_current, time)])) |
+    any(is.na(y[match(period_current, time)]))) {
+    stop(paste0("One of your indicators (x and/or y) has missing values in the current ",
+      "period. Please select another period or fill the missing values (e.g. with the mean, ",
+      "median or interpolate)."))
+  }
+
+  # --------------------------------------------
+
+  index_ref <- match(period_ref, time)
+  index_current <- match(period_current, time)
+
+  # Convex hull computation
+  tr <- tripack::tri.mesh(x = x[index_ref], y = y[index_ref])
+  inside_ch_ref <- tripack::in.convex.hull(tr, x = x[index_current],
+    y = y[index_current])
+
+  # Helper function convexhull (returns convex hull
+  # around data points)
+  convexhull <- function(xcoord, ycoord) {
+    # xcoords = x ycoord = y
+    hpts <- grDevices::chull(x = xcoord, y = ycoord)
+    hpts <- c(hpts, hpts[1])
+    return(hpts)
+  }
+
+  # Get position of convex hull
+  ch_ref <- convexhull(xcoord = x[index_ref], ycoord = y[index_ref])
+  ch_cur <- convexhull(xcoord = x[index_current],
+    ycoord = y[index_current])
+
+  # Export list for plot
+  out <- list(ch_ref = ch_ref, ch_cur = ch_cur, inside_ch_ref = inside_ch_ref,
+    xy = data.frame(x, y), time = time, period_ref = period_ref,
+    period_current = period_current)
+
+  ### end of function ###
+  return(out)
+}
--- a/R/statespace_ed.R
+++ b/R/statespace_ed.R
+#' Euclidean distance in state space
+#'
+#' \code{statespace_ed} generates a time series of Euclidean distances from
+#'  defined reference conditions to assess the development of a suite of
+#'  ecological state indicators.
+#'
+#' @param x A data frame, tibble, matrix or vector of selected indicator(s).
+#' @param time A vector containing the actual time series.
+#' @param ref_time The reference time (single point in time, e.g. specific
+#'  year) on which to base the Euclidean distance. Default is set to the
+#'  first time point.
+#' @param na_rm A logical value indicating whether rows with NA values should
+#'  be stripped before the computation proceeds. Default is set to TRUE. If set
+#'  to FALSE and data contains NAs, the function returns NA.
+#'
+#' @details
+#' This function implements an approach adopted from Tett \emph{et al.} (2013) to
+#' assess changes in the ecosystem state by studying trajectories in state space.
+#' State space is defined here as the n-dimensional space of possible locations of
+#' state variables or indicators. For a robust suite of indicators the unidimensional
+#' Euclidean distance between each year (or any other time step) and a reference
+#' year in state space is calculated. That means, the function calculates the square
+#' root of the sum of squared distances between each standardized indicator value
+#' in a specific year and its reference value, which is defined by the user.
+#'
+#' @return
+#' \code{statespace_ed} returns a data frame with the time vector \code{time}, the
+#' Euclidean distance  \code{ed}, and a logical vector  \code{ref_time} indicating
+#' the time step defined as reference.
+#'
+#' @references
+#' Tett, P., Gowen, R.J., Painting, S.J., Elliott, M., Forster, R., Mills, D.K.,
+#' Bresnan, E., Capuzzo, E., Fernandes, T.F., Foden, J., Geider, R.J., Gilpin, L.C.,
+#' Huxham, M., McQuatters-Gollop, A.L., Malcolm, S.J., Saux-Picart, S., Platt, T.,
+#' Racault, M.F., Sathyendranath, S., van der Molen, J., Wilkinson, M. (2013)
+#' Framework for understanding marine ecosystem health. \emph{Marine Ecology
+#' Progress Series.} 494, 1-27.
+#'
+#' @author Saskia A. Otto
+#'
+#' @export
+#'
+#' @examples
+#' x <- as.data.frame(matrix(rnorm(250), ncol = 10))
+#' statespace_ed(x, time = 1976:2000, ref_time = 1976)
+#'
+statespace_ed <- function(x, time, ref_time = NULL, na_rm = TRUE) {
+
+  # Data input validation --------------------
+	 if (missing(x)) {
+	 	stop("Argument x is missing.")
+	 }
+	 if (missing(time)) {
+	 	stop("Argument time is missing.")
+	 }
+	 # Check for correct input format
+	 if (any(class(x) == "list")) {
+	 	 stop("x cannot be a list.")
+	 }
+	 #time <- INDperform::check_input_vec(time, "argument time")
+	 # (I added explicitly 'argument' to NOT test for regularity in time steps!)
+
+	 if (na_rm == FALSE) {
+	 	return(NA)
+	 } else {
+		 y <- data.frame(time = time)
+	 	 keep <- stats::complete.cases(x)
+	 	 x <- x[keep, ]
+	 	 time <- time[keep]
+	 	 if (is.null(ref_time)) {
+    		id <- 1
+  		} else {
+	 	 		# Check if reference time still included and get new position
+	 	 		if (!ref_time %in% time) {
+	 		  		stop("The defined reference time is outside the time series OR by removing rows with NAs (if argument 'na.rm' set to TRUE) the reference time got also removed. Please choose another reference time or impute missing values.")
+	 	 		} else {
+	 		  		id <- which(time == ref_time)
+	 	 		}
+  		}
+	 }
+
+	 # --------------------------------
+
+  # Standardize indicator time series
+  x_stand <- scale(x)  # without year and sd
+
+  # Calculate the n-dimensional Euclidean distance
+  # between each time unit and the reference time
+  # (can be starting point)
+  df <- x_stand
+  for (i in 1:nrow(x_stand)) {
+    df[i, ] <- df[i, ] - x_stand[id, ]
+  }
+  calc_euc <- function(x) sqrt(sum(x^2))
+  ed <- apply(df, FUN = calc_euc, MARGIN = 1)
+  out <- data.frame(time = time, ed = ed, ref_time = FALSE)
+  out$ref_time[id] <- TRUE
+  out <- suppressMessages( dplyr::left_join(y, out) )
+  out$ref_time[is.na(out$ref_time)] <- FALSE
+
+
+  ### END OF FUNCTION ###
+  return(out)
+}
--- a/R/trafficlight.R
+++ b/R/trafficlight.R
@@ -69,7 +69,7 @@

 trafficlight <- function(x, time = NULL, sort_5yrmean = TRUE, sort_vec = NULL,
  method = "quantiles", probs = seq(0, 1, 0.2), quantile_type = 7, intervals = 5,
-  cols = c("green3", "greenyellow", "yellow", "gold", "red"),
+  cols = c("red","gold", "yellow", "greenyellow", "green3"),
  main = "", xlab = "", ylab = "", adj_xlab = NULL, adj_ylab = NULL,
  hadj_x = 0.5, vadj_x = -0.7, hadj_y = -0.7, vadj_y = 0.3,
  madj_bottom = 0, madj_left = 0, madj_top = 0, madj_right = 0,

--- a/man/mafa.Rd
+++ b/man/mafa.Rd
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/mafa.R
-\name{mafa}
-\alias{mafa}
-\title{Min/Max Autocorrelation Factors Analysis (MAFA) in time (or 1D)}
-\usage{
-mafa(x, time = NULL, nreal = 1000, nmafs = 3, seed = NULL, contr_var = NULL)
-}
-\arguments{
-\item{x}{matrix or data frame containing time series of multiple variables.}
-
-\item{time}{vector of time units that will be used for the output.}
-
-\item{nreal}{integer; the number of realizations that should be performed.}
-
-\item{nmafs}{the number of retained MAFs (default is 3).}
-
-\item{seed}{integer; a number for the random number generator state.}
-
-\item{contr_var}{integer; the number of variables with highest contribution to show in the loading plots.}
-}
-\description{
-Min/Max Autocorrelation Factors Analysis (MAFA) in time (or 1D)
-}
-\details{
-MAFA is a multivariate statistical method, that allows the set of initial variables to be
-decomposed into factors. The autocorrelation of which decreases, or the variogram increases,
-when going from the first factors to the last ones. Hence the very first factors extract
-the part of the variables which is the most continuous in time.
-
-The function for this code is adopted from Wollez et al. 2009 (\url{https://doi.org/10.1051/alr/2009020})
-}
-\examples{
-df <- data.frame(variable1variabl = 1:20, variable2= rnorm(20, 100, 30),
- variable3 = 1:20 + rnorm(20))
-mafa(x = df, time = 1981:2000)
-df <- matrix(rnorm(1000), ncol = 50)
-colnames(df) <- paste0(letters[1:25], rep(1:2,each = 25))
-mafa(df, contr_var = 10)
-
-}
-\seealso{
-\url{https://doi.org/10.1051/alr/2009020}
-}
--- a/man/plot_statespace_ch.Rd
+++ b/man/plot_statespace_ch.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_statespace_ch.R
+\name{plot_statespace_ch}
+\alias{plot_statespace_ch}
+\title{Convex hull plot}
+\usage{
+plot_statespace_ch(x, col_ch_ref = "red", col_ch_cur = "blue", size_time = 4)
+}
+\arguments{
+\item{x}{An object from the \code{\link{statespace_ch}} function.}
+
+\item{col_ch_ref}{Color of reference period (for points, path, and labels).}
+
+\item{col_ch_cur}{Color of current period (for points, path, and labels).}
+
+\item{size_time}{Text size of the time labels (both periods).}
+}
+\value{
+The function returns a \code{\link[ggplot2]{ggplot}} object.
+}
+\description{
+\code{plot_statespace_ch} generates a scatter plot of all observed combinations
+in the 2-dimensional indicator space including the convex hull from defined
+reference conditions and the current period.
+}
+\examples{
+x <- rnorm(25)
+y <- rnorm(25)
+ch <- statespace_ch(x, y, time = 1981:2005, period_ref = 1981:1986, period_current = 1995:2005)
+plot_statespace_ch(ch)
+}
+\author{
+Saskia A. Otto
+}
--- a/man/plot_statespace_ed.Rd
+++ b/man/plot_statespace_ed.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_statespace_ed.R
+\name{plot_statespace_ed}
+\alias{plot_statespace_ed}
+\title{Time series plot of Euclidean distance}
+\usage{
+plot_statespace_ed(x)
+}
+\arguments{
+\item{x}{The output tibble from the \code{\link{statespace_ed}} function.}
+}
+\value{
+The function returns a \code{\link[ggplot2]{ggplot}} object.
+}
+\description{
+\code{plot_statespace_ed} generates a time series plot of the Euclidean
+distance in indicator state space from a defined reference conditions.
+}
+\examples{
+x <- as.data.frame(matrix(rnorm(250), ncol = 10))
+ed <- statespace_ed(x, time = 1976:2000, ref_time = 1976)
+plot_statespace_ed(ed)
+}
+\author{
+Saskia A. Otto
+}
--- a/man/statespace_ch.Rd
+++ b/man/statespace_ch.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/statespace_ch.R
+\name{statespace_ch}
+\alias{statespace_ch}
+\title{Convex hull in 2-dimensional space}
+\usage{
+statespace_ch(x, y, time, period_ref, period_current)
+}
+\arguments{
+\item{x}{The coordinates of points in the first dimension
+(e.g. indicator 1 or PC1 scores from a PCA).}
+
+\item{y}{The coordinates of points in the second dimension
+(e.g. indicator 2 or the PC2 scores from a PCA).}
+
+\item{time}{A vector containing the actual time series.}
+
+\item{period_ref}{Vector of time units (e.g. years) used as reference
+period (minimum of 3 time units required).}
+
+\item{period_current}{Vector of time units (e.g. years) used as current
+period to compare with the reference period (minimum of 3 time units
+required).}
+}
+\value{
+The function returns a list with the following elements
+\describe{
+  \item{\code{ch_ref}}{A vector of the position of the convex hull of the
+  reference period.}
+  \item{\code{ch_cur}}{A vector of the position of the convex hull of the
+  current period.}
+  \item{\code{inside_ch_ref}}{A logical vector indicating whether each
+  year (time step) of the current period lies inside (TRUE) or outside
+  (FALSE) the state space domain of the reference period.}
+  \item{\code{xy}}{A data frame of the x and y coordinates.}
+  \item{\code{time}}{A vector of the full time series.}
+  \item{\code{period_ref}}{A vector of years (time steps) defined as
+  the reference period.}
+  \item{\code{period_current}}{A vector of years (time steps) defined as
+  the current period.}
+}
+}
+\description{
+\code{statespace_ch} calculates the convex hull in 2-dimensional space, e.g.
+for two selected indicators, using the \code{\link[tripack]{tri.mesh}}
+function.
+}
+\details{
+\code{statespace_ch} implements a second state space approach to assess
+the development of a suite of ecological state indicators
+(Otto \emph{et al.} 2018, Tett \emph{et al.} 2008). While unidimensional
+approaches such as the Euclidean distance (see \code{\link{statespace_ed}})
+feature the disadvantage of defining one particular year or time step as
+reference condition, this approach accounts for inter-annual variation by
+defining a reference domain in state space based on several years: more
+recent observations might be characterized as either within or outside this
+domain.
+
+The reference domain can be described by a convex hull, which is a multivariate
+measure derived from computational geometry representing the smallest convex set
+containing all the points in Euclidean plane or in Euclidean space
+(de Berg \emph{et al.}, 2008). While the convex hull can be calculated for
+high-dimensional data, reducing the space to two dimensions allows for an easier
+visualization and interpretation. Therefore, the \code{statespace_ch} function only
+calculates the convex hull for two dimensions, i.e. for two indicators or principal
+axes obtained by multivariate analysis such as a Principal Component Analysis
+(PCA).
+}
+\examples{
+x <- rnorm(25)
+y <- rnorm(25)
+statespace_ch(x, y, time = 1981:2005, period_ref = 1981:1986, period_current = 1995:2005)
+}
+\references{
+de Berg, M., Cheong, O., van Kreveld, M., Overmars, M. (2008) Computational
+Geometry - Algorithms and Applications. Springer Berlin Heidelberg, 386pp.
+
+Otto, S.A., Kadin, M., Casini, M., Torres, M.A., Blenckner, T. (2018)
+A quantitative framework for selecting and validating food web indicators.
+\emph{Ecological Indicators}, 84: 619-631,
+doi: https://doi.org/10.1016/j.ecolind.2017.05.045
+
+Tett, P., Carreira, C., Mills, D.K., van Leeuwen, S., Foden, J., Bresnan, E.,
+Gowen, R.J. (2008) Use of a Phytoplankton Community Index to assess the
+health of coastal waters. \emph{ICES Journal of Marine Science} 65, 1475-1482.
+}
+\seealso{
+\code{\link[tripack]{tri.mesh}} for the computation of the convex hull.
+}
+\author{
+Saskia A. Otto
+}
--- a/man/statespace_ed.Rd
+++ b/man/statespace_ed.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/statespace_ed.R
+\name{statespace_ed}
+\alias{statespace_ed}
+\title{Euclidean distance in state space}
+\usage{
+statespace_ed(x, time, ref_time = NULL, na_rm = TRUE)
+}
+\arguments{
+\item{x}{A data frame, tibble, matrix or vector of selected indicator(s).}
+
+\item{time}{A vector containing the actual time series.}
+
+\item{ref_time}{The reference time (single point in time, e.g. specific
+year) on which to base the Euclidean distance. Default is set to the
+first time point.}
+
+\item{na_rm}{A logical value indicating whether rows with NA values should
+be stripped before the computation proceeds. Default is set to TRUE. If set
+to FALSE and data contains NAs, the function returns NA.}
+}
+\value{
+\code{statespace_ed} returns a data frame with the time vector \code{time}, the
+Euclidean distance  \code{ed}, and a logical vector  \code{ref_time} indicating
+the time step defined as reference.
+}
+\description{
+\code{statespace_ed} generates a time series of Euclidean distances from
+ defined reference conditions to assess the development of a suite of
+ ecological state indicators.
+}
+\details{
+This function implements an approach adopted from Tett \emph{et al.} (2013) to
+assess changes in the ecosystem state by studying trajectories in state space.
+State space is defined here as the n-dimensional space of possible locations of
+state variables or indicators. For a robust suite of indicators the unidimensional
+Euclidean distance between each year (or any other time step) and a reference
+year in state space is calculated. That means, the function calculates the square
+root of the sum of squared distances between each standardized indicator value
+in a specific year and its reference value, which is defined by the user.
+}
+\examples{
+x <- as.data.frame(matrix(rnorm(250), ncol = 10))
+statespace_ed(x, time = 1976:2000, ref_time = 1976)
+
+}
+\references{
+Tett, P., Gowen, R.J., Painting, S.J., Elliott, M., Forster, R., Mills, D.K.,
+Bresnan, E., Capuzzo, E., Fernandes, T.F., Foden, J., Geider, R.J., Gilpin, L.C.,
+Huxham, M., McQuatters-Gollop, A.L., Malcolm, S.J., Saux-Picart, S., Platt, T.,
+Racault, M.F., Sathyendranath, S., van der Molen, J., Wilkinson, M. (2013)
+Framework for understanding marine ecosystem health. \emph{Marine Ecology
+Progress Series.} 494, 1-27.
+}
+\author{
+Saskia A. Otto
+}
--- a/man/trafficlight.Rd
+++ b/man/trafficlight.Rd
@@ -13,7 +13,7 @@ trafficlight(
  probs = seq(0, 1, 0.2),
  quantile_type = 7,
  intervals = 5,
-  cols = c("green3", "greenyellow", "yellow", "gold", "red"),
+  cols = c("red", "gold", "yellow", "greenyellow", "green3"),
  main = "",
  xlab = "",
  ylab = "",