Initial Commit

93503b1b · gdutz · 93503b1b · 93503b1b · 93503b1b · 93503b1b
Commit 93503b1b authored Aug 9, 2021 by gdutz
--- a/.Rbuildignore
+++ b/.Rbuildignore
+^packrat/
+^\.Rprofile$
+^.*\.Rproj$
+^\.Rproj\.user$
--- a/.gitignore
+++ b/.gitignore
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
--- a/DESCRIPTION
+++ b/DESCRIPTION
+Package: leo
+Type: Package
+Title: Useful functions for LEO surveys
+Version: 0.1.31
+Author: Gregor Dutz
+Maintainer: Gregor Dutz <gregor.dutz@uni-hamburg.de>
+Description: It does useful stuff.
+License: What license is it under?
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 7.1.1
--- a/NAMESPACE
+++ b/NAMESPACE
+# Generated by roxygen2: do not edit by hand
+export(leo.format)
+export(leo_getdesign)
+export(leo_svydesign)
+export(leo_tab)
+export(leo_table)
--- a/R/leo_design.R
+++ b/R/leo_design.R
+#' Creates survey design objects for LEO 2010 and 2018.
+#'
+#' @param leodata Original dataframe of LEO 2010 or LLEO 2018.
+#' @param year Either 2010 or 2018.
+#' @return A survey design object.
+#'
+#' @export
+leo_getdesign <- function(leodata, year = 2018) {
+  library(purrr)
+  library(dplyr)
+  # Lower variable names
+  names(leodata) <- tolower(names(leodata))
+  if (year == 2010) {
+    # logits entfernen
+    leodata <- leodata %>% select(-matches("^PV[1-5]$"))
+    # Level und PV 2010 umbenennen
+    data.table::setnames(leodata,
+                         old=c("pv1_62", "pv2_62", "pv3_62", "pv4_62", "pv5_62"),
+                         new=c("pv1", "pv2", "pv3", "pv4", "pv5"))
+    data.table::setnames(leodata,
+                         old=c("pv1_alphalevel_62", "pv2_alphalevel_62", "pv3_alphalevel_62", "pv4_alphalevel_62", "pv5_alphalevel_62"),
+                         new=c("alpha.pv1", "alpha.pv2", "alpha.pv3", "alpha.pv4", "alpha.pv5"))
+  }
+  names(leodata) <- tolower(names(leodata))
+  if (year == 2018) {
+    # PV-Variable für Alpha 1-3 (gering literalisiert)
+    leodata$alpha3.pv1 <- cut(leodata$alpha.pv1, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv2 <- cut(leodata$alpha.pv2, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv3 <- cut(leodata$alpha.pv3, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv4 <- cut(leodata$alpha.pv4, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv5 <- cut(leodata$alpha.pv5, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv6 <- cut(leodata$alpha.pv6, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv7 <- cut(leodata$alpha.pv7, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv8 <- cut(leodata$alpha.pv8, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv9 <- cut(leodata$alpha.pv9, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv10 <- cut(leodata$alpha.pv10, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    # Alpha-Level in Faktorvariable
+    leodata <- leodata %>% mutate_at(vars(matches("^alpha.pv[0-9]+$")), to_factor)
+    # Spaltennamen mit PVs und Leveln mit Hilfe eines regulären Ausdrucks (regexp) finden
+    lit <- grep("^pv[0-9]+$", colnames(leodata), value = TRUE)
+    alp <- grep("^alpha.pv[0-9]+$", colnames(leodata), value = TRUE)
+    gl <- grep("^alpha3.pv[0-9]+$", colnames(leodata), value = TRUE)
+  } else if (year == 2010) {
+    # PV-Variable für Alpha 1-3 (gering literalisiert)
+    leodata$alpha3.pv1 <- cut(leodata$alpha.pv1, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv2 <- cut(leodata$alpha.pv2, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv3 <- cut(leodata$alpha.pv3, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv4 <- cut(leodata$alpha.pv4, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    leodata$alpha3.pv5 <- cut(leodata$alpha.pv5, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
+    # Alpha-Level in Faktorvariable
+    leodata <- leodata %>% mutate_at(vars(matches("^alpha.pv[1-5]$")), to_factor)
+    # Spaltennamen mit PVs und Leveln mit Hilfe eines regulären Ausdrucks (regexp) finden
+    lit <- grep("^pv[1-5]$", colnames(leodata), value = TRUE)
+    alp <- grep("^alpha.pv[1-5]$", colnames(leodata), value = TRUE)
+    gl <- grep("^alpha3.pv[1-5]$", colnames(leodata), value = TRUE)
+  }
+  # Spaltennamen der HG-Variablen
+  alle <- leodata %>% select(-one_of(lit), -one_of(alp), -one_of(gl))
+  alle <- colnames(alle)
+  # Liste der Daten erstellen mit einer pv-Variable und einem Alpha-Level
+  leoList <- lit %>%
+    purrr::map(function(s) {select(leodata, alle, paste0("alpha.", s), paste0("alpha3.", s), s) %>%
+        rename(pv := !!s) %>%
+        rename(alpha.pv := !!paste0("alpha.", s)) %>%
+        rename(alpha3.pv := !!paste0("alpha3.", s))})
+  # Survey Design erstellen
+  if (year == 2018) {
+    # Survey Design erstellen
+    leo_design <- survey::svydesign(
+      ids = ~0,
+      weights = ~pgewges,
+      data = mitools::imputationList(leoList))
+  } else if (year == 2010) {
+    leo_design <- survey::svydesign(
+      ids = ~0,
+      weights = ~gewleointegr,
+      data = mitools::imputationList(leoList))
+  }
+  return <- leo_design
+}
--- a/R/leo_format.R
+++ b/R/leo_format.R
+#' Condenses rows and columns of a frequency table by summing the cells.
+#'
+#' @param a A leo_table object.
+#' @param upper An integer.
+#' @param lower An integer.
+#' @param left An integer.
+#' @param right An integer.
+#' @return An xtabs object.
+#' @examples
+#' example <- example
+#'
+#' @export
+leo.format <- function(a, upper, lower, left, right) {
+  stopifnot(inherits(a, what = "leo_table"))
+  x <- dim(a)[1]
+  y <- dim(a)[2]
+  cs <- NULL
+  rs <- NULL
+  if (!missing(upper)) {
+    a_upper <- t(matrix(colSums(a[1:upper,])))
+    rownames(a_upper)[1] <- paste0("1:", upper)
+    cs <- rbind(a_upper, a[(upper+1):x,])
+  }
+  if (!missing(lower)) {
+    a_lower <- t(matrix(colSums(a[(x-lower+1):y,])))
+    rownames(a_lower)[1] <- paste0((x-lower+1), ":", x)
+    cs <- rbind(a[1:(x-lower),], a_lower)
+  }
+  if (!missing(upper) & !missing(lower)) {
+    if ( upper + lower == x) {
+      cs <- rbind(a_upper, a_lower)
+      colnames(cs) <- colnames(a)
+    } else {
+      cs <- rbind(a_upper, a[(upper+1):(x-lower),])
+      cs <- rbind(cs, a_lower)
+    }
+  }
+  if (!is.null(cs)) {a <- cs}
+  if (!missing(left)) {
+    a_left <- matrix(rowSums(a[, 1:left]))
+    colnames(a_left)[1] <- paste0("1:", left)
+    rs <- cbind(a_left, a[,(left+1):y])
+  }
+  if (!missing(right)) {
+    a_right <- matrix(rowSums(a[, (y-right+1):y]))
+    colnames(a_right)[1] <- paste0((y-right+1),":", y)
+    rs <- cbind(a[,1:(y-right)], a_right)
+  }
+  if (!missing(left) & !missing(right)) {
+    if ( left + right == y) {
+      rs <- cbind(a_left, a_right)
+      rownames(rs) <- rownames(a)
+    } else {
+      rs <- cbind(a_left, a[,(left+1):(y-right)])
+      rs <- cbind(rs, a_right)
+    }
+  }
+  if (is.null(rs))
+  {
+    ret <- cs
+  } else {
+    ret <- rs
+  }
+}
--- a/R/leo_svydesign.R
+++ b/R/leo_svydesign.R
+#' Creates survey design objects for LEO 2010 and 2018.
+#'
+#' @param df Survey data
+#' @param survey_name leo2010 and leo2018 (leo2018hoch) implemented
+#' @return A survey design object.
+#'
+#' @export
+leo_svydesign <- function(df, survey_name = "leo2018", list = FALSE) {
+  names(df) <- tolower(names(df))
+  # create survey design object for LEO 2010 or LEO 2018 ----
+  if (survey_name == "leo2010" || survey_name == "leo2018" || survey_name == "leo2018hoch") {
+    if (survey_name == "leo2010") {
+      # remove logits
+      df <- df %>% dplyr::select(-tidyselect::matches("^pv[1-5]$"))
+      # rename variables fpr pvs and levels
+      old=c("pv1_62", "pv2_62", "pv3_62", "pv4_62", "pv5_62",
+            "pv1_alphalevel_62", "pv2_alphalevel_62", "pv3_alphalevel_62", "pv4_alphalevel_62", "pv5_alphalevel_62")
+      new=c("pv1", "pv2", "pv3", "pv4", "pv5",
+            "alpha.pv1", "alpha.pv2", "alpha.pv3", "alpha.pv4", "alpha.pv5")
+      df <- df %>% dplyr::rename_at(vars(old), ~new)
+    }
+    # some functions need a helper variable
+    df$a <- as.numeric(1)
+    # get column names of pvs and levels
+    lit <- grep("^pv[0-9]+", colnames(df), value = TRUE)
+    alp <- grep("^alpha.pv[0-9]+", colnames(df), value = TRUE)
+    # column names of all other variables
+    all <- df %>% dplyr::select(-tidyselect::one_of(lit), -tidyselect::one_of(alp))
+    all <- colnames(all)
+    # create list with one pv-variable and one level-variable each (and all other variables)
+    repList <- lit %>%
+      purrr::map(function(x) {dplyr::select(df, all, paste0("alpha.", x), x) %>%
+          dplyr::rename(pv := !!x) %>%
+          dplyr::rename(alpha.pv := !!paste0("alpha.", x))})
+    # create further alpha-variables for different analyses
+    # alpha3 (three levels): low literate (Alpha 1-3), Alpha 4, above Alpha 4
+    # alpha2 (two levels): low literat (Alpha 1-3), high literate (Alpha 4 and above)
+    # lowlit (two levels): same as alpha2, but reversed levels
+    repList <- lapply(repList, function(x) {x %>%
+        dplyr::mutate(alpha3 = ifelse(x$alpha.pv > 3, x$alpha.pv, 1)) %>%
+        dplyr::mutate(alpha2 = ifelse(x$alpha.pv > 3, 1, 0)) %>%
+        dplyr::mutate(lowlit = ifelse(x$alpha.pv > 3, 0, 1)) %>%
+        dplyr::mutate(alpha3 = factor(alpha3, labels = (c("a1-3", "a4", "a5")))) %>%
+        dplyr::mutate(alpha2 = factor(alpha2, labels = (c("a1-3", "a4-5")))) %>%
+        dplyr::mutate(lowlit = factor(lowlit, labels = (c("a4-5", "a1-3")))) %>%
+        dplyr::mutate(alpha.pv = labelled::to_factor(alpha.pv))})
+    # if the imputation list is requested, return it and exit function
+    if (list == TRUE) {
+      return(repList)
+    }
+    # create survey design
+    if (survey_name == "leo2018") {
+      design <- survey::svydesign(
+        ids = ~0,
+        weights = ~pgewges,
+        data = mitools::imputationList(repList))
+    } else if (survey_name == "leo2018hoch") {
+      design <- survey::svydesign(
+        ids = ~0,
+        weights = ~phochges,
+        data = mitools::imputationList(repList))
+    } else if (survey_name == "leo2010") {
+      design <- survey::svydesign(
+        ids = ~0,
+        weights = ~gewleointegr,
+        data = mitools::imputationList(repList))
+    }
+  }
+  # implement other surveys here----
+  return(design)
+}
--- a/R/leo_tab.R
+++ b/R/leo_tab.R
+#' Creates survey design objects for LEO 2010 and 2018.
+#'
+#' @param varName Original dataframe of LEO 2010 or LLEO 2018.
+#' @param Richtung Either 2010 or 2018.
+#' @param sheet.name Either 2010 or 2018.
+#' @param Excel.Name Either 2010 or 2018.
+#' @param ListenName Either 2010 or 2018.
+#' @param pWert Either 2010 or 2018.
+#'
+#' @return A dataframe with frequencies
+#'
+#' @export
+leo_tab <- function(varName, Richtung = 1, sheet.Name = "Mappe", Excel.Name,
+                     ListenName = leoList, pWert=FALSE){
+  if (file.exists(Excel.Name) && (sheet.Name %in% readxl::excel_sheets(Excel.Name))) {
+    warning(sprintf("%s: Sheet already existing. Skipping calculations.", sheet.Name))
+    return(NULL)
+  }
+  ### Chi2
+  # Dummyvariablen erstellen
+  ListenName <- lapply(leoList, function(x) {dummy_cols(x, select_columns = varName)})
+  # das design für gewichtete Berechnungen im Datensatz erstellen
+  desName <- svydesign(ids = ~0, weights = ~pgewges,
+                       data = mitools::imputationList(ListenName))
+  # das Design für auf die Grundgesamtheit hochgerechnete Werte erstellen
+  desOhne<- svydesign(ids = ~0, weights = ~1,
+                      data= mitools::imputationList(ListenName))
+  if(Richtung == 1){
+    i1 <- varName
+    i2 <- 'alpha.pv'
+  } else {
+    i1 <- 'alpha.pv'
+    i2 <- varName
+  }
+  ### Chi2 Berechnung ----
+  # Hilfsfunktion, um micombine.chisquare nicht immer wiederholen zu müssen
+  chi2.comb <- function(C) {
+    micombine.chisquare(dk=sapply(C, FUN = function(x) x$statistic),
+                        df=mean(sapply(C, FUN = function(x) x$parameter)))
+  }
+  # Chi2 für die gesamte Tabelle
+  chi.gesamt <- with(desName, svychisq(as.formula(paste0("~", i1, "+alpha2")), statistic ="Chisq"))
+  chi.gesamt <- chi2.comb(chi.gesamt)
+  cat(round(chi.gesamt["p"], 3))
+  cat("\n")
+  # Chi2 für einzelne Ausprägungen
+  lvls <- levels(to_factor(leoList[[1]][[varName]], levels="labels"))
+  #Levelnamen kürzen und Leerzeichen rausnehmen
+  lvls.short <- gsub(" ", "_", lvls)
+  #lvls.short <- substr(lvls.short, 0, 25)
+  chiNames <- paste0(varName, "_", lvls.short)
+  cat(chiNames)
+  chilist <- list()
+  bonferroni <- list()
+  for (i in seq_along(chiNames)) {
+    print(chiNames[[i]])
+    chiformula <- as.formula(paste0("~",chiNames[[i]], "+alpha2"))
+    chicomb <- chi2.comb(with(desName, svychisq(chiformula, statistic = "Chisq")))
+    chilist[i] <- chicomb[["p"]]
+    bonferroni[i] <- ifelse(chicomb[["p"]] < 0.05/length(lvls), "sig.", "n.s.")
+  }
+  chilist <- append(chilist, chi.gesamt["p"])
+  bonferroni <- append(bonferroni, ifelse(chi.gesamt["p"] < 0.05, "sig.", "n.s."))
+  cat(round(unlist(chilist),3))
+  cat("\n")
+  cat(unlist(bonferroni))
+  cat("\n")
+  chi2 <- data.frame(Chi2 = round(unlist(chilist), 3), Bonferroni = unlist(bonferroni))
+  ### Absolute Werte UNGEWICHTET und für Signifikanz gewichtete für A1-3 vs. höher ----
+  n.werte.alpha <- summary(MIcombine(with(desOhne, svyby(as.formula(paste0("~to_factor(", i1, ")")),
+                                                         as.formula(paste0("~to_factor(", i2, ")")), svytotal, na.rm=TRUE))))
+  # Reihe mit sauberen Namen einfügen
+  n.werte.alpha[,'nam'] <- rownames(n.werte.alpha)
+  n.werte.alpha <- n.werte.alpha %>% mutate(nam1 = gsub(":to_factor.*$", "", nam)) %>%
+    mutate(nam2 = gsub("^.*\\)", "", nam)) %>% select(-nam)
+  # n.werte.alpha[paste0("nam", Richtung)] <- gsub("^a", "Alpha ", n.werte.alpha[[paste0("nam", Richtung)]])
+  # und das Ganze noch mal für die binäre Variable
+  n.werte.alpha.bi <- summary(MIcombine(with(desOhne,
+                                             svyby(as.formula(paste0("~to_factor(", varName,")")), ~to_factor(alpha2), svytotal, na.rm=T))))
+  # und auch hier saubere Namen einfügen
+  n.werte.alpha.bi[,'nam'] <- rownames(n.werte.alpha.bi)
+  if(Richtung == 1){
+    n.werte.alpha.bi <- n.werte.alpha.bi %>% mutate(nam1 = gsub(":to_factor.*$", "", nam)) %>%
+      mutate(nam2 = gsub("^.*\\)", "", nam)) %>% select(-nam)
+  } else{
+    n.werte.alpha.bi <- n.werte.alpha.bi %>% mutate(nam2 = gsub(":to_factor.*$", "", nam)) %>%
+      mutate(nam1 = gsub("^.*\\)", "", nam)) %>% select(-nam)
+  }
+  # und für die Signifikanzen die gewichteten Anteile
+  n.werte.alpha.bi.weighted <- summary(MIcombine(with(desName,
+                                                      svyby(as.formula(paste0("~to_factor(", varName, ")")),
+                                                            ~to_factor(alpha2), svytotal, na.rm = TRUE))))
+  # und auch hier saubere Namen einfügen
+  n.werte.alpha.bi.weighted[,'nam'] <- rownames(n.werte.alpha.bi.weighted)
+  if(Richtung == 1){
+    n.werte.alpha.bi.weighted <- n.werte.alpha.bi.weighted %>% mutate(nam1 = gsub(":to_factor.*$", "", nam)) %>%
+      mutate(nam2 = gsub("^.*\\)", "", nam)) %>% select(-nam)
+  } else{
+    n.werte.alpha.bi.weighted <- n.werte.alpha.bi.weighted %>% mutate(nam2 = gsub(":to_factor.*$", "", nam)) %>%
+      mutate(nam1 = gsub("^.*\\)", "", nam)) %>% select(-nam)
+  }
+  ### Anteile von Alpha-Leveln ----
+  anteile.alpha <- summary(MIcombine(with(desName, svyby(as.formula(paste0("~to_factor(", i1, ")")),
+                                                         as.formula(paste0("~to_factor(", i2, ")")), svymean, na.rm=TRUE))))
+  anteile.alpha[,'nam'] <- rownames(anteile.alpha)
+  anteile.alpha <- anteile.alpha %>% mutate(nam1 = gsub(":to_factor.*$", "", nam)) %>%
+    mutate(nam2 = gsub("^.*\\)", "", nam)) %>% select(-nam)
+  ### Anteile von Gesamtbevölkerung ----
+  anteile.gb <- summary(MIcombine(with(desName, svyby(as.formula(paste0("~to_factor(", varName, ")")),
+                                                      ~a, svymean, na.rm=TRUE))))
+  anteile.gb[,'nam'] <- rownames(anteile.gb)
+  if(Richtung == 1){
+    anteile.gb <- anteile.gb %>% mutate(nam1 = gsub(".*", "GB", nam)) %>%
+      mutate(nam2 = gsub(".*\\)", "", nam)) %>% select(-nam)
+    anteile.gb[paste0("nam", Richtung)] <- gsub("^a" ,"Alpha ", anteile.gb[[paste0("nam", Richtung)]])
+  }else {
+    anteile.gb <- anteile.gb %>% mutate(nam2 = gsub(".*", "GB", nam)) %>%
+      mutate(nam1 = gsub(".*\\)", "", nam)) %>% select(-nam)
+  }
+  ### Anteile von Alpha 1-3 vs. Rest ----
+  if(Richtung == 1){
+    anteile.alpha.bi <- summary(MIcombine(with(desName, svyby(as.formula(paste0("~to_factor(", i1, ")")),
+                                                              ~alpha2, svymean, na.rm=TRUE))))
+  }else{
+    anteile.alpha.bi <- summary(MIcombine(with(desName, svyby(~to_factor(alpha2),
+                                                              as.formula(paste0("~to_factor(", varName, ")")),
+                                                              svymean, na.rm=TRUE))))
+  }
+  anteile.alpha.bi[,'nam'] <- rownames(anteile.alpha.bi)
+  anteile.alpha.bi <- anteile.alpha.bi %>% mutate(nam1 = gsub(":to_factor.*$", "", nam)) %>%
+    mutate(nam2 = gsub("^.*\\)", "", nam)) %>% select(-nam)
+    # Alle Anteile in einen Datensatz
+  alle <- bind_rows(anteile.alpha, anteile.alpha.bi, anteile.gb)
+  aTab <- alle %>%
+    mutate(results = round(results * 100, 1)) %>%
+    select(results, nam1, nam2) %>%
+    pivot_wider(names_from = nam1, values_from = results)
+  nTab <- n.werte.alpha %>%
+    select(results, nam1, nam2) %>%
+    pivot_wider(names_from = nam1, values_from = results) %>%
+    select(-nam2)
+  colnames(nTab) <- paste("n", colnames(nTab))
+  nBi <- n.werte.alpha.bi %>%
+    select(results, nam1, nam2) %>%
+    pivot_wider(names_from = nam1, values_from = results) %>%
+    select(-nam2) %>%
+    mutate("GB" = rowSums(.))
+  colnames(nBi) <- paste("n", colnames(nBi))
+  rnames <- select(aTab, nam2)
+  rnames[nrow(rnames) + 1,] <- "Sum"
+  aTab <- select(aTab, -nam2)
+  aTab <- bind_cols(aTab, nTab, nBi)
+  aTab[nrow(aTab) + 1,] = colSums(aTab)
+  aTab <- bind_cols(rnames, aTab)
+  # Chi2 Hinzufügen
+  if(Richtung==1) aTab <- add_column(aTab, Chi2 = chi2[[1]], Bonferroni = chi2[[2]], .after = 9)
+  #umsortieren
+  #if(Richtung == 1) aTab <- aTab[, c(1:6,9,7,8,11,10,12:ncol(aTab))]
+  if(Richtung == 2) aTab <- aTab[c(1:5,8,6:7,9:nrow(aTab)),]
+  # SPEICHERN
+  if(sheet.Name == "Mappe") sheet.Name <- paste(varName, " (", Richtung, ")")
+  write.xlsx(as.data.frame(aTab), Excel.Name, sheetName = sheet.Name , append = TRUE, row.names = FALSE)
+  if(Richtung == 2) write.xlsx(chi2, Excel.Name, sheetName = paste0(sheet.Name, "_sig"), append = TRUE,
+                               row.names = FALSE)
+  # wb <- openxlsx::createWorkbook()
+  # openxlsx::addWorksheet(wb, sheetName = sheet.Name)
+  # openxlsx::writeDataTable(wb, sheet = 1, x = aTab,colNames = TRUE, rowNames = FALSE, tableStyle = "none")
+  # openxlsx::saveWorkbook(wb, Excel.Name)
+  return(as.data.frame(aTab))
+  }
--- a/R/leo_table.R
+++ b/R/leo_table.R
+#' Calculates frequency tables for multiply imputed and weighted data.
+#'
+#' @param rowvar A vector or a dataframe of multiply imputed variables.
+#' @param colvar A vector or a dataframe of multiply imputed variables.
+#' @param supercolvar A vector or a dataframe of multiply imputed variables.
+#' @param weight A vector with frequency weights.
+#' @param data A dataframe that contains above variables
+#' @return An xtabs object.
+#' @examples
+#' leo <- haven::read_sav("~/leo.sav")
+#' leo$f001 <- labelled::to_factor(leo$f001)
+#' leo$altgr5 <- labelled::to_factor(leo$altgr5)
+#' pvs <- leo %>% select(matches("^alpha.pv[0-9]+")) %>% to_factor()
+#' # 1-way
+#' leo_table(rowvar = pvs, weight = leo$pgewges, data = leo)
+#' # 2-way
+#' leo_table(rowvar = leo$pol001, colvar = pvs, weight = leo$pgewges, data = leo)
+#' # 3-way
+#' leo_table(rowvar = pvs, colvar = leo$altgr5, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
+#'
+#' @export
+leo_table <- function(rowvar, colvar, supercolvar, weight, data) {
+  # check for missing and/or wrong arguments
+  if (missing(data))
+    stop("'data' is missing")
+  if (missing(rowvar))
+    stop("'rowvar' is missing")
+  if (!missing(supercolvar) && missing (colvar))
+    stop("'colvar' is missing")
+  # deparse variables for later use
+  rowvar_d <- deparse(substitute(rowvar))
+  colvar_d <- deparse(substitute(colvar))
+  supercolvar_d <- deparse(substitute(supercolvar))
+  # if no weight is supplied, weight = 1 for every case
+  if (missing(weight)) {
+    data$weight_gen_38376z5 <- 1
+    weight <- "weight_gen_38376z5"
+  } else {
+    weight <- deparse(substitute(weight))
+  }
+  # 1-way table ----
+  if (missing(colvar) && missing(supercolvar)) {
+    if (is.atomic(rowvar)) {
+      cat("1-way table\n")
+      f <- as.formula(paste(weight, "~", rowvar_d))
+      tbl <- xtabs(formula = f, data = data)
+    } else {
+      cat("1-way table with multiple imputations\n")
+      mi <- names(rowvar)
+      tbl.list <- lapply(X = mi, FUN = function(x) {
+        f <- as.formula(paste(weight, "~", x))
+        xtabs(f, data = data)})
+      tbl <- Reduce("+", tbl.list)/10
+      names(attr(tbl, "dimnames")) <- c("mi")
+    }
+  }
+  # 2-way table ----
+  if (missing(supercolvar) && !missing(colvar)) {
+    if (is.atomic(rowvar) && is.atomic(colvar)) {
+      cat("2-way table\n")
+      f <- as.formula(paste(weight, "~", rowvar_d, "+", colvar_d))
+      tbl <- xtabs(formula = f, data = data)
+    } else {
+      cat("2-way table with multiple imputations\n")
+      if (is.recursive(rowvar) && is.atomic(colvar)) {
+        cat("rowvar is multiple imputed\n")
+        mi <- names(rowvar)
+        tbl.list <- lapply(X = mi, FUN = function(x) {
+          f <- as.formula(paste(weight, "~", x, "+", colvar_d))
+          xtabs(f, data = data)})
+        tbl <- Reduce("+", tbl.list)/10
+        names(attr(tbl, "dimnames"))[1] <- "mi"
+      } else if (is.atomic(rowvar) && is.recursive(colvar)) {
+        cat("colvar is multiple imputed\n")
+        mi <- names(colvar)
+        tbl.list <- lapply(X = mi, FUN = function(x) {
+          f <- as.formula(paste(weight, "~", rowvar_d, "+", x))
+          xtabs(f, data = data)})
+        tbl <- Reduce("+", tbl.list)/10
+        names(attr(tbl, "dimnames"))[2] <- "mi"
+      } else {
+        stop("only one variable can be multiply imputed")
+      }
+    }
+  }
+  # 3-way table ----
+  if (!missing(supercolvar)) {
+    if (is.atomic(rowvar) && is.atomic(colvar) && is.atomic(supercolvar)) {
+    cat("3-way table\n")
+    rowvar <- deparse(substitute(rowvar))
+    colvar <- deparse(substitute(colvar))
+    supercolvar <- deparse(substitute(supercolvar))
+    f <- as.formula(paste(weight, "~", rowvar, "+", colvar, "+", supercolvar))
+    tbl <- xtabs(formula = f, data = data)
+    } else {
+      cat("3-way table with multiple imputations\n")
+      if (is.recursive(rowvar) && is.atomic(colvar) && is.atomic(supercolvar)) {
+        cat("rowvar is multiple imputed\n")
+        mi <- names(rowvar)
+        tbl.list <- lapply(X = mi, FUN = function(x) {
+          f <- as.formula(paste(weight, "~", x, "+", colvar_d, "+", supercolvar_d))
+          xtabs(f, data = data)})
+        tbl <- Reduce("+", tbl.list)/10
+        names(attr(tbl, "dimnames"))[1] <- "mi"
+      } else if (is.atomic(rowvar) && is.recursive(colvar) && is.atomic(supercolvar)) {
+        cat("colvar is multiple imputed\n")
+        mi <- names(colvar)
+        tbl.list <- lapply(X = mi, FUN = function(x) {
+          f <- as.formula(paste(weight, "~", rowvar_d, "+", x, "+", supercolvar_d))
+          xtabs(f, data = data)})
+        tbl <- Reduce("+", tbl.list)/10
+        names(attr(tbl, "dimnames"))[2] <- "mi"
+      } else if (is.atomic(rowvar) && is.atomic(colvar) && is.recursive(supercolvar)) {
+        cat("supercolvar is multiple imputed\n")
+        mi <- names(supercolvar)
+        tbl.list <- lapply(X = mi, FUN = function(x) {
+          f <- as.formula(paste(weight, "~", rowvar_d, "+", colvar_d, "+", x))
+          xtabs(f, data = data)})
+        tbl <- Reduce("+", tbl.list)/10
+        names(attr(tbl, "dimnames"))[3] <- "mi"
+      } else {
+        stop("only one variable can be multiply imputed")
+      }
+    }
+  }
+  if (exists("tbl.list") == TRUE) attr(tbl, "tbl.list") <- tbl.list
+  print(tbl)
+  class(tbl) <- append(class(tbl),"leo_table")
+  ret <- tbl
+}
--- a/leo.Rproj
+++ b/leo.Rproj
+Version: 1.0
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
--- a/leo_design_test.R
+++ b/leo_design_test.R
+rm(list=ls(all=TRUE)) # REMOVE ALL OBJECTS FROM WORKSPACE
+library(haven)
+library(leo)
+#leo2018 <- read_sav("C:/Users/Dutz/Documents/daten/leo2018/leo7192_mit_PVs_v1.1.sav")
+leo2010 <- read_sav("C:/Users/Dutz/Documents/daten/leo2010/Leo_Enddaten_mit_PVs.sav")
+#test2018 <- leo_getdesign(leo2018)
+leodata <- leo_getdesign(leo2010, year = 2010)
+lit <- grep("^pv[1-5]", colnames(leodata), value = TRUE)
+alp <- grep("^alpha.pv[1-5]", colnames(leodata), value = TRUE)
+gl <- grep("^alpha3.pv[1-5]", colnames(leodata), value = TRUE)
+alle <- leodata %>% select(-one_of(lit))
--- a/leo_format_test.R
+++ b/leo_format_test.R
+rm(list=ls(all=TRUE)) # REMOVE ALL OBJECTS FROM WORKSPACE
+library(haven)
+library(labelled)
+library(tidyverse)
+library(leo)
+leo <- read_sav("C:/Users/Dutz/Documents/daten/leo2018/leo7192_mit_PVs_v1.1.sav")
+names(leo) <- tolower(names(leo))
+pvs <- leo %>% select(matches("^alpha.pv[0-9]+")) %>% to_factor()
+leo$f001    <- to_factor(leo$f001)
+leo$altgr5  <- to_factor(leo$altgr5)
+leo$schulab <- to_factor(leo$schulab)
+leo$erstspr <- to_factor(leo$erstspr)
+leo$migra   <- to_factor(leo$migra)
+t <- leo_table(leo$altgr5, pvs, weight = leo$pgewges, data = leo)
+class(t) <- append(class(t),"leo_table")
+t1 <- leo.format(t, right = 3)
+t1
+t2 <- leo.format(t, upper = 2)
+t2
+t3 <- leo.format(t, right = 3, upper = 2)
+t3
+t4 <- leo.format(t, right = 3, left = 2, lower = 2 , upper = 2)
+t4
--- a/leo_table_test.R
+++ b/leo_table_test.R
+rm(list=ls(all=TRUE)) # REMOVE ALL OBJECTS FROM WORKSPACE
+library(haven)
+library(labelled)
+library(tidyverse)
+library(xlsx)
+library(leo)
+#leo <- read_sav("C:/Users/Dutz/Documents/daten/leo2018/leo7192_mit_PVs_v1.1.sav")
+leo <- read_sav("G:/leo2018/leo7192_mit_PVs_v1.1.sav")
+names(leo) <- tolower(names(leo))
+# Variablen in Faktoren umwandeln
+leo$pol001 <- to_factor(leo$pol001)
+leo$f001 <- to_factor(leo$f001)
+leo$altgr5 <- to_factor(leo$altgr5)
+# Alpha-Level in separatem DF speichern
+pvs <- leo %>% select(matches("^alpha.pv[0-9]+")) %>% to_factor()
+# ohne Alpha-Level
+# 1-way
+tbl1 <- leo_table(rowvar = leo$pol001, weight = leo$pgewges, data = leo)
+# 2-way
+tbl2 <- leo_table(rowvar = leo$pol001, colvar = leo$f001, weight = leo$pgewges, data = leo)
+# 3-way
+tbl3 <- leo_table(rowvar = leo$pol001, colvar = leo$altgr5, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
+# mit Alpha-Level
+# 1-way
+leo_table(rowvar = pvs, weight = leo$pgewges, data = leo)
+# 2-way
+leo_table(rowvar = leo$pol001, colvar = pvs, weight = leo$pgewges, data = leo)
+leo_table(rowvar = pvs, colvar = leo$f001, weight = leo$pgewges, data = leo)
+# 3-way
+leo_table(rowvar = pvs, colvar = leo$altgr5, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
+leo_table(rowvar = leo$altgr5, colvar = pvs, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
+leo_table(rowvar = leo$altgr5, colvar = leo$f001, supercolvar = pvs, weight = leo$pgewges, data = leo)
+# In Excel-Datei schreiben
+write.xlsx(tbl1, "test.xlsx", sheetName = "tbl1",
+           col.names = TRUE, row.names = FALSE, append = FALSE)
+write.xlsx(tbl2, "test.xlsx", sheetName = "tbl2",
+           col.names = TRUE, row.names = FALSE, append = TRUE)
+write.xlsx(tbl3, "test.xlsx", sheetName = "tbl3",
+           col.names = TRUE, row.names = FALSE, append = TRUE)
+tbl4 <- leo_table(rowvar = pvs, colvar = leo$altgr5, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
+# Mit relativen Häufigkeiten für 3-way tables
+# Insgesamt = 100%
+prop.table(tbl4)
+# Reihen über alle Gruppen/supercolvar = 100%
+prop.table(tbl4, 1)
+# Spalten über alle Gruppen/supercolvar = 100%
+prop.table(tbl4, 2)
+# Jede Gruppe/supercolvar = 100%
+prop.table(tbl4, 3)
+# Jede Supercolvar = 100%
+prop.table(tbl4, c(3, 1)) # <- Wahrscheinlich am interessantesten
+# Jede Spalte in jeder Gruppe/supercolvar = 100%
+prop.table(tbl4, c(3, 2))
+# 3-way-table plotten
+tbl5 <- leo_table(pvs, leo$altgr5, leo$f001, leo$pgewges, leo)
+tbl5 <- prop.table(tbl5, c(3, 1))
+df5 <- data.frame(tbl5)
+ggplot(data = df5, aes(x = leo.altgr5, y = Freq, fill = leo.f001, label = round(Freq, 2))) +
+  geom_bar(position = "dodge", stat = "identity") +
+  facet_grid(mi ~ leo.f001) +
+  scale_fill_brewer(palette = "Paired") +
+  geom_text(aes(y=mean(Freq)), size = 3)
--- a/man/leo.format.Rd
+++ b/man/leo.format.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/leo_format.R
+\name{leo.format}
+\alias{leo.format}
+\title{Condenses rows and columns of a frequency table by summing the cells.}
+\usage{
+leo.format(a, upper, lower, left, right)
+}
+\arguments{
+\item{a}{A leo_table object.}
+\item{upper}{An integer.}
+\item{lower}{An integer.}
+\item{left}{An integer.}
+\item{right}{An integer.}
+}
+\value{
+An xtabs object.
+}
+\description{
+Condenses rows and columns of a frequency table by summing the cells.
+}
+\examples{
+example <- example
+}
--- a/man/leo_getdesign.Rd
+++ b/man/leo_getdesign.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/leo_design.R
+\name{leo_getdesign}
+\alias{leo_getdesign}
+\title{Creates survey design objects for LEO 2010 and 2018.}
+\usage{
+leo_getdesign(leodata, year = 2018)
+}
+\arguments{
+\item{leodata}{Original dataframe of LEO 2010 or LLEO 2018.}
+\item{year}{Either 2010 or 2018.}
+}
+\value{
+A survey design object.
+}
+\description{
+Creates survey design objects for LEO 2010 and 2018.
+}
--- a/man/leo_svydesign.Rd
+++ b/man/leo_svydesign.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/leo_svydesign.R
+\name{leo_svydesign}
+\alias{leo_svydesign}
+\title{Creates survey design objects for LEO 2010 and 2018.}
+\usage{
+leo_svydesign(df, survey_name = "leo2018", list = FALSE)
+}
+\arguments{
+\item{df}{Survey data}
+\item{survey_name}{leo2010 and leo2018 (leo2018hoch) implemented}
+}
+\value{
+A survey design object.
+}
+\description{
+Creates survey design objects for LEO 2010 and 2018.
+}
--- a/man/leo_tab.Rd
+++ b/man/leo_tab.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/leo_tab.R
+\name{leo_tab}
+\alias{leo_tab}
+\title{Creates survey design objects for LEO 2010 and 2018.}
+\usage{
+leo_tab(
+  varName,
+  Richtung = 1,
+  sheet.Name = "Mappe",
+  Excel.Name,
+  ListenName = leoList,
+  pWert = FALSE
+)
+}
+\arguments{
+\item{varName}{Original dataframe of LEO 2010 or LLEO 2018.}
+\item{Richtung}{Either 2010 or 2018.}
+\item{Excel.Name}{Either 2010 or 2018.}
+\item{ListenName}{Either 2010 or 2018.}
+\item{pWert}{Either 2010 or 2018.}
+\item{sheet.name}{Either 2010 or 2018.}
+}
+\value{
+A dataframe with frequencies
+}
+\description{
+Creates survey design objects for LEO 2010 and 2018.
+}
--- a/man/leo_table.Rd
+++ b/man/leo_table.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/leo_table.R
+\name{leo_table}
+\alias{leo_table}
+\title{Calculates frequency tables for multiply imputed and weighted data.}
+\usage{
+leo_table(rowvar, colvar, supercolvar, weight, data)
+}
+\arguments{
+\item{rowvar}{A vector or a dataframe of multiply imputed variables.}
+\item{colvar}{A vector or a dataframe of multiply imputed variables.}
+\item{supercolvar}{A vector or a dataframe of multiply imputed variables.}
+\item{weight}{A vector with frequency weights.}
+\item{data}{A dataframe that contains above variables}
+}
+\value{
+An xtabs object.
+}
+\description{
+Calculates frequency tables for multiply imputed and weighted data.
+}
+\examples{
+leo <- haven::read_sav("~/leo.sav")
+leo$f001 <- labelled::to_factor(leo$f001)
+leo$altgr5 <- labelled::to_factor(leo$altgr5)
+pvs <- leo \%>\% select(matches("^alpha.pv[0-9]+")) \%>\% to_factor()
+# 1-way
+leo_table(rowvar = pvs, weight = leo$pgewges, data = leo)
+# 2-way
+leo_table(rowvar = leo$pol001, colvar = pvs, weight = leo$pgewges, data = leo)
+# 3-way
+leo_table(rowvar = pvs, colvar = leo$altgr5, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
+}
--- a/variancetest.R
+++ b/variancetest.R
+rm(list=ls(all=TRUE)) # REMOVE ALL OBJECTS FROM WORKSPACE
+library(haven)
+library(labelled)
+library(tidyverse)
+library(xlsx)
+library(leo)
+leo <- read_sav("C:/Users/Dutz/Documents/daten/leo2018/leo7192_mit_PVs_v1.1.sav")
+names(leo) <- tolower(names(leo))
+# Alpha-Level in separatem DF speichern
+pvs <- leo %>% select(matches("^alpha.pv[0-9]+")) %>% to_factor()
+alpha <- leo.table(rowvar = pvs, weight = leo$pgewges, data = leo)
+alpha.prop <- prop.table(alpha)
+n <- sum(alpha.prop)
+mtr <- matrix(data = NA , nrow = NCOL(alpha.prop), ncol = NROW(alpha.prop))
+if (NCOL(alpha.prop) == 1 ) {
+  for (x in 1:NROW(alpha.prop)) {
+    mtr[x] <- sqrt((alpha.prop[x]*(1-alpha.prop[x]))/n)
+  }
+}
+alpha.list <- attr(alpha, "tbl.list")
+alpha.prop.list <- lapply(alpha.list, prop.table)
+n <- table.sum <- sum(alpha.prop.list) # equals 1
+sqrt((table.prop[x, y] * (1 - table.prop[x, y])) / n)
+# leo <- read_sav("C:/Users/Dutz/Documents/daten/leo2018/leo7192_mit_PVs.sav")
+#
+# # Alpha-Level in separatem DF speichern
+# pvs <- leo %>% select(matches("^alpha.pv[0-9]+")) %>% to_factor()
+#
+# t <- leo.table(pvs, leo$pol001, weight = leo$pgewges, data = leo)
+leo2010 <- read_sav("C:/Users/Dutz/Documents/daten/leo2010/Leo_Enddaten_mit_PVs.sav")
+names(leo2010) <- tolower(names(leo2010))
+leo2010$f001    <- to_factor(leo2010$f001)
+pvs <- leo2010 %>% select(matches("^pv[0-9]+_alphalevel_62")) %>% to_factor()
+t <- prop.table(leo.table(pvs, leo2010$f001, weight = leo2010$gewleointegr, data = leo2010), 2)
+tbl.list <- attr(t, "tbl.list")
+tbl.list <- lapply(tbl.list, prop.table, margin = 2)
+mtr <- matrix(data = NA , nrow = dim(tbl.list[[1]])[1], ncol = dim(tbl.list[[1]])[2])
+m <- length(tbl.list) # number of imputations
+for (x in 1:dim(tbl.list[[1]])[1]) {
+  for (y in 1:dim(tbl.list[[1]])[2]) {
+    mtr[x,y] <- var(sapply(tbl.list, `[`, x, y))
+  }
+}
+between <- mtr