Skip to content
Snippets Groups Projects
Commit 93503b1b authored by gdutz's avatar gdutz
Browse files

Initial Commit

parents
No related branches found
No related tags found
No related merge requests found
^packrat/
^\.Rprofile$
^.*\.Rproj$
^\.Rproj\.user$
.Rproj.user
.Rhistory
.RData
.Ruserdata
Package: leo
Type: Package
Title: Useful functions for LEO surveys
Version: 0.1.31
Author: Gregor Dutz
Maintainer: Gregor Dutz <gregor.dutz@uni-hamburg.de>
Description: It does useful stuff.
License: What license is it under?
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
# Generated by roxygen2: do not edit by hand
export(leo.format)
export(leo_getdesign)
export(leo_svydesign)
export(leo_tab)
export(leo_table)
#' Creates survey design objects for LEO 2010 and 2018.
#'
#' @param leodata Original dataframe of LEO 2010 or LLEO 2018.
#' @param year Either 2010 or 2018.
#' @return A survey design object.
#'
#' @export
leo_getdesign <- function(leodata, year = 2018) {
library(purrr)
library(dplyr)
# Lower variable names
names(leodata) <- tolower(names(leodata))
if (year == 2010) {
# logits entfernen
leodata <- leodata %>% select(-matches("^PV[1-5]$"))
# Level und PV 2010 umbenennen
data.table::setnames(leodata,
old=c("pv1_62", "pv2_62", "pv3_62", "pv4_62", "pv5_62"),
new=c("pv1", "pv2", "pv3", "pv4", "pv5"))
data.table::setnames(leodata,
old=c("pv1_alphalevel_62", "pv2_alphalevel_62", "pv3_alphalevel_62", "pv4_alphalevel_62", "pv5_alphalevel_62"),
new=c("alpha.pv1", "alpha.pv2", "alpha.pv3", "alpha.pv4", "alpha.pv5"))
}
names(leodata) <- tolower(names(leodata))
if (year == 2018) {
# PV-Variable für Alpha 1-3 (gering literalisiert)
leodata$alpha3.pv1 <- cut(leodata$alpha.pv1, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv2 <- cut(leodata$alpha.pv2, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv3 <- cut(leodata$alpha.pv3, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv4 <- cut(leodata$alpha.pv4, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv5 <- cut(leodata$alpha.pv5, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv6 <- cut(leodata$alpha.pv6, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv7 <- cut(leodata$alpha.pv7, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv8 <- cut(leodata$alpha.pv8, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv9 <- cut(leodata$alpha.pv9, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv10 <- cut(leodata$alpha.pv10, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
# Alpha-Level in Faktorvariable
leodata <- leodata %>% mutate_at(vars(matches("^alpha.pv[0-9]+$")), to_factor)
# Spaltennamen mit PVs und Leveln mit Hilfe eines regulären Ausdrucks (regexp) finden
lit <- grep("^pv[0-9]+$", colnames(leodata), value = TRUE)
alp <- grep("^alpha.pv[0-9]+$", colnames(leodata), value = TRUE)
gl <- grep("^alpha3.pv[0-9]+$", colnames(leodata), value = TRUE)
} else if (year == 2010) {
# PV-Variable für Alpha 1-3 (gering literalisiert)
leodata$alpha3.pv1 <- cut(leodata$alpha.pv1, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv2 <- cut(leodata$alpha.pv2, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv3 <- cut(leodata$alpha.pv3, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv4 <- cut(leodata$alpha.pv4, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
leodata$alpha3.pv5 <- cut(leodata$alpha.pv5, breaks = c(0, 3, 5), labels = c("gl", "nicht gl"))
# Alpha-Level in Faktorvariable
leodata <- leodata %>% mutate_at(vars(matches("^alpha.pv[1-5]$")), to_factor)
# Spaltennamen mit PVs und Leveln mit Hilfe eines regulären Ausdrucks (regexp) finden
lit <- grep("^pv[1-5]$", colnames(leodata), value = TRUE)
alp <- grep("^alpha.pv[1-5]$", colnames(leodata), value = TRUE)
gl <- grep("^alpha3.pv[1-5]$", colnames(leodata), value = TRUE)
}
# Spaltennamen der HG-Variablen
alle <- leodata %>% select(-one_of(lit), -one_of(alp), -one_of(gl))
alle <- colnames(alle)
# Liste der Daten erstellen mit einer pv-Variable und einem Alpha-Level
leoList <- lit %>%
purrr::map(function(s) {select(leodata, alle, paste0("alpha.", s), paste0("alpha3.", s), s) %>%
rename(pv := !!s) %>%
rename(alpha.pv := !!paste0("alpha.", s)) %>%
rename(alpha3.pv := !!paste0("alpha3.", s))})
# Survey Design erstellen
if (year == 2018) {
# Survey Design erstellen
leo_design <- survey::svydesign(
ids = ~0,
weights = ~pgewges,
data = mitools::imputationList(leoList))
} else if (year == 2010) {
leo_design <- survey::svydesign(
ids = ~0,
weights = ~gewleointegr,
data = mitools::imputationList(leoList))
}
return <- leo_design
}
#' Condenses rows and columns of a frequency table by summing the cells.
#'
#' @param a A leo_table object.
#' @param upper An integer.
#' @param lower An integer.
#' @param left An integer.
#' @param right An integer.
#' @return An xtabs object.
#' @examples
#' example <- example
#'
#' @export
leo.format <- function(a, upper, lower, left, right) {
stopifnot(inherits(a, what = "leo_table"))
x <- dim(a)[1]
y <- dim(a)[2]
cs <- NULL
rs <- NULL
if (!missing(upper)) {
a_upper <- t(matrix(colSums(a[1:upper,])))
rownames(a_upper)[1] <- paste0("1:", upper)
cs <- rbind(a_upper, a[(upper+1):x,])
}
if (!missing(lower)) {
a_lower <- t(matrix(colSums(a[(x-lower+1):y,])))
rownames(a_lower)[1] <- paste0((x-lower+1), ":", x)
cs <- rbind(a[1:(x-lower),], a_lower)
}
if (!missing(upper) & !missing(lower)) {
if ( upper + lower == x) {
cs <- rbind(a_upper, a_lower)
colnames(cs) <- colnames(a)
} else {
cs <- rbind(a_upper, a[(upper+1):(x-lower),])
cs <- rbind(cs, a_lower)
}
}
if (!is.null(cs)) {a <- cs}
if (!missing(left)) {
a_left <- matrix(rowSums(a[, 1:left]))
colnames(a_left)[1] <- paste0("1:", left)
rs <- cbind(a_left, a[,(left+1):y])
}
if (!missing(right)) {
a_right <- matrix(rowSums(a[, (y-right+1):y]))
colnames(a_right)[1] <- paste0((y-right+1),":", y)
rs <- cbind(a[,1:(y-right)], a_right)
}
if (!missing(left) & !missing(right)) {
if ( left + right == y) {
rs <- cbind(a_left, a_right)
rownames(rs) <- rownames(a)
} else {
rs <- cbind(a_left, a[,(left+1):(y-right)])
rs <- cbind(rs, a_right)
}
}
if (is.null(rs))
{
ret <- cs
} else {
ret <- rs
}
}
#' Creates survey design objects for LEO 2010 and 2018.
#'
#' @param df Survey data
#' @param survey_name leo2010 and leo2018 (leo2018hoch) implemented
#' @return A survey design object.
#'
#' @export
leo_svydesign <- function(df, survey_name = "leo2018", list = FALSE) {
names(df) <- tolower(names(df))
# create survey design object for LEO 2010 or LEO 2018 ----
if (survey_name == "leo2010" || survey_name == "leo2018" || survey_name == "leo2018hoch") {
if (survey_name == "leo2010") {
# remove logits
df <- df %>% dplyr::select(-tidyselect::matches("^pv[1-5]$"))
# rename variables fpr pvs and levels
old=c("pv1_62", "pv2_62", "pv3_62", "pv4_62", "pv5_62",
"pv1_alphalevel_62", "pv2_alphalevel_62", "pv3_alphalevel_62", "pv4_alphalevel_62", "pv5_alphalevel_62")
new=c("pv1", "pv2", "pv3", "pv4", "pv5",
"alpha.pv1", "alpha.pv2", "alpha.pv3", "alpha.pv4", "alpha.pv5")
df <- df %>% dplyr::rename_at(vars(old), ~new)
}
# some functions need a helper variable
df$a <- as.numeric(1)
# get column names of pvs and levels
lit <- grep("^pv[0-9]+", colnames(df), value = TRUE)
alp <- grep("^alpha.pv[0-9]+", colnames(df), value = TRUE)
# column names of all other variables
all <- df %>% dplyr::select(-tidyselect::one_of(lit), -tidyselect::one_of(alp))
all <- colnames(all)
# create list with one pv-variable and one level-variable each (and all other variables)
repList <- lit %>%
purrr::map(function(x) {dplyr::select(df, all, paste0("alpha.", x), x) %>%
dplyr::rename(pv := !!x) %>%
dplyr::rename(alpha.pv := !!paste0("alpha.", x))})
# create further alpha-variables for different analyses
# alpha3 (three levels): low literate (Alpha 1-3), Alpha 4, above Alpha 4
# alpha2 (two levels): low literat (Alpha 1-3), high literate (Alpha 4 and above)
# lowlit (two levels): same as alpha2, but reversed levels
repList <- lapply(repList, function(x) {x %>%
dplyr::mutate(alpha3 = ifelse(x$alpha.pv > 3, x$alpha.pv, 1)) %>%
dplyr::mutate(alpha2 = ifelse(x$alpha.pv > 3, 1, 0)) %>%
dplyr::mutate(lowlit = ifelse(x$alpha.pv > 3, 0, 1)) %>%
dplyr::mutate(alpha3 = factor(alpha3, labels = (c("a1-3", "a4", "a5")))) %>%
dplyr::mutate(alpha2 = factor(alpha2, labels = (c("a1-3", "a4-5")))) %>%
dplyr::mutate(lowlit = factor(lowlit, labels = (c("a4-5", "a1-3")))) %>%
dplyr::mutate(alpha.pv = labelled::to_factor(alpha.pv))})
# if the imputation list is requested, return it and exit function
if (list == TRUE) {
return(repList)
}
# create survey design
if (survey_name == "leo2018") {
design <- survey::svydesign(
ids = ~0,
weights = ~pgewges,
data = mitools::imputationList(repList))
} else if (survey_name == "leo2018hoch") {
design <- survey::svydesign(
ids = ~0,
weights = ~phochges,
data = mitools::imputationList(repList))
} else if (survey_name == "leo2010") {
design <- survey::svydesign(
ids = ~0,
weights = ~gewleointegr,
data = mitools::imputationList(repList))
}
}
# implement other surveys here----
return(design)
}
#' Creates survey design objects for LEO 2010 and 2018.
#'
#' @param varName Original dataframe of LEO 2010 or LLEO 2018.
#' @param Richtung Either 2010 or 2018.
#' @param sheet.name Either 2010 or 2018.
#' @param Excel.Name Either 2010 or 2018.
#' @param ListenName Either 2010 or 2018.
#' @param pWert Either 2010 or 2018.
#'
#' @return A dataframe with frequencies
#'
#' @export
leo_tab <- function(varName, Richtung = 1, sheet.Name = "Mappe", Excel.Name,
ListenName = leoList, pWert=FALSE){
if (file.exists(Excel.Name) && (sheet.Name %in% readxl::excel_sheets(Excel.Name))) {
warning(sprintf("%s: Sheet already existing. Skipping calculations.", sheet.Name))
return(NULL)
}
### Chi2
# Dummyvariablen erstellen
ListenName <- lapply(leoList, function(x) {dummy_cols(x, select_columns = varName)})
# das design für gewichtete Berechnungen im Datensatz erstellen
desName <- svydesign(ids = ~0, weights = ~pgewges,
data = mitools::imputationList(ListenName))
# das Design für auf die Grundgesamtheit hochgerechnete Werte erstellen
desOhne<- svydesign(ids = ~0, weights = ~1,
data= mitools::imputationList(ListenName))
if(Richtung == 1){
i1 <- varName
i2 <- 'alpha.pv'
} else {
i1 <- 'alpha.pv'
i2 <- varName
}
### Chi2 Berechnung ----
# Hilfsfunktion, um micombine.chisquare nicht immer wiederholen zu müssen
chi2.comb <- function(C) {
micombine.chisquare(dk=sapply(C, FUN = function(x) x$statistic),
df=mean(sapply(C, FUN = function(x) x$parameter)))
}
# Chi2 für die gesamte Tabelle
chi.gesamt <- with(desName, svychisq(as.formula(paste0("~", i1, "+alpha2")), statistic ="Chisq"))
chi.gesamt <- chi2.comb(chi.gesamt)
cat(round(chi.gesamt["p"], 3))
cat("\n")
# Chi2 für einzelne Ausprägungen
lvls <- levels(to_factor(leoList[[1]][[varName]], levels="labels"))
#Levelnamen kürzen und Leerzeichen rausnehmen
lvls.short <- gsub(" ", "_", lvls)
#lvls.short <- substr(lvls.short, 0, 25)
chiNames <- paste0(varName, "_", lvls.short)
cat(chiNames)
chilist <- list()
bonferroni <- list()
for (i in seq_along(chiNames)) {
print(chiNames[[i]])
chiformula <- as.formula(paste0("~",chiNames[[i]], "+alpha2"))
chicomb <- chi2.comb(with(desName, svychisq(chiformula, statistic = "Chisq")))
chilist[i] <- chicomb[["p"]]
bonferroni[i] <- ifelse(chicomb[["p"]] < 0.05/length(lvls), "sig.", "n.s.")
}
chilist <- append(chilist, chi.gesamt["p"])
bonferroni <- append(bonferroni, ifelse(chi.gesamt["p"] < 0.05, "sig.", "n.s."))
cat(round(unlist(chilist),3))
cat("\n")
cat(unlist(bonferroni))
cat("\n")
chi2 <- data.frame(Chi2 = round(unlist(chilist), 3), Bonferroni = unlist(bonferroni))
### Absolute Werte UNGEWICHTET und für Signifikanz gewichtete für A1-3 vs. höher ----
n.werte.alpha <- summary(MIcombine(with(desOhne, svyby(as.formula(paste0("~to_factor(", i1, ")")),
as.formula(paste0("~to_factor(", i2, ")")), svytotal, na.rm=TRUE))))
# Reihe mit sauberen Namen einfügen
n.werte.alpha[,'nam'] <- rownames(n.werte.alpha)
n.werte.alpha <- n.werte.alpha %>% mutate(nam1 = gsub(":to_factor.*$", "", nam)) %>%
mutate(nam2 = gsub("^.*\\)", "", nam)) %>% select(-nam)
# n.werte.alpha[paste0("nam", Richtung)] <- gsub("^a", "Alpha ", n.werte.alpha[[paste0("nam", Richtung)]])
# und das Ganze noch mal für die binäre Variable
n.werte.alpha.bi <- summary(MIcombine(with(desOhne,
svyby(as.formula(paste0("~to_factor(", varName,")")), ~to_factor(alpha2), svytotal, na.rm=T))))
# und auch hier saubere Namen einfügen
n.werte.alpha.bi[,'nam'] <- rownames(n.werte.alpha.bi)
if(Richtung == 1){
n.werte.alpha.bi <- n.werte.alpha.bi %>% mutate(nam1 = gsub(":to_factor.*$", "", nam)) %>%
mutate(nam2 = gsub("^.*\\)", "", nam)) %>% select(-nam)
} else{
n.werte.alpha.bi <- n.werte.alpha.bi %>% mutate(nam2 = gsub(":to_factor.*$", "", nam)) %>%
mutate(nam1 = gsub("^.*\\)", "", nam)) %>% select(-nam)
}
# und für die Signifikanzen die gewichteten Anteile
n.werte.alpha.bi.weighted <- summary(MIcombine(with(desName,
svyby(as.formula(paste0("~to_factor(", varName, ")")),
~to_factor(alpha2), svytotal, na.rm = TRUE))))
# und auch hier saubere Namen einfügen
n.werte.alpha.bi.weighted[,'nam'] <- rownames(n.werte.alpha.bi.weighted)
if(Richtung == 1){
n.werte.alpha.bi.weighted <- n.werte.alpha.bi.weighted %>% mutate(nam1 = gsub(":to_factor.*$", "", nam)) %>%
mutate(nam2 = gsub("^.*\\)", "", nam)) %>% select(-nam)
} else{
n.werte.alpha.bi.weighted <- n.werte.alpha.bi.weighted %>% mutate(nam2 = gsub(":to_factor.*$", "", nam)) %>%
mutate(nam1 = gsub("^.*\\)", "", nam)) %>% select(-nam)
}
### Anteile von Alpha-Leveln ----
anteile.alpha <- summary(MIcombine(with(desName, svyby(as.formula(paste0("~to_factor(", i1, ")")),
as.formula(paste0("~to_factor(", i2, ")")), svymean, na.rm=TRUE))))
anteile.alpha[,'nam'] <- rownames(anteile.alpha)
anteile.alpha <- anteile.alpha %>% mutate(nam1 = gsub(":to_factor.*$", "", nam)) %>%
mutate(nam2 = gsub("^.*\\)", "", nam)) %>% select(-nam)
### Anteile von Gesamtbevölkerung ----
anteile.gb <- summary(MIcombine(with(desName, svyby(as.formula(paste0("~to_factor(", varName, ")")),
~a, svymean, na.rm=TRUE))))
anteile.gb[,'nam'] <- rownames(anteile.gb)
if(Richtung == 1){
anteile.gb <- anteile.gb %>% mutate(nam1 = gsub(".*", "GB", nam)) %>%
mutate(nam2 = gsub(".*\\)", "", nam)) %>% select(-nam)
anteile.gb[paste0("nam", Richtung)] <- gsub("^a" ,"Alpha ", anteile.gb[[paste0("nam", Richtung)]])
}else {
anteile.gb <- anteile.gb %>% mutate(nam2 = gsub(".*", "GB", nam)) %>%
mutate(nam1 = gsub(".*\\)", "", nam)) %>% select(-nam)
}
### Anteile von Alpha 1-3 vs. Rest ----
if(Richtung == 1){
anteile.alpha.bi <- summary(MIcombine(with(desName, svyby(as.formula(paste0("~to_factor(", i1, ")")),
~alpha2, svymean, na.rm=TRUE))))
}else{
anteile.alpha.bi <- summary(MIcombine(with(desName, svyby(~to_factor(alpha2),
as.formula(paste0("~to_factor(", varName, ")")),
svymean, na.rm=TRUE))))
}
anteile.alpha.bi[,'nam'] <- rownames(anteile.alpha.bi)
anteile.alpha.bi <- anteile.alpha.bi %>% mutate(nam1 = gsub(":to_factor.*$", "", nam)) %>%
mutate(nam2 = gsub("^.*\\)", "", nam)) %>% select(-nam)
# Alle Anteile in einen Datensatz
alle <- bind_rows(anteile.alpha, anteile.alpha.bi, anteile.gb)
aTab <- alle %>%
mutate(results = round(results * 100, 1)) %>%
select(results, nam1, nam2) %>%
pivot_wider(names_from = nam1, values_from = results)
nTab <- n.werte.alpha %>%
select(results, nam1, nam2) %>%
pivot_wider(names_from = nam1, values_from = results) %>%
select(-nam2)
colnames(nTab) <- paste("n", colnames(nTab))
nBi <- n.werte.alpha.bi %>%
select(results, nam1, nam2) %>%
pivot_wider(names_from = nam1, values_from = results) %>%
select(-nam2) %>%
mutate("GB" = rowSums(.))
colnames(nBi) <- paste("n", colnames(nBi))
rnames <- select(aTab, nam2)
rnames[nrow(rnames) + 1,] <- "Sum"
aTab <- select(aTab, -nam2)
aTab <- bind_cols(aTab, nTab, nBi)
aTab[nrow(aTab) + 1,] = colSums(aTab)
aTab <- bind_cols(rnames, aTab)
# Chi2 Hinzufügen
if(Richtung==1) aTab <- add_column(aTab, Chi2 = chi2[[1]], Bonferroni = chi2[[2]], .after = 9)
#umsortieren
#if(Richtung == 1) aTab <- aTab[, c(1:6,9,7,8,11,10,12:ncol(aTab))]
if(Richtung == 2) aTab <- aTab[c(1:5,8,6:7,9:nrow(aTab)),]
# SPEICHERN
if(sheet.Name == "Mappe") sheet.Name <- paste(varName, " (", Richtung, ")")
write.xlsx(as.data.frame(aTab), Excel.Name, sheetName = sheet.Name , append = TRUE, row.names = FALSE)
if(Richtung == 2) write.xlsx(chi2, Excel.Name, sheetName = paste0(sheet.Name, "_sig"), append = TRUE,
row.names = FALSE)
# wb <- openxlsx::createWorkbook()
# openxlsx::addWorksheet(wb, sheetName = sheet.Name)
# openxlsx::writeDataTable(wb, sheet = 1, x = aTab,colNames = TRUE, rowNames = FALSE, tableStyle = "none")
# openxlsx::saveWorkbook(wb, Excel.Name)
return(as.data.frame(aTab))
}
#' Calculates frequency tables for multiply imputed and weighted data.
#'
#' @param rowvar A vector or a dataframe of multiply imputed variables.
#' @param colvar A vector or a dataframe of multiply imputed variables.
#' @param supercolvar A vector or a dataframe of multiply imputed variables.
#' @param weight A vector with frequency weights.
#' @param data A dataframe that contains above variables
#' @return An xtabs object.
#' @examples
#' leo <- haven::read_sav("~/leo.sav")
#' leo$f001 <- labelled::to_factor(leo$f001)
#' leo$altgr5 <- labelled::to_factor(leo$altgr5)
#' pvs <- leo %>% select(matches("^alpha.pv[0-9]+")) %>% to_factor()
#' # 1-way
#' leo_table(rowvar = pvs, weight = leo$pgewges, data = leo)
#' # 2-way
#' leo_table(rowvar = leo$pol001, colvar = pvs, weight = leo$pgewges, data = leo)
#' # 3-way
#' leo_table(rowvar = pvs, colvar = leo$altgr5, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
#'
#' @export
leo_table <- function(rowvar, colvar, supercolvar, weight, data) {
# check for missing and/or wrong arguments
if (missing(data))
stop("'data' is missing")
if (missing(rowvar))
stop("'rowvar' is missing")
if (!missing(supercolvar) && missing (colvar))
stop("'colvar' is missing")
# deparse variables for later use
rowvar_d <- deparse(substitute(rowvar))
colvar_d <- deparse(substitute(colvar))
supercolvar_d <- deparse(substitute(supercolvar))
# if no weight is supplied, weight = 1 for every case
if (missing(weight)) {
data$weight_gen_38376z5 <- 1
weight <- "weight_gen_38376z5"
} else {
weight <- deparse(substitute(weight))
}
# 1-way table ----
if (missing(colvar) && missing(supercolvar)) {
if (is.atomic(rowvar)) {
cat("1-way table\n")
f <- as.formula(paste(weight, "~", rowvar_d))
tbl <- xtabs(formula = f, data = data)
} else {
cat("1-way table with multiple imputations\n")
mi <- names(rowvar)
tbl.list <- lapply(X = mi, FUN = function(x) {
f <- as.formula(paste(weight, "~", x))
xtabs(f, data = data)})
tbl <- Reduce("+", tbl.list)/10
names(attr(tbl, "dimnames")) <- c("mi")
}
}
# 2-way table ----
if (missing(supercolvar) && !missing(colvar)) {
if (is.atomic(rowvar) && is.atomic(colvar)) {
cat("2-way table\n")
f <- as.formula(paste(weight, "~", rowvar_d, "+", colvar_d))
tbl <- xtabs(formula = f, data = data)
} else {
cat("2-way table with multiple imputations\n")
if (is.recursive(rowvar) && is.atomic(colvar)) {
cat("rowvar is multiple imputed\n")
mi <- names(rowvar)
tbl.list <- lapply(X = mi, FUN = function(x) {
f <- as.formula(paste(weight, "~", x, "+", colvar_d))
xtabs(f, data = data)})
tbl <- Reduce("+", tbl.list)/10
names(attr(tbl, "dimnames"))[1] <- "mi"
} else if (is.atomic(rowvar) && is.recursive(colvar)) {
cat("colvar is multiple imputed\n")
mi <- names(colvar)
tbl.list <- lapply(X = mi, FUN = function(x) {
f <- as.formula(paste(weight, "~", rowvar_d, "+", x))
xtabs(f, data = data)})
tbl <- Reduce("+", tbl.list)/10
names(attr(tbl, "dimnames"))[2] <- "mi"
} else {
stop("only one variable can be multiply imputed")
}
}
}
# 3-way table ----
if (!missing(supercolvar)) {
if (is.atomic(rowvar) && is.atomic(colvar) && is.atomic(supercolvar)) {
cat("3-way table\n")
rowvar <- deparse(substitute(rowvar))
colvar <- deparse(substitute(colvar))
supercolvar <- deparse(substitute(supercolvar))
f <- as.formula(paste(weight, "~", rowvar, "+", colvar, "+", supercolvar))
tbl <- xtabs(formula = f, data = data)
} else {
cat("3-way table with multiple imputations\n")
if (is.recursive(rowvar) && is.atomic(colvar) && is.atomic(supercolvar)) {
cat("rowvar is multiple imputed\n")
mi <- names(rowvar)
tbl.list <- lapply(X = mi, FUN = function(x) {
f <- as.formula(paste(weight, "~", x, "+", colvar_d, "+", supercolvar_d))
xtabs(f, data = data)})
tbl <- Reduce("+", tbl.list)/10
names(attr(tbl, "dimnames"))[1] <- "mi"
} else if (is.atomic(rowvar) && is.recursive(colvar) && is.atomic(supercolvar)) {
cat("colvar is multiple imputed\n")
mi <- names(colvar)
tbl.list <- lapply(X = mi, FUN = function(x) {
f <- as.formula(paste(weight, "~", rowvar_d, "+", x, "+", supercolvar_d))
xtabs(f, data = data)})
tbl <- Reduce("+", tbl.list)/10
names(attr(tbl, "dimnames"))[2] <- "mi"
} else if (is.atomic(rowvar) && is.atomic(colvar) && is.recursive(supercolvar)) {
cat("supercolvar is multiple imputed\n")
mi <- names(supercolvar)
tbl.list <- lapply(X = mi, FUN = function(x) {
f <- as.formula(paste(weight, "~", rowvar_d, "+", colvar_d, "+", x))
xtabs(f, data = data)})
tbl <- Reduce("+", tbl.list)/10
names(attr(tbl, "dimnames"))[3] <- "mi"
} else {
stop("only one variable can be multiply imputed")
}
}
}
if (exists("tbl.list") == TRUE) attr(tbl, "tbl.list") <- tbl.list
print(tbl)
class(tbl) <- append(class(tbl),"leo_table")
ret <- tbl
}
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
rm(list=ls(all=TRUE)) # REMOVE ALL OBJECTS FROM WORKSPACE
library(haven)
library(leo)
#leo2018 <- read_sav("C:/Users/Dutz/Documents/daten/leo2018/leo7192_mit_PVs_v1.1.sav")
leo2010 <- read_sav("C:/Users/Dutz/Documents/daten/leo2010/Leo_Enddaten_mit_PVs.sav")
#test2018 <- leo_getdesign(leo2018)
leodata <- leo_getdesign(leo2010, year = 2010)
lit <- grep("^pv[1-5]", colnames(leodata), value = TRUE)
alp <- grep("^alpha.pv[1-5]", colnames(leodata), value = TRUE)
gl <- grep("^alpha3.pv[1-5]", colnames(leodata), value = TRUE)
alle <- leodata %>% select(-one_of(lit))
rm(list=ls(all=TRUE)) # REMOVE ALL OBJECTS FROM WORKSPACE
library(haven)
library(labelled)
library(tidyverse)
library(leo)
leo <- read_sav("C:/Users/Dutz/Documents/daten/leo2018/leo7192_mit_PVs_v1.1.sav")
names(leo) <- tolower(names(leo))
pvs <- leo %>% select(matches("^alpha.pv[0-9]+")) %>% to_factor()
leo$f001 <- to_factor(leo$f001)
leo$altgr5 <- to_factor(leo$altgr5)
leo$schulab <- to_factor(leo$schulab)
leo$erstspr <- to_factor(leo$erstspr)
leo$migra <- to_factor(leo$migra)
t <- leo_table(leo$altgr5, pvs, weight = leo$pgewges, data = leo)
class(t) <- append(class(t),"leo_table")
t1 <- leo.format(t, right = 3)
t1
t2 <- leo.format(t, upper = 2)
t2
t3 <- leo.format(t, right = 3, upper = 2)
t3
t4 <- leo.format(t, right = 3, left = 2, lower = 2 , upper = 2)
t4
rm(list=ls(all=TRUE)) # REMOVE ALL OBJECTS FROM WORKSPACE
library(haven)
library(labelled)
library(tidyverse)
library(xlsx)
library(leo)
#leo <- read_sav("C:/Users/Dutz/Documents/daten/leo2018/leo7192_mit_PVs_v1.1.sav")
leo <- read_sav("G:/leo2018/leo7192_mit_PVs_v1.1.sav")
names(leo) <- tolower(names(leo))
# Variablen in Faktoren umwandeln
leo$pol001 <- to_factor(leo$pol001)
leo$f001 <- to_factor(leo$f001)
leo$altgr5 <- to_factor(leo$altgr5)
# Alpha-Level in separatem DF speichern
pvs <- leo %>% select(matches("^alpha.pv[0-9]+")) %>% to_factor()
# ohne Alpha-Level
# 1-way
tbl1 <- leo_table(rowvar = leo$pol001, weight = leo$pgewges, data = leo)
# 2-way
tbl2 <- leo_table(rowvar = leo$pol001, colvar = leo$f001, weight = leo$pgewges, data = leo)
# 3-way
tbl3 <- leo_table(rowvar = leo$pol001, colvar = leo$altgr5, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
# mit Alpha-Level
# 1-way
leo_table(rowvar = pvs, weight = leo$pgewges, data = leo)
# 2-way
leo_table(rowvar = leo$pol001, colvar = pvs, weight = leo$pgewges, data = leo)
leo_table(rowvar = pvs, colvar = leo$f001, weight = leo$pgewges, data = leo)
# 3-way
leo_table(rowvar = pvs, colvar = leo$altgr5, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
leo_table(rowvar = leo$altgr5, colvar = pvs, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
leo_table(rowvar = leo$altgr5, colvar = leo$f001, supercolvar = pvs, weight = leo$pgewges, data = leo)
# In Excel-Datei schreiben
write.xlsx(tbl1, "test.xlsx", sheetName = "tbl1",
col.names = TRUE, row.names = FALSE, append = FALSE)
write.xlsx(tbl2, "test.xlsx", sheetName = "tbl2",
col.names = TRUE, row.names = FALSE, append = TRUE)
write.xlsx(tbl3, "test.xlsx", sheetName = "tbl3",
col.names = TRUE, row.names = FALSE, append = TRUE)
tbl4 <- leo_table(rowvar = pvs, colvar = leo$altgr5, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
# Mit relativen Häufigkeiten für 3-way tables
# Insgesamt = 100%
prop.table(tbl4)
# Reihen über alle Gruppen/supercolvar = 100%
prop.table(tbl4, 1)
# Spalten über alle Gruppen/supercolvar = 100%
prop.table(tbl4, 2)
# Jede Gruppe/supercolvar = 100%
prop.table(tbl4, 3)
# Jede Supercolvar = 100%
prop.table(tbl4, c(3, 1)) # <- Wahrscheinlich am interessantesten
# Jede Spalte in jeder Gruppe/supercolvar = 100%
prop.table(tbl4, c(3, 2))
# 3-way-table plotten
tbl5 <- leo_table(pvs, leo$altgr5, leo$f001, leo$pgewges, leo)
tbl5 <- prop.table(tbl5, c(3, 1))
df5 <- data.frame(tbl5)
ggplot(data = df5, aes(x = leo.altgr5, y = Freq, fill = leo.f001, label = round(Freq, 2))) +
geom_bar(position = "dodge", stat = "identity") +
facet_grid(mi ~ leo.f001) +
scale_fill_brewer(palette = "Paired") +
geom_text(aes(y=mean(Freq)), size = 3)
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/leo_format.R
\name{leo.format}
\alias{leo.format}
\title{Condenses rows and columns of a frequency table by summing the cells.}
\usage{
leo.format(a, upper, lower, left, right)
}
\arguments{
\item{a}{A leo_table object.}
\item{upper}{An integer.}
\item{lower}{An integer.}
\item{left}{An integer.}
\item{right}{An integer.}
}
\value{
An xtabs object.
}
\description{
Condenses rows and columns of a frequency table by summing the cells.
}
\examples{
example <- example
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/leo_design.R
\name{leo_getdesign}
\alias{leo_getdesign}
\title{Creates survey design objects for LEO 2010 and 2018.}
\usage{
leo_getdesign(leodata, year = 2018)
}
\arguments{
\item{leodata}{Original dataframe of LEO 2010 or LLEO 2018.}
\item{year}{Either 2010 or 2018.}
}
\value{
A survey design object.
}
\description{
Creates survey design objects for LEO 2010 and 2018.
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/leo_svydesign.R
\name{leo_svydesign}
\alias{leo_svydesign}
\title{Creates survey design objects for LEO 2010 and 2018.}
\usage{
leo_svydesign(df, survey_name = "leo2018", list = FALSE)
}
\arguments{
\item{df}{Survey data}
\item{survey_name}{leo2010 and leo2018 (leo2018hoch) implemented}
}
\value{
A survey design object.
}
\description{
Creates survey design objects for LEO 2010 and 2018.
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/leo_tab.R
\name{leo_tab}
\alias{leo_tab}
\title{Creates survey design objects for LEO 2010 and 2018.}
\usage{
leo_tab(
varName,
Richtung = 1,
sheet.Name = "Mappe",
Excel.Name,
ListenName = leoList,
pWert = FALSE
)
}
\arguments{
\item{varName}{Original dataframe of LEO 2010 or LLEO 2018.}
\item{Richtung}{Either 2010 or 2018.}
\item{Excel.Name}{Either 2010 or 2018.}
\item{ListenName}{Either 2010 or 2018.}
\item{pWert}{Either 2010 or 2018.}
\item{sheet.name}{Either 2010 or 2018.}
}
\value{
A dataframe with frequencies
}
\description{
Creates survey design objects for LEO 2010 and 2018.
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/leo_table.R
\name{leo_table}
\alias{leo_table}
\title{Calculates frequency tables for multiply imputed and weighted data.}
\usage{
leo_table(rowvar, colvar, supercolvar, weight, data)
}
\arguments{
\item{rowvar}{A vector or a dataframe of multiply imputed variables.}
\item{colvar}{A vector or a dataframe of multiply imputed variables.}
\item{supercolvar}{A vector or a dataframe of multiply imputed variables.}
\item{weight}{A vector with frequency weights.}
\item{data}{A dataframe that contains above variables}
}
\value{
An xtabs object.
}
\description{
Calculates frequency tables for multiply imputed and weighted data.
}
\examples{
leo <- haven::read_sav("~/leo.sav")
leo$f001 <- labelled::to_factor(leo$f001)
leo$altgr5 <- labelled::to_factor(leo$altgr5)
pvs <- leo \%>\% select(matches("^alpha.pv[0-9]+")) \%>\% to_factor()
# 1-way
leo_table(rowvar = pvs, weight = leo$pgewges, data = leo)
# 2-way
leo_table(rowvar = leo$pol001, colvar = pvs, weight = leo$pgewges, data = leo)
# 3-way
leo_table(rowvar = pvs, colvar = leo$altgr5, supercolvar = leo$f001, weight = leo$pgewges, data = leo)
}
rm(list=ls(all=TRUE)) # REMOVE ALL OBJECTS FROM WORKSPACE
library(haven)
library(labelled)
library(tidyverse)
library(xlsx)
library(leo)
leo <- read_sav("C:/Users/Dutz/Documents/daten/leo2018/leo7192_mit_PVs_v1.1.sav")
names(leo) <- tolower(names(leo))
# Alpha-Level in separatem DF speichern
pvs <- leo %>% select(matches("^alpha.pv[0-9]+")) %>% to_factor()
alpha <- leo.table(rowvar = pvs, weight = leo$pgewges, data = leo)
alpha.prop <- prop.table(alpha)
n <- sum(alpha.prop)
mtr <- matrix(data = NA , nrow = NCOL(alpha.prop), ncol = NROW(alpha.prop))
if (NCOL(alpha.prop) == 1 ) {
for (x in 1:NROW(alpha.prop)) {
mtr[x] <- sqrt((alpha.prop[x]*(1-alpha.prop[x]))/n)
}
}
alpha.list <- attr(alpha, "tbl.list")
alpha.prop.list <- lapply(alpha.list, prop.table)
n <- table.sum <- sum(alpha.prop.list) # equals 1
sqrt((table.prop[x, y] * (1 - table.prop[x, y])) / n)
# leo <- read_sav("C:/Users/Dutz/Documents/daten/leo2018/leo7192_mit_PVs.sav")
#
# # Alpha-Level in separatem DF speichern
# pvs <- leo %>% select(matches("^alpha.pv[0-9]+")) %>% to_factor()
#
# t <- leo.table(pvs, leo$pol001, weight = leo$pgewges, data = leo)
leo2010 <- read_sav("C:/Users/Dutz/Documents/daten/leo2010/Leo_Enddaten_mit_PVs.sav")
names(leo2010) <- tolower(names(leo2010))
leo2010$f001 <- to_factor(leo2010$f001)
pvs <- leo2010 %>% select(matches("^pv[0-9]+_alphalevel_62")) %>% to_factor()
t <- prop.table(leo.table(pvs, leo2010$f001, weight = leo2010$gewleointegr, data = leo2010), 2)
tbl.list <- attr(t, "tbl.list")
tbl.list <- lapply(tbl.list, prop.table, margin = 2)
mtr <- matrix(data = NA , nrow = dim(tbl.list[[1]])[1], ncol = dim(tbl.list[[1]])[2])
m <- length(tbl.list) # number of imputations
for (x in 1:dim(tbl.list[[1]])[1]) {
for (y in 1:dim(tbl.list[[1]])[2]) {
mtr[x,y] <- var(sapply(tbl.list, `[`, x, y))
}
}
between <- mtr
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment