test2

8c872a1c · Wegner · dea66ef7 · 8c872a1c · 8c872a1c · 8c872a1c
Commit 8c872a1c authored 1 year ago by Wegner
--- a/DESCRIPTION
+++ b/DESCRIPTION
-Package: sozoekds
+Package: sozoekdsexam
-Title: Datapackage to the Course "Datascience for Socioeconomists" at University of Hamburg, Department of Socioeconomics
+Title: Datapackage for the exam to the Course "Datascience for Socioeconomists" at University of Hamburg, Department of Socioeconomics
 Version: 0.0.0.1
 Authors@R: 
  person("Lisa Marie", "Wegner", , "lisamarie.wegner@uni-hamburg.de", role = c("aut", "cre"), comment = c(ORCID = "0009-0004-2234-3052"))
-Description: Package includes 5 data sets that are used within the course.
+Description: Package includes data sets that are known from our course. There are additional data sets that will be used within the exam.
+            What's new? adult_income is a new dataset that uses census data from the USA and includes information about socio-demographics and income of adults.
+            The examscores dataset that is already known was cleaned.
+            All other data sets are the same as in the prior package sozoekds.
 License: CC BY 4.0
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Depends: 
    R (>= 3.5.0)
 LazyData: true
--- a/R/adult_income.R
+++ b/R/adult_income.R
+#' Census data about American citizens, their income, and sociodemographic details.
+#'
+#' This data was extracted from the 1994 Census Bureau database by Ronny Kohavi and Barry Becker (Data Mining and Visualization, Silicon Graphics).
+#'
+#' @format A tibble with 32561 rows and 15 variables:
+#' \describe{
+#'    \item{\code{age}}{int age in years}
+#'    \item{\code{workclass}}{chr workclass: Private, Self-emp-not-inc, Local-gov, ?, State-gov}
+#'    \item{\code{fnlwgt}}{dbl weighting factor}
+#'    \item{\code{education}}{chr education: HS-grad, Some-college, Bachelors, Masters, Assoc-voc}
+#'    \item{\code{education.num}}{int education level represented as numeric}
+#'    \item{\code{marital.status}}{chr marital status: Married-civ-spouse, Never-married, Divorced, Separated, Widowed}
+#'    \item{\code{occupation}}{chr occupation: Prof-specialty, Craft-repair, Exec-managerial, Adm-clerical, Sales}
+#'    \item{\code{relationship}}{chr relationship: Husband, Not-in-family, Own-child, Unmarried, Wife}
+#'    \item{\code{race}}{chr race: White, Black, Asian-Pac-Islander, Amer-Indian-Eskimo, Other}
+#'    \item{\code{sex}}{chr gender: Male, Female}
+#'    \item{\code{native.country}}{chr native country: United-States, Mexico, ?, Philippines, Germany}
+#'    \item{\code{income}}{chr income: <=50K, >50K}
+#'    }
+"adult_income"
--- a/R/airbnbbig.R
+++ b/R/airbnbbig.R
@@ -83,5 +83,5 @@
 #'   \item{\code{ln_number_of_reviews}}{Natural logarithm of number of reviews}
 #' }
 #'
-#' @source Original dataset source (if applicable)
+#' 
 "airbnbbig"
--- a/R/examscores.R
+++ b/R/examscores.R
@@ -4,20 +4,20 @@
 #'
 #' @format A data frame, 30641 observations, 15 variables
 #' \describe{
-#' item{\code{Gender}}{Gender of a student (string)}
+#' \item{\code{Gender}}{Gender of a student (string)}
-#' item{\code{EthnicGroup}}{Origin of a student (string)}
+#' \item{\code{EthnicGroup}}{Origin of a student (string)}
-#' item{\code{ParentEduc}}{Parents education (string)}
+#' \item{\code{ParentEduc}}{Parents education (string)}
-#' item{\code{LunchType}}{Type of lunch (string)}
+#' \item{\code{LunchType}}{Type of lunch (string)}
-#' item{\code{TestPrep}}{Preparation for a test (string)}
+#' \item{\code{TestPrep}}{Preparation for a test (string)}
-#' item{\code{ParentMaritalStatus}}{Parental marital status (string)}
+#' \item{\code{ParentMaritalStatus}}{Parental marital status (string)}
-#' item{\code{PracticeSport}}{Practice sports (string)}
+#' \item{\code{PracticeSport}}{Practice sports (string)}
-#' item{\code{IsFirstChild}}{Firstborn (boolean)}
+#' \item{\code{IsFirstChild}}{Firstborn (boolean)}
-#' item{\code{NrSiblings}}{Number of siblings (integer)}
+#' \item{\code{NrSiblings}}{Number of siblings (integer)}
-#' item{\code{TransportMeans}}{Means of transport to school (string)}
+#' \item{\code{TransportMeans}}{Means of transport to school (string)}
-#' item{\code{WklyStudyHours}}{Number of study hours per week (integer)}
+#' \item{\code{WklyStudyHours}}{Number of study hours per week (integer)}
-#' item{\code{MathScore}}{Score in math (integer)}
+#' \item{\code{MathScore}}{Score in math (integer)}
-#' item{\code{ReadingScore}}{Score in reading (integer)}
+#' \item{\code{ReadingScore}}{Score in reading (integer)}
-#' item{\code{WritingScore}}{Score in writing (integer)}
+#' \item{\code{WritingScore}}{Score in writing (integer)}
 #'}
 "examscores"
--- a/README.md
+++ b/README.md
 # sozoekdsexam
-Package for the course "Data Science for Socioeconomists" at the department of Socioeconomics at University of Hamburg, Germany.
+Package for the exam of the course "Data Science for Socioeconomists" at the department of Socioeconomics at University of Hamburg, Germany.
-Includes 5 different data sets that will be used within the course: *airbnbbig* and *airbnbsmall* include data about Airbnb listing from London, *calhouse* includes data about the housing market in California, *examscores* is a fictional dataset about grades and the socioeconomic background of students, *testdata* is a fictional dataset about income and spending habits.
 ## Installation
 Either:\
 #install.packages("remotes")\
 #library(remotes)\
-remotes::install_gitlab("BAQ6370/sozoekds", host="gitlab.rrz.uni-hamburg.de")
+remotes::install_gitlab("BAQ6370/sozoekdsexam", host="gitlab.rrz.uni-hamburg.de")
 or:
 #install.packages("devtools")\
 #library(devtools)\
-devtools::install_git("[https://gitlab.rrz.uni-hamburg.de/baq6370/sozoekds.git](https://gitlab.rrz.uni-hamburg.de/bay1977/sozoeko1.git)")
+devtools::install_git("https://gitlab.rrz.uni-hamburg.de/baq6370/sozoekdsexam.git")
 ## Support
 If you find a bug please contact: [lisamarie.wegner@uni-hamburg.de](mailto:lisamarie.wegner@uni-hamburg.de)
 ## Roadmap
-Package should be used in teaching Datascience for Socioeconomists at WiSo-Fakultaet, area Socioeconomics at Hamburg University in Winter 2023.
+This package is intended for use in the exam of the course Datascience for Socioeconomists at WiSo-Fakultaet, area Socioeconomics at Hamburg University in Winter 2024.
 ## Authors and acknowledgment

--- a/data-raw/DATASET.R
+++ b/data-raw/DATASET.R
-## code to prepare `DATASET` dataset goes here
-usethis::use_data(DATASET, overwrite = TRUE)
--- a/data-raw/adult_income.csv
+++ b/data-raw/adult_income.csv
--- a/data-raw/adult_income_load.R
+++ b/data-raw/adult_income_load.R
+## code to prepare `adult_income` dataset goes here
+library(devtools)
+library(roxygen2)
+adult_income <- read.csv("Z://wiso/SozOek_EmpWifo/Gemeinsam/L_Projekte/Data_Science_Kurs_WiSe_2023_24/Package/sozoekdsexam/data-raw/adult_income.csv")
+save(adult_income, file = "Z://wiso/SozOek_EmpWifo/Gemeinsam/L_Projekte/Data_Science_Kurs_WiSe_2023_24/Package/sozoekdsexam/data/adult_income.rda")
+usethis::use_data(adult_income, overwrite = TRUE)
\ No newline at end of file
--- a/data-raw/examscores_load.R
+++ b/data-raw/examscores_load.R
 ## code to prepare `examscores` dataset goes here
-examscores <- read.csv("Z://wiso/SozOek_EmpWifo/Gemeinsam/L_Projekte/Data_Science_Kurs_WiSe_2023_24/sozoekds/data-raw/examscores.csv")
+library(devtools)
+library(roxygen2)
+library(dplyr)
+examscores<- newdata %>% select(-high_mathscore)
 usethis::use_data(examscores, overwrite = TRUE)
--- a/data/adult_income.rda
+++ b/data/adult_income.rda
--- a/data/examscores.rda
+++ b/data/examscores.rda
--- a/man/adult_income.Rd
+++ b/man/adult_income.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/adult_income.R
+\docType{data}
+\name{adult_income}
+\alias{adult_income}
+\title{Census data about American citizens, their income, and sociodemographic details.}
+\format{
+A tibble with 32561 rows and 15 variables:
+\describe{
+\item{\code{age}}{int age in years}
+\item{\code{workclass}}{chr workclass: Private, Self-emp-not-inc, Local-gov, ?, State-gov}
+\item{\code{fnlwgt}}{dbl weighting factor}
+\item{\code{education}}{chr education: HS-grad, Some-college, Bachelors, Masters, Assoc-voc}
+\item{\code{education.num}}{int education level represented as numeric}
+\item{\code{marital.status}}{chr marital status: Married-civ-spouse, Never-married, Divorced, Separated, Widowed}
+\item{\code{occupation}}{chr occupation: Prof-specialty, Craft-repair, Exec-managerial, Adm-clerical, Sales}
+\item{\code{relationship}}{chr relationship: Husband, Not-in-family, Own-child, Unmarried, Wife}
+\item{\code{race}}{chr race: White, Black, Asian-Pac-Islander, Amer-Indian-Eskimo, Other}
+\item{\code{sex}}{chr gender: Male, Female}
+\item{\code{native.country}}{chr native country: United-States, Mexico, ?, Philippines, Germany}
+\item{\code{income}}{chr income: <=50K, >50K}
+}
+}
+\usage{
+adult_income
+}
+\description{
+This data was extracted from the 1994 Census Bureau database by Ronny Kohavi and Barry Becker (Data Mining and Visualization, Silicon Graphics).
+}
+\keyword{datasets}
--- a/man/airbnbbig.Rd
+++ b/man/airbnbbig.Rd
@@ -7,9 +7,6 @@
 \format{
 A data frame, 51697 observations, 72 variables
 }
-\source{
-Original dataset source (if applicable)
-}
 \usage{
 airbnbbig
 }

--- a/man/airbnbsmall.Rd
+++ b/man/airbnbsmall.Rd
--- a/man/examscores.Rd
+++ b/man/examscores.Rd
@@ -7,20 +7,20 @@
 \format{
 A data frame, 30641 observations, 15 variables
 \describe{
-item{\code{Gender}}{Gender of a student (string)}
+\item{\code{Gender}}{Gender of a student (string)}
-item{\code{EthnicGroup}}{Origin of a student (string)}
+\item{\code{EthnicGroup}}{Origin of a student (string)}
-item{\code{ParentEduc}}{Parents education (string)}
+\item{\code{ParentEduc}}{Parents education (string)}
-item{\code{LunchType}}{Type of lunch (string)}
+\item{\code{LunchType}}{Type of lunch (string)}
-item{\code{TestPrep}}{Preparation for a test (string)}
+\item{\code{TestPrep}}{Preparation for a test (string)}
-item{\code{ParentMaritalStatus}}{Parental marital status (string)}
+\item{\code{ParentMaritalStatus}}{Parental marital status (string)}
-item{\code{PracticeSport}}{Practice sports (string)}
+\item{\code{PracticeSport}}{Practice sports (string)}
-item{\code{IsFirstChild}}{Firstborn (boolean)}
+\item{\code{IsFirstChild}}{Firstborn (boolean)}
-item{\code{NrSiblings}}{Number of siblings (integer)}
+\item{\code{NrSiblings}}{Number of siblings (integer)}
-item{\code{TransportMeans}}{Means of transport to school (string)}
+\item{\code{TransportMeans}}{Means of transport to school (string)}
-item{\code{WklyStudyHours}}{Number of study hours per week (integer)}
+\item{\code{WklyStudyHours}}{Number of study hours per week (integer)}
-item{\code{MathScore}}{Score in math (integer)}
+\item{\code{MathScore}}{Score in math (integer)}
-item{\code{ReadingScore}}{Score in reading (integer)}
+\item{\code{ReadingScore}}{Score in reading (integer)}
-item{\code{WritingScore}}{Score in writing (integer)}
+\item{\code{WritingScore}}{Score in writing (integer)}
 }
 }
 \usage{

--- a/man/testdata.Rd
+++ b/man/testdata.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{testdata}
 \alias{testdata}
-\title{Random generated numbers and values for expplanatory purpose.}
+\title{Random generated numbers and values for explanatory purpose.}
 \format{
 A data frame, 100 observations, 4 variables
 \describe{
@@ -17,6 +17,6 @@ A data frame,100 observations,  4 variables
 testdata
 }
 \description{
-Random generated numbers and values for expplanatory purpose.
+Random generated numbers and values for explanatory purpose.
 }
 \keyword{datasets}
--- a/sozoekds.Rproj
+++ b/sozoekds.Rproj