source("init.R")
[1] "loading 1 file(s) ..."
# Read the semicolon-separated input table; as.is = TRUE keeps text columns
# as character instead of converting them to factors.
data <- read.csv(
  file = paste0(input.dir, "/20240905_Whisky_A.csv"),
  sep = ";",
  as.is = TRUE
)

# Promote the first column to row names, then drop it from the table.
rownames(data) <- data[, 1]
data <- data[, -1]

# Rename selected columns by position (positions refer to the table after
# the first column has been removed).
colnames(data)[c(1, 3, 4, 6, 7, 11)] <-
  c("Brand", "Variety", "Country", "Barell", "Age", "Alcohol")

# Columns 1-11 are kept as sample annotation; everything from column 12
# onwards is kept as the measurement table.
sample.info <- data[, 1:11]
data.x <- data[, 12:ncol(data)]

# Normalise every sample (row) to the sum of its spectrum.
data.total <- data.x / rowSums(data.x)

# Pool the Scandinavian countries into one "Scandinavia" level.
sample.info[
  sample.info[, "Country"] %in% c("Norway", "Sweden", "Finland", "Denmark"),
  "Country"
] <- "Scandinavia"

# Bundle raw and sum-normalised data into a SummarizedExperiment. The tables
# are transposed so that samples become columns, matching colData.
se <- SummarizedExperiment(
  assays = list(
    original = t(data.x),
    norm.total = t(data.total)
  ),
  colData = as.data.frame(sample.info)
)

# Persist the object for later steps.
saveObject(se, file = file.path(data.dir, "sum_exp_norm.RData"))
# Render an HTML summary table of the sample annotation columns 3-11.
# `max.distinct.values` is an argument of dfSummary() (it controls how many
# distinct values are listed per variable), not of print(); it therefore has
# to be supplied where the summary is computed, otherwise the default limit
# is used.
print(
  dfSummary(
    sample.info[, 3:11],
    graph.magnif = 0.85,
    max.distinct.values = 42
  ),
  method = "render"
)
| No | Variable | Stats / Values | Freqs (% of Valid) | Graph | Valid | Missing |
|---|---|---|---|---|---|---|
| 1 | Variety [character] | | | | 686 (100.0%) | 0 (0.0%) |
| 2 | Country [character] | | | | 686 (100.0%) | 0 (0.0%) |
| 3 | Region [character] | | | | 686 (100.0%) | 0 (0.0%) |
| 4 | Barell [character] | | | | 686 (100.0%) | 0 (0.0%) |
| 5 | Age [character] | | | | 686 (100.0%) | 0 (0.0%) |
| 6 | chill.filtrated [character] | | | | 686 (100.0%) | 0 (0.0%) |
| 7 | colour [character] | | | | 686 (100.0%) | 0 (0.0%) |
| 8 | peated [character] | | | | 686 (100.0%) | 0 (0.0%) |
| 9 | Alcohol [character] | | | | 686 (100.0%) | 0 (0.0%) |
Generated by summarytools 1.0.1 (R version 4.3.3)
2024-09-18
# QC, sample level: fraction of measurements that are exactly zero in each
# sample (row of data.x).
prop.zero.sample <- rowSums(data.x == 0) / ncol(data.x)

# For every measured variable (column), the row index of the sample holding
# its maximum, tabulated per sample index - once for the raw data and once
# for the sum-normalised data.
ind.max <- apply(as.matrix(data.x), MARGIN = 2, FUN = which.max)
tab <- table(ind.max)
ind.max.total <- apply(as.matrix(data.total), MARGIN = 2, FUN = which.max)
tab.total <- table(ind.max.total)

# QC, metabolite level: fraction of samples in which each variable (column
# of data.x) is exactly zero.
prop.zero.var <- colSums(data.x == 0) / nrow(data.x)
# Report the R version, platform, locale and attached/loaded package versions
# used for this run, for reproducibility.
sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 24.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
##
## locale:
## [1] LC_CTYPE=de_DE.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=de_DE.UTF-8 LC_COLLATE=de_DE.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=de_DE.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] SummarizedExperiment_1.32.0 Biobase_2.62.0
## [3] GenomicRanges_1.54.1 GenomeInfoDb_1.38.8
## [5] IRanges_2.36.0 S4Vectors_0.40.2
## [7] BiocGenerics_0.48.1 MatrixGenerics_1.14.0
## [9] matrixStats_1.3.0 summarytools_1.0.1
## [11] R.utils_2.12.3 R.oo_1.26.0
## [13] R.methodsS3_1.8.2 DT_0.33
##
## loaded via a namespace (and not attached):
## [1] xfun_0.43 bslib_0.7.0 htmlwidgets_1.6.4
## [4] lattice_0.22-5 vctrs_0.6.5 tools_4.3.3
## [7] bitops_1.0-7 generics_0.1.3 tibble_3.2.1
## [10] fansi_1.0.6 pkgconfig_2.0.3 Matrix_1.6-5
## [13] checkmate_2.3.1 pryr_0.1.6 lifecycle_1.0.4
## [16] GenomeInfoDbData_1.2.11 compiler_4.3.3 stringr_1.5.1
## [19] rapportools_1.1 codetools_0.2-19 htmltools_0.5.8.1
## [22] sass_0.4.9 RCurl_1.98-1.16 yaml_2.3.8
## [25] pillar_1.9.0 crayon_1.5.2 jquerylib_0.1.4
## [28] tidyr_1.3.1 MASS_7.3-60.0.1 DelayedArray_0.28.0
## [31] cachem_1.0.8 magick_2.8.4 abind_1.4-5
## [34] tidyselect_1.2.1 digest_0.6.35 stringi_1.8.4
## [37] dplyr_1.1.4 reshape2_1.4.4 pander_0.6.5
## [40] purrr_1.0.2 grid_4.3.3 fastmap_1.1.1
## [43] SparseArray_1.2.4 cli_3.6.2 magrittr_2.0.3
## [46] S4Arrays_1.2.1 base64enc_0.1-3 utf8_1.2.4
## [49] backports_1.5.0 lubridate_1.9.3 timechange_0.3.0
## [52] rmarkdown_2.26 XVector_0.42.0 evaluate_0.23
## [55] knitr_1.46 tcltk_4.3.3 rlang_1.1.3
## [58] Rcpp_1.0.12 glue_1.7.0 rstudioapi_0.16.0
## [61] jsonlite_1.8.8 R6_2.5.1 plyr_1.8.9
## [64] zlibbioc_1.48.2