source("init.R")
# Settings shared by every random forest (RF) analysis in this script.

# Number of trees; forwarded to RF.analysis.all() as `num.trees.RF`.
num.trees.RF = 10000

# mtry = 287^(3/4), truncated to an integer.
# NOTE(review): 287 is presumably the number of features in the data - confirm.
mtry = as.integer(287^0.75)

# `s` and `t` are forwarded unchanged to RF.analysis.all() and are also part of
# the names of the stored result files.
s = 15
t = 5

# FALSE: load previously stored results; TRUE: recompute them and save them.
rerun = FALSE
For all analyses, the parameters defined above were used for random forest (RF) classification, variable selection, and relation analysis.
# Load the data object stored in sum_exp_norm.RData and its per-sample annotation.
se = loadObject(file = file.path(project.dir, "/subprojects/01_pretreatment/data/sum_exp_norm.RData"))
samples = colData(se)

# Scotland is down-sampled to at most `maxsamp` samples.
maxsamp = 100
set.seed(42)
scot.idx = which(samples[, "Country"] == "Scotland")
# Draw positions with sample.int() instead of calling sample() on the index
# vector: sample(x, n) stops with an error when length(x) < n, and draws from
# 1:x when x is a single number. Whenever at least `maxsamp` (and more than one)
# Scottish samples exist, this yields exactly the same draw as sample(x, maxsamp).
samples.scot = scot.idx[sample.int(length(scot.idx), min(maxsamp, length(scot.idx)))]

# Keep the Scottish subsample plus all samples of the other countries
# (per the original note: the classes with at least 10 samples).
other.countries = c("Germany", "Ireland", "USA", "Canada", "Japan")
se.country = se[, c(samples.scot, which(samples[, "Country"] %in% other.countries))]
# Country classification: recompute the RF analysis when `rerun` is TRUE,
# otherwise read the stored result.
# The file name encodes num.trees.RF, s and t only; after changing anything else
# (mtry, seed, sample selection) the stored result has to be rebuilt with rerun = TRUE.
country.file = file.path(
  data.dir,
  paste0("res_country", num.trees.RF, "_s_", s, "_t_", t, ".RData")
)

if (rerun) {
  set.seed(42)
  res.all = RF.analysis.all(
    sum.exp      = se.country,
    num.trees.RF = num.trees.RF,
    assay        = "original",
    var.y        = "Country",
    mtry         = mtry,
    s            = s,
    t            = t,
    VarAnal      = TRUE
  )
  saveObject(res.all, file = country.file)
} else {
  res.all = loadObject(file = country.file)
}
Classification was conducted with an out-of-bag (OOB) prediction error of 0.2821577.
| | Canada | Germany | Ireland | Japan | Scotland | USA |
---|---|---|---|---|---|---|
Canada | 8 | 0 | 1 | 1 | 0 | 1 |
Germany | 2 | 23 | 5 | 1 | 5 | 5 |
Ireland | 5 | 4 | 25 | 2 | 4 | 1 |
Japan | 1 | 0 | 5 | 2 | 2 | 0 |
Scotland | 0 | 5 | 8 | 2 | 83 | 2 |
USA | 0 | 1 | 2 | 0 | 3 | 32 |
# Region-level subsets for the USA and for Scotland.

# NOTE(review): " Kentucky" is matched with a leading blank - presumably this is
# how the value is stored in the sample annotation; confirm before changing it.
usa.regions = c(" Kentucky", "Tennessee")
se.usa = se[, which(samples[, "Region"] %in% usa.regions)]

scot.regions = c(
  "Speyside", "Campbeltown", "Highlands", "Islay",
  "Isle of Jura", "Isle of Mull", "Lowlands", "Orkney"
)
se.scotland = se[, which(samples[, "Region"] %in% scot.regions)]
# Region classification within the USA and within Scotland: recompute both RF
# analyses when `rerun` is TRUE, otherwise read the stored results.
# The file names encode num.trees.RF, s and t only.
usa.file = file.path(
  data.dir,
  paste0("res_region_usa", num.trees.RF, "_s_", s, "_t_", t, ".RData")
)
scot.file = file.path(
  data.dir,
  paste0("res_region_scotland", num.trees.RF, "_s_", s, "_t_", t, ".RData")
)

if (rerun) {
  # USA regions
  set.seed(42)
  res.usa = RF.analysis.all(
    sum.exp      = se.usa,
    num.trees.RF = num.trees.RF,
    assay        = "original",
    var.y        = "Region",
    mtry         = mtry,
    s            = s,
    t            = t,
    VarAnal      = TRUE
  )
  saveObject(res.usa, file = usa.file)

  # Scottish regions; the seed is reset so this run starts from the same RNG state
  set.seed(42)
  res.scot = RF.analysis.all(
    sum.exp      = se.scotland,
    num.trees.RF = num.trees.RF,
    assay        = "original",
    var.y        = "Region",
    mtry         = mtry,
    s            = s,
    t            = t,
    VarAnal      = TRUE
  )
  saveObject(res.scot, file = scot.file)
} else {
  res.usa = loadObject(file = usa.file)
  res.scot = loadObject(file = scot.file)
}
Classification was conducted with an out-of-bag (OOB) prediction error of 0.2903226.
| | Kentucky | Tennessee |
---|---|---|
Kentucky | 22 | 4 |
Tennessee | 5 | 0 |
Classification was conducted with an out-of-bag (OOB) prediction error of 0.497561.
| | Campbeltown | Highlands | Islay | Isle of Jura | Isle of Mull | Lowlands | Orkney | Speyside |
---|---|---|---|---|---|---|---|---|
Campbeltown | 2 | 6 | 0 | 0 | 3 | 0 | 1 | 4 |
Highlands | 6 | 30 | 9 | 0 | 3 | 7 | 1 | 49 |
Islay | 1 | 5 | 75 | 1 | 7 | 2 | 1 | 7 |
Isle of Jura | 0 | 2 | 0 | 6 | 0 | 0 | 0 | 5 |
Isle of Mull | 4 | 1 | 6 | 0 | 6 | 0 | 0 | 2 |
Lowlands | 0 | 4 | 1 | 0 | 0 | 4 | 0 | 4 |
Orkney | 1 | 2 | 0 | 1 | 1 | 0 | 10 | 5 |
Speyside | 7 | 22 | 3 | 5 | 4 | 2 | 9 | 73 |
Only samples matured in Ex-Bourbon barrels, Ex-Sherry barrels, or a mixture of both were analysed. The analysis was run on all samples and, separately, on the samples from Speyside and from Islay.
# Subsets for the barrel-type analysis (annotation column "Barell"): only
# Ex-Sherry, Ex-Bourbon and the combination of both are kept.
barell.types = c("Ex-Sherry", "Ex-Bourbon", "Ex-Bourbon, Ex-Sherry")

# All samples
se.barell = se[, which(samples[, "Barell"] %in% barell.types)]

# Speyside: restrict to the region first, then filter on the barrel type
se.speyside = se[, which(samples[, "Region"] == "Speyside")]
samples.speyside = colData(se.speyside)
se.speyside = se.speyside[, which(samples.speyside[, "Barell"] %in% barell.types)]

# Islay: same two steps
se.islay = se[, which(samples[, "Region"] == "Islay")]
samples.islay = colData(se.islay)
se.islay = se.islay[, which(samples.islay[, "Barell"] %in% barell.types)]
# Barrel-type classification for all samples, for Speyside and for Islay:
# recompute the three RF analyses when `rerun` is TRUE, otherwise read the
# stored results. The file names encode num.trees.RF, s and t only.
barell.file = file.path(
  data.dir,
  paste0("res_barell", num.trees.RF, "_s_", s, "_t_", t, ".RData")
)
speyside.file = file.path(
  data.dir,
  paste0("res_speyside", num.trees.RF, "_s_", s, "_t_", t, ".RData")
)
islay.file = file.path(
  data.dir,
  paste0("res_islay", num.trees.RF, "_s_", s, "_t_", t, ".RData")
)

if (rerun) {
  # All samples
  set.seed(42)
  res.barell = RF.analysis.all(
    sum.exp      = se.barell,
    num.trees.RF = num.trees.RF,
    assay        = "original",
    var.y        = "Barell",
    mtry         = mtry,
    s            = s,
    t            = t,
    VarAnal      = TRUE
  )
  saveObject(res.barell, file = barell.file)

  # Speyside
  set.seed(42)
  res.speyside = RF.analysis.all(
    sum.exp      = se.speyside,
    num.trees.RF = num.trees.RF,
    assay        = "original",
    var.y        = "Barell",
    mtry         = mtry,
    s            = s,
    t            = t,
    VarAnal      = TRUE
  )
  saveObject(res.speyside, file = speyside.file)

  # Islay
  set.seed(42)
  res.islay = RF.analysis.all(
    sum.exp      = se.islay,
    num.trees.RF = num.trees.RF,
    assay        = "original",
    var.y        = "Barell",
    mtry         = mtry,
    s            = s,
    t            = t,
    VarAnal      = TRUE
  )
  saveObject(res.islay, file = islay.file)
} else {
  res.barell = loadObject(file = barell.file)
  res.speyside = loadObject(file = speyside.file)
  res.islay = loadObject(file = islay.file)
}
## All
Classification was conducted with an out-of-bag (OOB) prediction error of 0.3680556.
| | Ex-Bourbon | Ex-Bourbon, Ex-Sherry | Ex-Sherry |
---|---|---|---|
Ex-Bourbon | 71 | 15 | 1 |
Ex-Bourbon, Ex-Sherry | 26 | 61 | 33 |
Ex-Sherry | 2 | 29 | 50 |
Classification was conducted with an out-of-bag (OOB) prediction error of 0.25.
| | Ex-Bourbon | Ex-Bourbon, Ex-Sherry | Ex-Sherry |
---|---|---|---|
Ex-Bourbon | 11 | 0 | 1 |
Ex-Bourbon, Ex-Sherry | 3 | 12 | 5 |
Ex-Sherry | 2 | 4 | 22 |
Classification was conducted with an out-of-bag (OOB) prediction error of 0.4150943.
| | Ex-Bourbon | Ex-Bourbon, Ex-Sherry | Ex-Sherry |
---|---|---|---|
Ex-Bourbon | 13 | 3 | 0 |
Ex-Bourbon, Ex-Sherry | 6 | 12 | 5 |
Ex-Sherry | 1 | 7 | 6 |
# Colour classification: keep the samples annotated as "natural Colour" or
# "plain caramel" in the "colour" column.
colour.classes = c("natural Colour", "plain caramel")
se.colour = se[, which(samples[, "colour"] %in% colour.classes)]

# Recompute the RF analysis when `rerun` is TRUE, otherwise read the stored
# result. The file name encodes num.trees.RF, s and t only.
colour.file = file.path(
  data.dir,
  paste0("res_colour", num.trees.RF, "_s_", s, "_t_", t, ".RData")
)

if (rerun) {
  set.seed(42)
  res.colour = RF.analysis.all(
    sum.exp      = se.colour,
    num.trees.RF = num.trees.RF,
    assay        = "original",
    var.y        = "colour",
    mtry         = mtry,
    s            = s,
    t            = t,
    VarAnal      = TRUE
  )
  saveObject(res.colour, file = colour.file)
} else {
  res.colour = loadObject(file = colour.file)
}
Classification was conducted with an out-of-bag (OOB) prediction error of 0.0920245.
| | natural Colour | plain caramel |
---|---|---|
natural Colour | 306 | 20 |
plain caramel | 25 | 138 |
# Age regression: drop the samples whose "Age" entry is "NAS"
# (presumably "no age statement" - confirm). which() also drops samples whose
# comparison result is NA, i.e. those with a missing Age.
has.age = samples[, "Age"] != "NAS"
se.age = se[, which(has.age)]

# Recompute the RF regression when `rerun` is TRUE, otherwise read the stored
# result. The file name encodes num.trees.RF, s and t only.
age.file = file.path(
  data.dir,
  paste0("res_age", num.trees.RF, "_s_", s, "_t_", t, ".RData")
)

if (rerun) {
  set.seed(42)
  res.age = RF.analysis.all(
    sum.exp      = se.age,
    num.trees.RF = num.trees.RF,
    assay        = "original",
    var.y        = "Age",
    mtry         = mtry,
    s            = s,
    t            = t,
    type         = "regression",
    VarAnal      = TRUE
  )
  saveObject(res.age, file = age.file)
} else {
  res.age = loadObject(file = age.file)
}
A regression model was trained to predict the age in years. A root-mean-square error (RMSE) of 4.559628 and an R² of 0.4085824 were obtained.