Würzburg rf code

library(randomForest)  # classical random forests
library(party)         # conditional inference trees and forests
library(nnet)          # single-hidden-layer neural networks
library(caret)         # unified model training interface
library(latticeExtra)  # panel.smoother() for the scatterplots
library(gridExtra)     # grid-based tables (grid.table, tableGrob)
library(relaimpo)      # relative importance for linear models
########################## regression example #############################
## global operations
data(readingSkills)  # data set shipped with the party package

set.seed(123223)
## use 150 of the 200 observations for training, the rest for prediction
sample.ind <- sample(1:nrow(readingSkills), 150, replace = FALSE)
train.set <- readingSkills[sample.ind, ]
pred.set <- readingSkills[-sample.ind, ]

## party example
## ------------------------------------------------------------------------
set.seed(42)
train.cf <- cforest(score ~ ., data = train.set,
                    control = cforest_unbiased(mtry = 2, ntree = 50))

fit.cf <- predict(train.cf, newdata = pred.set)

scat.cf <- xyplot(fit.cf ~ pred.set$score, pch = 16,
                  aspect = 1, col = "black",
                  xlim = c(23, 58), ylim = c(23, 58),
                  xlab = "observed score", ylab = "predicted score",
                  panel = function(x, y, ...) {
                    panel.xyplot(x, y, ...)
                    ## adjusted R-squared of the predicted-vs-observed fit
                    r2 <- summary(lm(y ~ x))$adj.r.squared
                    panel.text(labels =
                                 bquote(italic(R)^2 ==
                                          .(format(r2, digits = 3))),
                               x = 27.5, y = 55)
                    ## linear fit with shaded standard-error band
                    panel.smoother(x, y, method = "lm",
                                   col = "black",
                                   col.se = "black", alpha.se = 0.3)
                    ## 1:1 reference line
                    panel.abline(a = 0, b = 1, col = "red", lty = 2)
                  })
scat.cf

## permutation variable importance
set.seed(42)
varimp(train.cf)

## conditional permutation importance (adjusts for correlated predictors)
set.seed(42)
varimp(train.cf, conditional = TRUE)

set.seed(42)
dotplot(sort(varimp(train.cf, conditional = TRUE)))
## ------------------------------------------------------------------------
## randomForest example
## ------------------------------------------------------------------------
set.seed(42)
train.rf <- randomForest(score ~ ., data = train.set,
                         ntree = 50,
                         mtry = 2,
                         importance = TRUE,
                         keep.forest = TRUE,
                         do.trace = 100) # note: with only 50 trees, no progress is traced
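
## Not in the original script: printing the fitted forest reports the
## OOB-based mean of squared residuals and the % variance explained.
train.rf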

fit.rf <- predict(train.rf, pred.set)

scat.rf <- xyplot(fit.rf ~ pred.set$score, pch = 16,
                  aspect = 1, col = "black",
                  xlim = c(23, 58), ylim = c(23, 58),
                  xlab = "observed score", ylab = "predicted score",
                  panel = function(x, y, ...) {
                    panel.xyplot(x, y, ...)
                    ## adjusted R-squared of the predicted-vs-observed fit
                    r2 <- summary(lm(y ~ x))$adj.r.squared
                    panel.text(labels =
                                 bquote(italic(R)^2 ==
                                          .(format(r2, digits = 3))),
                               x = 27.5, y = 55)
                    ## linear fit with shaded standard-error band
                    panel.smoother(x, y, method = "lm",
                                   col = "black",
                                   col.se = "black", alpha.se = 0.3)
                    ## 1:1 reference line
                    panel.abline(a = 0, b = 1, col = "red", lty = 2)
                  })
scat.rf

## unscaled importance measures: %IncMSE (permutation) and IncNodePurity
importance(train.rf, scale = FALSE)

varImpPlot(train.rf, scale = FALSE)
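
## Addition, not part of the original script: with hold-out predictions from
## both forests available, the scatterplots can be complemented by a numeric
## error measure. rmse() is a small helper defined here for illustration.
rmse <- function(obs, pred) sqrt(mean((obs - pred)^2))
rmse(pred.set$score, fit.cf)  # cforest
rmse(pred.set$score, fit.rf)  # randomForest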
## ------------------------------------------------------------------------
## neural network (caret/nnet) example
## ------------------------------------------------------------------------
set.seed(42)
train.nn <- train(score ~ ., data = train.set, method = "nnet",
                  preProcess = "range", linout = TRUE, maxit = 100)
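
## Optional check, not part of the original script: with no tuning grid
## supplied, caret evaluates a small default grid of size/decay values
## for nnet; the selected combination is stored in the fitted object.
train.nn$bestTune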

fit.nn <- predict(train.nn, pred.set, type = "raw")

scat.nn <- xyplot(fit.nn ~ pred.set$score, pch = 16,
                  aspect = 1, col = "black",
                  xlim = c(23, 58), ylim = c(23, 58),
                  xlab = "observed score", ylab = "predicted score",
                  panel = function(x, y, ...) {
                    panel.xyplot(x, y, ...)
                    ## adjusted R-squared of the predicted-vs-observed fit
                    r2 <- summary(lm(y ~ x))$adj.r.squared
                    panel.text(labels =
                                 bquote(italic(R)^2 ==
                                          .(format(r2, digits = 3))),
                               x = 27.5, y = 55)
                    ## linear fit with shaded standard-error band
                    panel.smoother(x, y, method = "lm",
                                   col = "black",
                                   col.se = "black", alpha.se = 0.3)
                    ## 1:1 reference line
                    panel.abline(a = 0, b = 1, col = "red", lty = 2)
                  })
scat.nn

varImp(train.nn)

dotPlot(varImp(train.nn))
## ------------------------------------------------------------------------
## linear model example
## ------------------------------------------------------------------------
train.lm <- lm(score ~ ., data = train.set)

fit.lm <- predict(train.lm, newdata = pred.set)

scat.lm <- xyplot(fit.lm ~ pred.set$score, pch = 16,
                  aspect = 1, col = "black",
                  xlim = c(23, 58), ylim = c(23, 58),
                  xlab = "observed score", ylab = "predicted score",
                  panel = function(x, y, ...) {
                    panel.xyplot(x, y, ...)
                    ## adjusted R-squared of the predicted-vs-observed fit
                    r2 <- summary(lm(y ~ x))$adj.r.squared
                    panel.text(labels =
                                 bquote(italic(R)^2 ==
                                          .(format(r2, digits = 3))),
                               x = 27.5, y = 55)
                    ## linear fit with shaded standard-error band
                    panel.smoother(x, y, method = "lm",
                                   col = "black",
                                   col.se = "black", alpha.se = 0.3)
                    ## 1:1 reference line
                    panel.abline(a = 0, b = 1, col = "red", lty = 2)
                  })
scat.lm
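
## Optional addition, not part of the original script: all four trellis
## objects share identical axes, so they can be printed on one 2 x 2 page
## for direct comparison (split = c(column, row, ncolumns, nrows)).
print(scat.cf, split = c(1, 1, 2, 2), more = TRUE)
print(scat.rf, split = c(2, 1, 2, 2), more = TRUE)
print(scat.nn, split = c(1, 2, 2, 2), more = TRUE)
print(scat.lm, split = c(2, 2, 2, 2), more = FALSE)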

calc.relimp(train.lm, type = c("lmg", "last", "first", "pratt"),
            rela = TRUE)

# Bootstrap Measures of Relative Importance (1000 samples)
boot <- boot.relimp(train.lm, b = 1000,
                    type = c("lmg", "last", "first", "pratt"),
                    rank = TRUE, diff = TRUE, rela = TRUE)

booteval.relimp(boot)

plot(booteval.relimp(boot, sort = TRUE))
## ------------------------------------------------------------------------
###########################################################################
########################## classification example #########################

## global operations

data(iris)

set.seed(42)
## use 100 of the 150 observations for training, the rest for prediction
sample.ind <- sample(1:nrow(iris), 100, replace = FALSE)
train.set <- iris[sample.ind, ]
pred.set <- iris[-sample.ind, ]
## party example cforest
## ------------------------------------------------------------------------
set.seed(42)
train.cf <- cforest(Species ~ ., data = train.set,
                    control = cforest_unbiased(mtry = 2, ntree = 50))

fit.cf <- predict(train.cf, newdata = pred.set)

## confusion table: predicted vs. observed species on the hold-out set
ftable(fit.cf ~ pred.set$Species)
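
## Addition, not part of the original script: the confusion table condensed
## into a single hold-out accuracy.
mean(fit.cf == pred.set$Species)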

set.seed(42)
varimp(train.cf)

set.seed(42)
varimp(train.cf, conditional = TRUE)

set.seed(42)
dotplot(sort(varimp(train.cf, conditional = TRUE)))
## ------------------------------------------------------------------------

## party example ctree
## ------------------------------------------------------------------------
train.ct <- ctree(Species ~ ., data = train.set)

plot(train.ct)

fit.ct <- predict(train.ct, newdata = pred.set)

result <- ftable(fit.ct ~ pred.set$Species)

## quick version: draw the flattened table directly
grid.newpage()
grid.table(as.data.frame(result))

## reshape the ftable into a square data frame with species as both
## row (observed) and column (predicted) labels
out <- data.frame(expand.grid(attr(result, "row.vars")),
                  unclass(result))
rownames(out) <- out[, 1]
out <- out[, -1]
colnames(out) <- rownames(out)

grid.newpage()
## gpar.rowtext is the tableGrob() interface of gridExtra versions < 2.0
gt <- tableGrob(out,
                gpar.rowtext = gpar(col = "black", cex = 1,
                                    fontface = "bold"))

h <- grobHeight(gt)
w <- grobWidth(gt)

col.title <- textGrob("Predicted", y = unit(0.5, "npc") + 0.55 * h,
                      vjust = 0, gp = gpar(fontface = "italic"))
row.title <- textGrob("Observed", x = unit(0.5, "npc") - 0.52 * w,
                      y = unit(0.5, "npc") - 0.55 * h,
                      vjust = 0, hjust = 0, rot = 90,
                      gp = gpar(fontface = "italic"))

## combine table and titles into a single grob
gtr <- gTree(children = gList(gt, col.title, row.title))

grid.draw(gtr)
