Some visual things you can do with R and ways to explore your data systematically. In the tutorial you will:
Once more, download these files into your working directory:
To check your working directory:
getwd()
To set your working directory:
setwd("X:/project")
Run this to install/load libraries
source("helper.R")
Start a new notebook file by selecting “File” -> “New File” -> “R Notebook”
Save the file as “yourname_week7.Rmd”. Delete the instructions starting from “This is an [R…”. For the different code below, insert it as R chunks. An R chunk is code placed after a line that starts with ```{r} and ends before a line that contains only ```.
As before, copy the code chunks into your R notebook as R chunks.
`if`, `else if`, and `else` statements allow you to execute code depending on whether a condition is met. The structure is generally like so:
if(condition)
{
do_this
}
With `else if` and `else` statements:
# Template for a multi-branch conditional. The placeholders (condition1,
# do_this, ...) stand for your own tests and actions.
# At the top level of a script or notebook chunk, `else` must sit on the same
# line as the closing `}` of the previous branch. If it starts a new line, R
# treats the `if` statement as finished at that brace and then stops with
# "unexpected 'else'".
if (condition1) {
  do_this
} else if (condition2) {
  do_that
} else {
  do_that_other_thing
}
You can add as many `else if` statements as you wish following an `if`. Example (copy this):
i = 2
# Report the sign of `i` (assigned on the line just before this example).
# Each `else` is kept on the same line as the preceding `}`: when this code is
# run at the top level, an `else` that begins a new line is a syntax error
# ("unexpected 'else'") because R has already closed the `if` statement.
if (i > 0) {
  print("Positive!")
} else if (i < 0) {
  print("Negative!")
} else {
  print("Zero!")
}
if
statement
ID = "Sara"
if( ID == "Sara" )
{
print("Positive!")
}
ifelse(i>0, "Positive!", "Negative!")
while (test_expression)
{
do_this
}
i = 10
while (i > 0)
{
i = i - 1
print(i)
}
for (i in objects)
{
do_this
}
for (i in 1:10)
{
print(i)
}
apply(X, MARGIN, FUN)
mat = matrix(sample(100), nrow=10, ncol=10 )
apply(mat, 1, sum)
mat = matrix(sample(100), nrow=10, ncol=10 )
apply(mat, c(1,2), sum)
lapply(X, FUN)
sapply(X, FUN)
lapply(1:10, function(i) rep(10, i))
sapply(1:10, function(i) rep(i^2, 10))
tapply(X, INDEX, FUN)
# Draw 100 values from 1..10 with replacement and label them alternately
# "X" and "Y" (50 of each), then summarise the values within each label.
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
x <- sample(10, size = 100, replace = TRUE)
y <- rep(c("X", "Y"), 50)
tapply(x, y, sum)   # total of x within each label
tapply(x, y, mean)  # mean of x within each label
tapply(x, y, list)  # the raw values of x, grouped by label
summary(iris)
class(iris)
colnames(iris)
plot(iris)
pairs(iris, upper.panel = NULL )
plot(iris$Sepal.Length, iris$Petal.Length, pch=19, col=as.numeric(iris$Species) )
plot(iris$Sepal.Length, iris$Petal.Length, pch=12, cex=3, lwd=4, lty=4, type="b", col=colors()[sample(600,5)][as.numeric(iris$Species)] )
plot(lowess(iris$Sepal.Length, iris$Petal.Length), pch=19)
plot(Petal.Length ~ Sepal.Length, data=iris, pch=19, col=Species)
boxplot(iris$Sepal.Width~ iris$Species, col=1:3 )
beanplot(iris$Sepal.Width~ iris$Species, col=list(1,2,3))
iris.list = lapply( unique(iris$Species), function(si) iris$Sepal.Width[iris$Species==si])
vioplot( iris.list[[1]], iris.list[[2]], iris.list[[3]], col="darkgreen")
# Mean sepal length per species, drawn as a bar plot.
# The bar heights are means, so the y axis is labelled accordingly
# (it was previously labelled "Count", which these values are not).
iris.bar <- tapply(iris$Sepal.Length, iris$Species, mean)
barplot(
  iris.bar,
  col = "black",
  xlab = "Species",
  ylab = "Mean Sepal Length",
  main = "Bar plot of mean Sepal Length"
)
# Histogram of petal width across all species.
hist(iris$Petal.Width, col = "lightblue")
# Number of flowers per species with petal width below 0.75.
tapply(iris$Petal.Width < 0.75, iris$Species, sum)

# First attempt at overlaying the per-species histograms. Each hist() call
# chooses its own breaks from the subset it is given, so the overlaid bars
# need not line up with the bins of the overall histogram.
hist(iris$Petal.Width, col = "lightblue")
hist(iris$Petal.Width[iris$Species == "setosa"], col = "red", add = TRUE)
hist(iris$Petal.Width[iris$Species == "versicolor"], col = "blue", add = TRUE)
hist(iris$Petal.Width[iris$Species == "virginica"], col = "purple", add = TRUE)

# Second attempt: keep the object returned by hist() and reuse its breaks so
# that every overlay is binned exactly like the overall histogram.
h <- hist(iris$Petal.Width, col = "lightblue")
h  # print the histogram object (breaks, counts, density, mids, ...)
hist(iris$Petal.Width[iris$Species == "setosa"],
     breaks = h$breaks, col = "red", add = TRUE)
hist(iris$Petal.Width[iris$Species == "versicolor"],
     breaks = h$breaks, col = "blue", add = TRUE)
hist(iris$Petal.Width[iris$Species == "virginica"],
     breaks = h$breaks, col = "purple", add = TRUE)
# Same overlay, but with see-through fills so overlapping bars stay visible.
# The overall histogram is drawn with col = 0 and border = 0 and is used here
# only to obtain the shared breaks and set up the plot.
# makeTransparent() is not defined in this section (presumably provided by
# helper.R - confirm).
h <- hist(iris$Petal.Width, col = 0, border = 0)
hist(iris$Petal.Width[iris$Species == "setosa"],
     breaks = h$breaks, col = makeTransparent("red"), add = TRUE)
hist(iris$Petal.Width[iris$Species == "versicolor"],
     breaks = h$breaks, col = makeTransparent("blue"), add = TRUE)
hist(iris$Petal.Width[iris$Species == "virginica"],
     breaks = h$breaks, col = makeTransparent("purple"), add = TRUE)
h <- hist(iris$Petal.Width, freq=F)
d_all <-density( iris$Petal.Width)
lines(d_all, col="black")
points()
polygon()
segments()
abline()
rug()
text()
mtext()
legend()
...
pch
type
lty
lwd
bty
...
## Ignore what these functions do for now, but copy them over
## Diagonal panel for pairs(): draws a histogram of `x` in the current panel,
## with bar heights rescaled so that the tallest bar has height 1.
##   x   : numeric values of the variable shown in this panel
##   ... : further arguments forwarded to rect()
panel.hist <- function(x, ...)
{
  # Remember the panel's coordinate system and put it back on exit.
  # The value has to be passed by name: par(usr) with an unnamed numeric
  # vector sets nothing and only raises
  # "argument 1 does not name a graphical parameter".
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  # Keep the panel's x-range but switch to a 0..1.5 y-range, which leaves
  # headroom above bars of height at most 1.
  par(usr = c(usr[1:2], 0, 1.5))
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts / max(h$counts)
  # One rectangle per bin: left edges are breaks[-nB], right edges breaks[-1].
  rect(breaks[-nB], 0, breaks[-1], y, col = "lightgreen", ...)
}
## Panel for pairs(): prints the absolute correlation of `x` and `y` in the
## middle of the panel, with text size proportional to the correlation and a
## colour picked from a 100-step plasma() palette.
##   x, y    : numeric vectors for the two variables of this panel
##   digits  : significant digits used when formatting the correlation
##   prefix  : text pasted in front of the formatted correlation
##   cex.cor : base text size; if missing, chosen so the label fits the panel
##   ...     : accepted so pairs() can pass extra arguments; not used here
## plasma() is not defined in this section (viridis package - presumably
## loaded by helper.R; confirm).
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
  # Save the coordinate system and restore it on exit. It must be passed by
  # name: par(usr) with an unnamed numeric vector sets nothing and only warns
  # "argument 1 does not name a graphical parameter".
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  # Unit square, so (0.5, 0.5) is the centre of the panel.
  par(usr = c(0, 1, 0, 1))
  r <- abs(cor(x, y))
  # Formatting r together with a long decimal forces a common number of digits.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) cex.cor <- 0.8 / strwidth(txt)
  # Map r in [0, 1] to a palette slot in 1..100. The index is rounded to an
  # integer and clamped: a double such as 0.57 * 100 would otherwise be
  # truncated to the slot below, and r < 0.005 would give index 0 (no colour).
  pal <- plasma(100)
  idx <- min(length(pal), max(1L, as.integer(round(r * 100))))
  text(0.5, 0.5, txt, cex = cex.cor * r, col = pal[idx])
}
pairs(iris, bg=1:3,lower.panel = panel.smooth, pch=19, upper.panel = panel.cor, diag.panel = panel.hist, cex.labels = 2, font.labels = 2)
iris2 = apply(iris[,1:4], 2, as.numeric)
heatmap.3(iris2, RowSideCol=cols7[as.numeric(iris$Species)] , col=viridis(100))
iris.r = t(apply(iris[,1:4], 1, rank))
heatmap.3(iris.r, RowSideCol=cols7[as.numeric(iris$Species)] , col=viridis(100))
iris.r2 = apply(iris[,1:4], 2, rank)
heatmap.3(iris.r2, RowSideCol=cols7[as.numeric(iris$Species)] , col=viridis(100))
samples.cor = cor( t(iris2) )
heatmap.3(samples.cor, col=plasma(100), ColSideCol=cols7[as.numeric(iris$Species)])
We can do almost all of this with ggplot2. There are fewer finicky details to worry about, and it is generally more intuitive.
g <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Length))
g
g <- g + geom_point()
g
g <- g + geom_point(aes(color = Species))
g
g <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, color = Species)) + geom_point() + geom_smooth(method = "lm", se = F)
g
g <- ggplot(data=iris, aes(x=Species, y=Sepal.Length))
g + geom_boxplot(aes(fill=Species)) +
ylab("Sepal Length") + ggtitle("Iris Boxplot")
g <- ggplot(data=iris, aes(x=Petal.Width))
g + geom_histogram(binwidth=0.2, color="black", aes(fill=Species)) + xlab("Petal Width") + ylab("Frequency") + ggtitle("Histogram of Petal Width")
g <- ggplot(data=iris, aes(x=Species, y=Sepal.Length))
g + geom_bar(stat = "summary", fun = "mean") + xlab("Species") + ylab("Mean") + ggtitle("Bar plot of mean Sepal Length")
More here
colors()
palette()
blacks = c("black", 1, "#000000")
reds = c("red", 2, "#FF0000")
allreds = colors()[grep("red", colors())]
allredsRamp <- colorRampPalette(allreds)
allredsRamp(100)
grey2blue = colorpanel(100, "lightgrey", "blue", "darkblue")
rainbow(5)
heat.colors(10)
terrain.colors(100)
topo.colors(10)
cm.colors(5)
library(RColorBrewer)
display.brewer.all()
brewer.pal(8, "Set3" )
library(viridis)
n=10
magma(n)
plasma(n)
inferno(n)
viridis(n)
cividis(n)
turbo(n)
plot(1:n, col=magma(n), pch=19, cex=5)
plot(1:n, col=plasma(n), pch=19, cex=5)
n=100
plot(1:n, col=turbo(n), pch=19, cex=5)
## Resources
Back to the homepage