This project is maintained by sarbal
Some visual things you can do with R. Once more, download these files into your working directory:
getwd()
To set your working directory:
setwd("H:/URP")
Run this to install/load libraries
source("helper.R")
# Quick look at the built-in iris data frame: per-column summary, class, and
# column names.
summary(iris)
class(iris)
colnames(iris)
# Plot the whole data frame, then the same pairwise view with the upper
# triangle of panels suppressed.
plot(iris)
pairs(iris, upper.panel = NULL )
# Petal length against sepal length; the species factor codes are used as
# colour numbers, so each species gets its own colour.
plot(iris$Sepal.Length, iris$Petal.Length, pch=19, col=as.numeric(iris$Species) )
## Playing with parameters
# Five colour names are sampled at random on every run; indexing by the species
# code picks one of them per species, so colours change between runs.
plot(iris$Sepal.Length, iris$Petal.Length, pch=12, cex=3, lwd=4, lty=4, type="b", col=colors()[sample(600,5)][as.numeric(iris$Species)] )
## Plotting smoothed version
# lowess() returns the smoothed x/y points, which are what gets plotted here.
plot(lowess(iris$Sepal.Length, iris$Petal.Length), pch=19)
## Plotting using the formula method
# With data = iris, column names can be used directly (including col=Species).
plot(Petal.Length ~ Sepal.Length, data=iris, pch=19, col=Species)
# Sepal width per species, drawn three ways: boxplot, beanplot, violin plot.
# NOTE(review): beanplot() and vioplot() are not base R; presumably they come
# from packages loaded by helper.R -- confirm.
boxplot(iris$Sepal.Width ~ iris$Species, col = 1:3)
beanplot(iris$Sepal.Width ~ iris$Species, col = list(1, 2, 3))

# vioplot() is given one vector per group here, so build a list holding the
# sepal widths of each species.
iris.list <- lapply(
  unique(iris$Species),
  function(si) iris$Sepal.Width[iris$Species == si]
)
vioplot(iris.list[[1]], iris.list[[2]], iris.list[[3]], col = "darkgreen")

# Histogram of petal width over all plants.
hist(iris$Petal.Width, col = "lightblue")

# Mean sepal length per species. The bar heights are means, not counts, so the
# y axis is labelled accordingly.
iris.bar <- tapply(iris$Sepal.Length, iris$Species, mean)
barplot(
  iris.bar,
  col = "black",
  xlab = "Species",
  ylab = "Mean",
  main = "Bar plot of mean Sepal Length"
)
# Row indices of the plants whose petal width is below 0.75.
small_petals <- which(iris$Petal.Width < 0.75)
# NOTE(review): count() is not base R; presumably supplied by a package loaded
# in helper.R (e.g. plyr) -- confirm.
count(iris$Species[small_petals])
count(iris$Species)

# Overlay one histogram per species on top of the overall histogram. Each
# hist() call chooses its own breaks here, so the overlaid bars need not line
# up with the bins of the base plot.
hist(iris$Petal.Width, col = "lightblue")
hist(iris$Petal.Width[iris$Species == "setosa"], col = "red", add = TRUE)
hist(iris$Petal.Width[iris$Species == "versicolor"], col = "blue", add = TRUE)
hist(iris$Petal.Width[iris$Species == "virginica"], col = "purple", add = TRUE)

# Same overlay, but keep the histogram object and reuse its breaks so every
# layer shares the bins of the overall histogram.
h <- hist(iris$Petal.Width, col = "lightblue")
h
hist(iris$Petal.Width[iris$Species == "setosa"],
     breaks = h$breaks, col = "red", add = TRUE)
hist(iris$Petal.Width[iris$Species == "versicolor"],
     breaks = h$breaks, col = "blue", add = TRUE)
hist(iris$Petal.Width[iris$Species == "virginica"],
     breaks = h$breaks, col = "purple", add = TRUE)
- How about some density lines?
# Draw the histogram on the density scale (freq = FALSE) so that a kernel
# density estimate can be overlaid on the same y axis.
h <- hist(iris$Petal.Width, freq = FALSE)
d_all <- density(iris$Petal.Width)
lines(d_all, col = "black")
- We can keep adding layers to our plots with other functions:
points() polygon() segments() abline() rug() text() mtext() legend() …
- More fun things to play with too
pch type lty lwd bty …
- Going back to the matrix scatterplot, let's have a visual that summarizes all the data
# Diagonal panel for pairs(): draws a histogram of `x` with bar heights scaled
# so the tallest bar is 1.
#   x   : numeric vector holding the variable shown in this panel.
#   ... : further arguments forwarded to rect().
panel.hist <- function(x, ...) {
  usr <- par("usr")
  # Keep the panel's x range but use a fixed 0-1.5 y range so the scaled bars
  # leave headroom; par() returns the previous setting, restored on exit.
  old_par <- par(usr = c(usr[1:2], 0, 1.5))
  on.exit(par(old_par), add = TRUE)

  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts
  y <- y / max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col = "lightgreen", ...)
}
# Upper panel for pairs(): prints the absolute correlation of `x` and `y` in
# the middle of the panel, with text size and colour both scaled by it.
#   x, y    : numeric vectors for the two variables of this panel.
#   digits  : significant digits used when formatting the correlation.
#   prefix  : string pasted in front of the formatted value.
#   cex.cor : base text size; if missing it is derived from the text width.
#   ...     : accepted so pairs() can pass extra arguments; not used here.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  # Work in unit coordinates so (0.5, 0.5) is the panel centre; par() returns
  # the previous setting, restored on exit.
  old_par <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old_par), add = TRUE)

  r <- abs(cor(x, y))
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  # Map r in [0, 1] to a palette index in 1..100. Rounding after scaling avoids
  # fractional indices, and max() avoids index 0 for correlations near zero.
  col_idx <- max(1, round(r * 100))
  text(0.5, 0.5, txt, cex = cex.cor * r, col = plasma(100)[col_idx])
}

# Scatterplot matrix: smoothed scatterplots below the diagonal, histograms on
# it, and correlations above it.
pairs(
  iris,
  bg = 1:3,
  lower.panel = panel.smooth,
  pch = 19,
  upper.panel = panel.cor,
  diag.panel = panel.hist,
  cex.labels = 2,
  font.labels = 2
)
- Great. What if we want to ask how similar are these individual plants to each other within each species. What can we look at?
- Correlations are fun.
# NOTE(review): heatmap.3() and cols7 are not defined in this file; presumably
# they are provided by helper.R -- confirm.

# The four measurement columns as a numeric matrix (one row per plant).
iris2 <- apply(iris[, 1:4], 2, as.numeric)
heatmap.3(iris2,
          RowSideCol = cols7[as.numeric(iris$Species)],
          col = viridis(100))

# Rank the four measurements within each plant. apply() over rows returns the
# results as columns, hence the transpose.
iris.r <- t(apply(iris[, 1:4], 1, rank))
heatmap.3(iris.r,
          RowSideCol = cols7[as.numeric(iris$Species)],
          col = viridis(100))

# Rank each measurement across all plants.
iris.r2 <- apply(iris[, 1:4], 2, rank)
heatmap.3(iris.r2,
          RowSideCol = cols7[as.numeric(iris$Species)],
          col = viridis(100))

# Plant-by-plant correlations. cor() correlates columns, so transpose first to
# put the plants in columns.
samples.cor <- cor(t(iris2))
heatmap.3(samples.cor,
          col = plasma(100),
          ColSideCol = cols7[as.numeric(iris$Species)])
## "Tidyr" versions
We can do almost all of this with [ggplot2](https://github.com/rstudio/cheatsheets/blob/master/data-visualization-2.1.pdf). There are fewer finicky things to worry about, and it is generally more intuitive.
# Set up the plot object with its data and axis mappings, then print it. No
# layer has been added yet.
g <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Length))
g
- This does nothing, because we've not specified what we want to draw:
# Add a point layer to the plot object and store the result back in g.
g <- g + geom_point()
- Points! Now to color them:
# Add a point layer whose colour is mapped to Species.
g <- g + geom_point(aes(color = Species))
- We can keep building onto the "g" variable.
# Build the whole plot in one expression: points coloured by species plus a
# linear-model fit line per species, without the confidence band (se = FALSE).
g <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, color = Species)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
- How about boxplots?
# Boxplot of sepal length per species, with the group mean added as a point.
g <- ggplot(data = iris, aes(x = Species, y = Sepal.Length))
g +
  geom_boxplot(aes(fill = Species)) +
  ylab("Sepal Length") +
  ggtitle("Iris Boxplot") +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(fun = mean, geom = "point", shape = 5, size = 4)
- Histograms:
# Histogram of sepal width in bins of width 0.2, with bars filled by species.
g <- ggplot(data = iris, aes(x = Sepal.Width))
g +
  geom_histogram(binwidth = 0.2, color = "black", aes(fill = Species)) +
  xlab("Sepal Width") +
  ylab("Frequency") +
  ggtitle("Histogram of Sepal Width")
- Barplots:
# Bar plot of the mean sepal length per species. The "summary" stat computes
# the bar height from the y values of each group.
g <- ggplot(data = iris, aes(x = Species, y = Sepal.Length))
g +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  geom_bar(stat = "summary", fun = "mean") +
  xlab("Species") +
  ylab("Mean") +
  ggtitle("Bar plot of mean Sepal Length")
More [here](https://www.mailman.columbia.edu/sites/default/files/media/fdawg_ggplot2.html)
## Colors and palettes
# All built-in colour names, and the current palette used for numeric colours.
colors()
palette()
- Preset colors as strings or as numbers
- Or based on their RGB
- e.g.,
# The same colour given three ways: by name, by palette number, and by hex RGB
# code. c() coerces the number to a string because the other elements are
# strings.
blacks <- c("black", 1, "#000000")
reds <- c("red", 2, "#FF0000")
# Every built-in colour name that contains "red".
allreds <- colors()[grep("red", colors())]
- Color ramps
# colorRampPalette() returns a function; calling it with n gives n colours
# interpolated between the input colours.
allredsRamp <- colorRampPalette(allreds)
allredsRamp(100)
# NOTE(review): colorpanel() is not base R; presumably from a package loaded by
# helper.R (e.g. gplots) -- confirm.
grey2blue <- colorpanel(100, "lightgrey", "blue", "darkblue")
- Predefined palettes:
- default R:
# Built-in palette generators; the argument is the number of colours returned.
rainbow(5)
heat.colors(10)
terrain.colors(100)
topo.colors(10)
cm.colors(5)
- R color brewer
library(RColorBrewer)
# Show every available palette, then extract 8 colours from "Set3".
display.brewer.all()
brewer.pal(8, "Set3")
- everyone's new favorite are the viridis palettes (color-blind friendly)
library(viridis)
# Each palette function needs the number of colours to return; 10 is used here
# purely as an example.
magma(10)
plasma(10)
inferno(10)
viridis(10)
cividis(10)
Solutions: Next week!
Back to the homepage