1. Load data
Using iris data for analysis.
head(iris)
df <- iris
df <- as.data.frame(iris)
Dimension conversion for futher analysis:
row.names(df) <- paste(df$Species, row.names(df), sep="_")
df$Species <- NULL
head(df)
2. PCA analysis using prcomp
df_pca <- prcomp(df)
2.1 plot directly
plot(df_pca$x[,1], df_pca$x[,2])
3. Using ggplot2 to revise this plot:
First, a new dataframe should be created, with the information of sample-group.
df_out <- as.data.frame(df_pca$x)
df_out$group <- sapply( strsplit(as.character(row.names(df)), "_"), "[[", 1 )
head(df_out)
Second, some prepartions for ggplot2:
library(ggplot2)
library(grid)
library(gridExtra)
Try:
p<-ggplot(df_out,aes(x=PC1,y=PC2,color=group ))
p<-p+geom_point()
p
3.2 plot with a theme
For format of picture.
theme<-theme(panel.background = element_blank(),panel.border=element_rect(fill=NA),panel.grid.major = element_blank(),panel.grid.minor = element_blank(),strip.background=element_blank(),axis.text.x=element_text(colour="black"),axis.text.y=element_text(colour="black"),axis.ticks=element_line(colour="black"),plot.margin=unit(c(1,1,1,1),"line"))
For plot:
p<-ggplot(df_out,aes(x=PC1,y=PC2,color=group ))
p<-p+geom_point()+theme
p
3.3 Put the words on the figure:
p<-ggplot(df_out,aes(x=PC1,y=PC2,color=group, label=row.names(df) ))
p<-p+geom_point()+ geom_text(size=3)+theme
p
3.4 The percentage:
percentage <- round(df_pca$sdev / sum(df_pca$sdev) * 100, 2)
percentage <- paste( colnames(df_out), "(", paste( as.character(percentage), "%", ")", sep="") )
p<-ggplot(df_out,aes(x=PC1,y=PC2,color=group ))
p<-p+geom_point()+theme + xlab(percentage[1]) + ylab(percentage[2])
p
3.5 Change the order for Sample group:
df_out$group <- factor(df_out$group, levels = c("virginica", "setosa", "versicolor"))
p<-ggplot(df_out,aes(x=PC1,y=PC2,color=group ))
p<-p+geom_point()+theme + xlab(percentage[1]) + ylab(percentage[2]) + scale_color_manual(values=c("#FFFF00", "#00FFFF", "#FF00FF"))
p
3.5 Save in PDF file
pdf( "file_out.pdf",width = 10,height = 10)
library(gridExtra)
yy <- grid.arrange(p,nrow=1)
op <- par(no.readonly=TRUE)
par(op)
dev.off()
3.6 Plot features that contribute to the classification
df_out_r <- as.data.frame(df_pca$rotation)
df_out_r$feature <- row.names(df_out_r)
df_out_r
p<-ggplot(df_out_r,aes(x=PC1,y=PC2,label=feature,color=feature ))
p<-p+geom_point()+theme + geom_text(size=3)
p
近期评论