All Metadata Selected Metadata png

knitr::opts_chunk$set(echo = TRUE)

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

Including Plots

You can also embed plots, for example:

plot(pressure)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

玫瑰图

*柱状图的变种

df<-data.frame(x=LETTERS[1:6],
               y=sample(1:10,6))

library(ggplot2)

ggplot(df,aes(x=x,y=y))+
  geom_col(aes(fill=x),show.legend = F)+
  coord_polar()

x<-1:180
x
y<-sin(10*x*pi/180)

df<-data.frame(x=x,y=abs(y))

df$yz<-df$y*df$z

ggplot(data=df,aes(x=x,y=y))+
  geom_area(fill="blue",
            alpha=0.5,
            color="black")+
  coord_polar()+
  theme_bw()+
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.border = element_blank(),
        axis.title = element_blank())

完整代码

library(tidyverse)
library(ggplot2)

set.seed(123456)

df<-data.frame(x=LETTERS[1:10],
               y=sample(1:20,10))
df

x<-1:180
x
y<-sin(10*x*pi/180)

df1<-data.frame(x1=x,
               y1=abs(y),
               var=gl(10,18,labels = LETTERS[1:10]))
df1
merge(df1,df,by.x = 'var',by.y = 'x') %>% 
  mutate(new_y=y1*y) -> df2

ggplot(data=df2,aes(x=x,y=new_y))+
  geom_area(aes(fill=var),
            alpha=0.8,
            color="black",
            show.legend = F)+
  coord_polar()+
  theme_bw()+
  theme(axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        panel.border = element_blank(),
        axis.title = element_blank())+
  scale_x_continuous(breaks = seq(9,180,18),
                     labels = df$x)+
  geom_text(data=df,aes(x=seq(9,180,18),
                         y=y+1,
                         label=y))

python实现 R,本文参考

堆叠柱状图

海盗图

当因变量（Dependent variable）为数值型，自变量（Independent variable）为分类变量时，可以选择箱形图，柱状图，或小提琴图等等。

1. 原始数据，通常为点。
2. 柱状图形：显示数据的集中趋势（如：中位数，均数）。
3. 密度图：呈现数据的分布。
4. 统计推断：如95%可信区间等。 library(yarrr) #预先安装 # 基础版本的海盗图；选择两个变量作为作图的数据：weight作为因变量，Diet作为自变量。ChickWeight为自带数据 pirateplot(weight ~ Diet, data = ChickWeight, theme = 1, #1，3好看一些 main = "Pirate plot", inf.method = "ci", # 下方海盗图中部位置的框指的是可信区间 bar.f.o = 0.2) # 调整柱状图部分的颜色深度，数值从0-1 # pal = "decision" #自选调色板 # piratepal(palette = "all") # 所有颜色 本文参考

ROC曲线绘制

# 真阳性（True Positive , TP）被模型预测为正的正样本；

# 假阴性（False Negative , FN）被模型预测为负的正样本；

# 假阳性（False Positive , FP）被模型预测为正的负样本；

# 真阴性（True Negative , TN）被模型预测为负的负样本。
# ROC曲线 y轴为TPR（击中率）；x轴为1-TNR（虚报率）
# 准确率(Precision) =  TP/（TP+FP）

# F1 = 2 * P * R / (P + R)  同时描绘了准确率和灵敏度

# AUC：ROC曲线下面积

#逻辑回归
library(pROC) #输入一列真实的分类，一列模型的预测概率

## 实战1：逻辑回归ROC
library(rio);library(tidyverse)
library(car)
library(mlogit)
Cured = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
Intervention = c("NO", "NO", "YES", "NO", "NO", "NO", "YES","NO", "NO", "NO", "NO", "NO", "YES","YES","YES","YES", "NO","YES", "YES", "YES", "YES", "NO" )
Duration = c(7, 7,6, 5, 7, 4, 7, 6, 3, 7, 6, 3, 8, 8, 7, 9, 9, 7, 8, 5, 10, 9)
A = data.frame(Cured,Intervention,Duration)
M1 = glm(data = A, Cured~Intervention,family = binomial()) #二项分布（二元）
M2 = glm(data = A, Cured~Intervention+Duration,family = binomial()) # JASP中的factor和cov类似
## ROC曲线的绘制
Yhat1 = predict(M1,A,type = 'response') #预测概率值  注意逻辑回归里面type选response
Yhat2 = predict(M2,A,type = 'response') #预测概率值
A$cured;Yhat1;Yhat2
modelroc = roc(A$Cured,Yhat1)
modelroc2 = roc(A$Cured,Yhat2)

plot(modelroc,print.auc=T,auc.polygon=T,max.auc.polygon=T,print.thres=T,auc.polygon.col='skyblue' ,main="ROC", percent=TRUE)
lines(modelroc2,col=2)
auc(A$Cured,Yhat1);auc(A$Cured,Yhat2) #计算AUC值

#ggplot画风的ROC 
(g=ggroc(list(modelroc,modelroc2),legacy.axes = T,lwd=1.5))
g+theme_minimal() + ggtitle("My ROC curve") + 
  geom_segment(aes(x = 0, xend = 1, y = 0, yend = 1), color="grey", linetype="dashed")

ROC

机器学习与特征抽取

library(phyloseq)
library(randomForest)
library(tidyverse)
#数据整理和随机僧林
# 包在github上不是很容易安装
#library(ggClusterNet)
# data(ps)

map = as.data.frame(sample_data(ps))
head(map)

#-scaleing relative abundancce#----
ps_rela  = transform_sample_counts(ps, function(x) x / sum(x) );ps_rela
mapping = as.data.frame(sample_data(ps_rela))
otutab = as.data.frame((vegan_otu(ps_rela)))

# Set classification info.
otutab$group = factor(mapping$Group)

# set random seed for reproducible reandomforest model
set.seed(315)

model_rf= randomForest::randomForest(group ~ ., data=otutab, importance=TRUE, proximity=TRUE)
print(model_rf)

重要变量提取和整理

#------------k可视化
###### inportant OTU picked out and plot
a=as.data.frame(round(importance(model_rf), 2))
a$id=row.names(a)
a2<- dplyr::arrange(a, desc(MeanDecreaseAccuracy))
row.names(a2)=a2$id

a3=head(a2,n=20)

taxonomy <- as.data.frame(vegan_tax(ps))
tax = taxonomy[rownames(a3),]
a3 = merge(a3,tax,by = "row.names",all = F)
row.names(a3) = a3$Row.names
a3$Row.names = NULL

OTU = vegan_otu(ps_rela)
### pice mapping
design = as.data.frame(sample_data(ps_rela))

#mean abundance by groups
iris.split <- split(as.data.frame(OTU),as.factor(design$Group))
iris.apply <- lapply(iris.split,function(x)colMeans(x,na.rm = TRUE))
norm2 <- do.call(rbind,iris.apply)%>% # combine result
  t()
colnames(norm2) = paste(colnames(norm2),"mean",sep = "")

ind_fal = merge(a3,norm2,by = "row.names",all = F)
head(ind_fal)

火柴棒图形展示

p1 <- ggplot(a3, aes(x = MeanDecreaseAccuracy, y = reorder(id,MeanDecreaseAccuracy))) +
  geom_point(size=6,pch=20,fill = "blue")+
  geom_segment(aes(yend=id),xend=0,size=3)+
  geom_label(aes(x =MeanDecreaseAccuracy*1.1,  label = Phylum),size = 3)

p1

火柴图玫瑰图

Literature Wordcloud