# ---- Setup and interactive inspection of methylation objects ----
library(anno)
library(devtools)

# Install DecompPipeline from GitHub only when it is not available yet, so that
# re-running the script does not trigger a fresh download and build each time.
# NOTE(review): assumes the installed package is named "DecompPipeline" like
# the repository - confirm against its DESCRIPTION file.
if (!requireNamespace("DecompPipeline", quietly = TRUE)) {
  install_github("CompEpigen/DecompPipeline")
}

# Read the serialized object and open it in the data viewer (once).
beta1 <- readRDS("D:/code-R/指标/probe3_normal_cancer/metric2/beta1.RDS")
View(beta1)

# Restore every object stored in the workspace image into the global env.
load("D:/code-R/tumor_purity/purify_DMR/DMR_go/Tumor.RData")
# NOTE(review): `beta` is presumably one of the objects restored from
# Tumor.RData - confirm, otherwise this call fails with "object not found".
View(beta)
# ---- Scratch: element-wise comparison and logical-mask subsetting ----
# R has no `[1, 2, 3]` vector literal; vectors are built with c().

# Element-wise comparison of two numeric vectors (prints a logical vector).
a <- c(1, 2, 3, 4, 5, 6)
b <- c(2, 3, 4, 5, 6, 7)
a > b

# `a` and `b` are reused from here on: `a` holds strand symbols, `b` holds
# the values to be filtered by strand.
a <- c("+", "-", "-", "+")
a == "+"

# A logical mask keeps only the positions where the mask is TRUE.
b <- c("ADAS", "FDAS", "ADSD", "ASDD")
b[a == "+"]

# NOTE(review): this variable shares its name with base::c(). Function calls
# such as c(...) still resolve to the function, but a clearer name would be
# preferable if the variable is ever used.
c <- c("dsad", "dsad", "fdff", "fdsfds")
# ---- Strand-aware swap of paired values (small example) ----
# For "+" entries seq1 takes read1 and seq2 takes read2; for "-" entries the
# two are swapped. Positions whose strand is neither "+" nor "-" keep the
# placeholder "0".
read1 <- c("ADAS", "FDAS", "ADSD", "ASDD")
read2 <- c("dsad", "dsad", "fdff", "fdsfds")
strand <- c("+", "-", "-", "+")

# Comparison needs `==`; a single `=` inside `[ ]` is parsed as a named
# argument and would index by the string "-" instead of by a mask.
is_plus <- strand == "+"
is_minus <- strand == "-"
is_plus

# Both sides of each assignment are subset with the same mask so that values
# stay aligned with their positions (assigning the full vector into a subset
# would silently take the first elements instead).
seq1 <- rep("0", length(strand))
seq1[is_plus] <- read1[is_plus]
seq1[is_minus] <- read2[is_minus]

# seq2 must exist before it can be filled by index.
seq2 <- rep("0", length(strand))
seq2[is_plus] <- read2[is_plus]
seq2[is_minus] <- read1[is_minus]

seq1
seq2
# ---- Strand-aware swap of paired values (2.1 million entries) ----
# Same swap as the small example, repeated on long vectors to check that the
# masked assignment scales.
group_sizes <- c(100000, 1000000, 1000000)
n_total <- sum(group_sizes)

read1 <- rep(c("ACGTTTT", "AGGGCTTT", "AAAACCCC"), group_sizes)
read2 <- rep(c("AGGGCTTT", "ACGTTTT", "AAAdsdssdsACCCC"), group_sizes)
strand <- rep(c("+", "-", "-"), group_sizes)

# Compute each mask once instead of re-evaluating the comparison per use.
is_plus <- strand == "+"
is_minus <- strand == "-"

# "0" marks positions whose strand is neither "+" nor "-".
seq1 <- rep("0", n_total)
seq1[is_plus] <- read1[is_plus]
seq1[is_minus] <- read2[is_minus]

seq2 <- rep("0", n_total)
seq2[is_plus] <- read2[is_plus]
seq2[is_minus] <- read1[is_minus]

# Summaries instead of dumping millions of elements to the console.
table(strand)
table(seq1)
table(seq2)
head(seq1)

# No position should have been left unassigned.
anyNA(seq1)
anyNA(seq2)
# ---- End-motif frequencies: Control vs HCC logistic regression ----

# h2o is attached further down in this script; install it only when missing
# instead of reinstalling on every run.
if (!requireNamespace("h2o", quietly = TRUE)) {
  install.packages("h2o")
}

Control_path <- "D:/code-python/bam_process/MachineLearning/train_data/Control/Motif/"
HCC_path <- "D:/code-python/bam_process/MachineLearning/train_data/HCC/Motif/"

# Full paths of the per-sample frequency tables. The file lists are defined
# before anything iterates over them.
Control_files <- list.files(Control_path, pattern = "EndMotif", full.names = TRUE)
HCC_files <- list.files(HCC_path, pattern = "EndMotif", full.names = TRUE)

# Read the second column (V2) of every file and stack the vectors row-wise,
# one row per file, columns named V1..Vn.
#
# files: character vector of paths readable by read.table().
# Returns a data.frame with length(files) rows.
# Stops when `files` is empty or the files disagree on the number of values.
read_motif_frequencies <- function(files) {
  if (length(files) == 0L) {
    stop("No motif frequency files found.", call. = FALSE)
  }
  rows <- lapply(files, function(path) as.numeric(read.table(path)$V2))
  if (length(unique(lengths(rows))) != 1L) {
    stop("Motif files do not all contain the same number of values.",
         call. = FALSE)
  }
  # Bind once at the end rather than growing a data frame inside a loop.
  features <- as.data.frame(do.call(rbind, rows))
  colnames(features) <- paste0("V", seq_len(ncol(features)))
  features
}

# Control rows first, then HCC rows; the label vector relies on this order.
features <- read_motif_frequencies(c(Control_files, HCC_files))
feature_names <- colnames(features)

# 0 = Control, 1 = HCC. Derived from the file counts so labels always match
# the rows (the earlier hard-coded counts were 25 and 43).
y <- rep(c(0, 1), c(length(Control_files), length(HCC_files)))
train_data <- cbind(label = y, features)

# label ~ V1 + V2 + ... + Vn, built from the actual feature columns.
model_formula <- reformulate(feature_names, response = "label")

# Random 70 / 30 split into training and held-out rows.
# NOTE(review): no seed is set, so the split differs between runs.
index <- sort(sample(nrow(train_data), floor(nrow(train_data) * 0.7)))
train <- train_data[index, ]
test <- train_data[-index, ]

# The split has to exist before the model is fitted or used for prediction.
# NOTE(review): if there are more feature columns than training rows the fit
# is rank-deficient (NA coefficients, separation warnings) - consider
# regularisation or feature selection.
fit <- glm(model_formula, data = train, family = "binomial")
summary(fit)

# Predicted probability of the HCC class for the held-out rows.
pre_logistic <- as.numeric(predict(fit, newdata = test, type = "response"))
pre_logistic
# ---- Upload the training table to H2O and split it ----
library(h2o)

# Start (or connect to) the local H2O cluster first; frames can only be
# uploaded once a cluster is running. h2o.init() takes connection settings,
# not a data set.
h2o.init()

# Convert the R data frame to an H2OFrame under a separate name so that the
# original `train_data` data frame is not overwritten.
train_h2o <- as.h2o(train_data)

# h2o.splitFrame() operates on an H2OFrame: 80 % / 20 % split, returned as a
# list of two frames.
dat_split <- h2o.splitFrame(train_h2o, ratios = 0.8)
# ---- Build and cache Seqinfo objects (chr1-22, chrX) ----
library(GenomeInfoDb)

# Output directory is passed to file.path() instead of changing the working
# directory of the whole session with setwd().
out_dir <- "D:/github/cfDNAFFE/data/CNVdependency"
genomeBuild <- "hg19"
genomeStyle <- "UCSC"

# Build a Seqinfo object for one genome build.
#
# genome_build: UCSC build name, e.g. "hg19" or "hg38".
# genome_style: seqlevels style applied to the result.
# Returns a Seqinfo restricted to chr1..chr22 and chrX.
build_seqinfo <- function(genome_build, genome_style = "UCSC") {
  bsg <- paste0("BSgenome.Hsapiens.UCSC.", genome_build)
  if (requireNamespace(bsg, quietly = TRUE)) {
    # Use the locally installed BSgenome package; it exports an object named
    # after the package itself.
    info <- seqinfo(getExportedValue(bsg, bsg))
  } else {
    # Fall back to letting Seqinfo() look the genome up by name.
    info <- Seqinfo(genome = genome_build)
  }
  seqlevelsStyle(info) <- genome_style
  keepSeqlevels(info, value = paste0("chr", c(1:22, "X")))
}

# File name pattern: seqinfo_<build>_<style>.rds (lower-case extension only).
seqinfo_rds_path <- function(genome_build) {
  file.path(
    out_dir,
    paste0("seqinfo_", genome_build, "_", tolower(genomeStyle), ".rds")
  )
}

for (build in c(genomeBuild, "hg38")) {
  rds_path <- seqinfo_rds_path(build)
  # The primary build is always regenerated. Other builds are only created
  # when their cache file is missing, so an existing file is never replaced.
  if (build == genomeBuild || !file.exists(rds_path)) {
    build_info <- build_seqinfo(build, genomeStyle)
    print(build_info)
    saveRDS(build_info, rds_path)
  }
}

# Load the cached objects under the names used by downstream code.
seqinfo_hg19_ucsc <- readRDS(seqinfo_rds_path("hg19"))
seqinfo_hg38_ucsc <- readRDS(seqinfo_rds_path("hg38"))
