1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
| library(xgboost) library(caret) library(pROC) library(hydroGOF)
origindata <- read.table('E:\\data\\data.txt',encoding="UTF-8",header = T,sep = "\t")
origindata <-origindata[,-1]
origindata <-na.omit(origindata)
colnames(origindata)[length(origindata)]<-"target"
set.seed(1234) choose<-createDataPartition(y=origindata$target,p=0.8,list=F) traindata<-origindata[choose,] testdata<-origindata[-choose,]
data_trainx<-as.matrix(traindata[,1:length(origindata)-1]) data_trainy<-as.numeric(as.character(traindata$target)) data_testx<-as.matrix(testdata[,1:length(origindata)-1]) data_testy<-as.numeric(as.character(testdata$target))
dtrain <- xgb.DMatrix(data = data_trainx, label = data_trainy) dtest <- xgb.DMatrix(data = data_testx, label = data_testy)
watchlist <- list(train = dtrain, test = dtest)
DO_xg <- xgb.train( data = dtrain, eta = 0.3, gamma = 0.001, max_depth = 2, subsample = 0.7, colsample_bytree = 0.4, objective = "reg:squarederror", nrounds = 1000, watchlist = watchlist, verbose = 1, print_every_n = 100, early_stopping_rounds = 200 )
trainpred<-predict(DO_xg,newdata = dtrain) testpred<-predict(DO_xg,newdata = dtest)
trainR2<-1-sum((traindata$target-trainpred)^2)/sum((traindata$target-mean(traindata$target))^2) trainRMSE<-sqrt(mean((traindata$target-trainpred)^2)) trainMAE<-mean(abs(traindata$target-trainpred)) trainRSE<-sqrt(sum((traindata$target-trainpred)^2)) / sqrt(sum((traindata$target-mean(traindata$target))^2)) trainMSE<-mean((traindata$target-trainpred)^2) trainMAPE<-mean(abs((traindata$target-trainpred) / traindata$target)) trainMSPE<-mean(((traindata$target-trainpred) / traindata$target)^2) cat("训练集R2:",trainR2,",RMSE:",trainRMSE,",MAE:",trainMAE,",RSE:",trainRSE,",MSE:",trainMSE,",MAPE:",trainMAPE,",MSPE:",trainMSPE)
testR2<-1-sum((testdata$target-testpred)^2)/sum((testdata$target-mean(testdata$target))^2) testRMSE<-sqrt(mean((testdata$target-testpred)^2)) testMAE<-mean(abs(testdata$target-testpred)) testRSE<-sqrt(sum((testdata$target-testpred)^2)) / sqrt(sum((testdata$target-mean(testdata$target))^2)) testMSE<-mean((testdata$target-testpred)^2) testMAPE<-mean(abs((testdata$target-testpred) / testdata$target)) testMSPE<-mean(((testdata$target-testpred) / testdata$target)^2) cat("测试集R2:",testR2,",RMSE:",testRMSE,",MAE:",testMAE,",RSE:",testRSE,",MSE:",testMSE,",MAPE:",testMAPE,",MSPE:",testMSPE) cat("target平均:",mean(testdata$target),"target平均误差范围:±",mean(abs(testdata$target-testpred)),"target最大误差:±",max((testdata$target-testpred)))
test_group <- rep('Test Samples',times=nrow(dtest)) testPlot <- data.frame(testdata$target,testpred,test_group) colnames(testPlot)<-c('DO', 'Prediction_DO', 'Group') train_group <- rep('Train Samples',times=nrow(dtrain)) trainPlot <- data.frame(traindata$target,trainpred,train_group) colnames(trainPlot)<-c('DO', 'Prediction_DO', 'Group') Plotset <- rbind(testPlot,trainPlot) library(ggplot2) library(ggThemeAssist) Plot2 <- ggplot(data = Plotset)+ geom_point(data = Plotset[1:nrow(testPlot),], aes(x=DO,y=Prediction_DO,color='Test Samples'), size=1.5, shape=8)+ theme(axis.line = element_line(colour = "black"))+ geom_point(data = Plotset[nrow(testPlot)+1:nrow(Plotset),], aes(x=DO,y=Prediction_DO,color='Train Samples'), shape=8) + theme(panel.grid.major = element_line(colour = NA), panel.grid.minor = element_line(colour = NA), legend.title = element_blank(), panel.background = element_rect(fill = "white"), legend.background = element_rect(colour = "black",linetype = "solid"), legend.position = c(0.25,0.72))+ geom_smooth(data=Plotset, aes(x=DO,y=Prediction_DO), method = 'lm',formula = y~x,se = FALSE, color='black',size=0.5)+ theme(plot.title = element_text(hjust = 0.5))+coord_equal()+ scale_x_continuous(limits = c(min(c(testPlot$DO,testPlot$Prediction_DO)), max(c(testPlot$DO,testPlot$Prediction_DO))))+ scale_y_continuous(limits = c(min(c(testPlot$DO,testPlot$Prediction_DO)), max(c(testPlot$DO,testPlot$Prediction_DO))))+ theme(axis.text.x = element_text(size = 14,family="serif"))+ theme(axis.text.y = element_text(size = 14,family="serif"))+ theme(legend.text=element_text(size=14,family="serif"))+ theme(axis.title.x = element_text(size = 14,family="serif"))+ theme(axis.title.y = element_text(size = 14,family="serif")) Plot2
setwd("E:\\output") jpeg(filename =paste0("target预测.jpg"),width=1000,height=1000,res=250) Plot2 dev.off()
|