# -*- coding: utf-8 -*-
"""
Created on Sun May 26 02:15:11 2024

@author: BDYGS
"""
|
import matplotlib.pyplot as plt
|
import numpy as np
|
import pandas as pd
|
import torch
|
import torch.nn as nn
|
from torch.autograd import Variable
|
# import tushare as ts
|
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
from sklearn.metrics import mean_squared_error
|
from sklearn.metrics import mean_absolute_error
|
from torch.utils.data import TensorDataset
|
from tqdm import tqdm
|
from datetime import datetime
|
|
import DataTask
|
|
|
# Base directory holding the trained model checkpoints (forward-slash form).
path_dir_left ="C://Users//ZMK//Desktop//GRU//"

# Same directory in backslash form; used for the CSV/XLSX data files.
path_dir_right ="C:\\Users\\ZMK\\Desktop\\GRU\\"
|
|
|
|
class Config():
    """Hyper-parameter container for the GRU groundwater-level forecaster."""

    # data_path = "C:\\Users\\ZMK\\Desktop\\GRU\永定河井.csv"
    timestep = 60           # look-back window length (past time steps fed to the model)
    batch_size = 30         # mini-batch size used during training
    feature_size = 8        # number of input features per time step
    hidden_size = 256       # GRU hidden-state size
    output_size = 15        # forecast horizon: 15 future steps predicted per output head
    num_layers = 2          # number of stacked GRU layers
    epochs = 100            # training epochs
    best_loss = 0           # best loss recorded during training
    learning_rate = 0.0003  # optimizer learning rate
    # model_name = 'GRU_ZMK'  # model name
    # save_path = 'C://Users//ZMK//Desktop//GRU//{}.pth'.format(model_name)  # best-model save path


# Shared module-level configuration instance.
config = Config()
|
|
|
def normalization(data,label):
    """Min-max scale features and labels with independent scalers.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features), model inputs.
    label : array-like of shape (n_samples, 1), target values.

    Returns
    -------
    (scaled_data, scaled_label, label_scaler) — the label scaler is
    returned so callers can later invert the label transform.
    """
    feature_scaler = MinMaxScaler()
    label_scaler = MinMaxScaler()
    scaled_data = feature_scaler.fit_transform(data)
    scaled_label = label_scaler.fit_transform(label)
    return scaled_data, scaled_label, label_scaler
|
|
|
def split_windows(data,seq_len,output_size):
    """Slice a 2-D time series into overlapping supervised-learning windows.

    For each start index i the input window is ``data[i:i+seq_len, :]``
    (all features) and the target is the following ``output_size`` rows
    restricted to columns 2 onward (the label columns).

    Parameters
    ----------
    data : np.ndarray of shape (n_samples, n_features)
    seq_len : int, look-back window length.
    output_size : int, forecast horizon.

    Returns
    -------
    x : np.ndarray of shape (n_windows, seq_len, n_features)
    y : np.ndarray of shape (n_windows, output_size, n_features - 2)
    """
    x = []
    y = []
    # The last valid start index satisfies i + seq_len + output_size <= len(data).
    # The original bound `len(data) - seq_len - 1 - output_size` was off by one
    # (twice), silently dropping the final two valid windows.
    for i in range(len(data) - seq_len - output_size + 1):
        x.append(data[i:(i + seq_len), :])
        y.append(data[(i + seq_len):(i + seq_len + output_size), 2:])
    x, y = np.array(x), np.array(y)
    print('x.shape,y.shape=\n', x.shape, y.shape)
    return x, y
|
|
def split_windows_long(data,seq_len,output_size):
    """Slice a 2-D time series into NON-overlapping forecast windows.

    Window i starts at ``i * output_size``; the input is the next
    ``seq_len`` rows (all features) and the target is the following
    ``output_size`` rows restricted to columns 2 onward (the label columns).

    Parameters
    ----------
    data : np.ndarray of shape (n_samples, n_features)
    seq_len : int, look-back window length.
    output_size : int, forecast horizon (also the stride between windows).

    Returns
    -------
    x : np.ndarray of shape (n_windows, seq_len, n_features)
    y : np.ndarray of shape (n_windows, output_size, n_features - 2)
    """
    # Number of windows such that start + seq_len + output_size <= len(data).
    # The original hard-coded `len(data)//output_size - 4`, which is only
    # correct when seq_len == 4 * output_size (the default 60/15); this form
    # is identical for that config and correct for any seq_len/output_size.
    n_windows = (len(data) - seq_len) // output_size
    x = []
    y = []
    for i in range(n_windows):
        start = i * output_size
        x.append(data[start:start + seq_len, :])
        y.append(data[start + seq_len:start + seq_len + output_size, 2:])
    x, y = np.array(x), np.array(y)
    print('x.shape,y.shape=\n', x.shape, y.shape)
    return x, y
|
|
|
def nash_sutcliffe_efficiency(y_true, y_pred):
    """Compute the Nash-Sutcliffe Efficiency (NSE).

    NSE = 1 - SSE / SS_total. A value of 1.0 means a perfect prediction;
    0.0 means the prediction is no better than the observed mean.

    Parameters
    ----------
    y_true : array-like, observed values.
    y_pred : array-like, predicted values.

    Returns
    -------
    nse : float, Nash-Sutcliffe Efficiency.
    """
    residual_ss = np.sum((y_true - y_pred) ** 2)
    total_ss = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1 - residual_ss / total_ss
|
|
|
|
# 7.定义GRU网络
|
class GRU(nn.Module):
    """Multi-head GRU forecaster.

    A stacked GRU encodes an input window of shape (batch, timestep,
    feature_size); six independent linear heads (fc1..fc6) each map the
    last hidden state to an ``output_size``-step forecast, one head per
    monitored node. Output shape is (batch, output_size, 6).
    """

    def __init__(self, feature_size, hidden_size, num_layers, output_size):
        super(GRU, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        # batch_first=True: inputs are (batch, time, feature).
        self.gru = nn.GRU(feature_size, hidden_size, num_layers,
                          dropout=0.8, batch_first=True)
        # One output head per predicted node. Attribute names fc1..fc6 are
        # kept so previously saved state_dicts remain loadable.
        self.fc1 = nn.Linear(self.hidden_size, self.output_size)
        self.fc2 = nn.Linear(self.hidden_size, self.output_size)
        self.fc3 = nn.Linear(self.hidden_size, self.output_size)
        self.fc4 = nn.Linear(self.hidden_size, self.output_size)
        self.fc5 = nn.Linear(self.hidden_size, self.output_size)
        self.fc6 = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x, hidden=None):
        """Run the forecaster.

        Parameters
        ----------
        x : tensor of shape (batch, timestep, feature_size).
        hidden : optional initial hidden state of shape
            (num_layers, batch, hidden_size); zeros when omitted.

        Returns
        -------
        tensor of shape (batch, output_size, 6).
        """
        if hidden is None:
            # Zero initial state with x's dtype/device.
            hidden = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        output, _ = self.gru(x, hidden)

        # Apply every head to the full sequence, then keep only the last
        # time step of each head's output.
        heads = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        per_node = [head(output)[:, -1, :] for head in heads]
        return torch.stack(per_node, dim=2)
|
|
|
|
#根据编号获取不同的预测模型
|
def getModelName(well_num):
    """Map a well number to its model checkpoint filename.

    Wells listed in ``DataTask.arr1`` use the YDH (Yongding River)
    checkpoint; every other well uses the YQS (Yuquan Mountain) one.
    """
    is_ydh_well = well_num in DataTask.arr1
    return 'GRU_YDH.pth' if is_ydh_well else 'GRU_YQS.pth'
|
|
|
|
#写入数据到csv
|
def write_csv(model_name , np_result,csv_path):
    """Dump a result matrix to CSV with model-specific column names.

    Parameters
    ----------
    model_name : str, checkpoint filename; 'GRU_YDH.pth' selects the
        Yongding-River column layout, anything else the Yuquan-Shan one.
    np_result : array of shape (n_rows, 9) — date, rainfall, flow rate,
        then six node columns.
    csv_path : str, destination CSV path.
    """
    if model_name =='GRU_YDH.pth':
        column_names = ["date", "Myrainfall", "flowrate", "LQWB",
                        "ZSSC", "WTY", "LSH", "HZZ", "GC"]
    else:
        column_names = ["date", "HDrainfall", "flowrate", "SXC",
                        "XM1", "XM2", "SYSC", "SJYY", "BW"]

    frame = pd.DataFrame(
        {name: np_result[:, idx] for idx, name in enumerate(column_names)}
    )
    frame.to_csv(csv_path, index=False)
|
|
|
#运行预测模型
|
def runPredictModel(well_num):
    """Run the 15-step forecast for one well group and export the results.

    Selects the checkpoint for ``well_num``, pulls the latest observations
    via DataTask, snapshots them to CSV, rebuilds the model input windows
    and writes per-node true/predicted series to an Excel workbook (one
    sheet per node and series).

    Side effects: writes ``…-pre.csv`` and ``…-预测结果.xlsx`` under
    ``path_dir_right``; reads the checkpoint from ``path_dir_left``.
    """
    model_name = getModelName(well_num)

    # Resolve data source and output paths for the selected model.
    if model_name == 'GRU_YDH.pth':
        csv_path = path_dir_right + "永定河井-pre.csv"
        excel_path = path_dir_right + "永定河井-预测结果.xlsx"
        data = DataTask.get_ydh15_real_data()
    else:
        csv_path = path_dir_right + "玉泉山井-pre.csv"
        excel_path = path_dir_right + "玉泉山井-预测结果.xlsx"
        data = DataTask.getyqs15_real_data()

    # Snapshot the raw observations; this CSV is also the model input file.
    write_csv(model_name, data, csv_path)

    # Build the network and load trained weights; inference only, so the
    # original (unused) loss function and optimizer were removed.
    model = GRU(config.feature_size, config.hidden_size,
                config.num_layers, config.output_size)
    model.load_state_dict(torch.load(path_dir_left + model_name))
    model.eval()

    df_pre = pd.read_csv(csv_path, parse_dates=["date"], index_col=[0])

    data_pre = df_pre.iloc[:, 0:8].values
    # The label column only feeds the scaler used to invert the outputs;
    # it does not take part in the forward pass.
    label_pre = df_pre.iloc[:, 7].values.reshape(-1, 1)

    data_pre, label_pre, mm_y_pre = normalization(data_pre, label_pre)
    dataX_pre, dataY_pre = split_windows_long(data_pre, config.timestep,
                                              config.output_size)

    dataX_pre = torch.Tensor(np.array(dataX_pre))
    dataY_pre = torch.Tensor(np.array(dataY_pre))

    # No gradients needed for inference.
    with torch.no_grad():
        test_pre = model(dataX_pre)  # (n_windows, output_size, 6)

    with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
        for i in range(6):
            pred_col = test_pre[:, :, i].numpy().reshape(-1, 1)
            true_col = dataY_pre[:, :, i].numpy().reshape(-1, 1)

            # NOTE(review): mm_y_pre was fitted on a single label column but
            # is applied to all six nodes — confirm the scaling is intended.
            pred_inv = mm_y_pre.inverse_transform(pred_col)
            true_inv = mm_y_pre.inverse_transform(true_col)

            true_df = pd.DataFrame(true_inv, columns=[f'True Node {i+1}'])
            pred_df = pd.DataFrame(pred_inv, columns=[f'pre Node {i+1}'])

            # BUG FIX: the original wrote predictions to the 'True Node'
            # sheet and ground truth to the 'pre Node' sheet (the DataFrame
            # column names show the intended pairing); sheets now match
            # their contents.
            true_df.to_excel(writer, sheet_name=f'True Node {i+1}', index=False)
            pred_df.to_excel(writer, sheet_name=f'pre Node {i+1}', index=False)
|
|
|