# -*- coding: utf-8 -*-
"""
GRU multi-step, multi-node groundwater-level forecasting script.

Reads a date-indexed CSV of 8 feature columns, slides a 60-step window over
the series, and trains a 2-layer GRU with six parallel linear heads, one per
monitoring node (feature columns 2..7).  Each head predicts the next
``output_size`` steps.  Results are plotted, scored (MAE / RMSE / NSE) and
exported to an Excel workbook, one sheet per node.

Created on Fri May 24 15:33:12 2024
@author: BDYGS
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable  # retained import; no longer used below
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from torch.utils.data import TensorDataset
from tqdm import tqdm


class Config():
    """Hyper-parameters and file locations gathered in one place."""

    # Backslashes doubled: a bare "\" before a non-escape character is an
    # invalid escape sequence (SyntaxWarning now, SyntaxError in future
    # Python).  The runtime path value is unchanged.
    data_path = "C:\\Users\\ZMK\\Desktop\\GRU\\玉泉山井.csv"
    timestep = 60          # look-back window length (time steps per sample)
    batch_size = 30        # mini-batch size
    feature_size = 8       # input features per time step
    hidden_size = 256      # GRU hidden-state width
    output_size = 15       # forecast horizon: steps predicted per head
    num_layers = 2         # stacked GRU layers
    epochs = 100           # training epochs
    best_loss = 0          # placeholder for best-loss tracking
    learning_rate = 0.0003


config = Config()


def normalization(data, label):
    """Min-max scale features and labels with independent scalers.

    Returns the scaled arrays plus the label scaler so that predictions can
    be mapped back to physical units later.
    """
    mm_x = MinMaxScaler()
    mm_y = MinMaxScaler()
    data = mm_x.fit_transform(data)
    label = mm_y.fit_transform(label)
    return data, label, mm_y


def split_windows(data, seq_len, output_size):
    """Slide a window over ``data`` to build supervised samples.

    Returns:
        x: (num_samples, seq_len, num_features) input windows
        y: (num_samples, output_size, num_targets) future values of
           columns 2: — the six monitoring-node series used as targets.
    """
    x, y = [], []
    # The last valid start index i satisfies i + seq_len + output_size
    # <= len(data).  The original bound subtracted an extra 1 and silently
    # dropped the final sample (off-by-one).
    for i in range(len(data) - seq_len - output_size):
        x.append(data[i:(i + seq_len), :])
        y.append(data[(i + seq_len):(i + seq_len + output_size), 2:])
    x, y = np.array(x), np.array(y)
    print('x.shape,y.shape=\n', x.shape, y.shape)
    return x, y


def split_data(x, y, split_ratio):
    """Chronological train/test split (no shuffling) as float32 tensors."""
    train_size = int(len(y) * split_ratio)
    # torch.Tensor(...) replaces the deprecated Variable(...) wrapper;
    # since PyTorch 0.4 tensors carry autograd state themselves.
    x_data = torch.Tensor(np.array(x))
    y_data = torch.Tensor(np.array(y))
    x_train = torch.Tensor(np.array(x[0:train_size]))
    y_train = torch.Tensor(np.array(y[0:train_size]))
    x_test = torch.Tensor(np.array(x[train_size:len(x)]))
    y_test = torch.Tensor(np.array(y[train_size:len(y)]))
    print('x_data.shape,y_data.shape,x_train.shape,y_train.shape,x_test.shape,y_test.shape:\n{}{}{}{}{}{}'
          .format(x_data.shape, y_data.shape, x_train.shape, y_train.shape,
                  x_test.shape, y_test.shape))
    return x_data, y_data, x_train, y_train, x_test, y_test


def nash_sutcliffe_efficiency(y_true, y_pred):
    """Compute the Nash-Sutcliffe Efficiency (NSE).

    Args:
        y_true: array-like, observed values.
        y_pred: array-like, predicted values.

    Returns:
        float: 1 - SSE / variance-of-observations; 1.0 is a perfect fit.
    """
    return 1 - np.sum((y_true - y_pred) ** 2) / np.sum((y_true - np.mean(y_true)) ** 2)


# 1. Load the time series; "date" column becomes the index.
df = pd.read_csv(config.data_path, parse_dates=["date"], index_col=[0])
print(df.shape)

data = df.iloc[:, 0:8]          # all eight feature columns
label = df.iloc[:, 7]           # last feature doubles as the scaler label
data = data.values
label = label.values.reshape(-1, 1)

# torch.manual_seed(7)  # uncomment for reproducible runs

data, label, mm_y = normalization(data, label)
dataX, dataY = split_windows(data, config.timestep, config.output_size)
x_data, y_data, x_train, y_train, x_test, y_test = split_data(dataX, dataY, 0.8)

# 5. Wrap tensors into datasets.
train_data = TensorDataset(x_train, y_train)
test_data = TensorDataset(x_test, y_test)

# 6. Build loaders; shuffle=False keeps chronological order.
train_loader = torch.utils.data.DataLoader(train_data, config.batch_size, False)
test_loader = torch.utils.data.DataLoader(test_data, config.batch_size, False)


# 7. Define the GRU network.
class GRU(nn.Module):
    """2-layer GRU with six per-node linear output heads.

    Each head projects the GRU output to ``output_size`` future steps for
    one monitoring node; the six heads are stacked into a
    (batch, output_size, 6) prediction.  Head attribute names fc1..fc6 are
    preserved so previously saved state_dicts still load.
    """

    def __init__(self, feature_size, hidden_size, num_layers, output_size):
        super(GRU, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        # dropout applies between the stacked GRU layers (num_layers >= 2).
        self.gru = nn.GRU(feature_size, hidden_size, num_layers,
                          dropout=0.8, batch_first=True)
        self.fc1 = nn.Linear(self.hidden_size, self.output_size)
        self.fc2 = nn.Linear(self.hidden_size, self.output_size)
        self.fc3 = nn.Linear(self.hidden_size, self.output_size)
        self.fc4 = nn.Linear(self.hidden_size, self.output_size)
        self.fc5 = nn.Linear(self.hidden_size, self.output_size)
        self.fc6 = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x, hidden=None):
        """Run the GRU and return (batch, output_size, 6) predictions."""
        batch_size = x.size()[0]
        if hidden is None:
            # Zero initial hidden state on the input's device/dtype
            # (replaces the deprecated x.data.new(...).fill_(0) idiom).
            h_0 = torch.zeros(self.num_layers, batch_size, self.hidden_size,
                              dtype=x.dtype, device=x.device)
        else:
            h_0 = hidden
        output, h_0 = self.gru(x, h_0)
        # One head per node; keep only the projection of the last time step.
        heads = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        preds = [head(output)[:, -1, :] for head in heads]
        return torch.stack(preds, dim=2)


model = GRU(config.feature_size, config.hidden_size,
            config.num_layers, config.output_size)
print(model)
loss_function = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)

# 8. Training loop.
for epoch in range(config.epochs):
    model.train()
    running_loss = 0
    train_bar = tqdm(train_loader)
    for batch in train_bar:
        # Local names: the original reused x_train/y_train and clobbered
        # the module-level split tensors.
        x_batch, y_batch = batch
        optimizer.zero_grad()
        y_batch_pred = model(x_batch)
        loss = loss_function(y_batch_pred, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # .item() yields a detached Python float for display.
        train_bar.desc = "train epoch[{}/{}] loss:{:.6f}".format(
            epoch + 1, config.epochs, loss.item())

print('Finished Training')

model_name = 'GRU_YQS'
torch.save(model.state_dict(),
           'C://Users//ZMK//Desktop//GRU/{}.pth'.format(model_name))

model.eval()

# Validation: rebuild the full dataset from the raw file and predict it all.
df = pd.read_csv(config.data_path, parse_dates=["date"], index_col=[0])
data = df.iloc[:, 0:8]
label = df.iloc[:, 7]
data = data.values
label = label.values.reshape(-1, 1)

data, label, mm_y = normalization(data, label)
dataX, dataY = split_windows(data, config.timestep, config.output_size)
x_data, y_data, x_train, y_train, x_test, y_test = split_data(dataX, dataY, 0.8)

# no_grad: inference over the whole series needs no autograd graph.
with torch.no_grad():
    test_pre = model(x_data)

with pd.ExcelWriter("C:\\Users\\ZMK\\Desktop\\GRU\\GRU-YQS.xlsx",
                    engine='openpyxl') as writer:
    for i in range(6):
        # First forecast step (horizon index 0) of node i over all windows.
        test_pre_data = test_pre[:, 0, i].data.numpy().reshape(-1, 1)
        y_test_pre = y_data[:, 0, i].data.numpy().reshape(-1, 1)
        print(test_pre_data.shape)

        # NOTE(review): mm_y was fitted on the single label column (index 7);
        # applying its inverse to every node assumes all six series share
        # that scale — confirm against the CSV before trusting the units.
        test_pre_data_inv = mm_y.inverse_transform(test_pre_data)
        y_test_inv = mm_y.inverse_transform(y_test_pre)

        plt.figure(figsize=(10, 5))
        plt.plot(y_test_inv)
        plt.plot(test_pre_data_inv)
        plt.legend(('real', 'predict'), fontsize='15')
        plt.show()

        print('MAE/RMSE/NSE')
        print(mean_absolute_error(y_test_inv, test_pre_data_inv))
        print(np.sqrt(mean_squared_error(y_test_inv, test_pre_data_inv)))
        print(nash_sutcliffe_efficiency(y_test_inv, test_pre_data_inv))

        y_test_inv = pd.DataFrame(y_test_inv, columns=[f'True Node {i+1}'])
        test_pre_data_inv = pd.DataFrame(test_pre_data_inv,
                                         columns=[f'test Node {i+1}'])

        # Fixed swap: ground truth now goes to the 'True Node' sheet and
        # predictions to 'test Node' (the original wrote each to the
        # other's sheet).
        y_test_inv.to_excel(writer, sheet_name=f'True Node {i+1}', index=False)
        test_pre_data_inv.to_excel(writer, sheet_name=f'test Node {i+1}',
                                   index=False)