# -*- coding: utf-8 -*-
|
"""
|
Created on Fri May 24 15:33:12 2024
|
|
@author: BDYGS
|
"""
|
|
# -*- coding: utf-8 -*-
|
"""
|
Created on Tue Apr 23 05:28:13 2024
|
|
@author: BDYGS
|
"""
|
|
# -*- coding: utf-8 -*-
|
"""
|
Created on Sat Apr 6 04:01:04 2024
|
|
@author: BDYGS
|
"""
|
|
import matplotlib.pyplot as plt
|
import numpy as np
|
import pandas as pd
|
import torch
|
import torch.nn as nn
|
from torch.autograd import Variable
|
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
from sklearn.metrics import mean_squared_error
|
from sklearn.metrics import mean_absolute_error
|
from torch.utils.data import TensorDataset
|
from tqdm import tqdm
|
|
|
|
class Config():
    """Hyper-parameters and file locations used by the GRU forecasting script."""

    # Input CSV. Every backslash is escaped explicitly; the runtime value is
    # unchanged, but the former "\玉" sequence was an invalid escape that
    # newer Python versions warn about.
    data_path = "C:\\Users\\ZMK\\Desktop\\GRU\\玉泉山井.csv"

    timestep = 60  # length of each input window (rows of history per sample)

    batch_size = 30  # mini-batch size handed to the DataLoader

    feature_size = 8  # number of features per time step (GRU input size)

    hidden_size = 256  # GRU hidden-state width

    output_size = 15  # number of future steps in each target window

    num_layers = 2  # number of stacked GRU layers

    epochs = 100  # number of passes over the training loader

    best_loss = 0  # loss bookkeeping slot; not read anywhere in this script

    learning_rate = 0.0003  # optimiser learning rate


config = Config()
|
|
|
def normalization(data, label):
    """Min-max scale the features and the labels with two separate scalers.

    Parameters:
        data: 2-D array of input features; a ``MinMaxScaler`` is fitted on it.
        label: 2-D array of labels; a second ``MinMaxScaler`` is fitted on it.

    Returns:
        A tuple ``(scaled_data, scaled_label, label_scaler)``. Only the label
        scaler is returned, so callers can map values back to original units
        with ``label_scaler.inverse_transform``.
    """
    feature_scaler = MinMaxScaler()
    label_scaler = MinMaxScaler()

    # Each scaler is fitted and applied in one step on the full array.
    scaled_data = feature_scaler.fit_transform(data)
    scaled_label = label_scaler.fit_transform(label)

    return scaled_data, scaled_label, label_scaler
|
|
|
def split_windows(data, seq_len, output_size, label_start=2):
    """Cut a 2-D series into (input window, target window) training pairs.

    For every start index ``i`` the input is ``data[i:i + seq_len, :]`` and
    the target is the following ``output_size`` rows restricted to the
    columns ``label_start:``.

    Parameters:
        data: 2-D array-like of shape (rows, features).
        seq_len: number of rows in each input window.
        output_size: number of rows in each target window.
        label_start: first column that belongs to the target; defaults to 2,
            the value that used to be hard-coded.

    Returns:
        ``(x, y)`` NumPy arrays of shape ``(n, seq_len, features)`` and
        ``(n, output_size, features - label_start)``. When the series is too
        short to produce a window, ``n`` is 0 and the arrays are empty but
        keep those trailing dimensions.
    """
    data = np.asarray(data)

    # NOTE: this count is kept exactly as before for compatibility; it stops
    # two windows short of the last position at which a full target would
    # still fit inside the series.
    n_windows = len(data) - seq_len - 1 - output_size

    if n_windows <= 0:
        # Previously the debug prints below referenced loop variables that
        # were never bound in this case.
        n_targets = data[:, label_start:].shape[1]
        x = np.empty((0, seq_len, data.shape[1]), dtype=data.dtype)
        y = np.empty((0, output_size, n_targets), dtype=data.dtype)
    else:
        x = np.array([data[i:i + seq_len, :] for i in range(n_windows)])
        y = np.array([
            data[i + seq_len:i + seq_len + output_size, label_start:]
            for i in range(n_windows)
        ])
        # Same debug output as before: last window index and its shapes.
        print('split_windows_i:', n_windows - 1)
        print(x[-1].shape, y[-1].shape)

    print('x.shape,y.shape=\n', x.shape, y.shape)
    return x, y
|
|
|
|
|
def split_data(x, y, split_ratio):
    """Convert windowed arrays to float32 tensors and split them in order.

    The first ``int(len(y) * split_ratio)`` samples form the training set and
    the remainder the test set; the samples are not shuffled.

    Parameters:
        x: array-like of input windows, indexed by sample on axis 0.
        y: array-like of target windows, indexed by sample on axis 0.
        split_ratio: fraction of the samples assigned to the training set.

    Returns:
        ``(x_data, y_data, x_train, y_train, x_test, y_test)`` as
        ``torch.float32`` tensors, where ``x_data``/``y_data`` hold all
        samples.
    """
    train_size = int(len(y) * split_ratio)

    def _as_float_tensor(values):
        # Plain tensors replace the deprecated autograd ``Variable`` wrapper;
        # torch.tensor copies, so every returned tensor owns its own memory.
        return torch.tensor(np.asarray(values), dtype=torch.float32)

    x_data = _as_float_tensor(x)
    y_data = _as_float_tensor(y)

    x_train = _as_float_tensor(x[:train_size])
    y_train = _as_float_tensor(y[:train_size])
    x_test = _as_float_tensor(x[train_size:])
    y_test = _as_float_tensor(y[train_size:])

    print('x_data.shape,y_data.shape,x_train.shape,y_train.shape,x_test.shape,y_test.shape:\n{}{}{}{}{}{}'
          .format(x_data.shape, y_data.shape, x_train.shape, y_train.shape, x_test.shape, y_test.shape))

    return x_data, y_data, x_train, y_train, x_test, y_test
|
|
|
def nash_sutcliffe_efficiency(y_true, y_pred):
    """Compute the Nash-Sutcliffe Efficiency (NSE) of a prediction.

    NSE = 1 - sum((y_true - y_pred)^2) / sum((y_true - mean(y_true))^2)

    Parameters:
        y_true: array-like of observed values.
        y_pred: array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The NSE as a float: 1.0 for a perfect prediction, 0.0 when the
        prediction is no better than the mean of the observations. If all
        observations are equal the denominator is zero and the result is
        whatever NumPy's division produces (``-inf`` or ``nan``).
    """
    # Coerce first so plain Python lists are accepted, as the "array-like"
    # contract promises; list - list would otherwise raise TypeError.
    observed = np.asarray(y_true, dtype=float)
    simulated = np.asarray(y_pred, dtype=float)

    residual_ss = np.sum((observed - simulated) ** 2)
    total_ss = np.sum((observed - np.mean(observed)) ** 2)
    return 1 - residual_ss / total_ss
|
|
|
|
# 1. Load the time-series data.
# parse_dates asks pandas to parse the "date" column as datetimes, and
# index_col=[0] uses the first CSV column as the frame index.
df = pd.read_csv(config.data_path, parse_dates=["date"], index_col=[0])

print(df.shape)

# The first eight columns are the model inputs; column 7 on its own is the
# label array handed to the label scaler (reshaped to a single column).
data = df.iloc[:, 0:8].values
label = df.iloc[:, 7].values.reshape(-1, 1)

# Optional: call torch.manual_seed(7) here to make runs reproducible.

data, label, mm_y = normalization(data, label)

dataX, dataY = split_windows(data, config.timestep, config.output_size)

x_data, y_data, x_train, y_train, x_test, y_test = split_data(dataX, dataY, 0.8)
|
|
# 5. Wrap the train/test tensors into datasets.
train_data = TensorDataset(x_train, y_train)
test_data = TensorDataset(x_test, y_test)

# 6. Build the batch iterators. Shuffling is switched off, so batches are
# served in the order the windows were generated.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=config.batch_size,
    shuffle=False,
)

test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=config.batch_size,
    shuffle=False,
)
|
|
|
# 7. Define the GRU network.
class GRU(nn.Module):
    """Stacked GRU encoder followed by six parallel linear output heads.

    Each head maps the GRU output at the last time step to ``output_size``
    values; the six head outputs are stacked on a new trailing axis.

    Parameters:
        feature_size: number of features per time step (GRU input size).
        hidden_size: width of the GRU hidden state.
        num_layers: number of stacked GRU layers.
        output_size: number of values produced by each head.
        dropout: dropout probability between GRU layers; defaults to 0.8,
            the value that used to be hard-coded.
    """

    def __init__(self, feature_size, hidden_size, num_layers, output_size, dropout=0.8):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        # Inter-layer dropout only exists when layers are stacked; passing a
        # non-zero value with a single layer just triggers a PyTorch warning.
        self.gru = nn.GRU(
            feature_size,
            hidden_size,
            num_layers,
            dropout=dropout if num_layers > 1 else 0.0,
            batch_first=True,
        )

        # Kept as six individually named attributes so that existing saved
        # state dicts (keys fc1.* .. fc6.*) still load.
        self.fc1 = nn.Linear(self.hidden_size, self.output_size)
        self.fc2 = nn.Linear(self.hidden_size, self.output_size)
        self.fc3 = nn.Linear(self.hidden_size, self.output_size)
        self.fc4 = nn.Linear(self.hidden_size, self.output_size)
        self.fc5 = nn.Linear(self.hidden_size, self.output_size)
        self.fc6 = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x, hidden=None):
        """Run the network on a batch of input windows.

        Parameters:
            x: tensor of shape (batch, timestep, feature_size).
            hidden: optional initial hidden state of shape
                (num_layers, batch, hidden_size). When None, nn.GRU starts
                from an all-zero state.

        Returns:
            Tensor of shape (batch, output_size, 6), one slice per head.
        """
        output, _ = self.gru(x, hidden)

        # Only the final time step feeds the heads, so slice before the
        # linear layers instead of projecting every step and discarding
        # all but the last.
        last_step = output[:, -1, :]

        heads = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        return torch.stack([head(last_step) for head in heads], dim=2)
|
|
|
# Build the network, the loss and the optimiser.
model = GRU(config.feature_size, config.hidden_size, config.num_layers, config.output_size)
print(model)

loss_function = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)

# 8. Train the model.
for epoch in range(config.epochs):
    model.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader)  # progress bar over the mini-batches

    # Dedicated batch names: the loop previously rebound the module-level
    # `data`, `x_train` and `y_train` to the last mini-batch.
    for batch_x, batch_y in train_bar:
        optimizer.zero_grad()
        batch_pred = model(batch_x)

        loss = loss_function(batch_pred, batch_y)
        loss.backward()
        optimizer.step()

        # Format the Python float rather than the tensor itself.
        batch_loss = loss.item()
        running_loss += batch_loss
        train_bar.desc = "train epoch[{}/{}] loss:{:.6f}".format(epoch + 1,
                                                                 config.epochs,
                                                                 batch_loss)

    # Report the accumulated loss, which was previously summed but never used.
    print("train epoch[{}/{}] mean loss:{:.6f}".format(epoch + 1,
                                                       config.epochs,
                                                       running_loss / max(len(train_loader), 1)))

print('Finished Training')

model_name = 'GRU_YQS'
torch.save(model.state_dict(), 'C://Users//ZMK//Desktop//GRU/{}.pth'.format(model_name))
|
|
|
model.eval()

# Model validation: rebuild the windows from the raw CSV and run the trained
# model over the whole series (training and test portions together).
df = pd.read_csv(config.data_path, parse_dates=["date"], index_col=[0])

data = df.iloc[:, 0:8].values

# split_windows takes feature columns 2..7 as targets, so the scaler used to
# map values back to original units must be fitted on those same six columns.
# Fitting it on column 7 alone rescaled every node with column 7's range.
label = df.iloc[:, 2:8].values

data, label, mm_y = normalization(data, label)

dataX, dataY = split_windows(data, config.timestep, config.output_size)

x_data, y_data, x_train, y_train, x_test, y_test = split_data(dataX, dataY, 0.8)

# Inference only: no autograd graph is needed for the full-series forward pass.
with torch.no_grad():
    test_pre = model(x_data)

# First forecast step of every window, mapped back to original units for all
# target columns at once, so each column uses its own min/max.
pred_first_step = mm_y.inverse_transform(test_pre[:, 0, :].numpy())
true_first_step = mm_y.inverse_transform(y_data[:, 0, :].numpy())

with pd.ExcelWriter("C:\\Users\\ZMK\\Desktop\\GRU\\GRU-YQS.xlsx", engine='openpyxl') as writer:

    for i in range(pred_first_step.shape[1]):
        test_pre_data_inv = pred_first_step[:, i].reshape(-1, 1)
        y_test_inv = true_first_step[:, i].reshape(-1, 1)

        print(test_pre_data_inv.shape)

        plt.figure(figsize=(10, 5))
        plt.plot(y_test_inv)
        plt.plot(test_pre_data_inv)
        plt.legend(('real', 'predict'), fontsize='15')
        plt.show()

        print('MAE/RMSE/NSE')
        print(mean_absolute_error(y_test_inv, test_pre_data_inv))
        print(np.sqrt(mean_squared_error(y_test_inv, test_pre_data_inv)))
        print(nash_sutcliffe_efficiency(y_test_inv, test_pre_data_inv))

        true_frame = pd.DataFrame(y_test_inv, columns=[f'True Node {i+1}'])
        pred_frame = pd.DataFrame(test_pre_data_inv, columns=[f'test Node {i+1}'])

        # One sheet per series. Observations go to the "True Node" sheet and
        # predictions to the "test Node" sheet (they used to be swapped).
        true_frame.to_excel(writer, sheet_name=f'True Node {i+1}', index=False)
        pred_frame.to_excel(writer, sheet_name=f'test Node {i+1}', index=False)