英招

牢骚太盛防肠断,风物长宜放眼量

0%

LSTM (by TensorFlow)

前期准备

下载anaconda、Tensorflow

通过anaconda搭建虚拟环境并下载所需的工具包,通过cuDNN等实现GPU加速(可选)

本章采用python3.9+Tensorflow2.10

LSTM时间序列预测

1
2
3
4
import os
import numpy as np
import pandas as pd
import time
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# 1. Data loading: read every CSV file in a folder, extract one fixed column
#    from each file, and merge them into a single dataset.
def load_data(folder_path, num_sites):
    """Load the 4th column of every CSV in *folder_path* into one array.

    Parameters
    ----------
    folder_path : str
        Directory containing the per-site CSV files (read with GBK encoding).
    num_sites : int
        Expected number of site files. NOTE(review): currently unused by the
        implementation; kept for backward compatibility with existing callers.

    Returns
    -------
    np.ndarray
        Shape (num_time_steps, num_csv_files): one column per file, in
        sorted-filename order.

    Raises
    ------
    FileNotFoundError
        If the folder contains no ``.csv`` files (previously this surfaced
        as a cryptic ``np.concatenate`` ValueError).
    """
    files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
    if not files:
        raise FileNotFoundError(f'no .csv files found in {folder_path!r}')
    columns = []
    for name in files:
        frame = pd.read_csv(os.path.join(folder_path, name), encoding='gbk')
        columns.append(frame.iloc[:, [3]].values)  # keep only the 4th column
    return np.concatenate(columns, axis=1)

# Load the full dataset from the local data directory (9 site files expected).
# NOTE(review): absolute Windows path is hard-coded — consider a config value.
data = load_data('E:/data', 9)
# data has shape (num_time_steps, feature_size=9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Build sliding windows: reshape (num_time_steps, feature_size) into
# (num_windows, window_size, feature_size).
def create_sliding_windows(data, window_size):
    """Return an array of consecutive length-*window_size* slices of *data*.

    Parameters
    ----------
    data : np.ndarray of shape (num_time_steps, feature_size)
    window_size : int

    Returns
    -------
    np.ndarray of shape (num_time_steps - window_size, window_size, feature_size)

    NOTE(review): ``range(n - window_size)`` yields one window fewer than the
    standard count of ``n - window_size + 1`` (the final valid window is
    dropped). Kept as-is because the downstream feature/label alignment
    depends on this exact count.
    """
    n = data.shape[0]
    return np.array([data[i:i + window_size] for i in range(n - window_size)])
# Build feature windows of 50 time steps each.
features_windows = create_sliding_windows(data, 50)

# Extract the label series.
target_data = data[:, 4] # target site's column
# Build label windows: values starting 24 hours after each feature window.
labels_data = target_data[50 + 24 - 1:,np.newaxis] # data 24 hours ahead as labels
labels = create_sliding_windows(labels_data, 24)
labels = np.squeeze(labels)

# Truncate features so features and labels have the same number of samples.
features_windows = features_windows[:len(labels)]
labels_windows = labels

# 7:3 train/test split; shuffle=False preserves chronological order,
# which is required for time-series evaluation.
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(features_windows,labels_windows,shuffle=False,test_size=0.3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Dropout

# Three stacked 10-unit LSTM layers followed by a small dense head.
# tanh activation is used because CuDNN GPU acceleration requires it
# (relu would disable the fast kernel); every layer is followed by a
# 10% dropout.
model = Sequential([
    LSTM(10, input_shape=X_train.shape[1:], activation='tanh',
         return_sequences=True),
    Dropout(0.1),
    LSTM(10, activation='tanh', return_sequences=True),
    Dropout(0.1),
    LSTM(10, activation='tanh'),
    Dropout(0.1),
    Dense(10, activation='tanh'),
    Dropout(0.1),
    # Single fully-connected regression output.
    Dense(1),
])

model.compile(optimizer='adam', loss='mse', metrics=['mape'])

model.fit(X_train, y_train, batch_size=50, epochs=100,
          validation_data=(X_test, y_test))
1
2
3
4
5
6
7
# Save the trained model.
model.save('./models/9.STSS_new_tanh.keras')
# Evaluate the model on the held-out test split.
model.summary()
model.evaluate(X_test,y_test)
# Predict on the test set and flatten to 1-D for export.
pre=model.predict(X_test)
pre_flat=pre.flatten()
1
2
3
4
5
6
7
# Extract the timestamps for the test rows and export predictions
# alongside the true targets.
# NOTE(review): y_test comes from squeezed 24-step label windows and may be
# 2-D (n_samples, 24); pd.DataFrame requires 1-D columns — confirm upstream.
df = pd.read_csv('E:/data/time.csv', encoding='gbk')
df.set_index('time', inplace=True)
# Align timestamps with the (chronologically last) test split.
df_time = df.index[-len(y_test):]
test = pd.DataFrame({'df_time': df_time, 'pre': pre_flat, 'y_test': y_test})
# Export. Fixes two defects: the original path './output/.csv' had an empty
# file stem, and the output directory was assumed to already exist.
os.makedirs('./output', exist_ok=True)
test.to_csv('./output/predictions.csv')