Как предсказать погоду на следующие 24ч по данным за предыдущие 10 дней? Для примера берем первые 1440 образцов и по ним предсказываем. Код и ссылки ниже
Таблица данных -
https://www.kaggle.com/pankrzysiu/weather-archive-...
Сеть:
import os
import numpy as np
from matplotlib import pyplot as plt
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop
data_dir = "C:/Users/pikro/Downloads/jena_climate"
fname = os.path.join(data_dir, "jena_climate_2009_2016.csv")
f = open(fname)
data = f.read()
f.close()
lines = data.split("\n")
header = lines[0].split(",")
lines = lines[1:]
print(header)
print(len(lines))
"""Parsing the data"""
float_data = np.zeros((len(lines), len(header) - 1))
for i, line in enumerate(lines):
print(i)
values = [float(x) for x in line.split(',')[1:]]
float_data[i:] = values
temp = float_data[:, 1]
plt.plot(range(len(temp)), temp)
plt.show()
plt.plot(range(1440), temp[:1440])
plt.show()
# Normalizing the data
mean = float_data[:200000].mean(axis=0)
float_data -= mean
std = float_data[:200000].std(axis=0)
float_data /= std
print(float_data[0])
def generator(data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=6):
"""Generator yielding timeseries samples and their targets"""
if max_index is None:
max_index = len(data) - delay - 1
i = min_index + lookback
while 1:
if shuffle:
rows = np.random.randint(min_index + lookback, max_index, size=batch_size)
else:
if i + batch_size >= max_index:
i = min_index + lookback
rows = np.arange(i, min(i + batch_size, max_index))
i += len(rows)
samples = np.zeros((len(rows), lookback // step, data.shape[-1]))
targets = np.zeros((len(rows),))
for j, row in enumerate(rows):
indices = range(rows[j] - lookback, rows[j], step)
samples[j] = data[indices]
targets[j] = data[rows[j] + delay][1]
yield samples, targets
"""Preparing the generators"""
lookback = 1440 # 10 days
step = 6 # 5 hours
delay = 144 # 1 hours
batch_size = 128
train_gen = generator(float_data,
lookback=lookback,
delay=delay,
min_index=0,
max_index=200000,
shuffle=True,
step=step,
batch_size=batch_size)
val_gen = generator(float_data,
lookback=lookback,
delay=delay,
min_index=200001,
max_index=300001,
shuffle=True,
step=step,
batch_size=batch_size)
test_gen = generator(float_data, lookback=lookback, delay=delay, min_index=300001, max_index=None, step=step,
batch_size=batch_size)
val_steps = (300000 - 200001 - lookback) // batch_size
print(val_steps)
test_steps = (len(float_data) - 300001 - lookback) // batch_size
print(test_steps)
def evaluate_naive_method():
batch_maes = []
for step in range(val_steps):
samples, targets = next(val_gen)
preds = samples[:, -1, 1]
mae = np.mean(np.abs(preds - targets))
batch_maes.append(mae)
return np.mean(batch_maes)
model = Sequential()
model.add(layers.GRU(32, dropout=0.2, recurrent_dropout=0.2, input_shape=(None, float_data.shape[1])))
model.add(layers.Dense(1))
model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen,
steps_per_epoch=500,
epochs=30,
validation_data=val_gen,
validation_steps=val_steps)
model.save("weather.h5")
loss = history.history['loss']
val_loss = history.history["val_loss"]
epochs = range(1, 31)
plt.figure()
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation acc")
plt.legend()
plt.show()
predict = model.predict_generator(test_gen, steps=test_steps)
Пробовал делать так:test_gen = generator(float_data, lookback=lookback, delay=delay, min_index=300001, max_index=None, step=step,
batch_size=batch_size)
val_steps = (300000 - 200001 - lookback) // batch_size
test_steps = (len(float_data) - lookback) // batch_size
model = models.load_model("weather.h5")
predict = model.predict_generator(test_gen, test_steps)
print("____")
for i in predict:
for j in i:
print(j, j * std[1])
print(len(predict))
Но числа не сходятся( Там абсолютно другие значения. Может нормализую не так?