Данные беру с
https://www.blockchain.com/ru/charts#block.
Мой код:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
# Print every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Read the semicolon-separated file, using the 'Time' column as the index.
data = pd.read_csv(
    'data.csv',
    delimiter=';',
    index_col=['Time'],
    dayfirst=True,
)
# Parse the index labels as day.month.year dates, then average the rows
# at weekly frequency.
data.index = pd.to_datetime(data.index, format='%d.%m.%Y')
data = data.resample('W').mean()

# Features are every column except 'market_price'; that column is the target.
x = data.drop(columns=['market_price'])
y = data['market_price']
# Candidate regressors. Only the first list entry is used further down; the
# alternatives are left commented out so they can be switched in.
models = [
    # LinearRegression(),  # ordinary least squares
    RandomForestRegressor(n_estimators=100, max_features='sqrt'),  # random forest
    # KNeighborsRegressor(n_neighbors=6),  # k-nearest neighbours
    # SVR(kernel='linear'),  # support vector regression with a linear kernel
]
# Hold out the last 20% of the weekly rows for testing. The rows form a time
# series, so the split is kept chronological (shuffle=False): a shuffled split
# places weeks adjacent to the test weeks into the training set, which leaks
# information and inflates the reported score.
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2, shuffle=False)
model = models[0].fit(X_train, Y_train)

# Predict once and reuse the result for both the score and the plot.
test_predictions = model.predict(X_test)
r_2 = {'R2_Y': r2_score(Y_test, test_predictions)}
print(r_2)

# Plot predicted against actual values for the held-out weeks.
pred_test = pd.DataFrame({
    'pred': test_predictions,
    'real': Y_test
})
pred_test.plot()
plt.show()
# Weekly dates to forecast for.
new_dates = pd.date_range('2021-05-09', '2022-01-02', freq='W')
# Combine the observed dates with the forecast dates. Index.union() is the
# supported set-union API; using `|` between Index objects for this was
# deprecated in pandas 1.x and no longer performs a set union in pandas 2.x.
new_dates = pd.Index(x.index).union(new_dates)
x2 = pd.DataFrame({'Time': new_dates})
y2 = pd.DataFrame({'Time': new_dates})

# Right-join onto the full date list so that dates without data become
# rows of NaN.
x_new = pd.merge(x, x2, on='Time', how='right')
y_new = pd.merge(y, y2, on='Time', how='right')
x_new = x_new.set_index('Time')
# NOTE(review): dates with no observed features get all-zero feature rows
# here. Those rows are identical to each other, so the model returns the same
# prediction for every one of them; the forecast part of the plot will be a
# flat line. A meaningful forecast needs real (or separately forecast) feature
# values for the future dates, e.g. lagged features.
x_new = x_new.fillna(0)
y_new = y_new.set_index('Time')

# Refit on the full history. models[0] is the same estimator object that was
# fitted earlier in the script, so this fit replaces the earlier one.
model_2 = models[0].fit(x, y)
# This score is computed on the same rows the model was just trained on, so
# it describes the fit, not forecasting accuracy.
r_2 = {'R2_Y': r2_score(y, model_2.predict(x))}
print(r_2)

# Plot predictions for all dates next to the known prices ('real' is NaN for
# dates that have no observation).
pred = pd.DataFrame({
    'pred': model_2.predict(x_new),
    'real': y_new.market_price
})
pred.plot()
plt.show()
И вот результат:
Пример данных
market_price trade_volume hashrate transactions_per_day
Time
2016-05-08 452.953333 9.812436e+05 1.409150e+06 227158.000000
2016-05-15 453.300000 1.940460e+06 1.337420e+06 235977.500000
2016-05-22 445.025000 2.009665e+06 1.400187e+06 201746.500000
2016-05-29 471.270000 3.097414e+06 1.520550e+06 218714.666667
2016-06-05 549.925000 5.691195e+06 1.302883e+06 227920.000000
Кто знает, в чём может быть проблема, подскажите, пожалуйста.