Здравствуйте! Изучая нейронные сети, я написал класс нейронной сети. С задачей классификации он справляется, но аппроксимировать функцию синуса с его помощью почему-то не получается. Подскажите, пожалуйста, в чём может быть проблема?
P.S. Ниже привожу код самого класса и код, в котором я аппроксимирую функцию и строю график.
import numpy as np
import random
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so
    large negative inputs no longer overflow ``np.exp`` (the naive formula
    emits a RuntimeWarning there).

    Args:
        x: A scalar or NumPy array.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.
    """
    e = np.exp(-np.abs(x))
    # For x >= 0 the value is 1 / (1 + e); for x < 0 it is e / (1 + e).
    numerator = np.where(np.asarray(x) >= 0, 1.0, e)
    return numerator / (1.0 + e)
def sigmoid_prime(x):
    """Return the derivative of the logistic sigmoid, element-wise.

    The derivative sigmoid(x) * (1 - sigmoid(x)) equals
    ``e / (1 + e) ** 2`` with ``e = exp(-|x|)``; it is an even function of
    ``x``. This form needs a single exponential and cannot overflow.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The sigmoid derivative at ``x``, with the same shape as ``x``.
    """
    e = np.exp(-np.abs(x))
    return e / (1.0 + e) ** 2
def linear(x):
    """Identity activation: hand the argument back unchanged."""
    return x
def linear_prime(x):
    """Derivative of the identity activation.

    The result is the constant 1 regardless of ``x``; it is returned as a
    plain scalar, not as an array shaped like ``x``.
    """
    return 1
def tanh(x):
    """Return the hyperbolic tangent of ``x``, element-wise.

    Delegates to ``np.tanh``. The hand-written ratio of exponentials
    evaluates to inf / inf = nan once ``|x|`` is large enough for
    ``np.exp`` to overflow.

    Args:
        x: A scalar or NumPy array.

    Returns:
        tanh(x), with the same shape as ``x``.
    """
    return np.tanh(x)
def tanh_prime(x):
    """Return the derivative of tanh, 1 - tanh(x)**2, element-wise.

    Uses ``np.tanh`` directly and evaluates it once, so the result stays
    finite (0.0) for large ``|x|`` instead of becoming nan.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The tanh derivative at ``x``, with the same shape as ``x``.
    """
    t = np.tanh(x)
    return 1 - t * t
class Network:
    """Fully connected feed-forward network trained with mini-batch SGD.

    Inputs, targets and activations are column vectors of shape (n, 1).
    Layer ``i`` computes ``f(W_i @ a + b_i)``. The training signal is
    ``output - y`` (see ``cost_derivative``), i.e. the gradient of the
    quadratic cost ``0.5 * ||output - y||**2``.
    """

    def __init__(self, sizes, activation_func=sigmoid,
                 activation_prime=sigmoid_prime, *,
                 output_func=None, output_prime=None):
        """Create a network with randomly initialised parameters.

        Args:
            sizes: Number of units per layer, input layer first.
            activation_func: Activation applied by each layer.
            activation_prime: Derivative of ``activation_func``.
            output_func: Optional activation for the last layer only. When
                omitted, the last layer uses ``activation_func``. A bounded
                output activation limits the values the network can emit
                (sigmoid stays inside (0, 1)), so regression on targets
                outside that range needs e.g. a linear output.
            output_prime: Derivative of ``output_func``; must be given
                together with it.

        Raises:
            ValueError: If only one of ``output_func`` / ``output_prime``
                is supplied.
        """
        if (output_func is None) != (output_prime is None):
            raise ValueError(
                "output_func and output_prime must be given together")
        self.sizes = sizes
        self.num_layers = len(sizes)
        # Biases are drawn before weights, so seeded runs reproduce the
        # same parameters as before.
        self.biases = [np.random.randn(n_out, 1) for n_out in sizes[1:]]
        self.weights = [np.random.randn(n_out, n_in)
                        for n_in, n_out in zip(sizes, sizes[1:])]
        self.activation_function = activation_func
        self.activation_prime = activation_prime
        self.output_function = output_func
        self.output_prime = output_prime

    @property
    def actiovation_prime(self):
        """Legacy misspelled alias of ``activation_prime``."""
        return self.activation_prime

    @actiovation_prime.setter
    def actiovation_prime(self, value):
        self.activation_prime = value

    def _layer_activations(self):
        """Return one (function, derivative) pair per weight layer."""
        hidden = (self.activation_function, self.activation_prime)
        if self.output_function is None:
            output = hidden
        else:
            output = (self.output_function, self.output_prime)
        return [hidden] * (len(self.weights) - 1) + [output]

    def forward_prop(self, a):
        """Propagate the column vector ``a`` through every layer.

        Returns:
            The activation of the last layer.
        """
        layers = zip(self.weights, self.biases, self._layer_activations())
        for w, b, (func, _) in layers:
            a = func(np.dot(w, a) + b)
        return a

    def cost_derivative(self, output_activations, y):
        """Return ``output_activations - y``, the quadratic-cost gradient."""
        return (output_activations - y)

    def backprop(self, x, y):
        """Compute the cost gradient for a single training example.

        Args:
            x: Input column vector.
            y: Target column vector.

        Returns:
            A tuple ``(nabla_b, nabla_w)`` of lists laid out like
            ``self.biases`` and ``self.weights``.
        """
        layer_fns = self._layer_activations()
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass: record every pre-activation z and activation a.
        activation = x  # the input layer is the first activation
        a_mas = [x]
        z_mas = []
        for b, w, (func, _) in zip(self.biases, self.weights, layer_fns):
            z = np.dot(w, activation) + b
            activation = func(z)
            z_mas.append(z)
            a_mas.append(activation)

        # Backward pass: output layer first.
        delta = (self.cost_derivative(a_mas[-1], y)
                 * layer_fns[-1][1](z_mas[-1]))
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, a_mas[-2].T)
        # Starts at 2 because the last layer is already handled; negative
        # indices walk the layers from the end towards the input.
        for l in range(2, self.num_layers):
            delta = (np.dot(self.weights[-l + 1].transpose(), delta)
                     * layer_fns[-l][1](z_mas[-l]))
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, a_mas[-l - 1].T)
        return nabla_b, nabla_w

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step averaged over ``mini_batch``.

        Args:
            mini_batch: Sequence of ``(x, y)`` pairs.
            eta: Learning rate.
        """
        if len(mini_batch) == 0:
            # Nothing to average over; also avoids dividing by zero.
            return
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        eps = eta / len(mini_batch)
        self.weights = [w - eps * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - eps * nb for b, nb in zip(self.biases, nabla_b)]

    def SGD(self, training_data, epochs, mini_batch_size, eta):
        """Train with mini-batch stochastic gradient descent.

        Args:
            training_data: Iterable of ``(x, y)`` pairs. It is copied, so
                the caller's sequence keeps its original order.
            epochs: Number of passes over the data.
            mini_batch_size: Examples per update; must be at least 1.
            eta: Learning rate.

        Raises:
            ValueError: If ``mini_batch_size`` is smaller than 1.
        """
        if mini_batch_size < 1:
            raise ValueError("mini_batch_size must be at least 1")
        # Shuffle a private copy: reordering the caller's list in place
        # silently breaks code that later indexes it positionally.
        training_data = list(training_data)
        n = len(training_data)
        for _ in range(epochs):
            random.shuffle(training_data)
            for k in range(0, n, mini_batch_size):
                self.update_mini_batch(
                    training_data[k:k + mini_batch_size], eta)
И код аппроксимации:
%matplotlib inline
import matplotlib.pyplot as plt
net2 = Network([1,100,1])
x = np.linspace(0,10,1000)
y = np.sin(x)
train = [(np.array(x[i]).reshape(1,1),np.array(y[i]).reshape(1,1)) for i in range(len(x))]
net2.SGD(train,10,10,0.1)
y_pred = []
y_tmp = []
for i in range(len(x)):
y_tmp.append(net2.forward_prop(train[i][0]))
y_pred.append(float(net2.forward_prop(train[i][0])))
plt.plot(x,y,'r',x,y_pred)
plt.grid()