@tj57

Как правильно изменить размерность входных данных при обучении свёрточной сети на FashionMNIST?

Я пытаюсь обучить сеть на наборе данных Fashion MNIST с помощью свёрточной сети. Сначала я использовал обычную сеть:

class HyperParameters:
    inputs = 28 * 28
    hidden_layer_1 = 128
    hidden_layer_2 = 64
    outputs = 10
    learning_rate = 0.005
    rows = 1
    epochs = 200

model = nn.Sequential(
    OrderedDict(
        input_to_hidden=nn.Linear(HyperParameters.inputs,
                                  HyperParameters.hidden_layer_1),
        activation_1=nn.ReLU(),
        hidden_to_hidden=nn.Linear(HyperParameters.hidden_layer_1,
                                   HyperParameters.hidden_layer_2),
        activation_2=nn.ReLU(),
        hidden_to_output=nn.Linear(HyperParameters.hidden_layer_2,
                                   HyperParameters.outputs),
        activation_out=nn.LogSoftmax(dim=HyperParameters.rows),
    )
)

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=HyperParameters.learning_rate)


После я поменял её на свёрточную сеть:

class DeepCNN(nn.Module):
  def __init__(self):
    super(DeepCNN, self).__init__()
    self.model = nn.Sequential(
        nn.Conv2d(1, 8, 3),
        nn.Conv2d(8, 16, 3),
        nn.Linear(16, 10)
    )
  def forward(self, t):
    t = self.model(t)
    return t

model = DeepCNN()

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters())


Не удаётся понять, как правильно изменить размерность входных данных, чтобы она подходила для свёрточной сети. Как я понял, нужно воспользоваться np.reshape. Ошибка выглядит так:

RuntimeError: Expected 4-dimensional input for 4-dimensional weight 8 1 3 3, but
got 2-dimensional input of size [64, 784] instead


Возникает в данной строке:
output = model.forward(images)

Код программы:

import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from collections import OrderedDict
import seaborn

import torchvision
import torchvision.transforms as transforms

batch_size = 128

training = torchvision.datasets.FashionMNIST(root = "./data",
                                               train = True, download = True,
                                               transform = transforms.ToTensor())
testing = torchvision.datasets.FashionMNIST(root='./data',
                                               train=False, download=True,
                                               transform=transforms.ToTensor())
training_batches = torch.utils.data.DataLoader(training, batch_size=64, shuffle = True)
test_batches = torch.utils.data.DataLoader(testing,
                                           batch_size=64,
                                           shuffle=True)

data_iterator = iter(training_batches)
images, labels = data_iterator.next()

means = (0.5, 0.5, 0.5)
deviations = means
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(means, deviations)])


seaborn.set(style="whitegrid",
            rc={"axes.grid": False,
                "font.family": ["sans-serif"],
                "font.sans-serif": ["Latin Modern Sans", "Lato"],
                "figure.figsize": (8, 6)},
            font_scale=1)

im = images[1]
np_im = im.numpy()
np.squeeze(np_im).shape
plt.figure(figsize = (2,2))
plt.imshow(np.squeeze(np_im), cmap = 'gray')
plt.show()

classes = ("T-shirt/top",
                "Trouser",
                "Pullover",
                "Dress",
                "Coat",
                "Sandal",
                "Shirt",
                "Sneaker",
                "Bag",
                "Ankle boot",
                )

label_decoder = dict(zip(range(10), classes))

class DeepCNN(nn.Module):
  def __init__(self):
    super(DeepCNN, self).__init__()
    self.model = nn.Sequential(
        nn.Conv2d(1, 8, 3),
        nn.Conv2d(8, 16, 3),
        nn.Linear(16, 10)
    )
  def forward(self, t):
    t = self.model(t)
    return t

model = DeepCNN()

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.005)

def imshow(img):
    npimg = img.numpy() #convert the tensor to numpy for displaying the image
    plt.imshow(np.transpose(npimg, (1, 2, 0))) #for displaying the image, shape of the image should be height * width * channels
    plt.show()

imshow(torchvision.utils.make_grid(images))
print(' '.join(classes[labels[j]] for j in range(4)))

def accuracy_evaluation(data_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_batches:
            images = images.view(images.shape[0], -1)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return(100 * correct/total)

loss_array = []
loss_epoch_array = []
max_epochs = 10

for epoch in range(max_epochs):
    running_loss = 0
    for images, labels in training_batches:
        # some setup
        ## Flatten the images
        images = images.view(images.shape[0], -1)
        images = np.reshape(8, 1, 3, 3)
        ## Reset the optimizer
        optimizer.zero_grad()

        # forward pass
        output = model.forward(images)

        # back-propagation
        loss = criterion(output, labels)
        loss.backward()

        # take the next step
        optimizer.step()
        running_loss += loss.item()
    loss_epoch_array.append(loss.item())

    print(f"Training loss: {running_loss/len(training_batches)}")
    print('Epoch: %d/%d, Test acc: %0.2f' % (epoch, max_epochs, accuracy_evaluation(test_batches)))

plt.plot(loss_epoch_array)
plt.xlabel("Epochs"), plt.ylabel("Loss")
plt.show()
  • Вопрос задан
  • 361 просмотр
Пригласить эксперта
Ваш ответ на вопрос

Войдите, чтобы написать ответ

Похожие вопросы