Commit ec35af70 authored by YU Xiyue

5 model

parent 1f7662d2
@@ -2,21 +2,41 @@ import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.datasets import MNIST
# Do not import any packages other than torch; the packages above are enough to complete the experiment
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class Mixer_Layer(nn.Module):
def __init__(self, patch_size, hidden_dim):
super(Mixer_Layer, self).__init__()
########################################################################
        # Implement the Mixer_Layer here (layernorm, mlp1, mlp2, skip_connection)
tokens_mlp_dim = 256
channels_mlp_dim = 32
        S = (28 // patch_size) ** 2  # number of patches (tokens); must be an int to size nn.Linear
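        # Layer input has shape (batch, S, hidden_dim): mlp1 mixes information
        # across the S patch tokens, mlp2 mixes across the hidden_dim channels.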
self.ln_token = nn.LayerNorm(hidden_dim)
self.mlp1 = nn.Sequential(
nn.Linear(S, tokens_mlp_dim),
nn.GELU(),
nn.Linear(tokens_mlp_dim, S)
)
self.ln_channel = nn.LayerNorm(hidden_dim)
self.mlp2 = nn.Sequential(
nn.Linear(hidden_dim, channels_mlp_dim),
nn.GELU(),
            nn.Linear(channels_mlp_dim, hidden_dim)  # map back to hidden_dim, not S
)
########################################################################
def forward(self, x):
########################################################################
        u = self.ln_token(x).transpose(1, 2)      # (B, hidden_dim, S) for token mixing
        x = x + self.mlp1(u).transpose(1, 2)      # token-mixing MLP with skip connection
        return x + self.mlp2(self.ln_channel(x))  # channel-mixing MLP with skip connection
########################################################################
@@ -26,15 +46,23 @@ class MLPMixer(nn.Module):
assert 28 % patch_size == 0, 'image_size must be divisible by patch_size'
assert depth > 1, 'depth must be larger than 1'
########################################################################
        # Implement the per-patch fully-connected layer, global average pooling, and the final fully-connected classifier here
self.ppfc = nn.Conv2d(1, hidden_dim, kernel_size=patch_size, stride=patch_size)
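        # (the conv above, with kernel_size = stride = patch_size, is equivalent to cutting
        # the image into non-overlapping patches and projecting each with a shared Linear)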
self.mlp = nn.Sequential(*[Mixer_Layer(patch_size, hidden_dim) for _ in range(depth)])
self.ln = nn.LayerNorm(hidden_dim)
self.fc = nn.Linear(hidden_dim, 10)
########################################################################
def forward(self, data):
########################################################################
        # Mind how the tensor shape changes at each step
        y = self.ppfc(data)                                 # (B, hidden_dim, 28/p, 28/p)
        y = torch.flatten(y, start_dim=2).transpose(1, 2)   # (B, S, hidden_dim)
        y = self.mlp(y)                                     # stacked Mixer layers
        y = self.ln(y)
        y = y.mean(dim=1)                                   # global average pooling over tokens
y = self.fc(y)
return y
########################################################################
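# A minimal shape sanity check (hypothetical values; patch_size=4 gives S=49 tokens):
#   model = MLPMixer(patch_size=4, hidden_dim=8, depth=2)
#   out = model(torch.randn(8, 1, 28, 28))  # expected output shape: (8, 10)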
@@ -44,8 +72,8 @@ def train(model, train_loader, optimizer, n_epochs, criterion):
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
########################################################################
            # Compute the loss and take an optimization step
            optimizer.zero_grad()
            loss = criterion(model(data), target)
            loss.backward()
            optimizer.step()
########################################################################
if batch_idx % 100 == 0:
print('Train Epoch: {}/{} [{}/{}]\tLoss: {:.6f}'.format(
@@ -55,19 +83,17 @@ def train(model, train_loader, optimizer, n_epochs, criterion):
def test(model, test_loader, criterion):
model.eval()
test_loss = 0.
    num_correct = 0  # number of correct predictions
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
########################################################################
            # Accumulate the test-set loss and count correct predictions
            output = model(data)
            test_loss += criterion(output, target).item() * data.size(0)
            num_correct += output.argmax(dim=1).eq(target).sum().item()
            ########################################################################
print("Test set: Average loss: {:.4f}\t Acc {:.2f}".format(test_loss.item(), accuracy))
if __name__ == '__main__':
n_epochs = 5
batch_size = 128
@@ -75,20 +101,21 @@ if __name__ == '__main__':
transform = transforms.Compose(
[transforms.ToTensor(),
         transforms.Normalize((0.1307,), (0.3081,))])
    trainset = MNIST(root='./data', train=True, download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2,
                                               pin_memory=True)
    testset = MNIST(root='./data', train=False, download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2,
                                              pin_memory=True)
########################################################################
    model = MLPMixer(patch_size=4, hidden_dim=8, depth=3).to(device)  # choose the hyperparameters yourself; depth must be greater than 1
    # Instantiate the optimizer and the criterion (cross-entropy) here
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
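    # nn.CrossEntropyLoss applies log-softmax itself, so the model returns raw logits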
########################################################################
train(model, train_loader, optimizer, n_epochs, criterion)
    test(model, test_loader, criterion)
@@ -2,6 +2,10 @@ import torch
import matplotlib.pyplot as plt
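# Logistic sigmoid at module level so fit uses one shared definition;
# torch.sigmoid is the built-in equivalent.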
def sigmoid(x):
return 1.0 / (1.0 + torch.exp(-x))
class MultiLayerPerceptron:
def __init__(self, lr=0.05, epochs=1000):
@@ -14,97 +18,84 @@ class MultiLayerPerceptron:
self.b2 = torch.rand(4, 1, dtype=torch.double, requires_grad=True)
self.b3 = torch.rand(3, 1, dtype=torch.double, requires_grad=True)
def fit(self, train_features, train_labels):
        loss_list = []
        diff = True  # stays True only if the manual gradients keep matching autograd's
        train_features = train_features.T  # (n_features, n_samples)
        # one-hot targets: y[c][i] = 1 iff sample i belongs to class c
        y = torch.zeros(3, train_labels.size()[0])
        for i in range(y.size()[1]):
            y[train_labels[i].item()][i] = 1
for i in range(self.epochs):
            # Forward pass: two sigmoid hidden layers, then a softmax output layer.
            # (No requires_grad_ calls are needed: the activations already require
            # grad because the weights do.)
            y1 = sigmoid(self.W1 @ train_features + self.b1)
            y2 = sigmoid(self.W2 @ y1 + self.b2)
            y3 = torch.softmax(self.W3 @ y2 + self.b3, dim=0)
            # negative log-likelihood of the true class, summed over all samples
            loss = (-torch.log(torch.gather(y3, 0, train_labels.T))).sum()
            loss.backward()
dw1 = self.W1.grad.data
dw2 = self.W2.grad.data
dw3 = self.W3.grad.data
db1 = self.b1.grad.data
db2 = self.b2.grad.data
db3 = self.b3.grad.data
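            # Manual backprop as a cross-check on autograd: for softmax with
            # cross-entropy the output delta is dL/dz3 = y3 - y; each earlier delta
            # is pushed back through W and the sigmoid derivative y * (1 - y);
            # weight gradients are delta @ input.T and bias gradients are row-sums.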
d3 = y3 - y
d2 = (self.W3.T @ d3) * (y2 * (1 - y2))
d1 = (self.W2.T @ d2) * (y1 * (1 - y1))
dws1 = d1 @ train_features.T
dws2 = d2 @ y1.T
dws3 = d3 @ y2.T
dbs1 = torch.sum(d1, dim=1).view(4, 1)
dbs2 = torch.sum(d2, dim=1).view(4, 1)
dbs3 = torch.sum(d3, dim=1).view(3, 1)
            # flag a mismatch if any autograd/manual gradient entry differs noticeably
            for auto_g, manual_g in [(dw1, dws1), (dw2, dws2), (dw3, dws3),
                                     (db1, dbs1), (db2, dbs2), (db3, dbs3)]:
                if torch.any(torch.abs(auto_g - manual_g) > 1e-3):
                    diff = False
self.W1.data = self.W1.data - self.lr * dw1
self.W2.data = self.W2.data - self.lr * dw2
self.W3.data = self.W3.data - self.lr * dw3
self.b1.data = self.b1.data - self.lr * db1
self.b2.data = self.b2.data - self.lr * db2
self.b3.data = self.b3.data - self.lr * db3
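            # updating .data (rather than the tensors themselves) keeps the
            # parameters as leaf tensors outside the autograd graph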
            # reset every gradient exactly once before the next iteration
            self.W1.grad.data.zero_()
            self.W2.grad.data.zero_()
            self.W3.grad.data.zero_()
            self.b1.grad.data.zero_()
            self.b2.grad.data.zero_()
            self.b3.grad.data.zero_()
loss_list.append(loss.item())
        print(loss_list[-1])  # final training loss
x = range(self.epochs)
plt.plot(x, loss_list)
plt.show()
        if diff:
            print("Manual gradients matched autograd throughout training!")
def main():
col = 0
    train_data = torch.randint(-1, 2, (100, 5), dtype=torch.double)
    train_label = train_data.narrow(1, col, 1).long()  # the label is simply column `col` of the data
    train_label = train_label + 1  # shift labels from {-1, 0, 1} into {0, 1, 2}
    lr = 0.005
    epochs = 10000
MLP = MultiLayerPerceptron(lr, epochs)
    MLP.fit(train_data, train_label)
main()