Deep Learning: From Basics to Real-World Applications

Table of Contents

  1. Neural Network Basics
  2. Convolutional Neural Networks (CNN)
  3. Recurrent Neural Networks (RNN)
  4. Generative Adversarial Networks (GAN)
  5. Autoencoders
  6. Attention Mechanisms
  7. Real-World Applications
  8. Conclusion

Deep Learning: From Basics to Real-World Applications 🧠

Deep Learning is a branch of Machine Learning that uses neural networks with many layers to learn complex representations from data. In this article, I will walk from the basic concepts through to real-world applications.

1. Neural Network Basics

Perceptron (A Single Neuron) with PyTorch

import torch
import torch.nn as nn
import torch.optim as optim

# AND gate data
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
y = torch.tensor([[0], [0], [0], [1]], dtype=torch.float32)

class Perceptron(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(2, 1)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

model = Perceptron()
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(100):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

print("Predictions:")
with torch.no_grad():
    preds = model(X)
    print(torch.round(preds).squeeze().numpy())
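
A single perceptron can only draw a linear decision boundary, so it learns AND but cannot learn XOR. As a quick sketch (reusing X, criterion, and the Perceptron class from above), training on XOR targets never reaches perfect accuracy, which is exactly what motivates the hidden layers in the next section:

# Hypothetical check: the same single-layer model on XOR targets
y_xor = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)
model_xor = Perceptron()
optimizer_xor = optim.SGD(model_xor.parameters(), lr=0.1)

for epoch in range(1000):
    optimizer_xor.zero_grad()
    loss = criterion(model_xor(X), y_xor)
    loss.backward()
    optimizer_xor.step()

with torch.no_grad():
    # A linear boundary cannot separate XOR, so some predictions stay wrong
    print(torch.round(model_xor(X)).squeeze().numpy())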

Multi-Layer Perceptron (MLP) with PyTorch

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Sample data
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, n_redundant=5, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to tensors after splitting
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

class MLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

model = MLP(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(50):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    test_outputs = model(X_test)
    acc = ((test_outputs > 0.5).float() == y_test).float().mean()
    print(f"Test Accuracy: {acc:.3f}")

2. Convolutional Neural Networks (CNN)

A CNN for Computer Vision

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Prepare the MNIST data
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, 1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            # After the two conv+pool stages a 28x28 input becomes 64 maps of 5x5
            nn.Linear(64 * 5 * 5, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.fc(x)
        return x

model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(3):
    model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Test Accuracy: {correct/total:.3f}")

Transfer Learning with Pre-trained Models (VGG16)

import torch
from torchvision import models, transforms
from PIL import Image

# Load pre-trained VGG16
vgg16 = models.vgg16(pretrained=True)
vgg16.eval()

# Freeze the base model
for param in vgg16.parameters():
    param.requires_grad = False

# Replace the classifier head if you want to fine-tune
vgg16.classifier[6] = torch.nn.Linear(4096, 10)  # example: 10 classes

# Predict on an image
def predict_image(img_path):
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # ImageNet statistics that the pre-trained weights expect
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    img = Image.open(img_path).convert('RGB')
    img = preprocess(img).unsqueeze(0)
    with torch.no_grad():
        output = vgg16(img)
        _, pred = torch.max(output, 1)
    return pred.item()
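
To actually fine-tune the new head, only its parameters need to go to the optimizer since everything else is frozen. A sketch, assuming a hypothetical train_loader that yields 224x224 RGB batches with integer labels (not defined here):

criterion = torch.nn.CrossEntropyLoss()
# Only the replaced head has requires_grad=True, so only it gets updated
optimizer = torch.optim.Adam(vgg16.classifier[6].parameters(), lr=1e-3)

vgg16.train()
for images, labels in train_loader:
    optimizer.zero_grad()
    loss = criterion(vgg16(images), labels)
    loss.backward()
    optimizer.step()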

3. Recurrent Neural Networks (RNN)

LSTM for Text Classification

import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence

# Toy data
texts = [
    "I like machine learning",
    "Deep learning is really interesting",
    "AI is the future of technology",
    "Algorithms are very important",
    "Neural networks are very complex"
]
labels = [1, 1, 1, 0, 0]

# Simple tokenization (index 0 is reserved for padding)
vocab = {'<pad>': 0}
def tokenize(text):
    return [vocab.setdefault(word, len(vocab)) for word in text.lower().split()]

sequences = [torch.tensor(tokenize(t)) for t in texts]
padded = pad_sequence(sequences, batch_first=True)
labels = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)

class LSTMClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim=16, hidden_dim=32):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x)
        _, (h, _) = self.lstm(x)
        out = self.fc(h[-1])  # last hidden state of the final layer
        return self.sigmoid(out)

model = LSTMClassifier(len(vocab))
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(50):
    optimizer.zero_grad()
    outputs = model(padded)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    preds = model(padded)
    print("Predictions:", (preds > 0.5).int().squeeze().tolist())

4. Generative Adversarial Networks (GAN)

Simple GAN

import torch
import torch.nn as nn
import torch.optim as optim

# Generator
class Generator(nn.Module):
    def __init__(self, latent_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 256),
            nn.LeakyReLU(0.2),
            nn.BatchNorm1d(256),
            nn.Linear(256, 512),
            nn.LeakyReLU(0.2),
            nn.BatchNorm1d(512),
            nn.Linear(512, 1024),
            nn.LeakyReLU(0.2),
            nn.BatchNorm1d(1024),
            nn.Linear(1024, 784),  # 28x28 = 784
            nn.Tanh()
        )

    def forward(self, x):
        return self.model(x)

# Discriminator
class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

# Create the models
latent_dim = 100
generator = Generator(latent_dim)
discriminator = Discriminator()

# Losses and optimizers
criterion = nn.BCELoss()
optimizer_g = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_d = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
# GAN
discriminator.trainable = False
gan_input = torch.randn(1, latent_dim)
gan_output = discriminator(generator(gan_input))
gan = torch.nn.Model(gan_input, gan_output)
gan.compile(optimizer=optimizer_g, loss=criterion)
print("GAN Architecture:")
print(gan)
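
After training, new images come straight from the generator. A short sketch, where the reshape assumes the 28x28 MNIST layout used above:

generator.eval()
with torch.no_grad():
    samples = generator(torch.randn(16, latent_dim)).view(-1, 1, 28, 28)
print(samples.shape)  # torch.Size([16, 1, 28, 28])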

5. Autoencoders

Variational Autoencoder (VAE)

import torch
import torch.nn as nn
import torch.optim as optim

# Encoder
class Encoder(nn.Module):
    def __init__(self, latent_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU()
        )
        self.mu = nn.Linear(256, latent_dim)
        self.log_var = nn.Linear(256, latent_dim)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # flatten
        x = self.model(x)
        mu = self.mu(x)
        log_var = self.log_var(x)
        return mu, log_var

# Sampling layer (the reparameterization trick: z = mu + sigma * eps)
def sampling(mu, log_var):
    std = torch.exp(0.5 * log_var)
    eps = torch.randn_like(std)
    return mu + eps * std

# Decoder
class Decoder(nn.Module):
    def __init__(self, latent_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 784),  # 28x28 = 784
            nn.Sigmoid()
        )

    def forward(self, z):
        return self.model(z)
# Build the VAE (PyTorch style: optimize encoder and decoder jointly)
latent_dim = 2
encoder = Encoder(latent_dim)
decoder = Decoder(latent_dim)
optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=0.001)

# One training step on an example batch (values in [0, 1] to match the
# decoder's Sigmoid output)
inputs = torch.rand(16, 784)
mu, log_var = encoder(inputs)
z = sampling(mu, log_var)
outputs = decoder(z)

# Loss = reconstruction + KL divergence
reconstruction_loss = nn.functional.mse_loss(outputs, inputs, reduction='sum')
kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
vae_loss = reconstruction_loss + kl_loss

optimizer.zero_grad()
vae_loss.backward()
optimizer.step()
print(f"VAE loss: {vae_loss.item():.3f}")
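
Once trained, the decoder doubles as a generator: sample z from a standard normal and decode it. A short sketch following the shapes above:

with torch.no_grad():
    z_new = torch.randn(16, latent_dim)
    generated = decoder(z_new).view(-1, 1, 28, 28)
print(generated.shape)  # torch.Size([16, 1, 28, 28])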

6. Attention Mechanisms

Self-Attention Implementation
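
Scaled dot-product attention computes a weighted average of the values V, where the weights measure how well each query in Q matches each key in K:

$$\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{QK^\top}{\sqrt{d_k}}\right)V$$

Dividing by $\sqrt{d_k}$ (the key dimension) keeps the logits small enough that the softmax does not saturate. The implementation below follows this formula step by step.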

import torch
import torch.nn as nn

def scaled_dot_product_attention(query, key, value, mask=None):
    # Compute raw attention scores
    matmul_qk = torch.matmul(query, key.transpose(-2, -1))
    # Scale by sqrt(d_k)
    dk = query.size(-1)
    scaled_attention_logits = matmul_qk / torch.sqrt(torch.tensor(dk, dtype=torch.float32))
    # Apply the mask if provided
    if mask is not None:
        scaled_attention_logits += (mask * -1e9)
    # Softmax over the key dimension
    attention_weights = torch.softmax(scaled_attention_logits, dim=-1)
    # Weighted sum of the values
    output = torch.matmul(attention_weights, value)
    return output, attention_weights
# Multi-head attention
class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super().__init__()
        assert d_model % num_heads == 0
        self.num_heads = num_heads
        self.d_head = d_model // num_heads
        # Linear projections (created once, not on every call)
        self.q_proj = nn.Linear(d_model, d_model)
        self.k_proj = nn.Linear(d_model, d_model)
        self.v_proj = nn.Linear(d_model, d_model)
        self.out_proj = nn.Linear(d_model, d_model)

    def forward(self, x):
        batch_size, seq_len, d_model = x.shape
        # Project, then reshape to (batch, heads, seq_len, d_head)
        query = self.q_proj(x).view(batch_size, seq_len, self.num_heads, self.d_head).transpose(1, 2)
        key = self.k_proj(x).view(batch_size, seq_len, self.num_heads, self.d_head).transpose(1, 2)
        value = self.v_proj(x).view(batch_size, seq_len, self.num_heads, self.d_head).transpose(1, 2)
        # Apply attention independently per head
        attention_output, attention_weights = scaled_dot_product_attention(query, key, value)
        # Merge the heads back to (batch, seq_len, d_model)
        attention_output = attention_output.transpose(1, 2).reshape(batch_size, seq_len, d_model)
        return self.out_proj(attention_output), attention_weights
# Test (PyTorch also ships a built-in nn.MultiheadAttention; see the sketch below)
d_model = 512
num_heads = 8
attention_layer = MultiHeadAttention(d_model, num_heads)

# Input shape: (batch_size, seq_len, d_model)
test_input = torch.randn(1, 10, d_model)
output, weights = attention_layer(test_input)
print(f"Input shape: {test_input.shape}")
print(f"Output shape: {output.shape}")
print(f"Attention weights shape: {weights.shape}")

7. Real-World Applications

Image Classification with ResNet

import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

# Load pre-trained ResNet50
model = models.resnet50(pretrained=True)
model.eval()

# Image prediction helper
def predict_image(img_path):
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    img = Image.open(img_path).convert('RGB')
    img = preprocess(img).unsqueeze(0)
    with torch.no_grad():
        output = model(img)
        _, pred = torch.max(output, 1)
    return pred.item()  # index into the 1000 ImageNet classes

# Example usage (requires a real image file)
# results = predict_image('path/to/image.jpg')
# print(f"Predicted class: {results}")

Text Generation with GPT-style Models

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pre-trained model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

def generate_text(prompt, max_length=100):
    # Encode the input
    inputs = tokenizer.encode(prompt, return_tensors='pt')
    # Generate (pad_token_id silences the "no pad token" warning for GPT-2)
    outputs = model.generate(inputs,
                             max_length=max_length,
                             num_return_sequences=1,
                             temperature=0.7,
                             do_sample=True,
                             pad_token_id=tokenizer.eos_token_id)
    # Decode
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

# Test
prompt = "Machine learning is"
generated = generate_text(prompt)
print(generated)
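
The character of the output is largely set by the sampling parameters. For example, nucleus sampling (a sketch using standard transformers generation arguments) trades determinism for diversity:

outputs = model.generate(tokenizer.encode("Machine learning is", return_tensors='pt'),
                         max_length=60,
                         do_sample=True,
                         top_p=0.9,   # nucleus sampling: keep the top 90% probability mass
                         top_k=50,    # also cap the candidate pool at 50 tokens
                         pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))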

Conclusion

Deep Learning has opened up new possibilities in AI. To succeed in this field, you need:

1. A Solid Foundation

  • Mathematics: Linear Algebra, Calculus, Statistics
  • Programming: Python, TensorFlow/PyTorch
  • Core ML fundamentals

2. Tools and Frameworks

  • TensorFlow/PyTorch: Deep learning
  • Transformers: NLP
  • OpenCV: Computer vision
  • JAX: Research