4.3.2 Image Classification ResNet in Action: Eye Recognition - Model Construction

4.3.2 Model Construction

In the previous section, the ResNet model structure has been understood by defining the good, and this section directly uses Resnet50 in the Flying Paddle high-level API for image classification experiments.

In [7]

from paddle.vision.models import resnet50

model = resnet50()

W0714 20:32:55.131150 102 device_context.cc:447] PLEASE NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.2, Runtime API Version. 10.1

W0714 20:32:55.136173 102 device_context.cc:465] device: 0, cuDNN Version: 7.6.

4.3.3 Loss Functions

The Flying Paddle high level APIs are provided for you to implement a good cross entropy loss function, the code is shown below.

In [8]

import paddle.nn.functional as F

loss_fn = F.cross_entropy

4.3.4 Model Training

The ResNet network is trained using the cross-entropy loss function and using SGD as an optimizer.

In [9]

# -*- coding: utf-8 -*-

# LeNet to recognize eye images

import os

import random

import paddle

import numpy as np

class Runner(object): def __init__(self, model, optimizer, loss_fn)

def __init__(self, model, optimizer, loss_fn).

self.model = model

self.optimizer = optimizer

self.loss_fn = loss_fn

# Record the global optimum

self.best_acc = 0

# Define the training process

def train_pm(self, train_datadir, val_datadir, **kwargs).

print('start training ... ')

self.model.train()

num_epochs = kwargs.get('num_epochs', 0)

csv_file = kwargs.get('csv_file', 0)

save_path = kwargs.get(“save_path”, “/home/aistudio/output/”)

# Define the data reader, train the data reader

train_loader = data_loader(train_datadir, batch_size=10, mode='train')

for epoch in range(num_epochs).

for batch_id, data in enumerate(train_loader()).

x_data, y_data = data

img = paddle.to_tensor(x_data)

label = paddle.to_tensor(y_data)

# Run the model forward computation to get the predicted values

logits = model(img)

avg_loss = self.loss_fn(logits, label)

if batch_id % 20 == 0.

print(“epoch: {}, batch_id: {}, loss is: {:.4f}”.format(epoch, batch_id, float(avg_loss.numpy())))

# backpropagation, update weights, clear gradients

avg_loss.backward()

self.optimizer.step()

self.optimizer.clear_grad()

acc = self.evaluate_pm(val_datadir, csv_file)

self.model.train()

if acc > self.best_acc: self.save_model(save_model)

self.save_model(save_path)

self.best_acc = acc

# Model evaluation phase, use 'paddle.no_grad()' to control not computing and storing gradients

@paddle.no_grad()

def evaluate_pm(self, val_datadir, csv_file).

self.model.evaluate()

accuracies = []

losses = []

# Validate the data reader

valid_loader = valid_data_loader(val_datadir, csv_file)

for batch_id, data in enumerate(valid_loader()):

x_data, y_data = data

img = paddle.to_tensor(x_data)

label = paddle.to_tensor(y_data)

# Run the model forward computation to get the predicted values

logits = self.model(img)

# Multi-categorize, use softmax to compute predicted probability

pred = F.softmax(logits)

loss = self.loss_fn(pred, label)

acc = paddle.metric.accuracy(pred, label)

accuracies.append(acc.numpy())

losses.append(loss.numpy())

print(“[validation] accuracy/loss: {:.4f}/{:.4f}”.format(np.mean(accuracies), np.mean(losses)))

return np.mean(accuracies)

# Model evaluation phase, use 'paddle.no_grad()' control to not compute and store gradients

@paddle.no_grad()

def predict_pm(self, x, **kwargs).

# set the model to evaluation mode

self.model.eval()

# Run model forward computation to get predicted values

logits = self.model(x)

return logits

def save_model(self, save_path).

paddle.save(self.model.state_dict(), save_path + 'palm.pdparams')

paddle.save(self.optimizer.state_dict(), save_path + 'palm.pdopt')

def load_model(self, model_path).

model_state_dict = paddle.load(model_path)

self.model.set_state_dict(model_state_dict)

Instantiate the Runner class and pass in the training configuration, the code is implemented as follows:

In [12]

# Turn on GPU 0 for training

use_gpu = True

paddle.device.set_device('gpu:0') if use_gpu else paddle.device.set_device('cpu')

# Define the optimizer

# opt = paddle.optimizer.Momentum(learning_rate=0.001, momentum=0.9, parameters=model.parameters(), weight_decay=0.001)

opt = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters())

runner = Runner(model, opt, loss_fn)

Use Runner to train 5 epochs on the training set and save the model with the highest accuracy as the best model.

In [13]

import os

# dataset path

DATADIR = '/home/aistudio/work/palm/PALM-Training400/PALM-Training400'

DATADIR2 = '/home/aistudio/work/palm/PALM-Validation400'

CSVFILE = '/home/aistudio/labels.csv'

# Set the number of iteration rounds

EPOCH_NUM = 5

# Path to save the model

PATH = '/home/aistudio/output/'

if not os.path.exists(PATH):: os.makedirs(PATH): os.makedirs(PATH)

os.makedirs(PATH)

# Start the training process

runner.train_pm(DATADIR, DATADIR2.

num_epochs=EPOCH_NUM, csv_file=CSVFILE, save_path=PATH)

start training ...

epoch: 0, batch_id: 0, loss is: 0.3287

epoch: 0, batch_id: 20, loss is: 0.0716

[validation] accuracy/loss: 0.9625/5.9818

/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/framework/io.py:729: UserWarning: The input state dict is empty , no need to save. The input state dict is empty , no need to save.

warnings.warn(“The input state dict is empty, no need to save.”)

epoch: 1, batch_id: 0, loss is: 0.1286

epoch: 1, batch_id: 20, loss is: 0.4718

[validation] accuracy/loss: 0.9650/5.9824

epoch: 2, batch_id: 0, loss is: 0.0892

epoch: 2, batch_id: 20, loss is: 0.0313

[validation] accuracy/loss: 0.9625/5.9801

epoch: 3, batch_id: 0, loss is: 0.1362

epoch: 3, batch_id: 20, loss is: 0.0569

[validation] accuracy/loss: 0.9625/5.9746

epoch: 4, batch_id: 0, loss is: 0.1036

epoch: 4, batch_id: 20, loss is: 0.0873

[validation] accuracy/loss: 0.9575/5.9856

It can be found from the running results that using ResNet on the eye screening dataset iChallenge-PM, after 5 epochs of training, the accuracy on the validation set can reach about 96%.

4.3.5 Model Evaluation

The best model saved during training is evaluated using test data to observe the accuracy of the model on the evaluation set. The code is implemented as follows:

In [ ]

# load optimal model

runner.load_model('/home/aistudio/output/palm.pdparams')

# Evaluate the model

score = runner.evaluate_pm(DATADIR2, CSVFILE)

[validation] accuracy/loss: 0.9725/5.9591

4.3.6 Model Prediction

Similarly, it is possible to use a saved model to perform model prediction on one of the data in the test set and observe the model effect. The code implementation is as follows:

In [18]

import cv2

from PIL import Image

import matplotlib.pyplot as plt

import paddle

import paddle.nn.functional as F

%matplotlib inline

# Load the optimal model

runner.load_model('/home/aistudio/output/palm.pdparams')

# Get the first piece of data in the test set

DATADIRv2 = '/home/aistudio/work/palm/PALM-Validation400'

filelists = open('/home/aistudio/labels.csv').readlines()

# Other test images can be obtained by modifying the number in the filelists list, which can take values from 1-400.

line = filelists[1].strip().split(',')

name, label = line[1], int(line[2])

# Read the test image

img = cv2.imread(os.path.join(DATADIRv2, name))

# Preprocess the test image

trans_img = transform_img(img)

unsqueeze_img = paddle.unsqueeze(paddle.to_tensor(trans_img), axis=0)

# model prediction

logits = runner.predict_pm(unsqueeze_img)

result = F.softmax(logits)

pred_class = paddle.argmax(result).numpy()

# Output true category and pred_class

print(“The true category is {} and the predicted category is {}”.format(label, pred_class))

# Image visualization

show_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

plt.imshow(show_img)

plt.show()

The true category is 0 and the predicted category is [0]

Summary

In this section, we have implemented eye disease recognition through ResNet model with a prediction accuracy of around 95% on the validation set, and familiarized ourselves with the basic vision task construction process through this case study. If the reader is interested, he/she can further adjust the hyperparameters such as learning rate and number of training rounds to see if higher accuracy can be obtained.

Homework.

This section implements eye recognition by calling the paddle high-level API Resnet50 model from paddle.vision.models import resnet50. Replace it with another model and see if you can get higher accuracy.