Classification on MNIST

In this tutorial, we will train an SVM classifier with a quantum kernel on the MNIST dataset. The basic pipeline consists of the following steps:

Load the dataset.
Define the quantum feature mapping.
Construct the quantum kernel.
Construct and train the SVM.

We begin by importing all related libraries:

import pennylane as qml

from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt

We then load the MNIST dataset. Here we focus on two classes, the digits 3 and 6, which gives a binary classification problem. PCA is first applied to reduce the feature dimension of the MNIST images, and therefore the number of qubits required to encode these classical features. The compressed features are then standardized (zero mean, unit variance) so that their scale suits the periodic rotation angles of the quantum feature mapping used later.

def load_mnist(n_qubit):
    """Load MNIST digits 3 and 6 as PCA-compressed, standardized features.

    The PCA projection and the scaler are fitted on the training split only
    and then applied to the test split, so no test-set statistics leak into
    the preprocessing.

    Args:
        n_qubit: Number of PCA components to keep (one feature per qubit).

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``. The feature arrays
        have ``n_qubit`` columns; labels are 0 for digit 3 and 1 for digit 6.
    """
    # Load MNIST from OpenML as plain NumPy arrays; the labels are strings.
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    X, y = mnist.data, mnist.target

    # Keep only the digits 3 and 6 and map them to binary labels
    # (0 for digit 3 and 1 for digit 6).
    mask = (y == '3') | (y == '6')
    X_filtered = X[mask]
    y_filtered = np.where(y[mask] == '3', 0, 1)

    # Split first so that PCA and the scaler never see the test samples.
    X_train, X_test, y_train, y_test = train_test_split(
        X_filtered, y_filtered, test_size=0.2, random_state=42
    )

    # Reduce the feature dimension; the seed keeps the projection
    # reproducible if a randomized SVD solver is selected.
    pca = PCA(n_components=n_qubit, random_state=42)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)

    # Standardize with statistics estimated from the training split only.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

We use the angle embedding as the quantum feature mapping method. The quantum kernel is therefore implemented as:

n_qubit = 8
dev = qml.device('default.qubit', wires=n_qubit)

@qml.qnode(dev)
def kernel(x1, x2, n_qubit):
    """Evaluate the quantum kernel between two feature vectors.

    The circuit embeds ``x1`` with an angle embedding, applies the adjoint
    angle embedding of ``x2``, and returns the expectation value of the
    projector onto the all-zeros basis state of the ``n_qubit`` wires.
    """
    wires = range(n_qubit)
    all_zeros = [0] * n_qubit

    qml.AngleEmbedding(x1, wires=wires)
    qml.adjoint(qml.AngleEmbedding)(x2, wires=wires)
    return qml.expval(qml.Projector(all_zeros, wires=wires))

With the estimated quantum kernel, an SVM classifier is constructed:

def kernel_mat(A, B):
    """Return the matrix of kernel values between the elements of A and B.

    Entry ``[i, j]`` is ``kernel(A[i], B[j], n_qubit)``, so the result has
    one row per element of ``A`` and one column per element of ``B``.
    """
    return np.array([[kernel(a, b, n_qubit) for b in B] for a in A])

# The kernel is supplied as a callable: kernel_mat computes the pairwise
# quantum-kernel values for the sample sets it is given.
svm = SVC(kernel=kernel_mat)

We then train the SVM with the quantum kernel and evaluate its performance.

# Prepare the data, fit the SVM on the training split, and score its
# predictions on the held-out test split.
X_train, X_test, y_train, y_test = load_mnist(n_qubit)
pred = svm.fit(X_train, y_train).predict(X_test)
accuracy_score(y_true=y_test, y_pred=pred)

svm = SVC(kernel='precomputed')
n_sample_max = 100

# Take the first n_sample_max training samples of every class and stack them
# class by class, so each class occupies a contiguous block of rows.
class_masks = [y_train == label for label in np.unique(y_train)]
X_train_sample = np.concatenate(
    [X_train[mask][:n_sample_max] for mask in class_masks], axis=0
)
y_train_sample = np.concatenate(
    [y_train[mask][:n_sample_max] for mask in class_masks], axis=0
)

# Kernel values are computed once here and reused for every training-set size.
# Test matrix: rows are test points, columns are the sampled training points.
kernel_mat_train = kernel_mat(X_train_sample, X_train_sample)
kernel_mat_test = kernel_mat(X_test, X_train_sample)

accuracy = []
n_samples = []
for n_sample in range(10, n_sample_max + 10, 10):
    # Indices of the first n_sample samples in each of the two class blocks
    # (the second block starts at offset n_sample_max).
    selected_indices = np.concatenate([
        np.arange(n_sample),
        np.arange(n_sample_max, n_sample_max + n_sample),
    ])

    # Train on the sub-matrix restricted to the selected training samples.
    train_kernel = kernel_mat_train[np.ix_(selected_indices, selected_indices)]
    svm.fit(train_kernel, y_train_sample[selected_indices])

    # Test points need only the columns of the selected training samples.
    pred = svm.predict(kernel_mat_test[:, selected_indices])

    accuracy.append(accuracy_score(y_test, pred))
    n_samples.append(n_sample)

# Plot test accuracy against the number of training samples used per class.
# The x values must be the list n_samples collected in the loop, not the
# scalar loop variable n_sample, which would not match the length of accuracy.
plt.plot(n_samples, accuracy, marker='o')
plt.title('Classification Accuracy vs. #Training Samples')
plt.xlabel('#Training Samples')
plt.xticks(n_samples, n_samples)
plt.ylabel('Accuracy')
plt.grid()
plt.tight_layout()
plt.show()