MNIST classification using structured-RerF

This was adapted from: https://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_logistic_regression_mnist.html

[1]:
import time
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state

from rerf.rerfClassifier import rerfClassifier
[2]:
# Author: Arthur Mensch <arthur.mensch@m4x.org>
# License: BSD 3 clause
# (Attribution above refers to the scikit-learn example this notebook was
# adapted from.)

# Number of samples used for training: it is passed as `train_size` to
# train_test_split further down. Reduce it for a faster run.
train_samples = 5000

# Load data from https://www.openml.org/d/554
# return_X_y=True makes fetch_openml return a (data, target) pair, unpacked
# here into X and y.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
[3]:
# Recent scikit-learn releases can return pandas objects from fetch_openml;
# positional fancy indexing (X[permutation]) only works on ndarrays, so coerce
# first. np.asarray is a no-op when X / y are already ndarrays.
X = np.asarray(X)
y = np.asarray(y)

# Seeded generator: used for the shuffle below and for the train/test split,
# so the same samples end up in each split on every run.
random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]
# Collapse any trailing dimensions so every sample is a single flat row.
X = X.reshape((X.shape[0], -1))

# Pass the seeded generator explicitly; without it train_test_split draws
# from the global NumPy RNG and the split differs between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_samples, test_size=10000,
    random_state=random_state)

# Fit the scaler on the training split only, then apply the same
# transformation to the held-out split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
[4]:
# n_estimators / n_jobs settings common to both classifiers.
shared_kwargs = dict(n_estimators=100, n_jobs=8)

# Forest built with the "Base" projection matrix.
clf_rerf = rerfClassifier(projection_matrix="Base", **shared_kwargs)

# Forest built with the "S-RerF" projection matrix: it is given the 28x28
# image dimensions and patch height/width limits of 1 to 5.
# NOTE(review): presumably these limits bound the image patches used when
# building projections -- confirm against the rerf documentation.
s_rerf_kwargs = dict(
    image_height=28,
    image_width=28,
    patch_height_min=1,
    patch_width_min=1,
    patch_height_max=5,
    patch_width_max=5,
)
clf_s_rerf = rerfClassifier(
    projection_matrix="S-RerF", **s_rerf_kwargs, **shared_kwargs
)

# Print each estimator's repr on its own line.
print(clf_rerf, clf_s_rerf, sep="\n")
rerfClassifier(feature_combinations=1.5, image_height=None, image_width=None,
               max_depth=None, max_features='auto', min_parent=1,
               n_estimators=100, n_jobs=8, oob_score=False,
               patch_height_max=None, patch_height_min=1, patch_width_max=None,
               patch_width_min=1, projection_matrix='Base', random_state=None)
rerfClassifier(feature_combinations=1.5, image_height=28, image_width=28,
               max_depth=None, max_features='auto', min_parent=1,
               n_estimators=100, n_jobs=8, oob_score=False, patch_height_max=5,
               patch_height_min=1, patch_width_max=5, patch_width_min=1,
               projection_matrix='S-RerF', random_state=None)
[5]:
# Time the fit of clf_rerf on the training split.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the system clock and can jump if that clock
# is adjusted while the fit is running.
t0 = time.perf_counter()
clf_rerf.fit(X_train, y_train)
run_time = time.perf_counter() - t0  # elapsed seconds
print("run time rerf", run_time)
run time rerf 0.7958900928497314
[6]:
# Time the fit of clf_s_rerf on the training split.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the system clock and can jump if that clock
# is adjusted while the fit is running.
t0 = time.perf_counter()
clf_s_rerf.fit(X_train, y_train)
run_time = time.perf_counter() - t0  # elapsed seconds
print("run time s_rerf", run_time)
run time s_rerf 3.6138932704925537
[7]:
# Accuracy of both forests on the data they were trained on.
# The integer cast of the labels is done once and shared by both calls.
# NOTE(review): fit() received y_train without this cast, while scoring uses
# the int-cast labels -- confirm predict() yields integer labels.
y_train_int = y_train.astype(int)

train_acc_rerf = clf_rerf.score(X_train, y_train_int)
print("train_acc rerf", train_acc_rerf)

train_acc_s_rerf = clf_s_rerf.score(X_train, y_train_int)
print("train_acc s_rerf", train_acc_s_rerf)
train_acc rerf 1.0
train_acc s_rerf 1.0
[8]:
# Accuracy of both forests on the held-out test split.
# The integer cast of the labels is done once and shared by both calls.
y_test_int = y_test.astype(int)

score = clf_rerf.score(X_test, y_test_int)
print("score rerf", score)

# `score` is deliberately rebound so it ends up holding the S-RerF result.
score = clf_s_rerf.score(X_test, y_test_int)
print("score s_rerf", score)
score rerf 0.9361
score s_rerf 0.9427