import pandas as pd
import numpy as np


# Data generation function to create a checkerboard-patterned dataset
def make_data_checkerboard(n, noise=0):
    n_samples = int(n/4)
    scale = 5
    shift = 2.5
    center = 0.5
    c1a = (np.random.rand(n_samples,2)-center)*scale + [-shift, shift]
    c1b = (np.random.rand(n_samples,2)-center)*scale + [shift, -shift]
    c0a = (np.random.rand(n_samples,2)-center)*scale + [shift, shift]
    c0b = (np.random.rand(n_samples,2)-center)*scale + [-shift, -shift]
    X = np.concatenate((c1a,c1b,c0a,c0b),axis=0)
    y = np.concatenate((np.ones(2*n_samples), np.zeros(2*n_samples)))
    # Randomly flips a fraction of the labels to add noise
    for i,value in enumerate(y):
        if np.random.rand() < noise:
            y[i] = 1-value
    return (X,y)

# Training datasets (we create 3 to use to average over model)
np.random.seed(88)
N = 3
X_train = []
y_train = []
for i in range(N):
    Xt,yt = make_data_checkerboard(500, noise=0.25)
    X_train.append(Xt)
    y_train.append(yt)
    
# Validation and test data
X_val,y_val = make_data_checkerboard(3000, noise=0.25)
X_test,y_test = make_data_checkerboard(3000, noise=0.25)

# For the final performance evaluation, train on all of the training and validation data:
X_train_plus_val = np.concatenate((X_train[0], X_train[1], X_train[2], X_val), axis=0)
y_train_plus_val = np.concatenate((y_train[0], y_train[1], y_train[2], y_val), axis=0)


import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier


# Create the list of parameters to loop through
node_list = [(2,2), (5,5), (30,30)]
lr_list = [.0001, .01, 1]
reg_list = [0, 1, 10]
batch_list = [5, 50, 500]


# Create a plot function to call as I loop through the models
def plot(X_train, y_train, model, i, title):    
    X = X_train
    y = y_train
    X = StandardScaler().fit_transform(X)

    # Create decision boundry grid to map onto 
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, .03),
                         np.arange(y_min, y_max, .03))
    
    # Plot the data set with colormap
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    ax = plt.subplot(4, 3, i)
    
    # Fit the model and make predictions, plot them onto the grapg
    model = model.fit(X, y)    
    Z = model.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)
    ax.scatter(X[:, 0], X[:, 1], c=y_train, cmap=cm_bright,
               edgecolors='k')

    # Set the chart formatting
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title, fontsize = 13)
    #ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
    ax.set_xlabel('x1', fontsize = 10)
    ax.set_ylabel('x2', fontsize = 10)


#Aggregate models into one list
mlp_list = []

### Iterate through each paramer and append model to list
for node in node_list:
    mlp_list.append(MLPClassifier(learning_rate_init = 0.03, hidden_layer_sizes = node, alpha = 0, solver = 'sgd', 
                                  tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                                  batch_size = 50, max_iter = 500))
for lr in lr_list:
    mlp_list.append(MLPClassifier(learning_rate_init = lr, hidden_layer_sizes = (30, 30), alpha = 0, solver = 'sgd', 
                                  tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                                  batch_size = 50, max_iter = 500))  
for reg in reg_list:
    mlp_list.append(MLPClassifier(learning_rate_init = 0.03, hidden_layer_sizes = (30, 30), alpha = reg, solver = 'sgd', 
                                  tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                                  batch_size = 50, max_iter = 500))  
    
for batch in batch_list:
    mlp_list.append(MLPClassifier(learning_rate_init = 0.03, hidden_layer_sizes = (30, 30), alpha = 0, solver = 'sgd', 
                                  tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                                  batch_size = batch, max_iter = 500))  
#


# Hide warnings
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

# Create parameters for sub plots
rows = 4
cols = 4
i = 1
fig, ax = plt.subplots(4, 3, figsize = (15,15))

#loop through model list created about and plot them
for index, model in enumerate(mlp_list):
    if i <= 3:
        #print(i, index)
        title = f'Nodes Per Layer: {node_list[index]}'
        plot(X_train[0], y_train[0], model, i, title)
        i += 1
    if (i > 3 and i <= 6): 
        #print(i, index)
        title = f'Learning Rate: {lr_list[index-3]}'
        plot(X_train[0], y_train[0], model, i, title)
        i += 1    
    if (i > 6 and i <= 9): 
        #print(i, index)
        title = f'Regularization Coefficient: {reg_list[index-6]}'
        plot(X_train[0], y_train[0], model, i, title)
        i += 1  
    if (i > 9 and i <= 12):        
        #print(i, index)
        title = f'Batch Size: {batch_list[index-9]}'
        plot(X_train[0], y_train[0], model, i, title)
        i += 1


# Create parameters for learning rate, regularization, and batch size
log_lrs = np.logspace(-5,0, 20)
log_reg_param = np.logspace(-8,2, 20)
batch_size = [1,3,5,10,20,50,100,250,500]

# Initialize empty lists to appends scores too 
log_lr_list = []
reg_param_list = []
batchsize_list = []


# Loop through all the parameters we are testing, train a model and get a score to append to list
for lr in log_lrs:
    model = MLPClassifier(learning_rate_init = lr, hidden_layer_sizes = (30, 30), alpha = 0, solver = 'sgd', 
                          tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                          batch_size = 50, max_iter = 500) 
 
    # Average the score on all three training sets
    score_list = []
    for index in range(0,3):
        model.fit(X_train[index], y_train[index])
        score_list.append(model.score(X_val,y_val))
    log_lr_list.append(sum(score_list)/len(score_list))
    
for reg in log_reg_param:
    model = MLPClassifier(learning_rate_init = .03, hidden_layer_sizes = (30, 30), alpha = reg, solver = 'sgd', 
                          tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                          batch_size = 50, max_iter = 500) 
   
    # Average the score on all three training sets
    score_list = []
    for index in range(0,3):
        model.fit(X_train[index], y_train[index])
        score_list.append(model.score(X_val,y_val))
    reg_param_list.append(sum(score_list)/len(score_list))
    
    
for batch in batch_size:
    model = MLPClassifier(learning_rate_init = .03, hidden_layer_sizes = (30, 30), alpha = 0, solver = 'sgd', 
                          tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                          batch_size = batch, max_iter = 500) 
    
    # Average the score on all three training sets
    score_list = []
    for index in range(0,3):
        model.fit(X_train[index], y_train[index])
        score_list.append(model.score(X_val,y_val))
    batchsize_list.append(sum(score_list)/len(score_list))


plt.figure(figsize = (10,10))

# Plot learning rates
plt.subplot(2,2,1)
plt.plot(log_lrs, log_lr_list)
plt.xscale("log")
plt.title('Prediction Scores Varried by Learning Rate')
plt.xlabel('Log of (C) parameter')
plt.ylabel('Score')
plt.show()


# Plot Regulatization 
plt.figure(figsize = (10,10))
plt.subplot(2,2,2)
plt.plot(log_reg_param, reg_param_list)
plt.xscale("log")
plt.title('Prediction Scores Varried by Regularization')
plt.xlabel('Log of (C) parameter')
plt.ylabel('Score')
plt.show()



# Plot batch size
plt.figure(figsize = (10,10))
plt.subplot(2,2,3)
plt.plot(batch_size, batchsize_list)
#plt.xscale("log")
plt.title('Prediction Scores Varried by Batch Size')
plt.xlabel('Batch Size')
plt.ylabel('Score')
plt.show()


# Find the index of the best score value, and get corresponding score associated with them
lrmax = np.argsort(log_lr_list)[19]
lr_param = round(log_lrs[lrmax], 4)

regmax = np.argsort(reg_param_list)[19]
reg_param = round(log_reg_param[regmax], 4)

batchmax = np.argsort(batchsize_list)[8]
batch_param = batch_size[batchmax]

print(f'Optimal Hyperparameters- Learning Rate: {lr_param}, Regularization: {reg_param}, Batch size: {batch_param}')

Optimal Hyperparameters- Learning Rate: 0.0043, Regularization: 0.0018, Batch size: 250


nodes = [1,2,3,4,5,10,15,25,30]
layers = [1,2,3,4]
node_score_list = []
#hl_score_list = []

for node in nodes:
    hl_score_list = []
    for layer in layers:
        hidden = None
        if layer == 1:
            hidden = (node)
        if layer == 2:
            hidden = (node, node)
        if layer == 3:
            hidden = (node, node, node)
        if layer == 4:
            hidden = (node, node, node, node)
        model = MLPClassifier(learning_rate_init = .0043, hidden_layer_sizes = hidden, alpha = .0018, solver = 'sgd', 
                          tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                          batch_size = 250, max_iter = 500)    
        score_list = []
        for index in range(0,3):
            model.fit(X_train[index], y_train[index])
            score_list.append(model.score(X_val,y_val))
        hl_score_list.append((sum(score_list)/len(score_list)))
        #print(f'h1 {hl_score_list}')
    node_score_list.append(hl_score_list)
    #print(f'node {node_score_list}')


node_array = np.asarray(node_score_list)
node_array = node_array.round(2)


import seaborn as sns 

# Reformat the list of scores and put into dataframe
node_score_list = np.reshape(node_score_list, (4, 9))
df = pd.DataFrame(node_array)
df.column = ['1', '2', '3', '4']
df = df.rename({0:"1 Layer",1:"2 Layer", 2:"3 Layer", 3:"4 Layer",}, axis='columns')
df = df.rename({0:"1 Node",1:"2 Nodes", 2:"3 Nodes", 3:"4 Nodes", 4:"5 Nodes",5:"10 Nodes", 6:"15 Nodes", 7:"25 Nodes", 8:"30 Nodes"}, axis='index')

# Plot and the scores
plt.figure(figsize=(20, 10))
plt.title('Heatmap Scorelist')
plt.xlabel('Number of Layers')
plt.ylabel('Number of Nodes')
sns.heatmap(df, cmap="YlGnBu", annot=True, fmt=".2f", annot_kws={'size':10}, square=True)

<AxesSubplot:title={'center':'Heatmap Scorelist'}>


from sklearn.metrics import roc_curve, auc


# Initialize model with best parameters from parts B and C
modeld = MLPClassifier(learning_rate_init = lr_param, hidden_layer_sizes = (10, 10, 10, 10), alpha = reg_param, solver = 'sgd', 
                          tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                          batch_size = batch_param, max_iter = 500)

#Train and score model
modeld.fit(X_train_plus_val, y_train_plus_val)
print(f'The final accuracy of my model is {round(modeld.score(X_test, y_test),2)}')

The final accuracy of my model is 0.73


# Create a node and layer list that includes every combination to feed into the random search model
nodes = [1,2,3,4,5,10,15,25,30]
layers = [1,2,3,4]
node_score_list = []
#hl_score_list = []
hl_score_list = []
for node in nodes:
    # hl_score_list = []
    for layer in layers:
        hidden = None
        if layer == 1:
            hidden = (node)
        if layer == 2:
            hidden = (node, node)
        if layer == 3:
            hidden = (node, node, node)
        if layer == 4:
            hidden = (node, node, node, node)
        hl_score_list.append(hidden)


from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from time import time
import scipy.stats as stats
from sklearn.utils.fixes import loguniform


# Create MLP model with desired parameters, omitting the ones we will use random search to find
clf = MLPClassifier(solver = 'sgd', tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                           max_iter = 500)

# A function to print results of random search
def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})"
                  .format(results['mean_test_score'][candidate],
                          results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")
            
# specify parameters and distributions to sample from
param_dist = {'learning_rate_init': loguniform(1e-5, 1e0),
              'alpha': loguniform(1e-8, 1e2),
              'hidden_layer_sizes':  [(2,2), (5,5), (20,20), (30,30)],
              'batch_size': list(range(1,500))}

# run randomized search
n_iter_search = 25
random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                   n_iter=n_iter_search, verbose=1).fit(X_train_plus_val,y_train_plus_val)

# Time and output results of random search model
start = time()
random_search.fit(X_train_plus_val, y_train_plus_val)
print("RandomizedSearchCV took %.2f seconds for %d candidates"
      " parameter settings." % ((time() - start), n_iter_search))
report(random_search.cv_results_)

Fitting 5 folds for each of 25 candidates, totalling 125 fits

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 125 out of 125 | elapsed: 23.2min finished

Fitting 5 folds for each of 25 candidates, totalling 125 fits

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 125 out of 125 | elapsed: 10.1min finished

RandomizedSearchCV took 616.66 seconds for 25 candidates parameter settings.
Model with rank: 1
Mean validation score: 0.727 (std: 0.118)
Parameters: {'alpha': 1.7621565331655533e-07, 'batch_size': 117, 'hidden_layer_sizes': (5, 5), 'learning_rate_init': 0.013250026582359962}

Model with rank: 2
Mean validation score: 0.726 (std: 0.122)
Parameters: {'alpha': 0.2738765026000446, 'batch_size': 245, 'hidden_layer_sizes': (20, 20), 'learning_rate_init': 0.03890182490940563}

Model with rank: 3
Mean validation score: 0.722 (std: 0.112)
Parameters: {'alpha': 3.682940821720398e-05, 'batch_size': 58, 'hidden_layer_sizes': (20, 20), 'learning_rate_init': 0.0017023572117358936}


from sklearn import metrics

# Create a model with optimal parameters from random search
randomMod =  MLPClassifier(learning_rate_init = 0.013250026582359962, alpha = 1.7621565331655533e-07, hidden_layer_sizes = (5, 5),
                           solver = 'sgd', tol = 1e-5, early_stopping = False, activation = 'relu', n_iter_no_change = 1000, 
                           max_iter = 500)
randomMod.fit(X_train_plus_val, y_train_plus_val)

# Creacte ROC scores and thresholds
predictionsD = modeld.predict_proba(X_test)[:,1]
fprModD, tprModD, thresholdModD = roc_curve(y_test, modeld.predict_proba(X_test)[:,1])
auc_modD = metrics.roc_auc_score(y_test, predictionsD)

predictionsRand = randomMod.predict_proba(X_test)[:,1]
fprRand, tprRand, thresholdModD = roc_curve(y_test, randomMod.predict_proba(X_test)[:,1])
auc_modRand = metrics.roc_auc_score(y_test, predictionsRand)


# Plot ROC Scores 
plt.figure(figsize = (10,10))

plt.plot(fprModD, tprModD,label = 'Parameter Iteration Model AUC score = %0.2f' % auc_modD)
plt.plot(fprRand, tprRand,label = 'Random Search AUC score = %0.2f' % auc_modRand)
plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Random guess')

plt.legend(loc = 'lower right')
plt.title('ROC Curve For Neural Nets')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid('on')
plt.axis('square')



plt.show()


randomMod.fit(X_train_plus_val, y_train_plus_val)
print(f'The final accuracy of my Random Search model is {round(randomMod.score(X_test, y_test),2)}')

The final accuracy of my Random Search model is 0.72


# Create the list of parameters to loop through
model_list = []
model_list.append(randomMod)
model_list.append(modeld)

# Create a plot function to call for both models
def plot(X_train, y_train, model, i, title):    
    X = X_train
    y = y_train
    X = StandardScaler().fit_transform(X)

    # Create decision boundry grid to map onto 
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, .03),
                         np.arange(y_min, y_max, .03))
    
    # Plot the data set with colormap
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    ax = plt.subplot(4, 3, i)
    
    # Fit the model and make predictions, plot them onto the grapg
    model = model.fit(X, y)    
    Z = model.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)
    ax.scatter(X[:, 0], X[:, 1], c=y_train, cmap=cm_bright,
               edgecolors='k')

    # Set the chart formatting
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title, fontsize = 13)
    #ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
    ax.set_xlabel('x1', fontsize = 10)
    ax.set_ylabel('x2', fontsize = 10)


# Hide warnings
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

# Create parameters for sub plots
rows = 4
cols = 4
i = 1
fig, ax = plt.subplots(1,2, figsize = (15,15))

#loop through model list created about and plot them
for index, model in enumerate(model_list):
        if i == 1:
            title = f'Random Search Model'
        else:
            title = "Optimized Parameter model from D"
        plot(X_train[0], y_train[0], model, i, title)
        i += 1

Neural Networks¶

Caleb O'Neel¶

Learning objectives¶

1¶

Get to know your networks¶