Grid Search For ML & Deep Learning Models

Original article published by Bob Rupak Roy in Deep Learning on Medium.


Grid Search for Deep Learning

First we will create a simple neural network with default parameters, and later we will improve it using Grid Search.

For this example we will use a churn modelling dataset with details such as gender, credit score, age, tenure, location, etc.: a common churn modelling dataset that we have already come across.
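If the file is new to you, it helps to peek at the raw columns first. Below is a minimal sketch, assuming the standard Kaggle Churn_Modelling.csv layout of 10,000 rows and 14 columns; this layout is also why X below takes columns 3 to 12 and y takes column 13.

#Quick look at the raw data (assumes the standard Churn_Modelling.csv layout)
import pandas as pd
dataset = pd.read_csv('Churn_Modelling.csv')
print(dataset.shape)             #(10000, 14) in the standard version
print(dataset.columns.tolist())  #RowNumber, CustomerId, Surname, CreditScore,
                                 #Geography, Gender, Age, Tenure, Balance, NumOfProducts,
                                 #HasCrCard, IsActiveMember, EstimatedSalary, Exited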

#Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
#Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values
#Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
#country column
ct = ColumnTransformer([("Country", OneHotEncoder(), [1])], remainder = 'passthrough')
X = ct.fit_transform(X)
#to avoid dummy variable trap
X = X[:, 1:]
#Male/Female
labelencoder_X = LabelEncoder()
X[:, 3] = labelencoder_X.fit_transform(X[:, 3])
#Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
#Creating the Ann model
#Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
#Initialising the ANN
classifier = Sequential()
#Adding the input layer and the first hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
#Adding the second hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
#Adding the output layer
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
#Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
#Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
#Part 3 - Making the predictions and evaluating the model
#Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
#Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
ANN Output
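If you'd rather print the headline number than read it off the confusion matrix, here is a minimal check, reusing cm, y_test and y_pred from the block above:

#Accuracy check
from sklearn.metrics import accuracy_score
print((cm[0, 0] + cm[1, 1]) / cm.sum())   #(TN + TP) / total predictions
print(accuracy_score(y_test, y_pred))     #same value, about 0.835 in this run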

We have an accuracy score of 83.5%; remember that number. Now let's:

  1. Grid Search the batch_size and epochs, then
  2. Grid Search the optimizer
  3. Grid Search the learning rate and momentum
  4. Tune the network weight initialization
  5. Tune the neuron activation function
  6. Tune dropout regularization
  7. Tune the number of neurons

This article walks through item 1 in detail; the remaining items follow the same recipe, and a sketch covering them is given at the end of the article.
#Use scikit-learn to grid search the batch size and epochs
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
#Function to create model, required for KerasClassifier
def create_model():
    model = Sequential()
    model.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    model.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    #compile model
    model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model
#Importing the libraries
import numpy as np
import pandas as pd
#Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values
#Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
#country column
ct = ColumnTransformer([("Country", OneHotEncoder(), [1])], remainder = 'passthrough')
X = ct.fit_transform(X)
#to avoid dummy variable trap
X = X[:, 1:]
#Male/Female
labelencoder_X = LabelEncoder()
X[:, 3] = labelencoder_X.fit_transform(X[:, 3])
#Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

Till here it’s the our regular data pre-processing step, now let’s define parameter list for batch size and epochs

model = KerasClassifier(build_fn=create_model, verbose=1)
#define the grid search parameters
batch_size = [10, 20, 40]
epochs = [10, 50, 100, 200]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)
grid_result = grid.fit(X_train, y_train)

Here we first bind the lists of parameter values into a dict ('dictionary') called param_grid, then pass our model as the estimator and the parameter grid as param_grid, with cross-validation cv = 3: each parameter combination is trained and scored on 3 folds, and the 3 scores are averaged. Note that the grid multiplies quickly; here it is 3 batch sizes × 4 epoch settings × 3 folds = 36 model fits.

Finally we will summarize the results.

#summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Let me put all the pieces together.

#tune Batch_size and epoch
#Use scikit-learn to grid search the batch size and epochs
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
#Function to create model, required for KerasClassifier
def create_model():
    model = Sequential()
    model.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    model.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    #compile model
    model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model
#Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
#Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values
#Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
#country column
ct = ColumnTransformer([("Country", OneHotEncoder(), [1])], remainder = 'passthrough')
X = ct.fit_transform(X)
#to avoid dummy variable trap
X = X[:, 1:]
#Male/Female
labelencoder_X = LabelEncoder()
X[:, 3] = labelencoder_X.fit_transform(X[:, 3])
#Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
#####################################################
#create model
model = KerasClassifier(build_fn=create_model, verbose=1)
#define the grid search parameters
batch_size = [10, 20, 40]
epochs = [10, 50,100,200]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid,cv=3)
grid_result = grid.fit(X_train, y_train)
#summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

The output gives the best result as epochs = 210 and batch_size = 10.

Alright, we have the optimal epochs and batch_size settings to put into our model to increase its accuracy. Let's rerun our model with these settings and see whether it improves.

#Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
#Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values
#Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
#country column
ct = ColumnTransformer([("Country", OneHotEncoder(), [1])], remainder = 'passthrough')
X = ct.fit_transform(X)
#to avoid dummy variable trap
X = X[:, 1:]
#Male/Female
labelencoder_X = LabelEncoder()
X[:, 3] = labelencoder_X.fit_transform(X[:, 3])
#Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
#Part 2 - Now let's make the ANN!
#Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
#Initialising the ANN
classifier = Sequential()
#Adding the input layer and the first hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
#Adding the second hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
#Adding the output layer
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
#Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
#Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size = 10, epochs = 210)
#Part 3 - Making the predictions and evaluating the model
#Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
#Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
ANN Output with batch_size = 10 & epochs = 210

Nice! We have improved our model's accuracy from 83.5% to 86%.
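As promised in the list at the top, the remaining hyperparameters are tuned with exactly the same recipe: expose each one as an argument of create_model, and KerasClassifier will route the grid values into the function. Below is a minimal sketch for the optimizer, dropout rate and number of neurons; the candidate values here are illustrative rather than from the original run, and it assumes the same pre-processed X_train/y_train and the keras.wrappers.scikit_learn wrapper used throughout this article.

#Tune optimizer, dropout and number of neurons with the same GridSearchCV recipe
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
#Each argument of create_model becomes a tunable parameter for GridSearchCV
def create_model(optimizer = 'adam', dropout_rate = 0.0, neurons = 6):
    model = Sequential()
    model.add(Dense(units = neurons, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units = neurons, kernel_initializer = 'uniform', activation = 'relu'))
    model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model
#batch_size fixed at the value found earlier; epochs kept small so the
#3 optimizers x 3 dropout rates x 2 neuron counts x 3 folds = 54 fits stay affordable
model = KerasClassifier(build_fn=create_model, batch_size=10, epochs=50, verbose=0)
param_grid = dict(optimizer = ['SGD', 'RMSprop', 'Adam'],
                  dropout_rate = [0.0, 0.1, 0.2],
                  neurons = [6, 12])
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)
grid_result = grid.fit(X_train, y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Learning rate and momentum (item 3) fit the same mould: accept them as arguments and build the optimizer inside create_model, for example with keras.optimizers.SGD(lr=learn_rate, momentum=momentum). Weight initialization (item 4) and activation (item 5) can likewise be passed straight into the Dense layers.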