Original article was published by Bob Rupak Roy on Deep Learning on Medium

**Grid Search for Deep Learning**

First we will create a simple Neural Network with default parameters and later we will improve over time using Grid Search

For this example we will use a Churn modelling dataset with details having gender, credits core, age, tenure, location etc. A common churn modelling data set that we already have came across.

#Importing the libraries

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd#Importing the dataset

dataset = pd.read_csv('Churn_Modelling.csv')

X = dataset.iloc[:, 3:13].values

y = dataset.iloc[:, 13].values#Encoding categorical data

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

from sklearn.compose import ColumnTransformer#country column

ct = ColumnTransformer([("Country", OneHotEncoder(), [1])], remainder = 'passthrough')

X = ct.fit_transform(X)#to avoid dummy variable trap

X = X[:, 1:]#Male/Female

labelencoder_X = LabelEncoder()

X[:, 3] = labelencoder_X.fit_transform(X[:, 3])#Splitting the dataset into the Training set and Test set

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)#Feature Scaling

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X_train = sc.fit_transform(X_train)

X_test = sc.transform(X_test)#Creating the Ann model

#Importing the Keras libraries and packages

import keras

from keras.models import Sequential

from keras.layers import Dense#Initialising the ANN

classifier = Sequential()#Adding the input layer and the first hidden layer

classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))#Adding the second hidden layer

classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))#Adding the output layer

classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))#Compiling the ANN

classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])#Fitting the ANN to the Training set

classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)#Part 3 - Making the predictions and evaluating the model#Predicting the Test set results

y_pred = classifier.predict(X_test)

y_pred = (y_pred > 0.5)#Making the Confusion Matrix

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

We have Accuracy Score of 83.5% remember that. Now let’s

**Grid Search the batch_size and epochs**then followed by**Grid Search Optimizer****Grid Search Learning Rate and Momentum****Network Weight Initialization****Neuron Activation****Tune Dropout Regularization****Tune Drop out Regularization****Tune Number of Neurons**

#scikit-learn to grid search the batch size and epochsimport numpy

from sklearn.model_selection import GridSearchCV

from keras.models import Sequential

from keras.layers import Dense

from keras.wrappers.scikit_learn import KerasClassifier#Function to create model, required for KerasClassifier

def create_model():

model=Sequential()

model.add(Dense(units = 6, kernel_initializer='uniform',activation = 'relu',input_dim = 11))

model.add(Dense(units = 6, kernel_initializer='uniform',activation = 'relu'))

model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))#compile model

model.compile(optimizer = 'adam',loss = 'binary_crossentropy', metrics = ['accuracy'])return model#Importing the libraries

import numpy as np

import pandas as pd#Importing the dataset

dataset = pd.read_csv('Churn_Modelling.csv')X = dataset.iloc[:, 3:13].values

y = dataset.iloc[:, 13].values#Encoding categorical data

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

from sklearn.compose import ColumnTransformer#country column

ct = ColumnTransformer([("Country", OneHotEncoder(), [1])], remainder = 'passthrough')X = ct.fit_transform(X)

#to avoid dummy variable trap

X = X[:, 1:]#Male/Female

labelencoder_X = LabelEncoder()

X[:, 3] = labelencoder_X.fit_transform(X[:, 3])#Splitting the dataset into the Training set and Test set

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)#Feature Scaling

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X_train = sc.fit_transform(X_train)

X_test = sc.transform(X_test)

Till here it’s the our regular data pre-processing step, now let’s define parameter list for batch size and epochs

model = KerasClassifier(build_fn=create_model, verbose=1)#define the grid search parameters

batch_size = [10, 20, 40]

epochs = [10, 50,100,200]

param_grid = dict(batch_size=batch_size, epochs=epochs)grid = GridSearchCV(estimator=model, param_grid=param_grid,cv=3)

grid_result = grid.fit(X_train, y_train)

Here we have first bind the list of parameters as dict ‘dictionary’ in param_grid then define our model in estimator and the parameter list in param_grid with cross validation cv = 3, means it will test 3 times and will give u the average results of 3 iterations.

Finally we will summarize the results.

#summarize results

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))means = grid_result.cv_results_['mean_test_score']

stds = grid_result.cv_results_['std_test_score']params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):

print("%f (%f) with: %r" % (mean, stdev, param))

Let me put all the pieces together.

#tune Batch_size and epoch

#Use scikit-learn to grid search the batch size and epochs

import numpy

from sklearn.model_selection import GridSearchCV

from keras.models import Sequential

from keras.layers import Dense

from keras.wrappers.scikit_learn import KerasClassifier#Function to create model, required for KerasClassifier

def create_model():

model=Sequential()

model.add(Dense(units = 6, kernel_initializer='uniform',activation = 'relu',input_dim = 11))

model.add(Dense(units = 6, kernel_initializer='uniform',activation = 'relu'))

model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))

#compile model

model.compile(optimizer = 'adam',loss = 'binary_crossentropy', metrics = ['accuracy'])

return model#Importing the libraries

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd#Importing the dataset

dataset = pd.read_csv('Churn_Modelling.csv')

X = dataset.iloc[:, 3:13].values

y = dataset.iloc[:, 13].values#Encoding categorical data

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

from sklearn.compose import ColumnTransformer#country column

ct = ColumnTransformer([("Country", OneHotEncoder(), [1])], remainder = 'passthrough')

X = ct.fit_transform(X)

#to avoid dummy variable trap

X = X[:, 1:]#Male/Female

labelencoder_X = LabelEncoder()

X[:, 3] = labelencoder_X.fit_transform(X[:, 3])#Splitting the dataset into the Training set and Test set

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)#Feature Scaling

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X_train = sc.fit_transform(X_train)

X_test = sc.transform(X_test)######################################################3#create model

model = KerasClassifier(build_fn=create_model, verbose=1)#define the grid search parameters

batch_size = [10, 20, 40]

epochs = [10, 50,100,200]

param_grid = dict(batch_size=batch_size, epochs=epochs)

grid = GridSearchCV(estimator=model, param_grid=param_grid,cv=3)grid_result = grid.fit(X_train, y_train)#summarize results

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))means = grid_result.cv_results_['mean_test_score']

stds = grid_result.cv_results_['std_test_score']params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):

print("%f (%f) with: %r" % (mean, stdev, param))

We will have Output Best results as Epoch = 210, batch_size = 10

Alright we have our best optimal epoch and batch_size settings that we need to put in our model to increase our model accuracy. Let’s redo our model with these settings and see if it improves or not.

#Importing the libraries

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd#Importing the dataset

dataset = pd.read_csv('Churn_Modelling.csv')

X = dataset.iloc[:, 3:13].values

y = dataset.iloc[:, 13].values#Encoding categorical data

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

from sklearn.compose import ColumnTransformer#country column

ct = ColumnTransformer([("Country", OneHotEncoder(), [1])], remainder = 'passthrough')

X = ct.fit_transform(X)

#to avoid dummy variable trap

X = X[:, 1:]#Male/Female

labelencoder_X = LabelEncoder()

X[:, 3] = labelencoder_X.fit_transform(X[:, 3])#Splitting the dataset into the Training set and Test set

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)#Feature Scaling

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X_train = sc.fit_transform(X_train)

X_test = sc.transform(X_test)#Part 2 - Now let's make the ANN!#Importing the Keras libraries and packages

import keras

from keras.models import Sequential

from keras.layers import Dense#Initialising the ANN

classifier = Sequential()#Adding the input layer and the first hidden layer

classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))#Adding the second hidden layer

classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))#Adding the output layer

classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))#Compiling the ANN

classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])#Fitting the ANN to the Training set

classifier.fit(X_train, y_train, batch_size = 10, epochs = 210)#Part 3 - Making the predictions and evaluating the model#Predicting the Test set results

y_pred = classifier.predict(X_test)

y_pred = (y_pred > 0.5)#Making the Confusion Matrix

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

Nice! we have improved our model from 83% to 86%