Early Stopping with TensorFlow and TFLearn

import tensorflow as tf
import tflearn
import tflearn.datasets.mnist as mnist

trainX, trainY, testX, testY = mnist.load_data(one_hot=True)
hdf5 not supported (please install/reinstall h5py)
Extracting mnist/train-images-idx3-ubyte.gz
Extracting mnist/train-labels-idx1-ubyte.gz
Extracting mnist/t10k-images-idx3-ubyte.gz
Extracting mnist/t10k-labels-idx1-ubyte.gz
n_features = 784
n_hidden = 256
n_classes = 10

# Define the inputs/outputs/weights as usual.
X = tf.placeholder("float", [None, n_features])
Y = tf.placeholder("float", [None, n_classes])

# Define the connections/weights and biases between layers.
W1 = tf.Variable(tf.random_normal([n_features, n_hidden]), name='W1')
W2 = tf.Variable(tf.random_normal([n_hidden, n_hidden]), name='W2')
W3 = tf.Variable(tf.random_normal([n_hidden, n_classes]), name='W3')

b1 = tf.Variable(tf.random_normal([n_hidden]), name='b1')
b2 = tf.Variable(tf.random_normal([n_hidden]), name='b2')
b3 = tf.Variable(tf.random_normal([n_classes]), name='b3')

# Define the operations throughout the network.
net = tf.tanh(tf.add(tf.matmul(X, W1), b1))
net = tf.tanh(tf.add(tf.matmul(net, W2), b2))
net = tf.add(tf.matmul(net, W3), b3)

# Define the optimization problem.
loss      = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net, Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
accuracy  = tf.reduce_mean(tf.cast(
        tf.equal(tf.argmax(net, 1), tf.argmax(Y, 1) ), tf.float32), name='acc')

Early Stopping

Training Setup

In tflearn, we can train our model with a tflearn.Trainer object: “Generic class to handle any TensorFlow graph training. It requires the use of TrainOp to specify all optimization parameters.”

  • TrainOp represents a set of operation used for optimizing a network.

  • Example: Time to initialize our trainer to work with our MNIST network. Below we create a TrainOp object that is then used for the purpose of telling our trainer

    1. Our loss function. (softmax cross entropy with logits)
    2. Our optimizer. (GradientDescentOptimizer)
    3. Our evaluation [tensor] metric. (classification accuracy)
trainop = tflearn.TrainOp(loss=loss, optimizer=optimizer, metric=accuracy, batch_size=128)
trainer = tflearn.Trainer(train_ops=trainop, tensorboard_verbose=1)


The Callbacks interface describes a set of methods that we can implement ourselves that will be called during runtime. Below are our options, where here we will be primarily concerned with the on_epoch_end() method. * __ Methods __ :

    def on_train_begin(self, training_state):
    def on_epoch_begin(self, training_state):
    def on_batch_begin(self, training_state):
    def on_sub_batch_begin(self, training_state):
    def on_sub_batch_end(self, training_state, train_index=0):
    def on_batch_end(self, training_state, snapshot=False):
    def on_epoch_end(self, training_state):
    def on_train_end(self, training_state):
  • TrainingState: Notice that each method requires us to pass a training_state object as an argument. These useful helpers will be able to provide us with the information we need to determine when to stop training. Below is a list of the instance variables we can access with a training_state object:
    • self.epoch
    • self.step
    • self.current_iter
    • self.acc_value
    • self.loss_value
    • self.val_acc
    • self.val_loss
    • self.best_accuracy
    • self.global_acc
    • self.global_loss
  • Implementing our Callback: Let’s say we want to stop training when the validation accuracy reaches a certain threshold. Below, we implement the code required to define such a callback and fit the MNIST data.
class EarlyStoppingCallback(tflearn.callbacks.Callback):
    def __init__(self, val_acc_thresh):
        """ Note: We are free to define our init function however we please. """
        self.val_acc_thresh = val_acc_thresh
    def on_epoch_end(self, training_state):
        """ """
        # Apparently this can happen.
        if training_state.val_acc is None: return
        if training_state.val_acc > self.val_acc_thresh:
            raise StopIteration
# Initializae our callback.
early_stopping_cb = EarlyStoppingCallback(val_acc_thresh=0.5)
# Give it to our trainer and let it fit the data. 
trainer.fit(feed_dicts={X: trainX, Y: trainY}, 
            val_feed_dicts={X: testX, Y: testY}, 
            show_metric=True, # Calculate accuracy and display at every step.
Training Step: 1720  | total loss: 0.81290
| Optimizer | epoch: 004 | loss: 0.81290 - acc_2: 0.8854 -- iter: 55000/55000

Using tf.contrib.learn instead

Iris data loading/tutorial prep

Note: can also load via: ```python import csv import random import numpy as np from sklearn import datasets from sklearn.cross_validation import train_test_split

X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.33, random_state=42) iris = datasets.load_iris() print(iris.data.shape) print(“Xt”, X_train.shape, “Yt”, y_train.shape) ```

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Suppress the massive amount of warnings.

# Data sets
IRIS_TRAINING = "iris_training.csv"
IRIS_TEST = "iris_test.csv"

# Load datasets.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(filename=IRIS_TRAINING,
test_set = tf.contrib.learn.datasets.base.load_csv_with_header(filename=IRIS_TEST,

# Specify that all features have real-value data
feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)]

# Build 3 layer DNN with 10, 20, 10 units respectively.
classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[10, 20, 10],

# Fit model.

# Evaluate accuracy.
accuracy_score = classifier.evaluate(x=X_test, y=y_test)["accuracy"]
print('Accuracy: {0:f}'.format(accuracy_score))

# Classify two new flower samples.
new_samples = np.array([[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)

y = classifier.predict(new_samples)
print('Predictions: {}'.format(str(y)))
Accuracy: 0.980000
Predictions: [1 1]

Validation Monitors

# Vanilla version
validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(test_set.data,

classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[10, 20, 10],

Estimator(params={'dropout': None, 'hidden_units': [10, 20, 10], 'weight_column_name': None, 'feature_columns': [_RealValuedColumn(column_name='', dimension=4, default_value=None, dtype=tf.float32, normalizer=None)], 'optimizer': 'Adagrad', 'n_classes': 3, 'activation_fn': <function relu at 0x7f8568caa598>, 'num_ps_replicas': 0, 'gradient_clip_norm': None, 'enable_centered_bias': True})

Customizing the Evaluation Metrics and Stopping Early

If we run the code below, it stops early! Warning: You’re going to see a lot of WARNING print outputs from tf. I guess this tutorial is a bit out of date. But that’s not what we care abot here, we just want that early stopping! The important output to notice is

INFO:tensorflow:Validation (step 22556): accuracy = 0.966667, global_step = 22535, loss = 0.2767
INFO:tensorflow:Stopping. Best step: 22356 with loss = 0.2758353650569916.
validation_metrics = {"accuracy": tf.contrib.metrics.streaming_accuracy,
                      "precision": tf.contrib.metrics.streaming_precision,
                      "recall": tf.contrib.metrics.streaming_recall}

validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(

Estimator(params={'dropout': None, 'hidden_units': [10, 20, 10], 'weight_column_name': None, 'feature_columns': [_RealValuedColumn(column_name='', dimension=4, default_value=None, dtype=tf.float32, normalizer=None)], 'optimizer': 'Adagrad', 'n_classes': 3, 'activation_fn': <function relu at 0x7f8568caa598>, 'num_ps_replicas': 0, 'gradient_clip_norm': None, 'enable_centered_bias': True})