I'm trying to modify the TensorFlow-Slim OverFeat network to classify small images: the image size is 60x60 and there are 3 classes. I'm using TensorFlow v0.12 on Ubuntu 14.04 with a TITAN X GPU.
My first network is:
import tensorflow as tf
slim = tf.contrib.slim
def trunc_normal(stddev):
    """Return a zero-mean truncated-normal initializer with the given stddev."""
    return tf.truncated_normal_initializer(0.0, stddev)
def overfeat_arg_scope(weight_decay=0.0005):
    """Build the default slim arg_scope used by the OverFeat layers.

    Args:
      weight_decay: scale handed to `slim.l2_regularizer` for the weights of
        conv2d and fully_connected layers.

    Returns:
      The arg_scope captured from the innermost `slim.arg_scope` context.
      It carries: ReLU activation, L2 weight regularization and the default
      constant bias initializer for conv2d / fully_connected; 'SAME' padding
      for conv2d; 'VALID' padding for max_pool2d.
    """
    weighted_layers = [slim.conv2d, slim.fully_connected]
    l2_penalty = slim.l2_regularizer(weight_decay)
    bias_init = tf.constant_initializer()

    # A single `with` carrying several managers nests them left to right,
    # exactly like three stacked `with` statements.
    with slim.arg_scope(weighted_layers,
                        activation_fn=tf.nn.relu,
                        weights_regularizer=l2_penalty,
                        biases_initializer=bias_init), \
            slim.arg_scope([slim.conv2d], padding='SAME'), \
            slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
        return arg_sc
def overfeat(inputs,
             num_classes=1000,
             is_training=True,
             dropout_keep_prob=0.5,
             spatial_squeeze=False,
             reuse=None,
             scope='overfeat'):
    """Build a reduced OverFeat-style classifier from slim layers.

    Layer stack: conv11 -> conv12 -> pool1 -> conv2 -> pool2 -> conv3 ->
    conv4 -> conv5 -> pool5 -> fc6 (3x3 VALID conv) -> dropout6 ->
    fc7 (1x1 conv) -> [average pool, training only] -> Dropout ->
    fc9 (1x1 conv producing `num_classes` channels, no activation).

    Args:
      inputs: image batch tensor fed to the first convolution
        (presumably NHWC, as slim.conv2d expects by default -- confirm).
      num_classes: number of output channels of the final classifier conv.
      is_training: forwarded to both dropout layers and also tested with a
        plain Python `if`, so it has to be a Python bool, not a tensor.
      dropout_keep_prob: keep probability used by both dropout layers.
      spatial_squeeze: when true, squeeze axes 1 and 2 of the logits and
        store the squeezed tensor in `end_points` under '<scope>/fc8'.
      reuse: forwarded to `tf.variable_scope` to control variable reuse.
      scope: name of the enclosing variable scope.

    Returns:
      A `(net, end_points)` tuple: the logits tensor and a dict built from
      the outputs collected for conv2d, fully_connected and max_pool2d.
    """
    with tf.variable_scope(scope, 'overfeat', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection):
            net = slim.conv2d(inputs, 64, 3, padding='VALID', scope='conv11')
            # Fix: conv12 consumes conv11's output. It used to read `inputs`
            # again, which threw the conv11 result away and left that layer
            # disconnected from the rest of the network.
            net = slim.conv2d(net, 128, 3, padding='VALID', scope='conv12')
            net = slim.max_pool2d(net, 2, scope='pool1')
            net = slim.conv2d(net, 128, 3, padding='VALID', scope='conv2')
            net = slim.max_pool2d(net, 2, scope='pool2')
            net = slim.conv2d(net, 256, 3, scope='conv3')
            net = slim.conv2d(net, 256, 3, scope='conv4')
            net = slim.conv2d(net, 256, 3, scope='conv5')
            net = slim.max_pool2d(net, 2, scope='pool5')
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=tf.constant_initializer(0.1)):
                # Use conv2d instead of fully_connected layers.
                net = slim.conv2d(net, 512, 3, padding='VALID', scope='fc6')
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training,
                                   scope='dropout6')
                net = slim.conv2d(net, 1024, 1, scope='fc7')
                with tf.variable_scope('Logits'):
                    # pylint: disable=no-member
                    # NOTE(review): global average pooling only happens in
                    # training mode, so the evaluation graph keeps the full
                    # spatial map and yields differently shaped logits.
                    # Confirm this is intended (e.g. dense evaluation).
                    if is_training:
                        net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                              padding='VALID',
                                              scope='AvgPool_1a_8x8')
                    # Fix: dropout regularizes the classifier *input*. It
                    # used to run after fc9, randomly zeroing and rescaling
                    # the logits themselves before they reached the loss.
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    net = slim.conv2d(
                        net,
                        num_classes, 1,
                        activation_fn=None,
                        normalizer_fn=None,
                        biases_initializer=tf.constant_initializer(),
                        scope='fc9')
            # Convert end_points_collection into a end_point dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
def inference(images, num_classes, keep_probability, phase_train=True,
              weight_decay=0.0, reuse=None):
    """Build the OverFeat classifier under its default arg_scope.

    Args:
      images: image batch tensor passed to `overfeat` as `inputs`.
      num_classes: number of classes, passed through to `overfeat`.
      keep_probability: dropout keep probability, passed to `overfeat` as
        `dropout_keep_prob`.
      phase_train: passed to `overfeat` as `is_training`.
      weight_decay: L2 regularization scale for conv / fully connected
        weights, passed to `overfeat_arg_scope`.
      reuse: passed through to `overfeat` for variable-scope reuse.

    Returns:
      The `(net, end_points)` tuple returned by `overfeat`.
    """
    # Fix: forward `weight_decay`. It used to be accepted and then ignored,
    # so the arg_scope default of 0.0005 was always applied regardless of
    # what the caller requested. The unused batch-norm parameter dict was
    # dropped as well: no layer in this network uses batch normalization.
    with slim.arg_scope(overfeat_arg_scope(weight_decay)):
        return overfeat(images, num_classes, is_training=phase_train,
                        dropout_keep_prob=keep_probability, reuse=reuse)
I use a cross-entropy loss computed with the tf.nn.sparse_softmax_cross_entropy_with_logits function.
The training result is: Loss And Accuracy with one 1x1 Conv
This result is passable. I then tried adding one more 1x1 conv after fc7, because I think a 1x1 conv is the same as a fully connected layer and may improve accuracy.
...
net = slim.conv2d(net, 1024, 1, scope='fc7')
net = slim.conv2d(net, 1024, 1, scope='fc7_1')
...
But I got an unreliable result: Loss And Accuracy with two 1x1 Conv
This network fails to optimize; the loss stays at about 1.
Why can't I add more 1x1 conv or fc layers?
And how can I improve this network?
A (1,1) convolution layer is not a fully connected layer. If you want to implement a fully connected layer as a convolution layer, the kernel size of that last layer must equal the spatial size of the feature map produced by the layer before it
(if the feature map of the previous layer is 50x50, your last layer should have a 50x50 kernel). A convolution layer with a (1,1) kernel is similar to an MLP layer applied independently at every spatial position. If you want to understand more about how it works, read the paper "Network in Network".
If I understood correctly, you want to get rid of the fully connected layer, so you have to do two things: