I want to wrap the tf.metrics around a Sonnet module for measuring performance of each batch, and the following is the work I have done:
import tensorflow as tf
import sonnet as snt
class Metrics(snt.AbstractModule):
    def __init__(self, indicator, summaries = None, name = "metrics"):
        super(Metrics, self).__init__(name = name)
        self._indicator = indicator
        self._summaries = summaries
    def _build(self, labels, logits):
        if self._indicator == "accuracy":
            metric, metric_update = tf.metrics.accuracy(labels, logits)
            with tf.control_dependencies([metric_update]):
                outputs = tf.identity(metric)
        elif self._indicator == "precision":
            metric, metric_update = tf.metrics.precision(labels, logits)
            with tf.control_dependencies([metric_update]):
                outputs = tf.identity(metric)
        elif self._indicator == "recall":
            metric, metric_update = tf.metrics.recall(labels, logits)
            with tf.control_dependencies([metric_update]):
                outputs = tf.identity(metric)
        elif self._indicator == "f1_score":
            metric_recall, metric_update_recall = tf.metrics.recall(labels, logits)
            metric_precision, metric_update_precision = tf.metrics.precision(labels, logits)
            with tf.control_dependencies([metric_update_recall, metric_update_precision]):
                outputs = 2.0 / (1.0 / metric_recall + 1.0 / metric_precision)
        else:
            raise ValueError("unsupported metrics")
        if type(self._summaries) == list:
            self._summaries.append(tf.summary.scalar(self._indicator, outputs))
        return outputs
However, when I want to test the module, the following code works:
def test3():
    import numpy as np
    labels = tf.constant([1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], tf.int32)
    logits = tf.constant([1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], tf.int32)
    metrics = Metrics("accuracy")
    accuracy = metrics(labels, logits)
    metrics2 = Metrics("f1_score")
    f1_score = metrics2(labels, logits)
    writer = tf.summary.FileWriter("utils-const", tf.get_default_graph())
    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
        accu, f1 = sess.run([accuracy, f1_score])
        print(accu)
        print(f1)
    writer.close()
However the following code does NOT work:
def test4():
    from tensorflow.python import debug as tf_debug
    import numpy as np
    tf_labels = tf.placeholder(dtype=tf.int32, shape=[None])
    tf_logits = tf.placeholder(dtype=tf.int32, shape=[None])
    labels = np.array([1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], np.int32)
    logits = np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], np.int32)
    metrics = Metrics("accuracy")
    accuracy = metrics(tf_labels, tf_logits)
    metrics2 = Metrics("f1_score")
    f1_score = metrics2(tf_labels, tf_logits)
    writer = tf.summary.FileWriter("utils-feed", tf.get_default_graph())
    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
        sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        accu, f1 = sess.run([accuracy, f1_score], feed_dict = {tf_labels: labels, tf_logits: logits})
        print(accu)
        print(f1)
    writer.close()
The output of test3() is correct, 0.88. The output of test4() is wrong, 0.0. However, they should be equivalent.
Anyone has any idea?
 
                        
Are you sure it is not the
tf.constantversion that fails? I findtf.metricshaving a weird behavior in combination withtf.constant:returns, when run on the GPU,
instead of
1s. It looks as if the count is lagging by one. (I am assuming the first value would beinfbut is zero due to some conditions oncount). A placeholder version of this code is running as expected on the other hand.On the CPU, the behavior is even weirder, as the output is non-deterministic. Example of output:
Looks like a bug you could log on tensorflow's github repo. (Note that using running metrics on constants is less than useful -- but it is still a bug).
EDIT Now I also stumbled on weird examples with a
tf.placeholder, it seems thattf.metricshas a bug that is unfortunately not limited to its use withtf.constants.