I want to wrap the tf.metrics around a Sonnet module for measuring performance of each batch, and the following is the work I have done:
import tensorflow as tf
import sonnet as snt
class Metrics(snt.AbstractModule):
def __init__(self, indicator, summaries = None, name = "metrics"):
super(Metrics, self).__init__(name = name)
self._indicator = indicator
self._summaries = summaries
def _build(self, labels, logits):
if self._indicator == "accuracy":
metric, metric_update = tf.metrics.accuracy(labels, logits)
with tf.control_dependencies([metric_update]):
outputs = tf.identity(metric)
elif self._indicator == "precision":
metric, metric_update = tf.metrics.precision(labels, logits)
with tf.control_dependencies([metric_update]):
outputs = tf.identity(metric)
elif self._indicator == "recall":
metric, metric_update = tf.metrics.recall(labels, logits)
with tf.control_dependencies([metric_update]):
outputs = tf.identity(metric)
elif self._indicator == "f1_score":
metric_recall, metric_update_recall = tf.metrics.recall(labels, logits)
metric_precision, metric_update_precision = tf.metrics.precision(labels, logits)
with tf.control_dependencies([metric_update_recall, metric_update_precision]):
outputs = 2.0 / (1.0 / metric_recall + 1.0 / metric_precision)
raise ValueError("unsupported metrics")
if type(self._summaries) == list:
self._summaries.append(tf.summary.scalar(self._indicator, outputs))
return outputs
However, when I want to test the module, the following code works:
def test3():
import numpy as np
labels = tf.constant([1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], tf.int32)
logits = tf.constant([1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], tf.int32)
metrics = Metrics("accuracy")
accuracy = metrics(labels, logits)
metrics2 = Metrics("f1_score")
f1_score = metrics2(labels, logits)
writer = tf.summary.FileWriter("utils-const", tf.get_default_graph())
with tf.Session() as sess:
sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
accu, f1 = sess.run([accuracy, f1_score])
However the following code does NOT work:
def test4():
from tensorflow.python import debug as tf_debug
import numpy as np
tf_labels = tf.placeholder(dtype=tf.int32, shape=[None])
tf_logits = tf.placeholder(dtype=tf.int32, shape=[None])
labels = np.array([1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], np.int32)
logits = np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], np.int32)
metrics = Metrics("accuracy")
accuracy = metrics(tf_labels, tf_logits)
metrics2 = Metrics("f1_score")
f1_score = metrics2(tf_labels, tf_logits)
writer = tf.summary.FileWriter("utils-feed", tf.get_default_graph())
with tf.Session() as sess:
sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
sess = tf_debug.LocalCLIDebugWrapperSession(sess)
accu, f1 = sess.run([accuracy, f1_score], feed_dict = {tf_labels: labels, tf_logits: logits})
The output of test3() is correct, 0.88. The output of test4() is wrong, 0.0. However, they should be equivalent.
Anyone has any idea?
Are you sure it is not the
version that fails? I findtf.metrics
having a weird behavior in combination withtf.constant
:returns, when run on the GPU,
instead of
s. It looks as if the count is lagging by one. (I am assuming the first value would beinf
but is zero due to some conditions oncount
). A placeholder version of this code is running as expected on the other hand.On the CPU, the behavior is even weirder, as the output is non-deterministic. Example of output:
Looks like a bug you could log on tensorflow's github repo. (Note that using running metrics on constants is less than useful -- but it is still a bug).
EDIT Now I also stumbled on weird examples with a
, it seems thattf.metrics
has a bug that is unfortunately not limited to its use withtf.constant