I'm investigating an issue where my worker thread deadlocks when I try to stop it.
Here's the minimal version that has the problem:
#include <atomic>
#include <condition_variable>
#include <functional>
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>
class Worker
{
private:
std::atomic<bool> m_running;
std::condition_variable m_cond;
std::mutex m_mutex;
std::function<void()> m_workItem;
std::thread m_thread;
public:
Worker() :
m_running(true),
m_thread(std::bind(&Worker::DoWork, this))
{
}
~Worker()
{
Stop();
m_thread.join();
}
bool QueueWork(std::function<void()> item)
{
std::unique_lock<std::mutex> padlock(m_mutex);
if (m_workItem)
{
return false;
}
m_workItem = item;
padlock.unlock();
m_cond.notify_all();
return true;
}
void Stop()
{
bool expected = true;
if (m_running.compare_exchange_strong(expected, false))
{
m_cond.notify_all();
}
}
private:
void DoWork() noexcept
{
while (m_running)
{
std::unique_lock<std::mutex> padlock(m_mutex);
m_cond.wait(padlock,
[this]
()
{
return !m_running || m_workItem;
});
if (m_workItem)
{
decltype(m_workItem) workItem;
std::swap(m_workItem, workItem);
padlock.unlock();
workItem();
}
}
}
};
int main()
{
std::cout << "Start." << std::endl;
{
std::vector<std::unique_ptr<Worker>> workers;
for (int i = 0; i < 10; ++i)
{
workers.push_back(std::unique_ptr<Worker>(new Worker()));
}
workers[0]->QueueWork(
[]()
{
std::cout << "Work item" << std::endl;
});
std::this_thread::sleep_for(std::chrono::milliseconds(10));
for (auto & worker : workers)
{
worker->Stop();
}
for (auto & worker : workers)
{
worker.reset();
}
}
std::cout << "Stop." << std::endl;
return 0;
}
The idea is that when calling Worker::Stop()
from the host thread, Worker::m_running
is set to false
, and notify_all()
is called on Worker::m_cond
. The worker thread wakes up from its m_cond.wait()
, checks m_running
and breaks out, exiting the thread function.
Sometimes, however, this deadlocks. The worker thread wakes up, sees that m_running
is true
(how is that possible?) and goes back to wait()
. There's no additional call to m_cond.notify_all()
so the thread ends up in a deadlocked state.
I spawn 10 Worker
objects in this code. I don't think it has anything to do with the number of threads, but to be able to trigger the race condition (if that's what it is), I needed more threads.
What is wrong with the code?
Running gcc:
g++ --version
g++ (Ubuntu 4.9.2-0ubuntu1~14.04) 4.9.2
Copyright (C) 2014 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
Compiling using:
g++ -std=c++11 -pedantic test.cpp -lpthread
Edit:
Changed the order of Worker
's private members. m_thread
is now last. Still the same problem.