I'm investigating an issue where my worker thread deadlocks when I try to stop it.
Here's the minimal version that has the problem:
#include <atomic>
#include <condition_variable>
#include <functional>
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>
class Worker
{
private:
    std::atomic<bool>       m_running;
    std::condition_variable m_cond;
    std::mutex              m_mutex;
    std::function<void()>   m_workItem;
    std::thread             m_thread;
public:
    Worker() :
        m_running(true),
        m_thread(std::bind(&Worker::DoWork, this))
    {
    }
    ~Worker()
    {
        Stop();
        m_thread.join();
    }
    bool QueueWork(std::function<void()> item)
    {
        std::unique_lock<std::mutex> padlock(m_mutex);
        if (m_workItem)
        {
            return false;
        }
        m_workItem = item;
        padlock.unlock();
        m_cond.notify_all();
        return true;
    }
    void Stop()
    {
        bool expected = true;
        if (m_running.compare_exchange_strong(expected, false))
        {
            m_cond.notify_all();
        }
    }
private:
    void DoWork() noexcept
    {
        while (m_running)
        {
            std::unique_lock<std::mutex> padlock(m_mutex);
            m_cond.wait(padlock,
                [this]
                ()
                {
                    return !m_running || m_workItem;
                });
            if (m_workItem)
            {
                decltype(m_workItem) workItem;
                std::swap(m_workItem, workItem);
                padlock.unlock();
                workItem();
            }
        }
    }
};
int main()
{
    std::cout << "Start." << std::endl;
    {
        std::vector<std::unique_ptr<Worker>> workers;
        for (int i = 0; i < 10; ++i)
        {
            workers.push_back(std::unique_ptr<Worker>(new Worker()));
        }
        workers[0]->QueueWork(
            []()
            {
                std::cout << "Work item" << std::endl;
            });
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
        for (auto & worker : workers)
        {
            worker->Stop();
        }
        for (auto & worker : workers)
        {
            worker.reset();
        }
    }
    std::cout << "Stop." << std::endl;
    return 0;
}
The idea is that when calling Worker::Stop() from the host thread, Worker::m_running is set to false, and notify_all() is called on Worker::m_cond. The worker thread wakes up from its m_cond.wait(), checks m_running and breaks out, exiting the thread function.
Sometimes, however, this deadlocks. The worker thread wakes up, sees that m_running is true (how is that possible?) and goes back to wait(). There's no additional call to m_cond.notify_all() so the thread ends up in a deadlocked state.
I spawn 10 Worker objects in this code. I don't think it has anything to do with the number of threads, but to be able to trigger the race condition (if that's what it is), I needed more threads.
What is wrong with the code?
Running gcc:
g++ --version
g++ (Ubuntu 4.9.2-0ubuntu1~14.04) 4.9.2
Copyright (C) 2014 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
Compiling using:
g++ -std=c++11 -pedantic test.cpp -lpthread
Edit:
Changed the order of Worker's private members. m_thread is now last. Still the same problem.