How to (properly) use robust pthreads for process synchronization?

949 views Asked by At

We have a bug in a production system, where a process segfaults while holding a shared memory mutex. We'd like it to release the lock when dying. We use sem_wait()/sem_post(), but doing my homework, I've found that this API does not allow for such a behavior:

http://www.usenetmessages.com/view.php?c=computer&g=1074&id=78029&p=0

The answer, the article says, is using the robust pthreads API. I've found the following article about this topic:

http://www.embedded-linux.co.uk/tutorial/mutex_mutandis

But, having implemented the following code, I'm having an unreliable behavior, that is, should I tell process 3, for instance, to segfault, the code works just fine. The other processes wake up, recognize that a process died while holding the mutex, and recover. However, should I tell process 0 to die, or should I remove the sleep call on line 63, the other processes do not wake up once the failling process kills itself. Am I doing something wrong?

#include <stdio.h>
#include <stdlib.h>
#include <features.h>
#define __USE_POSIX
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#define __USE_MISC
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#define __USE_GNU   /* Necessario para usar a API PTHREAD_MUTEX_ROBUST_NP */
#define __USE_UNIX98 /* Necessario para usar a funcao pthread_mutexattr_settype */
#include <pthread.h>
#include <sys/wait.h>
static void *shrd;

static int child_main(int slot, int segfault) {
    pthread_mutex_t   *lock = (pthread_mutex_t *) shrd;
    int                err;

    if ( 0 != (err=pthread_mutex_lock(lock)) ) {
        switch(err) {
        case EINVAL:
            printf("Lock invalido no filho [%d]\n", slot);
            goto excecao; 

        case EDEADLK:
            printf("O filho [%d] tentou travar um lock que jah possui.\n", slot);
            break;

        case EOWNERDEAD:
            printf("Filho [%d] foi informado que o processo que estava com o lock morreu.\n", slot);
            if ( 0 == pthread_mutex_consistent_np(lock) ) {
                printf("Filho [%d] retornou o lock para um estado consistente.\n", slot);
            } else {
                fprintf(stderr, "Nao foi possivel retornar o lock a um estado consistente.\n");
                goto desistir;
            }

            if ( 0 != (err=pthread_mutex_lock(lock)) ) {
                fprintf(stderr, "Apos recuperar o estado do lock, nao foi possivel trava-lo: %d\n", err);
                goto desistir;
            }


        case ENOTRECOVERABLE:
            printf("O filho [%d] foi informado de que o lock estah permanentemente em estado inconsistente.\n", slot);
            goto desistir;

        default:
            printf("Erro desconhecido ao tentar travar o lock no filho [%d]: [%d]\n", slot, err);
            goto excecao; 
        }
    }

    printf("Filho [%d] adquiriu o lock.\n", slot);

    if ( segfault == slot ) {
        printf("Matando o PID [%d] com SIGSEGV.\n", getpid());
        kill(getpid(), SIGSEGV); 
    } else {
        sleep(1);
    }

    if ( 0 != (err = pthread_mutex_unlock(lock)) ) {
        switch (err) {
        case EPERM:
            printf("O filho [%d] tentou liberar o lock, mas nao o possui.\n", slot);
            break;

        default:
            fprintf(stderr, "Erro inesperado ao liberar o lock do filho [%d]: [%d]\n", slot, err);
        }
    } else {
        printf("Filho [%d] retornou o lock.\n", slot);
    }

    return 0;

excecao:
    fprintf(stderr, "Programa terminado devido excecao.\n");
    return 1;

desistir:
    fprintf(stderr, "A execucao do sistema nao deve prosseguir. Abortando todos os processos.\n");
    kill(0, SIGTERM);

    /* unreachable */
    return 1;
}

int main(int argc, const char * const argv[]) {
    pid_t               filhos[10];
    int                 status;
    pid_t               p;
    int                 segfault = -1;
    pthread_mutexattr_t attrs;

    if ( argc > 1 ) {
        segfault = atoi(argv[1]);
        if ( segfault < 0 || segfault > 9 )
            segfault = -1;
    }

    if ( (shrd = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0)) == MAP_FAILED ) {
        perror("Erro ao criar shrd mem:\n");
        exit(1);
    }

    pthread_mutexattr_init         (&attrs);
    pthread_mutexattr_settype      (&attrs, PTHREAD_MUTEX_RECURSIVE_NP);
    pthread_mutexattr_setrobust_np (&attrs, PTHREAD_MUTEX_ROBUST_NP);
    pthread_mutexattr_setpshared   (&attrs, PTHREAD_PROCESS_SHARED);
    /* 
        Devido a um BUG na glibc 2.5 (que eh a usada pelo CentOS 5,
        a unica forma de fazer os mutexes robustos funcionarem eh
        setando o protocolo para PTHREAD_PRIO_INHERIT:
        http://sourceware.org/ml/libc-help/2010-04/msg00028.html
    */
    pthread_mutexattr_setprotocol  (&attrs, PTHREAD_PRIO_INHERIT);
    pthread_mutex_init             ((pthread_mutex_t*) shrd, &attrs);
    pthread_mutexattr_destroy      (&attrs);

    for (size_t i=0; i<sizeof(filhos)/sizeof(pid_t); ++i) {
        if ( (filhos[i]=fork()) == 0 ) {
            return child_main((int) i, segfault);
        } else {
            if ( filhos[i] < 0 ) {
                fprintf(stderr, "Erro ao criar o filho [%zu]. Abortando.\n", i);
                exit(1);
            }
        }
    }

    for (size_t i=0; i<sizeof(filhos)/sizeof(pid_t); ++i) {
        do {
            p = waitpid(filhos[i], &status, 0);
        } while (p != -1);
    }

    printf("Pai encerrou a sua execucao.\n");

    return 0;
}

BTW: I'm compiling on CentOS 5, 64 bits:

$ uname -rm
2.6.18-194.el5 x86_64
glibc-2.5-49
gcc-4.1.2-48.el5

(Sorry, the sentences and comments on the code are in portuguese, my native language.)

2

There are 2 answers

1
caruccio On

Your EOWNERDEAD block misses a break before ENOTRECOVERABLE block. Also, according to pthread_mutex_lock manpage, after the first call to pthread_mutex_lock(), the lock is held by caller even when EOWNERDEAD is returned. Thus, you should not call it again inside EOWNERDEAD's block.

1
Acacio Centeno On

I've tried some other approaches, namely: 1. Using POSIX barriers 2. Having the parent holding the lock while forking() and releasing it after each child incremented a counter.

The first approach did not work at all, but I’m publishing the source code I used, as I might have made some mistake using the API:

On child_main:

pthread_barrier_t *barr = (pthread_barrier_t *) ((char *) shrd + sizeof(pthread_mutex_t));
...
int rc = pthread_barrier_wait(barr);
if(rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD)
{
   printf("Nao foi possivel esperar na barreira.\n");
   exit(-1);
}

On main:

pthread_barrierattr_t   barr_attrs;
pthread_barrier_t      *barr;
...
initialize(pthread_barrierattr_init,       &barr_attrs);
initialize(pthread_barrierattr_setpshared, &barr_attrs, PTHREAD_PROCESS_SHARED);
barr = (pthread_barrier_t *) ((char *) shrd + sizeof(pthread_mutex_t));

if ( (init_result = pthread_barrier_init(barr, &barr_attrs, 10)) != 0 ) {
   printf("Nao foi possivel iniciar a barreira.\n");
   exit(EXIT_FAILURE);
}

Initialize is a macro, defined as:

 #define initialize(func, ...) \
 do { \
    init_result = func(__VA_ARGS__); \
    if ( 0 != init_result ) { \
      stored_errno = errno; \
      func_name = #func; \
      goto erro_criacao_semaforo; \
    } \
 } while(0);

The second approach seems to work:

On child_main:

int               *contador = (int *) ((char *) shrd + sizeof(pthread_mutex_t) + sizeof(int));
...
int *n = (int *)(lock+1);
...
if ( 0 != (err=pthread_mutex_lock(lock)) ) {
...

On main:

volatile int           *n; // Cada filho iniciado incrementa esta variavel. 
                           // Qdo ela chega em 10, liberamos o lock.
...
n          = (int *) ((char *) shrd + sizeof(pthread_mutex_t));
...
pthread_mutex_lock(mutex);
for (i=0; i<sizeof(filhos)/sizeof(pid_t); ++i) {
... // the fork goes here.
}

while (*n != 10); // Isto garante que todos os filhos cheguem ao lock.
pthread_mutex_unlock(mutex);

But once I add a random sleep time, so they get unsynchronized, once again I’ve got a deadlock:

On child_main:

int                num_sorteado;
struct timespec    dessincronizador = { 1, 0 };

int *n = (int *)(lock+1);

num_sorteado = 1 + (int) (999999.0 * (rand() / (RAND_MAX + 1.0)));
dessincronizador.tv_nsec = num_sorteado;
nanosleep(&dessincronizador, NULL);

if ( 0 != (err=pthread_mutex_lock(lock)) ) {
...

Sadly, there seems to be no reliable way to learn that a process died while holding a lock, so the best way around to our issue is to capture the signal on the dying process and raise a kill(0, SIGTERM) to get the other processes to die too.