TCP recv error! Connection reset by peer?

1.4k views Asked by At

Why? I didn't do anything on the other peer!

I was using a large number of threads to fetch data from the server. When the thread count is small, everything works. But when the thread count is very large, recv() returns -1 and errno indicates "Connection reset by peer".

Here is an example to reproduce the issue:

server.cc

#include <arpa/inet.h>
#include <assert.h>
#include <netinet/in.h>
#include <pthread.h>
#include <unistd.h>

// 4 KiB buffer shared by every server thread for socket I/O. As a global it
// starts out zero-filled; its size is the chunk size of the reply stream.
char buffer[4096];

// Send exactly `size` bytes from `data` on `socket_fd`, looping over short
// writes.
//
// socket_fd: connected stream socket.
// data/size: bytes to transmit; size == 0 is a no-op.
// flags:     extra send() flags. MSG_NOSIGNAL is added where the platform
//            defines it, so a closed or reset peer shows up as a failed
//            send() here instead of SIGPIPE terminating the process.
//
// Returns 0 once everything has been sent. A failed send() trips the
// assertion in debug builds; when assertions are compiled out (NDEBUG) the
// function returns -1 instead of continuing with a corrupted position.
static inline int send_all(int socket_fd, const char* data, size_t size, int flags)
{
#ifdef MSG_NOSIGNAL
    flags |= MSG_NOSIGNAL;
#endif
    const char* pos = data;

    while (size > 0)
    {
        // send() returns ssize_t; storing it in an int would narrow it.
        ssize_t sent = send(socket_fd, pos, size, flags);
        if (sent <= 0)
        {
            assert(sent > 0);
            return -1;
        }
        pos += sent;
        size -= (size_t) sent;
    }

    return 0;
}

// Receive exactly `size` bytes into `data` from `socket_fd`.
//
// socket_fd: connected stream socket.
// data/size: destination buffer and the number of bytes to fill;
//            size == 0 is a no-op.
// flags:     extra recv() flags; MSG_WAITALL is always added.
//
// MSG_WAITALL asks the kernel for the full amount, but recv() may still
// return fewer bytes (for example when a signal is caught), so the call is
// repeated until the buffer is full.
//
// Returns 0 on success. An error or end-of-stream before `size` bytes have
// arrived trips the assertion in debug builds; when assertions are compiled
// out (NDEBUG) the function returns -1.
static inline int recv_all(int socket_fd, void* data, size_t size, int flags)
{
    char* pos = (char*) data;

    while (size > 0)
    {
        // recv() returns ssize_t; storing it in an int would narrow it.
        ssize_t got = recv(socket_fd, pos, size, flags | MSG_WAITALL);
        if (got <= 0)
        {
            assert(got > 0);
            return -1;
        }
        pos += got;
        size -= (size_t) got;
    }

    return 0;
}

// Per-connection worker: reads one request byte, burns some CPU, streams
// 4096 chunks of sizeof(buffer) bytes back, waits for the client to close,
// then closes the socket.
//
// arg: the accepted socket descriptor, smuggled through the void* argument.
// Returns NULL; the thread detaches itself.
void* server_thread(void* arg)
{
    // Detach up front so the thread is reclaimed on every exit path.
    pthread_detach(pthread_self());

    int socket_fd = (int) ((long) arg);

    // Per-thread byte for the one-byte reads. Receiving into the shared
    // global buffer would write to memory that other threads are
    // concurrently reading from in send().
    char scratch = 0;

    // recv some info first
    if (recv_all(socket_fd, &scratch, 1, 0) == 0)
    {
        // simulate some computation
        for (int i = 0; i < 0xffff; i ++)
            for (int j = 0; j < 0xffff; j ++);

        // send data; stop at the first failed chunk
        int status = 0;
        for (int i = 0; i < 4096 && status == 0; i ++)
            status = send_all(socket_fd, buffer, sizeof(buffer), 0);

        // Wait for the peer to close its end (recv returns 0) before
        // closing ours.
        if (status == 0)
            recv(socket_fd, &scratch, 1, MSG_WAITALL);
    }

    close(socket_fd);
    return NULL;
}

// Server entry point: listens on TCP port 11282 on all interfaces and
// starts one server_thread per accepted connection. Runs until killed;
// setup failures abort through assert.
int main(void)
{
    int listen_socket = socket(AF_INET, SOCK_STREAM, 0);
    assert(listen_socket != -1);

    // Zero the whole structure so the padding bytes are not left
    // uninitialised.
    struct sockaddr_in listen_address = {0};
    listen_address.sin_family = AF_INET;
    listen_address.sin_port = htons(11282);
    listen_address.sin_addr.s_addr = htonl(INADDR_ANY);

    int result = bind(listen_socket,
            (struct sockaddr*) &listen_address,
            sizeof(listen_address));
    assert(result != -1);

    // Ask for the largest backlog the system allows. With a backlog of 5,
    // hundreds of clients connecting at once overflow the queue of pending
    // connections, and the overflowing ones can later be reset.
    result = listen(listen_socket, SOMAXCONN);
    assert(result != -1);

    while (true)
    {
        int server_socket = accept(listen_socket, NULL, NULL);
        assert(server_socket != -1);

        pthread_t tid;
        result = pthread_create(&tid,
                NULL,
                server_thread,
                (void *) ((long) server_socket));
        // pthread_create reports failure as a non-zero error number,
        // never as -1.
        assert(result == 0);
    }
    return 0;
}

client.cc

#include <arpa/inet.h>
#include <assert.h>
#include <netinet/in.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

// Handles of the client threads started by main(); the array size caps the
// thread count at 4096.
pthread_t threads[4096];
// 4 KiB buffer used by the client threads for socket I/O; zero-filled at start.
char buffer[4096];

// Send exactly `size` bytes from `data` on `socket_fd`, looping over short
// writes.
//
// socket_fd: connected stream socket.
// data/size: bytes to transmit; size == 0 is a no-op.
// flags:     extra send() flags. MSG_NOSIGNAL is added where the platform
//            defines it, so a closed or reset peer shows up as a failed
//            send() here instead of SIGPIPE terminating the process.
//
// Returns 0 once everything has been sent. A failed send() trips the
// assertion in debug builds; when assertions are compiled out (NDEBUG) the
// function returns -1 instead of continuing with a corrupted position.
static inline int send_all(int socket_fd, const char* data, size_t size, int flags)
{
#ifdef MSG_NOSIGNAL
    flags |= MSG_NOSIGNAL;
#endif
    const char* pos = data;

    while (size > 0)
    {
        // send() returns ssize_t; storing it in an int would narrow it.
        ssize_t sent = send(socket_fd, pos, size, flags);
        if (sent <= 0)
        {
            assert(sent > 0);
            return -1;
        }
        pos += sent;
        size -= (size_t) sent;
    }
    return 0;
}

// Receive exactly `size` bytes into `data` from `socket_fd`.
//
// socket_fd: connected stream socket.
// data/size: destination buffer and the number of bytes to fill;
//            size == 0 is a no-op.
// flags:     extra recv() flags; MSG_WAITALL is always added.
//
// MSG_WAITALL asks the kernel for the full amount, but recv() may still
// return fewer bytes (for example when a signal is caught), so the call is
// repeated until the buffer is full.
//
// Returns 0 on success. An error or end-of-stream before `size` bytes have
// arrived trips the assertion in debug builds; when assertions are compiled
// out (NDEBUG) the function returns -1.
static inline int recv_all(int socket_fd, void* data, size_t size, int flags)
{
    char* pos = (char*) data;

    while (size > 0)
    {
        // recv() returns ssize_t; storing it in an int would narrow it.
        ssize_t got = recv(socket_fd, pos, size, flags | MSG_WAITALL);
        if (got <= 0)
        {
            assert(got > 0);
            return -1;
        }
        pos += got;
        size -= (size_t) got;
    }

    return 0;
}

void* client_thread(void* arg)
{   
    int socket_fd = socket(AF_INET, SOCK_STREAM, 0); 
    assert(socket_fd != -1);

    struct sockaddr_in server_address;
    server_address.sin_family = AF_INET;
    server_address.sin_port = htons(11282);
    server_address.sin_addr.s_addr = inet_addr("127.0.0.1");

    int result = connect(socket_fd,
            (struct sockaddr *) &server_address,
            sizeof(server_address));
    assert(result != -1);

    // send some info first
    send_all(socket_fd, buffer, 1, 0);

    // recv the reply data
    for (int i = 0; i < 4096; i ++)
        recv_all(socket_fd, buffer, sizeof(buffer), 0);

    close(socket_fd);

    return NULL;
}

// Client entry point. Usage: ./client <thread_count>
// Starts thread_count client threads and waits for all of them to finish.
// The count must be a decimal integer between 0 and the capacity of the
// `threads` array; anything else trips an assertion.
int main(int argc, char* argv[])
{
    assert(argc == 2);

    // get client thread count; strtol lets us reject text that is not a
    // number, which atoi would silently read as 0
    char* end = NULL;
    long requested = strtol(argv[1], &end, 10);
    assert(end != argv[1] && *end == '\0');

    const long capacity = (long) (sizeof(threads) / sizeof(threads[0]));
    assert(requested >= 0 && requested <= capacity);
    int thread_count = (int) requested;

    // Count the threads that really started so only valid handles are
    // joined.
    int started = 0;
    for (; started < thread_count; started ++)
    {
        int result = pthread_create(&threads[started], NULL, client_thread, NULL);
        // pthread_create reports failure as a non-zero error number,
        // never as -1.
        assert(result == 0);
        if (result != 0)
            break;
    }

    for (int i = 0; i < started; i ++)
        pthread_join(threads[i], NULL);

    return 0;
}

Usage:

./server
./client [thread_count]

With 480 as the thread_count, I could sometimes reproduce the issue.

The server

The client

The gdb info

0

There are 0 answers