Why? I didn't do anything on the other peer!
I am using a large number of threads to fetch data from the server. When the thread count is small, everything works. But when the thread count is very large, recv()
returns -1 and errno
indicates "Connection reset by peer".
Here is an example to reproduce the issue:
server.cc
#include <arpa/inet.h>
#include <assert.h>
#include <netinet/in.h>
#include <pthread.h>
#include <unistd.h>
char buffer[4096];
/*
 * Send exactly `size` bytes starting at `data` on `socket_fd`.
 *
 * send() is called repeatedly until nothing is left to transmit; `flags`
 * is forwarded unchanged to every call. Each call is asserted to have
 * transferred at least one byte. Always returns 0.
 */
inline int send_all(int socket_fd, const char* data, size_t size, int flags)
{
    const char* cursor = data;

    for (size_t remaining = size; remaining > 0; )
    {
        int written = send(socket_fd, cursor, remaining, flags);
        assert(written > 0);

        /* Step past the bytes that were just accepted by send(). */
        cursor += written;
        remaining -= written;
    }

    return 0;
}
/*
 * Receive exactly `size` bytes from `socket_fd` into `data`.
 *
 * MSG_WAITALL is added to `flags` on every call, but recv() is still
 * allowed to hand back fewer bytes than requested (for example when it is
 * interrupted by a signal), so the call is repeated until the whole
 * request has been satisfied.
 *
 * An error or an end-of-stream before `size` bytes arrived trips the
 * assert. Returns 0 on success; returns -1 on failure only when asserts
 * are compiled out.
 */
inline int recv_all(int socket_fd, void* data, size_t size, int flags)
{
    char* cursor = (char*) data;
    size_t remaining = size;

    while (remaining > 0)
    {
        /* Keep the full ssize_t result instead of narrowing it to int. */
        ssize_t received = recv(socket_fd, cursor, remaining,
                                flags | MSG_WAITALL);
        assert(received > 0);
        if (received <= 0)
            return -1; /* only reachable when asserts are disabled */

        cursor += received;
        remaining -= (size_t) received;
    }

    return 0;
}
/*
 * Per-connection worker thread.
 *
 * `arg` carries the accepted socket descriptor, packed into the pointer
 * value by the creator. The thread reads one request byte, burns some CPU
 * time, streams CHUNK_COUNT chunks of CHUNK_SIZE bytes back, waits for the
 * client to close its side, closes the socket and detaches itself.
 * Always returns NULL.
 */
void* server_thread(void* arg)
{
    enum { CHUNK_SIZE = 4096, CHUNK_COUNT = 4096 };

    int socket_fd = (int) ((long) arg);

    /*
     * Each thread uses its own zero-filled buffer. Sharing one global
     * buffer between all connection threads is a data race, because
     * every thread receives into it and sends from it concurrently.
     */
    char io_buffer[CHUNK_SIZE] = {0};

    // recv some info first
    recv_all(socket_fd, io_buffer, 1, 0);

    // simulate some computation
    // NOTE(review): an optimizing compiler may remove this empty loop nest.
    for (int i = 0; i < 0xffff; i ++)
        for (int j = 0; j < 0xffff; j ++)
        {
        }

    // send data
    for (int i = 0; i < CHUNK_COUNT; i ++)
        send_all(socket_fd, io_buffer, sizeof(io_buffer), 0);

    // Wait for the client to close its end; recv() is expected to return 0
    // at that point. The result is intentionally not checked.
    (void) recv(socket_fd, io_buffer, 1, MSG_WAITALL);

    close(socket_fd);
    pthread_detach(pthread_self());
    return NULL;
}
/*
 * Server entry point: listens on TCP port 11282 on all interfaces and
 * starts one server_thread per accepted connection. The accept loop never
 * terminates; any setup or accept failure trips an assert.
 */
int main(void)
{
    int listen_socket = socket(AF_INET, SOCK_STREAM, 0);
    assert(listen_socket != -1);

    /* Zero the whole structure so the padding bytes are well defined. */
    struct sockaddr_in listen_address = {0};
    listen_address.sin_family = AF_INET;
    listen_address.sin_port = htons(11282);
    listen_address.sin_addr.s_addr = htonl(INADDR_ANY);

    int result = bind(listen_socket,
                      (struct sockaddr*) &listen_address,
                      sizeof(listen_address));
    assert(result != -1);

    /*
     * Ask for the largest backlog the system allows. A backlog of 5 is far
     * too small when hundreds of clients connect at the same moment;
     * connections that do not fit in the accept queue may be dropped or
     * reset, which is a likely source of "Connection reset by peer" on the
     * client side.
     */
    result = listen(listen_socket, SOMAXCONN);
    assert(result != -1);

    for (;;)
    {
        int server_socket = accept(listen_socket, NULL, NULL);
        assert(server_socket != -1);

        pthread_t tid;
        result = pthread_create(&tid,
                                NULL,
                                server_thread,
                                (void *) ((long) server_socket));
        /* pthread_create reports failure as a non-zero error number,
           never as -1. */
        assert(result == 0);
    }
}
client.cc
#include <arpa/inet.h>
#include <assert.h>
#include <netinet/in.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
pthread_t threads[4096];
char buffer[4096];
/*
 * Transmit the whole `size`-byte range beginning at `data` over
 * `socket_fd`, issuing as many send() calls as needed. `flags` is passed
 * through to each send(). Every call is asserted to make progress.
 * Always returns 0.
 */
inline int send_all(int socket_fd, const char* data, size_t size, int flags)
{
    size_t left = size;
    const char* next = data;

    while (left != 0)
    {
        int chunk = send(socket_fd, next, left, flags);
        assert(chunk > 0);

        /* Advance past what has been handed to the socket so far. */
        next += chunk;
        left -= chunk;
    }

    return 0;
}
/*
 * Receive exactly `size` bytes from `socket_fd` into `data`.
 *
 * MSG_WAITALL is added to `flags` on every call, but recv() is still
 * allowed to hand back fewer bytes than requested (for example when it is
 * interrupted by a signal), so the call is repeated until the whole
 * request has been satisfied.
 *
 * An error or an end-of-stream before `size` bytes arrived trips the
 * assert. Returns 0 on success; returns -1 on failure only when asserts
 * are compiled out.
 */
inline int recv_all(int socket_fd, void* data, size_t size, int flags)
{
    char* cursor = (char*) data;
    size_t remaining = size;

    while (remaining > 0)
    {
        /* Keep the full ssize_t result instead of narrowing it to int. */
        ssize_t received = recv(socket_fd, cursor, remaining,
                                flags | MSG_WAITALL);
        assert(received > 0);
        if (received <= 0)
            return -1; /* only reachable when asserts are disabled */

        cursor += received;
        remaining -= (size_t) received;
    }

    return 0;
}
/*
 * One client connection.
 *
 * Connects to 127.0.0.1:11282, sends a single request byte, reads
 * CHUNK_COUNT chunks of CHUNK_SIZE bytes in reply and closes the socket.
 * `arg` is unused. Any failure trips an assert. Always returns NULL.
 */
void* client_thread(void* arg)
{
    enum { CHUNK_SIZE = 4096, CHUNK_COUNT = 4096 };

    (void) arg;

    /*
     * Each thread uses its own zero-filled buffer. Having every client
     * thread receive into one shared global buffer is a data race.
     */
    char io_buffer[CHUNK_SIZE] = {0};

    int socket_fd = socket(AF_INET, SOCK_STREAM, 0);
    assert(socket_fd != -1);

    /* Zero the whole structure so the padding bytes are well defined. */
    struct sockaddr_in server_address = {0};
    server_address.sin_family = AF_INET;
    server_address.sin_port = htons(11282);
    server_address.sin_addr.s_addr = inet_addr("127.0.0.1");

    int result = connect(socket_fd,
                         (struct sockaddr *) &server_address,
                         sizeof(server_address));
    assert(result != -1);

    // send some info first
    send_all(socket_fd, io_buffer, 1, 0);

    // recv the reply data
    for (int i = 0; i < CHUNK_COUNT; i ++)
        recv_all(socket_fd, io_buffer, sizeof(io_buffer), 0);

    close(socket_fd);
    return NULL;
}
/*
 * Client entry point.
 *
 * Usage: ./client [thread_count]
 *
 * Starts `thread_count` client_thread workers and waits for all of them.
 * The count must be between 0 and the capacity of the global `threads`
 * array. Invalid arguments and thread failures trip an assert.
 */
int main(int argc, char* argv[])
{
    assert(argc == 2);

    // get client thread count
    int thread_count = atoi(argv[1]);
    assert(thread_count >= 0);
    assert((size_t) thread_count <= sizeof(threads) / sizeof(threads[0]));

    for (int i = 0; i < thread_count; i ++)
    {
        int result = pthread_create(&threads[i], NULL, client_thread, NULL);
        /* pthread_create reports failure as a non-zero error number,
           never as -1. */
        assert(result == 0);
    }

    for (int i = 0; i < thread_count; i ++)
    {
        int result = pthread_join(threads[i], NULL);
        assert(result == 0);
        (void) result; /* silence unused warning when asserts are disabled */
    }

    return 0;
}
Usage:
./server
./client [thread_count]
With 480 as the thread_count, I can sometimes reproduce the issue.