Using `mbind`, one can set the memory policy for a given mapped memory segment.
Q: How can I tell `mbind` to interleave a segment on all nodes?
If done after allocation but before first use, `MPOL_INTERLEAVE` on all nodes does what we expect -- memory is allocated uniformly across all nodes.
However, if the segment has already been written to and is allocated in e.g. node zero, there is no way to tell the kernel to uniformly interleave it on all NUMA nodes.
The operation simply becomes a no-op, as the kernel interprets it as "please place this segment on this set of nodes". Since we're passing the set of all NUMA nodes, no page lies outside that set, so nothing needs to be moved.
Minimal, Complete, and Verifiable example
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sched.h>
#include <sys/syscall.h>
#include <numaif.h>
#include <numa.h>
/* Number of ints in the test array: 2^29 bytes (512 MiB) divided by sizeof(int). */
#define N ((1<<29) / sizeof(int))
/* System page size in bytes; note this expands to a sysconf() call at every use. */
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
/* Mask that clears the in-page offset bits, i.e. rounds a value down to a
 * multiple of the page size (relies on the page size being a power of two). */
#define PAGE_MASK (~(PAGE_SIZE - 1))
/*
 * Run `cmd` through the shell with popen() and copy everything it writes to
 * its standard output onto this process's standard output.
 *
 * cmd: NUL-terminated shell command line.
 *
 * The process is terminated with a diagnostic on stderr if the command cannot
 * be started, if reading its output fails, if pclose() itself fails, or if the
 * command does not finish with a zero status.
 */
void print_command(char *cmd) {
    FILE *fp = popen(cmd, "r");
    if (fp == NULL) {
        perror("popen");
        exit(EXIT_FAILURE);
    }

    char buf[1024];
    while (fgets(buf, sizeof(buf), fp) != NULL) {
        fputs(buf, stdout);
    }
    if (ferror(fp)) {
        fprintf(stderr, "error while reading output of '%s'\n", cmd);
        pclose(fp);
        exit(EXIT_FAILURE);
    }

    /*
     * pclose() returns -1 (with errno set) only when the call itself fails.
     * Any other non-zero value is the command's wait status, for which errno
     * is meaningless, so perror() must not be used for that case.
     */
    int status = pclose(fp);
    if (status == -1) {
        perror("pclose");
        exit(EXIT_FAILURE);
    }
    if (status != 0) {
        fprintf(stderr, "command '%s' did not succeed (wait status %d)\n",
                cmd, status);
        exit(EXIT_FAILURE);
    }
}
/*
 * Print the per-node memory usage of the current process by running
 * "numastat -c <pid>" and echoing its output.
 *
 * The report is wrapped in ANSI escape sequences (ESC[32m ... ESC[0m) so that
 * terminals which honour them show it in green.
 */
void print_node_allocations() {
    char command[1024];

    /* Build the command line for this process's own PID. */
    snprintf(command, sizeof(command), "numastat -c %d", getpid());

    fputs("\x1B[32m", stdout);   /* switch colour on  */
    print_command(command);
    fputs("\x1B[0m", stdout);    /* reset attributes  */
}
int main(int argc, char **argv) {
int *a = numa_alloc_local(N * sizeof(int));
size_t len = (N * sizeof(int)) & PAGE_MASK;
unsigned long mymask = *numa_get_mems_allowed()->maskp;
unsigned long maxnode = numa_get_mems_allowed()->size;
// pin thread to core zero
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(0, &mask);
if (sched_setaffinity(syscall(SYS_gettid), sizeof(mask), &mask) < 0) {
perror("sched_setaffinity");
exit(-1);
}
// initialize array
printf("\n\n(1) array allocated on local node\n");
a[0] = 997;
for(size_t i=1; i < N; i++) {
a[i] = a[i-1] * a[i-1] % 1000000000;
}
print_node_allocations();
// attempt to get it to be uniformly interleaved on all nodes
printf("\n\n(2) array interleaved on all nodes\n");
if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
perror("mbind failed");
exit(-1);
}
print_node_allocations();
// what if we interleave on all but the local node?
printf("\n\n(3) array interleaved on all nodes (except local node)\n");
mymask -= 0x01;
if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
perror("mbind failed");
exit(-1);
}
print_node_allocations();
return 0;
}
Compiling and running with `gcc -o interleave_all interleave_all.c -lnuma && sudo ./interleave_all` yields:
(1) array allocated on local node
Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
Node 0 Node 1 Node 2 Node 3 Total
------ ------ ------ ------ -----
Huge 0 0 0 0 0
Heap 0 0 0 0 0
Stack 0 0 0 0 0
Private 514 0 0 0 514
------- ------ ------ ------ ------ -----
Total 514 0 0 0 514
(2) array interleaved on all nodes
Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
Node 0 Node 1 Node 2 Node 3 Total
------ ------ ------ ------ -----
Huge 0 0 0 0 0
Heap 0 0 0 0 0
Stack 0 0 0 0 0
Private 514 0 0 0 514
------- ------ ------ ------ ------ -----
Total 514 0 0 0 514
(3) array interleaved on all nodes (except local node)
Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
Node 0 Node 1 Node 2 Node 3 Total
------ ------ ------ ------ -----
Huge 0 0 0 0 0
Heap 0 0 0 0 0
Stack 0 0 0 0 0
Private 2 171 171 171 514
------- ------ ------ ------ ------ -----
Total 2 171 171 171 514