Linux memory mapped file consuming more disk than expected

339 views Asked by At

Context: I'm using memory mapped file in my code created using ACE_Mem_Map. It is observed that the memory mapped file is consuming more disk space than expected.

Scenario: I have a structure containing a char array of 15KB. I have created a memory map file for array of this struct with file size ~2GB.

  1. If I access only a few bytes of the char array (say 256), the file size is shown as 521 MB, but the actual disk usage reported by the filesystem (using df -h) is more than 3 GB.
  2. If I access all bytes of the memory, then both file size and disk usage is shown as 2 GB.

Environment: OS: Oracle Linux 7.3 Kernel version: 3.10.0/4.1.12

Code:

#include<ace/Mem_Map.h>
#include <stdio.h>

/* Size in bytes of the char buffer inside each TestStruct_t (15 KB).
   The expansion is parenthesized so the macro stays correct when it is
   used inside a larger expression such as x / TEST_BUFF_SIZE. */
#define TEST_BUFF_SIZE (15*1024)

/* One record of the mapped array: a single TEST_BUFF_SIZE-byte buffer. */
typedef struct _test_struct_ {
    char test[TEST_BUFF_SIZE];

    /* A newly constructed record is cleared through reset(). */
    _test_struct_()
    {
        reset();
    }

    /* Zero-fills the buffer. Both memset calls below run, in this order;
       the labels are the author's notes on which call shows the problem. */
    void reset()
    {
        /* Issue replicating */
        memset(test, 0, 256);

        /* Issue not replicating */
        memset(test, 0, sizeof test);
    }
} TestStruct_t;

/*
 * Opens (creating if needed) <filename>, maps it through ACE_Mem_Map with a
 * length of <num of blocks> * sizeof(TestStruct_t), and calls reset() on
 * every record of the mapping.
 *
 * Returns 0 on success and -1 on bad usage, open failure or map failure.
 */
int main(int argc, char *argv[]) {

    if(3 != argc) {
        printf("Usage: %s <num of blocks> <filename>\n",
                argv[0]);
        return -1;
    }
    ACE_Mem_Map map_buf_;

    size_t num_of_blocks = strtoull(argv[1], NULL, 10);

    size_t MAX_SIZE = num_of_blocks*sizeof(TestStruct_t);

    char* mmap_file_name = argv[2];

    /* size_t arguments need %zu; %llu does not match their type. */
    printf("num_of_blocks[%zu], sizeof(TestStruct_t)[%zu], MAX_SIZE[%zu], mmap_file_name[%s]\n",
            num_of_blocks,
            sizeof(TestStruct_t),
            MAX_SIZE,
            mmap_file_name);

    TestStruct_t *base_addr_;

    ACE_HANDLE fp_ = ACE_OS::open(mmap_file_name,O_RDWR|O_CREAT,
            ACE_DEFAULT_OPEN_PERMS,0);

    if (fp_ == ACE_INVALID_HANDLE)
    {
        printf("Error opening file\n");
        return -1;
    }
    map_buf_.map(fp_,MAX_SIZE,PROT_WRITE,MAP_SHARED);

    /* The mapping result is judged by the address it reports. */
    base_addr_ = (TestStruct_t*)map_buf_.addr();
    if (base_addr_ == MAP_FAILED)
    {
        printf("Map init failure\n");
        ACE_OS::close(fp_);
        return -1;
    }

    /* Explicit cast so the argument matches %llu whatever type size() returns. */
    printf("map_buf_ size[%llu]\n",
            (unsigned long long)map_buf_.size());

    /* Touch every record; how much of each record is written is decided by
       TestStruct_t::reset(). */
    for(size_t i = 0; i < num_of_blocks; i++) {
        base_addr_[i].reset();
    }

    return 0;
}

Can anyone explain why scenario 1 is happening?

Note: In scenario 1, if I make a copy of generated mmap file and then delete that copy, then the additional 2.5GB disk space gets freed. Don't know the reason

1

There is 1 answer

4
mevets On

I 'upgraded' your program to nearly C and minus whatever ACE is and got this:

$ ./a.out 32 fred
num_of_blocks[32], sizeof(TestStruct_t)[15360], MAX_SIZE[491520], mmap_file_name[fred]
Bus error: 10

Which is pretty much expected. Mmap does not extend the size of the mapped file, so it generates an address error when you try to reference an unfilled part. So, the answer is that whatever ACE.map does, it likely invokes something like ftruncate(2) to extend the file to the size you give as a parameter. @John Bollinger hints at this by asking how are you measuring that: ls or du. You should use the latter. Anyway, almost C version:

#include <sys/mman.h>
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>

/* Size in bytes of the char buffer inside each TestStruct_t (15 KB).
   The expansion is parenthesized so the macro stays correct when it is
   used inside a larger expression such as x / TEST_BUFF_SIZE. */
#define TEST_BUFF_SIZE (15*1024)

/* One record of the mapped array: a single TEST_BUFF_SIZE-byte buffer. */
typedef struct _test_struct_ {
    char test[TEST_BUFF_SIZE];

    /* A newly constructed record is cleared through reset(). */
    _test_struct_()
    {
        reset();
    }

    /* Zero-fills the buffer. Both memset calls below run, in this order;
       the labels are the author's notes on which call shows the problem. */
    void reset()
    {
        /* Issue replicating */
        memset(test, 0, 256);

        /* Issue not replicating */
        memset(test, 0, sizeof test);
    }
} TestStruct_t;

/*
 * Opens (creating if needed) <filename>, optionally grows it, maps
 * <num of blocks> * sizeof(TestStruct_t) bytes of it as a shared mapping,
 * and calls reset() on every record.
 *
 * The number of command-line arguments selects how the file is grown
 * before it is mapped:
 *   argc == 3  the file is left at its current size
 *   argc == 4  the file is resized with ftruncate()
 *   argc == 5  the file is grown by seeking to the last byte and writing it
 * Any other count (> 5) behaves like argc == 3.
 *
 * Returns 0 on success and 1 on any failure.
 */
int main(int argc, char *argv[]) {

    if(argc < 3) {
        printf("Usage: %s <num of blocks> <filename>\n",
                argv[0]);
        return 1;
    }

    size_t num_of_blocks = strtoull(argv[1], NULL, 10);

    /* A zero-length mapping is invalid, and with MAX_SIZE == 0 the
       "last byte" offset used below would be -1, which is also lseek's
       error return. Reject it before touching the file. */
    if (num_of_blocks == 0) {
        fprintf(stderr, "num of blocks must be greater than 0\n");
        return 1;
    }

    size_t MAX_SIZE = num_of_blocks*sizeof(TestStruct_t);

    char* mmap_file_name = argv[2];

    printf("num_of_blocks[%zu], sizeof(TestStruct_t)[%zu], MAX_SIZE[%zu], mmap_file_name[%s]\n",
            num_of_blocks,
            sizeof(TestStruct_t),
            MAX_SIZE,
            mmap_file_name);


    int fp = open(mmap_file_name,O_RDWR|O_CREAT,0666);

    if (fp == -1)
    {
        perror("Error opening file");
        return 1;
    }
    /*SOMETHING CLEVER*/
    switch (argc) {
    case 3:
        /* File size is left untouched. */
        break;
    case 4:
        if (ftruncate(fp, (off_t)MAX_SIZE) != 0) {
            perror("ftruncate");
            close(fp);
            return 1;
        }
        break;
    case 5: {
        /* Compare in off_t so a failing lseek (-1) cannot be mistaken
           for the requested offset through unsigned conversion. */
        off_t last = (off_t)(MAX_SIZE - 1);
        if (lseek(fp, last, SEEK_SET) != last ||
            write(fp, "", 1) != 1) {
            perror("seek,write");
            close(fp);
            return 1;
        }
        break;
    }
    default:
        /* More than two extra arguments: no growth, same as case 3. */
        break;
    }
    void *b = mmap(0, MAX_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fp, 0);
    if (b == MAP_FAILED)
    {
        perror("Map init failure");
        close(fp);
        return 1;
    }
    TestStruct_t *base_addr = (TestStruct_t *)b;

    /* Walk the whole mapped extent, one record at a time. */
    for(size_t i = 0; i < num_of_blocks; i++) {
        base_addr[i].reset();
    }

    /* Release the mapping and the descriptor explicitly. */
    int rc = 0;
    if (munmap(b, MAX_SIZE) != 0) {
        perror("munmap");
        rc = 1;
    }
    if (close(fp) != 0) {
        perror("close");
        rc = 1;
    }

    return rc;
}

The SOMETHING CLEVER bit allows you to either work with an empty file (argc == 3), grow it with ftruncate (argc == 4), or grow it with lseek && write (argc == 5).

On UNIX-y systems, ftruncate may or may not reserve space for your file; a lengthened file without reserved space is called sparse. Almost universally, the lseek && write will create a sparse file, unless your system doesn't support that.

The sparse file will allocate actual disk blocks as you write to it; however, if that allocation fails, it will deliver a signal, whereas the pre-allocated one will not. Your loop at the bottom walks the whole extent, so the file will always be grown; reduce that loop and you can see if the options make a difference on your system.