Why is MD5Sum so fast

1.9k views Asked by At

I've been studying hashing in C/C++ and tried to replicate the md5sum command in Linux. After analysing the source code, it seems that md5sum relies on the md5 library's md5_stream. I've approximated the md5_stream function from the md5.h library into the code below, and it runs in ~13-14 seconds. I've tried to call the md5_stream function directly and got ~13-14 seconds. The md5sum runs in 4 seconds. What have the GNU people done to get the speed out of the code?

The md5.h/md5.c code is available in the CoreUtils source code.

#include <QtCore/QCoreApplication>
#include <QtCore/QDebug>

#include <iostream>
#include <iomanip>
#include <fstream>
#include "md5.h"

#define BLOCKSIZE 32784

int main()
{
    FILE *fpinput, *fpoutput;

    if ((fpinput = fopen("/dev/sdb", "rb")) == 0) {
        throw std::runtime_error("input file doesn't exist");
    }

    struct md5_ctx ctx;
    size_t sum;

    char *buffer = (char*)malloc (BLOCKSIZE + 72);
    unsigned char *resblock = (unsigned char*)malloc (16);
    if (!buffer)
      return 1;

    md5_init_ctx (&ctx);
    size_t n;
    sum = 0;

    while (!ferror(fpinput) && !feof(fpinput)) {
        n = fread (buffer + sum, 1, BLOCKSIZE - sum, fpinput);
        if (n == 0){
            break;
        }
        sum += n;

        if (sum == BLOCKSIZE) {
            md5_process_block (buffer, BLOCKSIZE, &ctx);
            sum = 0;
        }
    }

    if (n == 0 && ferror (fpinput)) {
        free (buffer);
        return 1;
    }

    /* Process any remaining bytes.  */
    if (sum > 0){
      md5_process_bytes (buffer, sum, &ctx);
    }

    /* Construct result in desired memory.  */
    md5_finish_ctx (&ctx, resblock);
    free (buffer);

    for (int x = 0; x < 16; ++x){
        std::cout << std::setfill('0') << std::setw(2) << std::hex << static_cast<uint16_t>(resblock[x]);
        std::cout << " ";
    }
    std::cout << std::endl;
    free(resblock);
    return 0;
}

EDIT: Was a default mkspec problem in Fedora 19 64-bit.

2

There are 2 answers

0
MagikWorx On BEST ANSWER

It turned out to be an error in the Qt mkspecs regarding an optimization flag not being set properly.

3
karl On

fread() is convenient, but don't use fread() if you care about performance. fread() will copy from the OS to a libc buffer, then to your buffer. This extra copying cost CPU cycles and cache.

For better performance use open() then read() to avoid the extra copy. Make sure your read() calls are multiples of the block size, but lower than your CPU cache size.

For best performance use mmap() map the disk directly to RAM.

If you try something like the below code, it should go faster.

//  compile  gcc  mmap_md5.c  -lgcrypt
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <gcrypt.h>
#include <linux/fs.h> // ioctl

#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)


int main(int argc, char *argv[])
    {
        char *addr;
        int fd;
        struct stat sb;
        off_t offset, pa_offset;
        size_t length;
        ssize_t s;
        unsigned char digest[16];
        char digest_ascii[32+1] = {0,};
        int digest_length = gcry_md_get_algo_dlen (GCRY_MD_MD5);
        int i;

        if (argc < 3 || argc > 4) {
            fprintf(stderr, "%s file offset [length]\n", argv[0]);
            exit(EXIT_FAILURE);
        }
        fd = open(argv[1], O_RDONLY);
        if (fd == -1)
            handle_error("open");
        if (fstat(fd, &sb) == -1)           /* To obtain file size */
            handle_error("fstat");
        offset = atoi(argv[2]);
        pa_offset = offset & ~(sysconf(_SC_PAGE_SIZE) - 1);


        if (sb.st_mode | S_IFBLK ) {
            // block device. use ioctl to find length
            ioctl(fd, BLKGETSIZE64, &length);

        } else  {
            /* offset for mmap() must be page aligned */
            if (offset >= sb.st_size) {
                fprintf(stderr, "offset is past end of file size=%zd, offset=%d\n", sb.st_size, (int) offset);
                exit(EXIT_FAILURE);
            }
            if (argc == 4) {
                length = atoi(argv[3]);
                if (offset + length > sb.st_size)
                    length = sb.st_size - offset;
                /* Canaqt display bytes past end of file */
            } else {    /* No length arg ==> display to end of file */
                length = sb.st_size - offset;
            }
        }
        printf("length= %zd\n", length);
        addr = mmap(NULL, length + offset - pa_offset, PROT_READ,
                                MAP_PRIVATE, fd, pa_offset);
        if (addr == MAP_FAILED)
            handle_error("mmap");


        gcry_md_hash_buffer(GCRY_MD_MD5, digest, addr + offset - pa_offset, length);

        for (i=0; i < digest_length; i++) {
            sprintf(digest_ascii+(i*2), "%02x", digest[i]);
        }
        printf("hash=%s\n", digest_ascii);

        exit(EXIT_SUCCESS);
}