I want to rewrite the md5 hashing algorithm for learning purposes so I used the wiki (https://en.wikipedia.org/wiki/MD5) and so far it works as intended if I put no string
$ ./hashing ""
80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00000080 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
d41d8cd98f00b204e9800998ecf8427e - my algo
d41d8cd98f00b204e9800998ecf8427e -
but not if I put any other string
$ ./hashing "a"
61 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00008061 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
3a389d88a349727c6751e9acae0b0f37 - my algo
0cc175b9c0f1b6a831c399e269772661 -
(I'm comparing it using {echo -n "[STRING]" | md5sum} which is the one in the bottom)
here is the code I got
#include "../inc/hashing.h"
/* reminder of the struct
*
* typedef struct md5_s{
* uint64_t size; // size of input in byte
* uint32_t buffer[4]; // current accumulation of hash
* uint32_t input[16]; // input to be used in the next step
* uint8_t digest[16]; // result of algorithm
* }md5_t;
*/
// shift amounts per round
const uint32_t S[64] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,// 0..15
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,// 16..31
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,// 32..47
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};// 48..63
// K[i] = floor(2^32 × abs(sin(i + 1))) precomputed table
const uint32_t K[64] = {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,// 0..3
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,// 4..7
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,// 8..11
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,// 12..15
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,// 16..19
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,// 20..23
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,// 24..27
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,// 28..31
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,// 32..35
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,// 36..39
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,// 40..43
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,// 44..47
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,// 48..51
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,// 52..55
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,// 56..59
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};// 60..63
uint32_t swap32(uint32_t x)
{
return ((x & 0xFF) << 24) |
(((x >> 8) & 0xFF) << 16) |
(((x >> 16) & 0xFF) << 8) |
((x >> 24) & 0xFF);
}
// main function calling other one
void ft_md5(md5_t *md5, uint8_t *s)
{
uint8_t *new_s;
new_s = ft_md5_padding(md5, s,ft_strlen((char *)s));
md5->buffer[0] = 0x67452301;
md5->buffer[1] = 0xefcdab89;
md5->buffer[2] = 0x98badcfe;
md5->buffer[3] = 0x10325476;
// run algo on each block of 512 bit
uint32_t block = 0;
while (md5->size > block)
{
ft_memcpy(md5->input, new_s + block , 64);// 512 bit / 8 (1 byte = 8 bit) = 64 byte
for (int i = 0; i < 64; i++)
printf("%02x ", new_s[i]);
printf("\n\n");
for (int i = 0; i < 16; i++)
printf("%08x ", md5->input[i]);
printf("\n\n");
ft_md5_algo(md5);
block += 64;
}
free(new_s);
ft_memcpy(md5->digest, md5->buffer, 16); // 128 / 8
}
// binary operation according to md5 algo (thx Wikipedia)
#define E(B, C, D) ((B & C) | ((~B) & D))
#define G(B, C, D) ((D & B) | ((~D) & C))
#define H(B, C, D) (B ^ C ^ D)
#define I(B, C, D) (C ^ (B | (~D)))
// md5 algorithm according to Wikipedia
void ft_md5_algo(md5_t *md5)
{
uint32_t i;
uint32_t A = md5->buffer[0];
uint32_t B = md5->buffer[1];
uint32_t C = md5->buffer[2];
uint32_t D = md5->buffer[3];
i = 0;
while (i < 64)
{
uint32_t f;
uint32_t g;
if (i < 16)
{
f = E(B, C, D);
g = i;
}
else if (i < 32)
{
f = G(B, C, D);
g = (5 * i + 1) % 16;
}
else if (i < 48)
{
f = H(B, C, D);
g = (3 * i + 5) % 16;
}
else if (i < 64)
{
f = I(B, C, D);
g = (7 * i) % 16;
}
f += A + K[i] + md5->input[g];
A = D;
D = C;
C = B;
B += ft_leftrotate(f, S[i]);
i++;
// printf("Iteration %u: A=%08x, B=%08x, C=%08x, D=%08x, f=%08x, g=%u\n", i, A, B, C, D, f, g);
}
md5->buffer[0] += A;
md5->buffer[1] += B;
md5->buffer[2] += C;
md5->buffer[3] += D;
}
uint32_t ft_leftrotate(uint32_t x, uint32_t offset)
{
return ( x << offset ) | ( x >> (32 - offset));
}
// padding of the main string
unsigned char *ft_md5_padding(md5_t *md5, uint8_t *s, size_t l)
{
size_t new_len = l;
uint8_t *new_s;
while(++new_len % 64 != 0);// 512 / 8
md5->size = new_len;
new_s = (uint8_t *)malloc(new_len + 1);
ft_memcpy(new_s, s, l);
new_s[l] = 0x80;
size_t i = l + 1;
while (i < new_len)
{
new_s[i] = 0x00;
i++;
}
return (new_s);
}
I print it in the main (which is in another file) using
for (int i = 0; i < 16; i++)
printf("%2.2x", md5.digest[i]);
(every already known function with a ft before is me who rewrite it and it work "exactly" like the official one I already tried to change it between the one I rewrite and the official one and nothing change in this prog at least)
so I already tried to switch between little-endian and big endian but it doesn't work (I'm currently using little endian as said in the wiki)
I tested the padding and it's okay I also tested (as you can see in the output) if the string (which is 64 * 8 bits) is well copied into the input I give to the algo (which is 16 * 32 bits) and it also seems okay
I assumed that my algorithm and constant are also good since it work with an empty string it's been 2 day and I literally have no clue where the issue can be
here the updated version of the padding function in case someone need it
unsigned char *ft_md5_padding(md5_t *md5, uint8_t *s, size_t l)
{
size_t new_len = l + 8;
uint8_t *new_s;
while(++new_len % 64 != 0);// 512 / 8
md5->size = new_len;
new_s = (uint8_t *)malloc(new_len + 1);
ft_memcpy(new_s, s, l);
new_s[l] = 0x00000080;
size_t i = l + 1;
while (i < new_len - 8)
{
new_s[i] = 0x00;
i++;
}
// append the len to the padding
new_s[i + 3] = l >> 21;
new_s[i + 2] = l >> 13;
new_s[i + 1] = l >> 5;
new_s[i + 0] = l << 3;
new_s[i + 7] = l >> 53;
new_s[i + 6] = l >> 45;
new_s[i + 5] = l >> 37;
new_s[i + 4] = l >> 29;
return (new_s);
}
it's now perfectly working thank's to you all (how can i close this question ?)