By Jon Nimrod


2019-02-10 20:56:07 8 Comments

My latest school project was to implement malloc(), free(), realloc() and calloc() from the standard C library. I came up with something a bit similar to the glibc malloc(). It supports multi-threading; speed is pretty good according to my tests. Not very portable (meant for Linux 64bits and Darwin 64bits). You will notice I do not use sbrk(): the reason is that the project explicitly forbids us to; we have to use mmap() and mmap() only.

I started coding a year and a half ago, I obviously have a lot to learn still, but I would like to know what do you guys think about it. Not only the implementation, but also the coding style, the readability, etc.

The project is available here: https://github.com/terry-finkel/ft_malloc.


malloc.c:

#include "mallocp.h"


t_arena_data    *g_arena_data = NULL;

__attribute__((always_inline)) static inline void
update_max_chunk (t_bin *bin, t_chunk *next_chunk, unsigned long old_size) {

    unsigned long req_size = __mchunk_type_match(bin, CHUNK_TINY) ? SIZE_TINY : SIZE_SMALL;

    if (next_chunk != __mbin_end(bin) && __mchunk_not_used(next_chunk) && __mchunk_size(next_chunk) >= req_size) {

        bin->max_chunk_size = __mchunk_size(next_chunk);
    } else {

        t_chunk *biggest_chunk = bin->chunk;
        unsigned long remaining = 0;
        while (biggest_chunk != __mbin_end(bin)) {

            if (__mchunk_not_used(biggest_chunk)) {

                if (__mchunk_size(biggest_chunk) > remaining) remaining = __mchunk_size(biggest_chunk);
                if (remaining >= req_size) break;
            }

            biggest_chunk = __mchunk_next(biggest_chunk);
        }

        bin->max_chunk_size = remaining;
    }

    __marena_update_max_chunks(bin, old_size);
}

__attribute__((always_inline)) static inline void *
user_area (t_bin *bin, t_chunk *chunk, size_t size, pthread_mutex_t *mutex, int zero_set) {

    /*
       Populate chunk headers.
       A chunk header precedes the user area with the size of the user area (to allow us to navigate between allocated
       chunks which are not part of any linked list, and defragment the memory in free and realloc calls) and the pool
       header address. We also use the size to store CHUNK_USED to know if the chunk is used or not.
    */

    size += sizeof(t_chunk);
    bin->free_size -= size;

    unsigned long old_size = __mchunk_size(chunk);
    chunk->size = size | (1UL << CHUNK_USED);
    chunk->bin = bin;
    t_chunk *next_chunk = __mchunk_next(chunk);

    if (next_chunk != __mbin_end(bin) && next_chunk->size == 0) next_chunk->size = old_size - size;
    if (old_size == bin->max_chunk_size) update_max_chunk(bin, next_chunk, old_size);

    pthread_mutex_unlock(mutex);

    if (zero_set == 1) memset(chunk->user_area, 0, size - sizeof(t_chunk));

    return (void *)chunk->user_area;
}

__attribute__((always_inline)) static inline t_bin *
create_new_bin (t_arena *arena, int chunk_type, unsigned long size, long pagesize, pthread_mutex_t *mutex) {

    static unsigned long    headers_size = sizeof(t_bin) + sizeof(t_chunk);

    /*
       Allocate memory using mmap. If the requested data isn't that big, we allocate enough memory to hold
       CHUNKS_PER_POOL times this data to prepare for future malloc. If the requested data exceeds that value, nearest
       page data will do.
    */

    unsigned long mmap_size;

    if (chunk_type != CHUNK_LARGE) mmap_size = sizeof(t_bin) + (size + sizeof(t_chunk)) * CHUNKS_PER_POOL;
    else mmap_size = size + headers_size;

    mmap_size = mmap_size + (unsigned long)pagesize - (mmap_size % (unsigned long)pagesize);
    t_bin *bin = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);

    if (__builtin_expect(bin == MAP_FAILED, 0)) {
        errno = ENOMEM;
        pthread_mutex_unlock(mutex);
        return MAP_FAILED;
    }

    /* Keep track of the size and free size available. */
    bin->size = mmap_size | (1UL << chunk_type);
    bin->free_size = mmap_size - sizeof(t_bin);
    bin->arena = arena;
    bin->left = NULL;
    bin->right = NULL;
    bin->max_chunk_size = bin->free_size;
    bin->chunk->size = bin->free_size;
    __marena_update_max_chunks(bin, 0);

    return bin;
}

__attribute__((always_inline)) static inline void *
__malloc (size_t size, int zero_set) {

    static long             pagesize = 0;
    static pthread_mutex_t  main_arena_mutex = PTHREAD_MUTEX_INITIALIZER,
                            new_arena_mutex = PTHREAD_MUTEX_INITIALIZER;
    static t_arena_data     arena_data = { .arena_count = 0 };


    size = (size + 0xfUL) & ~0xfUL;
    int chunk_type;
    if (size >= (1UL << SIZE_THRESHOLD)) return NULL;
    else if (size > SIZE_SMALL) chunk_type = CHUNK_LARGE;
    else chunk_type = (size <= SIZE_TINY) ? CHUNK_TINY : CHUNK_SMALL;

    /* If first call to malloc, create our arena data struct. */
    pthread_mutex_lock(&main_arena_mutex);

    if (__builtin_expect(g_arena_data == NULL, 0)) {
        pagesize = sysconf(_SC_PAGESIZE);

        t_bin *bin = create_new_bin(&arena_data.arenas[0], chunk_type, size, pagesize, &main_arena_mutex);
        if (bin == MAP_FAILED) return NULL;

        arena_data.arena_count = 1;
        arena_data.arenas[0] = (t_arena){
            .small_bins = (chunk_type != CHUNK_LARGE) ? bin : NULL,
            .large_bins = (chunk_type == CHUNK_LARGE) ? bin : NULL,
            .max_chunk_small = 0,
            .max_chunk_tiny = 0
        };
        pthread_mutex_init(&arena_data.arenas[0].mutex, NULL);
        pthread_mutex_lock(&arena_data.arenas[0].mutex);
        g_arena_data = &arena_data;

        pthread_mutex_unlock(&main_arena_mutex);
        return user_area(bin, bin->chunk, size, &arena_data.arenas[0].mutex, zero_set);
    }

    /*
       In order to prevent threads to compete for the same memory area, multiple arenas can be created to allow
       for faster memory allocation in multi-threaded programs. Each arena has it's own mutex that will allow each
       thread to operate independently. If M_ARENA_MAX is reached, threads will loop over all arenas until one
       is available.
    */

    pthread_mutex_unlock(&main_arena_mutex);

    /* Look for an open arena. */
    t_arena *arena = &arena_data.arenas[0];
    int arena_index = 0;
    while (pthread_mutex_trylock(&arena->mutex) != 0) {

        if (arena_index == arena_data.arena_count - 1) {

            if (pthread_mutex_trylock(&new_arena_mutex) == 0) {

                if (arena_data.arena_count < M_ARENA_MAX) {
                    arena_index = arena_data.arena_count - 1;
                    ++arena_data.arena_count;
                    arena = NULL;
                    break;

                } else pthread_mutex_unlock(&new_arena_mutex);
            }

            arena = &arena_data.arenas[(arena_index = 0)];
            continue;
        }

        arena = &arena_data.arenas[arena_index++];
    }

    /* All arenas are occupied by other threads but M_ARENA_MAX isn't reached. Let's just create a new one. */
    if (arena == NULL) {

        arena = &arena_data.arenas[arena_index + 1];
        t_bin *bin = create_new_bin(arena, chunk_type, size, pagesize, &new_arena_mutex);
        if (bin == MAP_FAILED) return NULL;

        arena_data.arenas[arena_index + 1] = (t_arena){
                .small_bins = (chunk_type != CHUNK_LARGE) ? bin : NULL,
                .large_bins = (chunk_type == CHUNK_LARGE) ? bin : NULL,
                .max_chunk_small = 0,
                .max_chunk_tiny = 0
        };
        pthread_mutex_init(&arena->mutex, NULL);
        pthread_mutex_lock(&arena->mutex);

        pthread_mutex_unlock(&new_arena_mutex);
        return user_area(bin, bin->chunk, size, &arena->mutex, zero_set);
    }

    /*
       Otherwise, thread has accessed an arena and locked it. Now let's try to find a chunk of memory that is big
       enough to accommodate the user-requested size.
    */

    t_bin *bin = (chunk_type == CHUNK_LARGE) ? arena->large_bins : NULL;
    unsigned long required_size = size + sizeof(t_chunk);

    /* Look for a bin with a matching chunk type and enough space to accommodate user request. */
    if ((chunk_type == CHUNK_TINY && arena->max_chunk_tiny >= required_size)
        || (chunk_type == CHUNK_SMALL && arena->max_chunk_small >= required_size)) {

        bin = arena->small_bins;
        if (bin != NULL && chunk_type == CHUNK_TINY && __mchunk_type_match(bin, CHUNK_SMALL)) bin = bin->left;
        else if (bin != NULL && chunk_type == CHUNK_SMALL && __mchunk_type_match(bin, CHUNK_TINY)) bin = bin->right;
        while (bin != NULL && bin->max_chunk_size < required_size) {
            bin = (chunk_type == CHUNK_TINY) ? bin->left : bin->right;
        }
    }

    if (bin == NULL || (chunk_type == CHUNK_LARGE && bin->max_chunk_size < required_size )) {

        /* A suitable bin could not be found, we need to create one. */
        bin = create_new_bin(arena, chunk_type, size, pagesize, &arena->mutex);
        if (bin == MAP_FAILED) return NULL;

        /*
           As large bins generally don't need to be searched for empty space, they have their own linked list.
           For tiny and small bins, we are using a non-circular double linked list anchored to the main bin.
           Tiny chunks bins will be placed to the left of the main bin, and small chunks bins to it's right.
        */

        t_bin *main_bin = (chunk_type == CHUNK_LARGE) ? arena->large_bins : arena->small_bins;

        if (main_bin != NULL) {
            t_bin *tmp = (chunk_type == CHUNK_TINY) ? main_bin->left : main_bin->right;

            /* Insert the new bin into the bin list. */
            if (chunk_type == CHUNK_TINY) {
                main_bin->left = bin;
                bin->right = main_bin;
                bin->left = tmp;

                if (tmp != NULL) tmp->right = bin;

            } else {
                main_bin->right = bin;
                bin->left = main_bin;
                bin->right = tmp;

                if (tmp != NULL) tmp->left = bin;
            }

        } else if (chunk_type == CHUNK_LARGE) {
            arena->large_bins = bin;
        } else {
            arena->small_bins = bin;
        }

        return user_area(bin, bin->chunk, size, &arena->mutex, zero_set);
    }

    /*
       Find the first free memory chunk and look for a chunk large enough to accommodate user request.
       This loop is not protected, ie. if the chunk reaches the end of the bin it is undefined behavior.
       However, a chunk should NEVER reach the end of the bin as that would mean that a chunk of suitable size could
       not be found. If this happens, maths are wrong somewhere.
    */

    t_chunk *chunk = bin->chunk;
    while (__mchunk_is_used(chunk) || __mchunk_size(chunk) < required_size) {
        chunk = __mchunk_next(chunk);
    }

    return user_area(bin, chunk, size, &arena->mutex, zero_set);
}

void
*realloc (void *ptr, size_t size) {

    if (ptr == NULL) {
        return malloc(size);
    } else if (size == 0) {
        free(ptr);
        return NULL;
    }

    t_chunk *chunk = (t_chunk *)((unsigned long)ptr - sizeof(t_chunk));

    /* If the pointer is not aligned on a 16bytes boundary, it is invalid by definition. */
    if (g_arena_data == NULL || (unsigned long)chunk % 16UL != 0 || __mchunk_invalid(chunk)) {
        (void)(write(STDERR_FILENO, "realloc(): invalid pointer\n", 27) + 1);
        abort();
    }

    if (chunk->size >= size) return ptr;

    t_bin *bin = chunk->bin;
    t_arena *arena = bin->arena;
    pthread_mutex_lock(&arena->mutex);
    t_chunk *next_chunk = __mchunk_next(chunk);
    unsigned long req_size = ((size + 0xfUL) & ~0xfUL) + sizeof(t_chunk);

    if (next_chunk != __mbin_end(bin) && __mchunk_not_used(next_chunk)
        && __mchunk_size(chunk) + __mchunk_size(next_chunk) >= req_size) {

        unsigned long realloc_size = __mchunk_size(chunk) + __mchunk_size(next_chunk);
        unsigned long old_size = __mchunk_size(next_chunk);
        chunk->size = req_size | (1UL << CHUNK_USED);
        memset(next_chunk, 0, sizeof(t_chunk));
        next_chunk = __mchunk_next(chunk);

        if (next_chunk != __mbin_end(bin) && next_chunk->size == 0) next_chunk->size = realloc_size - req_size;
        if (old_size == bin->max_chunk_size) update_max_chunk(bin, next_chunk, old_size);

        pthread_mutex_unlock(&arena->mutex);
        return chunk->user_area;
    }

    pthread_mutex_unlock(&arena->mutex);
    free(ptr);
    return malloc(size);
}

void *
malloc(size_t size) {

    return __malloc(size, 0);
}

void *
calloc (size_t nmemb, size_t size) {

    return __malloc(nmemb * size, 1);
}

free.c:

#include "malloc.h"
#include "arenap.h"


void
free (void *ptr) {

    if (ptr == NULL) return;

    t_chunk *chunk = (t_chunk *)((unsigned long)ptr - sizeof(t_chunk));

    /* If the pointer is not aligned on a 16bytes boundary, it is invalid by definition. */
    if (g_arena_data == NULL || (unsigned long)chunk % 16UL != 0 || __mchunk_invalid(chunk)) {
        (void)(write(STDERR_FILENO, "free(): invalid pointer\n", 24) + 1);
        abort();
    }

    t_bin *bin = chunk->bin;
    t_arena *arena = bin->arena;
    pthread_mutex_lock(&arena->mutex);

    /* We return the memory space to the bin free size. If the bin is empty, we unmap it. */
    bin->free_size += __mchunk_size(chunk);
    if (bin->free_size + sizeof(t_bin) == __mbin_size(bin)) {

        if (__mbin_main(bin)) {

            chunk = bin->chunk;
            chunk->size = bin->free_size;
            bin->max_chunk_size = chunk->size;

            if  (__mchunk_type_nomatch(bin, CHUNK_LARGE)) __marena_update_max_chunks(bin, 0);

            memset(chunk->user_area, 0, chunk->size - sizeof(t_chunk));

        } else {

            if (bin->left != NULL) bin->left->right = bin->right;
            if (bin->right != NULL) bin->right->left = bin->left;

            munmap(bin, bin->size & SIZE_MASK);
        }

    } else {
        chunk->size &= ~(1UL << CHUNK_USED);

        /* Defragment memory. */
        t_chunk *next_chunk = __mchunk_next(chunk);
        if (next_chunk != __mbin_end(bin) && __mchunk_not_used(next_chunk)) {
            chunk->size += next_chunk->size;
            memset(next_chunk, 0, sizeof(t_chunk));
        }

        if (__mchunk_size(chunk) > bin->max_chunk_size) {
            bin->max_chunk_size = __mchunk_size(chunk);
            __marena_update_max_chunks(bin, 0);
        }
    }

    pthread_mutex_unlock(&arena->mutex);
}

mallocp.h:

#ifndef __MALLOC_PRIVATE_H
# define __MALLOC_PRIVATE_H

# include "malloc.h"
# include "arenap.h"
# include <errno.h>

# define CHUNKS_PER_POOL 100
# define SIZE_TINY 256
# define SIZE_SMALL 4096

#endif /* __MALLOC_PRIVATE_H */

arenap.h:

#ifndef __ARENA_PRIVATE_H
# define __ARENA_PRIVATE_H

# include <pthread.h>
# include <sys/mman.h>

/* For memset. */
# include <string.h>

/* For abort. */
# include <stdlib.h>

# define M_ARENA_MAX 8
# define SIZE_THRESHOLD 59

enum                    e_type {
    CHUNK_USED = SIZE_THRESHOLD + 1,
    CHUNK_TINY,
    CHUNK_SMALL,
    CHUNK_LARGE
};

# define FLAG_MASK (~SIZE_MASK)
# define SIZE_MASK ((1UL << (SIZE_THRESHOLD + 1)) - 1)

# define __mabs(x) ({ __typeof__(x) _x = (x); _x < 0 ? -_x : _x; })
# define __mbin_end(bin) ((void *)((unsigned long)bin + __mbin_size(bin)))
# define __mbin_main(bin) (__mchunk_type_match(bin, CHUNK_LARGE) ? bin == arena->large_bins : bin == arena->small_bins)
# define __mbin_size(bin) (bin->size & SIZE_MASK)
# define __mchunk_is_used(chunk) (chunk->size & (1UL << CHUNK_USED))
# define __mchunk_next(chunk) ((t_chunk *)((unsigned long)chunk + __mchunk_size(chunk)))
# define __mchunk_not_used(chunk) (__mchunk_is_used(chunk) == 0)
# define __mchunk_size(chunk) (chunk->size & SIZE_MASK)
# define __mchunk_type_match(bin, chunk_type) (bin->size & (1UL << chunk_type))
# define __mchunk_type_nomatch(bin, chunk_type) (__mchunk_type_match(bin, chunk_type) == 0)

# define __mchunk_invalid(chunk) \
    ((chunk->size & FLAG_MASK) != (1UL << CHUNK_USED) || __mabs((ssize_t)chunk - (ssize_t)chunk->bin) > (1UL << 32))

# define __marena_update_max_chunks(bin, old_size)                                                  \
({                                                                                                  \
    if (__mchunk_type_match(bin, CHUNK_TINY) && (old_size == bin->arena->max_chunk_tiny             \
        || bin->max_chunk_size > bin->arena->max_chunk_tiny)) {                                     \
        bin->arena->max_chunk_tiny = bin->max_chunk_size;                                           \
    } else if (__mchunk_type_match(bin, CHUNK_SMALL) && (old_size == bin->arena->max_chunk_small    \
        || bin->max_chunk_size > bin->arena->max_chunk_small)) {                                    \
        bin->arena->max_chunk_small = bin->max_chunk_size;                                          \
    }                                                                                               \
})


typedef struct          s_chunk {
    unsigned long       size;
    struct s_bin        *bin;
    void                *user_area[0];
}                       __attribute__((packed)) t_chunk;

typedef struct          s_bin {
    unsigned long       free_size;
    unsigned long       size;
    unsigned long       max_chunk_size;
    struct s_arena      *arena;
    struct s_bin        *left;
    struct s_bin        *right;
    t_chunk             chunk[0];
}                       __attribute__((packed)) t_bin;

typedef struct          s_arena {
    pthread_mutex_t     mutex;
    t_bin               *small_bins;
    t_bin               *large_bins;
    unsigned long       max_chunk_tiny;
    unsigned long       max_chunk_small;
}                       t_arena;

typedef struct          s_arena_data {
    _Atomic int         arena_count;
    t_arena             arenas[M_ARENA_MAX];
}                       t_arena_data;

extern t_arena_data     *g_arena_data;

#endif /* __ARENA_PRIVATE_H */
```

1 comments

@Lundin 2019-02-11 10:10:05

This is a general C code review, not addressing functionality. Overall the code is fairly well-written so most of my remarks are minor nit-picks. Could do with more comments. Your coding style is a bit exotic but as long as you keep it consistent, that's ok.

Program design

  • You don't seem to have an actual API for the functions. Even if they are supposed to replace the standard library ones, you still need some user entry point header where the function declarations and use are found. I suppose this is "malloc.h" but you didn't post that one.

  • You need to drop inlining. Both the gcc one and the standard C inline. When writing library/system code, it is frowned upon to use inlining all over, because it leads to big executables. Several of your functions are not great candidates for inlining. The decision when to inline should be made by the compiler, not the programmer.

    inline is mostly to be regarded as an obsolete keyword, much like register.

Standard compliance

  • There's a potential for namespace clashes with stdlib.h. Apart from the obvious names malloc etc: if your code isn't actually part of Glibc or equivalent, then you shouldn't use __ prefixes.

  • void *user_area[0]; etc is not valid C. This is obsolete non-standard gnu since 20 years back and shouldn't be used. Use standard C flexible array members instead, they work the same.

  • Whenever possible, drop non-standard __typeof__ in favour of standard C _Generic.

Potential bugs

  • g_arena_data is occasionally accessed outside mutex locks. Could be problematic and perhaps solved with _Atomic.

  • return (void *)chunk->user_area; is fishy, why the cast? Looks like a cast from void** to void* and thereby a bug?

Performance

  • I'd thread carefully around __attribute__((packed)). While you might want to keep heap memory small, this might lead to various misaligned access and slower code. Would have to be benchmarked in detail.

Coding style

  • Avoid "secret macro language" macros. Hiding trivial operations behind macros is bad practice, it makes the code harder to read, not easier. Readability is more important than concerns about potential code repetition. For example instead of this:

    # define __mchunk_is_used(chunk) (chunk->size & (1UL << CHUNK_USED))
    

    You should simply write:

    bool chunk is_chunk_used = (chunk->size & (1UL << CHUNK_USED));
    

    There's no need to "protect" the programmer against reading code containing bit-wise operators etc. You can assume that the reader knows C, but not the "secret macro language".

  • Similarly, there is no reason to implement __marena_update_max_chunks as a function-like macro. Write a function instead, for readability. But also so you can make it static to the specific translation unit instead of the global namespace.

  • if statements such as this one are hard to read and should be avoided:

     if (chunk_type != CHUNK_LARGE) mmap_size = sizeof(t_bin) + (size + sizeof(t_chunk)) * CHUNKS_PER_POOL;
     else mmap_size = size + headers_size;
    

    Keep the contents indented, at a line of their own. Also, it is good practice to always use braces even when there is just a single expression following if/else.

  • Consistently use size_t for storing the size of an array. Not int or unsigned long.

  • Avoid using the lvalue result of = inside expressions. Something like arena = &arena_data.arenas[(arena_index = 0)]; should be rewritten as arena_index = 0; arena = &arena_data.arenas[arena_index];

  • The presence of continue in C code almost exclusively means that a loop should be rewritten in more readable ways. For example the while (pthread_mutex_trylock(&arena->mutex) != 0) loop could likely be rewritten along the lines of for(arena_index = 0; arena_index < N; arena_index++).

    Keep in mind that even if you have a loop condition or loop iterator that needs to be accessed inside mutex locks, that necessarily doesn't mean that you have to write that part in the loop body. Something like for(mystruct* ms=...; access(ms) < count; ... is possible, where access is a wrapper function containing the mutex lock/unlock around the variable.

  • t_chunk *chunk = (t_chunk *)((unsigned long)ptr - sizeof(t_chunk)); could be rewritten as t_chunk *chunk = (t_chunk*)ptr - 1;

@Jon Nimrod 2019-02-12 15:07:15

Thank you for all your inputs. I adjusted my code to take them into consideration.

Related Questions

Sponsored Content

1 Answered Questions

[SOLVED] Simple Malloc Implementation

2 Answered Questions

[SOLVED] Implementation of K&R Malloc

2 Answered Questions

[SOLVED] Yet another implementation of malloc() in C

1 Answered Questions

[SOLVED] Test code for custom malloc

4 Answered Questions

[SOLVED] My own malloc() function in C

1 Answered Questions

[SOLVED] New malloc implementation

1 Answered Questions

[SOLVED] Testing different implementations of malloc()

2 Answered Questions

[SOLVED] malloc implementation

1 Answered Questions

2 Answered Questions

[SOLVED] malloc / free implementation

  • 2012-12-15 16:03:43
  • Ohad Schneider
  • 15893 View
  • 7 Score
  • 2 Answer
  • Tags:   c memory-management

Sponsored Content