/* This file is part of an open-address hash table implementation.
   Copyright (C) 2005 Martin Dickopp

   This file is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this file; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307,
   USA.  */

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include "hash.h"


/* Alignment of the data associated with a key.  */
#define ALIGNMENT 16

/* Size of `size_t' rounded up to an integer multiple of `ALIGNMENT'.  */
#define SIZEOF_SIZE_T (sizeof (size_t) + (ALIGNMENT - 1) - (sizeof (size_t) - 1) % ALIGNMENT)

/* Get data pointer from pointer to slot index.  */
#define INDEXPTR2DATA(index_ptr) ((void *)((char *)(index_ptr) + SIZEOF_SIZE_T))

/* Get pointer to slot index from data pointer.  */
#define DATA2INDEXPTR(data) ((size_t *)((char *)(data) - SIZEOF_SIZE_T))

/* Special key to indicate a deleted slot.  */
#define DELETED ((char *)(&deleted_key))
static const char deleted_key;



/* Hash table slot.  */
struct htable_slot {
    char *key;             /* Key; null pointer for empty slots; `DELETED' for deleted slots.  If this member is
                              a null pointer or `DELETED', the other members are not valid.  */
    void *data;            /* Data associated with the key.  */
    unsigned int hash0;    /* First hash value.  */
    unsigned int hash1;    /* Second hash value.  */
};



/* Possible hash table sizes.  The numbers are prime numbers with the greatest possible distance to
   exact powers of 2. */
static const size_t htable_size [] = {47, 97, 191, 383, 769, 1531, 3067, 6143, 12289, 24571, 49157, 98299, 196613,
                                      393209, 786431, 1572869, 3145721, 6291449, 12582917, 25165813, 50331653,
                                      100663291, 201326611, 402653189, 805306357, 1610612741};

static int htable_rehash (struct htable *table, size_t new_size_index);
static size_t hash_string (const char *str, unsigned int *hash0, unsigned int *hash1);



/* Create a hash table.  On success, zero is returned.  If memory allocation fails, -1 is returned.  */
int
htable_create (struct htable *const table)
{
    struct htable_slot *const slots = malloc (htable_size [0] * sizeof *slots);

    if (slots == 0)
        return -1;

    {
        size_t i;
        for (i = 0; i < htable_size [0]; ++i)
            slots [i].key = 0;
    }

    table->slots = slots;
    table->size_index = 0;
    table->occupancy = 0;

    return 0;
}



/* Destroy a hash table.  Free all associated memory.  */
void
htable_destroy (struct htable *const table)
{
    size_t i;
    for (i = 0; i < htable_size [table->size_index]; ++i)
        if (table->slots [i].key != 0 && table->slots [i].key != DELETED)
            free (DATA2INDEXPTR (table->slots [i].data));

    free (table->slots);
}



/* Insert a new key and associated data into a hash table.  The key must be a NUL-terminated string.  The data is
   copied; the location of the copied data is returned, and if `exists_ptr' is not a null pointer, the integer at
   the location pointed to is set to zero.  If the key already exists, the location of the data associated with the
   existing key data is returned, and if `exists_ptr' is not a null pointer, the integer at the location pointed to
   is set to one.  If `data_len' is zero, `data' is not evaluated and the returned pointer must not be dereferenced.
   If memory allocation fails, a null pointer is returned.  */
void *
htable_insert (struct htable *const table, const char *const key, const void *const data, const size_t data_len,
               int *const exists_ptr)
{
    size_t size = htable_size [table->size_index], key_len, *index_ptr, slot_index;
    unsigned int hash0, hash1;


    if (table->occupancy > size / 2)
    {
        if (table->size_index < sizeof htable_size / sizeof *htable_size - 1)
        {
            if (htable_rehash (table, table->size_index + 1) == -1)
                return 0;
        }
        else if (table->occupancy >= htable_size [sizeof htable_size / sizeof *htable_size - 1] / 5 * 4)
            return 0;

        size = htable_size [table->size_index];
    }

    key_len = hash_string (key, &hash0, &hash1);
    index_ptr = malloc (SIZEOF_SIZE_T + data_len + key_len);
    if (index_ptr == 0)
        return 0;

    {
        const size_t increment = hash1 % (size - 2) + 1;
        size_t i = hash0 % size;

        slot_index = (size_t)-1;
        while (1)
        {
            if (table->slots [i].key == 0)
            {
                if (slot_index == (size_t)-1)
                    slot_index = i;
                break;
            }
            else if (table->slots [i].key == DELETED)
            {
                if (slot_index == (size_t)-1)
                    slot_index = i;
            }
            else if (table->slots [i].hash0 == hash0 && table->slots [i].hash1 == hash1
                     && strcmp (key, table->slots [i].key) == 0)
            {
                if (exists_ptr != 0)
                    *exists_ptr = 1;
                return table->slots [i].data;
            }

            i = (i + increment) % size;
        }
    }

    table->slots [slot_index].key = (char *)INDEXPTR2DATA (index_ptr) + data_len;
    table->slots [slot_index].data = INDEXPTR2DATA (index_ptr);
    table->slots [slot_index].hash0 = hash0;
    table->slots [slot_index].hash1 = hash1;
    ++table->occupancy;

    *index_ptr = slot_index;
    memcpy (table->slots [slot_index].key, key, key_len);
    if (data_len > 0)
        memcpy (table->slots [slot_index].data, data, data_len);

    if (exists_ptr != 0)
        *exists_ptr = 0;
    return table->slots [slot_index].data;
}



/* Delete a key and associated data.  The `data' parameter must point to the data associated with an existing key.  */
void
htable_delete (struct htable *const table, void *const data)
{
    table->slots [*DATA2INDEXPTR (data)].key = DELETED;
    free (DATA2INDEXPTR (data));
    --table->occupancy;

    if (table->occupancy < htable_size [table->size_index] / 5 && table->size_index > 0)
        htable_rehash (table, table->size_index - 1);
}



/* Find the data associated with a key.  If the key does not exist, a null pointer is returned.  */
void *
htable_find (const struct htable *const table, const char *const key)
{
    const size_t size = htable_size [table->size_index];
    unsigned int hash0, hash1;
    size_t increment, i;

    hash_string (key, &hash0, &hash1);
    increment = hash1 % (size - 2) + 1;
    i = hash0 % size;

    while (1)
    {
        if (table->slots [i].key == 0)
            return 0;
        else if (table->slots [i].key != DELETED && table->slots [i].hash0 == hash0 && table->slots [i].hash1 == hash1
                 && strcmp (key, table->slots [i].key) == 0)
            return table->slots [i].data;

        i = (i + increment) % size;
    }
}



/* Copy an existing table into the new one which has a different size.  If memory allocation fails, -1 is returned;
   otherwise zero is returned.  */
static int
htable_rehash (struct htable *const table, const size_t new_size_index)
{
    const size_t new_size = htable_size [new_size_index];
    struct htable_slot *const new_slots = malloc (new_size * sizeof *new_slots);

    if (new_slots == 0)
        return -1;

    {
        size_t i;
        for (i = 0; i < new_size; ++i)
            new_slots [i].key = 0;
    }

    {
        size_t j;
        for (j = 0; j < htable_size [table->size_index]; ++j)
            if (table->slots [j].key != 0 && table->slots [j].key != DELETED)
            {
                const size_t increment = table->slots [j].hash1 % (new_size - 2) + 1;
                size_t i = table->slots [j].hash0 % new_size;

                while (new_slots [i].key != 0)
                    i = (i + increment) % new_size;

                new_slots [i] = table->slots [j];
                *DATA2INDEXPTR (new_slots [i].data) = i;
            }
    }

    free (table->slots);
    table->slots = new_slots;
    table->size_index = new_size_index;
    return 0;
}



/* Calculate two hash values for a given string.  Return string length, including the terminating NUL character.  */
static size_t
hash_string (const char *str, unsigned int *const hash0, unsigned int *const hash1)
{
    size_t len;

    *hash0 = 0;
    *hash1 = 0xffffffff;

    for (len = 1; *str != '\0'; ++str, ++len)
    {
        *hash0 = (((*hash0 << 7) & 0xffffffff) | (*hash0 >> 25));
        *hash0 ^= *(const unsigned char *)str;

        *hash1 ^= *(const unsigned char *)str;
        *hash1 = (((*hash1 << 5) & 0xffffffff) | (*hash1 >> 27));
    }

    return len;
}
