No.
2022-07-18
  • Jan
  • Feb
  • Mar
  • Apr
  • May
  • Jun
  • Jul
  • Aug
  • Sep
  • Oct
  • Nov
  • Dec
  • Sun
  • Mon
  • Tue
  • Wed
  • Thu
  • Fri
  • Sat
  • 28
  • 29
  • 30
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8

散列表(二)- 散列表解决冲突的方式

散列表冲突

当哈希函数把不同的关键字映射到同一个散列表地址(即哈希数组索引)上时,就产生了冲突(collision),当关键字集比散列表地址(哈希数组个数)大时,关键字映射必然会出现冲突。我们用链接法(chaining)和开放寻址法(open addressing)来处理冲突。

使用链接法处理冲突

链接法,又叫开链法,顾名思义,把映射到同一散列表地址中的所有元素都放在一个链表中,如下图所示:

hashTableChaining

(图片取自算法导论图11-2)

链接法的优点:链接法处理冲突比较简单,处理数据存储比较灵活,数据处理的平均性能也较高效。

代码实现

这里为了方便,采用关键字为整数,值也为整数的 key-value 结构,哈希函数采用代码中 integerhash 函数内整数取模的方式。

#define HASH_OK    (0)
#define HASH_ERR    (-1)

typedef struct hash_node
{
    struct hash_node* prev;    //哈希结点前向指针
    struct hash_node* next;    //哈希结点后向指针
    int key;    //关键字
    int value;    //值
}stHashNode;


typedef int (*hashFunc)(int key, unsigned int tableSize);

typedef struct hash_table
{
    stHashNode** table;    //哈希表
    hashFunc keyHash;    //哈希函数
    unsigned int count;    //哈希数组个数
}stHashTable;

//整数哈希函数
int integerHash(int intKey, unsigned int tableSize)
{
    return intKey % tableSize;
}

static stHashNode* mallocNode(int key, int value)
{
    stHashNode* node = (stHashNode*) malloc(sizeof(stHashNode));
    if (node == NULL)
        return NULL;

    node->key = key;
    node->value = value;
    node->prev = NULL;
    node->next = NULL;

    return node;
}

static void freeNode(stHashNode* node)
{
    free(node);
}

//初始化哈希表
int hashTableInit(stHashTable* hashHandler, unsigned int count, hashFunc func)
{
    if (func == NULL)
        return HASH_ERR;

    hashHandler->table = (stHashNode**) malloc(count * sizeof(stHashNode*));
    if (hashHandler->table == NULL)
        return HASH_ERR;

    hashHandler->count = count;
    hashHandler->keyHash = func;

    for (unsigned int i = 0; i < count; i++) 
        hashHandler->table[i] = NULL;

    return HASH_OK;
}

//查找哈希表结点
stHashNode* hashTableFindNode(stHashTable* table, int key, int value)
{
    int index = table->keyHash(key, table->count);
    stHashNode* tableListHead = table->table[index];
    stHashNode* node = tableListHead;
    while (node != NULL)
    {
        if (node->value == value)
            return node;
        node = node->next;
    }

    return NULL;
}

//插入结点
int hashTableInsertNode(stHashTable* table, int key, int value)
{
    stHashNode* node = mallocNode(key, value);
    int index = table->keyHash(node->key, table->count);
    stHashNode* tableListHead = table->table[index];
    if (tableListHead != NULL)
        tableListHead->prev = node;
    node->next = tableListHead;
    table->table[index] = node;
    return HASH_OK;
}

//删除结点
int hashTableDelNode(stHashTable* table, int key, int value)
{
    int index = table->keyHash(key, table->count);
    stHashNode* tableListHead = table->table[index];
    stHashNode* node = tableListHead;

    while(node != NULL)
    {
        if (node->value == value)
        {
            if (node->prev == NULL)
                table->table[index] = node->next;
            else
                node->prev->next = node->next;

            if (node->next != NULL)
                node->next->prev = node->prev;

            freeNode(node);

            return HASH_OK;
        }
        node = node->next;
    }

    return HASH_ERR;
}

使用开放寻址法处理冲突

在开放寻址法中,所有元素都存放在散列表中。也就是说,每个表项或包含动态集合的一个元素,可包含NULL。当查找某个元素时,要系统地检查所有的表项,直到找到所需的元素,或最终查明该元素不在表中。在开放寻址法中,散列表可能会被填满,这时就要扩充散列表,扩充时需要将散列表的容量(即数组的大小)扩大,并将原散列表中的数据重新散列到新散列表中,这里我们代码未实现。

开放寻址法中,当出现冲突时,需要探查元素下一个可使用的地址,探查地址的技术有三种,因此我们也有三种方式来实现开放寻址法:

线性探查(linear probing)

设 m 为散列表大小,i 为探查次数,则对关键字 k 进行第 i 次探查得到的散列值为:

h(k, i) = (h’(k) + i) mod m, i = 0, 1, …, m-1

为了方便查找和删除,我们给散列表结点置上状态,STATUS_NULL, STATUS_DEL, STATUS_EXIST, 初始状态为 STATUS_NULL,添加结点时状态置为 STATUS_EXIST,删除结点时不需要物理删除,只需要将结点置为 STATUS_DEL 状态即可。

代码实现:

typedef enum node_status
{
    STATUS_NULL = 0,    //空结点
    STATUS_DEL = 1,    //结点删除
    STATUS_EXIST = 2    //结点在使用
}eNodeStatus;

typedef struct hash_node
{
    int key;
    int value;
    eNodeStatus status;
}stHashNode;

typedef int (*hashFunc)(int key, unsigned int tableSize);

typedef struct hash_table
{
    stHashNode* table;
    hashFunc keyHash;
    unsigned int count;
}stHashTable;

int hashTableInit(stHashTable* hashHandler, unsigned int count, hashFunc func)
{
    if (func == NULL)
        return HASH_ERR;

    hashHandler->table = (stHashNode*) malloc(count * sizeof(stHashNode));
    if (hashHandler->table == NULL)
        return HASH_ERR;

    hashHandler->count = count;
    hashHandler->keyHash = func;
    memset(hashHandler->table, 0, count * sizeof(stHashNode));

    return HASH_OK;
}

int hashTableDelNode(stHashTable* table, int key, int value)
{
    stHashNode* node = hashTableFindNode(table, key, value);
    if (node == NULL)
        return HASH_ERR;
    node->status = STATUS_DEL;
}

stHashNode* hashTableFindNode(stHashTable* table, int key, int value)
{
    int index = table->keyHash(key, table->count);
    int linearIndex = index;
    while (table->table[linearIndex].status != STATUS_NULL && table->table[linearIndex].key != key)
    {
        linearIndex = (linearIndex + 1) % table->count;    //h(k, i) = (h'(k) + i) mod m
        if (linearIndex == index)
            return NULL;
    }

    if (table->table[linearIndex].status == STATUS_EXIST)
        return &table->table[linearIndex];
    return NULL;
}

int hashTableInsertNode(stHashTable* table, int key, int value)
{
    int index = table->keyHash(key, table->count);
    int linearIndex = index;
    while (table->table[linearIndex].status == STATUS_EXIST)
    {
        //已存在
        if (table->table[linearIndex].key == key && table->table[linearIndex].value == value)
            return HASH_OK;
        linearIndex = (linearIndex + 1) % table->count;    //h(k, i) = (h'(k) + i) mod m
        //未找到空位,返回错误
        if (linearIndex == index)
            return HASH_ERR;
    }

    //找到空位,插入
    table->table[linearIndex].key = key;
    table->table[linearIndex].value = value;
    table->table[linearIndex].status = STATUS_EXIST;
    return HASH_OK;
}

二次探查(quadratic probing)

二次探查的散列函数:

h(k, i) = (h’(k) + c1*i + c2*i2) mod m

此方式数据结构与初始化代码与线性探查相同,只在查找与添加结点进行探查时代码不同。

代码实现:

stHashNode* hashTableFindNode(stHashTable* table, int key, int value)
{
    int index = table->keyHash(key, table->count);
    int odd = 0;
    int i = 1;
    int quadIndex = index;
    while (table->table[quadIndex].status != STATUS_NULL && table->table[quadIndex].key != key)
    {
        if (odd == 0)
        {
            odd = 1;
            quadIndex = index + i * i;
            quadIndex %= table->count;
        }
        else
        {
            odd = 0;
            quadIndex = index - i * i;
            i++;
            while (quadIndex < 0)
                quadIndex += table->count;
        }
        if (i > table->count / 2)
            return NULL;
    }

    if (table->table[quadIndex].status == STATUS_EXIST)
        return &table->table[quadIndex];
    return NULL;
}

int hashTableInsertNode(stHashTable* table, int key, int value)
{
    int index = table->keyHash(key, table->count);
    int quadIndex = index;
    int odd = 0;
    int i = 1;
    while (table->table[quadIndex].status == STATUS_EXIST)
    {
        //已存在
        if (table->table[quadIndex].key == key && table->table[quadIndex].value == value)
            return HASH_OK;

        if (odd == 0)
        {
            odd = 1;
            quadIndex = index + i * i;
            quadIndex %= table->count;
        }
        else
        {
            odd = 0;
            quadIndex = index - i * i;
            i++;
            while (quadIndex < 0)
                quadIndex += table->count;
        }

        //未找到
        if (i > table->count / 2)
            return HASH_ERR;
    }

    //插入
    table->table[quadIndex].key = key;
    table->table[quadIndex].value = value;
    table->table[quadIndex].status = STATUS_EXIST;
    return HASH_OK;
}

双重散列(double hashing)

双重散列的散列函数:

h(k, i) = (h1(k) + i*h2(k)) mod m

为了能够查找整个散列表,h2(k) 的值必须要与表大小 m 互素,我们这里取小于 m 的最大素数。代码实现如下:

//是否为素数
int isPrimer(int num)
{
    for (int i = 2; i * i <= num; i++)
    {
        if (num % i == 0)
            return 0;
    }
    return 1;
}

//获取小于表大小的最大素数
static int getPrimer(unsigned int tableSize)
{
    int num = tableSize - 1;
    while (isPrimer(num) == 0)
        num--;

    return num;
}

//h2(k), primer为小于表大小的最大素数
int integerHash2(int intKey, int primer)
{
    return primer - (intKey % primer);
}

stHashNode* hashTableFindNode(stHashTable* table, int key, int value)
{
    int index = table->keyHash(key, table->count);
    int index2 = integerHash2(key, getPrimer(table->count));
    int i = 1;
    int quadIndex = index;
    while (table->table[quadIndex].status != STATUS_NULL && table->table[quadIndex].key != key)
    {
        quadIndex = (index + i * index2) % table->count;    //h(k, i) = (h1(k) + i*h2(k)) mod m  
        if (quadIndex == index)
            return NULL;
        i++;
    }

    if (table->table[quadIndex].status == STATUS_EXIST)
        return &table->table[quadIndex];
    return NULL;
}

int hashTableInsertNode(stHashTable* table, int key, int value)
{
    int index = table->keyHash(key, table->count);
    int index2 = integerHash2(key, getPrimer(table->count));
    int i = 1;
    int quadIndex = index;
    while (table->table[quadIndex].status == STATUS_EXIST)
    {
        if (table->table[quadIndex].key == key && table->table[quadIndex].value == value)
            return HASH_OK;

        quadIndex = (index + i * index2) % table->count;    //h(k, i) = (h1(k) + i*h2(k)) mod m  
        if (quadIndex == index)
            return HASH_ERR;
        i++;
    }

    table->table[quadIndex].key = key;
    table->table[quadIndex].value = value;
    table->table[quadIndex].status = STATUS_EXIST;
    return HASH_OK;
}


参考
散列表 4篇
《算法导论》第三版中文版 P11