哈希搜索

Posted on 2013-04-29 Edited on 2013-05-06

哈希搜索是典型的以空间换时间的算法，不论其建立哈希表时间，还是搜索时间均小于优质算法：快速排序和二分搜索。在数组尺寸为1万，哈希参数为1000（哈希函数是val%HASH_ARG）时，其时间参数为

qsort takes: 1755060 ns (1.75 ms)
build hash table takes: 1181122 ns (1.18 ms)

Array size: 10000, Value range: 10000
average search time:
binary search: 166 ns
hash search: 48 ns

其排序时间减少30%，单次搜索时间减少70%。性能提升很多。缺点是由于引入hash_node结构，1万个整数，大小为4Bx1w，需要扩展出16Bx1w，空间使用提升了4倍。另有空间提升两倍的算法（参考散列法解决1000个整数的搜索 - cqnuztq的专栏 - 博客频道 - CSDN.NET）

当数组尺寸进一步上升（10w），而值域不变时，时间参数为

qsort takes: 25675050 ns (25.67 ms)
build hash table takes: 47017148 ns (47.02 ms)

Array size: 100000, Value range: 10000
average search time:
binary search: 220 ns
hash search: 276 ns

此时哈希搜索已经在排序和搜索上，全面落后了。究其原因是什么？因为此时数组尺寸上升，但哈希函数和哈希参数并没变，所以哈希冲突数大大上升。考虑冲突的概率相同，所以每个哈希索引下的冲突数约为（ARRAY_SIZE / HASH_ARG），以上面的取值为例，则冲突数为100，即其时间复杂度为100，而二分搜索的复杂度为logn = 16.6，已经远小于哈希搜索的复杂度了。

综上，哈希搜索适用于属性值范围远小于数组个数的情况，下面的笔记中有使用哈希搜索的一个例子，比较好的阐述了这个特性（从大量整数中选取最小/大的若干个 « GoCalf Blog）。但数组范围不能过大，导致冲突数上升过快。或者说为了提高哈希搜索的效率，最好能找到一个能消除冲突的哈希函数。

下面附上源码

// Hash search
// Ben @ 2013-04-29

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

struct hash_node
{
     int inited;
     int key;
     int pos;
     struct hash_node* next;
};

#define HASH_ARG     1000
static struct hash_node hash_table[HASH_ARG];

void build_hash_table(int* L, int size)
{
     memset ((char*)hash_table, 0, sizeof(hash_table));

     int i;
     for (i=0; i<size; i++)
     {
          int index = hash(L[i]);

          if (hash_table[index].inited == 1)
          {
               struct hash_node* n = &hash_table[index];
               while (n->next != NULL)
                    n = n->next;
               n->next = (struct hash_node*)malloc(sizeof(struct hash_node));
               n->next->inited = 1;
               n->next->key = L[i];
               n->next->pos = i;
               n->next->next = NULL;
          }
          else
          {
               hash_table[index].inited = 1;
               hash_table[index].key = L[i];
               hash_table[index].pos = i;
          }
     }
}

void destroy_hash_table()
{
     int i;
     for(i=0; i<HASH_ARG; i++)
     {
          struct hash_node* n = hash_table[i].next;

          while (n != NULL)
          {
               struct hash_node* tmp = n->next;
               free(n);
               n = tmp;
          }
     }
}

int hash(int val)
{
     return val%HASH_ARG;
}

int hsearch(int t, int* L, int size)
{
     int index = hash(t);
     struct hash_node* i = &hash_table[index];
     while (i != NULL)
     {
          if (t == i->key)
               break;
          else
               i = i->next;
     }

     if (i == NULL)
          return -1;
     else
          return i->pos;
}