Data Structures: Hash Tables

敖炜

This blog post introduces hash tables.

Hash Tables

A hash table, also known as a hash map, establishes a mapping between keys and values to support efficient element lookup: given a key, the corresponding value can be retrieved in O(1) time.

Figure: abstract representation of a hash table

                   Array   Linked List   Hash Table
  Find element     O(n)    O(n)          O(1)
  Add element      O(1)    O(1)          O(1)
  Delete element   O(n)    O(n)          O(1)

Common Operations

These include initializing a hash table and querying, adding, and removing key-value pairs.

# Initialize a hash table
hash_map = {}

# Add key-value pairs
hash_map[12836] = "小哈"
hash_map[15937] = "小啰"
hash_map[16750] = "小算"
hash_map[13276] = "小法"
hash_map[10583] = "小鸭"

# Query: retrieve the value for a key
name = hash_map[15937]

# Remove a key-value pair
hash_map.pop(10583)

# Traverse the hash table
# Traverse key-value pairs
for key, value in hash_map.items():
    ...

# Traverse keys only
for key in hash_map.keys():
    ...

# Traverse values only
for value in hash_map.values():
    ...
// Initialize a hash table
unordered_map<int, string> hashMap;

// Add key-value pairs
hashMap[12836] = "小哈";
hashMap[15937] = "小啰";
hashMap[16750] = "小算";
hashMap[13276] = "小法";
hashMap[10583] = "小鸭";

// Query: retrieve the value for a key
string name = hashMap[15937];

// Remove a key-value pair
hashMap.erase(10583);

// Traverse key-value pairs
for (auto &kv : hashMap) {
    cout << kv.first << " -> " << kv.second << endl;
}

// Traverse with an iterator
for (auto iter = hashMap.begin(); iter != hashMap.end(); iter++) {
    cout << iter->first << " -> " << iter->second << endl;
}

Simple Implementation

Each empty slot in a hash table is called a bucket, and each bucket can store one key-value pair. A query uses the hash function to locate the bucket for a key and then reads the value stored in that bucket.

The hash function maps a larger input space (the set of all keys) onto a smaller output space (the set of all buckets).

The hash function works in two steps:

  1. Compute a hash value from the key with some hash algorithm hash().
  2. Take the hash value modulo the number of buckets capacity to get the bucket index index.

    index = hash(key) % capacity

Figure: how a hash function works
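As a small worked example of the formula above (a sketch, using the identity hash for integer keys that the simple implementation below also assumes):

```python
def hash_func(key: int, capacity: int) -> int:
    """Map a key to a bucket index: index = hash(key) % capacity.
    The hash of an integer key is assumed to be the key itself."""
    return key % capacity

# With 100 buckets, key 12836 lands in bucket 36.
print(hash_func(12836, 100))  # → 36
```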

class Pair:
    """Key-value pair"""

    def __init__(self, key, value):
        self.key = key
        self.value = value

class ArrayHashMap:
    """Hash table based on a plain array"""

    def __init__(self):
        # Initialize an array of 100 buckets
        self._buckets = [None] * 100

    def hash_func(self, key):
        """Hash function"""
        index = key % 100
        return index

    def get(self, key):
        """Query operation"""
        index = self.hash_func(key)
        pair = self._buckets[index]
        if pair is None:
            return None
        return pair.value

    def put(self, key, value):
        """Add operation"""
        pair = Pair(key, value)
        index = self.hash_func(key)
        self._buckets[index] = pair

    def remove(self, key):
        """Remove operation"""
        index = self.hash_func(key)
        self._buckets[index] = None

    def entry_set(self):
        """Get all key-value pairs"""
        result = []
        for pair in self._buckets:
            if pair is not None:
                result.append(pair)
        return result

    def key_set(self):
        """Get all keys"""
        result = []
        for pair in self._buckets:
            if pair is not None:
                result.append(pair.key)
        return result

    def value_set(self):
        """Get all values"""
        result = []
        for pair in self._buckets:
            if pair is not None:
                result.append(pair.value)
        return result

    def print(self):
        """Print the hash table"""
        for pair in self._buckets:
            if pair is not None:
                print(pair.key, "->", pair.value)
struct Pair {
public:
    int key;
    string value;
    Pair(int key, string value) {
        this->key = key;
        this->value = value;
    }
};

class ArrayHashMap {
private:
    vector<Pair*> buckets;

public:
    ArrayHashMap() {
        // Initialize an array of 100 buckets
        buckets = vector<Pair*>(100);
    }

    ~ArrayHashMap() {
        // Free the memory each Pair pointer points to
        for (const auto &bucket : buckets) {
            delete bucket;
        }
        buckets.clear(); // Drop the now-dangling pointers to avoid use-after-free
    }

    int hashFunc(int key) {
        int index = key % 100;
        return index;
    }

    string get(int key) {
        int index = hashFunc(key);
        Pair* pair = buckets[index];
        if (pair == nullptr) {
            return "";
        }
        return pair->value;
    }

    void put(int key, string value) {
        Pair* pair = new Pair(key, value);
        int index = hashFunc(key);
        buckets[index] = pair;
    }

    void remove(int key) {
        int index = hashFunc(key);
        if (buckets[index] != nullptr) {
            delete buckets[index];
        }
        buckets[index] = nullptr;
    }

    vector<Pair*> pairSet() {
        vector<Pair*> result;
        for (Pair* pair : buckets) {
            if (pair != nullptr) {
                result.push_back(pair);
            }
        }
        return result;
    }

    vector<int> keySet() {
        vector<int> result;
        for (Pair* pair : buckets) {
            if (pair != nullptr) {
                result.push_back(pair->key);
            }
        }
        return result;
    }

    vector<string> valueSet() {
        vector<string> result;
        for (Pair* pair : buckets) {
            if (pair != nullptr) {
                result.push_back(pair->value);
            }
        }
        return result;
    }

    void print() {
        for (Pair* pair : buckets) {
            if (pair != nullptr) {
                cout << pair->key << " -> " << pair->value << endl;
            }
        }
    }
};
#define HASHTABLE_CAPACITY 100

typedef struct {
    int key;
    char* val;
} Pair;

/* Generic result container for the set functions below
   (assumed here, since its definition was not shown in the original) */
typedef struct {
    void* set; // points to an array of Pair, int, or char*
    int len;   // number of entries
} MapSet;

typedef struct {
    Pair* buckets[HASHTABLE_CAPACITY];
} ArrayHashMap;

int hashFunc(int key) {
    return key % HASHTABLE_CAPACITY;
}

ArrayHashMap* newArrayHashMap() {
    // calloc zeroes the bucket pointers so empty buckets read as NULL
    ArrayHashMap* hashMap = calloc(1, sizeof(ArrayHashMap));
    return hashMap;
}

void deleteArrayHashMap(ArrayHashMap* hashMap) {
    for (int i = 0; i < HASHTABLE_CAPACITY; i++) {
        if (hashMap->buckets[i] != NULL) {
            free(hashMap->buckets[i]->val);
            free(hashMap->buckets[i]);
        }
    }
    free(hashMap);
}

void put(ArrayHashMap* hashMap, const int key, const char* value) {
    Pair* pair = (Pair*)malloc(sizeof(Pair));
    pair->key = key;
    pair->val = (char*)malloc(strlen(value) + 1);
    strcpy(pair->val, value);

    int index = hashFunc(key);
    hashMap->buckets[index] = pair;
}

// Named removeItem to avoid clashing with remove() from <stdio.h>
void removeItem(ArrayHashMap* hashMap, const int key) {
    int index = hashFunc(key);
    if (hashMap->buckets[index] != NULL) {
        free(hashMap->buckets[index]->val);
        free(hashMap->buckets[index]);
    }
    hashMap->buckets[index] = NULL;
}

void pairSet(ArrayHashMap* hashMap, MapSet* set) {
    Pair* entries;
    int i = 0, index = 0;
    int total = 0;
    // Count occupied buckets
    for (i = 0; i < HASHTABLE_CAPACITY; i++) {
        if (hashMap->buckets[i] != NULL) {
            total++;
        }
    }
    entries = (Pair*)malloc(sizeof(Pair) * total);
    for (i = 0; i < HASHTABLE_CAPACITY; i++) {
        if (hashMap->buckets[i] != NULL) {
            entries[index].key = hashMap->buckets[i]->key;
            entries[index].val = (char*)malloc(strlen(hashMap->buckets[i]->val) + 1);
            strcpy(entries[index].val, hashMap->buckets[i]->val);
            index++;
        }
    }
    set->set = entries;
    set->len = total;
}

void keySet(ArrayHashMap* hashMap, MapSet* set) {
    int* keys;
    int i = 0, index = 0;
    int total = 0;
    for (i = 0; i < HASHTABLE_CAPACITY; i++) {
        if (hashMap->buckets[i] != NULL) {
            total++;
        }
    }
    keys = (int*)malloc(sizeof(int) * total);
    for (i = 0; i < HASHTABLE_CAPACITY; i++) {
        if (hashMap->buckets[i] != NULL) {
            keys[index] = hashMap->buckets[i]->key;
            index++;
        }
    }
    set->set = keys;
    set->len = total;
}

void valueSet(ArrayHashMap* hashMap, MapSet* set) {
    char** values;
    int i = 0, index = 0;
    int total = 0;
    for (i = 0; i < HASHTABLE_CAPACITY; i++) {
        if (hashMap->buckets[i] != NULL) {
            total++;
        }
    }
    values = (char**)malloc(sizeof(char*) * total);
    for (i = 0; i < HASHTABLE_CAPACITY; i++) {
        if (hashMap->buckets[i] != NULL) {
            values[index] = hashMap->buckets[i]->val;
            index++;
        }
    }
    set->set = values;
    set->len = total;
}

void print(ArrayHashMap* hashMap) {
    for (int i = 0; i < HASHTABLE_CAPACITY; i++) {
        if (hashMap->buckets[i] != NULL) {
            printf("%d -> %s\n", hashMap->buckets[i]->key, hashMap->buckets[i]->val);
        }
    }
}

Hash Collisions

When multiple different keys map to the same bucket, we call it a hash collision.

Figure: example of a hash collision

The larger the hash table's capacity, the lower the probability that multiple keys land in the same bucket, and the fewer collisions occur. We can therefore reduce hash collisions by expanding (resizing) the hash table.

Figure: hash table resizing

Resizing requires migrating every key-value pair from the old table to the new one, recomputing each pair's storage position with the hash function, so it is expensive. Languages with built-in hash tables usually reserve a generously sized table up front to avoid frequent resizing.

The load factor is the number of elements in the hash table divided by the number of buckets. It measures how severe collisions are and is commonly used as the trigger condition for resizing. In Java, for example, when the load factor exceeds 0.75, the table's capacity is doubled.
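The trigger condition can be sketched in a few lines (a minimal sketch; 0.75 is the Java default mentioned above, and the helper name is an assumption):

```python
def should_extend(size: int, capacity: int, load_thres: float = 0.75) -> bool:
    """Return True when the load factor (size / capacity) exceeds the threshold."""
    return size / capacity > load_thres

# 12 pairs in 16 buckets: load factor exactly 0.75, no resize yet.
print(should_extend(12, 16))  # → False
# 13 pairs in 16 buckets: load factor 0.8125, time to double the capacity.
print(should_extend(13, 16))  # → True
```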

The most blunt way to resolve hash collisions is to resize the table whenever a collision occurs, until no collisions remain. This works, but it is far too inefficient, so we use alternatives instead:

  1. Improve the hash table's data structure (separate chaining or open addressing) so the table keeps working correctly even when collisions occur.
  2. Resize only when necessary, i.e. when collisions become severe.

Separate Chaining

Convert each bucket from a single element into a linked list, with each key-value pair stored as a list node; all colliding pairs are stored in the same linked list.

Figure: separate-chaining hash table

A separate-chaining hash table operates as follows:

  • Query: feed the key through the hash function to get the bucket index, access the head node of the corresponding list, then traverse the list comparing keys to find the target pair.
  • Add: use the hash function to reach the list's head node, then append the key-value pair to the list.
  • Delete: first find the element as in a query, then remove it from its list.

Drawbacks:

  • Each node needs a pointer, increasing space usage.
  • Queries are slower, since the list must be scanned linearly.

When a list grows long, it can be converted to an AVL tree or a red-black tree, improving query time from O(n) to O(log n).

# Use lists (dynamic arrays) instead of linked lists, so each bucket is a list
# Double the capacity when the load factor exceeds 2/3
class HashMapChaining:

    def __init__(self):
        self.size = 0                  # number of key-value pairs
        self.capacity = 4              # number of buckets
        self.load_thres = 2.0 / 3.0    # load factor threshold that triggers resizing
        self.extend_ratio = 2          # growth factor
        self.buckets = [[] for _ in range(self.capacity)]

    def hash_func(self, key):
        return key % self.capacity

    def load_factor(self):
        return self.size / self.capacity

    def get(self, key):
        index = self.hash_func(key)
        bucket = self.buckets[index]
        # Scan the bucket; return the value if the key is found
        for pair in bucket:
            if pair.key == key:
                return pair.value
        return None

    def put(self, key, value):
        # Resize when the load factor exceeds the threshold
        if self.load_factor() > self.load_thres:
            self.extend()
        index = self.hash_func(key)
        bucket = self.buckets[index]
        # If the key already exists, update its value
        for pair in bucket:
            if pair.key == key:
                pair.value = value
                return
        # Otherwise append a new pair
        pair = Pair(key, value)
        bucket.append(pair)
        self.size += 1

    def remove(self, key):
        index = self.hash_func(key)
        bucket = self.buckets[index]
        for pair in bucket:
            if pair.key == key:
                bucket.remove(pair)
                self.size -= 1
                break

    def extend(self):
        buckets = self.buckets
        # Double the capacity and reinsert every pair
        self.capacity *= self.extend_ratio
        self.buckets = [[] for _ in range(self.capacity)]
        self.size = 0
        for bucket in buckets:
            for pair in bucket:
                self.put(pair.key, pair.value)

    def print(self):
        for bucket in self.buckets:
            res = []
            for pair in bucket:
                res.append(str(pair.key) + " -> " + pair.value)
            print(res)
94
class HashMapChaining {
private:
    int size;          // number of key-value pairs
    int capacity;      // number of buckets
    double loadThres;  // load factor threshold that triggers resizing
    int extendRatio;   // growth factor
    vector<vector<Pair*>> buckets;

public:
    HashMapChaining() : size(0), capacity(4), loadThres(2.0 / 3.0), extendRatio(2) {
        buckets.resize(capacity);
    }

    ~HashMapChaining() {
        for (auto &bucket : buckets) {
            for (Pair* pair : bucket) {
                delete pair;
            }
        }
    }

    int hashFunc(int key) {
        return key % capacity;
    }

    double loadFactor() {
        return (double)size / (double)capacity;
    }

    string get(int key) {
        int index = hashFunc(key);
        // Scan the bucket; return the value if the key is found
        for (Pair* pair : buckets[index]) {
            if (pair->key == key) {
                return pair->value;
            }
        }
        return "";
    }

    void put(int key, string value) {
        // Resize when the load factor exceeds the threshold
        if (loadFactor() > loadThres) {
            extend();
        }
        int index = hashFunc(key);
        // If the key already exists, update its value
        for (Pair* pair : buckets[index]) {
            if (pair->key == key) {
                pair->value = value;
                return;
            }
        }
        // Otherwise append a new pair
        buckets[index].push_back(new Pair(key, value));
        size++;
    }

    void remove(int key) {
        int index = hashFunc(key);
        auto &bucket = buckets[index];
        for (int i = 0; i < (int)bucket.size(); i++) {
            if (bucket[i]->key == key) {
                Pair* temp = bucket[i];
                bucket.erase(bucket.begin() + i);
                delete temp;
                size--;
                return;
            }
        }
    }

    void extend() {
        vector<vector<Pair*>> tempBuckets = buckets;
        // Double the capacity and reinsert every pair
        capacity *= extendRatio;
        buckets.clear();
        buckets.resize(capacity);
        size = 0;
        for (auto &bucket : tempBuckets) {
            for (Pair* pair : bucket) {
                put(pair->key, pair->value);
                delete pair;
            }
        }
    }

    void print() {
        for (auto &bucket : buckets) {
            cout << "[";
            for (Pair* pair : bucket) {
                cout << pair->key << " -> " << pair->value << ", ";
            }
            cout << "]\n";
        }
    }
};

Open Addressing

Open addressing introduces no extra data structure; instead it handles hash collisions by probing. The main probing schemes are linear probing, quadratic probing, and multiple hashing.

  1. Linear probing: probe with a linear search of fixed step size.
  • Insert: on a collision, walk forward from the conflict position by the fixed step until an empty bucket is found, then insert there.
  • Find: on a collision, walk forward from the conflict position by the fixed step. If the matching element is found, return its value; reaching an empty bucket means the target is not in the hash table.

Figure: open addressing with linear probing

Drawback of linear probing:

  • It tends to produce clustering: the longer a run of occupied slots, the more likely new collisions land in that run, which grows the cluster further, a vicious cycle that steadily degrades performance.

Drawback of open addressing:

  • Elements cannot simply be deleted from an open-addressing hash table. Deletion leaves an empty bucket in the array; a later query that hits this bucket returns early, so any element placed beyond it becomes unreachable.

Figure: query failure caused by deleting an element under open addressing

A lazy deletion mechanism solves this problem:

  • Instead of removing an element outright, mark its bucket with a constant TOMBSTONE.
  • When inserting, a TOMBSTONE counts as an empty bucket.
  • When querying, a TOMBSTONE does not count as an empty bucket.

However, lazy deletion can accelerate the table's performance degradation: over time, a linear probe may have to skip many TOMBSTONEs before reaching its target. To mitigate this, record the first TOMBSTONE encountered during the probe and swap the found target element into that position, which speeds up later queries.

Under linear probing, the hash table can be viewed as a circular array: a probe that runs off the end wraps around to the front.
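The circular view amounts to wrapping the probe index with a modulo; a minimal sketch of one linear-probe step:

```python
def next_index(index: int, capacity: int) -> int:
    """Advance a linear probe by one slot, wrapping to 0 at the end of the array."""
    return (index + 1) % capacity

print(next_index(2, 4))  # → 3
print(next_index(3, 4))  # → 0 (wraps around to the front)
```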

class HashMapOpenAddressing:

    def __init__(self):
        self.size = 0                        # number of key-value pairs
        self.capacity = 4                    # number of buckets
        self.load_thres = 2.0 / 3.0          # load factor threshold that triggers resizing
        self.extend_ratio = 2                # growth factor
        self.buckets = [None] * self.capacity
        self.TOMBSTONE = Pair(-1, "-1")      # marker for a removed pair

    def hash_func(self, key):
        return key % self.capacity

    def load_factor(self):
        return self.size / self.capacity

    def find_bucket(self, key):
        """Find the bucket index for key via linear probing"""
        index = self.hash_func(key)
        first_tombstone = -1
        # Probe until an empty bucket is reached
        while self.buckets[index] is not None:
            # If the key is found, move it into the first tombstone seen (if any)
            if self.buckets[index].key == key:
                if first_tombstone != -1:
                    self.buckets[first_tombstone] = self.buckets[index]
                    self.buckets[index] = self.TOMBSTONE
                    return first_tombstone
                return index
            # Remember the first tombstone encountered
            if first_tombstone == -1 and self.buckets[index] is self.TOMBSTONE:
                first_tombstone = index
            index = (index + 1) % self.capacity  # wrap around like a circular array
        # The key is absent; return the insertion position
        return index if first_tombstone == -1 else first_tombstone

    def get(self, key):
        index = self.find_bucket(key)
        if self.buckets[index] not in [None, self.TOMBSTONE]:
            return self.buckets[index].value
        return None

    def put(self, key, value):
        if self.load_factor() > self.load_thres:
            self.extend()
        index = self.find_bucket(key)
        # If the key already exists, update its value
        if self.buckets[index] not in [None, self.TOMBSTONE]:
            self.buckets[index].value = value
            return
        self.buckets[index] = Pair(key, value)
        self.size += 1

    def remove(self, key):
        index = self.find_bucket(key)
        if self.buckets[index] not in [None, self.TOMBSTONE]:
            self.buckets[index] = self.TOMBSTONE
            self.size -= 1

    def extend(self):
        temp_buckets = self.buckets
        self.capacity *= self.extend_ratio
        self.buckets = [None] * self.capacity
        self.size = 0
        for pair in temp_buckets:
            if pair not in [None, self.TOMBSTONE]:
                self.put(pair.key, pair.value)

    def print(self):
        for pair in self.buckets:
            if pair is None:
                print("None")
            elif pair is self.TOMBSTONE:
                print("TOMBSTONE")
            else:
                print(pair.key, "->", pair.value)
class HashMapOpenAddressing {
private:
    int size;
    int capacity;
    const double loadThres;
    const int extendRatio;
    vector<Pair*> buckets;
    Pair* TOMBSTONE = new Pair(-1, "-1"); // marker for a removed pair

public:
    // buckets(capacity, nullptr) creates `capacity` buckets, each initialized to nullptr
    HashMapOpenAddressing() : size(0), capacity(4), loadThres(2.0 / 3.0), extendRatio(2), buckets(capacity, nullptr) {}

    ~HashMapOpenAddressing() {
        for (Pair* pair : buckets) {
            if (pair != nullptr && pair != TOMBSTONE) {
                delete pair;
            }
        }
        delete TOMBSTONE;
    }

    int hashFunc(int key) {
        return key % capacity;
    }

    double loadFactor() {
        return (double)size / (double)capacity;
    }

    int findBucket(int key) {
        int index = hashFunc(key);
        int firstTombstone = -1;
        // Probe until an empty bucket is reached
        while (buckets[index] != nullptr) {
            // If the key is found, move it into the first tombstone seen (if any)
            if (buckets[index]->key == key) {
                if (firstTombstone != -1) {
                    buckets[firstTombstone] = buckets[index];
                    buckets[index] = TOMBSTONE;
                    return firstTombstone;
                }
                return index;
            }
            // Remember the first tombstone encountered
            if (firstTombstone == -1 && buckets[index] == TOMBSTONE) {
                firstTombstone = index;
            }
            index = (index + 1) % capacity; // wrap around like a circular array
        }
        // The key is absent; return the insertion position
        return firstTombstone == -1 ? index : firstTombstone;
    }

    string get(int key) {
        int index = findBucket(key);
        if (buckets[index] != nullptr && buckets[index] != TOMBSTONE) {
            return buckets[index]->value;
        }
        return "";
    }

    void put(int key, string value) {
        if (loadFactor() > loadThres) {
            extend();
        }
        int index = findBucket(key);
        // If the key already exists, update its value
        if (buckets[index] != nullptr && buckets[index] != TOMBSTONE) {
            buckets[index]->value = value;
            return;
        }
        buckets[index] = new Pair(key, value);
        size++;
    }

    void remove(int key) {
        int index = findBucket(key);
        if (buckets[index] != nullptr && buckets[index] != TOMBSTONE) {
            delete buckets[index];
            buckets[index] = TOMBSTONE;
            size--;
        }
    }

    void extend() {
        vector<Pair*> tempBuckets = buckets;
        capacity *= extendRatio;
        buckets = vector<Pair*>(capacity, nullptr);
        size = 0;
        for (Pair* pair : tempBuckets) {
            if (pair != nullptr && pair != TOMBSTONE) {
                put(pair->key, pair->value);
                delete pair;
            }
        }
    }

    void print() {
        for (Pair* pair : buckets) {
            if (pair == nullptr) {
                cout << "nullptr" << endl;
            } else if (pair == TOMBSTONE) {
                cout << "TOMBSTONE" << endl;
            } else {
                cout << pair->key << " -> " << pair->value << endl;
            }
        }
    }
};
typedef struct {
    int size;
    int capacity;
    double loadThres;
    int extendRatio;
    Pair** buckets;   // bucket array
    Pair* TOMBSTONE;  // marker for a removed pair
} HashMapOpenAddressing;

void extend(HashMapOpenAddressing* hashMap); // forward declaration, used by put()

HashMapOpenAddressing* newHashMapOpenAddressing() {
    HashMapOpenAddressing* hashMap = (HashMapOpenAddressing*)malloc(sizeof(HashMapOpenAddressing));
    hashMap->size = 0;
    hashMap->capacity = 4;
    hashMap->loadThres = 2.0 / 3.0;
    hashMap->extendRatio = 2;
    // calloc zeroes the bucket pointers so empty buckets read as NULL
    hashMap->buckets = (Pair**)calloc(hashMap->capacity, sizeof(Pair*));
    hashMap->TOMBSTONE = (Pair*)malloc(sizeof(Pair));
    hashMap->TOMBSTONE->key = -1;
    hashMap->TOMBSTONE->val = "-1";

    return hashMap;
}

void delHashMapOpenAddressing(HashMapOpenAddressing* hashMap) {
    Pair* pair;
    for (int i = 0; i < hashMap->capacity; i++) {
        pair = hashMap->buckets[i];
        if (pair != NULL && pair != hashMap->TOMBSTONE) {
            free(pair->val);
            free(pair);
        }
    }
    free(hashMap->buckets);
    free(hashMap->TOMBSTONE);
    free(hashMap);
}

int hashFunc(HashMapOpenAddressing* hashMap, int key) {
    return key % hashMap->capacity;
}

double loadFactor(HashMapOpenAddressing* hashMap) {
    return (double)hashMap->size / (double)hashMap->capacity;
}

int findBucket(HashMapOpenAddressing* hashMap, int key) {
    int index = hashFunc(hashMap, key);
    int firstTombstone = -1;
    // Probe until an empty bucket is reached
    while (hashMap->buckets[index] != NULL) {
        // If the key is found, move it into the first tombstone seen (if any)
        if (hashMap->buckets[index]->key == key) {
            if (firstTombstone != -1) {
                hashMap->buckets[firstTombstone] = hashMap->buckets[index];
                hashMap->buckets[index] = hashMap->TOMBSTONE;
                return firstTombstone;
            }
            return index;
        }
        // Remember the first tombstone encountered
        if (firstTombstone == -1 && hashMap->buckets[index] == hashMap->TOMBSTONE) {
            firstTombstone = index;
        }
        index = (index + 1) % hashMap->capacity; // wrap around like a circular array
    }
    // The key is absent; return the insertion position
    return firstTombstone == -1 ? index : firstTombstone;
}

const char* get(HashMapOpenAddressing* hashMap, int key) {
    int index = findBucket(hashMap, key);
    if (hashMap->buckets[index] != NULL && hashMap->buckets[index] != hashMap->TOMBSTONE) {
        return (const char*)hashMap->buckets[index]->val;
    }
    return "";
}

void put(HashMapOpenAddressing* hashMap, int key, const char* val) {
    if (loadFactor(hashMap) > hashMap->loadThres) {
        extend(hashMap);
    }
    int index = findBucket(hashMap, key);
    // If the key already exists, update its value
    if (hashMap->buckets[index] != NULL && hashMap->buckets[index] != hashMap->TOMBSTONE) {
        free(hashMap->buckets[index]->val);
        hashMap->buckets[index]->val = (char*)malloc(strlen(val) + 1);
        strcpy(hashMap->buckets[index]->val, val);
        return;
    }

    Pair* pair = (Pair*)malloc(sizeof(Pair));
    pair->key = key;
    pair->val = (char*)malloc(strlen(val) + 1);
    strcpy(pair->val, val);

    hashMap->buckets[index] = pair;
    hashMap->size++;
}

// Named removeItem to avoid clashing with remove() from <stdio.h>
void removeItem(HashMapOpenAddressing* hashMap, int key) {
    int index = findBucket(hashMap, key);
    if (hashMap->buckets[index] != NULL && hashMap->buckets[index] != hashMap->TOMBSTONE) {
        Pair* pair = hashMap->buckets[index];
        free(pair->val);
        free(pair);
        hashMap->buckets[index] = hashMap->TOMBSTONE;
        hashMap->size--;
    }
}

void extend(HashMapOpenAddressing* hashMap) {
    Pair** tempBuckets = hashMap->buckets;
    int oldCapacity = hashMap->capacity;
    hashMap->capacity *= hashMap->extendRatio;

    hashMap->buckets = (Pair**)calloc(hashMap->capacity, sizeof(Pair*));
    hashMap->size = 0;

    for (int i = 0; i < oldCapacity; i++) {
        if (tempBuckets[i] != NULL && tempBuckets[i] != hashMap->TOMBSTONE) {
            put(hashMap, tempBuckets[i]->key, tempBuckets[i]->val);
            free(tempBuckets[i]->val);
            free(tempBuckets[i]);
        }
    }
    free(tempBuckets);
}

void print(HashMapOpenAddressing* hashMap) {
    for (int i = 0; i < hashMap->capacity; i++) {
        Pair* pair = hashMap->buckets[i];
        if (pair == NULL) {
            printf("NULL\n");
        } else if (pair == hashMap->TOMBSTONE) {
            printf("TOMBSTONE\n");
        } else {
            printf("%d -> %s\n", pair->key, pair->val);
        }
    }
}
  2. Quadratic probing

On a collision, the probe skips ahead by the square of the probe count (1, 4, 9, ... slots).

Advantages:

  • It mitigates (but does not eliminate) the clustering effect of linear probing.

  • It skips increasingly large distances in search of an empty slot, which helps spread data more evenly.

Drawbacks:

  • Clustering still occurs: some positions remain more likely to be occupied than others.

  • Because the step grows as the square of the probe count, the probe sequence may not cover the whole table; an empty bucket can exist yet never be reached.

  3. Multiple hashing

Probe with a series of hash functions hash1(x), hash2(x), hash3(x), ...

  • Insert: try each hash function in turn until an empty bucket is found, then insert there.

  • Find: try each hash function in the same order; if an empty bucket is hit, or all hash functions have been tried, the element is not in the table.
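The quadratic-probing sequence can be sketched as follows (a sketch, not tied to any implementation above; the identity hash for integer keys is assumed, and the offset on the i-th attempt is i²):

```python
def quadratic_probe(key: int, i: int, capacity: int) -> int:
    """Index tried on the i-th probe: (hash(key) + i^2) % capacity."""
    return (key + i * i) % capacity

# Probe sequence for key 7 in a table of 10 buckets: offsets 0, 1, 4, 9, ...
print([quadratic_probe(7, i, 10) for i in range(4)])  # → [7, 8, 1, 6]
```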

How Different Languages Implement Hash Tables

  • Java uses separate chaining. Since JDK 1.8, when a HashMap's array length reaches 64 and a list's length reaches 8, that list is converted to a red-black tree to improve lookup performance.

  • Python uses open addressing. Its dict probes with a pseudo-random sequence.

  • Go uses separate chaining. Each bucket holds at most 8 key-value pairs; overflow links to an overflow bucket. When overflow buckets become too numerous, Go performs a special same-size grow operation to keep performance steady.

Hash Algorithms

Open addressing and separate chaining only keep a hash table usable when collisions happen; they do not reduce how often collisions occur in the first place. The more frequent the collisions, the worse the table performs. To lower the probability of collisions, we must improve the hash algorithm itself.

Goals of a Hash Algorithm

When used for hash tables, a hash algorithm should provide:

  • Determinism: the same input always yields the same output -> reliable.

  • High efficiency: computing the hash is fast; the lower the overhead, the more practical the algorithm.

  • Uniform distribution: the more evenly key-value pairs are spread, the lower the collision probability.

When used for password storage or data integrity checks, these additional goals apply:

  • One-wayness: no information about the input can be recovered from the hash value.

  • Collision resistance: it should be extremely hard to find two different inputs with the same hash value.

  • Avalanche effect: a tiny change in the input should cause a large, unpredictable change in the output.

Designing a Hash Algorithm

Here are some simple hash algorithms:

  • Additive hash: sum the ASCII codes of the input characters and use the total as the hash value.
def add_hash(key: str) -> int:
    """Additive hash"""
    hash = 0
    modulus = 1000000007
    for c in key:
        hash += ord(c)
    return hash % modulus
int addHash(string key) {
    long long hash = 0;
    const int MODULUS = 1000000007;
    for (unsigned char c : key) {
        hash = (hash + (int)c) % MODULUS;
    }
    return (int)hash;
}
int addHash(char *key) {
    long long hash = 0;
    const int MODULUS = 1000000007;
    size_t n = strlen(key); // compute the length once, not on every iteration
    for (size_t i = 0; i < n; i++) {
        hash = (hash + (unsigned char)key[i]) % MODULUS;
    }
    return (int)hash;
}
  • Multiplicative hash: exploit the weak correlation of multiplication, multiplying by a constant each round to accumulate each character's ASCII code into the hash value.
def mul_hash(key: str) -> int:
    """Multiplicative hash"""
    hash = 0
    modulus = 1000000007
    for c in key:
        hash = 31 * hash + ord(c)
    return hash % modulus
int mulHash(string key) {
    long long hash = 0;
    const int MODULUS = 1000000007;
    for (unsigned char c : key) {
        hash = (31 * hash + (int)c) % MODULUS;
    }
    return (int)hash;
}
int mulHash(char *key) {
    long long hash = 0;
    const int MODULUS = 1000000007;
    size_t n = strlen(key); // compute the length once, not on every iteration
    for (size_t i = 0; i < n; i++) {
        hash = (31 * hash + (unsigned char)key[i]) % MODULUS;
    }
    return (int)hash;
}

  • XOR hash: accumulate each element of the input into a hash value via the XOR operation.
def xor_hash(key: str) -> int:
    """XOR hash"""
    hash = 0
    modulus = 1000000007
    for c in key:
        hash ^= ord(c)
    return hash % modulus
int xorHash(string key) {
    int hash = 0;
    const int MODULUS = 1000000007;
    for (unsigned char c : key) {
        hash ^= (int)c;
    }
    return hash % MODULUS; // modulo, matching the Python version (& would mask bits)
}
int xorHash(char *key) {
    int hash = 0;
    const int MODULUS = 1000000007;
    size_t n = strlen(key); // compute the length once, not on every iteration
    for (size_t i = 0; i < n; i++) {
        hash ^= (unsigned char)key[i];
    }
    return hash % MODULUS; // modulo, matching the Python version (& would mask bits)
}
  • Rotational hash: accumulate each character's ASCII code into a hash value, rotating the hash value before each accumulation.
def rot_hash(key: str) -> int:
    """Rotational hash"""
    hash = 0
    modulus = 1000000007
    for c in key:
        hash = (hash << 4) ^ (hash >> 28) ^ ord(c)
    return hash % modulus
int rotHash(string key) {
    long long hash = 0;
    const int MODULUS = 1000000007;
    for (unsigned char c : key) {
        hash = ((hash << 4) ^ (hash >> 28) ^ (int)c) % MODULUS;
    }
    return (int)hash;
}
int rotHash(char *key) {
    long long hash = 0;
    const int MODULUS = 1000000007;
    size_t n = strlen(key); // compute the length once, not on every iteration
    for (size_t i = 0; i < n; i++) {
        hash = ((hash << 4) ^ (hash >> 28) ^ (unsigned char)key[i]) % MODULUS;
    }
    return (int)hash;
}

Each of these algorithms ends by taking the result modulo a large prime to keep the hash value in a suitable range. Using a large prime as the modulus maximizes the uniformity of the hash distribution: a prime shares no common factors with other numbers, which reduces the periodic patterns that the modulo operation can introduce and thus helps avoid collisions.

If the keys are uniformly random, a prime or composite modulus works equally well; if the keys exhibit some periodicity, a composite modulus is much more prone to clustering. In practice, we pick a sufficiently large prime as the modulus to eliminate periodic patterns as far as possible and make the hash algorithm more robust.
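The effect of a composite versus a prime modulus on periodic keys is easy to observe directly (a small experiment to illustrate the claim above, not from the original text):

```python
# Keys with period 3: multiples of 3.
keys = [3 * i for i in range(100)]

# Composite modulus 9 shares a factor with the period: only 3 of 9 buckets ever used.
print(len({k % 9 for k in keys}))   # → 3
# Prime modulus 13 shares no factor with the period: all 13 buckets get used.
print(len({k % 13 for k in keys}))  # → 13
```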

Common Hash Algorithms

In practice, standard hash algorithms such as MD5, SHA-1, SHA-2, and SHA-3 are typically used; they map input data of any length to a hash value of fixed length.
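Python's hashlib module exposes several of these standard algorithms; inputs of any length map to a digest of fixed length, and a one-character change flips the digest completely (the avalanche effect mentioned earlier):

```python
import hashlib

# SHA-256 always produces a 256-bit digest (64 hex characters), regardless of input size.
short_digest = hashlib.sha256(b"hi").hexdigest()
long_digest = hashlib.sha256(b"a" * 1_000_000).hexdigest()
print(len(short_digest), len(long_digest))  # → 64 64

# Avalanche effect: changing one character yields an entirely different digest.
print(hashlib.sha256(b"hello").hexdigest() == hashlib.sha256(b"hellp").hexdigest())  # → False
```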

Hash Values of Data Structures

A hash table's keys can be integers, floating-point numbers, strings, and other data types. Programming languages usually ship built-in hash algorithms for these types, used to compute bucket indices.

data        # can be any hashable type: integer, boolean, float, string, tuple, object, ...
hash(data)  # hash() computes the data's hash value
// The C++ standard library's std::hash provides hash computation only for basic types;
// hashing arrays or custom objects must be implemented manually
int num = 3;
size_t hashNum = hash<int>()(num);

In many programming languages, only immutable objects can serve as hash table keys. Custom objects are nevertheless hashable, because an object's hash value is typically derived from its memory address, which does not change when the object's contents change. Note also that each time the Python interpreter starts, it mixes a random salt into the string hash function; this effectively prevents HashDoS attacks and improves the security of the hash algorithm.
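In Python this distinction is easy to observe: immutable built-ins are hashable, mutable ones are not, and a plain custom object is hashable by default because its hash comes from its identity, not its contents:

```python
print(isinstance(hash((1, 2, 3)), int))  # tuples (immutable) are hashable → True

try:
    hash([1, 2, 3])                      # lists (mutable) are not hashable
except TypeError:
    print("unhashable")                  # → unhashable

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

p = Point(1, 2)
h1 = hash(p)
p.x = 99                                 # mutating the object's contents...
print(hash(p) == h1)                     # ...does not change its hash → True
```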

  • Title: Data Structures: Hash Tables
  • Author: 敖炜
  • Created: 2023-11-02 19:48:46
  • Updated: 2024-04-19 09:29:58
  • Link: https://ao-wei.github.io/2023/11/02/数据结构:哈希表/
  • License: This article is licensed under CC BY-NC-SA 4.0.