C++哈希

一.哈希概念

哈希又叫做散列。本質就是通過哈希函數把關鍵字key和存儲位置建立映射關系，查找時通過這個哈希函數計算出key存儲的位置，進行快速查找。

上述概念可能不那么好懂，下面的例子可以輔助我們理解。

無論是數組還是鏈表，查找一個數都需要通過遍歷的方式，假設用單向查找最壞的情況就是查找n次，假設雙向查找最壞的情況是查找n/2次，在數據量很大的情況下查找一個數就變得比較困難。最好的情況就是希望一下子就可以找到目標數字，由此出現了哈希表。

什么是映射關系？

映射關系分為兩種：一對一和一對多

一對一：假設有2 3 90 100這四個數據，那我就開100個空間，將每一個數存在對應下標的位置，這樣查找數據的時候一下子就找到了。

一對一的弊端十分明顯，就是當數據較為分散，例如存儲1和1001，那就要開辟1001個空間，十分浪費。那么一對多的存儲就會比較好。

一對多：就是一個空間對應著多個數據。按照概念存儲數據的時候肯定會產生沖突，這就是哈希沖突。

二.除法散列法

針對沖突的情況可以用除法散列法來解決。顧名思義，假設哈希表大小為M，那么通過Key除以M的余數作為映射位置下標，也就是哈希函數為：h(key)=key%M

例如：假設數字為12，哈希表大小為10

12/10的余數是2，那么就將12映射到2的位置，但是如果還要存儲32這個數據，不難發現32/10的余數仍然是2于是就產生了沖突，那該怎么解決呢？

2.1線性探測

從發生沖突的位置開始，依次線性向后探測，直到尋找到下一個沒有存儲數據的位置為止，如果走到哈希表尾，則繞回到哈希表頭的位置。

就像上面的例子，12已經映射到2的位置，那么32就需要向后線性探測，假如下標為3的位置沒有數據存儲那么就將32存儲到下標為3的位置。

我們最初的目標就是避免遍歷，那如果數據一直沖突的話也就無法解決這個問題了，很簡單通過擴容就可以解決這個問題。

2.1.1哈希因子

說到擴容就不得不提到哈希因子了，哈希因子就是有效數據個數/哈希表大小。

例如數據個數為2，哈希表大小為10，那么哈希因子就是0.2，一般來說當哈希因子到0.7時就需要開始擴容了。

2.1.2哈希桶

有時候擴容仍然無法解決沖突的問題，那么哈希桶就很好解決了這個問題。

哈希桶概念：哈希表中存儲一個指針，當沒有數據映射到這個位置時，這個指針為空；有多個數據映射到這個位置時，把沖突的數據連接成一個鏈表，掛在哈希表這個位置下面。

哈希桶的擴容：

 // 擴容函數void Resize() {size_t newSize = _tables.size() * 2;vector<Node*> newTables(newSize, nullptr);KeyOfT kot;Hash hs;// 重新哈希所有元素for (size_t i = 0; i < _tables.size(); ++i) {Node* cur = _tables[i];while (cur) {Node* next = cur->_next;size_t hashi = hs(kot(cur->_data)) % newSize;cur->_next = newTables[hashi];newTables[hashi] = cur;cur = next;}}_tables.swap(newTables);}

?代碼中出現KeyOfT是因為set和map取出key方式不一樣，set可以直接取出key而map需要從pair中取出。

三.key不是整數的情況?

在計算位置的時候需要用到key來除以哈希表大小，當key不為整數時分兩種情況：

double等可以直接強制類型轉換為int
string不能強制類型轉換成int

template<class K>
struct HashFunc {size_t operator()(const K& key) {return (size_t)key; // 默認實現：適用于整數類型}
};// 字符串類型特化
template<>
struct HashFunc<string> {size_t operator()(const string& key) {size_t hash = 0;for (char c : key) {hash = hash * 131 + c; // 使用BKDR哈希算法處理字符串}return hash;}
};

完整代碼：

#pragma once
#include<vector>
#include<iostream>
using namespace std;
// 通用哈希函數模板（默認使用類型轉換）
template<class K>
struct HashFunc {size_t operator()(const K& key) {return (size_t)key; // 默認實現：適用于整數類型}
};// 字符串類型特化
template<>
struct HashFunc<string> {size_t operator()(const string& key) {size_t hash = 0;for (char c : key) {hash = hash * 131 + c; // 使用BKDR哈希算法處理字符串}return hash;}
};
namespace hash_bucket {template<class T>struct HashNode{T _data;HashNode<T>* _next;HashNode(const T& data):_data(data), _next(nullptr){}};template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>class HashTable {typedef HashNode<T> Node;public:// 構造函數HashTable() {_tables.resize(10, nullptr);}// 析構函數：釋放所有節點內存~HashTable() {for (auto& bucket : _tables) {Node* cur = bucket;while (cur) {Node* next = cur->_next;delete cur;cur = next;}bucket = nullptr;}_n = 0;}// 插入元素bool Insert(const T& data) {KeyOfT kot;Hash hs;const K& key = kot(data); // 提取鍵// 檢查鍵是否已存在if (Find(key)) return false;// 負載因子超過1時擴容if (_n >= _tables.size()) {Resize();}// 計算哈希值并插入size_t hashi = hs(key) % _tables.size();Node* newNode = new Node(data);newNode->_next = _tables[hashi];_tables[hashi] = newNode;++_n;return true;}// 查找元素bool Find(const K& key) {Hash hs;KeyOfT kot;size_t hashi = hs(key) % _tables.size();Node* cur = _tables[hashi];while (cur) {if (kot(cur->_data) == key) {return true;}cur = cur->_next;}return false;}// 刪除元素bool Erase(const K& key) {Hash hs;size_t hashi = hs(key) % _tables.size();Node* prev = nullptr;Node* cur = _tables[hashi];while (cur) {if (kot(cur->_data) == key) {if (prev == nullptr) {_tables[hashi] = cur->_next;}else {prev->_next = cur->_next;}delete cur;--_n;return true;}prev = cur;cur = cur->_next;}return false;}private:// 擴容函數void Resize() {size_t newSize = _tables.size() * 2;vector<Node*> newTables(newSize, nullptr);KeyOfT kot;Hash hs;// 重新哈希所有元素for (size_t i = 0; i < _tables.size(); ++i) {Node* cur = _tables[i];while (cur) {Node* next = cur->_next;size_t hashi = hs(kot(cur->_data)) % newSize;cur->_next = newTables[hashi];newTables[hashi] = cur;cur = next;}}_tables.swap(newTables);}private:vector<Node*> _tables;size_t _n = 0;};} // namespace hash_bucket

四.unordered_set和unordered_map的封裝

1.unordered_set

#define _CRT_SECURE_NO_WARNINGS
#include"hashi.h"
namespace happy
{template<class K>class unorder_set{struct SetKeyOfT{ const K& operator()(const K& key) {return key;}};public://取類模板的內嵌類型需要typenametypedef typename hash_bucket::HashTable<K, const K, SetKeyOfT>::Iterator iterator;typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT>::const_Iterator Citerator;iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}Citerator begin()const{return _ht.begin();}Citerator end()const{return _ht.end();}pair<iterator,bool> insert(const K& key){return _ht.Insert(key);}private:hash_bucket::HashTable<K, const K,SetKeyOfT> _ht;};void print(const unorder_set<int>& s){unorder_set<int>::Citerator it = s.begin();while (it != s.end()){cout << *it << " ";++it;}cout << endl;}
}

2.unordered_map

#pragma once
#include"hashi.h"
#include <utility> // pair
#include <iostream>
using namespace std;
namespace happy
{template<class K, class V, class Hash = HashFunc<K>>class unorder_map{struct MapKeyOfT{const K& operator() (const pair<const K, V>& kv){return kv.first;}};public:typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::const_Iterator const_iterator;iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}const_iterator begin() const{return _ht.begin();}const_iterator end() const{return _ht.end();}pair<iterator, bool> insert(const pair<const K, V>& kv){return _ht.Insert(kv);}V& operator[](const K& key){pair<iterator, bool> ret = insert({ key, V() });return ret.first->second;}private:hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;};void test_unordered_map(){unorder_map<string, string> dict;dict.insert({ "string", "字符串" });dict.insert({ "left", "左邊" });unorder_map<string, string>::iterator it = dict.begin();while (it != dict.end()){cout << it->first << ":" << it->second << endl;++it;}cout << endl;// 測試 operator[]dict["right"] = "右邊";cout << "dict[\"right\"]: " << dict["right"] << endl;}
}

3.hashi?

#pragma once
#include<vector>
#include<iostream>
using namespace std;
// 通用哈希函數模板（默認使用類型轉換）
template<class K>
struct HashFunc {size_t operator()(const K& key) {return (size_t)key; // 默認實現：適用于整數類型}
};// 字符串類型特化
template<>
struct HashFunc<string> {size_t operator()(const string& key) {size_t hash = 0;for (char c : key) {hash = hash * 131 + c; // 使用BKDR哈希算法處理字符串}return hash;}
};
namespace hash_bucket {template<class T>struct HashNode{T _data;HashNode<T>* _next;HashNode(const T& data):_data(data), _next(nullptr){}};template<class K, class T, class KeyOfT, class Hash >class HashTable;template<class K, class T,class Ref,class Ptr, class KeyOfT, class Hash = HashFunc<K>>struct HTIterator{typedef HashNode<T> Node;typedef HashTable<K, T, KeyOfT, Hash> HT;typedef HTIterator<K, T,Ref,Ptr, KeyOfT, Hash> iterator;Node* _node;//結點指針const HT* _ht;//哈希表指針HTIterator(Node*node,const HT*ht):_node(node),_ht(ht){}Ref operator*(){return _node->_data;}Ptr operator->(){return &_node->_data;}bool operator!=(const iterator& s){return _node != s._node;}bool operator==(const iterator& s){return _node == s._node;}iterator& operator++(){if (_node->_next){_node = _node->_next;}else{KeyOfT kot;Hash hs;size_t hashi = hs(kot(_node->_data)) % _ht->_tables.size();++hashi;while (hashi < _ht->_tables.size()){if (_ht->_tables[hashi]){_node = _ht->_tables[hashi];break;}else{++hashi;}}//所有桶走完if (hashi == _ht->_tables.size()){_node = nullptr;}}return *this;}};template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>class HashTable {typedef HashNode<T> Node;//友元聲明template<class K, class T,class Ref,class Ptr, class KeyOfT, class Hash >friend struct HTIterator;public:typedef HTIterator<K, T,T&,T*, KeyOfT, Hash> Iterator;typedef HTIterator<K, T,const T&,const T*, KeyOfT, Hash> const_Iterator;Iterator begin(){for (size_t hashi = 0; hashi < _tables.size(); hashi++){if (_tables[hashi]){return Iterator(_tables[hashi],this);}}return end();}Iterator end(){return Iterator(nullptr, this);}const_Iterator begin()const{for (size_t hashi = 0; hashi < _tables.size(); hashi++){if (_tables[hashi]){return const_Iterator(_tables[hashi], this);}}return end();}const_Iterator end()const{return const_Iterator(nullptr, this);}// 構造函數HashTable() {_tables.resize(10, nullptr);}// 析構函數：釋放所有節點內存~HashTable() {for (auto& bucket : _tables) {Node* cur = bucket;while (cur) {Node* next = cur->_next;delete cur;cur = next;}bucket = nullptr;}_n = 0;}// 插入元素pair<Iterator,bool> Insert(const T& data) {KeyOfT kot;Hash hs;Iterator it = Find(kot(data));const K& key = kot(data); // 提取鍵// 檢查鍵是否已存在if (it != end()) return { it,false };// 負載因子超過1時擴容if (_n >= _tables.size()) {Resize();}// 計算哈希值并插入size_t hashi = hs(key) % _tables.size();Node* newNode = new Node(data);newNode->_next = _tables[hashi];_tables[hashi] = newNode;++_n;return { {newNode,this},true };}// 查找元素Iterator Find(const K& key) {Hash hs;KeyOfT kot;size_t hashi = hs(key) % _tables.size();Node* cur = _tables[hashi];while (cur) {if (kot(cur->_data) == key) {return Iterator(cur, nullptr);}cur = cur->_next;}return end();}// 刪除元素bool Erase(const K& key) {Hash hs;size_t hashi = hs(key) % _tables.size();Node* prev = nullptr;Node* cur = _tables[hashi];while (cur) {if (kot(cur->_data) == key) {if (prev == nullptr) {_tables[hashi] = cur->_next;}else {prev->_next = cur->_next;}delete cur;--_n;return true;}prev = cur;cur = cur->_next;}return false;}private:// 擴容函數void Resize() {size_t newSize = _tables.size() * 2;vector<Node*> newTables(newSize, nullptr);KeyOfT kot;Hash hs;// 重新哈希所有元素for (size_t i = 0; i < _tables.size(); ++i) {Node* cur = _tables[i];while (cur) {Node* next = cur->_next;size_t hashi = hs(kot(cur->_data)) % newSize;cur->_next = newTables[hashi];newTables[hashi] = cur;cur = next;}}_tables.swap(newTables);}private:vector<Node*> _tables;size_t _n = 0;};} // namespace hash_bucket