当前位置：首页 > news >正文

【C++】---STL之用哈希桶模拟实现：unordered_set和unordered_map

news 2025/12/21 16:36:42

哈希桶模拟实现：unordered_set和unordered_map

一、哈希桶节点的修改
二、哈希表
- 1、构造
- 2、拷贝构造
- 3、赋值运算符重载
- 4、析构
- 5、迭代器
- 6、插入
- 7、查找
- 8、删除
三、迭代器
- 1、operator++()
- 2、operator*()
- 3、operator->()
- 4、operator！=()
- 5、operator==()
四、封装unordered_set和unordered_map
- 1、封装unordered_set
- - （1）仿函数SetKeyOfT
  - （2）迭代器
  - （3）实例
- 2、封装unordered_map
- - （1）仿函数MapKeyOfT
  - （2）迭代器
  - （3）实例
五、完整代码段

一、哈希桶节点的修改

如果我们要用哈希桶对unordered_set和unordered_map进行封装的话，我们面临一个问题那就是 unordered_set传给哈希桶的是K，而unordered_map传给哈希桶的是pair，同样我们之前也遇到过set和map的封装，是用的红黑树。
面对传给底层的数据类型不同怎么办？我们统一用T来代替：K和pair。

template <class T>
struct HashNode
{T _data;HashNode<T>* _next;//构造HashNode(const T& data):_data(data),_next(nullptr){}};

二、哈希表

关于哈希表的第一个类模板参数就是K，因为这个K不能去掉，因为K可以用来Find()函数等需要用到！

template<class K,class T,class KeyOfT,class Hash=HashFunc<K>>
class HashTable
{typedef HashNode<T> Node;// 因为__HashIterator要访问： HashTable的私有成员变量_tables[i] ，所以在class HashTable里面要加入友元！friend struct __HashIterator;private:vector<Node*> _tables;// 指针数组！size_t _n;// 标记有效元素个数public: typedef __HashIterator<K,T,KeyOfT,Hash> iterator;
};

1、构造

	HashTable()
{_tables.resize(10,nullptr);// 先开10个空间_n=0;
};

2、拷贝构造

        //拷贝构造HashTable(const HashTable& ht){_n = ht._n;//存储有效数据的个数一致_table.resize(ht._table.size());//开同样大小的空间//遍历ht，将ht的_table的每个结点都拷贝到_table中for (size_t i = 0; i < ht._table.size(); i++){Node* cur = ht._table[i];while (cur){Node* copy = new Node(cur->_data);//头插到新表copy->_next = _table[i];//copy的下一个桶为_table[i]_table[i] = copy;//把copy作为当前位置的第一个桶cur = cur->_next;//cur往下移					}}}

3、赋值运算符重载

		//赋值运算符重载HashTable& operator=(HashTable ht){_table.swap(ht._table);swap(_n, ht._n);return *this;}

4、析构

// 析构函数:针对于自定义类型(实际上就是链表的析构)

因为哈希桶：①哈希表②表下面挂的节点（节点是我们的自定义类型，对于自定义类型，我们需要自己写一个析构函数来销毁他）

~HashTable()
{for(size_t i=0;i<_tables.size();i++){Node* cur= _tables[i];while(cur){Node* next=cur->_next;delete cur;cur=next;}_tables[i]=nullptr;}
};

5、迭代器

iterator begin(){for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];if (cur)return iterator(cur, this);}return end();}

iterator end(){return iterator(nullptr, this);}

const_iterator begin() const{for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];if (cur){// this -> const HashTable*return const_iterator(cur, this);}}return end();}

const_iterator end() const{return const_iterator(nullptr, this);}

6、插入

// 1、插入bool Insert(const T& data){// 扩容：// 负载因子==1的时候就扩容！KeyOfT kot;Hash hs;if (Find(kot(data)))return false;// 方法二：遍历旧数组、直接将旧表的数据计算好位置，然后直接插入到新表。if (_n == _tables.size())// 扩容{vector<Node*> new_table(_tables.size() * 2, nullptr);//遍历旧数组for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->_next;// 先保存一下 next// 头插到新表相应位置！// 老套路：先计算位置size_t hashi = cur->_kv.first % new_table.size();// 头插：不要搞错了：cur是旧表中将要插入的数据！// 头插思路：// ①new[hashi]就是桶的第一个节点，操作：只需要将要插入节点的_next指向new就好了// ②然后更新 桶的第一个位置！！！cur->_next= new_table[hashi]new_table[hashi] = cur;cur = next;// 继续对原数组进行遍历查找出所有带有桶节点的。}_tables[i] = nullptr;// 防止 有两个位置 指向同一个节点！对原数组的数据进行指向空}_tables.swap(new_table);}size_t hashi =hs(kot(data))% _tables.size();Node* newnode = new Node(data);// 头插newnode->_next = _tables[hashi];_tables[hashi] = newnode;++_n;return true;}

7、查找

// 2、查找：Node* Find(const K& key){Hash hs;size_t hashi = hs(key) % _tables.size();Node* cur = _tables[hashi];while (cur){// 找到了就返回！if (cur->_data == key)return cur;// 没找到 就继续往下遍历！cur = cur->_next;}return nullptr;}

8、删除

// 3、删除：bool Erase(const K& key){// 删除就相当于在单链表中进行删除，需要保存前一个位置:prev// 大致分两种情况：①删除头结点 ②删除中间 和 尾结点Hash hs;KeyOfT kot;size_t hashi = hs(kot(key)) % _tables.size();Node* prev = nullptr;Node* cur = _tables[hashi];while (cur){if (cur->_data== key){// 可以删除了// ①如果删除的是头结点if (prev == nullptr){_tables[hashi] = cur->_next;}else{prev->_next = cur->_next;}delete cur;return true;}else{prev = cur;cur = cur->_next;}}return false;}

三、迭代器

迭代器需要前置声明HashTable，因为HashTable类中使用了__HTIterator迭代器，且__HTIterator中使用了HashTable类的指针，为什么要用指针呢？
因为C++编译器自上而下编译源文件的时候，对每一个数据的定义，需要知道定义的数据类型的大小。在预先声明语句class HashTable;之后，编译器已经知道HashTable是一个类，但是其中的数据却是未知的，因此HashTable类型的大小也不知道，这样就造成了编译失败；将__HTIterator中的_node更改为HashTable指针类型之后，由于在特定的平台上，指针所占的空间是一定的（在Win32平台上是4字节，在64位平台上是8字节），这样可以通过编译。

迭代器的参数包含哈希节点和哈希表：

	// 前置声明template<class K, class T, class KeyOfT, class HashFunc>class HashTable;// 迭代器template<class K, class T, class KeyOfT, class HashFunc = Hash<K>>struct __HTIterator{typedef HashNode<T> Node;//哈希节点typedef __HTIterator<K, T, KeyOfT, HashFunc> Self;//实现++、--typedef HashTable<K, T, KeyOfT, HashFunc> HT;//哈希表Node* _node;HT* _pht;__HTIterator(Node* node, HT* pht):_node(node)//哈希节点, _pht(pht)//哈希表{}};

1、operator++()

迭代器最重要的部分呢~
在这里插入图片描述

Self& operator++(){// 1、一个桶没遍历完的情况！if (_node->_next){_node = _node->_next;}// 2、一个桶遍历完了，寻找下一个有节点的桶！else{// 下一个有节点的桶 怎么找？就需要利用原来哈希表KeyOfT kot;Hash hs;size_t i = hs(kot(_node->_data)) % _tables.size();// 这是目前 遍历完一个桶的下标i++i;for (; i < _pht->_tables.size(); i++){if (_pht->_tables[i]){break;}}// 此处的_tables[i]就是我们的目标：下一个不为空的桶if (i == _pht->_tables.size()){_node = nullptr;}else{_node =_pht->_tables[i];}}return *this;}

2、operator*()

T& operator*(){return _node->_data;}

3、operator->()

Ptr operator->(){return &_node->_data;}

4、operator！=()

bool operator!=(const Self& s){return _node != s._node;}

5、operator==()

		bool operator == (const Self& s){return _node == s._node;}

四、封装unordered_set和unordered_map

1、封装unordered_set

unordered_set的成员就是哈希表，不过要传模板参数，SetKeyOfT就是传给哈希表的仿函数

#pragma once
#include "HashTable.h"namespace yjl
{template<class K>class unordered_set{private:OpenHash::HashTable<K, K, SetKeyOfT> _ht ;};
}

（1）仿函数SetKeyOfT

		//set就取kstruct SetKeyOfT{const K& operator()(const K& k){return k;}};

（2）迭代器

对于unordered_set的迭代器，如果不加typename，类模板里面找迭代器的类型找不到，因为OpenHash::HashTable<K, K, SetKeyOfT>::iterator没有实例化，因此编译器并不知道它是类型还是成员函数或成员变量，编译无法通过。
用typename表明OpenHash::HashTable<K, K, SetKeyOfT>::iterator是一个类型，不是成员函数或成员变量，不需要等到实例化时才能确定。

	public:typedef typename OpenHash::HashTable<K, K, SetKeyOfT>::iterator iterator;iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}pair<iterator, bool> Insert(const K& k){return _ht.Insert(k);}

（3）实例

	void test_unordered_set1(){unordered_set<int> us;us.Insert(100);us.Insert(5);us.Insert(6);us.Insert(32);us.Insert(8);us.Insert(14);us.Insert(65);us.Insert(27);us.Insert(39);unordered_set<int>::iterator it = us.begin();while (it != us.end()){cout << *it << " ";++it;}cout << endl;}

2、封装unordered_map

unordered_map的成员就是哈希表，不过要传模板参数，MapKeyOfT就是传给哈希表的仿函数

#pragma once
#include "HashTable.h"namespace yjl
{template<class K,class V>class unordered_map{private:OpenHash::HashTable<K, pair<K, V>, MapKeyOfT> _ht;};

（1）仿函数MapKeyOfT

		//map就取kv的firststruct MapKeyOfT{const K& operator()(const pair<K,V>& kv){return kv.first;}};

（2）迭代器

	public:typedef typename OpenHash::HashTable<K, pair<K,V>, MapKeyOfT>::iterator iterator;iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}pair<iterator, bool> Insert(const pair<K,V>& kv){return _ht.Insert(kv);}V& operator[](const K& key){pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));return ret.first->iterator;}

（3）实例

	void test_unordered_map1(){unordered_map<string,string> um;um.Insert(make_pair("spring", "春天"));um.Insert(make_pair("summer", "夏天"));um.Insert(make_pair("autumn", "秋天"));um.Insert(make_pair("winter", "冬天"));unordered_map<string,string>::iterator it = um.begin();while (it != um.end()){cout << it->first << ":" << it->second << endl;++it;}}

五、完整代码段

HashTable.h

#pragma once
#include<vector>
#include<iostream>
using namespace std;namespace OpenHash
{//哈希仿函数template<class K>struct Hash{size_t operator()(const K& key){return key;}};//string仿函数template<>struct Hash<string>//模板特化{size_t operator()(const string& s){size_t value = 0;for (auto e : s){value += e;value *= 131;}return value;}};template<class T>struct HashNode{HashNode<T>* _next;T _data;HashNode(const T& data):_data(data), _next(nullptr){}};//前置声明HashTable,因为HashTable类中使用了__HTIterator，且__HTIterator中使用了HashTable类的指针，为什么要用指针//C++编译器自上而下编译源文件的时候，对每一个数据的定义，总是需要知道定义的数据类型的大小。在预先声明语句class HashTable;之后，编译器已经知道HashTable是一个类，但是其中的数据却是未知的，因此HashTable类型的大小也不知道，这样就造成了编译失败；将__HTIterator中的_node更改为HashTable指针类型之后，由于在特定的平台上，指针所占的空间是一定的（在Win32平台上是4字节，在64位平台上是8字节），这样可以通过编译// 前置声明template<class K, class T, class KeyOfT, class HashFunc>class HashTable;// 迭代器template<class K, class T, class KeyOfT, class HashFunc = Hash<K>>struct __HTIterator{typedef HashNode<T> Node;//哈希节点typedef __HTIterator<K, T, KeyOfT, HashFunc> Self;//实现++、--typedef HashTable<K, T, KeyOfT, HashFunc> HT;//哈希表Node* _node;HT* _pht;__HTIterator(Node* node, HT* pht):_node(node)//哈希节点, _pht(pht)//哈希表{}Self& operator++(){//不是当前位置最后一个节点if (_node->_next){_node = _node->_next;}else//是当前位置最后一个节点{KeyOfT kot;HashFunc hf;size_t index = hf(kot(_node->_data)) % _pht->_table.size();//找哈希表中下一个不为空的位置++index;while (index < _pht->_table.size()){if (_pht->_table[index])//不为空{_node = _pht->_table[index];return *this;}else//为空{++index;}}_node = nullptr;//后面的所有位置都为空，_node置空，返回当前位置即可}return *this;}T& operator*(){return _node->_data;}T* operator->(){return &_node->_data;}bool operator != (const Self& s) const{return _node != s._node;}bool operator == (const Self& s) const{return _node == s._node;}};template<class K, class T, class KeyOfT, class HashFunc = Hash<K>>class HashTable{typedef HashNode<T> Node;template<class K,class T,class KeyOfT,class HashFunc>friend struct __HTIterator;public:typedef __HTIterator<K, T, KeyOfT, HashFunc> iterator;//构造函数,default指定生成默认构造函数HashTable() = default;//拷贝构造HashTable(const HashTable& ht){_n = ht._n;//存储有效数据的个数一致_table.resize(ht._table.size());//开同样大小的空间//遍历ht，将ht的每个结点都拷贝到_table中for (size_t i = 0; i < ht._table.size(); i++){Node* cur = ht._table[i];while (cur){Node* copy = new Node(cur->_data);//头插到新表copy->_next = _table[i];//copy的下一个桶为_table[i]_table[i] = copy;//把copy作为当前位置的第一个桶cur = cur->_next;//cur往下移					}}}//赋值运算符重载HashTable& operator=(HashTable ht){_table.swap(ht._table);swap(_n, ht._n);return *this;}//析构~HashTable(){for (size_t i = 0; i < _table.size(); i++){Node* cur = _table[i];while (cur){Node* next = cur->_next;delete cur;cur = next;}_table[i] = nullptr;}}iterator begin(){size_t i = 0;while (i < _table.size()){if (_table[i]){return iterator(_table[i], this);}++i;}return end();}iterator end(){return iterator(nullptr, this);}size_t GetNextPrime(size_t prime){const int PRIMECOUNT = 28;static const size_t primeList[PRIMECOUNT] ={53ul,97ul,193ul,389ul,769ul,1543ul,3079ul,6151ul,12289ul,24593ul,49157ul, 98317ul, 196613ul, 393241ul, 786433ul,1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,1610612741ul, 3221225473ul, 4294967291ul};size_t i = 0;for (; i < PRIMECOUNT; i++){if (primeList[i] > prime){return primeList[i];}}return primeList[i];}//插入pair<iterator,bool> Insert(const T& data){KeyOfT kot;auto ret = Find(kot(data));if (ret != end()){return make_pair(ret,false);}//仿函数HashFunc hf;//负载因子为1时，进行增容if (_n == _table.size()){vector<Node*> newTable;newTable.resize(GetNextPrime(_table.size()));//取旧表中的结点，重新计算映射到新表中的位置，挂到新表中for (size_t i = 0; i < _table.size(); i++){if (_table[i]){Node* cur = _table[i];while (cur){Node* next = cur->_next;//保存下一个结点size_t index = hf(kot(cur->_data)) % newTable.size();//计算结映射到新表中的位置//头插cur->_next = newTable[index];//=nullptr,将cur->_next置空newTable[index] = cur;//将结点插入到新表cur = next;//哈希桶往下挪一个}_table[i] = nullptr;//当前哈希桶的第一个位置置空}}_table.swap(newTable);}//不需要增容时，头插size_t index = hf(kot(data)) % _table.size();Node* newNode = new Node(data);newNode->_next = _table[index];//让新节点newNode的next指向第一个桶_table[index] = newNode;//让新节点newNode做第一个桶++_n;//更新哈希表大小	return make_pair(iterator(newNode, this), true);}//查找iterator Find(const K& key){//哈希表为空if (_table.size() == 0){return end();}KeyOfT kot;HashFunc hf;size_t index = hf(key) % _table.size();//计算在哈希表中的位置//在哈希表当前位置的所有桶中找keyNode* cur = _table[index];while (cur){if (kot(cur->_data) == key){return iterator(cur,this);}else{cur = cur->_next;}}return end();}//删除bool Erase(const K& key){size_t index = hf(key) % _table.size();Node* prev = nullptr;Node* cur = _table[index];while (cur){if (kot(cur->data) == key)//cur这个桶就是key{if (_table[index] == cur)//cur是第一个桶{_table[index] = cur->_next;}else//cur不是第一个桶{prev->_next = cur->_next;}--_n;//更新表大小delete cur;//删除节点return true;}prev = cur;cur = cur->_next;}return false;}private:vector<Node*> _table;size_t _n = 0;};
}

UnorderedSet.h

#pragma once
#include "HashTable.h"namespace yjl
{template<class K>class unordered_set{//set就取kstruct SetKeyOfT{const K& operator()(const K& k){return k;}};public://如果不加typename，类模板里面找迭代器的类型找不到，因为OpenHash::HashTable<K, K, SetKeyOfT>::iterator没有实例化，因此编译器并不知道它是类型还是成员函数或成员变量，编译无法通过//用typename表明OpenHash::HashTable<K, K, SetKeyOfT>::iterator是一个类型，不是成员函数或成员变量，不需要等到实例化时才能确定typedef typename OpenHash::HashTable<K, K, SetKeyOfT>::iterator iterator;iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}pair<iterator, bool> Insert(const K& k){return _ht.Insert(k);}private:OpenHash::HashTable<K, K, SetKeyOfT> _ht ;};void test_unordered_set1(){unordered_set<int> us;us.Insert(100);us.Insert(5);us.Insert(6);us.Insert(32);us.Insert(8);us.Insert(14);us.Insert(65);us.Insert(27);us.Insert(39);unordered_set<int>::iterator it = us.begin();while (it != us.end()){cout << *it << " ";++it;}cout << endl;}
}

UnorderedMap.h

#pragma once
#include "HashTable.h"namespace yjl
{template<class K,class V>class unordered_map{//map就取kv的firststruct MapKeyOfT{const K& operator()(const pair<K,V>& kv){return kv.first;}};public:typedef typename OpenHash::HashTable<K, pair<K,V>, MapKeyOfT>::iterator iterator;iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}pair<iterator, bool> Insert(const pair<K,V>& kv){return _ht.Insert(kv);}V& operator[](const K& key){pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));return ret.first->iterator;}private:OpenHash::HashTable<K, pair<K, V>, MapKeyOfT> _ht;};void test_unordered_map1(){unordered_map<string,string> um;um.Insert(make_pair("spring", "春天"));um.Insert(make_pair("summer", "夏天"));um.Insert(make_pair("autumn", "秋天"));um.Insert(make_pair("winter", "冬天"));unordered_map<string,string>::iterator it = um.begin();while (it != um.end()){cout << it->first << ":" << it->second << endl;++it;}}
}

Test.cpp

#define  _CRT_SECURE_NO_WARNINGS  1
#include "HashTable.h"
#include "UnorderedSet.h"
#include "UnorderedMap.h"int main()
{delia::test_unordered_set1();delia::test_unordered_map1();return 0;
}