哈夫曼算法證明
哈夫曼算法是一種貪心算法,我們考慮證明其最優子結構和貪心選擇性質:
-
最優子結構:假設一個樹是哈夫曼樹,則以其任意節點為根節點的最大子樹也是哈夫曼樹。
證明:子樹的根節點的值是其所有葉子節點出現權值之和,因此無論子樹是什么形式,對子樹上方的節點計算WPL2都沒有影響。
根據哈夫曼樹的定義:WPL最小的二叉樹。如果子樹不是哈夫曼樹,其WPL1就不會是最小,那么整個樹的WPL=WPL1+WPL2就不會是最小,這與哈夫曼樹的定義相悖,因此子樹是哈夫曼樹。
-
貪心選擇性質(哈夫曼算法):每次取出權值最低的兩個節點作為葉子節點形成一棵二叉樹,并將其父親節點放入待選節點中。
證明:對于哈夫曼樹我們總可以通過取出兩個節點作為葉子節點并將其父親節點重新作為葉子節點的方式構造,需要證明的是是否每次選擇權值最低的節點可以構造成功。
當含有2個及以下的葉子節點的時候,顯然正確。
假設當含有小于k個葉子節點的時候哈夫曼算法可以形成哈夫曼樹
對于含有k個葉子節點形成的樹,設其中權重最小的葉子節點為a,其中深度最深的葉子節點為b,h表示節點的深度,w表示節點的權值。則:
$$WPL_1=\sum+w_a\cdot h_a+w_b\cdot h_b$$
(其中 $\sum$ 表示除a,b以外其他葉子節點的權值與深度乘積之和)
交換這兩個節點:
$$WPL_2=\sum+w_b\cdot h_a+w_a\cdot h_b$$
$$WPL_1-WPL_2=(w_a-w_b)\cdot h_a+(w_b-w_a)\cdot h_b=(h_b-h_a)\cdot(w_b-w_a)$$
而 $h_b-h_a$ 和 $w_b-w_a$ 都非負(由a,b節點的性質),因此 $WPL_1\ge WPL_2$,所以我們得到結論:對于任何一棵樹,將權值小的節點盡可能移動到較深層的節點會使整個樹的WPL比較小。
對于k個節點,我們首先取出兩個節點,將其合并成一個節點以后我們有k-1個節點,可以使用哈夫曼算法構造。
如果這兩個節點不是所有節點中權值最小的兩個,則我們總可以通過交換使得構造的樹的WPL減小,因此不是哈夫曼樹。只有兩個節點是所有節點中權值最小的兩個時我們無法再降低樹的WPL。因為我們總可以構造成功,所以選擇權值最小的節點構造的樹就是哈夫曼樹。證畢。
哈夫曼編碼譯碼程序
#pragma once
#include<iostream>
#include<fstream>
#include<vector>
#include<queue>
#include<cstdio>
#include<string>

// NOTE(review): a using-directive in a header leaks into every file that
// includes it. It is kept because code including this header may rely on it.
using namespace std;

// Capacity of the node table used by HuffmanCode. Slots 1..255 hold the
// characters (indexed by byte value); internal tree nodes are allocated from
// slot 500 upwards. Slot 0 is reserved as the "no node" marker.
static const int MAXN = 1005;

// A (node index, weight) pair as stored in the construction priority queue.
struct times
{
    double weight; // weight (frequency) of the node
    int num;       // node index; for a character this is its byte value

    times(int _num = 0, double _weight = 0) : weight(_weight), num(_num) {}

    // Deliberately reversed: std::priority_queue is a max-heap, so comparing
    // with '>' makes the lightest node come out first.
    friend bool operator < (const times& a, const times& b)
    {
        return a.weight > b.weight;
    }
};

// Former scratch variables of the tree builder. They are no longer used by
// this header and are kept only so that code naming them still compiles.
// They are 'static' so that including the header from several translation
// units does not produce duplicate-symbol link errors.
static times p, q;

// One node of the Huffman tree, stored in a flat table and linked by index.
struct Chara : times
{
    int father;     // index of the parent node, 0 when there is none
    int lson, rson; // indices of the left / right child, 0 when there is none
    string code;    // Huffman code of a leaf; empty until one is assigned

    Chara(int _num = 0, double _weight = 0, int _lson = 0, int _rson = 0, int _father = 0)
        : times(_num, _weight), father(_father), lson(_lson), rson(_rson), code()
    {
    }
    // Copy construction and assignment are compiler-generated, so every
    // member (including 'code') is copied.
};

// One entry of the user-supplied alphabet.
struct txt
{
    string t;      // token as typed by the user; only t[0] is encoded
    double weight; // weight given to that character

    txt(string _t, double _weight) : t(_t), weight(_weight) {}
};

// Builds a Huffman tree over single-byte characters and derives their codes.
class HuffmanCode
{
    // First table slot used for internal (non-character) nodes.
    static const int FIRST_INTERNAL = 500;

public:
    int n = 0;                // number of input entries used by the last build
    int cur = FIRST_INTERNAL; // next free slot for an internal node
    int root = 0;             // index of the tree root, 0 when the tree is empty
    Chara A[MAXN];            // node table holding the tree
    priority_queue<times> T;  // work queue used while building the tree

    // Walks the subtree rooted at node x and stores, in every leaf, the path
    // taken to reach it ('0' for a left edge, '1' for a right edge), prefixed
    // by 'now'.
    void CreatHuffmanCode(int x, string now)
    {
        if (A[x].lson != 0)
        {
            CreatHuffmanCode(A[x].lson, now + "0");
        }
        if (A[x].rson != 0)
        {
            CreatHuffmanCode(A[x].rson, now + "1");
        }
        if (A[x].lson == 0 && A[x].rson == 0) // a leaf, i.e. a character
        {
            A[x].code = now;
        }
    }

    // Builds the tree and the codes from the first _n entries of 'input'.
    //
    // _n is clamped to [0, input.size()]. Entries with an empty token or a
    // NUL character are skipped, and a character that appears more than once
    // keeps its first weight. With no usable entry the tree stays empty
    // (root == 0). A lone character is hung under a one-child root so that it
    // still receives the non-empty code "0".
    void _HuffmanCode(int _n, vector<txt>& input)
    {
        // Start from a clean table so the function can be called repeatedly.
        for (int i = 0; i < MAXN; i++)
        {
            A[i] = Chara();
        }
        while (!T.empty())
        {
            T.pop();
        }
        cur = FIRST_INTERNAL;
        root = 0;

        n = _n < 0 ? 0 : _n;
        if (static_cast<size_t>(n) > input.size())
        {
            n = static_cast<int>(input.size());
        }

        for (int i = 0; i < n; i++)
        {
            if (input[i].t.empty())
            {
                continue;
            }
            // Go through unsigned char: a plain char may be negative for
            // bytes >= 0x80 and would index in front of the table.
            const int sym = static_cast<unsigned char>(input[i].t[0]);
            if (sym == 0 || A[sym].num == sym) // reserved slot, or already queued
            {
                continue;
            }
            A[sym].num = sym;
            A[sym].weight = input[i].weight;
            T.push(times(sym, input[i].weight));
        }

        if (T.empty())
        {
            return;
        }

        if (T.size() == 1)
        {
            const times only = T.top();
            T.pop();
            A[cur] = Chara(cur, only.weight, only.num, 0, 0);
            A[only.num].father = cur;
            cur++;
        }
        else
        {
            // Repeatedly merge the two lightest nodes under a new parent.
            while (T.size() > 1)
            {
                const times first = T.top();
                T.pop();
                const times second = T.top();
                T.pop();
                A[cur] = Chara(cur, first.weight + second.weight, first.num, second.num, 0);
                A[first.num].father = A[second.num].father = cur;
                T.push(times(cur, A[cur].weight));
                cur++;
            }
            T.pop(); // the only node left is the root
        }

        root = cur - 1;
        CreatHuffmanCode(root, "");
    }
};

// Interactive front end: collects an alphabet, builds the codes and offers
// encoding and decoding of text files.
class Huffman
{
    int n;             // number of entries in 'input'
    vector<txt> input; // the alphabet, in the order it was entered
    HuffmanCode x;     // tree and code table built from 'input'

    // Prints 'prompt', reads one path token from standard input and replaces
    // the keyword "Default" by 'fallback'.
    static string AskPath(const string& prompt, const string& fallback)
    {
        cout << prompt << endl;
        string path;
        cin >> path;
        if (path == "Default")
        {
            path = fallback;
        }
        return path;
    }

    // Writes 'bits' to 'os', 50 characters per line.
    static void PrintWrapped(ostream& os, const string& bits)
    {
        for (size_t i = 0; i < bits.size(); i++)
        {
            if (i != 0 && i % 50 == 0)
            {
                os << '\n';
            }
            os << bits[i];
        }
        os << '\n';
    }

    // Adds the space character with a very large weight and builds the codes.
    void BuildWithSpace()
    {
        input.push_back(txt(" ", 10000.0));
        n++;
        x._HuffmanCode(n, input);
    }

public:
    // Loads the alphabet from hfmTree.txt ("<char> <weight>" pairs terminated
    // by the token "Esc") and builds the codes. Reading stops at the first
    // malformed entry or at end of file.
    void _Huffman()
    {
        n = 0;
        input.clear();

        ifstream tree("hfmTree.txt");
        if (!tree)
        {
            cerr << "無法打開默認文件hfmTree.txt" << endl;
        }
        string t;
        double weight = 0;
        while (tree >> t && t != "Esc" && tree >> weight)
        {
            input.push_back(txt(t, weight));
            n++;
        }

        BuildWithSpace();
    }

    // Reads the alphabet interactively, saves it to hfmTree.txt and builds
    // the codes. "Default" as the very first token loads hfmTree.txt instead,
    // "Delete" removes the last entry, "Esc" (or end of input) finishes.
    Huffman()
    {
        n = 0;
        cout << "請輸入字符集 \n[Delete撤銷輸入,Esc退出輸入]\n[直接輸入Default按照默認文件組成哈夫曼編碼]\n[空格已經編碼]" << endl;

        string t;
        while (true)
        {
            cout << "請輸入字符:";
            if (!(cin >> t)) // end of input: behave like "Esc"
            {
                break;
            }
            if (n == 0 && t == "Default")
            {
                _Huffman();
                return;
            }
            if (t == "Delete")
            {
                if (!input.empty()) // nothing to undo otherwise
                {
                    input.pop_back();
                    n--;
                }
                continue;
            }
            if (t == "Esc")
            {
                break;
            }

            cout << "請輸入" << t[0] << "的權重:";
            double weight = 0;
            if (!(cin >> weight))
            {
                if (cin.eof() || cin.bad())
                {
                    break;
                }
                // Not a number: drop the rest of the line and ask again.
                cin.clear();
                cin.ignore(1024, '\n');
                cout << "權重必須是數字" << endl;
                continue;
            }
            input.push_back(txt(t, weight));
            n++;
        }

        // Save the alphabet (without the implicit space) for later "Default" runs.
        ofstream tree("hfmTree.txt");
        for (size_t i = 0; i < input.size(); i++)
        {
            tree << input[i].t << " " << input[i].weight << '\n';
        }
        tree << "Esc" << '\n';
        tree.close();

        BuildWithSpace();
    }

    // Prints every character of the alphabet together with its code.
    void HuffmanDisplay()
    {
        cout << "哈夫曼編碼:" << endl;
        for (size_t i = 0; i < input.size(); i++)
        {
            if (input[i].t.empty())
            {
                continue;
            }
            const char symbol = input[i].t[0];
            cout << symbol << ":" << x.A[static_cast<unsigned char>(symbol)].code << endl;
        }
    }

    // Encodes a text file into a file of '0'/'1' characters, shows the result
    // on the console and writes the same 50-per-line layout to CodePrint.txt.
    // Encoding stops with a message at the first character that has no code.
    void GenerateCode()
    {
        string in = AskPath("請輸入需要壓縮文件的路徑[輸入Default將打開默認文件ToBeTran.txt]", "ToBeTran.txt");
        string out = AskPath("請輸入保存壓縮后文件的路徑[輸入Default將打開默認文件CodeFile.txt]", "CodeFile.txt");

        ifstream infile(in, ios::in);
        if (!infile)
        {
            cout << "無法打開文件:" << in << endl;
            return;
        }
        ofstream outfile(out, ios::out);
        if (!outfile)
        {
            cout << "無法打開文件:" << out << endl;
            return;
        }

        string encoded;
        bool flag = true;
        char c;
        // get(char&) reports end of file through the stream state, so every
        // byte value, including 0xFF, is read as data.
        while (infile.get(c))
        {
            const string& code = x.A[static_cast<unsigned char>(c)].code;
            if (code.empty())
            {
                flag = false;
                break;
            }
            outfile << code;
            encoded += code;
        }
        infile.close();
        outfile.close();

        if (!flag)
        {
            cout << "壓縮失敗,文件中出現了字符集中未包含的字符" << endl;
            return;
        }

        // Show the result.
        cout << "編碼后的文件為:" << endl;
        PrintWrapped(cout, encoded);

        // Store the same layout in a file.
        ofstream printed("CodePrint.txt", ios::out);
        PrintWrapped(printed, encoded);
    }

    // Decodes a file of '0'/'1' characters back into text, prints it and
    // writes it to the chosen output file. Any character other than '0' is
    // treated as '1'; an incomplete code at the end is ignored.
    void Decode()
    {
        string in = AskPath("請輸入需要解碼文件的路徑[輸入Default將打開默認文件CodeFile.txt]", "CodeFile.txt");
        string out = AskPath("請輸入保存解碼后文件的路徑[輸入Default將打開默認文件TextFile.txt]", "TextFile.txt");

        ifstream infile(in, ios::in);
        if (!infile)
        {
            cout << "無法打開文件:" << in << endl;
            return;
        }
        string bits;
        infile >> bits;
        infile.close();

        string text;
        // A root without children cannot consume bits; skip decoding rather
        // than spin forever.
        const bool usable = x.A[x.root].lson != 0 || x.A[x.root].rson != 0;
        size_t i = 0;
        while (usable && i < bits.size())
        {
            int t = x.root;
            while ((x.A[t].lson != 0 || x.A[t].rson != 0) && i < bits.size())
            {
                t = (bits[i] == '0') ? x.A[t].lson : x.A[t].rson;
                i++;
            }
            // Emit only when a real leaf was reached (t == 0 means the bit
            // led to a missing child).
            if (t != 0 && x.A[t].lson == 0 && x.A[t].rson == 0)
            {
                text += static_cast<char>(t);
            }
        }

        cout << "解碼后為:" << endl;
        cout << text << endl;

        ofstream outfile(out, ios::out);
        if (!outfile)
        {
            cout << "無法打開文件:" << out << endl;
            return;
        }
        outfile << text << endl;
        outfile.close();
    }
};
測試代碼
#include"Huffman.h"
#include<iostream>
using namespace std;

// Test driver: builds the code table interactively, prints it, then runs one
// encode pass followed by one decode pass.
int main()
{
    Huffman x;
    x.HuffmanDisplay();
    x.GenerateCode();
    x.Decode();
    return 0;
}