利用哈弗曼編碼進行壓縮

// sZipDemo.cpp : 定義控制臺應用程序的入口點。
//

#include "stdafx.h"
#include "HuffmanTree.cpp"
#include "sZip.h"
#include <fstream>
#include <iostream>
using namespace std;

int _tmain(int argc, _TCHAR* argv[])
{
	
	char str1[10000];
	ifstream in;
	in.open("text.txt",ios::in|ios::binary);
		in>>str1;
	cout<<"成功讀取text.doc!"<<endl;
	int num = in.gcount();
	in.close();
	
	char tempstr[100000];
	for(int i = 0;i <= num;i++)
		tempstr[i] = str1[i];

	unsigned int count[128];
	char data[128];
	
	HuffmanTree<char> Tree;
	Tree.creat(tempstr,data,count);

	char charCount[256];
	for(int i = 0;count[i] != 0;i++){
		static int n = -1;
		if(count[i] < 10){                                                //個位
			charCount[++n] = count[i]+48;
			charCount[++n] = '#';
		}
		else if(count[i] >= 10 && count[i] < 100){                        //十位
			charCount[++n] = count[i]/10+48;
			charCount[++n] = count[i]%10+48;
			charCount[++n] = '#';
		}
		else if(count[i] >= 100 && count[i] < 1000){                      //百位
			charCount[++n] = count[i]/100+48;
			charCount[++n] = count[i]%100/10+48;
			charCount[++n] = count[i]%10+48;
			charCount[++n] = '#';
		}
		else{                                                             //千位
			charCount[++n] = count[i]/1000+48;
			charCount[++n] = count[i]%1000/100+48;
			charCount[++n] = count[i]%100/10+48;
			charCount[++n] = count[i]%10+48;
			charCount[++n] = '#';

		}
		charCount[n+1] = 0;
	}
	ofstream out;
	out.open("text11.txt",ios::out|ios::binary);
	out<<data<<'#'<<'#'<<charCount;
	out<<'#'<<'#';


	sZip zip;
	char str2[100000];
	int size = 0;
	zip.zip(str1,str2,Tree.root,size,data);
	out<<str2;
	out.close();
	cout<<"成功壓縮text.doc文件,壓縮文件為text11.doc"<<endl;

	cout<<"------------------------------------------------------------------------"<<endl;
	char str3[100000];
	char str4[10000];
	in.open("text11.txt",ios::in|ios::binary);
	in.read(str3,80000);
	in.close();
    str3[in.gcount()] = 0;
	zip.unzip(str3,str4);
	
	out.open("text22.txt",ios::out|ios::binary);
	out<<str4;
	out.close();
	return 0;
}
#pragma once

template <class T>
struct HuffmanTreeNode{
	T data;                                                                         //數(shù)據(jù)
	unsigned int count;                                                             //權值
	char code[128];                                                                   //結點的哈弗曼編碼
	HuffmanTreeNode<T> *leftChild;                                                  //左子女
	HuffmanTreeNode<T> *rightChild;                                                 //右子女
	HuffmanTreeNode<T> *parent;                                                     //父節(jié)點
	HuffmanTreeNode():leftChild(NULL),rightChild(NULL),parent(NULL){}               //構造函數(shù)

	bool operator <=(HuffmanTreeNode &R){return count <= R.count;}                  //重載<=和>運算符
	bool operator >(HuffmanTreeNode &R){return count > R.count;}

};

template <class T>
class HuffmanTree
{
public:
	HuffmanTree();
	HuffmanTree(HuffmanTree<T> &t);
	~HuffmanTree(void);

	void printData();
	int getWPL();                                                                            //獲取WPL                                                                //打開文件
	void creat(char str[],char data[],unsigned int count[]);                            //哈弗曼的建立
	void creat(char data[],unsigned int count[]);                                                //重載
	
	template <class T>
	friend void findKey(HuffmanTreeNode<T> *subTree,T key,char code[]){                    //尋找結點的code
		static T firstNodeData = subTree->data;
		if(subTree != NULL){
			if(subTree->data != key){
				findKey(subTree->leftChild,key,code);
				findKey(subTree->rightChild,key,code);
			}
			else{
				int i = 0;
				for(;subTree->code[i] != 0;i++)
					code[i] = subTree->code[i];
				code[i] = 0;
			}
		}
	}
	HuffmanTreeNode<T> *root;
protected:
	
private:
	int WPL;
	void census(unsigned int count[],char data[],char str[],unsigned int &size);       //建立哈弗曼樹時統(tǒng)計各字符出現(xiàn)的次數(shù)
	void deleteTree(HuffmanTreeNode<T> *subTree);                                      //刪除subTree結點的所有子結點
	void encode(HuffmanTreeNode<T> *subTree,char code[] ,char m = 0);                //編碼
	void calculateWPL(HuffmanTreeNode<T> *subTree,int i = 0);                          //計算WPL
	void printCode(HuffmanTreeNode<T> *subTree);                                       //輸出哈弗曼編碼的子函數(shù)
	void printData(HuffmanTreeNode<T> *subTree);                                       //輸出所有結點data和權值的子函數(shù)
};
#pragma once
#include "StdAfx.h"
#include "HuffmanTree.h"
#include "MinHeap.cpp"
#include <fstream>

template <class T>
HuffmanTree<T>::HuffmanTree(){
	WPL = 0;
	root = new HuffmanTreeNode<T>;

};

template <class T>
HuffmanTree<T>::~HuffmanTree(){
	deleteTree(root);                                           //刪除哈弗曼樹
}

template <class T>
void HuffmanTree<T>::creat(char str[],char data[],unsigned int count[]){
    unsigned int size;                                          //字符的個數(shù)
	for(unsigned int i = 0; i < 128; i++)                       //count的初始化
		count[i] = 0;         

	census(count,data,str,size);
	minHeap<HuffmanTreeNode<T>> hp;
	HuffmanTreeNode<T> *parent, *first, *second;
	HuffmanTreeNode<T>  *work;

	for(unsigned int i = 0; i < size; i++){                                      //把每個元素都插入最小堆中
		work = new HuffmanTreeNode<T>;
		work->data = data[i];
		work->count = count[i];
		work->leftChild = NULL;
		work->rightChild = NULL;
		work->parent = NULL;
		hp.insert(*work);
	}

	for(unsigned int i = 0; i < size-1; i++){
		parent = new HuffmanTreeNode<T>;
		first = new HuffmanTreeNode<T>;
		second = new HuffmanTreeNode<T>;
		hp.removeMin(*first);                                         //每次從最小堆中取出兩個最小的數(shù)
		hp.removeMin(*second);
		
		parent->leftChild = first;                                    //parent作為他們的父節(jié)點
		parent->rightChild = second;
		parent->count = first->count + second->count;
		parent->data = '#';                                           //parent不是根結點所以把它的data設為'#'
		first->parent = parent;
		second->parent = parent;
		hp.insert(*parent);                                           //再把parent插入最小堆中,進行循環(huán)判斷

	}
	root = parent;                                                    //最后一個parent就是根結點

	char code[128];
	for(unsigned int i = 0;i < 128; i++)
		code[i] = 0;
	encode(root,code);                                                //對結點進行哈弗曼編碼(空的地方取0)

};

template <class T>
void HuffmanTree<T>::creat(char data[],unsigned int count[]){
	unsigned int size = 0;
	for(unsigned int i = 0; count[i] != 0; i++)
		size++;

	minHeap<HuffmanTreeNode<T>> hp;
	HuffmanTreeNode<T> *parent, *first, *second;
	HuffmanTreeNode<T>  *work;

	for(unsigned int i = 0; i < size; i++){                                      //把每個元素都插入最小堆中
		work = new HuffmanTreeNode<T>;
		work->data = data[i];
		work->count = count[i];
		work->leftChild = NULL;
		work->rightChild = NULL;
		work->parent = NULL;
		hp.insert(*work);
	}

	for(unsigned int i = 0; i < size-1; i++){
		parent = new HuffmanTreeNode<T>;
		first = new HuffmanTreeNode<T>;
		second = new HuffmanTreeNode<T>;
		hp.removeMin(*first);                                         //每次從最小堆中取出兩個最小的數(shù)
		hp.removeMin(*second);
		
		parent->leftChild = first;                                    //parent作為他們的父節(jié)點
		parent->rightChild = second;
		parent->count = first->count + second->count;
		parent->data = '#';                                           //parent不是根結點所以把它的data設為'#'
		first->parent = parent;
		second->parent = parent;
		hp.insert(*parent);                                           //再把parent插入最小堆中,進行循環(huán)判斷

	}
	root = parent;                                                    //最后一個parent就是根結點

	char code[128];
	for(unsigned int i = 0;i < 128; i++)
		code[i] = 0;
	encode(root,code);                                                //對結點進行哈弗曼編碼(空的地方取0)
}

template <class T>
void HuffmanTree<T>::deleteTree(HuffmanTreeNode<T> *subTree){
	if(subTree != NULL){
		deleteTree(subTree->leftChild);                          //后序遍歷來刪除結點
		deleteTree(subTree->rightChild);
		delete subTree;
	}
}

template <class T>
void HuffmanTree<T>::census(unsigned int count[],char data[],char str[],unsigned int &size){
	//用于統(tǒng)計字符出現(xiàn)的次數(shù)

	for(int i = 0; str[i] != 0;i++){
		if(str[i] == '#')                        //當出現(xiàn)的是已出現(xiàn)過的字符時就執(zhí)行下次循環(huán)
			continue;

		static int n = 0;
		count[n]++;                               //第一次出現(xiàn)時加一
		data[n] = str[i];
		for(int j = 0; str[j] != 0;j++){
			if(str[i] == str[j] && i != j){
				count[n]++;                    //每次出現(xiàn)重復的時候加一并且
				str[j] = '#';                     //表明已出現(xiàn)過
			}
		}
		data[++n] = 0;
		size = n;
	}
}

template <class T>
void HuffmanTree<T>::encode(HuffmanTreeNode<T> *subTree,char code[] ,char m = 0){

	if(subTree != NULL){
		int j;
		for(j = 0;code[j] != 0;j++){
			subTree->code[j] = code[j];
		}
		for(j = 0;code[j] != 0;j++){
		}
		subTree->code[j] = m;
		subTree->code[j+1] = 0;
		encode(subTree->leftChild,subTree->code,'0');
		encode(subTree->rightChild,subTree->code,'1');
	}
}

template <class T>
void HuffmanTree<T>::calculateWPL(HuffmanTreeNode<T> *subTree,int i = 0){
	if(subTree != NULL){
		if(subTree->data != '#')
			WPL += i*subTree->count;                     //i為當前層數(shù),count為結點權值
		calculateWPL(subTree->leftChild,++i);            //前序遍歷
		i--;
		calculateWPL(subTree->rightChild,++i);
		i--;
	}
}

template <class T>
int HuffmanTree<T>::getWPL(){
	return WPL;
}
#pragma once

#define DafaultSize 50

template<class E>
class minHeap{
public:
	minHeap(int size = DafaultSize);
	~minHeap();

	bool insert(const E& x);
	bool removeMin(E& x);

private:
	E *heap;
	int currentSize;
	int maxHeapSize;
	void siftDown(int start, int m);
	void siftUp(int start);
};
#pragma once
#include "StdAfx.h"
#include "MinHeap.h"

template<class E>
minHeap<E>::minHeap(int size){
	maxHeapSize = (DafaultSize<size)? size:DafaultSize;

	heap = new E[maxHeapSize];
	if(heap == NULL){
		//cout<<"堆存儲分配失敗"<<endl;
	}
	currentSize = 0;
};

template<class E>
minHeap<E>::~minHeap(){
	delete heap;
}

template<class E>
void minHeap<E>::siftDown(int start, int m){
	int i = start;                       //初始位置
	int j = 2*i+1;                       //j是i的左子女位置
	E temp = heap[i];   
	while(j <= m){ 
		if(j<m && heap[j] > heap[j+1])   //左子女大于右子女時,j指向右子女
			j++;
		if(temp <= heap[j])
			break;
		else{                            //大則向上移
			heap[i] = heap[j];
			i = j;
			j = 2*j+1;
		}
	}
	heap[i] = temp;
};

template<class E>
void minHeap<E>::siftUp(int start){
	int j = start;
	int i = (j-1)/2;                //i的左子女是j
	E temp = heap[j];
	while(j>0){
		if(heap[i] <= temp)        //如果父節(jié)點大
			break;
		else{                      //如果父節(jié)點小,上滑
			heap[j] = heap[i];     
			j = i;
			i = (i-1)/2;
		}
	}
	heap[j] = temp;
};

template<class E>
bool minHeap<E>::insert(const E& x){
	if(currentSize == maxHeapSize){     //堆滿時
	//	cout<<"堆已滿"<<endl;
		return false;
	}
	heap[currentSize] = x;              //在heap尾插入x
	siftUp(currentSize);                //對x進行上滑判斷
	currentSize++;
	return true;
};

template<class E>
bool minHeap<E>::removeMin(E& x){
	if(currentSize == 0){                  //堆空時
	//	cout<<"堆為空!!"<<endl;
		return false;
	}
	x = heap[0];                           //從heap頭獲取remove的數(shù)據(jù)
	heap[0] = heap[currentSize-1];         //從heap尾獲取元素補到heap頭的位置
	currentSize--;                         //元素個數(shù)減一
	siftDown(0,currentSize-1);             //再對heap從頭到尾進行下滑判斷
	return true;
};
#pragma once
#include "HuffmanTree.cpp"
class sZip
{
public:
	sZip(void);
	~sZip(void);
	void zip(char str1[],char str2[],HuffmanTreeNode<char>* subTree,int &size,char data[]);
	void unzip(char str1[],char str2[]);
private:
	bool codeEqual(char code1[],char code2[][128],int &num);
	void getHuffCode(HuffmanTreeNode<char>* subTree,char code[][128],char data[]);
	void strBinary(char str[],int &size);
	void binaryStr(char str1[]);
};
#include "StdAfx.h"
#include "sZip.h"
#include <iostream>
using namespace std;

sZip::sZip(void){

}


sZip::~sZip(void)
{
}

void sZip::zip(char str1[],char str2[],HuffmanTreeNode<char>* subTree,int &size,char data[]){
	char code[128][128];
	
	getHuffCode(subTree,code,data);                                            //獲取subTree的哈弗曼編碼
	unsigned int i = 0;
    unsigned int n = -1;
	for(; str1[i] != 0 ;i++){                                                //遍歷str1,把里面的字符轉化為哈弗曼編碼存在str2中
		int j = 0;
		while(data[j] != str1[i]){
			j++;
			if(data[j] == 0)
				break;
		}
		if(data[j] == str1[i]){
				for(int count = 0;code[j][count] != 0;count++)
					str2[++n] = code[j][count];
			str2[n+1]=0;
		}
	}
		strBinary(str2,size);                                                   //把str2的每一個字符變成每一個bit,8個bit合成一個字符
}

void sZip::unzip(char str1[],char str2[]){

	unsigned int count[128];
	for(int i = 0;i < 127;i++)
		count[i] = 0;
	char code[128][128];
	char data[128];
	for(int i = 0;i < 128;i++)
		code[i][0] = 0;
	int i = 0;

	for(;str1[i] != '#';i++)
		data[i] = str1[i];
	data[i] = 0;
	int j = i+2;
	i = -1;
	while(1){
		if(str1[j] != '#' && str1[j+1] == '#')                                                    //個位
			count[++i] = str1[j]-48;
		else if(str1[j+1] != '#' && str1[j+2] == '#'){                                            //十位
			count[++i] = int(str1[j]-48)*10+int(str1[j+1]-48);
			j++;
		}
		else if(str1[j+1] != '#' && str1[j+2] != '#' && str1[j+3] == '#'){                        //百位
			count[++i] = int(str1[j]-48)*100+int(str1[j+1]-48)*10+int(str1[j+2]-48);
			j = j+2;
		}
		else if(str1[j+1] != '#' && str1[j+2] != '#' && str1[j+3] != '#'&& str1[j+4] =='#'){      //千位
			count[++i] = int(str1[j]-48)*1000+int(str1[j+1]-48)*100+int(str1[j+2]-48)*10+int(str1[j+3]-48);
		}
		else if(str1[j] == '#' && str1[j+1] == '#')                                               //讀完
			break;
		else
			cout<<"讀取錯誤"<<endl; 
		j = j+2;
	}

	HuffmanTree<char> tree;
	tree.creat(data,count);
	getHuffCode(tree.root,code,data);

	j = j+2;
	i = -1;
	char tempchar[100000];
	for(;str1[j] != 0;j++)
		tempchar[++i] = str1[j];
	tempchar[i+1] = 0;
	binaryStr(tempchar);                                       //把壓縮文件轉化為二進制編碼

	char tempcode[128];
	j = -1;
	int num;
	int n = -1;
	i = 0;
	for(;tempchar[i] != 0;i++){                            //遍歷tempchar,把它從哈弗曼編碼轉化為對應的字符
		tempcode[++n] = tempchar[i];
		tempcode[n+1] = 0;
		if(codeEqual(tempcode,code,num)){
			str2[++j] = data[num];
			for(n = 0;tempcode[n] != 0;n++)               //重置tempcode
				tempcode[n] = 0;
			n = -1;
		}
		else
			continue;
	}
	str2[++j] = 0;
}

void sZip::getHuffCode(HuffmanTreeNode<char>* subTree,char code[][128],char data[]){
     for(int i = 0;data[i] != 0;i++)
		 findKey(subTree,data[i],code[i]);
}

void sZip::strBinary(char str[],int &size){
	char str2[100000];
	char bit[8];
	int n = 0;
	int count = 0;
	while(str[n] == '1' || str[n] == '0'){
		for(int i = 0;i < 8 ;i++){
			if(str[n] == 0){
				str[n] = '0';
				str[n+1] = 0;
			}
			bit[i] = str[n];
			n++;
		}
		str2[count] = 0;
		for(int i = 0;i < 8 ;i++){
			if(bit[i] == '1'){
				char temp = 1;
				str2[count] = (str2[count]<<1)|temp;                  //給它最后一位加上一即:左移一位,然后和00000001求或
			}
			else
				str2[count] = (str2[count]<<1);
		}
		count++;
	}
	for(int i = 0;i <= count;i++)
		str[i] = str2[i];
	for(int i = count;str[i] != 0;i++)
		str[i] = 0;
	size = count;
}

void sZip::binaryStr(char str1[]){
	char str2[100000];
	int i = -1;
	int n = 0;
	while(str1[n] != 0){
		int temp[8];
		int tempchar = abs(str1[n]);
		for(int count = 0;count < 8;count++){
			temp[count] = tempchar%2;
			tempchar /= 2;
		}
		if(str1[n]<0 && str1[n] > -128){                           //當為負數(shù)時,二進制為正數(shù)第一位為1(符號位)的反碼最后一位加一(補碼)
			temp[7] = 1;
			for(int count = 0;count < 7;count++){                  //求反碼
				if(temp[count] == 0)
					temp[count] = 1;
				else
					temp[count] = 0;
			}
			int x = 1;                                            //進位數(shù)
			for(int count = 0;count < 8;count++){                 //末位加一                                             
				if(temp[count] == 0){
					if(x == 1){
						temp[count] = 1;
						x = 0;
					}
				}
				else
					if(x == 1)
						temp[count] = 0;
			}
		}
		for(int count = 7;count != -1;count--)                    
			str2[++i] = char(temp[count]+48);
		n++;
	}
	str2[++i] = 0;
	for(int count = 0;count <= i;count++)
		str1[count] = str2[count];
}

bool sZip::codeEqual(char code1[],char code2 [][128],int &num){
	unsigned int size1 = 0;
	unsigned int size2 = 0;
	for(int i = 0;code1[i] != 0;i++)
		size1++;
	for(int i = 0;code2[i][0] != 0;i++)
		size2++;

	for(int i = 0;i < size2;i++){
		int j = 0;
		int size3 = 0;
		for(int n = 0;code2[i][n] != 0;n++)
		size3++;
		for(;j < size1;j++){
			if(code1[j] != code2[i][j])
			break;                                          //有一位不等于就直接跳出
		}
		if(j == size1 && j == size3){
			num = i;
			return true;
		}
	}
	return false;
}

文章題目:利用哈弗曼編碼進行壓縮
當前地址:http://bm7419.com/article48/pscdhp.html

成都網(wǎng)站建設公司_創(chuàng)新互聯(lián),為您提供企業(yè)建站、、自適應網(wǎng)站、網(wǎng)站收錄虛擬主機、網(wǎng)站營銷

廣告

聲明:本網(wǎng)站發(fā)布的內容(圖片、視頻和文字)以用戶投稿、用戶轉載內容為主,如果涉及侵權請盡快告知,我們將會在第一時間刪除。文章觀點不代表本網(wǎng)站立場,如需處理請聯(lián)系客服。電話:028-86922220;郵箱:631063699@qq.com。內容未經(jīng)允許不得轉載,或轉載時需注明來源: 創(chuàng)新互聯(lián)

網(wǎng)站優(yōu)化排名