尼采般地抒情

尼采般地抒情

尼采般地抒情

音乐盒

站点信息

文章总数目: 316
已运行时间: 1570

问题描述

压缩算法,目的就是根据字母的出现频率来“按需分配”编码来优化编码方式。

比如:给出一串字母 Huffman Coding   ,按照计算机处理形式,会根据ascll码将这串字符编码,具体形式(十进制)就是104 117 102 102 109 97 110 32 67 111 100 105 110 103,然后转换成二进制,最后会得到需要97个比特来存储。

算法描述

算法角度来讲对上述问题ascll编码方式是浪费空间的,优化方向是改变编码方式,根据字母出现的频率来“按需分配”进制位。


给出下面所给出的字母,以及出现的频率,来得到哈夫曼编码

先提出将频率小的依次加入。d和h组合权值为9(或者说A只是称呼方便),然后将这个9“替换d和h”代入整个序列,在进行插入树操作,

过程中,遵循数字大的在左数字小的在右原则(互换也没关系,只不过换的是二进制的0和1)

在进行到E的时候,此时的队列应该为120 107 42 37,所以此时需要重新调整队列,然后进行到结束。


最后的编码结果为:

编码实现

#include <stdio.h>
#include <vector>
#include <algorithm>
namespace NS_HuffmanCoding {
using namespace std;
void BuildHuffmanTree();
void Initialization(vector<pair<char, int>> chars);
void Finalization();
struct HFMNode {
  char Ch; int Freq;
  HFMNode* Left, * Right;
  HFMNode(char pCh, int pFreq, HFMNode* pLeft, HFMNode* pRight)
    : Ch(pCh), Freq(pFreq), Left(pLeft), Right(pRight) {}
  HFMNode(char pCh, int pFreq)
    : HFMNode(pCh, pFreq, NULL, NULL) {}
};
void MinHeapify(vector<HFMNode*>& H);
void InsertH(vector<HFMNode*>& H, HFMNode* node);
void SiftDown(vector<HFMNode*>& H, int i);
void SiftUp(vector<HFMNode*>& H, int i);
HFMNode* ExtractMin(vector<HFMNode*>& H);
void DeleteANode(HFMNode* node);
void ShowInput(vector<pair<char, int>> chars);
void Output();
static vector<HFMNode*> Q;
void HuffmanCodingCaller(vector<pair<char, int>> chars)
{
  ShowInput(chars);
  Initialization(chars);
  BuildHuffmanTree();
  Output();
  Finalization();
}
void BuildHuffmanTree()
{
  char C = 'A';
  while (Q.size() > 1)
  {
    HFMNode* x = ExtractMin(Q);
    HFMNode* y = ExtractMin(Q);
    HFMNode* z = new HFMNode(C++, x->Freq + y->Freq, x, y);
    InsertH(Q, z);
  }
}
HFMNode* ExtractMin(vector<HFMNode*>& H)
{
  swap(H.front(), H.back());
  HFMNode* p = H.back();
  H.pop_back();
  if (!H.empty())
    SiftDown(H, 0);
  return p;
}
void SiftDown(vector<HFMNode*>& H, int i)
{
    while ((i = (i << 1) + 1) < H.size()) {
        if ((i + 1 < H.size()) && (H[i + 1]->Freq < H[i]->Freq))
            i = i + 1;
        if (H[(i - 1) >> 1]->Freq > H[i]->Freq)
            swap(H[(i - 1) >> 1], H[i]);
        else break;
    }
}
void InsertH(vector<HFMNode*>& H, HFMNode* node)
{
  H.push_back(node);
  SiftUp(H, H.size() - 1);
}
void SiftUp(vector<HFMNode*>& H, int i)
{
  while (i > 0 && H[i]->Freq < H[(i - 1) >> 1]->Freq) {
    swap(H[i], H[(i - 1) >> 1]);
    i = (i - 1) >> 1;
  }
}
void MinHeapify(vector<HFMNode*>& H)
{
  for (int i = (H.size() >> 1) - 1; i >= 0; i--) {
    SiftDown(H, i);
  }
}

void Initialization(vector<pair<char, int>> chars)
{
Q.clear();
for (auto ch : chars)
Q.push_back(new HFMNode(ch.first, ch.second));
MinHeapify(Q);
}
void Finalization()
{
DeleteANode(Q[0]);
}
void DeleteANode(HFMNode* node)
{
if (node->Left)
{
DeleteANode(node->Left);
DeleteANode(node->Right);
}
delete node;
}
void ShowInput(vector<pair<char, int>> chars)
{
printf("Huffman coding input: \n");
for (auto c : chars)
printf("%c,%d; ", c.first, c.second);
printf("\n");
}
static vector<char> coding;
static vector<pair<char, vector<char>>> codingList;
void GetHuffmanCoding(HFMNode* node)
{
if (node->Left)
{
coding.push_back(‘0’);
GetHuffmanCoding(node->Left);
coding.pop_back();
coding.push_back(‘1’);
GetHuffmanCoding(node->Right);
coding.pop_back();
}
else
{
codingList.push_back(pair<char,
vector<char>>(node->Ch, coding));
}
}
void Output()
{
printf("Huffman coding:\n");
coding.clear();
codingList.clear();
GetHuffmanCoding(Q[0]);
sort(codingList.begin(), codingList.end());
for (auto c1 : codingList)
{
printf(" %c: ", c1.first);
for (auto c2 : c1.second)
printf("%c", c2);
printf("\n");
}
printf("\n");
}
} //namespace NS_HuffmanCoding
using namespace NS_HuffmanCoding;
void TestHuffmanCoding()
{
vector<vector<pair<char, int>>> charLists = {
//Introduction to Algorithms
{
{ {‘a’,40}, {‘b’,13}, {‘c’,12},
{‘d’,16}, {‘e’,9}, {‘f’,5} },
},
//ÑÏεÃô
{
{ {‘a’,5}, {‘b’,29}, {‘c’,7}, {‘d’,8},
{‘e’,14}, {‘f’,23}, {‘g’,3}, {‘h’,11} },
},
};
int n = charLists.size();
for (int i = 0; i < n; i++)
{
HuffmanCodingCaller(charLists[i]);
}
}

评论区