Huffman
Algorithm creator(s)
David A. Huffman
PB author(s)
Semen Matusovski
Description
The Huffman algorithms are based around the idea that a datastream which is to be compressed contains a high frequence of certain character, while others are not so common. A character which is common is assigned a short path in a tree, while an uncommon character is assigned a longer path.
Note
Huffman compression works well for text files and images (such as BMP) and is typically used in combination with LZ methods.
Source
https://forum.powerbasic.com/forum/user-to-user-discussions/source-code/24025-huffman-encoding-decoding?t=23400
See also
n/a
Source Code
Download source code file huffman.bas (Right-click -> "Save as ...")
'=============================================
' Huffman.inc (was .bas), by Semen Matusovski
' Updated by Henning Wallgren June 7th 2002
'=============================================
Type tagHuffmanTree
ParentNode As Integer
RightNode As Integer
LeftNode As Integer
Value As Integer
Weight As Long
End Type
Type tagHuffmanChar
Bits(255) As Byte
End Type
Sub ErrMsg(szText As String)
MsgBox szText, %MB_ICONEXCLAMATION Or %MB_TASKMODAL, "Critical error"
End Sub
Sub CreateBitSequences(Nodes() As tagHuffmanTree, ByVal NodeIndex As Integer, Bytes As tagHuffmanChar, CharValue() As tagHuffmanChar)
Local NewBytes As tagHuffmanChar
' If this is a leaf we set the characters bit sequence in the CharValue array
If Nodes(NodeIndex).Value >= 0 Then CharValue(Nodes(NodeIndex).Value) = Bytes: Exit Sub
' Traverse the left child
If Nodes(NodeIndex).LeftNode > = 0 Then
NewBytes = Bytes
Incr NewBytes.Bits(0)
NewBytes.Bits(NewBytes.Bits(0)) = 0
CreateBitSequences Nodes(), Nodes(NodeIndex).LeftNode, NewBytes, CharValue()
End If
' Traverse the right child
If Nodes(NodeIndex).RightNode >= 0 Then
NewBytes = Bytes
Incr NewBytes.Bits(0)
NewBytes.Bits(NewBytes.Bits(0)) = 1
CreateBitSequences Nodes(), Nodes(NodeIndex).RightNode, NewBytes, CharValue()
End If
End Sub
Function Compress_Huffman (InBuf As String, OutBuf As String) As Long
Dim i As Local Dword, j As Local Dword, k As Local Dword
Dim lNode1 As Local Long, lNode2 As Local Long, lNodes As Local Long, NodesCount As Local Integer
Dim lWeight1 As Local Long, lWeight2 As Local Long
Dim CharCount(255) As Local Dword
Dim Nodes(511) As Local tagHuffmanTree
Dim CharValue(255) As Local tagHuffmanChar
Dim Bytes As Local tagHuffmanChar
Dim bInBuf() As Local Byte, bOutBuf() As Local Byte
Dim InBufSize As Local Dword, OutBufSize As Local Long
Dim BitValue As Local Byte, Count As Local Integer, ByteValue As Local Byte
InBufSize = Len(InBuf)
ReDim bInBuf(0) At StrPtr(InBuf)
If InBufSize = 0 Then OutBuf ="": Function = -1: Exit Function
' Count the frequency of each ASCII code
For i = 0 To InBufSize - 1: Incr CharCount(bInBuf(i)): Next
' Create a leaf for each character
For i = 0 To 255
If CharCount(i) Then
Nodes(NodesCount).Weight = CharCount(i)
Nodes(NodesCount).Value = i
Nodes(NodesCount).LeftNode = -1
Nodes(NodesCount).RightNode = -1
Nodes(NodesCount).ParentNode = -1
NodesCount = NodesCount + 1
End If
Next
' Create the Huffman Tree
For lNodes = NodesCount To 2 Step -1
' Get the two leafs with the smallest weights
lNode1 = -1: lNode2 = -1
For i = 0 To NodesCount - 1
If Nodes(i).ParentNode < 0 Then
If lNode1 < 0 Then
lWeight1 = Nodes(i).Weight
lNode1 = i
ElseIf (lNode2 = -1) Then
lWeight2 = Nodes(i).Weight
lNode2 = i
ElseIf Nodes(i).Weight < lWeight1 Then
If Nodes(i).Weight < lWeight2 Then
If (lWeight1 < lWeight2) Then
lWeight2 = Nodes(i).Weight
lNode2 = i
Else
lWeight1 = Nodes(i).Weight
lNode1 = i
End If
Else
lWeight1 = Nodes(i).Weight
lNode1 = i
End If
ElseIf (Nodes(i).Weight < lWeight2) Then
lWeight2 = Nodes(i).Weight
lNode2 = i
End If
End If
Next
' Create a new leaf
Nodes(NodesCount).Weight = lWeight1 + lWeight2
Nodes(NodesCount).LeftNode = lNode1
Nodes(NodesCount).RightNode = lNode2
Nodes(NodesCount).ParentNode = -1
Nodes(NodesCount).Value = -1
' Set the parentnodes of the two leafs
Nodes(lNode1).ParentNode = NodesCount
Nodes(lNode2).ParentNode = NodesCount
' Increase the node counter
Incr NodesCount
Next
' Traverse the tree to get the bit sequence for each character
CreateBitSequences Nodes(), NodesCount - 1, Bytes, CharValue()
' Calculate the length of the destination string after encoding
j = 0: k = 0: Count = 0 ' chars in use
For i = 0 To 255
If CharCount(i) Then
Incr Count ' number of characters used
j = j + CharValue(i).Bits(0)
k = k + CharValue(i).Bits(0) * CharCount(i)
End If
Next
j = (j + 7) \ 8: k = (k + 7) \ 8 ' size in bytes
OutBuf = Space$(5 + 2 * Count + j + k)
ReDim bOutBuf(4 + 2 * Count + j + k) At StrPtr(OutBuf)
' Store the length of the source string and the number of characters used
CopyMemory ByRef bOutBuf(0), ByRef InBufSize, 4
bOutBuf(4) = Count - 1
OutBufSize = 5
' Store the used characters and the length of their respective bit sequences
Count = 0
For i = 0 To 255
If CharValue(i).Bits(0) Then
bOutBuf(OutBufSize) = i: Incr OutBufSize
bOutBuf(OutBufSize) = CharValue(i).Bits(0): Incr OutBufSize
Count = Count + 16 + CharValue(i).Bits(0)
End If
Next
' Store the Huffman Tree into the result converting the bit sequences into bytes
BitValue = 1
ByteValue = 0
For i = 0 To 255
If CharValue(i).Bits(0) Then
For j = 1 To CharValue(i).Bits(0)
If (CharValue(i).Bits(j)) Then ByteValue = ByteValue + BitValue
If (BitValue = 128) Then
bOutBuf(OutBufSize) = ByteValue: Incr OutBufSize: ByteValue = 0
BitValue = 1
Else
BitValue = BitValue + BitValue
End If
Next
End If
Next
If BitValue > 1 Then bOutBuf(OutBufSize) = ByteValue: Incr OutBufSize
' Encode the data by exchanging each ASCII byte
BitValue = 1
k = 0
For i = 0 To InBufSize - 1
For j = 1 To CharValue(bInBuf(i)).Bits(0)
If (CharValue(bInBuf(i)).Bits(j) = 1) Then k = k + BitValue
If (BitValue = 128) Then
bOutBuf(OutBufSize) = k
OutBufSize = OutBufSize + 1
BitValue = 1
k = 0
Else
BitValue = BitValue + BitValue
End If
Next
Next
' Add the last byte
If (BitValue > 1) Then bOutBuf(OutBufSize) = k: Incr OutBufSize
End Function
Sub CreateTree(Nodes() As tagHuffmanTree, NodesCount As Integer, Char As Long, Bytes As tagHuffmanChar)
Local i As Integer
Local NodeIndex As Long
NodeIndex = 0
For i = 1 To Bytes.Bits(0)
If (Bytes.Bits(i) = 0) Then
' Left node
If (Nodes(NodeIndex).LeftNode = -1) Then
Nodes(NodeIndex).LeftNode = NodesCount
Nodes(NodesCount).ParentNode = NodeIndex
Nodes(NodesCount).LeftNode = -1
Nodes(NodesCount).RightNode = -1
Nodes(NodesCount).Value = -1
NodesCount = NodesCount + 1
End If
NodeIndex = Nodes(NodeIndex).LeftNode
Else
' Right node
If (Nodes(NodeIndex).RightNode = -1) Then
Nodes(NodeIndex).RightNode = NodesCount
Nodes(NodesCount).ParentNode = NodeIndex
Nodes(NodesCount).LeftNode = -1
Nodes(NodesCount).RightNode = -1
Nodes(NodesCount).Value = -1
NodesCount = NodesCount + 1
End If
NodeIndex = Nodes(NodeIndex).RightNode
End If
Next
Nodes(NodeIndex).Value = Char
End Sub
Function UnCompress_Huffman (InBuf As String, OutBuf As String) As Long
Dim i As Local Dword, j As Local Dword, k As Local Dword
Dim lNode1 As Local Long, lNode2 As Local Long, lNodes As Local Long, NodesCount As Local Integer
Dim lWeight1 As Local Long, lWeight2 As Local Long
Dim CharCount(255) As Local Dword
Dim Nodes(511) As Local tagHuffmanTree
Dim CharValue(255) As Local tagHuffmanChar
Dim Bytes As Local tagHuffmanChar
Dim bInBuf() As Local Byte, bOutBuf() As Local Byte
Dim InBufSize As Local Dword, OutBufSize As Local Long
Dim BitValue As Local Byte, Count As Local Integer, ByteValue As Local Byte
Dim InBufPos As Dword
Dim NodeIndex As Long
Dim lOutBufSize As Long
InBufSize = Len(InBuf)
If InBufSize < 5 Then OutBuf = "": Function = -1: Exit Function
ReDim bInBuf(0) At StrPtr(InBuf)
' Extract the length of the original string and the number of characters used
CopyMemory ByRef OutBufSize, ByRef bInBuf(0), 4
Count = bInBuf(4) + 1
InBufPos = 5
OutBuf = Space$(OutBufSize)
ReDim bOutBuf(OutBufSize - 1) At StrPtr(OutBuf)
lOutBufSize = OutBufSize
' Get the used characters and their respective bit sequence lengths
For i = 1 To Count
CharValue(bInBuf(InBufPos)).Bits(0) = bInBuf(InBufPos + 1): InBufPos = InBufPos + 2
Next
' Extract the Huffman Tree, converting the byte sequence to bit sequences
ByteValue = bInBuf(InBufPos): Incr InBufPos
BitValue = 1
For i = 0 To 255
If CharValue(i).Bits(0) Then
For j = 1 To CharValue(i).Bits(0)
If (ByteValue And BitValue) Then CharValue(i).Bits(j) = 1
If (BitValue = 128) Then
ByteValue = bInBuf(InBufPos): Incr InBufPos
BitValue = 1
Else
BitValue = BitValue + BitValue
End If
Next
End If
Next
If (BitValue = 1) Then Decr InBufPos
' Create the Huffman Tree
NodesCount = 1
Nodes(0).LeftNode = -1
Nodes(0).RightNode = -1
Nodes(0).ParentNode = -1
Nodes(0).Value = -1
For i = 0 To 255
CreateTree Nodes(), NodesCount, i, CharValue(i)
Next
' Decode the actual data
OutBufSize = 0
Do
If InBufPos >= InBufSize Then Exit Do
ByteValue = bInBuf(InBufPos)
BitValue = 1
Do
If (ByteValue And BitValue) Then NodeIndex = Nodes(NodeIndex).RightNode Else _
NodeIndex = Nodes(NodeIndex).LeftNode
If Nodes(NodeIndex).Value >= 0 Then
bOutBuf(OutBufSize) = Nodes(NodeIndex).Value: Incr OutBufSize
If OutBufSize = lOutBufSize Then InBufPos = InBufSize - 1: Exit Do
NodeIndex = 0
End If
If BitValue = 128 Then Exit Do Else BitValue = BitValue + BitValue
Loop
Incr InBufPos
Loop
End Function
Function SaveAsHuffman(ByVal SourceFile As String,ByVal TargetFile As String) As Long
Local f As Long, InBuf As String, OutBufC As String
'Read sourceFile and Compress it:
f = FreeFile: ErrClear
Open SourceFile For Binary As #f
If Err = 0 Then If Lof(f) = 0 Then Err = 1 Else Get$ #f, Lof(f), InBuf
Close #f
If Err Then ErrMsg "Can't read the source file": Exit Function
'Compress source file:
If Compress_HuffMan (InBuf, OutBufC) < 0 Then ErrMsg "Can't compress": Exit Function
'Save Compressed file:
f = FreeFile: ErrClear
Open TargetFile For Output As #f
If Err = 0 Then Print #f, OutBufC;
Close #f
If Err Then ErrMsg "Can't write the target file": Exit Function
Function = 1
End Function
Function RetrieveFromHuffman(ByVal SourceFile As String,ByVal TargetFile As String) As Long
Local f As Long, InBufC As String, OutBuf As String
'Read Compressed SourceFile:
f = FreeFile: ErrClear
Open SourceFile For Binary As #f
If Err = 0 Then If Lof(f) = 0 Then Err = 1 Else Get$ #f, Lof(f), InBufC
Close #f
If Err Then ErrMsg "Can't read the source file": Exit Function
'Decompress:
If UnCompress_HuffMan (InBufC, OutBuf) < 0 Then ErrMsg "Can't decompress": Exit Function
'Save Decompressed Huffman:
f = FreeFile: ErrClear
Open TargetFile For Output As #f
If Err = 0 Then Print #f, OutBuf;
Close #f
If Err Then ErrMsg "Can't write the target file": Exit Function
Function = 1
End Function