2024-11-21

Navigation

Skip Navigation Links

Hash algorithms

Asymmetric Algorithms

Symmetric Cipher Algorithms

Encoding Algorithms

Compression Algorithms

Pseudo Random Number Algorithms

Steganography

Library Wrappers

String Comparison

Others

Syntax highlighting by Prism
PBCrypto.com Mirror

Huffman

Algorithm creator(s)

David A. Huffman


PB author(s)

Semen Matusovski


Description

The Huffman algorithms are based around the idea that a datastream which is to be compressed contains a high frequence of certain character, while others are not so common. A character which is common is assigned a short path in a tree, while an uncommon character is assigned a longer path.


Note

Huffman compression works well for text files and images (such as BMP) and is typically used in combination with LZ methods.


Source

https://forum.powerbasic.com/forum/user-to-user-discussions/source-code/24025-huffman-encoding-decoding?t=23400


See also

n/a


Source Code

Download source code file huffman.bas (Right-click -> "Save as ...")

   '=============================================
   ' Huffman.inc (was .bas), by Semen Matusovski
   ' Updated by Henning Wallgren June 7th 2002
   '=============================================
                      
   Type tagHuffmanTree
      ParentNode As Integer
      RightNode  As Integer
      LeftNode   As Integer
      Value      As Integer
      Weight     As Long
   End Type   
   
   Type tagHuffmanChar
      Bits(255) As Byte
   End Type        
   
   Sub ErrMsg(szText As String)
      MsgBox szText, %MB_ICONEXCLAMATION Or %MB_TASKMODAL, "Critical error"
   End Sub        
   
   Sub CreateBitSequences(Nodes() As tagHuffmanTree, ByVal NodeIndex As Integer, Bytes As tagHuffmanChar, CharValue() As tagHuffmanChar)
      Local NewBytes As tagHuffmanChar
      ' If this is a leaf we set the characters bit sequence in the CharValue array
      If Nodes(NodeIndex).Value >= 0 Then CharValue(Nodes(NodeIndex).Value) = Bytes: Exit Sub
      ' Traverse the left child
      If Nodes(NodeIndex).LeftNode > = 0 Then
         NewBytes = Bytes
         Incr NewBytes.Bits(0)
         NewBytes.Bits(NewBytes.Bits(0)) = 0
         CreateBitSequences Nodes(), Nodes(NodeIndex).LeftNode, NewBytes, CharValue()
      End If
      ' Traverse the right child
      If Nodes(NodeIndex).RightNode >= 0 Then
         NewBytes = Bytes
         Incr NewBytes.Bits(0)
         NewBytes.Bits(NewBytes.Bits(0)) = 1
         CreateBitSequences Nodes(), Nodes(NodeIndex).RightNode, NewBytes, CharValue()
      End If
   End Sub           
   
   Function Compress_Huffman (InBuf As String, OutBuf As String) As Long
      Dim i As Local Dword, j As Local Dword, k As Local Dword
      Dim lNode1 As Local Long, lNode2 As Local Long, lNodes As Local Long, NodesCount As Local Integer
      Dim lWeight1 As Local Long, lWeight2 As Local Long
      Dim CharCount(255) As Local Dword
      Dim Nodes(511) As Local tagHuffmanTree
      Dim CharValue(255) As Local tagHuffmanChar
      Dim Bytes As Local tagHuffmanChar
      Dim bInBuf() As Local Byte, bOutBuf() As Local Byte
      Dim InBufSize As Local Dword, OutBufSize As Local Long
      Dim BitValue As Local Byte, Count As Local Integer, ByteValue As Local Byte 
      InBufSize = Len(InBuf)
      ReDim bInBuf(0) At StrPtr(InBuf)
      If InBufSize = 0 Then OutBuf ="": Function = -1: Exit Function
      ' Count the frequency of each ASCII code
      For i = 0 To InBufSize - 1: Incr CharCount(bInBuf(i)): Next
      ' Create a leaf for each character
      For i = 0 To 255
         If CharCount(i) Then
            Nodes(NodesCount).Weight = CharCount(i)
            Nodes(NodesCount).Value = i
            Nodes(NodesCount).LeftNode = -1
            Nodes(NodesCount).RightNode = -1
            Nodes(NodesCount).ParentNode = -1
            NodesCount = NodesCount + 1
         End If
      Next
      ' Create the Huffman Tree
      For lNodes = NodesCount To 2 Step -1
         ' Get the two leafs with the smallest weights
         lNode1 = -1: lNode2 = -1
         For i = 0 To NodesCount - 1
            If Nodes(i).ParentNode < 0 Then
               If lNode1 < 0 Then
                  lWeight1 = Nodes(i).Weight
                  lNode1 = i
               ElseIf (lNode2 = -1) Then
                  lWeight2 = Nodes(i).Weight
                  lNode2 = i
               ElseIf Nodes(i).Weight < lWeight1 Then
                  If Nodes(i).Weight < lWeight2 Then
                     If (lWeight1 < lWeight2) Then
                        lWeight2 = Nodes(i).Weight
                        lNode2 = i
                     Else
                        lWeight1 = Nodes(i).Weight
                        lNode1 = i
                     End If
                  Else
                     lWeight1 = Nodes(i).Weight
                     lNode1 = i
                  End If
               ElseIf (Nodes(i).Weight < lWeight2) Then
                  lWeight2 = Nodes(i).Weight
                  lNode2 = i
               End If
            End If
         Next
         ' Create a new leaf
         Nodes(NodesCount).Weight = lWeight1 + lWeight2
         Nodes(NodesCount).LeftNode = lNode1
         Nodes(NodesCount).RightNode = lNode2
         Nodes(NodesCount).ParentNode = -1
         Nodes(NodesCount).Value = -1
         ' Set the parentnodes of the two leafs
         Nodes(lNode1).ParentNode = NodesCount
         Nodes(lNode2).ParentNode = NodesCount
         ' Increase the node counter
         Incr NodesCount
      Next
      ' Traverse the tree to get the bit sequence for each character
      CreateBitSequences Nodes(), NodesCount - 1, Bytes, CharValue()
      ' Calculate the length of the destination string after encoding
      j = 0: k = 0: Count = 0 ' chars in use
      For i = 0 To 255
         If CharCount(i) Then
            Incr Count ' number of characters used
            j = j + CharValue(i).Bits(0)
            k = k + CharValue(i).Bits(0) * CharCount(i)
         End If
      Next
      j = (j + 7) \ 8: k = (k + 7) \ 8 ' size in bytes
      OutBuf = Space$(5 + 2 * Count + j + k)
      ReDim bOutBuf(4 + 2 * Count + j + k) At StrPtr(OutBuf)
      ' Store the length of the source string and the number of characters used
      CopyMemory ByRef bOutBuf(0), ByRef InBufSize, 4
      bOutBuf(4) = Count - 1
      OutBufSize = 5
      ' Store the used characters and the length of their respective bit sequences
      Count = 0
      For i = 0 To 255
         If CharValue(i).Bits(0) Then
            bOutBuf(OutBufSize) = i: Incr OutBufSize
            bOutBuf(OutBufSize) = CharValue(i).Bits(0): Incr OutBufSize
            Count = Count + 16 + CharValue(i).Bits(0)
         End If
      Next
      ' Store the Huffman Tree into the result converting the bit sequences into bytes
      BitValue = 1
      ByteValue = 0
      For i = 0 To 255
         If CharValue(i).Bits(0) Then
            For j = 1 To CharValue(i).Bits(0)
               If (CharValue(i).Bits(j)) Then ByteValue = ByteValue + BitValue
               If (BitValue = 128) Then
                  bOutBuf(OutBufSize) = ByteValue: Incr OutBufSize: ByteValue = 0
                  BitValue = 1
               Else
                  BitValue = BitValue + BitValue
               End If
            Next
         End If
      Next
      If BitValue > 1 Then bOutBuf(OutBufSize) = ByteValue: Incr OutBufSize
      ' Encode the data by exchanging each ASCII byte
      BitValue = 1
      k = 0
      For i = 0 To InBufSize - 1
         For j = 1 To CharValue(bInBuf(i)).Bits(0)
            If (CharValue(bInBuf(i)).Bits(j) = 1) Then k = k + BitValue
            If (BitValue = 128) Then
               bOutBuf(OutBufSize) = k
               OutBufSize = OutBufSize + 1
               BitValue = 1
               k = 0
            Else
               BitValue = BitValue + BitValue
            End If
         Next
      Next
      ' Add the last byte
      If (BitValue > 1) Then bOutBuf(OutBufSize) = k: Incr OutBufSize
   End Function                    
   
   Sub CreateTree(Nodes() As tagHuffmanTree, NodesCount As Integer, Char As Long, Bytes As tagHuffmanChar)
      Local i As Integer
      Local NodeIndex As Long
      NodeIndex = 0
      For i = 1 To Bytes.Bits(0)
         If (Bytes.Bits(i) = 0) Then
            ' Left node
             If (Nodes(NodeIndex).LeftNode = -1) Then
                Nodes(NodeIndex).LeftNode = NodesCount
                Nodes(NodesCount).ParentNode = NodeIndex
                Nodes(NodesCount).LeftNode = -1
                Nodes(NodesCount).RightNode = -1
                Nodes(NodesCount).Value = -1
                NodesCount = NodesCount + 1
             End If
             NodeIndex = Nodes(NodeIndex).LeftNode
          Else
             ' Right node
             If (Nodes(NodeIndex).RightNode = -1) Then
                Nodes(NodeIndex).RightNode = NodesCount
                Nodes(NodesCount).ParentNode = NodeIndex
                Nodes(NodesCount).LeftNode = -1
                Nodes(NodesCount).RightNode = -1
                Nodes(NodesCount).Value = -1
                NodesCount = NodesCount + 1
             End If
             NodeIndex = Nodes(NodeIndex).RightNode
         End If
      Next
      Nodes(NodeIndex).Value = Char
   End Sub
   
   Function UnCompress_Huffman (InBuf As String, OutBuf As String) As Long
      Dim i As Local Dword, j As Local Dword, k As Local Dword
      Dim lNode1 As Local Long, lNode2 As Local Long, lNodes As Local Long, NodesCount As Local Integer
      Dim lWeight1 As Local Long, lWeight2 As Local Long
      Dim CharCount(255) As Local Dword
      Dim Nodes(511) As Local tagHuffmanTree
      Dim CharValue(255) As Local tagHuffmanChar
      Dim Bytes As Local tagHuffmanChar
      Dim bInBuf() As Local Byte, bOutBuf() As Local Byte
      Dim InBufSize As Local Dword, OutBufSize As Local Long
      Dim BitValue As Local Byte, Count As Local Integer, ByteValue As Local Byte
      Dim InBufPos As Dword
      Dim NodeIndex As Long
      Dim lOutBufSize As Long 
      InBufSize = Len(InBuf)
      If InBufSize < 5 Then OutBuf = "": Function = -1: Exit Function
      ReDim bInBuf(0) At StrPtr(InBuf)
      ' Extract the length of the original string and the number of characters used
      CopyMemory ByRef OutBufSize, ByRef bInBuf(0), 4
      Count = bInBuf(4) + 1
      InBufPos = 5
      OutBuf = Space$(OutBufSize)
      ReDim bOutBuf(OutBufSize - 1) At StrPtr(OutBuf)
      lOutBufSize = OutBufSize
      ' Get the used characters and their respective bit sequence lengths
      For i = 1 To Count
         CharValue(bInBuf(InBufPos)).Bits(0) = bInBuf(InBufPos + 1): InBufPos = InBufPos + 2
      Next
      ' Extract the Huffman Tree, converting the byte sequence to bit sequences
      ByteValue = bInBuf(InBufPos): Incr InBufPos
      BitValue = 1
      For i = 0 To 255
         If CharValue(i).Bits(0) Then
            For j = 1 To CharValue(i).Bits(0)
               If (ByteValue And BitValue) Then CharValue(i).Bits(j) = 1
               If (BitValue = 128) Then
                  ByteValue = bInBuf(InBufPos): Incr InBufPos
                  BitValue = 1
               Else
                  BitValue = BitValue + BitValue
               End If
            Next
         End If
      Next
      If (BitValue = 1) Then Decr InBufPos
      ' Create the Huffman Tree
      NodesCount = 1
      Nodes(0).LeftNode = -1
      Nodes(0).RightNode = -1
      Nodes(0).ParentNode = -1
      Nodes(0).Value = -1
      For i = 0 To 255
         CreateTree Nodes(), NodesCount, i, CharValue(i)
      Next
      ' Decode the actual data
      OutBufSize = 0
      Do
         If InBufPos >= InBufSize Then Exit Do
         ByteValue = bInBuf(InBufPos)
         BitValue = 1
         Do
            If (ByteValue And BitValue) Then NodeIndex = Nodes(NodeIndex).RightNode Else _
                                           NodeIndex = Nodes(NodeIndex).LeftNode
            If Nodes(NodeIndex).Value >= 0 Then
               bOutBuf(OutBufSize) = Nodes(NodeIndex).Value: Incr OutBufSize
               If OutBufSize = lOutBufSize Then InBufPos = InBufSize - 1: Exit Do
               NodeIndex = 0
            End If
            If BitValue = 128 Then Exit Do Else BitValue = BitValue + BitValue
         Loop
         Incr InBufPos
      Loop
   End Function       
   
   Function SaveAsHuffman(ByVal SourceFile As String,ByVal TargetFile As String) As Long
      Local f As Long, InBuf As String, OutBufC As String
            
      'Read sourceFile and Compress it:
      f = FreeFile: ErrClear
      Open SourceFile For Binary As #f 
      If Err = 0 Then If Lof(f) = 0 Then Err = 1 Else Get$ #f, Lof(f), InBuf
      Close #f
      If Err Then ErrMsg "Can't read the source file": Exit Function
                                                                
      'Compress source file:
      If Compress_HuffMan (InBuf, OutBufC) < 0 Then ErrMsg "Can't compress": Exit Function
      
      'Save Compressed file:
      f = FreeFile: ErrClear
      Open TargetFile For Output As #f
      If Err = 0 Then Print #f, OutBufC;
      Close #f
      If Err Then ErrMsg "Can't write the target file": Exit Function
      Function = 1
   End Function         
   
   Function RetrieveFromHuffman(ByVal SourceFile As String,ByVal TargetFile As String) As Long
      Local f As Long, InBufC As String, OutBuf As String
      
      'Read Compressed SourceFile:
      f = FreeFile: ErrClear
      Open SourceFile For Binary As #f
      If Err = 0 Then If Lof(f) = 0 Then Err = 1 Else Get$ #f, Lof(f), InBufC
      Close #f
      If Err Then ErrMsg "Can't read the source file": Exit Function 
      
      'Decompress:      
      If UnCompress_HuffMan (InBufC, OutBuf) < 0 Then ErrMsg "Can't decompress": Exit Function

      'Save Decompressed Huffman:
      f = FreeFile: ErrClear
      Open TargetFile For Output As #f
      If Err = 0 Then Print #f, OutBuf;
      Close #f
      If Err Then ErrMsg "Can't write the target file": Exit Function
      Function = 1
   End Function         
   

Mirror provided by Knuth Konrad