LZ78
Algorithm creator(s)
Abraham Lempel, Jakob Ziv
PB author(s)
Semen Matusovski
Description
LZ78, like LZ77, has slow compression but very fast decompression. LZ78 is faster than LZ77 but doesn't always achieve as high a compression ratio as LZ77.
Note
The biggest advantage LZ78 has over the LZ77 algorithm is the reduced number of string comparisons in each encoding step.
Source
https://forum.powerbasic.com/forum/user-to-user-discussions/source-code/24007-lz78?t=23382
See also
n/a
Source Code
Download source code file lz78.bas (Right-click -> "Save as ...")
'"Clean" LZ78 algorithm (like I understand it).
'Written from "zero" base
'(using mathematical description only).
'Some notes:
'1) This is exactly LZ78 (not patented).
'I especially didn't read LZW description, but when I wrote a first variant and then read, I found that I did LZW (which I didn't want).
'The main difference - LZW uses pre-allocated one-letter words (for all ASCII) and didn't write them into the file.
'When I wrote a program, I understood that this way is very natural.
'Don't know, how it was possible to patent such moment (it's like patent for digits 0 & 1 ) ...
'Anyway I returned a program to canonic LZ78 form (initial dictionary is empty) and I need to output "roots".
'Really, not a big problem - I use a big dictionary, and if to compress a file of hundreds KB, we increase compressed size per 1-2% only.
'2) LZ77/LZSS compresses better, but LZ78 works much faster.
'3) Unlike Huffman's algo, LZ encodes seriously slowly, but decodes extremely fast.
'4) "Serious" arhivators combine many methods together and use LZSS, not LZ78.
'So don't expect the same ratio as in pkzip/arj/rar.
'Probably, i'll try to construct advanced algo (and I saw C implementations, such as LZHUF by LHA author), but it's not so simple to understand them, using a code only.
'
'
#Compile Exe
#Register None
#Dim All
$SourceFile = "C:\System.1st" ' <-- Change me
$TargetFile = "C:\System.Lz"
%nTest = 1 ' to have correct timing
%LZ78_DICTIONARY = 65535?? ' < 64K
%LZ78_NIL = %LZ78_DICTIONARY
Function DeCompress_LZ(InBuf As String, OutBuf As String) As Long
Dim Parents(%LZ78_DICTIONARY - 1) As Local Word
Dim abc(%LZ78_DICTIONARY - 1) As Local Byte
Dim abcPos As Local Word, abcSize As Local Word
Dim InStreamSize As Local Word, InStreamPos As Local Word
Dim OutBufPos As Local Dword, rOutBufPos As Dword, OutBufSize As Local Dword
Dim InBufPos As Local Dword, InBufSize As Local Dword
Dim LastInBufPos As Local Dword
Dim Parent As Local Word, rParent As Local Word
Dim Initialized As Local Long
OutBuf = "": InBufSize = Len(InBuf)
Do
If Initialized = 0 Then
If LastInBufPos >= InBufSize Then Exit Do
If LastInBufPos = 0 Then
OutBufSize = CvDwd(InBuf, 1): LastInBufPos = 4
OutBuf = Space$(OutBufSize)
ReDim bOutBuf(0) As Local Byte At StrPtr(OutBuf) - 1
End If
InStreamSize = CvDwd(InBuf, LastInBufPos + 1)
ReDim InStream(0) As Local Word At StrPtr(InBuf) + LastInBufPos + 2
LastInBufPos = LastInBufPos + 2 + InStreamSize * 2
abcSize = CvDwd(InBuf, LastInBufPos + 1)
ReDim abc(0) As Local Byte At StrPtr(InBuf) + LastInBufPos + 2
LastInBufPos = LastInBufPos + 2 + abcSize
If (abcSize Mod 2) Then Incr LastInBufPos
InStreamPos = 0: abcPos = 0
Initialized = 1
' MsgBox Str$(abcSize) + Str$(InStreamSize) + Str$(LastInBufPos)
End If
Parent = InStream(InStreamPos)
If abcPos < abcSize Then
Parents(abcPos) = Parent
Parent = abcPos
Incr abcPos
End If
OutBufPos = rOutBufPos
rParent = Parent
While rParent <> %LZ78_NIL
Incr OutBufPos
rParent = Parents(rParent)
Wend
While Parent <> %LZ78_NIL
Incr rOutBufPos
bOutBuf(OutBufPos) = abc(Parent)
Decr OutBufPos
Parent = Parents(Parent)
Wend
Incr InstreamPos
If InStreamPos >= InStreamSize Then Initialized = 0
Loop
End Function
Function Compress_LZ (InBuf As String, OutBuf As String) As Long
Dim Parents(%LZ78_DICTIONARY - 1) As Local Word
Dim Childs(%LZ78_DICTIONARY - 1) As Word
Dim FirstChild(%LZ78_NIL) As Word
Dim abc(%LZ78_DICTIONARY - 1) As Local Byte
Dim OutStream(%LZ78_DICTIONARY - 1) As Local Word
Dim Top(255) As Local Word
Dim OutStreamPos As Local Word
Dim abcPos As Local Word
Dim InBufSize As Local Dword, InBufPos As Dword
Dim Parent As Local Word, rParent As Local Word, tParent As Local Word
Dim Symbol As Local Byte, AlignMent As Local String
Dim Initialized As Local Long
OutBuf = ""
InBufSize = Len(InBuf): If InBufSize = 0 Then Function = -1: Exit Function
ReDim bInBuf(0) As Byte At StrPtr(InBuf) - 1
For InBufPos = 1 To InBufSize
If Initialized = 0 Then
For tParent = 0 To 255: Top(tParent) = %LZ78_NIL: Next
FirstChild(%LZ78_NIL) = %LZ78_NIL: Parent = %LZ78_NIL
abcPos = 0: OutStreamPos = 0
Initialized = 1
End If
Symbol = bInBuf(InBufPos)
If Parent = %LZ78_NIL Then
If Top(Symbol) <> %LZ78_NIL Then tParent = Top(Symbol) Else tParent = %LZ78_NIL
Else
tParent = FirstChild(Parent)
While tParent <> %LZ78_NIL
If abc(tParent) = Symbol Then Exit Do Else tParent = Childs(tParent)
Wend
End If
If tParent = %LZ78_NIL Then
If Parent = %LZ78_NIL Then Top(Symbol) = abcPos
abc(abcPos) = Symbol
Parents(abcPos) = Parent
rParent = FirstChild(Parent)
Childs(abcPos) = rParent
FirstChild(Parent) = abcPos
FirstChild(abcPos) = %LZ78_NIL
OutStream(OutStreamPos) = Parent: Incr OutStreamPos
Incr abcPos
Parent = %LZ78_NIL
Else
Parent = tParent
End If
If (abcPos >= %LZ78_DICTIONARY) Or (InBufPos = InBufSize) Then
If Parent <> %LZ78_NIL Then OutStream(OutStreamPos) = Parent: Incr OutStreamPos
If (abcPos Mod 2) Then AlignMent = " " Else AlignMent = ""
If OutBuf = "" Then OutBuf = MkDwd$(InBufSize) ' header
OutBuf = OutBuf + MkWrd$(OutStreamPos) + Peek$(VarPtr(OutStream(0)), OutStreamPos * 2) + _
MkWrd$(abcPos) + Peek$(VarPtr(abc(0)), abcPos) + AlignMent
Initialized = 0
End If
Next
End Function
Function PbMain
Local InBuf As String, OutBuf As String, InBufC As String
Local i As Long, f As Long, t1 As Single, t2 As Single, t3 As Single
f = FreeFile: ErrClear
Open $SourceFile For Binary As #f
If Err = 0 Then Get$ #f, Lof(f), InBuf
Close #f
If Err Then MsgBox "Can't read the source file": Exit Function
t1 = Timer
For i = 1 To %nTest
Compress_LZ InBuf, OutBuf
Next
t2 = Timer
For i = 1 To %nTest
DeCompress_LZ OutBuf, InBufC
Next
t3 = Timer
f = FreeFile: ErrClear
Open $TargetFile For Output As #f
If Err = 0 Then Print #f, OutBuf
Close #f
If Err Then MsgBox "Can't write the target file": Exit Function
MsgBox "Compress: " + Format$(1000 * (t2 - t1) / %nTest, "#.#") + " ms" + $CrLf + _
"Decompress: " + Format$(1000 * (t3 - t2) / %nTest, "#.#") + "ms" + $CrLf + _
"Ratio : " + Format$(Len(InBuf)) + " -> " + Format$(Len(OutBuf))
If InBuf <> InBufC Then MsgBox Str$(Len(InBufC)) + "troubles"
End Function