2024-04-29

Navigation

Skip Navigation Links

Hash algorithms

Asymmetric Algorithms

Symmetric Cipher Algorithms

Encoding Algorithms

Compression Algorithms

Pseudo Random Number Algorithms

Steganography

Library Wrappers

String Comparison

Others

Syntax highlighting by Prism
PBCrypto.com Mirror

Boyer-Moore

Algorithm creator(s)

Robert S. Boyer, J Strother Moore


PB author(s)

Steve Hutchesson


Description

The algorithm preprocesses the string being searched for (the pattern), but not the string being searched in (the text). It is thus well-suited for applications in which the pattern is much shorter than the text or where it persists across multiple searches.


Note

In general, the algorithm runs faster as the pattern length increases. The key features of the algorithm are to match on the tail of the pattern rather than the head, and to skip along the text in jumps of multiple characters rather than searching every single character in the text.


Source

https://forum.powerbasic.com/forum/user-to-user-discussions/powerbasic-for-windows/4130-fast-string-searching?p=96264#post96264


See also

n/a


Source Code

Download source code file boyer-moore.bas (Right-click -> "Save as ...")

  ' #########################################################################
  
  FUNCTION BMBinSearch(ByVal startpos as LONG, _
                       ByVal lpSource as LONG,ByVal srcLngth as LONG, _
                       ByVal lpSubStr as LONG,ByVal subLngth as LONG) as LONG
  
      #REGISTER NONE
  
      LOCAL cval    as DWORD
      LOCAL shift_table as STRING * 1024
  
      ! cmp subLngth, 1
      ! jg OKsize
      ! mov eax, -2                 ; string too short, must be > 1
      ! jmp Cleanup
    OKsize:
  
      ! mov esi, lpSource
      ! add esi, srcLngth
      ! sub esi, subLngth
      ! mov ebx, esi            ; set Exit Length
  
    ' ----------------------------------------
    ' load shift table with value in subLngth
    ' ----------------------------------------
      ! mov ecx, 256
      ! mov eax, subLngth
      ! lea edi, shift_table
      ! rep stosd
  
    ' ----------------------------------------------
    ' load decending count values into shift table
    ' ----------------------------------------------
      ! mov ecx, subLngth           ; SubString length in ECX
      ! dec ecx                     ; correct for zero based index
      ! mov esi, lpSubStr           ; address of SubString in ESI
      ! lea edi, shift_table
  
      ! xor eax, eax
  
    Write_Shift_Chars:
      ! mov al, [esi]               ; get the character
      ! inc esi
      ! mov [edi+eax*4], ecx        ; write shift for each character
      ! dec ecx                     ; to ascii location in table
      ! jnz Write_Shift_Chars
  
    ' -----------------------------
    ' set up for main compare loop
    ' -----------------------------
      ! mov ecx, subLngth
      ! dec ecx
      ! mov cval, ecx
  
      ! mov esi, lpSource
      ! mov edi, lpSubStr
      ! add esi, startpos           ; add starting position
  
      ! mov edx, subLngth           ; pattern length in edx
  
      ! jmp Cmp_Loop
  
  ' *********************** Loop Code ***************************
  
    Suffix_Shift:
      ! add esi, eax                ; add suffix shift
  
    Pre_Cmp:
      ! mov ecx, cval               ; reset counter in compare loop
      ! cmp esi, ebx                ; test exit length
      ! jg  No_Match
  
      ! xor eax, eax                ; zero EAX
  
    Cmp_Loop:
      ! mov al, [esi+ecx]
      ! cmp al, [edi+ecx]           ; cmp characters in ESI / EDI
      ! jne Set_Shift               ; if not equal, get next shift
      ! dec ecx
      ! jns Cmp_Loop
  
      ! jmp Match
  
    Set_Shift:
      ! mov eax, shift_table[eax*4] ; get char shift value
      ! cmp eax, edx                ; is it pattern length ?
      ! jne Suffix_Shift            ; if not, jump to Suffix_Shift
      ! lea esi, [esi+ecx+1]        ; add bad char shift
      ! jmp Pre_Cmp
  
  ' ***************************************************************
  
    Match:
      ! sub esi, lpSource           ; sub source from ESI
      ! mov eax, esi                ; put length in eax
      ! jmp Cleanup
  
    No_Match:
      ! mov eax, -1                 ; set value for no match
  
    Cleanup:
  
      ! mov FUNCTION, eax
  
  END FUNCTION
  
  ' #########################################################################

Mirror provided by Knuth Konrad