2024-04-29

Navigation

Skip Navigation Links

Hash algorithms

Asymmetric Algorithms

Symmetric Cipher Algorithms

Encoding Algorithms

Compression Algorithms

Pseudo Random Number Algorithms

Steganography

Library Wrappers

String Comparison

Others

Syntax highlighting by Prism
PBCrypto.com Mirror

Ratcliff / Obershelp Pattern Recognition

Algorithm creator(s)

John W. Ratcliff, Obershelp (?), David Metzener (?)


PB author(s)

Mike Tora, Semen Matusovski


Description

Computes the similarity of two strings as the number of matching characters divided by the total number of characters in the two strings. Matching characters are those in the longest common subsequence plus, recursively, matching characters in the unmatched region on either side of the longest common subsequence.


Note

n/a


Source

https://forum.powerbasic.com/forum/user-to-user-discussions/programming/19002-soundex-search-routine-for-pb-dll?p=245578#post245578


See also

n/a


Source Code

Download source code file ratcliff-obershelp.bas (Right-click -> "Save as ...")

#Compile Exe
#Dim All
#Register None
 
Function Similarity(String1 As String, String2 As String) As Double
     #Register None

      Dim startstr1 As Long, endstr1 As Long, lenstr1 As Long, startstr2 As Long, endstr2 As Long, lenstr2 As Long
      Dim ns1 As Long, ns2 As Long, Maximum As Long, kMaximum As Long, nRef As Long

      lenstr1 = Len(String1) - 1
      lenstr2 = Len(String2) - 1
      startstr1 = StrPtr(String1)
      endstr1 = startstr1 + lenstr1
      startstr2 = StrPtr(String2)
      endstr2 = startstr2 + lenstr2

      '--- Preserve registers ---

      ! PUSH EBX
      ! PUSH EDI
      ! PUSH ESI

      '--- Put first element to query ---
      ! PUSH startstr1       ' Address (not index) of first byte in String1
      ! PUSH endstr1         ' Address of last byte in String1
      ! PUSH startstr2       ' Address of first byte in String2
      ! PUSH endstr2         ' Address of last byte in String2

 Label001:
      ' Query is empty ?
      ! CMP nRef, 0          ' If nRef < 0 Then _
      ! JL Label009          '    Goto Label009

      ' Take a combination from query
      ! POP endstr2
      ! POP startstr2
      ! POP endstr1
      ! POP startstr1
      ! DEC nRef

      ! MOV kMaximum, 0      ' kMaximum = 0

      ! MOV ESI, startstr1   ' c1 (ESI) = startstr1      ' Replacement of
 Label002:                                               '
      ! CMP ESI, endstr1     ' If c1 > endstr1 Then _    '
      ! JG Label008          '    Goto Label008          ' For c1 = startstr1 To endstr1

      ! MOV EDI, startstr2   ' c2 (EDI)  = startstr1     ' Replacement of
 Label003:                                               '
      ! CMP EDI, endstr2     ' If c2 > endstr2 Then _    '
      ! JG Label007          '    Goto Label007          ' For c2 = startstr2 To endstr2

      ! MOV EAX, ESI         ' Temporary variables k1 (EAX), k2 (EBX) (are used instead of i)
      ! MOV EBX, EDI         ' k1 = c1: k2 = c2

 Label004:
      ! MOV CL, [EAX]        ' Eq. arr1(c1 + i)
      ! MOV CH, [EBX]        ' Eq. arr2(c2 + i)
      ! CMP CL, CH           ' If arr1(c1 + i) <> arr2(c2 + i) Then _
      ! JNE Label006         '    Goto Label006

      ! INC EAX              ' Incr k1  (Instead of Incr i)
      ! INC EBX              ' Incr k2

      ! MOV ECX, EAX         ' ECX (eq. i) = k1 - c1
      ! SUB ECX, ESI         '

      ! CMP ECX, kMaximum    ' If i <= kMaximum Then _
      ! JLE Label005         '    Goto Label005

      ! MOV ns1, ESI         ' ns1 = c1
      ! MOV ns2, EDI         ' ns2 = c2
      ! MOV kMaximum, ECX    ' kMaximum = i

 Label005:
      ! CMP EAX, endstr1     ' If c1 > endstr1 Then _
      ! JG Label006          '    Goto Label006 (Exit Do)

      ! CMP EBX, endstr2     ' If c2 > endstr2 Then _
      ! JLE Label004         '    Goto Label004 (Exit Do)

 Label006:
      ! INC EDI              ' Incr c2
      ! JMP Label003         ' Goto Label003 (eq. Next c2)

 Label007:
      ! INC ESI              ' Incr c1
      ! JMP Label002         ' Goto Label003 (eq. Next c1)

 Label008:
      ! CMP kMaximum, 0      ' If kMaximum = 0 Then _
      ! JE Label001          '    Goto Label001

      ' Instead of SubStrSim(ns1 + Maximum, endstr1, ns2 + Maximum, endstr2)
      ! INC nRef             ' Next combination in query

      ! MOV ESI, ns1
      ! ADD ESI, kMaximum    ' ns1 + Maximum
      ! MOV EDI, ns2
      ! ADD EDI, kMaximum    ' ns2 + Maximum

      ! PUSH ESI
      ! PUSH endstr1
      ! PUSH EDI
      ! PUSH endstr2

      ' Instead of SubStrSim(startstr1, ns1 - 1, startstr2, ns2 - 1)
      ! DEC ns1              ' ns1 - 1
      ! DEC ns2              ' ns2 - 1

      ! INC nRef             ' Next combination in query
      ! PUSH startstr1
      ! PUSH ns1
      ! PUSH startstr2
      ! PUSH ns2

      ! MOV ECX, kMaximum    ' Maximum =
      ! ADD Maximum, ECX     '    Maximum + kMaximum
      ! JMP Label001         ' Continue

      ' Restore registers
 Label009:
      ! POP ESI
      ! POP EDI
      ! POP EBX

      Similarity = Maximum / (lenstr1 + lenstr2 + 2) * 2

   End Function

   Function PbMain
      Local simindex As Double, str1 As String, str2 As String, result As String, _
         i As Long, start As Single, finish As Single

      str1 = UCase$("Borje Hagsten")
      str2 = UCase$("Boje Hasgten") ' With errors

      start = Timer
      For i = 1 To 100000
         simindex = Similarity(str1, str2)
      Next
      finish = Timer

      result = Format$(simindex * 100, "##.##")
      MsgBox "String Similarity Index = " & result & "%. Time =" & Format$(finish - start, "#,.#####") & " Secs / 100000 calls"
End Function

Mirror provided by Knuth Konrad