A Yet Faster Case InSeNsItIve INSTR()
Updated: 9 July 2007 fixing dh register bug.
With some extra code, we can shorten one of the looping paths and squeeze the last drops of performance out of this function. from 47msec down to around 37 msec per 100,000 loops.
The trick is to ensure the shortest possible route when checking against the the first character of the keyword. Another enhancement is to assume most of the characters will above the upper-case band (ie lower-case) and thus able to skip one of the checks.
' FindString
' Version 2
' Charles E V Pegge
' 09 July 2007
' PowerBasic PBwin ver 8.x
#COMPILE EXE
#DIM ALL
#INCLUDE "WIN32API.INC"
' Case insensitive INSTR(). Returns position of keystring or zero if there is no match
' Parameters:
' 1 string pointer for string or buffer to be searched
' 2 string pointer for the keyword to search with
' 3 the length of the text to be searched
' 4 the length of the keyword
' Return:
' the number of the matched word starting from 1
' If no match was found in the string then 0 is returned
' Limits:
' only the first 127 chars of the keyword will be compared
' Side effects:
' the keyword is converted to lower case, so it may need to be renewed if used in other functions.
FUNCTION findstring(BYVAL p AS BYTE PTR, BYVAL q AS BYTE PTR, BYVAL ple AS LONG, BYVAL qle AS LONG) AS LONG
#REGISTER NONE
'============================'
! mov esi,p ' pointer to string to be seeched
! mov edi,q ' pointer to keyword
! mov ecx,ple ' length of string to be searched
! add ecx,esi ' add pointer to length to get end string boundary
! mov edx,qle ' length of keyword up or 127 whichever is less
'----------------------------'
' convert keyword to uppercase
'
! cmp qle,&h7f ' limit search length of keyword so this fits into a signed byte
! jle efix_length '
! mov qle,&h127 ' set maximum length for use in a signed byte '
efix_length: '
'
'----------------------------'
'
! mov ebx,edi ' hold start position of keyword in ebx
lowercase_loop: ' DO loop
! dec dl ' any chars left to scan?
! jl exconv ' if not then exit this loop
! mov al,[edi] ' load char
! cmp al,&h5a ' is it above the uppercase 'z'
! jg ecase1 ' skip if it is
! cmp al,&h40 ' is at or below the uppercase boundary?
! jle ecase1 ' the skip
! or byte ptr [edi],&h20 ' convert to lowercase by patching 0010 0000
ecase1: '
! inc edi ' next char
! jmp lowercase_loop ' continue if more chars to convert
exconv:
! mov edi,ebx ' restore start point of keyword
'
'----------------------------'
'
! mov ebx,esi ' store current string address in ebx
! inc ebx ' ebx now contains the next address to scan from (default)
! mov edx,qle ' '
! mov eax,q ' hold the first keyword letter in ah
! mov ah,[eax] '
! cmp dl,0 ' null keyword ?
! jle done_match '
'============================'
search: '
'----------------------------'
' FIRST LETTER
! cmp esi,ecx ' check against boundary
! jge end_of_string ' if the boundary has been reached then there is no match
! mov al,[esi] ' load the character
! inc esi ' advanve the string pointer ready for next
! cmp al,&h5a ' is this above uppercase case letters?
! jle ecase2 ' it is less so skip
! cmp al,&h40 ' could this below upper case letters?
! jz scanning ' then skip all this and procede to scan
! or al,&h20 ' convert to loower case by patching 0010 0000
ecase2: '
! cmp al,ah ' Do they match?
! jnz search ' if they dont match then continue search loop
' drop thru if there is a match '
'----------------------------'
scanning: ' DO loop
! inc edi ' advance the keyword character pointer
! dec dl ' downcount to check if any keyword chars remaining
! jle done_match ' sucessful match
! cmp esi,ecx ' check against boundary
! jge end_of_string ' if the boundary has been reached then there is no match
! mov al,[esi] ' load the character
! cmp al,&h5a ' is this above uppercase case letters?
! jg ecase3 ' yes so skip
! cmp al,&h40 ' could this below upper case letters?
! jle ecase3 ' it is less sp skip
! or al,&h20 ' convert to loower case by patching 0010 0000
ecase3: '
! cmp al,[edi] ' Do they match?
! jnz fail_match ' if they dont match then procede to next word in the text string
'
'----------------------------'
' ADVANCE NEXT SCAN POSITION
! cmp al,ah ' check if it matches the current char
! jnz emark ' skip if it does not
! cmp dh,0 ' now check whether dh is zero
! jnz emark ' skip if it is not
! mov ebx,esi ' otherwise record the position in ebx, this marks the start of the next scan
! mov dh,1 ' set dh to 1 so that this check is not repeated during this scan
emark: '
'----------------------------'
'
! inc esi ' next text string position
! jmp short scanning ' continue scanning
'
'
'----------------------------'
'
fail_match: ' when the keyword characters did not match
! cmp dh,0 ' was the first key letter encountered
! jnz eno ' if not then ..
! cmp esi,ebx ' is esi greater than ebx?
! jg eno1 ' then keep esi where it is
eno: '
! mov esi,ebx ' otherwise set next scan position
eno1: '
! mov ebx,esi ' equalise
! inc ebx ' then set next future default position for string index
! mov edi,q ' restore the start position of the keyword
! mov edx,qle ' restore length of keyword to down counter (and also set dh to zero)
! jmp search ' go back and do another scan
'
' '
'----------------------------'
' '
end_of_string: ' but no match so return zero
! mov eax,0 '
! jmp short xit ' to end
'
'----------------------------'
'
done_match: ' success so return string index
! mov eax,esi '
! sub eax,p ' subtract base
! sub eax,qle ' subtract length of keyword
! inc eax ' drop thru to end
'
'----------------------------'
'
xit: '
! mov function,eax ' return the word number in eax or 0 if unsuccesful
'
'============================'
END FUNCTION
FUNCTION PBMAIN () AS LONG
DIM q AS LONG
DIM ms AS STRING
DIM mk AS STRING
' test string
' 0 1 2 3 4 | RULER
' 1234567890123456789012345678901234567890123456789 |
ms=" o b 1n1 two threes 123AbC three abrabrk1 abr"
mk="Abrk1 abs"
'speed test
LOCAL vv AS LONG
LOCAL tl AS LONG
LOCAL re AS LONG
LOCAL i AS LONG
LOCAL j AS LONG
LOCAL t AS QUAD
DIM priority AS LONG
priority=GetPriorityClass(GetCurrentProcess) ' save current thread priority
i=SetPriorityClass(GetCurrentProcess,&h00000100) ' set Priority to REAL TIME
IF i=0 THEN MSGBOX "unable to get RealTime priority for accurate measurement"
t=getTickCount() ' get the current millisecond count since last boot
' but if you leave your computer on for 49.7 days then
' this counter will turnover!
re=1000000
FOR i=1 TO re
j=findstring(STRPTR(ms),STRPTR(mk),LEN(ms),LEN(mk))
'j=INSTR(1,UCASE$(ms),UCASE$(mk))
'j=INSTR(1,ms,mk)
NEXT
t=getTickCount()-t 'record lapsed time in milliseconds
SetPriorityClass(GetCurrentProcess(),priority) ' restore priority: this is usually &h00000020 NORMAL PRIORITY
MSGBOX "location for '"+mk+"' is: "+STR$(findstring(STRPTR(ms),STRPTR(mk),LEN(ms),LEN(mk)))+" ["+mk+"]"+$CR+ _
"Time for 1000,000 loops: mSec: "+STR$(t)
END FUNCTION