Author Topic: String Search Functions / Inline Assembler  (Read 11808 times)

0 Members and 1 Guest are viewing this topic.

Offline Charles Pegge

  • Global Moderator
  • Hero Member
  • *****
  • Posts: 672
  • User-Rate: +27/-1
    • Charles Pegge
String Search Functions / Inline Assembler
« on: June 29, 2007, 07:19:36 PM »
Search for a word in a list and return its index

[Updated: 30th June 2007[/b]
Removed uppercase conversion from keyword, so it remains unaltered by the function.

Suitable for short lists

PowerBasic Version

Code: [Select]
' MatchID

' Charles E V Pegge
' 30 June 1007

' PowerBasic PBwin ver 8.x

#COMPILE EXE
#DIM ALL

' Case insensitive word matching. Returns word number in the sequence or zero if there is no match.

' Parameters:

' 1 string pointer for string or buffer to be searched
' 2 string pointer for the keyword to search with
' 3 the length of the text to be searched
' 4 the length of the keyword

' Return:

' the number of the matched word starting from 1
' If no match was found in the string then 0 is returned

' Neither the keyword nor the main string is altered by this function.

FUNCTION matchid(BYVAL p AS BYTE PTR, BYVAL q AS BYTE PTR, BYVAL ple AS LONG, BYVAL qle AS LONG) AS LONG

'----------------------------'
 ! mov esi,p                 ' pointer to string to be seeched
 ! mov edi,q                 ' pointer to keyword
 ! mov ecx,ple               ' length of string to be searched
 ! mov edx,qle               ' length of keyword
 ! add ecx,esi               ' add pointer to length to get end string boundary
 ! add edx,edi               ' add pointer to length to get end keyword boundary
'----------------------------'

                            '
 ! xor ebx,ebx               ' zero the word counter used to index the words in the string
 ! dec esi                   ' predec to enter next_word loop
                             '
'----------------------------'
next_word:                   ' otherwise drop thru and look for start of next word
 ! inc esi                   ' next char
 ! cmp esi,ecx               ' check boundary
 ! jge end_of_string         ' finish if the end of string
 ! cmp byte ptr [esi],32     ' is it a space or lower ascii?
 ! jle next_word             ' then continue checking through the string
 ! inc ebx                   ' increment word number
'----------------------------'
scanning:                    ' DO loop
 ! cmp edi,edx               ' check against the boundary
 ! jl scan_cont              ' if the boundary has not been reached then skip over
'----------------------------'
 ! cmp esi,ecx               ' check string boundary
 ! jge done_match            ' successful match if reached
 ! cmp byte ptr [esi],32     ' else check if space
 ! jle done_match            ' successful match if space or less
 ! jmp fail_match            ' there are more chars in the word so match failed
'----------------------------'
scan_cont:                   '
 ! cmp esi,ecx               ' check against boundary
 ! jge end_of_string         ' if the boundary has been reached then there is no match
 ! mov al,[esi]              ' load the character
 ! mov ah,[edi]
 ! and eax,&h5f5f            ' convert both to pseudo-upper case using mask 0101 1111
 ! cmp al,ah                 ' Do they match?
 ! jnz fail_match            ' if they dont match then procede to next word in the text string
 ! inc esi                   ' next text string position
 ! inc edi                   ' advance the keyword character pointer
 ! jmp short scanning        ' otherwise continue scanning
'============================'
                             '
fail_match:                  ' when the keyword characters did not match
 ! mov edi,q                 ' restore the start position of the keyword
next_nspc:                   ' Do loop to reach end of this word
 ! cmp byte ptr [esi],32     ' is this char a space
 ! jle next_word             ' if it is then procede to locate next word
 ! inc esi                   ' next char in the string
 ! cmp esi,ecx               ' boundary check
 ! jge end_of_string         ' finish without a match if the string boundary has been reached
 ! jmp next_nspc             ' if okay then continue working through the string
                             '
'============================'
                             '
'----------------------------'
end_of_string:               ' but no match so return zero
 ! mov eax,0                 '
 ! jmp short xit             '
'----------------------------'
                             '
done_match:                  ' success so return string index
 !  mov eax,ebx              ' pointer to the start of the word
'----------------------------'
                             '
xit:                         '
 ! mov function,eax          ' return the word number in eax or 0 if unsuccesful
'----------------------------'

END FUNCTION



FUNCTION PBMAIN () AS LONG

DIM q AS LONG
DIM ms AS STRING
DIM mk AS STRING

' test string
'     1 2 3      4                      5       6      7
ms="  o b 1n1    123abcd "+CHR$(10)+"   0123abc 123AbC three "

mk="123aBc"

MSGBOX "Word Index for '"+mk+"' is: "+STR$(matchid(STRPTR(ms),STRPTR(mk),LEN(ms),LEN(mk)))


END FUNCTION

« Last Edit: June 30, 2007, 11:02:13 AM by Charles Pegge »

Offline Theo Gottwald

  • Administrator
  • Hero Member
  • *****
  • Posts: 918
  • User-Rate: +30/-4
    • it-berater
Re: String Search Functions / Inline Assembler
« Reply #1 on: June 30, 2007, 09:59:29 AM »
While such subroutines are excellent for learning how to handlestrings (and are therefore highly welcome),
did you ever do a speed-test against PB-INSTR?

In the past all handmade-subroutines I saw were not significant better (depending on the case, string-len etc.) then the original INSTR.

Offline Charles Pegge

  • Global Moderator
  • Hero Member
  • *****
  • Posts: 672
  • User-Rate: +27/-1
    • Charles Pegge
Re: String Search Functions / Inline Assembler
« Reply #2 on: June 30, 2007, 11:15:15 AM »
Update to Findstring 1 9 July 2007 fixing dh register error

Not the above MATCHID, Theo but the function here is much closer to INSTR. It has some optimisation to avoid unnecessary scanning but there is a complexity tradeoff especially for short keywords. It may be more efficient to switch to a simpler algothm if the length of the keyword is say, 4 letters or less.

With the example given, I found that this function outperforms the PB equivalent:

INSTR(1,UCASE$(s),UCASE$(k)). This takes 63 msec for 100,000 loops whereas the code here performs the same task in 46 msec.

A normal case sensitive INSTR however, does it in 15 mSec


A Case Insensitive INSTR
Including speed test


For PowerBasic
Code: [Select]

#COMPILE EXE
#DIM ALL
#INCLUDE "WIN32API.INC"

' Case insensitive INSTR(). Returns position of keystring or zero if there is no match


' Parameters:

' 1 string pointer for string or buffer to be searched
' 2 string pointer for the keyword to search with
' 3 the length of the text to be searched
' 4 the length of the keyword

' Return:

' the number of the matched word starting from 1
' If no match was found in the string then 0 is returned


' Limits:
' only the first 127 chars of the keyword will be compared

' Side effects:

' the keyword is converted to upper case, so it may need to be renewed if used in other functions.



FUNCTION findstring(BYVAL p AS BYTE PTR, BYVAL q AS BYTE PTR, BYVAL ple AS LONG, BYVAL qle AS LONG) AS LONG

#REGISTER NONE

'----------------------------'
 ! mov esi,p                 ' pointer to string to be seeched
 ! mov edi,q                 ' pointer to keyword
 ! mov ecx,ple               ' length of string to be searched
 ! add ecx,esi               ' add pointer to length to get end string boundary
 ! mov edx,qle               ' length of keyword up or 127 whichever is less
'----------------------------'
                             ' convert keyword to uppercase
              '
 ! cmp qle,&h7f              ' limit search length of keyword so this fits into a signed byte
 ! jle efix_length           '
 ! mov qle,&h127             ' set maximum length for use in a signed byte               '
efix_length:                 '
                             '
'----------------------------'
                             '
 ! mov ebx,edi               ' hold start position of keyword in ebx
uppercase_loop:              ' DO loop
 ! dec dl                    ' any chars left to scan?
 ! jl exconv                 ' if not then exit this loop
 ! mov al,[edi]              ' load char
 ! cmp al,&h60               ' is at or below the lowercase boundary?
 ! jle ecase1                ' the skip
 ! cmp al,&h7a               ' is it above the lowercase 'z'
 ! jg ecase1                 ' skip if it is
 ! and byte ptr [edi],&hdf   ' convert to upper case by masking out 1101 1111
ecase1:                      '
 ! inc edi                   ' next char
 ! jmp uppercase_loop        ' continue if more chars to convert
exconv:
 ! mov edi,ebx               ' restore start point of keyword
                             '
'----------------------------'
                             '
 ! mov ebx,esi               ' store current string address in ebx
 ! inc ebx                   ' ebx now contains the next address to scan from (default)
 ! mov edx,qle               '              '
 ! mov eax,q                 ' hold the first keyword letter in ah
 ! mov ah,[eax]              '
                             '
'----------------------------'
search:                      '
'----------------------------'
scanning:                    ' DO loop
 ! dec dl                    ' downcount to check if any keyword chars remaining
 ! jl done_match             ' sucessful match
 ! cmp esi,ecx               ' check against boundary
 ! jge end_of_string         ' if the boundary has been reached then there is no match
 ! mov al,[esi]              ' load the character
 ! cmp al,&h60               ' could this be a lower case letter?
 ! jle ecase2                ' it is less so it is not
 ! cmp al,&h7a               ' is this above lower case letters?
 ! jg ecase2                 ' it is so skip
 ! and al,&hdf               ' convert to upper case by masking: 1101 1111
ecase2:                      '
 ! cmp al,[edi]              ' Do they match?
 ! jnz fail_match            ' if they dont match then procede to next word in the text string
                             '
'----------------------------'
                             ' ADVANCE NEXT SCAN POSITION
 ! cmp dh,0                  ' now check whether dh is zero
 ! jnz emark                 ' skip if it is not
 ! cmp al,ah                 ' otherwise check if it matches the current char
 ! jnz emark                 ' skip if it does not
 ! cmp ebx,esi
 ! jge emark
 ! mov ebx,esi               ' otherwise record the position in ebx, this marks the start of the next scan
 ! mov dh,1                  ' set dh to 1 so that this check is not repeated during this scan
emark:                       '

'----------------------------'
                             '
 ! inc esi                   ' next text string position
 ! inc edi                   ' advance the keyword character pointer
 ! jmp short scanning        ' otherwise continue scanning
                             '
                             '
'----------------------------'
                             '
fail_match:                  ' when the keyword characters did not match
 ! cmp dh,0                  ' was the first key letter encountered
 ! jnz eno                   ' if not then ..
 ! cmp esi,ebx               ' is esi greater than ebx?
 ! jg eno1                   ' then keep esi where it is
eno:                         '
 ! mov esi,ebx               ' otherwise set next scan position
eno1:                        '
 ! mov ebx,esi               ' equalise
 ! inc ebx                   ' then set next future default position for string index
 ! mov edi,q                 ' restore the start position of the keyword
 ! mov edx,qle               ' restore length of keyword to down counter (and also set dh to zero)
 ! jmp short search          ' go back and do another scan
                             '
'                            '
'----------------------------'
'                            '
end_of_string:               ' but no match so return zero
 ! mov eax,0                 '
 ! jmp short xit             ' to end
                             '
'----------------------------'
                             '
done_match:                  ' success so return string index
! mov eax,esi                '
! sub eax,p                  '
! sub eax,qle                '
! inc eax                    '  drop thru to end
                             '
'----------------------------'
                             '
xit:                         '
 ! mov function,eax          ' return the word number in eax or 0 if unsuccesful
                             '
'----------------------------'

END FUNCTION



FUNCTION PBMAIN () AS LONG

DIM q AS LONG
DIM ms AS STRING
DIM mk AS STRING

' test string
'   0        1         2         3         4          | RULER
'   1234567890123456789012345678901234567890123456789 |
ms="  o b 1n1    two threes 123AbC three abrabrk1 bbcd"

mk="Abrk1 bbcd"

'speed test

LOCAL vv AS LONG
LOCAL tl AS LONG
LOCAL re AS LONG
LOCAL i AS LONG
LOCAL j AS LONG
LOCAL t AS QUAD

DIM priority AS LONG

priority=GetPriorityClass(GetCurrentProcess)  ' save current thread priority
i=SetPriorityClass(GetCurrentProcess,&h00000100)   ' set Priority to REAL TIME

IF i=0 THEN MSGBOX "unable to get RealTime priority for accurate measurement"

t=getTickCount()                              ' get the current millisecond count since last boot
                                              ' but if you leave your computer on for 49.7 days then
                                              ' this counter will turnover!
re=100000

FOR i=1 TO re
 j=findstring(STRPTR(ms),STRPTR(mk),LEN(ms),LEN(mk))
 'j=INSTR(1,UCASE$(ms),UCASE$(mk))
 'j=INSTR(1,ms,mk)
NEXT

t=getTickCount()-t 'record lapsed time in milliseconds



SetPriorityClass(GetCurrentProcess(),priority) ' restore priority: this is usually &h00000020 NORMAL PRIORITY





MSGBOX "location for '"+mk+"' is: "+STR$(findstring(STRPTR(ms),STRPTR(mk),LEN(ms),LEN(mk)))+" ["+mk+"]"+$CR+ _
 "Time for 100,000 loops: mSec:  "+STR$(t)


END FUNCTION
« Last Edit: July 09, 2007, 09:33:02 AM by Charles Pegge »

Offline Charles Pegge

  • Global Moderator
  • Hero Member
  • *****
  • Posts: 672
  • User-Rate: +27/-1
    • Charles Pegge
Re: String Search Functions / Inline Assembler
« Reply #3 on: June 30, 2007, 02:42:46 PM »
A Yet Faster Case InSeNsItIve INSTR()

Updated: 9 July 2007 fixing dh register bug.

With some extra code, we can shorten one of the looping paths and squeeze the last drops of performance out of this function. from 47msec down to around 37 msec per 100,000 loops.

The trick is to ensure the shortest possible route when checking against the the first character of the keyword. Another enhancement is to assume most of the characters will above the upper-case band (ie lower-case) and thus able to skip one of the checks.

Code: [Select]

' FindString
' Version 2

' Charles E V Pegge
' 09 July 2007

' PowerBasic PBwin ver 8.x

#COMPILE EXE
#DIM ALL
#INCLUDE "WIN32API.INC"

' Case insensitive INSTR(). Returns position of keystring or zero if there is no match


' Parameters:

' 1 string pointer for string or buffer to be searched
' 2 string pointer for the keyword to search with
' 3 the length of the text to be searched
' 4 the length of the keyword

' Return:

' the number of the matched word starting from 1
' If no match was found in the string then 0 is returned


' Limits:
' only the first 127 chars of the keyword will be compared

' Side effects:

' the keyword is converted to lower case, so it may need to be renewed if used in other functions.



FUNCTION findstring(BYVAL p AS BYTE PTR, BYVAL q AS BYTE PTR, BYVAL ple AS LONG, BYVAL qle AS LONG) AS LONG

#REGISTER NONE

'============================'
 ! mov esi,p                 ' pointer to string to be seeched
 ! mov edi,q                 ' pointer to keyword
 ! mov ecx,ple               ' length of string to be searched
 ! add ecx,esi               ' add pointer to length to get end string boundary
 ! mov edx,qle               ' length of keyword up or 127 whichever is less
'----------------------------'
                             ' convert keyword to uppercase
              '
 ! cmp qle,&h7f              ' limit search length of keyword so this fits into a signed byte
 ! jle efix_length           '
 ! mov qle,&h127             ' set maximum length for use in a signed byte               '
efix_length:                 '
                             '
'----------------------------'
                             '
 ! mov ebx,edi               ' hold start position of keyword in ebx
lowercase_loop:              ' DO loop
 ! dec dl                    ' any chars left to scan?
 ! jl exconv                 ' if not then exit this loop
 ! mov al,[edi]              ' load char
 ! cmp al,&h5a               ' is it above the uppercase 'z'
 ! jg ecase1                 ' skip if it is
 ! cmp al,&h40               ' is at or below the uppercase boundary?
 ! jle ecase1                ' the skip
 ! or byte ptr [edi],&h20   ' convert to lowercase by patching 0010 0000
ecase1:                      '
 ! inc edi                   ' next char
 ! jmp lowercase_loop        ' continue if more chars to convert
exconv:
 ! mov edi,ebx               ' restore start point of keyword
                             '
'----------------------------'
                             '
 ! mov ebx,esi               ' store current string address in ebx
 ! inc ebx                   ' ebx now contains the next address to scan from (default)
 ! mov edx,qle               '              '
 ! mov eax,q                 ' hold the first keyword letter in ah
 ! mov ah,[eax]              '
 ! cmp dl,0                  ' null keyword ?
 ! jle done_match            '


'============================'
search:                      '
'----------------------------'
                             ' FIRST LETTER
 ! cmp esi,ecx               ' check against boundary
 ! jge end_of_string         ' if the boundary has been reached then there is no match
 ! mov al,[esi]              ' load the character
 ! inc esi                   ' advanve the string pointer ready for next
 ! cmp al,&h5a               ' is this above uppercase case letters?
 ! jle ecase2                ' it is less so skip
 ! cmp al,&h40               ' could this below upper case letters?
 ! jz scanning               ' then skip all this and procede to scan
 ! or al,&h20                ' convert to loower case by patching 0010 0000
ecase2:                      '
 ! cmp al,ah                 ' Do they match?
 ! jnz search                ' if they dont match then continue search loop
                             ' drop thru if there is a match              '
'----------------------------'

scanning:                    ' DO loop
 ! inc edi                   ' advance the keyword character pointer
 ! dec dl                    ' downcount to check if any keyword chars remaining
 ! jle done_match            ' sucessful match
 ! cmp esi,ecx               ' check against boundary
 ! jge end_of_string         ' if the boundary has been reached then there is no match
 ! mov al,[esi]              ' load the character
 ! cmp al,&h5a               ' is this above uppercase case letters?
 ! jg ecase3                 ' yes so skip
 ! cmp al,&h40               ' could this below upper case letters?
 ! jle ecase3                ' it is less sp skip
 ! or al,&h20                ' convert to loower case by patching 0010 0000
ecase3:                      '
 ! cmp al,[edi]              ' Do they match?
 ! jnz fail_match            ' if they dont match then procede to next word in the text string
                             '
'----------------------------'
                             ' ADVANCE NEXT SCAN POSITION
 ! cmp al,ah                 ' check if it matches the current char
 ! jnz emark                 ' skip if it does not
 ! cmp dh,0                  ' now check whether dh is zero
 ! jnz emark                 ' skip if it is not
 ! mov ebx,esi               ' otherwise record the position in ebx, this marks the start of the next scan
 ! mov dh,1                  ' set dh to 1 so that this check is not repeated during this scan
emark:                       '

'----------------------------'
                             '
 ! inc esi                   ' next text string position
 ! jmp short scanning        ' continue scanning
                             '
                             '
'----------------------------'
                             '
fail_match:                  ' when the keyword characters did not match
 ! cmp dh,0                  ' was the first key letter encountered
 ! jnz eno                   ' if not then ..
 ! cmp esi,ebx               ' is esi greater than ebx?
 ! jg eno1                   ' then keep esi where it is
eno:                         '
 ! mov esi,ebx               ' otherwise set next scan position
eno1:                        '
 ! mov ebx,esi               ' equalise
 ! inc ebx                   ' then set next future default position for string index
 ! mov edi,q                 ' restore the start position of the keyword
 ! mov edx,qle               ' restore length of keyword to down counter (and also set dh to zero)
 ! jmp search          ' go back and do another scan
                             '
'                            '
'----------------------------'
'                            '
end_of_string:               ' but no match so return zero
 ! mov eax,0                 '
 ! jmp short xit             ' to end
                             '
'----------------------------'
                             '
done_match:                  ' success so return string index
! mov eax,esi                '
! sub eax,p                  ' subtract base
! sub eax,qle                ' subtract length of keyword
! inc eax                    ' drop thru to end
                             '
'----------------------------'
                             '
xit:                         '
 ! mov function,eax          ' return the word number in eax or 0 if unsuccesful
                             '
'============================'

END FUNCTION



FUNCTION PBMAIN () AS LONG

DIM q AS LONG
DIM ms AS STRING
DIM mk AS STRING

' test string
'   0        1         2         3         4          | RULER
'   1234567890123456789012345678901234567890123456789 |
ms="  o b 1n1    two threes 123AbC three abrabrk1 abr"

mk="Abrk1 abs"

'speed test

LOCAL vv AS LONG
LOCAL tl AS LONG
LOCAL re AS LONG
LOCAL i AS LONG
LOCAL j AS LONG
LOCAL t AS QUAD

DIM priority AS LONG

priority=GetPriorityClass(GetCurrentProcess)  ' save current thread priority
i=SetPriorityClass(GetCurrentProcess,&h00000100)   ' set Priority to REAL TIME

IF i=0 THEN MSGBOX "unable to get RealTime priority for accurate measurement"

t=getTickCount()                              ' get the current millisecond count since last boot
                                              ' but if you leave your computer on for 49.7 days then
                                              ' this counter will turnover!
re=1000000

FOR i=1 TO re
 j=findstring(STRPTR(ms),STRPTR(mk),LEN(ms),LEN(mk))
 'j=INSTR(1,UCASE$(ms),UCASE$(mk))
 'j=INSTR(1,ms,mk)
NEXT

t=getTickCount()-t 'record lapsed time in milliseconds



SetPriorityClass(GetCurrentProcess(),priority) ' restore priority: this is usually &h00000020 NORMAL PRIORITY





MSGBOX "location for '"+mk+"' is: "+STR$(findstring(STRPTR(ms),STRPTR(mk),LEN(ms),LEN(mk)))+" ["+mk+"]"+$CR+ _
 "Time for 1000,000 loops: mSec:  "+STR$(t)


END FUNCTION


« Last Edit: July 09, 2007, 09:33:31 AM by Charles Pegge »

Offline Edwin Knoppert

  • Sr. Member
  • ****
  • Posts: 254
  • User-Rate: +11/-4
    • Hellobasic.com
Re: String Search Functions / Inline Assembler
« Reply #4 on: June 30, 2007, 05:18:24 PM »
I like ordinary BASIC, indulge me with a test :)

http://www.powerbasic.com/support/forums/Forum7/HTML/002354.html

Instr_ByPtr()

Offline Theo Gottwald

  • Administrator
  • Hero Member
  • *****
  • Posts: 918
  • User-Rate: +30/-4
    • it-berater
Re: String Search Functions / Inline Assembler
« Reply #5 on: June 30, 2007, 06:11:27 PM »
Ok, here is my bet:

I doubt that any BASIC Code can be faster. It can have the same speed in the best case.
The code (using pointers) i have seen at that link may compile to good optimized ASM,
but it will never reach the handoptimized code from Charles.

Now someone must do  the test :-)

Offline Edwin Knoppert

  • Sr. Member
  • ****
  • Posts: 254
  • User-Rate: +11/-4
    • Hellobasic.com
Re: String Search Functions / Inline Assembler
« Reply #6 on: June 30, 2007, 06:37:06 PM »
I am not saying this will be faster, i would like to mention that resorting to asm is usually nonsense for most of us.
Speed differences *can* be very little between PowerBASIC code and asm.
There are a few over here knowing exactly what they are doing but that's just a minority, code must be maintainable, asm is not to most of us.
Most of us will prefer code they can understand.

Enjoy asm if you like it of course..

Offline Charles Pegge

  • Global Moderator
  • Hero Member
  • *****
  • Posts: 672
  • User-Rate: +27/-1
    • Charles Pegge
Re: String Search Functions / Inline Assembler
« Reply #7 on: June 30, 2007, 07:41:02 PM »
Update 9 July 2007 fixing dh reggister bug

I love assembler Edwin, but I accept that most PB functions are already highly optimised, and it is unlikely that one can improve them. However, Assembler is useful for specialised functions which would normally involve several BASIC operations.

Here is my version of Instring. I know it is no faster than Bob's. There is probably very little difference under most circumstances because we are down to the bed rock of the CPU. The main loop simply cannot be made any shorter.

An Instring

Code: [Select]
' InString
' Version 1

' Charles E V Pegge
' 09 July 2007

' PowerBasic PBwin ver 8.x

#COMPILE EXE
#DIM ALL
#INCLUDE "WIN32API.INC"

' Case sensitive INSTR(). Returns position of keystring or zero if there is no match


' Parameters:

' 1 string pointer for string or buffer to be searched
' 2 string pointer for the keyword to search with
' 3 the length of the text to be searched
' 4 the length of the keyword

' Return:

' the number of the matched word starting from 1
' If no match was found in the string then 0 is returned


' Limits:
' only the first 127 chars of the keyword will be compared



FUNCTION instring(BYVAL p AS BYTE PTR, BYVAL q AS BYTE PTR, BYVAL ple AS LONG, BYVAL qle AS LONG) AS LONG

#REGISTER NONE

' ASM
'============================'
                             '
 ! mov esi,p                 ' pointer to string to be seeched
 ! mov edi,q                 ' pointer to keyword
 ! mov ecx,ple               ' length of string to be searched
 ! add ecx,esi               ' add pointer to length to get end string boundary
 ! mov edx,qle               ' length of keyword up or 127 whichever is less
'----------------------------'
                             ' convert keyword to uppercase
                             '
 ! cmp qle,&h7f              ' limit search length of keyword so this fits into a signed byte
 ! jle efix_length           '
 ! mov qle,&h127             ' set maximum length for use in a signed byte
efix_length:                 '
                             '
                             '
 ! mov ebx,esi               ' store current string address in ebx
 ! inc ebx                   ' ebx now contains the next address to scan from (default)
 ! mov edx,qle               '              '
 ! mov eax,q                 ' hold the first keyword letter in ah
 ! mov ah,[eax]              '
 ! cmp dl,0                  ' null keyword ?
 ! jle done_match            '
                             '
                             '
'============================'
search:                      '
'============================'
                             ' FIRST LETTER
 ! cmp esi,ecx               ' check against boundary
 ! jge end_of_string         ' if the boundary has been reached then there is no match
 ! mov al,[esi]              ' load the character
 ! inc esi                   ' advanve the string pointer ready for next
 ! cmp al,ah                 ' Do they match?
 ! jnz search                ' if they dont match then continue search loop
                             ' drop thru if there is a match              '
'----------------------------'
 '! jmp scanning
                             ' Accelerator for keywords at least 4 bytes long
 ! cmp dl,4                  ' are there 4 bytes or more to compare
 ! jl scanning               ' if less then scan as normal
 ! mov eax,[esi-1]           ' load 4 bytes
 ! cmp eax,[edi]             ' compare all 4 together
 ! jnz search                ' if the match fails then continue search
 ! mov eax,q                 ' restore 1st key char to ah
 ! mov ah,[eax]              '
 ! add esi,3                 ' offset esi by 3 since esi is already incremented
 ! add edi,4                 ' line up to check the fifth char
 ! sub dl,4                  ' reduce byte count by 4
 ! jmp scanning1             '
                             '
scanning:                    ' DO loop

 ! inc edi                   ' advance the keyword character pointer
 ! dec dl                    ' downcount to check if any keyword chars remaining
                             '
scanning1:                   '
                             '
 ! jle done_match             ' sucessful match
 ! cmp esi,ecx               ' check against boundary
 ! jge end_of_string         ' if the boundary has been reached then there is no match
 ! mov al,[esi]              ' load the character
 ! cmp al,[edi]              ' Do they match?
 ! jnz fail_match            ' if they dont match then procede to next word in the text string
                             '
'----------------------------'
                             ' ADVANCE NEXT SCAN POSITION
 ! cmp al,ah                 ' check if it matches the current char
 ! jnz emark                 ' skip if it does not
 ! cmp dh,0                  ' now check whether dh is zero
 ! jnz emark                 ' skip if it is not
 ! mov ebx,esi               ' otherwise record the position in ebx, this marks the start of the next scan
 ! mov dh,1                  ' set dh to 1 so that this check is not repeated during this scan
                             '
emark:                       '
                             '
'----------------------------'
                             '
 ! inc esi                   ' next text string position
 ! jmp short scanning        ' continue scanning
                             '
                             '
'----------------------------'
                             '
fail_match:                  ' when the keyword characters did not match
                             '
 ! cmp dh,0                  ' was the first key letter encountered
 ! jnz eno                   ' if not then ..
 ! cmp esi,ebx               ' is esi greater than ebx?
 ! jg eno1                   ' then keep esi where it is
                             '
eno:                         '
                             '
 ! mov esi,ebx               ' otherwise set next scan position
                             '
eno1:                        '
                             '
 ! mov ebx,esi               ' equalise
 ! inc ebx                   ' then set next future default position for string index
 ! mov edi,q                 ' restore the start position of the keyword
 ! mov edx,qle               ' restore length of keyword to down counter (and also set dh to zero)
 ! jmp search                ' go back and do another scan
                             '
'                            '
'----------------------------'
'                            '
end_of_string:               ' but no match so return zero
 ! mov eax,0                 '
 ! jmp short xit             ' to end
                             '
'----------------------------'
                             '
done_match:                  ' success so return string index
                             '
! mov eax,esi                ' transfer to eax
! sub eax,p                  ' subtract string base
! sub eax,qle                ' subtract length of keyword
! inc eax                    ' add 1 for DASIC's string indexing
                             ' drop thru to xit
'----------------------------'
                             '
xit:                         ' final steps
                             '
 ! mov function,eax          ' return the word number in eax or 0 if unsuccesful
                             '
'============================'

END FUNCTION



FUNCTION PBMAIN () AS LONG

DIM q AS LONG
DIM ms AS STRING
DIM mk AS STRING

' test string
'   0        1         2         3         4          | RULER
'   1234567890123456789012345678901234567890123456789 |
ms="  o b 1n1    two threes 123AbC three abrabrk1 "

mk="abrk1"

'speed test

LOCAL vv AS LONG
LOCAL tl AS LONG
LOCAL re AS LONG
LOCAL i AS LONG
LOCAL j AS LONG
LOCAL t AS QUAD

DIM priority AS LONG

priority=GetPriorityClass(GetCurrentProcess)  ' save current thread priority
i=SetPriorityClass(GetCurrentProcess,&h00000100)   ' set Priority to REAL TIME

IF i=0 THEN MSGBOX "unable to get RealTime priority for accurate measurement"

t=getTickCount()                              ' get the current millisecond count since last boot
                                              ' but if you leave your computer on for 49.7 days then
                                              ' this counter will turnover!
re=1000000

FOR i=1 TO re
 j=instring(STRPTR(ms),STRPTR(mk),LEN(ms),LEN(mk))
 'j=INSTR(1,ms,mk)
NEXT

t=getTickCount()-t 'record lapsed time in milliseconds



SetPriorityClass(GetCurrentProcess(),priority) ' restore priority: this is usually &h00000020 NORMAL PRIORITY





MSGBOX "location for '"+mk+"' is: "+STR$(j)+"   ["+mk+"]"+$CR+ _
 "Time for 1000,000 loops: mSec:  "+STR$(t)


END FUNCTION

« Last Edit: July 09, 2007, 09:34:04 AM by Charles Pegge »