Code Clean
This is a more sophisticated version of String Clean, intended for cleaning up source code prior to compilation or scripting. It allows text contained between quotes to pass through with all spaces and other characters unaltered, while removing excess spaces from the rest of the text.
Comments are detected and stripped off, and a line continuation (underscore) also truncates the line. Leading and trailing spaces are removed after the line has been truncated in this way. Finally, the ASCII code of the character causing the truncation is returned by the function.
PowerBasic
' CodeClean
' Charles E V Pegge
' 15 July 2007
' PowerBasic V 8.x
#COMPILE EXE
#DIM ALL
FUNCTION CodeClean(BYREF s AS STRING, BYREF le AS LONG) AS LONG
#REGISTER NONE
' CodeClean - tidy one line of source text in place.
'
' Processing, in order:
'   * leading characters with codes 0..32 are skipped
'   * if the first remaining character is ' (39), * (42) or / (47) the
'     whole line is treated as a comment and the result is empty
'   * text between a " (34) or ' (39) and the next occurrence of the
'     same character is copied through unaltered
'   * outside quotes the line is cut at  //  (47 47),  |  (124) or
'     _ (95); note that ANY unquoted underscore truncates the line
'   * outside quotes a space is dropped when the previously stored
'     character was itself a space or control character
'   * trailing characters with codes 0..32 are excluded from the length
'
' Parameters:
'   s  - source/destination string; the cleaned text is written over
'        the start of the same buffer
'   le - in:  number of characters of s to parse (clamped to 0..LEN(s))
'        out: length of the cleaned text, i.e. use LEFT$(s, le)
' Return:
'   ASCII code of the last character parsed (the character that caused
'   the truncation, if any); 0 if no non-whitespace character was found.
'
' Limitation: an unterminated quote keeps its opening quote character,
' but the text following it is not counted in the returned length.
'
' NOTE(review): EBX, ESI and EDI are modified without an explicit
' save/restore - presumably the compiler preserves them around the
' function body; confirm against the PowerBASIC inline-assembler docs.
DIM ps AS BYTE PTR
' Never read or write beyond the string buffer.
IF le < 0 THEN le = 0
IF le > LEN(s) THEN le = LEN(s)
ps=STRPTR(s)
'asm
'===================='
! mov esi,ps              ' esi = read pointer
! mov edi,esi             ' edi = write pointer (output overwrites input)
! mov ebx,edi             ' ebx = one past the last non-space char written
! mov ecx,le              ' le is BYREF, so this loads its address
! mov ecx,[ecx]           ' ecx = characters still to be read
! xor eax,eax             ' al = current char, ah = active quote char
! dec esi                 ' the loop below increments before it reads
'--------------------'
rep_ltrim:                ' skip leading whitespace
'--------------------'
! inc esi
! dec ecx
! jl exit_extract         ' nothing but whitespace: empty result, returns 0
! cmp byte ptr [esi],32
! jbe rep_ltrim           ' unsigned: only codes 0..32 are whitespace
'--------------------'
! mov al,[esi]            ' first significant character
'--------------------'
' a line whose first character is ' * or / is a comment line
! cmp al,39
! jz exit_extract
! cmp al,42
! jz exit_extract
! cmp al,47
! jz exit_extract
'--------------------'
! inc ecx                 ' that character has not been consumed yet
'===================='
rep_extract:              ' main copy loop
'--------------------'
! dec ecx
! jl rtrim_extract        ' input exhausted
! mov al,[esi]
! inc esi                 ' from here ecx = characters remaining after al
'--------------------'
! cmp al,34               ' double quote opens a quoted section
! jz quo
! cmp al,39               ' so does a single quote
! jnz nquo                ' anything else is ordinary text
'--------------------'
quo:                      ' copy a quoted section verbatim
'--------------------'
! mov ah,al               ' remember which quote char must close it
! mov [edi],al
! inc edi
! mov ebx,edi             ' the opening quote counts as non-space
'--------------------'
rep_quo:
! dec ecx
! jl exit_extract         ' unterminated quote: length stops after the opener
! mov al,[esi]
! inc esi
! mov [edi],al
! inc edi
! cmp al,ah
! jnz rep_quo             ' keep copying until the matching quote
'--------------------'
! mov ah,0                ' no quote active any more
! mov ebx,edi             ' the closing quote counts as non-space
! jmp rep_extract
'--------------------'
nquo:                     ' ordinary (unquoted) character
'--------------------'
! cmp al,47               ' possible start of '//'
! jnz n47
! test ecx,ecx            ' no character left to look ahead at?
! jz n47
! cmp byte ptr [esi],47   ' second slash makes it a comment
! jz rtrim_extract
'--------------------'
n47:
'--------------------'
! cmp al,124              ' vertical bar starts a comment
! jz rtrim_extract
! cmp al,95               ' underscore is taken as line continuation
! jz rtrim_extract
! cmp al,32
! jnz nspace              ' not a space: always stored
'--------------------'
! cmp edi,ebx             ' edi<>ebx means the last stored char was blank
! jnz rep_extract         ' so this space is redundant - drop it
'--------------------'
nspace:
'--------------------'
! mov [edi],al
! inc edi
! cmp al,32
! jbe rep_extract         ' unsigned: codes 0..32 do not advance ebx
! mov ebx,edi             ' visible character: extend the trimmed length
! jmp rep_extract
'--------------------'
rtrim_extract:            ' ebx already excludes any trailing blanks
'--------------------'
exit_extract:
'--------------------'
! sub ebx,ps              ' cleaned length = ebx - start of buffer
! mov ecx,le
! mov [ecx],ebx           ' hand the new length back through le
! mov ah,0                ' drop any pending quote char so eax = al
! mov function,eax        ' return the last character parsed
'===================='
'end asm
END FUNCTION
FUNCTION PBMAIN()
' Demonstration: run CodeClean over one sample line and display the
' cleaned text, its length and the code CodeClean returned.
LOCAL sample AS STRING
LOCAL used AS LONG
LOCAL lastCode AS LONG
LOCAL report AS STRING
' Alternative inputs for experimenting:
'   sample=" 1* "
'   sample=" * "
'   sample=" "
sample=" a = b+c + 'z | // z' //| this is a comment "
used=LEN(sample)
' CodeClean rewrites sample in place and stores the new length in used.
lastCode=CodeClean(sample,used)
report=">>"+LEFT$(sample,used)+"<<"+" length:"+STR$(used)
report=report+" terminator code:"+STR$(lastCode)
MSGBOX report
END FUNCTION