Algorithms |
|
String |
|
Replace tabs with the corresponding number of spaces
const proc: delTabs (inout string: stri, in integer: tabJump) is func local var integer: tabPos is 0; var integer: blankCount is 0; begin tabPos := pos(stri, "\t"); while tabPos <> 0 do blankCount := tabJump - pred(tabPos) rem tabJump; stri := stri[ .. pred(tabPos)] & " " mult blankCount & stri[succ(tabPos) .. ]; tabPos := pos(stri, "\t"); end while; end func;
Replace leading spaces with the corresponding number of tabs
const proc: insertLeadingTabs (inout string: stri, in integer: tabJump) is func local var integer: blankCount is 1; var integer: tabCount is 0; begin while blankCount <= length(stri) and stri[blankCount] = ' ' do incr(blankCount); end while; decr(blankCount); if blankCount >= tabJump then tabCount := blankCount div tabJump; stri := "\t" mult tabCount & stri[succ(tabCount * tabJump) .. ]; end if; end func;
Replace spaces with the corresponding number of tabs
const proc: insertTabs (inout string: stri, in integer: tabJump) is func local var integer: blankPos is 0; var integer: pos is 0; var integer: tabCount1 is 0; var integer: tabCount2 is 0; begin blankPos := rpos(stri, ' '); while blankPos <> 0 do pos := pred(blankPos); while pos >= 1 and stri[pos] = ' ' do decr(pos); end while; tabCount1 := pos div tabJump; tabCount2 := blankPos div tabJump; if blankPos - pos >= 2 and tabCount2 > tabCount1 then stri := stri[.. pos] & "\t" mult (tabCount2 - tabCount1) & stri[succ(tabCount2 * tabJump) ..]; end if; blankPos := rpos(stri, ' ', pos); end while; end func;
Convert an Unicode UTF-32 string to UTF-8
The "unicode.s7i" library defines toUtf8, which converts an UTF-32 string to UTF-8. The function 'toUtf8_2' below shows how toUtf8 works:
const func string: toUtf8_2 (in string: stri) is func result var string: stri8 is ""; local var char: ch is ' '; var integer: pos is 1; begin stri8 := "\0;" mult (6 * length(stri)); for ch range stri do if ch <= '\16#7f;' then stri8 @:= [pos] ch; incr(pos); elsif ch <= '\16#7ff;' then stri8 @:= [pos ] chr(16#C0 + ( ord(ch) >> 6)); stri8 @:= [pos + 1] chr(16#80 + ( ord(ch) mod 16#40)); pos +:= 2; elsif ch <= '\16#ffff;' then stri8 @:= [pos ] chr(16#E0 + ( ord(ch) >> 12)); stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >> 6) mod 16#40)); stri8 @:= [pos + 2] chr(16#80 + ( ord(ch) mod 16#40)); pos +:= 3; elsif ch <= '\16#1fffff;' then stri8 @:= [pos ] chr(16#F0 + ( ord(ch) >> 18)); stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >> 12) mod 16#40)); stri8 @:= [pos + 2] chr(16#80 + ((ord(ch) >> 6) mod 16#40)); stri8 @:= [pos + 3] chr(16#80 + ( ord(ch) mod 16#40)); pos +:= 4; elsif ch <= '\16#3ffffff;' then stri8 @:= [pos ] chr(16#F8 + ( ord(ch) >> 24)); stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >> 18) mod 16#40)); stri8 @:= [pos + 2] chr(16#80 + ((ord(ch) >> 12) mod 16#40)); stri8 @:= [pos + 3] chr(16#80 + ((ord(ch) >> 6) mod 16#40)); stri8 @:= [pos + 4] chr(16#80 + ( ord(ch) mod 16#40)); pos +:= 5; else stri8 @:= [pos ] chr(16#FC + ( ord(ch) >> 30)); stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >> 24) mod 16#40)); stri8 @:= [pos + 2] chr(16#80 + ((ord(ch) >> 18) mod 16#40)); stri8 @:= [pos + 3] chr(16#80 + ((ord(ch) >> 12) mod 16#40)); stri8 @:= [pos + 4] chr(16#80 + ((ord(ch) >> 6) mod 16#40)); stri8 @:= [pos + 5] chr(16#80 + ( ord(ch) mod 16#40)); pos +:= 6; end if; end for; stri8 := stri8[.. pred(pos)]; end func;
Convert an Unicode UTF-8 string to UTF-32
The "unicode.s7i" library defines fromUtf8, which converts an UTF-8 string to UTF-32. The function 'fromUtf8_2' below shows how fromUtf8 works:
const func string: fromUtf8_2 (in string: stri8) is func result var string: stri is ""; local var integer: length is 0; var integer: pos8 is 0; var integer: pos is 0; var boolean: okay is TRUE; begin length := length(stri8); stri := "\0;" mult length; pos8 := 1; pos := 0; while length > 0 do incr(pos); if ord(stri8[pos8]) <= 16#7F then stri @:= [pos] stri8[pos8]; incr(pos8); decr(length); elsif ord(stri8[pos8 ]) >> 5 = 16#06 and length >= 2 and ord(stri8[pos8 + 1]) >> 6 = 16#02 then stri @:= [pos] chr((ord(stri8[pos8 ]) mod 16#20 << 6) + (ord(stri8[pos8 + 1]) mod 16#40)); pos8 +:= 2; length -:= 2; elsif ord(stri8[pos8 ]) >> 4 = 16#0E and length >= 3 and ord(stri8[pos8 + 1]) >> 6 = 16#02 and ord(stri8[pos8 + 2]) >> 6 = 16#02 then stri @:= [pos] chr((ord(stri8[pos8 ]) mod 16#10 << 12) + (ord(stri8[pos8 + 1]) mod 16#40 << 6) + (ord(stri8[pos8 + 2]) mod 16#40)); pos8 +:= 3; length -:= 3; elsif ord(stri8[pos8 ]) >> 3 = 16#1E and length >= 4 and ord(stri8[pos8 + 1]) >> 6 = 16#02 and ord(stri8[pos8 + 2]) >> 6 = 16#02 and ord(stri8[pos8 + 3]) >> 6 = 16#02 then stri @:= [pos] chr((ord(stri8[pos8 ]) mod 16#08 << 18) + (ord(stri8[pos8 + 1]) mod 16#40 << 12) + (ord(stri8[pos8 + 2]) mod 16#40 << 6) + (ord(stri8[pos8 + 3]) mod 16#40)); pos8 +:= 4; length -:= 4; elsif ord(stri8[pos8 ]) >> 2 = 16#3E and length >= 5 and ord(stri8[pos8 + 1]) >> 6 = 16#02 and ord(stri8[pos8 + 2]) >> 6 = 16#02 and ord(stri8[pos8 + 3]) >> 6 = 16#02 and ord(stri8[pos8 + 4]) >> 6 = 16#02 then stri @:= [pos] chr((ord(stri8[pos8 ]) mod 16#04 << 24) + (ord(stri8[pos8 + 1]) mod 16#40 << 18) + (ord(stri8[pos8 + 2]) mod 16#40 << 12) + (ord(stri8[pos8 + 3]) mod 16#40 << 6) + (ord(stri8[pos8 + 4]) mod 16#40)); pos8 +:= 5; length -:= 5; elsif ord(stri8[pos8 ]) >> 2 = 16#3F and length >= 6 and ord(stri8[pos8 + 1]) >> 6 = 16#02 and ord(stri8[pos8 + 2]) >> 6 = 16#02 and ord(stri8[pos8 + 3]) >> 6 = 16#02 and ord(stri8[pos8 + 4]) >> 6 = 16#02 and ord(stri8[pos8 + 5]) >> 6 = 16#02 then stri @:= [pos] chr((ord(stri8[pos8 ]) mod 16#04 << 30) + (ord(stri8[pos8 + 1]) mod 16#40 << 24) + (ord(stri8[pos8 + 2]) mod 16#40 << 18) + (ord(stri8[pos8 + 3]) mod 16#40 << 12) + (ord(stri8[pos8 + 4]) mod 16#40 << 6) + (ord(stri8[pos8 + 5]) mod 16#40)); pos8 +:= 6; length -:= 6; else okay := FALSE; length := 0; end if; end while; if okay then stri := stri[.. pos]; else raise RANGE_ERROR; end if; end func;
Encode a string with the Base64 encoding
The function toBase64 is part of the "encoding.s7i" library. Base64 encodes a byte string as ASCII string. This is done by taking packs of 6-bits and translating them into a radix-64 representation. The radix-64 digits are encoded with letters, digits and the characters '+' and '/'.
const func string: toBase64 (in string: byteStri) is func result var string: base64 is ""; local const string: coding is "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; var integer: index is 1; var integer: subIndex is 1; var char: ch is ' '; var integer: threeBytes is 0; var string: fourBytes is " "; var integer: posToAddNewline is 58; begin for index range 1 to length(byteStri) step 3 do threeBytes := 0; for subIndex range index to index + 2 do threeBytes <<:= 8; if subIndex <= length(byteStri) then ch := byteStri[subIndex]; if ch >= '\256;' then raise RANGE_ERROR; end if; threeBytes +:= ord(ch); end if; end for; fourBytes @:= [1] coding[succ( threeBytes >> 18)]; fourBytes @:= [2] coding[succ((threeBytes >> 12) mod 64)]; fourBytes @:= [3] coding[succ((threeBytes >> 6) mod 64)]; fourBytes @:= [4] coding[succ( threeBytes mod 64)]; if index = posToAddNewline then base64 &:= "\n"; posToAddNewline +:= 57; end if; base64 &:= fourBytes; end for; index := length(base64); if length(byteStri) rem 3 = 2 then base64 @:= [index] '='; elsif length(byteStri) rem 3 = 1 then base64 @:= [pred(index)] "=="; end if; end func;
Decode a Base64 encoded string
The function fromBase64 is part of the "encoding.s7i" library.
const func string: fromBase64 (in string: base64) is func result var string: decoded is ""; local const array integer: decode is [] ( # -1 is illegal 62, -1, -1, -1, 63, # + / 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, # 0 - 9 -1, -1, -1, 0, -1, -1, -1, # = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, # A - M 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, # N - Z -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, # a - m 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51); # n - z var integer: index is 1; var integer: subIndex is 1; var integer: number is 0; var integer: fourBytes is 0; var string: threeBytes is " "; begin while index <= length(base64) - 3 do if base64[index] >= '+' then fourBytes := 0; for subIndex range index to index + 3 do number := decode[ord(base64[subIndex]) - ord(pred('+'))]; if number = -1 then raise RANGE_ERROR; end if; fourBytes := (fourBytes << 6) + number; end for; threeBytes @:= [1] chr( fourBytes >> 16); threeBytes @:= [2] chr((fourBytes >> 8) mod 256); threeBytes @:= [3] chr( fourBytes mod 256); decoded &:= threeBytes; index +:= 4; elsif base64[index] = '\n' or base64[index] = '\r' then incr(index); else raise RANGE_ERROR; end if; end while; if index <> succ(length(base64)) or (length(base64) >= 2 and pos(base64[.. length(base64) - 2], '=') <> 0) then raise RANGE_ERROR; end if; if length(base64) >= 2 and base64[pred(length(base64)) fixLen 2] = "==" then decoded := decoded[.. length(decoded) - 2]; elsif length(base64) >= 1 and base64[length(base64)] = '=' then decoded := decoded[.. pred(length(decoded))]; end if; end func;
Encode a string with the Quoted-printable encoding
The function toQuotedPrintable is part of the "encoding.s7i" library. Quoted-printable encodes a byte string as ASCII string. This is done by encoding printable ASCII characters except '=' as themself. Other byte values are encoded with '=' followed by two hexadecimal digits representing the byte's numeric value.
const func string: toQuotedPrintable (in string: byteStri) is func result var string: quoted is ""; local var integer: index is 0; var integer: startPos is 1; var integer: counter is 1; var char: ch is ' '; begin for index range 1 to length(byteStri) do ch := byteStri[index]; if ch >= '\256;' then raise RANGE_ERROR; elsif ch = '\n' or (ch = '\r' and index < length(byteStri) and byteStri[succ(index)] = '\n') then if index > 1 then ch := byteStri[pred(index)]; if ch = ' ' or ch = '\t' then quoted &:= byteStri[startPos .. index - 2]; if counter >= 76 then quoted &:= "=\n"; counter := 1; end if; quoted &:= "=" <& ord(byteStri[pred(index)]) RADIX 16 lpad0 2; counter +:= 3; startPos := index; end if; end if; counter := 1; elsif ch >= '\127;' or ch = '=' or (ch < ' ' and ch <> '\9;') then quoted &:= byteStri[startPos .. pred(index)]; if counter >= 74 then quoted &:= "=\n"; counter := 1; end if; quoted &:= "=" <& ord(ch) RADIX 16 lpad0 2; startPos := succ(index); counter +:= 3; elsif counter >= 76 then quoted &:= byteStri[startPos .. pred(index)] & "=\n"; startPos := index; counter := 2; else incr(counter); end if; end for; quoted &:= byteStri[startPos ..]; end func;
Decode a Quoted-printable encoded string
The function fromQuotedPrintable is part of the "encoding.s7i" library.
const func string: fromQuotedPrintable (in string: quoted) is func result var string: decoded is ""; local var integer: startPos is 1; var integer: equalSignPos is 0; var string: twoChars is ""; begin equalSignPos := pos(quoted, "="); while equalSignPos <> 0 do decoded &:= quoted[startPos .. pred(equalSignPos)]; if equalSignPos < length(quoted) and quoted[succ(equalSignPos)] = '\n' then startPos := equalSignPos + 2; elsif equalSignPos <= length(quoted) - 2 then twoChars := quoted[succ(equalSignPos) fixLen 2]; if twoChars[1] in hexdigit_char and twoChars[2] in hexdigit_char then decoded &:= chr(integer(twoChars, 16)); elsif twoChars <> "\r\n" then raise RANGE_ERROR; end if; startPos := equalSignPos + 3; else raise RANGE_ERROR; end if; equalSignPos := pos(quoted, "=", startPos); end while; decoded &:= quoted[startPos ..]; end func;
Encode a string with uuencoding
The function toUuencoded is part of the "encoding.s7i" library. Uuencode encodes a byte string as ASCII string. This is done by taking packs of 6-bits and translating them into a radix-64 representation. The radix-64 digits are encoded with consecutive ASCII characters starting from ' ' (which represents 0). Every line starts with a radix-64 digit character indicating the number of data bytes encoded on that line.
const func string: toUuencoded (in string: byteStri) is func result var string: uuencoded is ""; local var integer: index is 1; var integer: subIndex is 1; var char: ch is ' '; var integer: threeBytes is 0; var string: fourBytes is " "; var integer: posToAddNewline is 43; begin if length(byteStri) <> 0 then if length(byteStri) < 45 then uuencoded &:= chr(32 + length(byteStri)); else uuencoded &:= "M"; end if; for index range 1 to length(byteStri) step 3 do threeBytes := 0; for subIndex range index to index + 2 do threeBytes <<:= 8; if subIndex <= length(byteStri) then ch := byteStri[subIndex]; if ch >= '\256;' then raise RANGE_ERROR; end if; threeBytes +:= ord(ch); end if; end for; fourBytes @:= [1] chr(32 + (threeBytes >> 18)); fourBytes @:= [2] chr(32 + (threeBytes >> 12) mod 64); fourBytes @:= [3] chr(32 + (threeBytes >> 6) mod 64); fourBytes @:= [4] chr(32 + threeBytes mod 64); uuencoded &:= fourBytes; if index = posToAddNewline and length(byteStri) > index + 2 then if length(byteStri) - index - 2 < 45 then uuencoded &:= "\n" <& chr(32 + length(byteStri) - index - 2); else uuencoded &:= "\nM"; end if; posToAddNewline +:= 45; end if; end for; uuencoded &:= "\n"; end if; uuencoded &:= "`\n"; end func;
Decode an uuencoded string
The function fromUuencoded is part of the "encoding.s7i" library.
const func string: fromUuencoded (in string: uuencoded) is func result var string: decoded is ""; local var integer: lineLength is 1; var integer: index is 1; var integer: subIndex is 1; var integer: number is 0; var integer: fourBytes is 0; var string: threeBytes is " "; begin lineLength := ord(uuencoded[1]) - 32; while lineLength <> 0 and lineLength <> 64 do incr(index); while lineLength >= 1 do fourBytes := 0; for subIndex range index to index + 3 do number := ord(uuencoded[subIndex]) - 32; if number = 64 then number := 0; elsif number < 0 or number > 64 then raise RANGE_ERROR; end if; fourBytes := (fourBytes << 6) + number; end for; threeBytes @:= [1] chr( fourBytes >> 16); threeBytes @:= [2] chr((fourBytes >> 8) mod 256); threeBytes @:= [3] chr( fourBytes mod 256); decoded &:= threeBytes[ .. lineLength]; lineLength -:= 3; index +:= 4; end while; while index <= length(uuencoded) and uuencoded[index] <> '\n' do incr(index); end while; if index < length(uuencoded) then incr(index); lineLength := ord(uuencoded[index]) - 32; else lineLength := 0; end if; end while; end func;
Encode a string with percent encoding
The function toPercentEncoded is part of the "encoding.s7i" library. Percent-encoding encodes a byte string as ASCII string. This is done by encoding all characters, which are not in the set of unreserved characters (A-Z, a-z, 0-9 - _ . ~). The encoding uses a percent sign ('%') followed by two hexadecimal digits, which represent the ordinal value of the encoded character.
const func string: toPercentEncoded (in string: byteStri) is func result var string: percentEncoded is ""; local const set of char: unreservedChars is alphanum_char | {'-', '_', '.', '~'}; var integer: pos is 0; var integer: start is 1; var char: ch is ' '; begin for ch key pos range byteStri do if ch > '\255;' then raise RANGE_ERROR; elsif ch not in unreservedChars then percentEncoded &:= byteStri[start .. pred(pos)]; percentEncoded &:= "%" <& ord(ch) RADIX 16 lpad0 2; start := succ(pos); end if; end for; percentEncoded &:= byteStri[start ..]; end func;
Decode a percent encoded string
The function fromPercentEncoded is part of the "encoding.s7i" library. Percent-encoding encodes a byte string as ASCII string. It uses the percent sign ('%') followed by two hexadecimal digits to encode characters that otherwise would not be allowed in an URL. Allowed URL characters are encoded as themself.
const func string: fromPercentEncoded (in string: percentEncoded) is func result var string: decoded is ""; local var integer: pos is 0; var integer: start is 1; begin pos := pos(percentEncoded, '%'); while pos <> 0 do if pos <= length(percentEncoded) - 2 and percentEncoded[succ(pos)] in hexdigit_char and percentEncoded[pos + 2] in hexdigit_char then decoded &:= percentEncoded[start .. pred(pos)]; decoded &:= char(integer(percentEncoded[succ(pos) fixLen 2], 16)); pos +:= 2; start := succ(pos); end if; pos := pos(percentEncoded, '%', succ(pos)); end while; decoded &:= percentEncoded[start ..]; end func;
Encode a string with the URL encoding
The function toUrlEncoded is part of the "encoding.s7i" library. URL encoding encodes a byte string as ASCII string. This is done by encoding all characters, which are not in the set of unreserved characters (A-Z, a-z, 0-9 - _ . ~). The encoding uses a percent sign ('%') followed by two hexadecimal digits, which represent the ordinal value of the encoded character. A plus sign ('+') is used to encode a space (' ').
const func string: toUrlEncoded (in string: byteStri) is func result var string: urlEncoded is ""; local const set of char: unreservedChars is alphanum_char | {'-', '_', '.', '~'}; var integer: pos is 0; var integer: start is 1; var char: ch is ' '; begin for ch key pos range byteStri do if ch > '\255;' then raise RANGE_ERROR; elsif ch = ' ' then urlEncoded &:= byteStri[start .. pred(pos)]; urlEncoded &:= '+'; start := succ(pos); elsif ch not in unreservedChars then urlEncoded &:= byteStri[start .. pred(pos)]; urlEncoded &:= "%" <& ord(ch) RADIX 16 lpad0 2; start := succ(pos); end if; end for; urlEncoded &:= byteStri[start ..]; end func;
Decode an URL encoded string
The function fromUrlEncoded is part of the "encoding.s7i" library. URL encoding encodes a byte string as ASCII string. It uses the percent sign ('%') followed by two hexadecimal digits to encode characters that otherwise would not be allowed in an URL. A plus sign ('+') is used to encode a space (' '). Allowed URL characters are encoded as themself.
const func string: fromUrlEncoded (in string: urlEncoded) is func result var string: decoded is ""; local var integer: pos is 0; var integer: start is 1; var char: ch is ' '; begin for ch key pos range urlEncoded do if ch = '%' and pos <= length(urlEncoded) - 2 and urlEncoded[succ(pos)] in hexdigit_char and urlEncoded[pos + 2] in hexdigit_char then decoded &:= urlEncoded[start .. pred(pos)]; decoded &:= chr(integer(urlEncoded[succ(pos) fixLen 2], 16)); start := pos + 3; elsif ch = '+' then decoded &:= urlEncoded[start .. pred(pos)]; decoded &:= ' '; start := succ(pos); end if; end for; decoded &:= urlEncoded[start ..]; end func;
Encode a string with the Ascii85 encoding
The function toAscii85 is part of the "encoding.s7i" library. Ascii85 encodes a byte string as ASCII string. This is done by encoding every four bytes with five printable ASCII characters. Five radix 85 digits provide enough possible values to encode the possible values of four bytes. The radix 85 digits are encoded with the characters '!' (encodes 0) through 'u' (encodes 84). If the last block of the byte string contains fewer than 4 bytes, the block is padded with up to three null bytes before encoding. After encoding, as many bytes as were added as padding are removed from the end of the output. In files the end of an Ascii85 encoding is marked with "~>" (this end marker is not added by toAscii85).
const func string: toAscii85 (in string: byteStri) is func result var string: ascii85 is ""; local var integer: index is 0; var integer: subIndex is 0; var integer: fourBytes is 0; var string: fiveBytes is " "; var integer: partialGroupSize is 0; var char: ch is ' '; begin for index range 1 to length(byteStri) - 3 step 4 do fourBytes := bytes2Int(byteStri[index fixLen 4], UNSIGNED, BE); if fourBytes = 0 then ascii85 &:= 'z'; else for subIndex range 5 downto 1 do fiveBytes @:= [subIndex] chr(ord('!') + fourBytes rem 85); fourBytes := fourBytes div 85; end for; ascii85 &:= fiveBytes; end if; end for; partialGroupSize := length(byteStri) mod 4; if partialGroupSize <> 0 then index := succ(pred(length(byteStri)) mdiv 4 * 4); fourBytes := bytes2Int(byteStri[index ..] & "\0;" mult 4 - partialGroupSize, UNSIGNED, BE); for subIndex range 5 downto 1 do fiveBytes @:= [subIndex] chr(ord('!') + fourBytes rem 85); fourBytes := fourBytes div 85; end for; ascii85 &:= fiveBytes[.. succ(partialGroupSize)]; end if; end func;
Decode a Ascii85 encoded string
The function fromAscii85 is part of the "encoding.s7i" library. Every block of five radix 85 characters is decoded to four bytes. Radix 85 characters are between '!' (encodes 0) and 'u' (encodes 84). The character 'z' is used to encode a block of four zero bytes. White space in the Ascii85 encoded string is ignored. The last block is padded to 5 bytes with the Ascii85 character 'u', and as many bytes as were added as padding are omitted from the end of the output.
const func string: fromAscii85 (in string: ascii85) is func result var string: decoded is ""; local const set of char: whiteSpace is {'\0;', '\t', '\n', '\f', '\r', ' '}; var char: ch is ' '; var integer: digitIndex is 0; var integer: base85Number is 0; var integer: idx is 0; begin for ch range ascii85 until ch = '~' do if ch >= '!' and ch <= 'u' then incr(digitIndex); base85Number := base85Number * 85 + (ord(ch) - ord('!')); if digitIndex = 5 then decoded &:= bytes(base85Number, UNSIGNED, BE, 4); digitIndex := 0; base85Number := 0; end if; elsif ch = 'z' and digitIndex = 0 then decoded &:= "\0;\0;\0;\0;"; elsif ch not in whiteSpace then raise RANGE_ERROR; end if; end for; if digitIndex <> 0 then for idx range 1 to 5 - digitIndex do base85Number := base85Number * 85 + 84; end for; decoded &:= bytes(base85Number, UNSIGNED, BE, 4)[.. pred(digitIndex)]; end if; end func;
Encode a string with the AsciiHex encoding
The function toAsciiHex is part of the "encoding.s7i" library. AsciiHex encodes a byte string as ASCII string. In AsciiHex each byte is encoded with two hexadecimal digits. White-space characters in an AsciiHex encoded string are ignored. The AsciiHex encoded string ends with the character '>'.
const func string: toAsciiHex (in string: byteStri) is func result var string: asciiHex is ""; local const char: endOfData is '>'; const integer: encodingsPerLine is 32; var integer: index is 1; begin while index <= length(byteStri) do asciiHex &:= hex(byteStri[index len encodingsPerLine]); asciiHex &:= "\n"; index +:= encodingsPerLine; end while; asciiHex &:= endOfData; end func;
Decode an AsciiHex encoded string
The function fromAsciiHex is part of the "encoding.s7i" library. * In AsciiHex each byte is encoded with two hexadecimal digits. * White-space characters in an AsciiHex encoded string are ignored. * The AsciiHex encoded string ends with the character '>'. * If a '>' follows the first hexadecimal digit of an encoded byte * the decoding works as if a '0' is at the place of the '>'. * The decoder works also correctly if the '>' is missing.
const func string: fromAsciiHex (in string: asciiHex) is func result var string: stri is ""; local const char: endOfData is '>'; const set of char: whiteSpace is {'\0;', '\t', '\n', '\f', '\r', ' '}; var integer: index is 1; begin while index < length(asciiHex) and asciiHex[index] <> endOfData do if asciiHex[index] not in whiteSpace then if asciiHex[succ(index)] = endOfData then stri &:= char(integer(asciiHex[index fixLen 1] & "0", 16)); incr(index); else stri &:= char(integer(asciiHex[index fixLen 2], 16)); index +:= 2; end if; else incr(index); end if; end while; if index = length(asciiHex) and asciiHex[index] <> endOfData and asciiHex[index] not in whiteSpace then stri &:= char(integer(asciiHex[index fixLen 1] & "0", 16)); end if; end func;
Compress a string using the Lempel Ziv Welch (LZW) compression
This algorithm compresses a byte string to a string of tokens which contains characters >= 256. To write this to a byte file it is necessary to add an algorithm which writes the tokens with 9 or more bits.
const func string: lzwCompress (in string: uncompressed) is func result var string: compressed is ""; local var char: ch is ' '; var hash [string] char: mydict is (hash [string] char).value; var string: buffer is ""; var string: xstr is ""; begin for ch range chr(0) to chr(255) do mydict @:= [str(ch)] ch; end for; for ch range uncompressed do xstr := buffer & str(ch); if xstr in mydict then buffer &:= str(ch) else compressed &:= str(mydict[buffer]); mydict @:= [xstr] chr(length(mydict)); buffer := str(ch); end if; end for; if buffer <> "" then compressed &:= str(mydict[buffer]); end if; end func;
Decompress a Lempel Ziv Welch (LZW) compressed string
The compressed string consists of a sequence of tokens (which contain also characters >= 256). The decompress algorithm produces a byte string.
const func string: lzwDecompress (in string: compressed) is func result var string: uncompressed is ""; local var char: ch is ' '; var hash [char] string: mydict is (hash [char] string).value; var string: buffer is ""; var string: current is ""; var string: chain is ""; begin for ch range chr(0) to chr(255) do mydict @:= [ch] str(ch); end for; for ch range compressed do if buffer = "" then buffer := mydict[ch]; uncompressed &:= buffer; elsif ch <= chr(255) then current := mydict[ch]; uncompressed &:= current; chain := buffer & current; mydict @:= [chr(length(mydict))] chain; buffer := current; else if ch in mydict then chain := mydict[ch]; else chain := buffer & str(buffer[1]); end if; uncompressed &:= chain; mydict @:= [chr(length(mydict))] buffer & str(chain[1]); buffer := chain; end if; end for; end func;
Compress a string using the run length encoding of bzip2
A sequence of 4 to 259 identical characters is replaced by four identical characters followed by a repeat length between 0 and 255. This run length encoding is used as first compression technique in the bzip2 compression.
const func string: bzip2RleCompress (in string: uncompressed) is func result var string: compressed is ""; local var integer: index is 1; var integer: oldpos is 1; var char: ch is ' '; begin while index <= length(uncompressed) - 3 do ch := uncompressed[index]; if uncompressed[succ(index)] = ch and uncompressed[index + 2] = ch and uncompressed[index + 3] = ch then index +:= 4; compressed &:= uncompressed[oldpos .. pred(index)]; oldpos := index; while index <= length(uncompressed) and uncompressed[index] = ch do incr(index); end while; compressed &:= chr(index - oldpos); oldpos := index; else incr(index); end if; end while; compressed &:= uncompressed[oldpos ..]; end func;
Decompress a string using the run length encoding of bzip2
A sequence of 4 identical characters followed by a repeat length between 0 and 255 is replaced by 4 to 259 identical characters. This run length decoding is used as last decompression technique in the bzip2 decompression.
const func string: bzip2RleDecompress (in string: compressed) is func result var string: uncompressed is ""; local var integer: index is 1; var integer: oldpos is 1; var char: ch is ' '; begin while index <= length(compressed) - 3 do ch := compressed[index]; if compressed[succ(index)] = ch and compressed[index + 2] = ch and compressed[index + 3] = ch then index +:= 4; uncompressed &:= compressed[oldpos .. pred(index)] & str(ch) mult ord(compressed[index]); incr(index); oldpos := index; else incr(index); end if; end while; uncompressed &:= compressed[oldpos ..]; end func;
Compress a string using the run length encoding of PackBits
const func string: packBits (in string: uncompressed) is func result var string: compressed is ""; local var integer: index is 1; var integer: oldpos is 1; var char: ch is ' '; begin while index <= length(uncompressed) - 2 do ch := uncompressed[index]; if uncompressed[succ(index)] = ch and uncompressed[index + 2] = ch then while index - oldpos >= 128 do compressed &:= "\127;" & uncompressed[oldpos fixLen 128]; oldpos +:= 128; end while; if index > oldpos then compressed &:= str(chr(pred(index - oldpos))) & uncompressed[oldpos .. pred(index)]; oldpos := index; end if; index +:= 3; while index <= length(uncompressed) and uncompressed[index] = ch do incr(index); end while; while index - oldpos >= 128 do compressed &:= "\129;" & str(ch); oldpos +:= 128; end while; if pred(index) > oldpos then compressed &:= str(chr(257 - (index - oldpos))) & str(ch); oldpos := index; end if; else incr(index); end if; end while; index := succ(length(uncompressed)); while index - oldpos >= 128 do compressed &:= "\127;" & uncompressed[oldpos fixLen 128]; oldpos +:= 128; end while; if index > oldpos then compressed &:= str(chr(pred(index - oldpos))) & uncompressed[oldpos ..]; end if; end func;
Decompress a string using the run length encoding of PackBits
const func string: unpackBits (in string: compressed) is func result var string: uncompressed is ""; local var integer: index is 1; var char: ch is ' '; begin while index <= length(compressed) do ch := compressed[index]; if ch <= chr(127) then uncompressed &:= compressed[succ(index) fixLen succ(ord(ch))]; index +:= ord(ch) + 2; else uncompressed &:= str(compressed[succ(index)]) mult (257 - ord(ch)); index +:= 2; end if; end while; end func;
Burrows-Wheeler transform (basic concept)
This algorithm demonstrates the basic concept of the Burrows-Wheeler transform. It is not intended to be used for big data blocks.
const func string: burrowsWheelerTransformConcept (in string: stri) is func result var string: transformed is ""; local var integer: length is 0; var integer: index is 0; var array string: rotations is 0 times ""; begin length := succ(length(stri)); rotations := length times ""; for index range 1 to length do rotations[index] := stri[index ..] & "\256;" & stri[.. pred(index)]; end for; rotations := sort(rotations); for index range 1 to length do transformed &:= rotations[index][length]; end for; end func;
Inverse Burrows-Wheeler transform (basic concept)
This algorithm demonstrates the basic concept of the Burrows-Wheeler transform. It is not intended to be used for big data blocks.
const func string: inverseBurrowsWheelerTransformConcept (in string: transformed) is func result var string: stri is ""; local var integer: length is 0; var integer: count is 0; var integer: index is 0; var array string: rotations is 0 times ""; begin length := length(transformed); rotations := length times ""; for count range 1 to length do for index range 1 to length do rotations[index] := str(transformed[index]) & rotations[index]; end for; rotations := sort(rotations); end for; stri := rotations[1]; index := pos(stri, "\256;"); stri := stri[succ(index) ..] & stri[.. pred(index)]; end func;
Convert a character string to morse code
const func string: charToMorse (in char: ch) is func result var string: morseCode is ""; begin case ch of when {'a', 'A'}: morseCode := ".-"; when {'b', 'B'}: morseCode := "-..."; when {'c', 'C'}: morseCode := "-.-."; when {'d', 'D'}: morseCode := "-.."; when {'e', 'E'}: morseCode := "."; when {'f', 'F'}: morseCode := "..-."; when {'g', 'G'}: morseCode := "--."; when {'h', 'H'}: morseCode := "...."; when {'i', 'I'}: morseCode := ".."; when {'j', 'J'}: morseCode := ".---"; when {'k', 'K'}: morseCode := "-.-"; when {'l', 'L'}: morseCode := ".-.."; when {'m', 'M'}: morseCode := "--"; when {'n', 'N'}: morseCode := "-."; when {'o', 'O'}: morseCode := "---"; when {'p', 'P'}: morseCode := ".--."; when {'q', 'Q'}: morseCode := "--.-"; when {'r', 'R'}: morseCode := ".-."; when {'s', 'S'}: morseCode := "..."; when {'t', 'T'}: morseCode := "-"; when {'u', 'U'}: morseCode := "..-"; when {'v', 'V'}: morseCode := "...-"; when {'w', 'W'}: morseCode := ".--"; when {'x', 'X'}: morseCode := "-..-"; when {'y', 'Y'}: morseCode := "-.--"; when {'z', 'Z'}: morseCode := "--.."; when {'Ä', 'Æ'}: morseCode := ".-.-"; when {'À', 'Å'}: morseCode := ".--.-"; when {'Ç', 'Ĉ'}: morseCode := "-.-.."; when {'Ð'}: morseCode := "..--."; when {'È'}: morseCode := ".-..-"; when {'É'}: morseCode := "..-.."; when {'Ĝ'}: morseCode := "--.-."; when {'Ĵ'}: morseCode := ".---."; when {'Ñ'}: morseCode := "--.--"; when {'Ö', 'Ø'}: morseCode := "---."; when {'Ŝ'}: morseCode := "...-."; when {'Ü', 'Ŭ'}: morseCode := "..--"; when {'Þ'}: morseCode := ".--.."; when {'0'}: morseCode := "-----"; when {'1'}: morseCode := ".----"; when {'2'}: morseCode := "..---"; when {'3'}: morseCode := "...--"; when {'4'}: morseCode := "....-"; when {'5'}: morseCode := "....."; when {'6'}: morseCode := "-...."; when {'7'}: morseCode := "--..."; when {'8'}: morseCode := "---.."; when {'9'}: morseCode := "----."; when {'!'}: morseCode := "-.-.--"; when {'"'}: morseCode := ".-..-."; when {'$'}: morseCode := "...-..-"; when {'''}: morseCode := ".----."; when {'('}: morseCode := "-.--."; when {')'}: morseCode := "-.--.-"; when {'+'}: morseCode := ".-.-."; when {','}: morseCode := "--..--"; when {'-'}: morseCode := "-....-"; when {'.'}: morseCode := ".-.-.-"; when {'/'}: morseCode := "-..-."; when {':'}: morseCode := "---..."; when {';'}: morseCode := "-.-.-."; when {'='}: morseCode := "-...-"; when {'?'}: morseCode := "..--.."; when {'@'}: morseCode := ".--.-."; when {' '}: morseCode := " "; end case; end func; const func string: stringToMorse (in string: stri) is func result var string: morseCode is ""; local var char: ch is ' '; begin for ch range stri do morseCode &:= charToMorse(ch) & " "; end for; end func;
Convert morse code to a character string
const func char: morseToChar (in string: morseLetter) is func result var char: ch is ' '; begin if morseLetter = "" then ch := ' '; elsif morseLetter = "." then ch := 'E'; elsif morseLetter = "-" then ch := 'T'; elsif morseLetter = ".." then ch := 'I'; elsif morseLetter = ".-" then ch := 'A'; elsif morseLetter = "-." then ch := 'N'; elsif morseLetter = "--" then ch := 'M'; elsif morseLetter = "..." then ch := 'S'; elsif morseLetter = "..-" then ch := 'U'; elsif morseLetter = ".-." then ch := 'R'; elsif morseLetter = ".--" then ch := 'W'; elsif morseLetter = "-.." then ch := 'D'; elsif morseLetter = "-.-" then ch := 'K'; elsif morseLetter = "--." then ch := 'G'; elsif morseLetter = "---" then ch := 'O'; elsif morseLetter = "...." then ch := 'H'; elsif morseLetter = "...-" then ch := 'V'; elsif morseLetter = "..-." then ch := 'F'; elsif morseLetter = "..--" then ch := 'Ü'; # also 'Ŭ' elsif morseLetter = ".-.." then ch := 'L'; elsif morseLetter = ".-.-" then ch := 'Ä'; # also 'Æ' elsif morseLetter = ".--." then ch := 'P'; elsif morseLetter = ".---" then ch := 'J'; elsif morseLetter = "-..." then ch := 'B'; elsif morseLetter = "-..-" then ch := 'X'; elsif morseLetter = "-.-." then ch := 'C'; elsif morseLetter = "-.--" then ch := 'Y'; elsif morseLetter = "--.." then ch := 'Z'; elsif morseLetter = "--.-" then ch := 'Q'; elsif morseLetter = "---." then ch := 'Ö'; # also 'Ø' elsif morseLetter = "----" then ch := ' '; # 'ch' elsif morseLetter = "....." then ch := '5'; elsif morseLetter = "....-" then ch := '4'; elsif morseLetter = "...-." then ch := 'Ŝ'; elsif morseLetter = "...--" then ch := '3'; elsif morseLetter = "..-.." then ch := 'É'; elsif morseLetter = "..-.-" then ch := ' '; # unused elsif morseLetter = "..--." then ch := 'Ð'; elsif morseLetter = "..---" then ch := '2'; elsif morseLetter = ".-..." then ch := ' '; # unused elsif morseLetter = ".-..-" then ch := 'È'; elsif morseLetter = ".-.-." then ch := '+'; elsif morseLetter = ".-.--" then ch := ' '; # unused elsif morseLetter = ".--.." then ch := 'Þ'; elsif morseLetter = ".--.-" then ch := 'À'; # also 'Å' elsif morseLetter = ".---." then ch := 'Ĵ'; elsif morseLetter = ".----" then ch := '1'; elsif morseLetter = "-...." then ch := '6'; elsif morseLetter = "-...-" then ch := '='; elsif morseLetter = "-..-." then ch := '/'; elsif morseLetter = "-..--" then ch := ' '; # unused elsif morseLetter = "-.-.." then ch := 'Ç'; # also 'Ĉ' elsif morseLetter = "-.-.-" then ch := ' '; # Start of message elsif morseLetter = "-.--." then ch := '('; # also 'Ĥ' elsif morseLetter = "-.---" then ch := ' '; # unused elsif morseLetter = "--..." then ch := '7'; elsif morseLetter = "--..-" then ch := ' '; # unused elsif morseLetter = "--.-." then ch := 'Ĝ'; elsif morseLetter = "--.--" then ch := 'Ñ'; elsif morseLetter = "---.." then ch := '8'; elsif morseLetter = "---.-" then ch := ' '; # unused elsif morseLetter = "----." then ch := '9'; elsif morseLetter = "-----" then ch := '0'; elsif morseLetter = "..--.." then ch := '?'; elsif morseLetter = ".-..-." then ch := '"'; elsif morseLetter = ".-.-.-" then ch := '.'; elsif morseLetter = ".--.-." then ch := '@'; elsif morseLetter = ".----." then ch := '''; elsif morseLetter = "-....-" then ch := '-'; elsif morseLetter = "-.-.--" then ch := '!'; elsif morseLetter = "-.-.-." then ch := ';'; elsif morseLetter = "-.--.-" then ch := ')'; elsif morseLetter = "--..--" then ch := ','; elsif morseLetter = "---..." then ch := ':'; elsif morseLetter = "...-..-" then ch := '$'; else ch := ' '; end if; end func; const func string: morseToString (in string: morseCode) is func result var string: stri is ""; local var array string: letters is 0 times ""; var string: letter is ""; begin letters := split(replace(morseCode, " ", " "), ' '); for letter range letters do stri &:= str(morseToChar(letter)); end for; end func;
Wildcard match used in command shells
const func boolean: wildcard_match (in string: main_stri, in string: pattern) is func result var boolean: doesMatch is FALSE; local var integer: main_length is 0; var integer: main_index is 1; var string: pattern_tail is ""; begin if pattern = "" then doesMatch := main_stri = ""; else case pattern[1] of when {'*'}: if pattern = "*" then doesMatch := TRUE; else main_length := length(main_stri); pattern_tail := pattern[2 .. ]; while main_index <= main_length and not doesMatch do doesMatch := wildcard_match(main_stri[main_index .. ], pattern_tail); incr(main_index); end while; end if; when {'?'}: if main_stri <> "" then doesMatch := wildcard_match(main_stri[2 .. ], pattern[2 .. ]); end if; otherwise: if main_stri <> "" and main_stri[1] = pattern[1] then doesMatch := wildcard_match(main_stri[2 .. ], pattern[2 .. ]); end if; end case; end if; end func;
String compare function where digit sequences are compared numerically
include "scanstri.s7i"; const func integer: cmpNumeric (in var string: stri1, in var string: stri2) is func result var integer: signumValue is 0; local var string: part1 is ""; var string: part2 is ""; begin while signumValue = 0 and (stri1 <> "" or stri2 <> "") do part1 := getDigits(stri1); part2 := getDigits(stri2); if part1 <> "" and part2 <> "" then signumValue := compare(part1 lpad0 length(part2), part2 lpad0 length(part1)); if signumValue = 0 then signumValue := compare(length(part1), length(part2)); end if; elsif part1 <> "" then signumValue := compare(part1, stri2); elsif part2 <> "" then signumValue := compare(stri1, part2); end if; if signumValue = 0 then part1 := getNonDigits(stri1); part2 := getNonDigits(stri2); if part1 <> "" and part2 <> "" then signumValue := compare(part1, part2); elsif part1 <> "" then signumValue := compare(part1, stri2); elsif part2 <> "" then signumValue := compare(stri1, part2); end if; end if; end while; end func;
|
|