Seed7 Algorithms: String

String

Replace tabs with the corresponding number of spaces

const proc: delTabs (inout string: stri, in integer: tabJump) is func
  local
    var integer: tabPos is 0;
    var integer: blankCount is 0;
  begin
    tabPos := pos(stri, "\t");
    while tabPos <> 0 do
      blankCount := tabJump - pred(tabPos) rem tabJump;
      stri := stri[ .. pred(tabPos)] &
          " " mult blankCount & stri[succ(tabPos) .. ];
      tabPos := pos(stri, "\t");
    end while;
  end func;

Replace leading spaces with the corresponding number of tabs

const proc: insertLeadingTabs (inout string: stri, in integer: tabJump) is func
  local
    var integer: blankCount is 1;
    var integer: tabCount is 0;
  begin
    while blankCount <= length(stri) and stri[blankCount] = ' ' do
      incr(blankCount);
    end while;
    decr(blankCount);
    if blankCount >= tabJump then
      tabCount := blankCount div tabJump;
      stri := "\t" mult tabCount & stri[succ(tabCount * tabJump) .. ];
    end if;
  end func;

Replace spaces with the corresponding number of tabs

const proc: insertTabs (inout string: stri, in integer: tabJump) is func
  local
    var integer: blankPos is 0;
    var integer: pos is 0;
    var integer: tabCount1 is 0;
    var integer: tabCount2 is 0;
  begin
    blankPos := rpos(stri, ' ');
    while blankPos <> 0 do
      pos := pred(blankPos);
      while pos >= 1 and stri[pos] = ' ' do
        decr(pos);
      end while;
      tabCount1 := pos div tabJump;
      tabCount2 := blankPos div tabJump;
      if blankPos - pos >= 2 and tabCount2 > tabCount1 then
        stri := stri[.. pos] & "\t" mult (tabCount2 - tabCount1) &
            stri[succ(tabCount2 * tabJump) ..];
      end if;
      blankPos := rpos(stri, ' ', pos);
    end while;
  end func;

Convert an Unicode UTF-32 string to UTF-8

The "unicode.s7i" library defines toUtf8, which converts an UTF-32 string to UTF-8. The function 'toUtf8_2' below shows how toUtf8 works:

const func string: toUtf8_2 (in string: stri) is func
  result
    var string: stri8 is "";
  local
    var char: ch is ' ';
    var integer: pos is 1;
  begin
    stri8 := "\0;" mult (6 * length(stri));
    for ch range stri do
      if ch <= '\16#7f;' then
        stri8 @:= [pos] ch;
        incr(pos);
      elsif ch <= '\16#7ff;' then
        stri8 @:= [pos    ] chr(16#C0 + ( ord(ch) >>  6));
        stri8 @:= [pos + 1] chr(16#80 + ( ord(ch)        mod 16#40));
        pos +:= 2;
      elsif ch <= '\16#ffff;' then
        stri8 @:= [pos    ] chr(16#E0 + ( ord(ch) >> 12));
        stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >>  6) mod 16#40));
        stri8 @:= [pos + 2] chr(16#80 + ( ord(ch)        mod 16#40));
        pos +:= 3;
      elsif ch <= '\16#1fffff;' then
        stri8 @:= [pos    ] chr(16#F0 + ( ord(ch) >> 18));
        stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >> 12) mod 16#40));
        stri8 @:= [pos + 2] chr(16#80 + ((ord(ch) >>  6) mod 16#40));
        stri8 @:= [pos + 3] chr(16#80 + ( ord(ch)        mod 16#40));
        pos +:= 4;
      elsif ch <= '\16#3ffffff;' then
        stri8 @:= [pos    ] chr(16#F8 + ( ord(ch) >> 24));
        stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >> 18) mod 16#40));
        stri8 @:= [pos + 2] chr(16#80 + ((ord(ch) >> 12) mod 16#40));
        stri8 @:= [pos + 3] chr(16#80 + ((ord(ch) >>  6) mod 16#40));
        stri8 @:= [pos + 4] chr(16#80 + ( ord(ch)        mod 16#40));
        pos +:= 5;
      else
        stri8 @:= [pos    ] chr(16#FC + ( ord(ch) >> 30));
        stri8 @:= [pos + 1] chr(16#80 + ((ord(ch) >> 24) mod 16#40));
        stri8 @:= [pos + 2] chr(16#80 + ((ord(ch) >> 18) mod 16#40));
        stri8 @:= [pos + 3] chr(16#80 + ((ord(ch) >> 12) mod 16#40));
        stri8 @:= [pos + 4] chr(16#80 + ((ord(ch) >>  6) mod 16#40));
        stri8 @:= [pos + 5] chr(16#80 + ( ord(ch)        mod 16#40));
        pos +:= 6;
      end if;
    end for;
    stri8 := stri8[.. pred(pos)];
  end func;

Convert an Unicode UTF-8 string to UTF-32

The "unicode.s7i" library defines fromUtf8, which converts an UTF-8 string to UTF-32. The function 'fromUtf8_2' below shows how fromUtf8 works:

const func string: fromUtf8_2 (in string: stri8) is func
  result
    var string: stri is "";
  local
    var integer: length is 0;
    var integer: pos8 is 0;
    var integer: pos is 0;
    var boolean: okay is TRUE;
  begin
    length := length(stri8);
    stri := "\0;" mult length;
    pos8 := 1;
    pos := 0;
    while length > 0 do
      incr(pos);
      if ord(stri8[pos8]) <= 16#7F then
        stri @:= [pos] stri8[pos8];
        incr(pos8);
        decr(length);
      elsif ord(stri8[pos8    ]) >> 5 = 16#06 and length >= 2 and
            ord(stri8[pos8 + 1]) >> 6 = 16#02 then
        stri @:= [pos] chr((ord(stri8[pos8    ]) mod 16#20 << 6) +
                           (ord(stri8[pos8 + 1]) mod 16#40));
        pos8 +:= 2;
        length -:= 2;
      elsif ord(stri8[pos8    ]) >> 4 = 16#0E and length >= 3 and
            ord(stri8[pos8 + 1]) >> 6 = 16#02 and
            ord(stri8[pos8 + 2]) >> 6 = 16#02 then
        stri @:= [pos] chr((ord(stri8[pos8    ]) mod 16#10 << 12) +
                           (ord(stri8[pos8 + 1]) mod 16#40 <<  6) +
                           (ord(stri8[pos8 + 2]) mod 16#40));
        pos8 +:= 3;
        length -:= 3;
      elsif ord(stri8[pos8    ]) >> 3 = 16#1E and length >= 4 and
            ord(stri8[pos8 + 1]) >> 6 = 16#02 and
            ord(stri8[pos8 + 2]) >> 6 = 16#02 and
            ord(stri8[pos8 + 3]) >> 6 = 16#02 then
        stri @:= [pos] chr((ord(stri8[pos8    ]) mod 16#08 << 18) +
                           (ord(stri8[pos8 + 1]) mod 16#40 << 12) +
                           (ord(stri8[pos8 + 2]) mod 16#40 <<  6) +
                           (ord(stri8[pos8 + 3]) mod 16#40));
        pos8 +:= 4;
        length -:= 4;
      elsif ord(stri8[pos8    ]) >> 2 = 16#3E and length >= 5 and
            ord(stri8[pos8 + 1]) >> 6 = 16#02 and
            ord(stri8[pos8 + 2]) >> 6 = 16#02 and
            ord(stri8[pos8 + 3]) >> 6 = 16#02 and
            ord(stri8[pos8 + 4]) >> 6 = 16#02 then
        stri @:= [pos] chr((ord(stri8[pos8    ]) mod 16#04 << 24) +
                           (ord(stri8[pos8 + 1]) mod 16#40 << 18) +
                           (ord(stri8[pos8 + 2]) mod 16#40 << 12) +
                           (ord(stri8[pos8 + 3]) mod 16#40 <<  6) +
                           (ord(stri8[pos8 + 4]) mod 16#40));
        pos8 +:= 5;
        length -:= 5;
      elsif ord(stri8[pos8    ]) >> 2 = 16#3F and length >= 6 and
            ord(stri8[pos8 + 1]) >> 6 = 16#02 and
            ord(stri8[pos8 + 2]) >> 6 = 16#02 and
            ord(stri8[pos8 + 3]) >> 6 = 16#02 and
            ord(stri8[pos8 + 4]) >> 6 = 16#02 and
            ord(stri8[pos8 + 5]) >> 6 = 16#02 then
        stri @:= [pos] chr((ord(stri8[pos8    ]) mod 16#04 << 30) +
                           (ord(stri8[pos8 + 1]) mod 16#40 << 24) +
                           (ord(stri8[pos8 + 2]) mod 16#40 << 18) +
                           (ord(stri8[pos8 + 3]) mod 16#40 << 12) +
                           (ord(stri8[pos8 + 4]) mod 16#40 <<  6) +
                           (ord(stri8[pos8 + 5]) mod 16#40));
        pos8 +:= 6;
        length -:= 6;
      else
        okay := FALSE;
        length := 0;
      end if;
    end while;
    if okay then
      stri := stri[.. pos];
    else
      raise RANGE_ERROR;
    end if;
  end func;

Encode a string with the Base64 encoding

The function toBase64 is part of the "encoding.s7i" library. Base64 encodes a byte string as ASCII string. This is done by taking packs of 6-bits and translating them into a radix-64 representation. The radix-64 digits are encoded with letters, digits and the characters '+' and '/'.

const func string: toBase64 (in string: byteStri) is func
  result
    var string: base64 is "";
  local
    const string: coding is "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    var integer: index is 1;
    var integer: subIndex is 1;
    var char: ch is ' ';
    var integer: threeBytes is 0;
    var string: fourBytes is "    ";
    var integer: posToAddNewline is 58;
  begin
    for index range 1 to length(byteStri) step 3 do
      threeBytes := 0;
      for subIndex range index to index + 2 do
        threeBytes <<:= 8;
        if subIndex <= length(byteStri) then
          ch := byteStri[subIndex];
          if ch >= '\256;' then
            raise RANGE_ERROR;
          end if;
          threeBytes +:= ord(ch);
        end if;
      end for;
      fourBytes @:= [1] coding[succ( threeBytes >> 18)];
      fourBytes @:= [2] coding[succ((threeBytes >> 12) mod 64)];
      fourBytes @:= [3] coding[succ((threeBytes >>  6) mod 64)];
      fourBytes @:= [4] coding[succ( threeBytes        mod 64)];
      if index = posToAddNewline then
        base64 &:= "\n";
        posToAddNewline +:= 57;
      end if;
      base64 &:= fourBytes;
    end for;
    index := length(base64);
    if length(byteStri) rem 3 = 2 then
      base64 @:= [index] '=';
    elsif length(byteStri) rem 3 = 1 then
      base64 @:= [pred(index)] "==";
    end if;
  end func;

Decode a Base64 encoded string

The function fromBase64 is part of the "encoding.s7i" library.

const func string: fromBase64 (in string: base64) is func
  result
    var string: decoded is "";
  local
    const array integer: decode is [] (                      # -1 is illegal
        62, -1, -1, -1, 63,                                  # + /
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61,              # 0 - 9
        -1, -1, -1,  0, -1, -1, -1,                          # =
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,  # A - M
        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  # N - Z
        -1, -1, -1, -1, -1, -1,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,  # a - m
        39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51); # n - z
    var integer: index is 1;
    var integer: subIndex is 1;
    var integer: number is 0;
    var integer: fourBytes is 0;
    var string: threeBytes is "   ";
  begin
    while index <= length(base64) - 3 do
      if base64[index] >= '+' then
        fourBytes := 0;
        for subIndex range index to index + 3 do
          number := decode[ord(base64[subIndex]) - ord(pred('+'))];
          if number = -1 then
            raise RANGE_ERROR;
          end if;
          fourBytes := (fourBytes << 6) + number;
        end for;
        threeBytes @:= [1] chr( fourBytes >> 16);
        threeBytes @:= [2] chr((fourBytes >>  8) mod 256);
        threeBytes @:= [3] chr( fourBytes        mod 256);
        decoded &:= threeBytes;
        index +:= 4;
      elsif base64[index] = '\n' or base64[index] = '\r' then
        incr(index);
      else
        raise RANGE_ERROR;
      end if;
    end while;
    if index <> succ(length(base64)) or
        (length(base64) >= 2 and
         pos(base64[.. length(base64) - 2], '=') <> 0) then
      raise RANGE_ERROR;
    end if;
    if length(base64) >= 2 and base64[pred(length(base64)) fixLen 2] = "==" then
      decoded := decoded[.. length(decoded) - 2];
    elsif length(base64) >= 1 and base64[length(base64)] = '=' then
      decoded := decoded[.. pred(length(decoded))];
    end if;
  end func;

Encode a string with the Quoted-printable encoding

The function toQuotedPrintable is part of the "encoding.s7i" library. Quoted-printable encodes a byte string as ASCII string. This is done by encoding printable ASCII characters except '=' as themself. Other byte values are encoded with '=' followed by two hexadecimal digits representing the byte's numeric value.

const func string: toQuotedPrintable (in string: byteStri) is func
  result
    var string: quoted is "";
  local
    var integer: index is 0;
    var integer: startPos is 1;
    var integer: counter is 1;
    var char: ch is ' ';
  begin
    for index range 1 to length(byteStri) do
      ch := byteStri[index];
      if ch >= '\256;' then
        raise RANGE_ERROR;
      elsif ch = '\n' or (ch = '\r' and
          index < length(byteStri) and byteStri[succ(index)] = '\n') then
        if index > 1 then
          ch := byteStri[pred(index)];
          if ch = ' ' or ch = '\t' then
            quoted &:= byteStri[startPos .. index - 2];
            if counter >= 76 then
              quoted &:= "=\n";
              counter := 1;
            end if;
            quoted &:= "=" <& ord(byteStri[pred(index)]) RADIX 16 lpad0 2;
            counter +:= 3;
            startPos := index;
          end if;
        end if;
        counter := 1;
      elsif ch >= '\127;' or ch = '=' or (ch < ' ' and ch <> '\9;') then
        quoted &:= byteStri[startPos .. pred(index)];
        if counter >= 74 then
          quoted &:= "=\n";
          counter := 1;
        end if;
        quoted &:= "=" <& ord(ch) RADIX 16 lpad0 2;
        startPos := succ(index);
        counter +:= 3;
      elsif counter >= 76 then
        quoted &:= byteStri[startPos .. pred(index)] & "=\n";
        startPos := index;
        counter := 2;
      else
        incr(counter);
      end if;
    end for;
    quoted &:= byteStri[startPos ..];
  end func;

Decode a Quoted-printable encoded string

The function fromQuotedPrintable is part of the "encoding.s7i" library.

const func string: fromQuotedPrintable (in string: quoted) is func
  result
    var string: decoded is "";
  local
    var integer: startPos is 1;
    var integer: equalSignPos is 0;
    var string: twoChars is "";
  begin
    equalSignPos := pos(quoted, "=");
    while equalSignPos <> 0 do
      decoded &:= quoted[startPos .. pred(equalSignPos)];
      if equalSignPos < length(quoted) and
          quoted[succ(equalSignPos)] = '\n' then
        startPos := equalSignPos + 2;
      elsif equalSignPos <= length(quoted) - 2 then
        twoChars := quoted[succ(equalSignPos) fixLen 2];
        if twoChars[1] in hexdigit_char and
            twoChars[2] in hexdigit_char then
          decoded &:= chr(integer(twoChars, 16));
        elsif twoChars <> "\r\n" then
          raise RANGE_ERROR;
        end if;
        startPos := equalSignPos + 3;
      else
        raise RANGE_ERROR;
      end if;
      equalSignPos := pos(quoted, "=", startPos);
    end while;
    decoded &:= quoted[startPos ..];
  end func;

Encode a string with uuencoding

The function toUuencoded is part of the "encoding.s7i" library. Uuencode encodes a byte string as ASCII string. This is done by taking packs of 6-bits and translating them into a radix-64 representation. The radix-64 digits are encoded with consecutive ASCII characters starting from ' ' (which represents 0). Every line starts with a radix-64 digit character indicating the number of data bytes encoded on that line.

const func string: toUuencoded (in string: byteStri) is func
  result
    var string: uuencoded is "";
  local
    var integer: index is 1;
    var integer: subIndex is 1;
    var char: ch is ' ';
    var integer: threeBytes is 0;
    var string: fourBytes is "    ";
    var integer: posToAddNewline is 43;
  begin
    if length(byteStri) <> 0 then
      if length(byteStri) < 45 then
        uuencoded &:= chr(32 + length(byteStri));
      else
        uuencoded &:= "M";
      end if;
      for index range 1 to length(byteStri) step 3 do
        threeBytes := 0;
        for subIndex range index to index + 2 do
          threeBytes <<:= 8;
          if subIndex <= length(byteStri) then
            ch := byteStri[subIndex];
            if ch >= '\256;' then
              raise RANGE_ERROR;
            end if;
            threeBytes +:= ord(ch);
          end if;
        end for;
        fourBytes @:= [1] chr(32 + (threeBytes >> 18));
        fourBytes @:= [2] chr(32 + (threeBytes >> 12) mod 64);
        fourBytes @:= [3] chr(32 + (threeBytes >>  6) mod 64);
        fourBytes @:= [4] chr(32 +  threeBytes        mod 64);
        uuencoded &:= fourBytes;
        if index = posToAddNewline and length(byteStri) > index + 2 then
          if length(byteStri) - index - 2 < 45 then
            uuencoded &:= "\n" <& chr(32 + length(byteStri) - index - 2);
          else
            uuencoded &:= "\nM";
          end if;
          posToAddNewline +:= 45;
        end if;
      end for;
      uuencoded &:= "\n";
    end if;
    uuencoded &:= "`\n";
  end func;

Decode an uuencoded string

The function fromUuencoded is part of the "encoding.s7i" library.

const func string: fromUuencoded (in string: uuencoded) is func
  result
    var string: decoded is "";
  local
    var integer: lineLength is 1;
    var integer: index is 1;
    var integer: subIndex is 1;
    var integer: number is 0;
    var integer: fourBytes is 0;
    var string: threeBytes is "   ";
  begin
    lineLength := ord(uuencoded[1]) - 32;
    while lineLength <> 0 and lineLength <> 64 do
      incr(index);
      while lineLength >= 1 do
        fourBytes := 0;
        for subIndex range index to index + 3 do
          number := ord(uuencoded[subIndex]) - 32;
          if number = 64 then
            number := 0;
          elsif number < 0 or number > 64 then
            raise RANGE_ERROR;
          end if;
          fourBytes := (fourBytes << 6) + number;
        end for;
        threeBytes @:= [1] chr( fourBytes >> 16);
        threeBytes @:= [2] chr((fourBytes >>  8) mod 256);
        threeBytes @:= [3] chr( fourBytes        mod 256);
        decoded &:= threeBytes[ .. lineLength];
        lineLength -:= 3;
        index +:= 4;
      end while;
      while index <= length(uuencoded) and uuencoded[index] <> '\n' do
        incr(index);
      end while;
      if index < length(uuencoded) then
        incr(index);
        lineLength := ord(uuencoded[index]) - 32;
      else
        lineLength := 0;
      end if;
    end while;
  end func;

Encode a string with percent encoding

The function toPercentEncoded is part of the "encoding.s7i" library. Percent-encoding encodes a byte string as ASCII string. This is done by encoding all characters, which are not in the set of unreserved characters (A-Z, a-z, 0-9 - _ . ~). The encoding uses a percent sign ('%') followed by two hexadecimal digits, which represent the ordinal value of the encoded character.

const func string: toPercentEncoded (in string: byteStri) is func
  result
    var string: percentEncoded is "";
  local
    const set of char: unreservedChars is alphanum_char | {'-', '_', '.', '~'};
    var integer: pos is 0;
    var integer: start is 1;
    var char: ch is ' ';
  begin
    for ch key pos range byteStri do
      if ch > '\255;' then
        raise RANGE_ERROR;
      elsif ch not in unreservedChars then
        percentEncoded &:= byteStri[start .. pred(pos)];
        percentEncoded &:= "%" <& ord(ch) RADIX 16 lpad0 2;
        start := succ(pos);
      end if;
    end for;
    percentEncoded &:= byteStri[start ..];
  end func;

Decode a percent encoded string

The function fromPercentEncoded is part of the "encoding.s7i" library. Percent-encoding encodes a byte string as ASCII string. It uses the percent sign ('%') followed by two hexadecimal digits to encode characters that otherwise would not be allowed in an URL. Allowed URL characters are encoded as themself.

const func string: fromPercentEncoded (in string: percentEncoded) is func
  result
    var string: decoded is "";
  local
    var integer: pos is 0;
    var integer: start is 1;
  begin
    pos := pos(percentEncoded, '%');
    while pos <> 0 do
      if pos <= length(percentEncoded) - 2 and
          percentEncoded[succ(pos)] in hexdigit_char and
          percentEncoded[pos + 2] in hexdigit_char then
        decoded &:= percentEncoded[start .. pred(pos)];
        decoded &:= char(integer(percentEncoded[succ(pos) fixLen 2], 16));
        pos +:= 2;
        start := succ(pos);
      end if;
      pos := pos(percentEncoded, '%', succ(pos));
    end while;
    decoded &:= percentEncoded[start ..];
  end func;

Encode a string with the URL encoding

The function toUrlEncoded is part of the "encoding.s7i" library. URL encoding encodes a byte string as ASCII string. This is done by encoding all characters, which are not in the set of unreserved characters (A-Z, a-z, 0-9 - _ . ~). The encoding uses a percent sign ('%') followed by two hexadecimal digits, which represent the ordinal value of the encoded character. A plus sign ('+') is used to encode a space (' ').

const func string: toUrlEncoded (in string: byteStri) is func
  result
    var string: urlEncoded is "";
  local
    const set of char: unreservedChars is alphanum_char | {'-', '_', '.', '~'};
    var integer: pos is 0;
    var integer: start is 1;
    var char: ch is ' ';
  begin
    for ch key pos range byteStri do
      if ch > '\255;' then
        raise RANGE_ERROR;
      elsif ch = ' ' then
        urlEncoded &:= byteStri[start .. pred(pos)];
        urlEncoded &:= '+';
        start := succ(pos);
      elsif ch not in unreservedChars then
        urlEncoded &:= byteStri[start .. pred(pos)];
        urlEncoded &:= "%" <& ord(ch) RADIX 16 lpad0 2;
        start := succ(pos);
      end if;
    end for;
    urlEncoded &:= byteStri[start ..];
  end func;

Decode an URL encoded string

The function fromUrlEncoded is part of the "encoding.s7i" library. URL encoding encodes a byte string as ASCII string. It uses the percent sign ('%') followed by two hexadecimal digits to encode characters that otherwise would not be allowed in an URL. A plus sign ('+') is used to encode a space (' '). Allowed URL characters are encoded as themself.

const func string: fromUrlEncoded (in string: urlEncoded) is func
  result
    var string: decoded is "";
  local
    var integer: pos is 0;
    var integer: start is 1;
    var char: ch is ' ';
  begin
    for ch key pos range urlEncoded do
      if ch = '%' and pos <= length(urlEncoded) - 2 and
          urlEncoded[succ(pos)] in hexdigit_char and
          urlEncoded[pos + 2] in hexdigit_char then
        decoded &:= urlEncoded[start .. pred(pos)];
        decoded &:= chr(integer(urlEncoded[succ(pos) fixLen 2], 16));
        start := pos + 3;
      elsif ch = '+' then
        decoded &:= urlEncoded[start .. pred(pos)];
        decoded &:= ' ';
        start := succ(pos);
      end if;
    end for;
    decoded &:= urlEncoded[start ..];
  end func;

Encode a string with the Ascii85 encoding

The function toAscii85 is part of the "encoding.s7i" library. Ascii85 encodes a byte string as ASCII string. This is done by encoding every four bytes with five printable ASCII characters. Five radix 85 digits provide enough possible values to encode the possible values of four bytes. The radix 85 digits are encoded with the characters '!' (encodes 0) through 'u' (encodes 84). If the last block of the byte string contains fewer than 4 bytes, the block is padded with up to three null bytes before encoding. After encoding, as many bytes as were added as padding are removed from the end of the output. In files the end of an Ascii85 encoding is marked with "~>" (this end marker is not added by toAscii85).

const func string: toAscii85 (in string: byteStri) is func
  result
    var string: ascii85 is "";
  local
    var integer: index is 0;
    var integer: subIndex is 0;
    var integer: fourBytes is 0;
    var string: fiveBytes is "     ";
    var integer: partialGroupSize is 0;
    var char: ch is ' ';
  begin
    for index range 1 to length(byteStri) - 3 step 4 do
      fourBytes := bytes2Int(byteStri[index fixLen 4], UNSIGNED, BE);
      if fourBytes = 0 then
        ascii85 &:= 'z';
      else
        for subIndex range 5 downto 1 do
          fiveBytes @:= [subIndex] chr(ord('!') + fourBytes rem 85);
          fourBytes := fourBytes div 85;
        end for;
        ascii85 &:= fiveBytes;
      end if;
    end for;
    partialGroupSize := length(byteStri) mod 4;
    if partialGroupSize <> 0 then
      index := succ(pred(length(byteStri)) mdiv 4 * 4);
      fourBytes := bytes2Int(byteStri[index ..] &
                             "\0;" mult 4 - partialGroupSize,
                             UNSIGNED, BE);
      for subIndex range 5 downto 1 do
        fiveBytes @:= [subIndex] chr(ord('!') + fourBytes rem 85);
        fourBytes := fourBytes div 85;
      end for;
      ascii85 &:= fiveBytes[.. succ(partialGroupSize)];
    end if;
  end func;

Decode a Ascii85 encoded string

The function fromAscii85 is part of the "encoding.s7i" library. Every block of five radix 85 characters is decoded to four bytes. Radix 85 characters are between '!' (encodes 0) and 'u' (encodes 84). The character 'z' is used to encode a block of four zero bytes. White space in the Ascii85 encoded string is ignored. The last block is padded to 5 bytes with the Ascii85 character 'u', and as many bytes as were added as padding are omitted from the end of the output.

const func string: fromAscii85 (in string: ascii85) is func
  result
    var string: decoded is "";
  local
    const set of char: whiteSpace is {'\0;', '\t', '\n', '\f', '\r', ' '};
    var char: ch is ' ';
    var integer: digitIndex is 0;
    var integer: base85Number is 0;
    var integer: idx is 0;
  begin
    for ch range ascii85 until ch = '~' do
      if ch >= '!' and ch <= 'u' then
        incr(digitIndex);
        base85Number := base85Number * 85 + (ord(ch) - ord('!'));
        if digitIndex = 5 then
          decoded &:= bytes(base85Number, UNSIGNED, BE, 4);
          digitIndex := 0;
          base85Number := 0;
        end if;
      elsif ch = 'z' and digitIndex = 0 then
        decoded &:= "\0;\0;\0;\0;";
      elsif ch not in whiteSpace then
        raise RANGE_ERROR;
      end if;
    end for;
    if digitIndex <> 0 then
      for idx range 1 to 5 - digitIndex do
        base85Number := base85Number * 85 + 84;
      end for;
      decoded &:= bytes(base85Number, UNSIGNED, BE, 4)[.. pred(digitIndex)];
    end if;
  end func;

Encode a string with the AsciiHex encoding

The function toAsciiHex is part of the "encoding.s7i" library. AsciiHex encodes a byte string as ASCII string. In AsciiHex each byte is encoded with two hexadecimal digits. White-space characters in an AsciiHex encoded string are ignored. The AsciiHex encoded string ends with the character '>'.

const func string: toAsciiHex (in string: byteStri) is func
  result
    var string: asciiHex is "";
  local
    const char: endOfData is '>';
    const integer: encodingsPerLine is 32;
    var integer: index is 1;
  begin
    while index <= length(byteStri) do
      asciiHex &:= hex(byteStri[index len encodingsPerLine]);
      asciiHex &:= "\n";
      index +:= encodingsPerLine;
    end while;
    asciiHex &:= endOfData;
  end func;

Decode an AsciiHex encoded string

The function fromAsciiHex is part of the "encoding.s7i" library. * In AsciiHex each byte is encoded with two hexadecimal digits. * White-space characters in an AsciiHex encoded string are ignored. * The AsciiHex encoded string ends with the character '>'. * If a '>' follows the first hexadecimal digit of an encoded byte * the decoding works as if a '0' is at the place of the '>'. * The decoder works also correctly if the '>' is missing.

const func string: fromAsciiHex (in string: asciiHex) is func
  result
    var string: stri is "";
  local
    const char: endOfData is '>';
    const set of char: whiteSpace is {'\0;', '\t', '\n', '\f', '\r', ' '};
    var integer: index is 1;
  begin
    while index < length(asciiHex) and asciiHex[index] <> endOfData do
      if asciiHex[index] not in whiteSpace then
        if asciiHex[succ(index)] = endOfData then
          stri &:= char(integer(asciiHex[index fixLen 1] & "0", 16));
          incr(index);
        else
          stri &:= char(integer(asciiHex[index fixLen 2], 16));
          index +:= 2;
        end if;
      else
        incr(index);
      end if;
    end while;
    if index = length(asciiHex) and asciiHex[index] <> endOfData and
        asciiHex[index] not in whiteSpace then
      stri &:= char(integer(asciiHex[index fixLen 1] & "0", 16));
    end if;
  end func;

Compress a string using the Lempel Ziv Welch (LZW) compression

This algorithm compresses a byte string to a string of tokens which contains characters >= 256. To write this to a byte file it is necessary to add an algorithm which writes the tokens with 9 or more bits.

const func string: lzwCompress (in string: uncompressed) is func
  result
    var string: compressed is "";
  local
    var char: ch is ' ';
    var hash [string] char: mydict is (hash [string] char).value;
    var string: buffer is "";
    var string: xstr is "";
  begin
    for ch range chr(0) to chr(255) do
      mydict @:= [str(ch)] ch;
    end for;
    for ch range uncompressed do
      xstr := buffer & str(ch);
      if xstr in mydict then
        buffer &:= str(ch)
      else
        compressed &:= str(mydict[buffer]);
        mydict @:= [xstr] chr(length(mydict));
        buffer := str(ch);
      end if;
    end for;
    if buffer <> "" then
      compressed &:= str(mydict[buffer]);
    end if;
  end func;

Decompress a Lempel Ziv Welch (LZW) compressed string

The compressed string consists of a sequence of tokens (which contain also characters >= 256). The decompress algorithm produces a byte string.

const func string: lzwDecompress (in string: compressed) is func
  result
    var string: uncompressed is "";
  local
    var char: ch is ' ';
    var hash [char] string: mydict is (hash [char] string).value;
    var string: buffer is "";
    var string: current is "";
    var string: chain is "";
  begin
    for ch range chr(0) to chr(255) do
      mydict @:= [ch] str(ch);
    end for;
    for ch range compressed do
      if buffer = "" then
        buffer := mydict[ch];
        uncompressed &:= buffer;
      elsif ch <= chr(255) then
        current := mydict[ch];
        uncompressed &:= current;
        chain := buffer & current;
        mydict @:= [chr(length(mydict))] chain;
        buffer := current;
      else
        if ch in mydict then
          chain := mydict[ch];
        else
          chain := buffer & str(buffer[1]);
        end if;
        uncompressed &:= chain;
        mydict @:= [chr(length(mydict))] buffer & str(chain[1]);
        buffer := chain;
      end if;
    end for;
  end func;

Compress a string using the run length encoding of bzip2

A sequence of 4 to 259 identical characters is replaced by four identical characters followed by a repeat length between 0 and 255. This run length encoding is used as first compression technique in the bzip2 compression.

const func string: bzip2RleCompress (in string: uncompressed) is func
  result
    var string: compressed is "";
  local
    var integer: index is 1;
    var integer: oldpos is 1;
    var char: ch is ' ';
  begin
    while index <= length(uncompressed) - 3 do
      ch := uncompressed[index];
      if  uncompressed[succ(index)] = ch and
          uncompressed[index + 2] = ch and
          uncompressed[index + 3] = ch then
        index +:= 4;
        compressed &:= uncompressed[oldpos .. pred(index)];
        oldpos := index;
        while index <= length(uncompressed) and
            uncompressed[index] = ch do
          incr(index);
        end while;
        compressed &:= chr(index - oldpos);
        oldpos := index;
      else
        incr(index);
      end if;
    end while;
    compressed &:= uncompressed[oldpos ..];
  end func;

Decompress a string using the run length encoding of bzip2

A sequence of 4 identical characters followed by a repeat length between 0 and 255 is replaced by 4 to 259 identical characters. This run length decoding is used as last decompression technique in the bzip2 decompression.

const func string: bzip2RleDecompress (in string: compressed) is func
  result
    var string: uncompressed is "";
  local
    var integer: index is 1;
    var integer: oldpos is 1;
    var char: ch is ' ';
  begin
    while index <= length(compressed) - 3 do
      ch := compressed[index];
      if  compressed[succ(index)] = ch and
          compressed[index + 2] = ch and
          compressed[index + 3] = ch then
        index +:= 4;
        uncompressed &:= compressed[oldpos .. pred(index)] & str(ch) mult ord(compressed[index]);
        incr(index);
        oldpos := index;
      else
        incr(index);
      end if;
    end while;
    uncompressed &:= compressed[oldpos ..];
  end func;

Compress a string using the run length encoding of PackBits

const func string: packBits (in string: uncompressed) is func
  result
    var string: compressed is "";
  local
    var integer: index is 1;
    var integer: oldpos is 1;
    var char: ch is ' ';
  begin
    while index <= length(uncompressed) - 2 do
      ch := uncompressed[index];
      if  uncompressed[succ(index)] = ch and
          uncompressed[index + 2] = ch then
        while index - oldpos >= 128 do
          compressed &:= "\127;" & uncompressed[oldpos fixLen 128];
          oldpos +:= 128;
        end while;
        if index > oldpos then
          compressed &:= str(chr(pred(index - oldpos))) & uncompressed[oldpos .. pred(index)];
          oldpos := index;
        end if;
        index +:= 3;
        while index <= length(uncompressed) and uncompressed[index] = ch do
          incr(index);
        end while;
        while index - oldpos >= 128 do
          compressed &:= "\129;" & str(ch);
          oldpos +:= 128;
        end while;
        if pred(index) > oldpos then
          compressed &:= str(chr(257 - (index - oldpos))) & str(ch);
          oldpos := index;
        end if;
      else
        incr(index);
      end if;
    end while;
    index := succ(length(uncompressed));
    while index - oldpos >= 128 do
      compressed &:= "\127;" & uncompressed[oldpos fixLen 128];
      oldpos +:= 128;
    end while;
    if index > oldpos then
      compressed &:= str(chr(pred(index - oldpos))) & uncompressed[oldpos ..];
    end if;
  end func;

Decompress a string using the run length encoding of PackBits

const func string: unpackBits (in string: compressed) is func
  result
    var string: uncompressed is "";
  local
    var integer: index is 1;
    var char: ch is ' ';
  begin
    while index <= length(compressed) do
      ch := compressed[index];
      if ch <= chr(127) then
        uncompressed &:= compressed[succ(index) fixLen succ(ord(ch))];
        index +:= ord(ch) + 2;
      else
        uncompressed &:= str(compressed[succ(index)]) mult (257 - ord(ch));
        index +:= 2;
      end if;
    end while;
  end func;

Burrows-Wheeler transform (basic concept)

This algorithm demonstrates the basic concept of the Burrows-Wheeler transform. It is not intended to be used for big data blocks.

const func string: burrowsWheelerTransformConcept (in string: stri) is func
  result
    var string: transformed is "";
  local
    var integer: length is 0;
    var integer: index is 0;
    var array string: rotations is 0 times "";
  begin
    length := succ(length(stri));
    rotations := length times "";
    for index range 1 to length do
      rotations[index] := stri[index ..] & "\256;" & stri[.. pred(index)];
    end for;
    rotations := sort(rotations);
    for index range 1 to length do
      transformed &:= rotations[index][length];
    end for;
  end func;

Inverse Burrows-Wheeler transform (basic concept)

This algorithm demonstrates the basic concept of the Burrows-Wheeler transform. It is not intended to be used for big data blocks.

const func string: inverseBurrowsWheelerTransformConcept (in string: transformed) is func
  result
    var string: stri is "";
  local
    var integer: length is 0;
    var integer: count is 0;
    var integer: index is 0;
    var array string: rotations is 0 times "";
  begin
    length := length(transformed);
    rotations := length times "";
    for count range 1 to length do
      for index range 1 to length do
        rotations[index] := str(transformed[index]) & rotations[index];
      end for;
      rotations := sort(rotations);
    end for;
    stri := rotations[1];
    index := pos(stri, "\256;");
    stri := stri[succ(index) ..] & stri[.. pred(index)];
  end func;

Convert a character string to morse code

const func string: charToMorse (in char: ch) is func
  result
    var string: morseCode is "";
  begin
    case ch of
      when {'a', 'A'}: morseCode := ".-";
      when {'b', 'B'}: morseCode := "-...";
      when {'c', 'C'}: morseCode := "-.-.";
      when {'d', 'D'}: morseCode := "-..";
      when {'e', 'E'}: morseCode := ".";
      when {'f', 'F'}: morseCode := "..-.";
      when {'g', 'G'}: morseCode := "--.";
      when {'h', 'H'}: morseCode := "....";
      when {'i', 'I'}: morseCode := "..";
      when {'j', 'J'}: morseCode := ".---";
      when {'k', 'K'}: morseCode := "-.-";
      when {'l', 'L'}: morseCode := ".-..";
      when {'m', 'M'}: morseCode := "--";
      when {'n', 'N'}: morseCode := "-.";
      when {'o', 'O'}: morseCode := "---";
      when {'p', 'P'}: morseCode := ".--.";
      when {'q', 'Q'}: morseCode := "--.-";
      when {'r', 'R'}: morseCode := ".-.";
      when {'s', 'S'}: morseCode := "...";
      when {'t', 'T'}: morseCode := "-";
      when {'u', 'U'}: morseCode := "..-";
      when {'v', 'V'}: morseCode := "...-";
      when {'w', 'W'}: morseCode := ".--";
      when {'x', 'X'}: morseCode := "-..-";
      when {'y', 'Y'}: morseCode := "-.--";
      when {'z', 'Z'}: morseCode := "--..";
      when {'Ä', 'Æ'}: morseCode := ".-.-";
      when {'À', 'Å'}: morseCode := ".--.-";
      when {'Ç', 'Ĉ'}: morseCode := "-.-..";
      when {'Ð'}:      morseCode := "..--.";
      when {'È'}:      morseCode := ".-..-";
      when {'É'}:      morseCode := "..-..";
      when {'Ĝ'}:      morseCode := "--.-.";
      when {'Ĵ'}:      morseCode := ".---.";
      when {'Ñ'}:      morseCode := "--.--";
      when {'Ö', 'Ø'}: morseCode := "---.";
      when {'Ŝ'}:      morseCode := "...-.";
      when {'Ü', 'Ŭ'}: morseCode := "..--";
      when {'Þ'}:      morseCode := ".--..";
      when {'0'}:      morseCode := "-----";
      when {'1'}:      morseCode := ".----";
      when {'2'}:      morseCode := "..---";
      when {'3'}:      morseCode := "...--";
      when {'4'}:      morseCode := "....-";
      when {'5'}:      morseCode := ".....";
      when {'6'}:      morseCode := "-....";
      when {'7'}:      morseCode := "--...";
      when {'8'}:      morseCode := "---..";
      when {'9'}:      morseCode := "----.";
      when {'!'}:      morseCode := "-.-.--";
      when {'"'}:      morseCode := ".-..-.";
      when {'$'}:      morseCode := "...-..-";
      when {'''}:      morseCode := ".----.";
      when {'('}:      morseCode := "-.--.";
      when {')'}:      morseCode := "-.--.-";
      when {'+'}:      morseCode := ".-.-.";
      when {','}:      morseCode := "--..--";
      when {'-'}:      morseCode := "-....-";
      when {'.'}:      morseCode := ".-.-.-";
      when {'/'}:      morseCode := "-..-.";
      when {':'}:      morseCode := "---...";
      when {';'}:      morseCode := "-.-.-.";
      when {'='}:      morseCode := "-...-";
      when {'?'}:      morseCode := "..--..";
      when {'@'}:      morseCode := ".--.-.";
      when {' '}:      morseCode := " ";
    end case;
  end func;

const func string: stringToMorse (in string: stri) is func
  result
    var string: morseCode is "";
  local
    var char: ch is ' ';
  begin
    for ch range stri do
      morseCode &:= charToMorse(ch) & " ";
    end for;
  end func;

Convert morse code to a character string

const func char: morseToChar (in string: morseLetter) is func
  result
    var char: ch is ' ';
  begin
    if    morseLetter = "" then        ch := ' ';
    elsif morseLetter = "." then       ch := 'E';
    elsif morseLetter = "-" then       ch := 'T';
    elsif morseLetter = ".." then      ch := 'I';
    elsif morseLetter = ".-" then      ch := 'A';
    elsif morseLetter = "-." then      ch := 'N';
    elsif morseLetter = "--" then      ch := 'M';
    elsif morseLetter = "..." then     ch := 'S';
    elsif morseLetter = "..-" then     ch := 'U';
    elsif morseLetter = ".-." then     ch := 'R';
    elsif morseLetter = ".--" then     ch := 'W';
    elsif morseLetter = "-.." then     ch := 'D';
    elsif morseLetter = "-.-" then     ch := 'K';
    elsif morseLetter = "--." then     ch := 'G';
    elsif morseLetter = "---" then     ch := 'O';
    elsif morseLetter = "...." then    ch := 'H';
    elsif morseLetter = "...-" then    ch := 'V';
    elsif morseLetter = "..-." then    ch := 'F';
    elsif morseLetter = "..--" then    ch := 'Ü'; # also 'Ŭ'
    elsif morseLetter = ".-.." then    ch := 'L';
    elsif morseLetter = ".-.-" then    ch := 'Ä'; # also 'Æ'
    elsif morseLetter = ".--." then    ch := 'P';
    elsif morseLetter = ".---" then    ch := 'J';
    elsif morseLetter = "-..." then    ch := 'B';
    elsif morseLetter = "-..-" then    ch := 'X';
    elsif morseLetter = "-.-." then    ch := 'C';
    elsif morseLetter = "-.--" then    ch := 'Y';
    elsif morseLetter = "--.." then    ch := 'Z';
    elsif morseLetter = "--.-" then    ch := 'Q';
    elsif morseLetter = "---." then    ch := 'Ö'; # also 'Ø'
    elsif morseLetter = "----" then    ch := ' '; # 'ch'
    elsif morseLetter = "....." then   ch := '5';
    elsif morseLetter = "....-" then   ch := '4';
    elsif morseLetter = "...-." then   ch := 'Ŝ';
    elsif morseLetter = "...--" then   ch := '3';
    elsif morseLetter = "..-.." then   ch := 'É';
    elsif morseLetter = "..-.-" then   ch := ' '; # unused
    elsif morseLetter = "..--." then   ch := 'Ð';
    elsif morseLetter = "..---" then   ch := '2';
    elsif morseLetter = ".-..." then   ch := ' '; # unused
    elsif morseLetter = ".-..-" then   ch := 'È';
    elsif morseLetter = ".-.-." then   ch := '+';
    elsif morseLetter = ".-.--" then   ch := ' '; # unused
    elsif morseLetter = ".--.." then   ch := 'Þ';
    elsif morseLetter = ".--.-" then   ch := 'À'; # also 'Å'
    elsif morseLetter = ".---." then   ch := 'Ĵ';
    elsif morseLetter = ".----" then   ch := '1';
    elsif morseLetter = "-...." then   ch := '6';
    elsif morseLetter = "-...-" then   ch := '=';
    elsif morseLetter = "-..-." then   ch := '/';
    elsif morseLetter = "-..--" then   ch := ' '; # unused
    elsif morseLetter = "-.-.." then   ch := 'Ç'; # also 'Ĉ'
    elsif morseLetter = "-.-.-" then   ch := ' '; # Start of message
    elsif morseLetter = "-.--." then   ch := '('; # also 'Ĥ'
    elsif morseLetter = "-.---" then   ch := ' '; # unused
    elsif morseLetter = "--..." then   ch := '7';
    elsif morseLetter = "--..-" then   ch := ' '; # unused
    elsif morseLetter = "--.-." then   ch := 'Ĝ';
    elsif morseLetter = "--.--" then   ch := 'Ñ';
    elsif morseLetter = "---.." then   ch := '8';
    elsif morseLetter = "---.-" then   ch := ' '; # unused
    elsif morseLetter = "----." then   ch := '9';
    elsif morseLetter = "-----" then   ch := '0';
    elsif morseLetter = "..--.." then  ch := '?';
    elsif morseLetter = ".-..-." then  ch := '"';
    elsif morseLetter = ".-.-.-" then  ch := '.';
    elsif morseLetter = ".--.-." then  ch := '@';
    elsif morseLetter = ".----." then  ch := ''';
    elsif morseLetter = "-....-" then  ch := '-';
    elsif morseLetter = "-.-.--" then  ch := '!';
    elsif morseLetter = "-.-.-." then  ch := ';';
    elsif morseLetter = "-.--.-" then  ch := ')';
    elsif morseLetter = "--..--" then  ch := ',';
    elsif morseLetter = "---..." then  ch := ':';
    elsif morseLetter = "...-..-" then ch := '$';
    else                               ch := ' ';
    end if;
  end func;

const func string: morseToString (in string: morseCode) is func
  result
    var string: stri is "";
  local
    var array string: letters is 0 times "";
    var string: letter is "";
  begin
    letters := split(replace(morseCode, "  ", " "), ' ');
    for letter range letters do
      stri &:= str(morseToChar(letter));
    end for;
  end func;

Wildcard match used in command shells

const func boolean: wildcard_match (in string: main_stri, in string: pattern) is func
  result
    var boolean: doesMatch is FALSE;
  local
    var integer: main_length is 0;
    var integer: main_index is 1;
    var string: pattern_tail is "";
  begin
    if pattern = "" then
      doesMatch := main_stri = "";
    else
      case pattern[1] of
        when {'*'}:
          if pattern = "*" then
            doesMatch := TRUE;
          else
            main_length := length(main_stri);
            pattern_tail := pattern[2 .. ];
            while main_index <= main_length and not doesMatch do
              doesMatch := wildcard_match(main_stri[main_index .. ],
                  pattern_tail);
              incr(main_index);
            end while;
          end if;
        when {'?'}:
          if main_stri <> "" then
            doesMatch := wildcard_match(main_stri[2 .. ], pattern[2 .. ]);
          end if;
        otherwise:
          if main_stri <> "" and main_stri[1] = pattern[1] then
            doesMatch := wildcard_match(main_stri[2 .. ], pattern[2 .. ]);
          end if;
      end case;
    end if;
  end func;

String compare function where digit sequences are compared numerically

include "scanstri.s7i";

const func integer: cmpNumeric (in var string: stri1, in var string: stri2) is func
  result
    var integer: signumValue is 0;
  local
    var string: part1 is "";
    var string: part2 is "";
  begin
    while signumValue = 0 and (stri1 <> "" or stri2 <> "") do
      part1 := getDigits(stri1);
      part2 := getDigits(stri2);
      if part1 <> "" and part2 <> "" then
        signumValue := compare(part1 lpad0 length(part2), part2 lpad0 length(part1));
        if signumValue = 0 then
          signumValue := compare(length(part1), length(part2));
        end if;
      elsif part1 <> "" then
        signumValue := compare(part1, stri2);
      elsif part2 <> "" then
        signumValue := compare(stri1, part2);
      end if;
      if signumValue = 0 then
        part1 := getNonDigits(stri1);
        part2 := getNonDigits(stri2);
        if part1 <> "" and part2 <> "" then
          signumValue := compare(part1, part2);
        elsif part1 <> "" then
          signumValue := compare(part1, stri2);
        elsif part2 <> "" then
          signumValue := compare(stri1, part2);
        end if;
      end if;
    end while;
  end func;