(********************************************************************)
(*                                                                  *)
(*  scanstri.s7i  String scanner functions                          *)
(*  Copyright (C) 2007 - 2011, 2013, 2019, 2021  Thomas Mertes      *)
(*                2023, 2024  Thomas Mertes                         *)
(*                                                                  *)
(*  This file is part of the Seed7 Runtime Library.                 *)
(*                                                                  *)
(*  The Seed7 Runtime Library is free software; you can             *)
(*  redistribute it and/or modify it under the terms of the GNU     *)
(*  Lesser General Public License as published by the Free Software *)
(*  Foundation; either version 2.1 of the License, or (at your      *)
(*  option) any later version.                                      *)
(*                                                                  *)
(*  The Seed7 Runtime Library is distributed in the hope that it    *)
(*  will be useful, but WITHOUT ANY WARRANTY; without even the      *)
(*  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR *)
(*  PURPOSE.  See the GNU Lesser General Public License for more    *)
(*  details.                                                        *)
(*                                                                  *)
(*  You should have received a copy of the GNU Lesser General       *)
(*  Public License along with this program; if not, write to the    *)
(*  Free Software Foundation, Inc., 51 Franklin Street,             *)
(*  Fifth Floor, Boston, MA  02110-1301, USA.                       *)
(*                                                                  *)
(********************************************************************)


include "chartype.s7i";


(**
 *  Skips a possibly nested comment from a [[string]].
 *  The comment starts with (* and ends with *) . When the function
 *  is called it is assumed that stri[1] contains the '*' of the
 *  comment start. When the function is left ''stri'' is empty or
 *  stri[1] contains the character after the ')'.
 *)
const proc: skipComment (inout string: stri) is func
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    repeat
      repeat
        while pos <= leng and stri[pos] not in special_comment_char do
          incr(pos);
        end while;
        if pos <= leng and stri[pos] = '(' then
          incr(pos);
          if pos <= leng and stri[pos] = '*' then
            stri := stri[pos ..];
            skipComment(stri);
            leng := length(stri);
            pos := 1;
          end if;
        end if;
      until pos > leng or stri[pos] = '*';
      if pos <= leng then
        incr(pos);
      end if;
    until pos > leng or stri[pos] = ')';
    stri := stri[succ(pos) ..];
  end func; # skipComment


(**
 *  Reads a possibly nested comment from a [[string]].
 *  The comment starts with (* and ends with *) . When the function
 *  is called it is assumed that stri[1] contains the '*' of the
 *  comment start. When the function is left ''stri'' is empty or
 *  stri[1] contains the character after the ')'.
 *  @return the content of the comment, including the introducing (*
 *          and the ending *) .
 *)
const func string: getComment (inout string: stri) is func
  result
    var string: symbol is "(";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    repeat
      repeat
        while pos <= leng and stri[pos] not in special_comment_char do
          incr(pos);
        end while;
        if pos <= leng and stri[pos] = '(' then
          incr(pos);
          if pos <= leng and stri[pos] = '*' then
            symbol &:= stri[.. pos - 2];
            stri := stri[pos ..];
            symbol &:= getComment(stri);
            leng := length(stri);
            pos := 1;
          end if;
        end if;
      until pos > leng or stri[pos] = '*';
      if pos <= leng then
        incr(pos);
      end if;
    until pos > leng or stri[pos] = ')';
    symbol &:= stri[.. pos];
    stri := stri[succ(pos) ..];
  end func; # getComment


(**
 *  Skips a classic C comment from a [[string]].
 *  The comment starts with /* and ends with */ . In a classic
 *  C comment no nesting of comments is allowed. When the function
 *  is called it is assumed that stri[1] contains the '*'
 *  of the comment start.  When the function is left ''stri''
 *  is empty or stri[1] contains the character after the '/'.
 *)
const proc: skipClassicComment (inout string: stri) is func
  local
    var integer: leng is 0;
    var integer: pos is 2;
  begin
    leng := length(stri);
    repeat
      while pos <= leng and stri[pos] <> '*' do
        incr(pos);
      end while;
      incr(pos);
    until pos > leng or stri[pos] = '/';
    stri := stri[succ(pos) ..];
  end func;


(**
 *  Skips a line comment from a [[string]].
 *  A line comment starts with an introducing character (like '#')
 *  and ends with the end of the line. When the function is called
 *  it is assumed that ''stri'' is empty or stri[1] contains the
 *  introducing character (e.g. '#'). When the function is left
 *  ''stri'' is empty or stri[1] contains the line end character
 *  ('\n').
 *)
const proc: skipLineComment (inout string: stri) is func
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    repeat
      incr(pos);
    until pos > leng or stri[pos] = '\n';
    stri := stri[pos ..];
  end func; # skipLineComment


(**
 *  Reads a line comment from a [[string]].
 *  A line comment starts with an introducing character (like '#')
 *  and ends with the end of the line. When the function is called
 *  it is assumed that ''stri'' is empty or stri[1] contains the
 *  introducing character (e.g. '#'). When the function is left
 *  ''stri'' is empty or stri[1] contains the line end character
 *  ('\n').
 *  @return the content of the comment, including the start marker
 *          (e.g. '#') but without line end character ('\n').
 *)
const func string: getLineComment (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    repeat
      incr(pos);
    until pos > leng or stri[pos] = '\n';
    symbol := stri[.. pred(pos)];
    stri := stri[pos ..];
  end func; # getLineComment


(**
 *  Reads a sequence of digits from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the digits.
 *   stri := "12";    getDigits(stri)  returns  "12"  and stri = ""
 *   stri := "12ab";  getDigits(stri)  returns  "12"  and stri = "ab"
 *   stri := "ab";    getDigits(stri)  returns  ""    and stri = "ab"
 *   stri := " 12";   getDigits(stri)  returns  ""    and stri = " 12"
 *  @return the digit sequence, and
 *          "" if no digit was found.
 *)
const func string: getDigits (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    while pos <= leng and stri[pos] >= '0' and stri[pos] <= '9' do
      incr(pos);
    end while;
    symbol := stri[.. pred(pos)];
    stri := stri[pos ..];
  end func;


(**
 *  Reads a sequence of hexadecimal digits from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the hexadecimal digits.
 *   stri := "1f";   getHexDigits(stri)  returns  "1f"  and stri = ""
 *   stri := "1ag";  getHexDigits(stri)  returns  "1a"  and stri = "g"
 *   stri := "gx";   getHexDigits(stri)  returns  ""    and stri = "gx"
 *   stri := " 1a";  getHexDigits(stri)  returns  ""    and stri = " 1a"
 *  @return the digit sequence, and
 *          "" if no digit was found.
 *)
const func string: getHexDigits (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    while pos <= leng and stri[pos] in hexdigit_char do
      incr(pos);
    end while;
    symbol := stri[.. pred(pos)];
    stri := stri[pos ..];
  end func;


(**
 *  Reads a decimal integer with optional sign from a [[string]].
 *  A decimal integer accepted by ''getInteger'' consists of an optional
 *  + or - sign followed by a possibly empty sequence of digits. Because
 *  of the LL(1) approach, a sign without following digits is accepted.
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the integer.
 *   stri := "123*2";  getInteger(stri)  returns  "123"  and stri = "*2"
 *   stri := "+1-2";   getInteger(stri)  returns  "+1"   and stri = "-2"
 *   stri := "-2+3";   getInteger(stri)  returns  "-2"   and stri = "+3"
 *   stri := "+-0";    getInteger(stri)  returns  "+"    and stri = "-0"
 *   stri := "pi";     getInteger(stri)  returns  ""     and stri = "pi"
 *  @return the decimal integer string, and
 *          "" if no integer was found.
 *)
const func string: getInteger (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    if pos <= leng and (stri[pos] = '-' or stri[pos] = '+') then
      incr(pos);
    end if;
    while pos <= leng and stri[pos] >= '0' and stri[pos] <= '9' do
      incr(pos);
    end while;
    symbol := stri[.. pred(pos)];
    stri := stri[pos ..];
  end func;


(**
 *  Reads a numeric literal from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the introducing digit. When the function is left
 *  ''stri'' is empty or stri[1] contains the character after the
 *  literal.
 *   stri := "1x";      getNumber(stri)  returns  "1"       and stri = "x"
 *   stri := "1.0+";    getNumber(stri)  returns  "1.0"     and stri = "+"
 *   stri := "1.0E1-";  getNumber(stri)  returns  "1.0E1"   and stri = "-"
 *   stri := "1.0e-1";  getNumber(stri)  returns  "1.0e-1"  and stri = ""
 *   stri := "2#101*";  getNumber(stri)  returns  "2#101"   and stri = "*"
 *   stri := "1e2y";    getNumber(stri)  returns  "1e2"     and stri = "y"
 *   stri := "1E+3z";   getNumber(stri)  returns  "1E+3"    and stri = "z"
 *   stri := "1234_/";  getNumber(stri)  returns  "1234_"   and stri = "/"
 *  @return The function returns the numeric literal.
 *)
const func string: getNumber (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    while pos <= leng and stri[pos] in digit_char do
      incr(pos);
    end while;
    if pos <= leng and stri[pos] = '.' then
      # float literal
      incr(pos);
      while pos <= leng and stri[pos] in digit_char do
        incr(pos);
      end while;
      if pos <= leng and (stri[pos] = 'E' or stri[pos] = 'e') then
        incr(pos);
        if pos <= leng and stri[pos] = '+' then
          incr(pos);
        elsif pos <= leng and stri[pos] = '-' then
          incr(pos);
        end if;
        while pos <= leng and stri[pos] in digit_char do
          incr(pos);
        end while;
      end if;
    elsif pos <= leng and stri[pos] = '#' then
      # based integer literal
      incr(pos);
      while pos <= leng and stri[pos] in alphanum_char do
        incr(pos);
      end while;
    elsif pos <= leng and (stri[pos] = 'E' or stri[pos] = 'e') then
      # integer literal with exponent
      incr(pos);
      if pos <= leng and (stri[pos] = '+' or stri[pos] = '-') then
        incr(pos);
      end if;
      while pos <= leng and stri[pos] in digit_char do
        incr(pos);
      end while;
    elsif pos <= leng and stri[pos] = '_' then
      # bigInteger literal
      incr(pos);
    end if;
    symbol := stri[.. pred(pos)];
    stri := stri[pos ..];
  end func;


(**
 *  Reads a sequence of non digits from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains a digit.
 *   stri := "1+2";   getNonDigits(stri)  returns  ""    and stri = "1+2"
 *   stri := " 1+2";  getNonDigits(stri)  returns  " "   and stri = "1+2"
 *   stri := "-1+2";  getNonDigits(stri)  returns  "-"   and stri = "1+2"
 *   stri := "a+2";   getNonDigits(stri)  returns  "a+"  and stri = "2"
 *  @return the non digit sequence, and
 *          "" if a digit was found.
 *)
const func string: getNonDigits (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    while pos <= leng and (stri[pos] < '0' or stri[pos] > '9') do
      incr(pos);
    end while;
    symbol := stri[.. pred(pos)];
    stri := stri[pos ..];
  end func;


(**
 *  Reads a text quoted with characters like " and ' from a [[string]].
 *  The introducing and the closing quoting character must be identical.
 *  When the function is called it is assumed that stri[1] contains the
 *  introducing quoting character (which can be any character). When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the closing quoting character.
 *   stri := "'ab'+";  getQuotedText(stri)  returns  "ab"  and stri = "+"
 *   stri := "''=a";   getQuotedText(stri)  returns  ""    and stri = "=a"
 *   stri := "\"A\"";  getQuotedText(stri)  returns  "A"   and stri = ""
 *   stri := "\"\"?";  getQuotedText(stri)  returns  ""    and stri = "?"
 *   stri := ":ab:5";  getQuotedText(stri)  returns  "ab"  and stri = "5"
 *   stri := "+XY";    getQuotedText(stri)  returns  "XY"  and stri = ""
 *  @return the string literal without introducing or closing
 *          characters ( " or ' ).
 *)
const func string: getQuotedText (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var char: quoteChar is ' ';
    var integer: pos is 1;
  begin
    quoteChar := stri[1];
    leng := length(stri);
    repeat
      incr(pos);
    until pos > leng or stri[pos] = quoteChar;
    symbol := stri[2 .. pred(pos)];
    stri := stri[succ(pos) ..];
  end func;


(**
 *  Read a space terminated command line word, from a [[string]].
 *  Before reading the word it skips whitespace characters. A command
 *  line word can consist of unquoted and quoted parts. A quoted part
 *  is introduced with double quotes (") and ends with unescaped
 *  double quotes. A \ (backslash) is used to escape characters that
 *  would terminate the word respectively the quoted part. The
 *  backslash is ignored and the character after it is added to the
 *  word. To represent a backslash it must be doubled. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the terminating space. Examples:
 *   stri := "a b c";        getCommandLineWord(stri) returns "a"
 *   stri := "a\\ b c";      getCommandLineWord(stri) returns "a b"
 *   stri := " a b c";       getCommandLineWord(stri) returns "a"
 *   stri := "\\ a b c";     getCommandLineWord(stri) returns " a"
 *   stri := "a\\\"b c";     getCommandLineWord(stri) returns "a\"b"
 *   stri := "a\" b\" c";    getCommandLineWord(stri) returns "a b"
 *   stri := "\"a b\" c";    getCommandLineWord(stri) returns "a b"
 *   stri := " \"a\" b c";   getCommandLineWord(stri) returns "a"
 *   stri := "\" a\" b c";   getCommandLineWord(stri) returns " a"
 *   stri := " \" a\" b c";  getCommandLineWord(stri) returns " a"
 *   stri := "\"a\\\"b\" c"; getCommandLineWord(stri) returns "a\"b"
 *   stri := "a\\\\b c";     getCommandLineWord(stri) returns "a\\b"
 *  @return the space terminated word (without terminating space).
 *)
const func string: getCommandLineWord (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    while pos <= leng and stri[pos] = ' ' do
      incr(pos);
    end while;
    while pos <= leng and stri[pos] <> ' ' do
      if stri[pos] = '"' then
        incr(pos);
        while pos <= leng and stri[pos] <> '"' do
          if stri[pos] = '\\' and pos < leng then
            incr(pos);
          end if;
          symbol &:= stri[pos];
          incr(pos);
        end while;
        incr(pos);
      else
        while pos <= leng and stri[pos] <> ' ' and stri[pos] <> '"' do
          if stri[pos] = '\\' and pos < leng then
            incr(pos);
          end if;
          symbol &:= stri[pos];
          incr(pos);
        end while;
      end if;
    end while;
    stri := stri[succ(pos) ..];
  end func;


(**
 *  Read a simple [[string]] literal from a [[string]].
 *  A simple string literal is enclosed in delimiter characters
 *  (e.g. " or ' ). Delimiter characters within the simple string
 *  literal must be doubled. A simple string literal does not
 *  support an escape character. All characters, including control
 *  characters (e.g. linefeed) are allowed inside a simple string
 *  literal. When the function is called it is assumed that
 *  stri[1] contains the introducing delimiter character.
 *  When the function is left ''stri'' is empty or stri[1]
 *  contains the character after the closing delimiter.
 *   stri := "\"\"";        getSimpleStringLiteral(stri) returns "\"\""
 *   stri := "\"\"x";       getSimpleStringLiteral(stri) returns "\"\""
 *   stri := "\"\"\"";      getSimpleStringLiteral(stri) returns "\"\"\""
 *   stri := "\"\"\"\"";    getSimpleStringLiteral(stri) returns "\"\"\""
 *   stri := "\"a\"\"\"";   getSimpleStringLiteral(stri) returns "\"a\"\""
 *   stri := "\"\"\"b\"";   getSimpleStringLiteral(stri) returns "\"\"b\""
 *   stri := "\"a\"\"b\"";  getSimpleStringLiteral(stri) returns "\"a\"b\""
 *   stri := "\"\"\"\"x";   getSimpleStringLiteral(stri) returns "\"\"\""
 *   stri := "\"a\"\"\"x";  getSimpleStringLiteral(stri) returns "\"a\"\""
 *   stri := "\"\"\"b\"x";  getSimpleStringLiteral(stri) returns "\"\"b\""
 *   stri := "\"a\"\"b\"x"; getSimpleStringLiteral(stri) returns "\"a\"b\""
 *  @return the string literal including the introducing and
 *          closing delimiter character. Double delimiter chars in
 *          the literal are converted to single delimiter chars.
 *)
const func string: getSimpleStringLiteral (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var char: delimiter is ' ';
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    delimiter := stri[1];
    symbol := str(delimiter);
    repeat
      incr(pos);
      while pos <= leng and stri[pos] <> delimiter do
        symbol &:= stri[pos];
        incr(pos);
      end while;
      if pos <= leng then
        incr(pos);
        if pos <= leng and stri[pos] = delimiter then
          symbol &:= delimiter;
        end if;
      end if;
    until pos > leng or stri[pos] <> delimiter;
    symbol &:= delimiter;
    stri := stri[pos ..];
  end func;


(**
 *  Reads an escape sequence from ''stri'' and appends it to ''symbol''.
 *  The function accepts escape sequences from character and string
 *  literals. When the function is called it is assumed that stri[1]
 *  contains the introducing \ . When the function is left stri[1]
 *  contains the character after the escape sequence. The complete
 *  escape sequence including the introducing \ is appended to
 *  ''symbol''.
 *)
const proc: getEscapeSequence (in string: stri, inout integer: pos, inout string: symbol) is func
  local
    var integer: leng is 0;
    var integer: semicolonPos is 0;
    var string: numberStri is "";
  begin
    leng := length(stri);
    symbol &:= "\\";
    incr(pos);
    if pos <= leng then
      if stri[pos] = '\n' or stri[pos] = ' ' or stri[pos] = '\t' or stri[pos] = '\r' then
        repeat
          symbol &:= stri[pos];
          incr(pos);
        until pos > leng or
            stri[pos] <> '\n' and stri[pos] <> ' ' and stri[pos] <> '\t' and stri[pos] <> '\r';
        if pos <= leng and stri[pos] = '\\' then
          symbol &:= stri[pos];
          incr(pos);
        end if;
      elsif stri[pos] in digit_char then
        semicolonPos := pos(stri, ';', succ(pos));
        if semicolonPos <> 0 then
          numberStri := stri[pos .. pred(semicolonPos)];
        else
          numberStri := stri[pos ..];
        end if;
        symbol &:= getNumber(numberStri);
        if numberStri = "" then
          symbol &:= ";";
          pos := succ(semicolonPos);
        else
          pos := semicolonPos - length(numberStri);
        end if;
      else
        symbol &:= stri[pos];
        incr(pos);
      end if;
    end if;
  end func;


(**
 *  Reads a character literal from a [[string]].
 *  When the function is called it is assumed that stri[1] contains
 *  the introducing ' . When the function is left ''stri'' is empty or
 *  stri[1] contains the character after the closing ' .
 *  @return the character literal including the introducing ' and
 *          the closing ' .
 *)
const func string: getCharLiteral (inout string: stri) is func
  result
    var string: symbol is "'";
  local
    var integer: leng is 0;
    var integer: pos is 2;
  begin
    leng := length(stri);
    if pos <= leng and stri[pos] <> '\n' and stri[pos] <> '\r' then
      if stri[pos] = '\\' then
        repeat
          getEscapeSequence(stri, pos, symbol);
        until stri[pos] <> '\\';
      else
        symbol &:= stri[pos];
        incr(pos);
      end if;
      if pos > leng or stri[pos] <> '\'' then
        if stri[pos] <> '\n' and stri[pos] <> '\r' then
          repeat
            symbol &:= stri[pos];
            incr(pos);
          until pos > leng or
              stri[pos] = '\'' or
              stri[pos] = '\n' or
              stri[pos] = '\r';
          if pos <= leng and stri[pos] = '\'' then
            symbol &:= stri[pos];
            incr(pos);
          end if;
        end if;
      else
        symbol &:= stri[pos];
        incr(pos);
      end if;
    end if;
    stri := stri[pos ..];
  end func;


(**
 *  Reads a string literal from a [[string]].
 *  When the function is called it is assumed that stri[1] contains
 *  the introducing " . When the function is left ''stri'' is empty or
 *  stri[1] contains the character after the closing " .
 *  @return the string literal including the introducing " and the
 *          closing " .
 *)
const func string: getStringLiteral (inout string: stri) is func
  result
    var string: symbol is "\"";
  local
    var integer: leng is 0;
    var integer: pos is 1;
    var boolean: reading_string is TRUE;
  begin
    leng := length(stri);
    incr(pos);
    repeat
      while pos <= leng and stri[pos] in no_escape_char do
        symbol &:= stri[pos];
        incr(pos);
      end while;
      if pos > leng or stri[pos] = '\n' or stri[pos] = '\r' then
        reading_string := FALSE;
      elsif stri[pos] = '\"' then
        symbol &:= stri[pos];
        incr(pos);
        if pos <= leng and stri[pos] = '\"' then
          symbol &:= stri[pos];
          incr(pos);
        else
          reading_string := FALSE;
        end if;
      elsif stri[pos] = '\\' then
        getEscapeSequence(stri, pos, symbol);
      else
        repeat
          symbol &:= stri[pos];
          incr(pos);
        until pos > leng or stri[pos] >= ' ' or stri[pos] <= '~';
      end if;
    until not reading_string;
    stri := stri[pos ..];
  end func;


(**
 *  Reads the text of a string literal from a [[string]].
 *  When the function is called it is assumed that stri[1] contains
 *  the introducing " . When the function is left ''stri'' is empty or
 *  stri[1] contains the character after the closing " .
 *  @return the text of the string literal without introducing or
 *          closing " .
 *)
const func string: getCStringLiteralText (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: startPos is 2;
    var integer: pos is 2;
    var boolean: reading_string is TRUE;
    var integer: charValue is 0;
  begin
    leng := length(stri);
    repeat
      startPos := pos;
      while pos <= leng and stri[pos] in no_escape_char do
        incr(pos);
      end while;
      symbol &:= stri[startPos .. pred(pos)];
      if pos > leng or stri[pos] = '\n' or stri[pos] = '\r' then
        reading_string := FALSE;
      elsif stri[pos] = '\"' then
        incr(pos);
        if pos <= leng and stri[pos] = '\"' then
          symbol &:= stri[pos];
          incr(pos);
        else
          reading_string := FALSE;
        end if;
      elsif stri[pos] = '\\' then
        incr(pos);
        if pos <= leng then
          case stri[pos] of
            when {'a'}:
              symbol &:= "\a";
              incr(pos);
            when {'b'}:
              symbol &:= "\b";
              incr(pos);
            when {'f'}:
              symbol &:= "\f";
              incr(pos);
            when {'n'}:
              symbol &:= "\n";
              incr(pos);
            when {'r'}:
              symbol &:= "\r";
              incr(pos);
            when {'t'}:
              symbol &:= "\t";
              incr(pos);
            when {'v'}:
              symbol &:= "\v";
              incr(pos);
            when {'\\', '?', ''', '"'}:
              symbol &:= stri[pos];
              incr(pos);
            when octdigit_char:
              incr(pos);
              if pos <= leng and stri[pos] in octdigit_char then
                incr(pos);
                if pos <= leng and stri[pos] in octdigit_char then
                  charValue := integer(stri[pos - 2 fixLen 3], 8);
                  incr(pos);
                else
                  charValue := integer(stri[pos - 2 fixLen 2], 8);
                end if;
              else
                charValue := integer(stri[pred(pos) fixLen 1], 8);
              end if;
              symbol &:= chr(charValue);
            when {'x'}:
              incr(pos);
              if pos <= leng and stri[pos] in hexdigit_char then
                incr(pos);
                if pos <= leng and stri[pos] in hexdigit_char then
                  charValue := integer(stri[pred(pos) fixLen 2], 16);
                  incr(pos);
                else
                  charValue := integer(stri[pred(pos) fixLen 1], 16);
                end if;
                symbol &:= chr(charValue);
              else
                symbol &:= "\\x";
              end if;
            otherwise:
              symbol &:= "\\";
          end case;
        else
          symbol &:= "\\";
        end if;
      else
        repeat
          symbol &:= stri[pos];
          incr(pos);
        until pos > leng or stri[pos] >= ' ' or stri[pos] <= '~';
      end if;
    until not reading_string;
    stri := stri[pos ..];
  end func;


(**
 *  Reads a sequence of letters from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the letters.
 *   stri := "test";    getLetters(stri)  returns  "test"  and stri = ""
 *   stri := "test1";   getLetters(stri)  returns  "test"  and stri = "1";
 *   stri := "test+1";  getLetters(stri)  returns  "test"  and stri = "+1"
 *   stri := "+1";      getLetters(stri)  returns  ""      and stri = "+1"
 *  @return the letter sequence, and
 *          "" if no letter was found.
 *)
const func string: getLetters (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    while pos <= leng and stri[pos] in letter_char do
      incr(pos);
    end while;
    symbol := stri[.. pred(pos)];
    stri := stri[pos ..];
  end func;


(**
 *  Reads an alphanumeric name from a [[string]].
 *  A name consists of a letter or underscore followed by letters,
 *  digits or underscores. When the function is called it is assumed
 *  that ''stri'' is empty or stri[1] contains the first character to be
 *  handled. When the function is left ''stri'' is empty or stri[1]
 *  contains the character after the name.
 *   stri := "test";    getName(stri)  returns  "test"   and stri = ""
 *   stri := "test1";   getName(stri)  returns  "test1"  and stri = "";
 *   stri := "test+1";  getName(stri)  returns  "test"   and stri = "+1"
 *   stri := "+1";      getName(stri)  returns  ""       and stri = "+1"
 *  @return the name, and
 *          "" if no letter or underscore was found.
 *)
const func string: getName (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    if pos <= leng and stri[pos] in name_start_char then
      incr(pos);
      while pos <= leng and stri[pos] in name_char do
        incr(pos);
      end while;
      symbol := stri[.. pred(pos)];
      stri := stri[pos ..];
    end if;
  end func;


(**
 *  Skips space characters from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the space characters.
 *   stri := "  ok";  skipSpace(stri);  afterwards  stri = "ok"
 *   stri := "   ";   skipSpace(stri);  afterwards  stri = ""
 *   stri := "ok ";   skipSpace(stri);  afterwards  stri = "ok "
 *)
const proc: skipSpace (inout string: stri) is func
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    if leng >= 1 and stri[1] = ' ' then
      repeat
        incr(pos);
      until pos > leng or stri[pos] <> ' ';
      stri := stri[pos ..];
    end if;
  end func;


(**
 *  Skips space and tab characters from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the sequence of space and tab characters.
 *   stri := "\t x";  skipSpaceOrTab(stri);  afterwards  stri = "x"
 *   stri := "\t  ";  skipSpaceOrTab(stri);  afterwards  stri = ""
 *   stri := "abc ";  skipSpaceOrTab(stri);  afterwards  stri = "abc "
 *)
const proc: skipSpaceOrTab (inout string: stri) is func
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    if leng >= 1 and (stri[1] = ' ' or stri[1] = '\t') then
      repeat
        incr(pos);
      until pos > leng or (stri[pos] <> ' ' and stri[pos] <> '\t');
      stri := stri[pos ..];
    end if;
  end func;


(**
 *  Skips whitespace characters from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the whitespace characters.
 *   stri := "\t\n\r X";  skipWhiteSpace(stri);  afterwards  stri = "X"
 *   stri := "\t\n\r ";   skipWhiteSpace(stri);  afterwards  stri = ""
 *   stri := "X ";        skipWhiteSpace(stri);  afterwards  stri = "X "
 *)
const proc: skipWhiteSpace (inout string: stri) is func
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    if pos <= leng and stri[pos] in white_space_char then
      repeat
        incr(pos);
      until pos > leng or stri[pos] not in white_space_char;
      stri := stri[pos ..];
    end if;
  end func;


(**
 *  Reads whitespace characters from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the whitespace characters.
 *   stri := "\t X";  getWhiteSpace(stri)  returns  "\t "   and stri = "X"
 *   stri := "\r\n";  getWhiteSpace(stri)  returns  "\r\n"  and stri = ""
 *   stri := "X ";    getWhiteSpace(stri)  returns  ""      and stri = "X "
 *  @return the string of whitespace characters, and
 *          "" if no whitespace character was found.
 *)
const func string: getWhiteSpace (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    if pos <= leng and stri[pos] in white_space_char then
      repeat
        incr(pos);
      until pos > leng or stri[pos] not in white_space_char;
      symbol := stri[.. pred(pos)];
      stri := stri[pos ..];
    end if;
  end func;


(**
 *  Reads a white space delimited word from a [[string]].
 *  Before reading the word it skips whitespace characters. A word is
 *  a sequence of characters which does not contain a whitespace
 *  character. When the function is called it is assumed that ''stri''
 *  is empty or stri[1] contains the first character to be handled.
 *  When the function is left ''stri'' is empty or stri[1] contains the
 *  character after the word.
 *   stri := " abc";   getWord(stri)  returns  "abc"  and stri = ""
 *   stri := " abc ";  getWord(stri)  returns  "abc"  and stri = " "
 *   stri := "abc\t";  getWord(stri)  returns  "abc"  and stri = "\t"
 *  @return the word, and "" if no word was found.
 *)
const func string: getWord (inout string: stri) is func
  result
    var string: aWord is "";
  local
    var integer: leng is 0;
    var integer: start is 1;
    var integer: pos is 1;
  begin
    leng := length(stri);
    while start <= leng and stri[start] in white_space_char do
      incr(start);
    end while;
    if start <= leng then
      pos := start;
      repeat
        incr(pos);
      until pos > leng or stri[pos] in white_space_char;
      aWord := stri[start .. pred(pos)];
      stri := stri[pos ..];
    else
      stri := "";
    end if;
  end func;


(**
 *  Reads a word consisting of ''wordChars'' from a [[string]].
 *  Before reading the word it skips non-''wordChars'' characters.
 *  A word is a sequence of ''wordChars'' characters. When the function
 *  is called it is assumed that ''stri'' is empty or stri[1] contains
 *  the first character to be handled. When the function is left
 *  ''stri'' is empty or stri[1] contains the character after the word.
 *   stri := " a1";   getWord(stri, alphanum_char)  returns  "a1"  and stri = ""
 *   stri := "-a2.";  getWord(stri, alphanum_char)  returns  "a2"  and stri = "."
 *   stri := "=a3,";  getWord(stri, alphanum_char)  returns  "a3"  and stri = ","
 *   stri := "a4\t";  getWord(stri, alphanum_char)  returns  "a4"  and stri = "\t"
 *   stri := ", a5";  getWord(stri, alphanum_char)  returns  "a5"  and stri = ""
 *  @return the word, and "" if no word was found.
 *)
const func string: getWord (inout string: stri, in set of char: wordChars) is func
  result
    var string: aWord is "";
  local
    var integer: leng is 0;
    var integer: start is 1;
    var integer: pos is 1;
  begin
    leng := length(stri);
    while start <= leng and stri[start] not in wordChars do
      incr(start);
    end while;
    if start <= leng then
      pos := start;
      repeat
        incr(pos);
      until pos > leng or stri[pos] not in wordChars;
      aWord := stri[start .. pred(pos)];
      stri := stri[pos ..];
    else
      stri := "";
    end if;
  end func;


(**
 *  Skips a line from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains '\n'.
 *  If stri[1] already contains '\n' nothing is done.
 *   stri := "ab\nc";  skipLine(stri);  afterwards  stri = "\nc"
 *   stri := "abc";    skipLine(stri);  afterwards  stri = ""
 *)
const proc: skipLine (inout string: stri) is func
  local
    var integer: pos is 0;
  begin
    pos := pos(stri, '\n');
    if pos <> 0 then
      stri := stri[pos ..];
    else
      stri := "";
    end if;
  end func;


(**
 *  Reads a line from a [[string]].
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains the first character to be handled. When the
 *  function is left ''stri'' is empty or stri[1] contains '\n'.
 *  If stri[1] already contains '\n' nothing is done and the
 *  function returns "" .
 *   stri := "ab\nc";  getLine(stri)  returns  "ab"   and stri = "\nc"
 *   stri := "abc";    getLine(stri)  returns  "abc"  and stri = ""
 *  @return the line read, and
 *          "" if ''stri'' is empty or stri[1] contains '\n'.
 *)
const func string: getLine (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: pos is 0;
  begin
    pos := pos(stri, '\n');
    if pos <> 0 then
      symbol := stri[.. pred(pos)];
      stri := stri[pos ..];
    else
      symbol := stri;
      stri := "";
    end if;
  end func;


(**
 *  Reads a symbol or a comment from a [[string]].
 *  Before reading the symbol or comment it skips whitespace
 *  characters. A symbol can be a literal (numeric, character or
 *  string), a name, a special symbol (sequence of special characters)
 *  or a parenthesis. A comment can be a normal comment or a line
 *  comment. When the function is called it is assumed that ''stri'' is
 *  empty or stri[1] contains a whitespace character or the first
 *  character of a symbol or comment. When the function is left ''stri''
 *  is empty or stri[1] contains the character after the symbol or
 *  comment.
 *  @return the symbol or comment, and
 *          "" if the end of ''stri'' was reached.
 *)
const func string: getSymbolOrComment (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: start is 1;
    var integer: pos is 0;
  begin
    leng := length(stri);
    while start <= leng and stri[start] in white_space_char do
      incr(start);
    end while;
    if start <= leng then
      pos := start;
      case stri[pos] of
        when name_start_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] not in name_char;
          symbol := stri[start .. pred(pos)];
          stri := stri[pos ..];
        when special_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] not in special_char;
          symbol := stri[start .. pred(pos)];
          stri := stri[pos ..];
        when left_paren_char:
          stri := stri[succ(pos) ..];
          if pos <= length(stri) and stri[pos] = '*' then
            symbol := getComment(stri);
          else
            symbol := "(";
          end if;
        when other_paren_char:
          symbol := stri[pos fixLen 1];
          stri := stri[succ(pos) ..];
        when digit_char:
          stri := stri[pos ..];
          symbol := getNumber(stri);
        when single_quotation_char:
          stri := stri[pos ..];
          symbol := getCharLiteral(stri);
        when double_quotation_char:
          stri := stri[pos ..];
          symbol := getStringLiteral(stri);
        when sharp_char:
          stri := stri[pos ..];
          symbol := getLineComment(stri);
        otherwise:
          incr(pos);
          symbol := stri[start .. pred(pos)];
          stri := stri[pos ..];
      end case;
    else
      stri := "";
    end if;
  end func;


(**
 *  Reads a symbol from a [[string]].
 *  Before reading the symbol it skips whitespace characters and
 *  comments (normal comments and line comments). A symbol can be a
 *  literal (numeric, character or string), a name, a special symbol
 *  (sequence of special characters) or a parenthesis. When the
 *  function is called it is assumed that ''stri'' is empty or stri[1]
 *  contains a whitespace character or the first character of a symbol
 *  or comment. When the function is left ''stri'' is empty or stri[1]
 *  contains the character after the symbol.
 *  @return the symbol, and
 *          "" if end of ''stri'' was reached.
 *)
const func string: getSymbol (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: start is 1;
    var integer: pos is 0;
  begin
    leng := length(stri);
    while start <= leng and stri[start] in white_space_char do
      incr(start);
    end while;
    if start <= leng then
      pos := start;
      case stri[pos] of
        when name_start_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] not in name_char;
          symbol := stri[start .. pred(pos)];
          stri := stri[pos ..];
        when special_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] not in special_char;
          symbol := stri[start .. pred(pos)];
          stri := stri[pos ..];
        when left_paren_char:
          stri := stri[succ(pos) ..];
          if length(stri) >= 1 and stri[1] = '*' then
            skipComment(stri);
            symbol := getSymbol(stri);
          else
            symbol := "(";
          end if;
        when other_paren_char:
          symbol := stri[pos fixLen 1];
          stri := stri[succ(pos) ..];
        when digit_char:
          stri := stri[pos ..];
          symbol := getNumber(stri);
        when single_quotation_char:
          stri := stri[pos ..];
          symbol := getCharLiteral(stri);
        when double_quotation_char:
          stri := stri[pos ..];
          symbol := getStringLiteral(stri);
        when sharp_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] = '\n';
          stri := stri[pos ..];
        otherwise:
          incr(pos);
          symbol := stri[start .. pred(pos)];
          stri := stri[pos ..];
      end case;
    else
      stri := "";
    end if;
  end func;


(**
 *  Skips an XML comment from a [[string]].
 *  The XML comment starts with <!-- and ends with --> . When the
 *  function is called it is assumed that stri[1] contains the
 *  last '-' of the introducing <!-- . When the function is left
 *  ''stri'' is empty or stri[1] contains the character after --> .
 *)
const proc: skipXmlComment (inout string: stri) is func
  local
    var integer: leng is 0;
    var integer: pos is 1;
    var boolean: endOfCommentReached is FALSE;
  begin
    leng := length(stri);
    repeat
      while pos <= leng and stri[pos] <> '-' do
        incr(pos);
      end while;
      if pos <= leng then
        # stri[pos] = '-'
        incr(pos);
        if pos <= leng and stri[pos] = '-' then
          repeat
            incr(pos);
          until pos > leng or stri[pos] <> '-';
          if pos <= leng and stri[pos] = '>' then
            incr(pos);
            endOfCommentReached := TRUE;
          end if;
        end if;
      end if;
    until endOfCommentReached or pos > leng;
    stri := stri[pos ..];
  end func;


(**
 *  Reads an XML/HTML tag or the XML/HTML content text from a [[string]].
 *  An XML/HTML tag starts with < and ends with > . The content text
 *  starts with everything else and ends just before a < or with the end
 *  of ''stri''. When the function is called it is assumed that stri[1]
 *  contains the introducing < of an XML/HTML tag or the first character
 *  of the content text. When the function is left the character after
 *  the XML/HTML tag or the content text is in stri[1].
 *  @return the XML/HTML tag or XML/HTML content text, and
 *          "" if the end of ''stri'' was reached.
 *)
const func string: getXmlTagOrContent (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
  begin
    leng := length(stri);
    if pos <= leng then
      if stri[pos] = '<' then
        repeat
          incr(pos);
        until pos > leng or stri[pos] = '>';
        symbol := stri[.. pos];
        stri := stri[succ(pos) ..];
      else
        repeat
          incr(pos);
        until pos > leng or stri[pos] = '<';
        symbol := stri[.. pred(pos)];
        stri := stri[pos ..];
      end if;
    end if;
  end func;


(**
 *  Read the content text of a CDATA section.
 *  In a CDATA section the text between <![CDATA[ and ]]> is considered
 *  content text. Inside a CDATA section the characters < and & have no
 *  special meaning. All occurances of < and & inside CDATA are returned
 *  as &lt; and &amp; respectively. When the function is called it is
 *  assumed that ''stri'' is empty or stri[1] contains the first character
 *  after the introducing <![CDATA[ sequence. When the function is left
 *  ''stri'' is empty or stri[1] contains the character after final
 *  ]]> sequence.
 *  @param stri Input [[string]] from which the consumed characters are removed.
 *  @return the content text of the CDATA section that has been read.
 *)
const func string: getXmlCdataContent (inout string: stri) is func
  result
    var string: cdata is "";
  local
    var integer: leng is 0;
    var integer: pos is 1;
    var char: character is ' ';
  begin
    leng := length(stri);
    repeat
      repeat
        while pos <= leng and stri[pos] <> ']' do
          character :=  stri[pos];
          if character = '<' then
            cdata &:= "&lt;";
          elsif character = '&' then
            cdata &:= "&amp;";
          else
            cdata &:= character;
          end if;
          incr(pos);
        end while;
        incr(pos);
        if pos <= leng and stri[pos] <> ']' then
          cdata &:= ']';
        end if;
      until pos > leng or stri[pos] = ']';
      incr(pos);
      if pos <= leng and stri[pos] <> '>' then
        cdata &:= "]]";
      end if;
    until pos > leng or stri[pos] = '>';
    incr(pos);
    stri := stri[pos ..];
  end func;


(**
 *  Reads an XML/HTML tag head or an XML/HTML content from a [[string]].
 *  Examples of XML/HTML tag heads are:
 *   <html
 *   <meta
 *   <table
 *   </span
 *  Before reading a tag head or content, it skips whitespace characters
 *  and XML comments. An XML/HTML tag head starts with < and ends
 *  before a > or a / or a whitespace character or the end of ''stri''.
 *  The content text starts with a non whitespace character and ends
 *  just before a < or with the end of ''stri''. Content text can be
 *  also in a CDATA section. In a CDATA section the text between
 *  <![CDATA[ and ]]> is considered content text. Inside a CDATA section
 *  the characters < and & have no special meaning. All occurances of
 *  < and & inside CDATA are returned as &lt; and &amp; respectively.
 *  When the function is called it is assumed that ''stri'' is empty
 *  or stri[1] contains either a whitespace character, the introducing
 *  < of an XML/HTML tag or the first character of the content text.
 *  When the function is left, ''stri'' is empty or stri[1] contains
 *  the character after the XML/HTML tag head or the content text.
 *  Text between <!-- and --> is considered an XML comment. An XML
 *  comment is ignored and getXmlTagHeadOrContent() is called recursive.
 *  The function can be used as follows:
 *   symbol := getXmlTagHeadOrContent(stri);
 *   if startsWith(symbol, "</") then
 *     ... handle the XML/HTML end-tag ...
 *   elsif startsWith(symbol, "<") then
 *     ... handle the attributes of the XML/HTML start-tag ...
 *   else
 *     ... handle the content text ...
 *   end if;
 *  @param stri Input [[string]] from which the consumed characters are removed.
 *  @return the XML/HTML tag head or XML/HTML content text, and
 *          "" if the end of ''stri'' was reached.
 *)
const func string: getXmlTagHeadOrContent (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: start is 1;
    var integer: pos is 1;
    var boolean: finished is FALSE;
  begin
    leng := length(stri);
    while start <= leng and stri[start] in white_space_char do
      incr(start);
    end while;
    if pos <= leng and stri[pos] = '<' then
      incr(pos);
      if pos <= leng and stri[pos] = '!' then
        incr(pos);
        if pos <= leng and stri[pos] = '-' then
          incr(pos);
          if pos <= leng and stri[pos] = '-' then
            stri := stri[pos ..];
            skipXmlComment(stri);
            symbol := getXmlTagHeadOrContent(stri);
            finished  := TRUE;
          end if;
        elsif pos <= leng and stri[pos] = '[' then
          symbol := "<![";
          incr(pos);
          while pos <= leng and stri[pos] in letter_char do
            symbol &:= stri[pos];
            incr(pos);
          end while;
          if symbol = "<![CDATA" and pos <= leng and stri[pos] = '[' then
            incr(pos);
            stri := stri[pos ..];
            symbol := getXmlCdataContent(stri);
            if symbol = "" then
              symbol := getXmlTagHeadOrContent(stri);
            end if;
            finished  := TRUE;
          end if;
        end if;
      elsif pos <= leng and stri[pos] = '/' then
        incr(pos);
      end if;
      if not finished then
        if pos <= leng and (isLetter(stri[pos]) or stri[pos] = '_') then
          repeat
            incr(pos);
          until pos > leng or stri[pos] in white_space_char or
              stri[pos] = '>' or stri[pos] = '/';
          symbol := stri[start .. pred(pos)];
        else
          while pos <= leng and stri[pos] <> '<' do
            incr(pos);
          end while;
          symbol := "&lt;" & stri[succ(start) .. pred(pos)];
        end if;
        stri := stri[pos ..];
      end if;
    else
      repeat
        incr(pos);
      until pos > leng or stri[pos] = '<';
      symbol := stri[start .. pred(pos)];
      stri := stri[pos ..];
    end if;
  end func;

(**
 *  Reads a symbol which can appear inside an XML/HTML tag from [[string]].
 *  Before reading the symbol it skips whitespace characters. A symbol
 *  inside an XML/HTML tag can be a name, a string literal (quoted with "
 *  or ' ), the equals sign (=), the end of tag character (>), the slash
 *  character (/) or a special symbol (a sequence of characters that
 *  does not include the character > or a whitespace character). Special
 *  symbols can only appear in HTML tags. When the function is called it
 *  is assumed that ''stri'' is empty or stri[1] contains a whitespace
 *  character or the first character of a symbol. When the function is
 *  left ''stri'' is empty or stri[1] contains the character after the
 *  symbol.
 *  @param stri Input [[string]] from which the consumed characters are removed.
 *  @return the symbol, and
 *          "" if the end of ''stri'' was reached.
 *)
const func string: getSymbolInXmlTag (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: start is 1;
    var integer: pos is 0;
  begin
    leng := length(stri);
    while start <= leng and stri[start] in white_space_char do
      incr(start);
    end while;
    if start <= leng then
      pos := start;
      case stri[pos] of
        when html_name_start_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] not in html_name_char;
          symbol := stri[start .. pred(pos)];
          stri := stri[pos ..];
        when double_quotation_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] = '"';
          symbol := stri[succ(start) .. pred(pos)];
          stri := stri[succ(pos) ..];
        when single_quotation_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] = ''';
          symbol := stri[succ(start) .. pred(pos)];
          stri := stri[succ(pos) ..];
        when equals_or_end_tag:
          symbol := stri[pos fixLen 1];
          stri := stri[succ(pos) ..];
        otherwise:
          repeat
            incr(pos);
          until pos > leng or stri[pos] in white_space_or_end_tag;
          symbol := stri[start .. pred(pos)];
          stri := stri[pos ..];
      end case;
    else
      stri := "";
    end if;
  end func;


(**
 *  Skips beyond an XML Tag in a [[string]].
 *  When the function is left ''stri'' is empty or stri[1] contains
 *  the character after '>'.
 *)
const proc: skipXmlTag (inout string: stri) is func
  local
    var string: symbol is "";
  begin
    repeat
      symbol := getSymbolInXmlTag(stri);
    until symbol = ">" or symbol = "";
  end func;


(**
 *  Skips beyond an XML Tag in a [[string]].
 *  The parameter ''symbol'' is used to provide the current symbol
 *  which possibly can be ">" or "". When the function is left
 *  ''stri'' is empty or stri[1] contains the character after '>'.
 *)
const proc: skipXmlTag (inout string: stri, in var string: symbol) is func
  begin
    while symbol <> ">" and symbol <> "" do
      symbol := getSymbolInXmlTag(stri);
    end while;
  end func;


(**
 *  Reads name and value of an attribute inside an XML tag from a [[string]].
 *  The function skips possible leading whitespace characters. Attribute
 *  name and value are returned in ''attributeName'' and ''attributeValue''
 *  respectively. Attribute assignments can have the following forms:
 *   aName="aValue"
 *   aName='aValue'
 *  Surrounding single or double quotes of the attribute value are omitted.
 *  It is a syntax error if an attribute value is not quoted. White
 *  space characters before and after the = are ignored. XML entities
 *  in ''attributeValue'' are left as is. If no more attributes are
 *  present in the XML tag ''attributeName'' is set to "". In this case
 *  ''attributeValue'' contains the end of the XML tag (">" or "/>") and
 *  ''stri'' is empty or stri[1] contains the character after '>'.
 *  If a syntax error occurs the function skips beyond the end of
 *  the XML tag (''stri'' is empty or stri[1] contains the character
 *  after '>'). To indicate the syntax error ''attributeName'' is
 *  set to "" and ''attributeValue'' is set to a symbol shortly before
 *  the error (this will never be ">" or "/>"). The attributes of
 *  an XML start-tag or empty-element tag can be processed with:
 *   getNextXmlAttribute(stri, attributeName, attributeValue);
 *   while attributeName <> "" do
 *     ... process the current attribute ...
 *     getNextXmlAttribute(stri, attributeName, attributeValue);
 *   end while;
 *   if attributeValue = "/>" then
 *     ... this is an empty-element tag ...
 *   elsif attributeValue = ">" then
 *     ... this is a start-tag ...
 *   else
 *     ... there is a syntax error ...
 *   end if;
 *  @param stri Input [[string]] from which the consumed characters are removed.
 *  @param attributeName Destination for the attribute name.
 *  @param attributeValue Destination for the attribute value:
 *)
const proc: getNextXmlAttribute (inout string: stri,
    inout string: attributeName, inout string: attributeValue) is func
  begin
    attributeName := getSymbolInXmlTag(stri);
    if attributeName = "/" then
      attributeName := "";
      attributeValue := getSymbolInXmlTag(stri);
      if attributeValue = ">" then
        attributeValue := "/>";
      else
        attributeValue := "/";
        skipXmlTag(stri, attributeValue);
      end if;
    elsif attributeName = ">" then
      attributeName := "";
      attributeValue := ">";
    else
      attributeValue := getSymbolInXmlTag(stri);
      if attributeValue = "=" then
        attributeValue := getSymbolInXmlTag(stri);
        if  startsWith(attributeValue, "\"") or
            startsWith(attributeValue, "'") then
          attributeValue := attributeValue[2 ..];
        else
          attributeValue := attributeName;
          attributeName := "";
          skipXmlTag(stri, attributeValue);
        end if;
      else
        attributeValue := attributeName;
        attributeName := "";
        skipXmlTag(stri, attributeValue);
      end if;
    end if;
  end func;


(**
 *  Reads a HTML tag attribute value from a [[string]].
 *  Before reading the value it skips whitespace characters. A HTML
 *  tag attribute value can be quoted with " or ' or it is terminated
 *  with the character > or a whitespace character. When the function
 *  is called it is assumed that ''stri'' is empty or stri[1] contains
 *  a whitespace character or the first character of a value. When the
 *  function is left ''stri'' is empty or stri[1] contains the character
 *  after the attribute value.
 *  @param stri Input [[string]] from which the consumed characters are removed.
 *  @return the attribute value, and
 *          "" if the end of the HTML tag or the end of ''stri'' is
 *              directly after the skipped whitespace characters.
 *)
const func string: getHtmlAttributeValue (inout string: stri) is func
  result
    var string: attributeValue is "";
  local
    var integer: leng is 0;
    var integer: start is 1;
    var integer: pos is 0;
  begin
    leng := length(stri);
    while start <= leng and stri[start] in white_space_char do
      incr(start);
    end while;
    if start <= leng then
      pos := start;
      case stri[pos] of
        when double_quotation_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] = '"';
          attributeValue := stri[succ(start) .. pred(pos)];
          stri := stri[succ(pos) ..];
        when single_quotation_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] = ''';
          attributeValue := stri[succ(start) .. pred(pos)];
          stri := stri[succ(pos) ..];
        when {'>'}:
          stri := stri[pos ..];
        otherwise:
          repeat
            incr(pos);
          until pos > leng or stri[pos] in white_space_or_end_tag;
          attributeValue := stri[start .. pred(pos)];
          stri := stri[pos ..];
      end case;
    else
      stri := "";
    end if;
  end func;


(**
 *  Reads name and value of an attribute inside a HTML tag from a [[string]].
 *  The function skips possible leading whitespace characters. Attribute
 *  name and value are returned in ''attributeName'' and ''attributeValue''
 *  respectively. Attribute assignments can have the following forms:
 *   aName="aValue"
 *   aName='aValue'
 *   aName=aValue
 *   aName
 *  Possible surrounding single or double quotes of the attribute value
 *  are omitted. White space characters before and after the = are
 *  ignored. HTML entities in ''attributeValue'' are left as is.
 *  If no more attributes are present in the XML tag ''attributeName''
 *  is set to "". In this case ''attributeValue'' contains the end of
 *  the HTML tag (">" or "/>") and ''stri'' is empty or stri[1] contains
 *  the character after '>'. The attributes of a HTML start-tag or
 *  empty-element tag can be processed with:
 *   getNextHtmlAttribute(stri, attributeName, attributeValue);
 *   while attributeName <> "" do
 *     ... process the current attribute ...
 *     getNextHtmlAttribute(stri, attributeName, attributeValue);
 *   end while;
 *   if attributeValue = "/>" then
 *     ... this is an empty-element tag ...
 *   else  # attributeValue = ">"
 *     ... this is a start-tag ...
 *   end if;
 *  @param stri Input [[string]] from which the consumed characters are removed.
 *  @param attributeName Destination for the attribute name.
 *  @param attributeValue Destination for the attribute value:
 *)
const proc: getNextHtmlAttribute (inout string: stri,
    inout string: attributeName, inout string: attributeValue) is func
  begin
    attributeName := getSymbolInXmlTag(stri);
    if attributeName = "/" and stri <> "" and stri[1] = '>' then
      stri := stri[2 ..];
      attributeName := "";
      attributeValue := "/>";
    elsif attributeName = ">" then
      attributeName := "";
      attributeValue := ">";
    else
      skipWhiteSpace(stri);
      if stri <> "" and stri[1] = '=' then
        stri := stri[2 ..];
        attributeValue := getHtmlAttributeValue(stri);
        if  startsWith(attributeValue, "\"") or
            startsWith(attributeValue, "'") then
          attributeValue := attributeValue[2 ..];
        end if;
      else
        attributeValue := "";
      end if;
    end if;
  end func;


(**
 *  Reads a symbol which appears in a HTTP header from a [[string]].
 *  Before reading the symbol it skips whitespace characters. A symbol
 *  from a HTTP header can be a token a string literal or a separator.
 *  When the function is called it is assumed that ''stri'' is empty or
 *  stri[1] contains a whitespace character or the first character of
 *  a symbol. When the function is left ''stri'' is empty or stri[1]
 *  contains the character after the symbol.
 *  @return the symbol, and
 *          "" if the end of ''stri'' was reached.
 *)
const func string: getHttpSymbol (inout string: stri) is func
  result
    var string: symbol is "";
  local
    var integer: leng is 0;
    var integer: start is 1;
    var integer: pos is 0;
  begin
    leng := length(stri);
    while start <= leng and stri[start] in space_or_tab do
      incr(start);
    end while;
    if start <= leng then
      pos := start;
      case stri[pos] of
        when http_token_char:
          repeat
            incr(pos);
          until pos > leng or stri[pos] not in http_token_char;
          symbol := stri[start .. pred(pos)];
          stri := stri[pos ..];
        when double_quotation_char:
          incr(start);
          incr(pos);
          while pos <= leng and stri[pos] <> '"' do
            if stri[pos] = '\\' and pos < leng then
              symbol &:= stri[start .. pred(pos)] & stri[succ(pos) fixLen 1];
              pos +:= 2;
              start := pos;
            else
              incr(pos);
            end if;
          end while;
          symbol &:= stri[start .. pred(pos)];
          stri := stri[succ(pos) ..];
        when http_separators:
          symbol := stri[pos fixLen 1];
          stri := stri[succ(pos) ..];
        otherwise:
          repeat
            incr(pos);
          until pos > leng or stri[pos] in white_space_or_end_tag;
          symbol := stri[start .. pred(pos)];
          stri := stri[pos ..];
      end case;
    else
      stri := "";
    end if;
  end func;


const func string: getValueOfHeaderAttribute (in var string: headerLine, in string: attribute) is func
  result
    var string: attrValue is "";
  local
    var string: symbol is "";
  begin
    repeat
      symbol := getHttpSymbol(headerLine);
    until symbol = attribute or symbol = "";
    if symbol = attribute then
      symbol := getHttpSymbol(headerLine);
      if symbol = "=" then
        attrValue := getHttpSymbol(headerLine);
      end if;
    end if;
  end func;