(********************************************************************)
(*                                                                  *)
(*  utf8.s7i      File implementation type for UTF-8 files          *)
(*  Copyright (C) 2005  Thomas Mertes                               *)
(*                                                                  *)
(*  This file is part of the Seed7 Runtime Library.                 *)
(*                                                                  *)
(*  The Seed7 Runtime Library is free software; you can             *)
(*  redistribute it and/or modify it under the terms of the GNU     *)
(*  Lesser General Public License as published by the Free Software *)
(*  Foundation; either version 2.1 of the License, or (at your      *)
(*  option) any later version.                                      *)
(*                                                                  *)
(*  The Seed7 Runtime Library is distributed in the hope that it    *)
(*  will be useful, but WITHOUT ANY WARRANTY; without even the      *)
(*  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR *)
(*  PURPOSE.  See the GNU Lesser General Public License for more    *)
(*  details.                                                        *)
(*                                                                  *)
(*  You should have received a copy of the GNU Lesser General       *)
(*  Public License along with this program; if not, write to the    *)
(*  Free Software Foundation, Inc., 51 Franklin Street,             *)
(*  Fifth Floor, Boston, MA  02110-1301, USA.                       *)
(*                                                                  *)
(********************************************************************)


include "external_file.s7i";


(**
 *  [[file|File]] implementation type for UTF-8 files.
 *  This type supports UTF-8 encoded sequential files of the
 *  operating system. UTF-8 files are seekable, therefore they
 *  support the functions [[external_file#length(in_external_file)|length]],
 *  [[#seek(in_utf8File,in_integer)|seek]] and
 *  [[external_file#tell(in_external_file)|tell]].
 *  The type is derived from ''external_file'' and declares no
 *  elements of its own. The elements ''ext_file'', ''name'' and
 *  ''bufferChar'', which are used by the functions of this library,
 *  are therefore inherited ones.
 *)
const type: utf8File is sub external_file struct
  end struct;


(* Bindings to the primitive actions that implement UTF-8 file access.
   Each declaration gives a Seed7 name to the action named in the string
   literal. All of them take a clib_file as first parameter. The
   utf8File functions of this library call them with the ''ext_file''
   element of the utf8File. For utf8_word_read and utf8_line_read the
   callers pass the ''bufferChar'' element as terminationChar. *)
const func char: utf8_getc (ref clib_file: inFile)                    is action "UT8_GETC";
const func string: utf8_gets (in clib_file: inFile,
                              in integer: maxLength)                  is action "UT8_GETS";
const func string: utf8_word_read (ref clib_file: inFile,
                                   inout char: terminationChar)       is action "UT8_WORD_READ";
const func string: utf8_line_read (ref clib_file: inFile,
                                   inout char: terminationChar)       is action "UT8_LINE_READ";
const proc: utf8_write (ref clib_file: outFile, in string: stri)      is action "UT8_WRITE";
const proc: utf8_seek (ref clib_file: aFile, in integer: position)    is action "UT8_SEEK";


(**
 *  Open a Unicode file which uses the UTF-8 encoding.
 *  The file is opened with the specified ''path'' and ''mode''.
 *  There are text modes and binary modes:
 *  *Binary modes:
 *  ** "r"   Open file for reading.
 *  ** "w"   Truncate to zero length or create file for writing.
 *  ** "a"   Append; open or create file for writing at end-of-file.
 *  ** "r+"  Open file for update (reading and writing).
 *  ** "w+"  Truncate to zero length or create file for update.
 *  ** "a+"  Append; open or create file for update, writing at end-of-file.
 *  *Text modes:
 *  ** "rt"  Open file for reading.
 *  ** "wt"  Truncate to zero length or create file for writing.
 *  ** "at"  Append; open or create file for writing at end-of-file.
 *  ** "rt+" Open file for update (reading and writing).
 *  ** "wt+" Truncate to zero length or create file for update.
 *  ** "at+" Append; open or create file for update, writing at end-of-file.
 *  Note that these modes differ from the ones used by the C function
 *  fopen().
 *  @param path Path of the file to be opened. The path must
 *         use the standard path representation.
 *  @param mode Mode of the file to be opened.
 *  @return the file opened, or [[null_file#STD_NULL|STD_NULL]]
 *          if it could not be opened or if ''path'' refers to
 *          a directory.
 *  @exception MEMORY_ERROR Not enough memory to convert the path
 *             to the system path type.
 *  @exception RANGE_ERROR The ''mode'' is not one of the allowed
 *             values or ''path'' does not use the standard path
 *             representation or ''path'' cannot be converted
 *             to the system path type.
 *)
const func file: openUtf8 (in string: path, in string: mode) is func
  result
    var file: newFile is STD_NULL;
  local
    var clib_file: osFile is CLIB_NULL_FILE;
    var utf8File: wrappedFile is utf8File.value;
  begin
    osFile := openClibFile(path, mode);
    if osFile <> CLIB_NULL_FILE then
      # The open succeeded: fill the utf8File and hand it out as file interface.
      # Otherwise the result keeps its initial value STD_NULL.
      wrappedFile.name := path;
      wrappedFile.ext_file := osFile;
      newFile := toInterface(wrappedFile);
    end if;
  end func;


(**
 *  Write a string to a UTF-8 file.
 *  The string is passed to the primitive action ''utf8_write''
 *  together with the ''ext_file'' element of ''outFile''.
 *  @param outFile File to be written to.
 *  @param stri String to be written.
 *  @exception FILE_ERROR A system function returns an error.
 *)
const proc: write (in utf8File: outFile, in string: stri) is func
  begin
    utf8_write(outFile.ext_file, stri);
  end func;


(**
 *  Read a character from a UTF-8 file.
 *  @param inFile File to be read from.
 *  @return the character read, or [[char#EOF|EOF]] at the end of the file.
 *  @exception RANGE_ERROR The file contains an invalid encoding.
 *)
const func char: getc (in utf8File: inFile) is func
  result
    var char: charRead is ' ';
  begin
    charRead := utf8_getc(inFile.ext_file);
  end func;


(**
 *  Return a string read with a maximum length from a UTF-8 file.
 *  @param inFile File to be read from.
 *  @param maxLength Maximum length of the string to be read.
 *  @return the string read.
 *  @exception RANGE_ERROR The parameter ''maxLength'' is negative, or
 *             the file contains an invalid encoding.
 *)
const func string: gets (in utf8File: inFile, in integer: maxLength) is func
  result
    var string: striRead is "";
  begin
    striRead := utf8_gets(inFile.ext_file, maxLength);
  end func;


(**
 *  Read a word from a UTF-8 file.
 *  Before reading the word it skips spaces and tabs. The function
 *  accepts words ending with ' ', '\t', '\n', "\r\n" or [[char#EOF|EOF]].
 *  The word ending characters are not copied into the string.
 *  That means that the '\r' of a "\r\n" sequence is silently removed.
 *  When the function is left inFile.bufferChar contains ' ',
 *  '\t', '\n' or [[char#EOF|EOF]].
 *  @param inFile File to be read from. Its ''bufferChar'' element
 *         receives the character that terminated the word.
 *  @return the word read.
 *  @exception RANGE_ERROR The file contains an invalid encoding.
 *  @exception MEMORY_ERROR Not enough memory to represent the result.
 *  @exception FILE_ERROR A system function returns an error.
 *)
const func string: getwd (inout utf8File: inFile) is func
  result
    var string: word is "";
  begin
    word := utf8_word_read(inFile.ext_file, inFile.bufferChar);
  end func;


(**
 *  Read a line from a UTF-8 file.
 *  The function accepts lines ending with '\n', "\r\n" or [[char#EOF|EOF]].
 *  The line ending characters are not copied into the string.
 *  That means that the '\r' of a "\r\n" sequence is silently removed.
 *  When the function is left inFile.bufferChar contains '\n' or
 *  [[char#EOF|EOF]].
 *  @param inFile File to be read from. Its ''bufferChar'' element
 *         receives the character that terminated the line.
 *  @return the line read.
 *  @exception RANGE_ERROR The file contains an invalid encoding.
 *  @exception MEMORY_ERROR Not enough memory to represent the result.
 *  @exception FILE_ERROR A system function returns an error.
 *)
const func string: getln (inout utf8File: inFile) is func
  result
    var string: line is "";
  begin
    line := utf8_line_read(inFile.ext_file, inFile.bufferChar);
  end func;


(**
 *  Set the current file position.
 *  The file position is measured in bytes from the start of the file.
 *  The first byte in the file has the position 1.
 *  If the file position would be in the middle of a UTF-8 encoded
 *  character the position is advanced to the beginning of the next
 *  UTF-8 character.
 *  @param aFile File for which the position is set.
 *  @param position New file position, measured in bytes.
 *  @exception RANGE_ERROR The file position is negative or zero or
 *             the file position is not representable in the system
 *             file position type.
 *  @exception FILE_ERROR A system function returns an error.
 *)
const proc: seek (in utf8File: aFile, in integer: position) is func
  begin
    utf8_seek(aFile.ext_file, position);
  end func;


(* System STD_UTF8_IN, STD_UTF8_OUT and STD_UTF8_ERR files *)


(* Create the utf8File value for one of the standard files.
   The result starts as utf8File.value. Afterwards its ''name''
   element is set to the given path and its ''ext_file'' element
   is set to the given primitive_file. *)
const func utf8File: INIT_STD_UTF8_FILE (ref clib_file: primitive_file,
    in string: path) is func
  result
    var utf8File: stdFile is utf8File.value;
  begin
    stdFile.name := path;
    stdFile.ext_file := primitive_file;
  end func;


(**
 *  UTF-8 version of the standard input file of the operating system.
 *  The variable is initialized from ''CLIB_INPUT'' and has the
 *  name "STD_UTF8_IN".
 *  Reading from ''STD_UTF8_IN'' can be done with e.g.:
 *   read(STD_UTF8_IN, aVariable);
 *  It is also possible to redirect ''STD_UTF8_IN'' to the default
 *  input of ''read'' with:
 *   IN := STD_UTF8_IN;
 *  Afterwards
 *   read(aVariable);
 *  reads from ''STD_UTF8_IN''.
 *)
var utf8File: STD_UTF8_IN is  INIT_STD_UTF8_FILE(CLIB_INPUT,  "STD_UTF8_IN");


(**
 *  UTF-8 version of the standard output file of the operating system.
 *  The variable is initialized from ''CLIB_OUTPUT'' and has the
 *  name "STD_UTF8_OUT".
 *  Writing to ''STD_UTF8_OUT'' can be done with e.g.:
 *   write(STD_UTF8_OUT, something);
 *  It is also possible to redirect the default output of ''write''
 *  to ''STD_UTF8_OUT'' with:
 *   OUT := STD_UTF8_OUT;
 *  Afterwards
 *   write(something);
 *  writes to ''STD_UTF8_OUT''.
 *)
var utf8File: STD_UTF8_OUT is INIT_STD_UTF8_FILE(CLIB_OUTPUT, "STD_UTF8_OUT");


(**
 *  UTF-8 version of the standard error file of the operating system.
 *  The variable is initialized from ''CLIB_ERROR'' and has the
 *  name "STD_UTF8_ERR".
 *  Writing to ''STD_UTF8_ERR'' can be done with e.g.:
 *   write(STD_UTF8_ERR, something);
 *)
var utf8File: STD_UTF8_ERR is INIT_STD_UTF8_FILE(CLIB_ERROR,  "STD_UTF8_ERR");