(********************************************************************)
(*                                                                  *)
(*  gzip.s7i      Gzip compression support library                  *)
(*  Copyright (C) 2008 - 2013, 2015, 2019 - 2024  Thomas Mertes     *)
(*                                                                  *)
(*  This file is part of the Seed7 Runtime Library.                 *)
(*                                                                  *)
(*  The Seed7 Runtime Library is free software; you can             *)
(*  redistribute it and/or modify it under the terms of the GNU     *)
(*  Lesser General Public License as published by the Free Software *)
(*  Foundation; either version 2.1 of the License, or (at your      *)
(*  option) any later version.                                      *)
(*                                                                  *)
(*  The Seed7 Runtime Library is distributed in the hope that it    *)
(*  will be useful, but WITHOUT ANY WARRANTY; without even the      *)
(*  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR *)
(*  PURPOSE.  See the GNU Lesser General Public License for more    *)
(*  details.                                                        *)
(*                                                                  *)
(*  You should have received a copy of the GNU Lesser General       *)
(*  Public License along with this program; if not, write to the    *)
(*  Free Software Foundation, Inc., 51 Franklin Street,             *)
(*  Fifth Floor, Boston, MA  02110-1301, USA.                       *)
(*                                                                  *)
(********************************************************************)


include "inflate.s7i";
include "deflate.s7i";
include "bytedata.s7i";
include "time.s7i";
include "crc32.s7i";


const string: GZIP_MAGIC is "\31;\139;";  # Identification for gzip format

const integer: GZIP_HEADER_FIXED_SIZE is 10;


const type: gzipHeader is new struct
    var string: magic is "";
    var integer: compressionMethod is 0;
    var boolean: flagFtext is FALSE;
    var boolean: flagFhcrc is FALSE;
    var boolean: flagFextra is FALSE;
    var boolean: flagFname is FALSE;
    var boolean: flagFcomment is FALSE;
    var integer: mtime is 0;
    var integer: extraFlags is 0;
    var integer: operatingSystem is 0;
    var string: extraData is "";
    var string: originalFileName is "";
    var string: fileComment is "";
    var integer: crc16 is 0;
  end struct;


const proc: showHeader (in gzipHeader: header) is func
  begin
    writeln("magic: " <& literal(header.magic));
    writeln("compressionMethod: " <& header.compressionMethod);
    writeln("flagFtext: " <& header.flagFtext);
    writeln("flagFhcrc: " <& header.flagFhcrc);
    writeln("flagFextra: " <& header.flagFextra);
    writeln("flagFname: " <& header.flagFname);
    writeln("flagFcomment: " <& header.flagFcomment);
    writeln("mtime: " <& header.mtime);
    writeln("extraFlags: " <& header.extraFlags);
    writeln("operatingSystem: " <& header.operatingSystem);
    writeln("extraData: " <& literal(header.extraData));
    writeln("originalFileName: " <& literal(header.originalFileName));
    writeln("fileComment: " <& literal(header.fileComment));
    writeln("crc16: " <& header.crc16);
  end func;


const func gzipHeader: readGzipHeader (in string: stri, inout integer: bytePos) is func
  result
    var gzipHeader: header is gzipHeader.value;
  local
    var integer: flags is 0;
    var integer: extraLength is 0;
  begin
    if length(stri) >= GZIP_HEADER_FIXED_SIZE and
        startsWith(stri, GZIP_MAGIC) then
      header.magic := GZIP_MAGIC;
      header.compressionMethod := ord(stri[3]);
      flags := ord(stri[4]);
      header.flagFtext    := odd(flags);
      header.flagFhcrc    := odd(flags >> 1);
      header.flagFextra   := odd(flags >> 2);
      header.flagFname    := odd(flags >> 3);
      header.flagFcomment := odd(flags >> 4);
      header.mtime := bytes2Int(stri[5 fixLen 4], UNSIGNED, LE);
      header.extraFlags := ord(stri[9]);
      header.operatingSystem := ord(stri[10]);
      bytePos := 11;
      if header.flagFextra then
        if length(stri) >= succ(bytePos) then
          extraLength := bytes2Int(stri[bytePos fixLen 2], UNSIGNED, LE);
          bytePos +:= 2;
          header.extraData := stri[bytePos len extraLength];
          bytePos +:= extraLength;
          if length(header.extraData) <> extraLength then
            header.magic := "";
          end if;
        else
          header.magic := "";
        end if;
      end if;
      if header.flagFname then
        header.originalFileName := getAsciiz(stri, bytePos);
      end if;
      if header.flagFcomment then
        header.fileComment := getAsciiz(stri, bytePos);
      end if;
      if header.flagFhcrc then
        if length(stri) >= succ(bytePos) then
          header.crc16 := bytes2Int(stri[bytePos fixLen 2], UNSIGNED, LE);
          bytePos +:= 2;
        else
          header.magic := "";
        end if;
      end if;
    end if;
  end func;


const func gzipHeader: readGzipHeader (inout file: compressed) is func
  result
    var gzipHeader: header is gzipHeader.value;
  local
    var string: stri is "";
    var integer: flags is 0;
    var integer: extraLength is 0;
  begin
    stri := gets(compressed, GZIP_HEADER_FIXED_SIZE);
    if length(stri) = GZIP_HEADER_FIXED_SIZE and
        startsWith(stri, GZIP_MAGIC) then
      header.magic := GZIP_MAGIC;
      header.compressionMethod := ord(stri[3]);
      flags := ord(stri[4]);
      header.flagFtext    := odd(flags);
      header.flagFhcrc    := odd(flags >> 1);
      header.flagFextra   := odd(flags >> 2);
      header.flagFname    := odd(flags >> 3);
      header.flagFcomment := odd(flags >> 4);
      header.mtime := bytes2Int(stri[5 fixLen 4], UNSIGNED, LE);
      header.extraFlags := ord(stri[9]);
      header.operatingSystem := ord(stri[10]);
      if header.flagFextra then
        stri := gets(compressed, 2);
        if length(stri) = 2 then
          extraLength := bytes2Int(stri, UNSIGNED, LE);
          header.extraData := gets(compressed, extraLength);
          if length(header.extraData) <> extraLength then
            header.magic := "";
          end if;
        else
          header.magic := "";
        end if;
      end if;
      if header.flagFname then
        header.originalFileName := getAsciiz(compressed);
      end if;
      if header.flagFcomment then
        header.fileComment := getAsciiz(compressed);
      end if;
      if header.flagFhcrc then
        stri := gets(compressed, 2);
        if length(stri) = 2 then
          header.crc16 := bytes2Int(stri, UNSIGNED, LE);
        else
          header.magic := "";
        end if;
      end if;
    end if;
    # showHeader(header);
  end func;


(**
 *  Decompress a string that was compressed with zlib (RFC 1950).
 *  Zlib uses the DEFLATE compression algorithm. DEFLATE uses
 *  a combination of the LZ77 algorithm and Huffman coding.
 *  Additionally to DEFLATE zlib uses a small header.
 *  @return the uncompressed string.
 *  @exception RANGE_ERROR If ''compressed'' is not in zlib format.
 *)
const func string: gzuncompress (in string: compressed) is func
  result
    var string: uncompressed is "";
  local
    var integer: compressionMethod is 0;
    var integer: cinfo is 0;
    var integer: flags is 0;
    var lsbInBitStream: compressedStream is lsbInBitStream.value;
    var boolean: bfinal is FALSE;
  begin
    if (ord(compressed[1]) * 256 + ord(compressed[2])) mod 31 = 0 then
      compressionMethod := ord(compressed[1]) mod 16;
      cinfo := (ord(compressed[1]) >> 4) mod 16;
      flags := ord(compressed[2]);
      if compressionMethod = 8 then
        compressedStream := openLsbInBitStream(compressed);
        if odd(flags >> 5) then
          skipBits(compressedStream, 48);
        else
          skipBits(compressedStream, 16);
        end if;
        repeat
          processCompressedBlock(compressedStream, uncompressed, bfinal);
        until bfinal;
      end if;
    end if;
  end func;


const func string: gzcompress (in string: uncompressed) is func
  result
    var string: compressed is "";
  local
    const string: GZ_MAGIC is "x\156;";
    var lsbOutBitStream: compressedStream is lsbOutBitStream.value;
  begin
    write(compressedStream, GZ_MAGIC);
    deflateBlock(uncompressed, compressedStream, TRUE);
    flush(compressedStream);
    compressed := getBytes(compressedStream);
  end func;


(**
 *  Decompress a string that was compressed with gzip (RFC 1952).
 *  Gzip uses the DEFLATE compression algorithm. DEFLATE uses
 *  a combination of the LZ77 algorithm and Huffman coding.
 *  Additionally to DEFLATE gzip uses a magic number and a header.
 *  @return the uncompressed string.
 *  @exception RANGE_ERROR If ''compressed'' is not in gzip format.
 *)
const func string: gunzip (in string: compressed) is func
  result
    var string: uncompressed is "";
  local
    var gzipHeader: header is gzipHeader.value;
    var lsbInBitStream: compressedStream is lsbInBitStream.value;
    var integer: bytePos is 0;
    var boolean: bfinal is FALSE;
    var string: trailer is "";
  begin
    header := readGzipHeader(compressed, bytePos);
    if header.magic = GZIP_MAGIC then
      compressedStream := openLsbInBitStream(compressed);
      skipBits(compressedStream, pred(bytePos) * 8);
      repeat
        processCompressedBlock(compressedStream, uncompressed, bfinal);
      until bfinal;
      trailer := gets(compressedStream, 8);
      if length(trailer) <> 8 or
          bytes2Int(trailer[1 fixLen 4], UNSIGNED, LE) <> ord(crc32(uncompressed)) or
          bytes2Int(trailer[5 fixLen 4], UNSIGNED, LE) <> length(uncompressed) mod 2 ** 32 then
        raise RANGE_ERROR;
      end if;
    end if;
  end func;


(**
 *  [[file|File]] implementation type to decompress a GZIP file.
 *  GZIP is a file format used for compression.
 *)
const type: gzipFile is sub null_file struct
    var lsbInBitStream: compressedStream is lsbInBitStream.value;
    var boolean: bfinal is FALSE;
    var string: uncompressed is "";
    var integer: position is 1;
  end struct;

type_implements_interface(gzipFile, file);


(**
 *  Open a GZIP file for reading (decompression).
 *  GZIP is a file format used for compression. Reading from
 *  the file delivers decompressed data. Writing is not supported.
 *  Gzip uses the DEFLATE compression algorithm. DEFLATE
 *  uses a combination of the LZ77 algorithm and Huffman coding.
 *  Additionally to DEFLATE gzip uses a magic number and a header.
 *  @return the file opened, or [[null_file#STD_NULL|STD_NULL]]
 *          if the file is not in GZIP format.
 *)
const func file: openGzipFile (inout file: compressed, READ) is func
  result
    var file: newFile is STD_NULL;
  local
    var gzipHeader: header is gzipHeader.value;
    var gzipFile: new_gzipFile is gzipFile.value;
  begin
    header := readGzipHeader(compressed);
    if header.magic = GZIP_MAGIC then
      new_gzipFile.compressedStream := openLsbInBitStream(compressed);
      newFile := toInterface(new_gzipFile);
    end if;
  end func;


(**
 *  Close a ''gzipFile''.
 *)
const proc: close (in gzipFile: aFile) is noop;


(**
 *  Read a character from a ''gzipFile''.
 *  @return the character read.
 *)
const func char: getc (inout gzipFile: inFile) is func
  result
    var char: charRead is ' ';
  begin
    while inFile.position > length(inFile.uncompressed) and
        not inFile.bfinal do
      processCompressedBlock(inFile.compressedStream,
          inFile.uncompressed, inFile.bfinal);
    end while;
    if inFile.position <= length(inFile.uncompressed) then
      charRead := inFile.uncompressed[inFile.position];
      incr(inFile.position);
    else
      charRead := EOF;
    end if;
  end func;


(**
 *  Read a string with maximum length from a ''gzipFile''.
 *  @return the string read.
 *  @exception RANGE_ERROR The parameter ''maxLength'' is negative.
 *)
const func string: gets (inout gzipFile: inFile, in integer: maxLength) is func
  result
    var string: striRead is "";
  begin
    if maxLength <= 0 then
      if maxLength <> 0 then
        raise RANGE_ERROR;
      end if;
    else
      while maxLength > succ(length(inFile.uncompressed) - inFile.position) and
          not inFile.bfinal do
        processCompressedBlock(inFile.compressedStream,
            inFile.uncompressed, inFile.bfinal);
      end while;
      if maxLength <= succ(length(inFile.uncompressed) - inFile.position) then
        striRead := inFile.uncompressed[inFile.position fixLen maxLength];
        inFile.position +:= maxLength;
      else
        striRead := inFile.uncompressed[inFile.position ..];
        inFile.position := succ(length(inFile.uncompressed));
      end if;
    end if;
  end func;


(**
 *  Determine the end-of-file indicator.
 *  The end-of-file indicator is set if at least one request to read
 *  from the file failed.
 *  @return TRUE if the end-of-file indicator is set, FALSE otherwise.
 *)
const func boolean: eof (in gzipFile: inFile) is
  return inFile.position > length(inFile.uncompressed) and inFile.bfinal;


(**
 *  Determine if at least one character can be read successfully.
 *  This function allows a file to be handled like an iterator.
 *  @return FALSE if ''getc'' would return EOF, TRUE otherwise.
 *)
const func boolean: hasNext (inout gzipFile: inFile) is func
  result
    var boolean: hasNext is FALSE;
  begin
    while inFile.position > length(inFile.uncompressed) and
        not inFile.bfinal do
      processCompressedBlock(inFile.compressedStream,
          inFile.uncompressed, inFile.bfinal);
    end while;
    hasNext := inFile.position <= length(inFile.uncompressed);
  end func;


(**
 *  Obtain the length of a file.
 *  The file length is measured in bytes.
 *  @return the length of a file, or 0 if it cannot be obtained.
 *)
const func integer: length (inout gzipFile: aFile) is func
  result
    var integer: length is 0;
  begin
    while not aFile.bfinal do
      processCompressedBlock(aFile.compressedStream,
          aFile.uncompressed, aFile.bfinal);
    end while;
    length := length(aFile.uncompressed);
  end func;


(**
 *  Determine if the file ''aFile'' is seekable.
 *  If a file is seekable the functions ''seek'' and ''tell''
 *  can be used to set and and obtain the current file position.
 *  @return TRUE, since a ''gzipFile'' is seekable.
 *)
const boolean: seekable (in gzipFile: aFile) is TRUE;


(**
 *  Set the current file position.
 *  The file position is measured in bytes from the start of the file.
 *  The first byte in the file has the position 1.
 *  @exception RANGE_ERROR The file position is negative or zero.
 *)
const proc: seek (inout gzipFile: aFile, in integer: position) is func
  begin
    if position <= 0 then
      raise RANGE_ERROR;
    else
      aFile.position := position;
    end if;
  end func;


(**
 *  Obtain the current file position.
 *  The file position is measured in bytes from the start of the file.
 *  The first byte in the file has the position 1.
 *  @return the current file position.
 *)
const func integer: tell (in gzipFile: aFile) is
  return aFile.position;


(**
 *  Compress a string to the gzip format.
 *  Gzip uses the DEFLATE compression algorithm. DEFLATE uses
 *  a combination of the LZ77 algorithm and Huffman coding.
 *  Additionally to DEFLATE gzip uses a magic number and a header.
 *  @return the compressed string.
 *)
const func string: gzip (in string: uncompressed) is func
  result
    var string: compressed is "";
  local
    var lsbOutBitStream: compressedStream is lsbOutBitStream.value;
  begin
    write(compressedStream,
          GZIP_MAGIC &
          "\8;" &  # Compression method: Deflate
          "\0;" &  # Flags
          bytes(timestamp1970(time(NOW)), UNSIGNED, LE, 4) &
          "\0;" &  # Extra flags
          "\3;");  # Operating system: Unix
    deflateBlock(uncompressed, compressedStream, TRUE);
    flush(compressedStream);
    compressed := getBytes(compressedStream);
    compressed &:= bytes(ord(crc32(uncompressed)),         UNSIGNED, LE, 4) &
                   bytes(length(uncompressed) mod 2 ** 32, UNSIGNED, LE, 4);
  end func;


(**
 *  [[file|File]] implementation type to compress data with the GZIP format.
 *)
const type: gzipWriteFile is sub null_file struct
    var file: destFile is STD_NULL;
    var deflateData: deflateState is deflateData.value;
    var bin32: crc32 is bin32(0);
    var integer: uncompressedLength is 0;
    var string: uncompressed is "";
    var lsbOutBitStream: compressedStream is lsbOutBitStream.value;
  end struct;

type_implements_interface(gzipWriteFile, file);


(**
 *  Open a GZIP file for writing (compression)
 *  GZIP is a file format used for compression. Writing to the file
 *  compresses the data to ''destFile''. Reading is not supported.
 *  Gzip uses the DEFLATE compression algorithm. DEFLATE
 *  uses a combination of the LZ77 algorithm and Huffman coding.
 *  Additionally to DEFLATE gzip uses a magic number and a header.
 *  @return the file opened.
 *)
const func file: openGzipFile (inout file: destFile, WRITE) is func
  result
    var file: newFile is STD_NULL;
  local
    var gzipWriteFile: new_gzipWriteFile is gzipWriteFile.value;
  begin
    write(destFile,
          GZIP_MAGIC &
          "\8;" &  # Compression method: Deflate
          "\0;" &  # Flags
          bytes(timestamp1970(time(NOW)), UNSIGNED, LE, 4) &
          "\0;" &  # Extra flags
          "\3;");  # Operating system: Unix
    new_gzipWriteFile.destFile := destFile;
    newFile := toInterface(new_gzipWriteFile);
  end func;


(**
 *  Close a ''gzipWriteFile''.
 *)
const proc: close (inout gzipWriteFile: aFile) is func
  begin
    deflateBlock(aFile.deflateState, aFile.uncompressed,
                 length(aFile.uncompressed), aFile.compressedStream, TRUE);
    flush(aFile.compressedStream);
    write(aFile.destFile,
          getBytes(aFile.compressedStream) &
          bytes(crc32(aFile.uncompressed, aFile.crc32), LE, 4) &
          bytes((length(aFile.uncompressed) +
                 aFile.uncompressedLength) mod 2 ** 32, UNSIGNED, LE, 4));
    flush(aFile.destFile);
  end func;


(**
 *  Write the [[string]] ''stri'' to a ''gzipWriteFile''.
 *)
const proc: write (inout gzipWriteFile: outFile, in string: stri) is func
  local
    const integer: blockSize is 1000000;
  begin
    if length(outFile.uncompressed) > blockSize and length(stri) <> 0 then
      deflateBlock(outFile.deflateState, outFile.uncompressed,
                   length(outFile.uncompressed), outFile.compressedStream, FALSE);
      # Do not flush. A partially filled byte is kept in outFile.compressedStream.
      write(outFile.destFile, getBytes(outFile.compressedStream));
      outFile.deflateState.uncompressedPos := 1;
      outFile.deflateState.dictionary := lookupDict.value;
      outFile.deflateState.slidingWindow := slidingWindowType times -32768;
      outFile.crc32 := crc32(outFile.uncompressed, outFile.crc32);
      outFile.uncompressedLength +:= length(outFile.uncompressed);
      outFile.uncompressed := "";
    end if;
    outFile.uncompressed &:= stri;
  end func;


(**
 *  Obtain the length of a file.
 *  The number of uncompressed characters written to the file.
 *  @return the length of a file.
 *)
const func integer: length (in gzipWriteFile: outFile) is
  return outFile.uncompressedLength + length(outFile.uncompressed);


(**
 *  Obtain the current file position.
 *  The file position is measured in bytes from the start of the file.
 *  The first byte in the file has the position 1.
 *  @return the current file position.
 *)
const func integer: tell (in gzipWriteFile: outFile) is
  return succ(outFile.uncompressedLength + length(outFile.uncompressed));