(********************************************************************)
(*                                                                  *)
(*  magic.s7i     Get magic numbers from strings and files.         *)
(*  Copyright (C) 2024  Thomas Mertes                               *)
(*                                                                  *)
(*  This file is part of the Seed7 Runtime Library.                 *)
(*                                                                  *)
(*  The Seed7 Runtime Library is free software; you can             *)
(*  redistribute it and/or modify it under the terms of the GNU     *)
(*  Lesser General Public License as published by the Free Software *)
(*  Foundation; either version 2.1 of the License, or (at your      *)
(*  option) any later version.                                      *)
(*                                                                  *)
(*  The Seed7 Runtime Library is distributed in the hope that it    *)
(*  will be useful, but WITHOUT ANY WARRANTY; without even the      *)
(*  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR *)
(*  PURPOSE.  See the GNU Lesser General Public License for more    *)
(*  details.                                                        *)
(*                                                                  *)
(*  You should have received a copy of the GNU Lesser General       *)
(*  Public License along with this program; if not, write to the    *)
(*  Free Software Foundation, Inc., 51 Franklin Street,             *)
(*  Fifth Floor, Boston, MA  02110-1301, USA.                       *)
(*                                                                  *)
(********************************************************************)


include "bytedata.s7i";


# Magic numbers (signatures) as they appear byte by byte in a file.
# For formats with _LE / _BE variants the string holds the bytes in the
# order in which they are stored in a little- respectively big-endian file.
const string: MAGIC_AR               is "!<arch>\n";
const string: MAGIC_BMP              is "BM";
const string: MAGIC_BZIP2            is "BZh";
const string: MAGIC_CPIO_BINARY_BE   is "\16#71;\16#c7;";
const string: MAGIC_CPIO_BINARY_LE   is "\16#c7;\16#71;";
const string: MAGIC_CPIO_ASCII_OLD   is "070707";
const string: MAGIC_CPIO_ASCII       is "070701";
const string: MAGIC_CPIO_ASCII_CRC   is "070702";
const string: MAGIC_ELF_32_BIT_LE    is "\127;ELF\1;\1;";
const string: MAGIC_ELF_64_BIT_LE    is "\127;ELF\2;\1;";
const string: MAGIC_ELF_32_BIT_BE    is "\127;ELF\1;\2;";
const string: MAGIC_ELF_64_BIT_BE    is "\127;ELF\2;\2;";
const string: MAGIC_GIF_87           is "GIF87a";
const string: MAGIC_GIF_89           is "GIF89a";
const string: MAGIC_GZIP             is "\31;\139;";
const string: MAGIC_ICO              is "\0;\0;\1;\0;";
const string: MAGIC_JPEG             is "\16#ff;\16#d8;\16#ff;";
# The Mach-O magic is the 32-bit value 16#feedface (32-bit) or
# 16#feedfacf (64-bit), written in the byte order of the file.
# A little-endian file therefore starts with the low byte (ce / cf).
const string: MAGIC_MACH_O_32_BIT_LE is "\16#ce;\16#fa;\16#ed;\16#fe;";
const string: MAGIC_MACH_O_64_BIT_LE is "\16#cf;\16#fa;\16#ed;\16#fe;";
const string: MAGIC_MACH_O_32_BIT_BE is "\16#fe;\16#ed;\16#fa;\16#ce;";
const string: MAGIC_MACH_O_64_BIT_BE is "\16#fe;\16#ed;\16#fa;\16#cf;";
const string: MAGIC_MZ               is "MZ";
const string: MAGIC_PDF              is "%PDF-";
const string: MAGIC_PE               is "PE\0;\0;";
const string: MAGIC_PNG              is "\137;PNG\r\n\26;\n";
const string: MAGIC_PBM_ASCII        is "P1";
const string: MAGIC_PGM_ASCII        is "P2";
const string: MAGIC_PPM_ASCII        is "P3";
const string: MAGIC_PBM_BINARY       is "P4";
const string: MAGIC_PGM_BINARY       is "P5";
const string: MAGIC_PPM_BINARY       is "P6";
const string: MAGIC_RAR              is "Rar!\26;\7;";
const string: MAGIC_RIFF             is "RIFF";
const string: MAGIC_RPM              is "\16#ed;\16#ab;\16#ee;\16#db;";
const string: MAGIC_SEVEN_ZIP        is "7z\188;\175;'\28;";
const string: MAGIC_SHEBANG          is "#!";
const string: MAGIC_SQLITE3          is "SQLite format 3\0;";
const string: MAGIC_TAR              is "ustar";
const string: MAGIC_TIFF_LE          is "II\42;\0;";
const string: MAGIC_TIFF_BE          is "MM\0;\42;";
const string: MAGIC_UTF8             is "\16#ef;\16#bb;\16#bf;";
const string: MAGIC_UTF16LE          is "\16#ff;\16#fe;";
const string: MAGIC_UTF16BE          is "\16#fe;\16#ff;";
const string: MAGIC_WASM             is "\0;asm";
const string: MAGIC_WEBP             is "WEBPVP8";
const string: MAGIC_ZIP              is "PK\3;\4;";
const string: MAGIC_ZIP_EMPTY        is "PK\5;\6;";
const string: MAGIC_XZ               is "\16#FD;7zXZ\0;";
const string: MAGIC_ZSTD             is "(\16#B5;/\16#FD;";


(**
 *  Determine the magic number (signature) of the given [[string]] ''data''.
 *  Prefixes of ''data'' with the lengths 2, 3, 4, 5, 6, 8 and 16 are
 *  compared, in this order, with the known signatures. The first
 *  match determines the result. If a prefix does not match the "ustar"
 *  signature at position 258 is checked last.
 *  Data starting with "MZ" is reported as MAGIC_PE, if the unsigned
 *  little-endian offset at the positions 61 to 64 refers to "PE\0;\0;".
 *  RIFF data is reported as MAGIC_WEBP, if "WEBPVP8" is at position 9.
 *  @return the magic number (signature), or
 *          "" if no magic number was found.
 *)
const func string: getMagic (in string: data) is func
  result
    var string: magic is "";
  local
    var integer: dataLen is 0;
    var string: prefix is "";
    var integer: peOffset is 0;
  begin
    dataLen := length(data);

    # Every stage below is skipped as soon as a signature has been found.

    # Signatures with a length of 2 (including the MZ / PE check).
    if dataLen >= 2 then
      prefix := data[.. 2];
      if  prefix = MAGIC_BMP            or
          prefix = MAGIC_CPIO_BINARY_BE or
          prefix = MAGIC_CPIO_BINARY_LE or
          prefix = MAGIC_GZIP           or
          prefix = MAGIC_PBM_ASCII      or
          prefix = MAGIC_PGM_ASCII      or
          prefix = MAGIC_PPM_ASCII      or
          prefix = MAGIC_PBM_BINARY     or
          prefix = MAGIC_PGM_BINARY     or
          prefix = MAGIC_PPM_BINARY     or
          prefix = MAGIC_SHEBANG        or
          prefix = MAGIC_UTF16LE        or
          prefix = MAGIC_UTF16BE        then
        magic := prefix;
      elsif prefix = "MZ" then
        # Plain MZ unless the header points to a PE signature.
        magic := MAGIC_MZ;
        if dataLen >= 64 then
          peOffset := bytes2Int(data[61 fixLen 4], UNSIGNED, LE);
          if dataLen >= peOffset + 4 and
              data[succ(peOffset) fixLen 4] = MAGIC_PE then
            magic := MAGIC_PE;
          end if;
        end if;
      end if;
    end if;

    # Signatures with a length of 3.
    if magic = "" and dataLen >= 3 then
      prefix := data[.. 3];
      if  prefix = MAGIC_BZIP2 or
          prefix = MAGIC_JPEG  or
          prefix = MAGIC_UTF8  then
        magic := prefix;
      end if;
    end if;

    # Signatures with a length of 4 (including the RIFF / WEBP check).
    if magic = "" and dataLen >= 4 then
      prefix := data[.. 4];
      if  prefix = MAGIC_ICO              or
          prefix = MAGIC_MACH_O_32_BIT_LE or
          prefix = MAGIC_MACH_O_64_BIT_LE or
          prefix = MAGIC_MACH_O_32_BIT_BE or
          prefix = MAGIC_MACH_O_64_BIT_BE or
          prefix = MAGIC_RPM              or
          prefix = MAGIC_TIFF_LE          or
          prefix = MAGIC_TIFF_BE          or
          prefix = MAGIC_WASM             or
          prefix = MAGIC_ZIP              or
          prefix = MAGIC_ZIP_EMPTY        or
          prefix = MAGIC_ZSTD             then
        magic := prefix;
      elsif prefix = MAGIC_RIFF then
        if dataLen >= 15 and
            data[9 fixLen 7] = MAGIC_WEBP then
          magic := MAGIC_WEBP;
        else
          magic := MAGIC_RIFF;
        end if;
      end if;
    end if;

    # Signatures with a length of 5.
    if magic = "" and dataLen >= 5 then
      prefix := data[.. 5];
      if prefix = MAGIC_PDF then
        magic := prefix;
      end if;
    end if;

    # Signatures with a length of 6.
    if magic = "" and dataLen >= 6 then
      prefix := data[.. 6];
      if  prefix = MAGIC_CPIO_ASCII_OLD or
          prefix = MAGIC_CPIO_ASCII     or
          prefix = MAGIC_CPIO_ASCII_CRC or
          prefix = MAGIC_ELF_32_BIT_LE  or
          prefix = MAGIC_ELF_64_BIT_LE  or
          prefix = MAGIC_ELF_32_BIT_BE  or
          prefix = MAGIC_ELF_64_BIT_BE  or
          prefix = MAGIC_GIF_87         or
          prefix = MAGIC_GIF_89         or
          prefix = MAGIC_RAR            or
          prefix = MAGIC_SEVEN_ZIP      or
          prefix = MAGIC_XZ             then
        magic := prefix;
      end if;
    end if;

    # Signatures with a length of 8.
    if magic = "" and dataLen >= 8 then
      prefix := data[.. 8];
      if  prefix = MAGIC_AR  or
          prefix = MAGIC_PNG then
        magic := prefix;
      end if;
    end if;

    # Signatures with a length of 16.
    if magic = "" and dataLen >= 16 then
      prefix := data[.. 16];
      if prefix = MAGIC_SQLITE3 then
        magic := prefix;
      end if;
    end if;

    # The tar signature is not a prefix. It is at the positions 258 to 262.
    if magic = "" and dataLen >= 262 then
      prefix := data[258 fixLen 5];
      if prefix = MAGIC_TAR then
        magic := prefix;
      end if;
    end if;
  end func;


(**
 *  Determine the magic number (signature) of the given [[file]] ''inFile''.
 *  The bytes are read sequentially with ''gets''. Only as many bytes
 *  as necessary to decide about a signature are requested. The
 *  function does not seek back afterwards.
 *  A file starting with "MZ" is reported as MAGIC_PE, if the unsigned
 *  little-endian offset at the positions 61 to 64 refers to "PE\0;\0;".
 *  An offset that points into the first 64 bytes is checked against
 *  the bytes already read, without requesting more data.
 *  RIFF data is reported as MAGIC_WEBP, if "WEBPVP8" is at position 9.
 *  @return the magic number (signature), or
 *          "" if no magic number was found.
 *)
const func string: getMagic (inout file: inFile) is func
  result
    var string: magic is "";
  local
    var string: magicBytes is "";
    var integer: offset is 0;
  begin
    magicBytes := gets(inFile, 2);
    if length(magicBytes) = 2 then
      if  magicBytes = MAGIC_BMP            or
          magicBytes = MAGIC_CPIO_BINARY_BE or
          magicBytes = MAGIC_CPIO_BINARY_LE or
          magicBytes = MAGIC_GZIP           or
          magicBytes = MAGIC_PBM_ASCII      or
          magicBytes = MAGIC_PGM_ASCII      or
          magicBytes = MAGIC_PPM_ASCII      or
          magicBytes = MAGIC_PBM_BINARY     or
          magicBytes = MAGIC_PGM_BINARY     or
          magicBytes = MAGIC_PPM_BINARY     or
          magicBytes = MAGIC_SHEBANG        or
          magicBytes = MAGIC_UTF16LE        or
          magicBytes = MAGIC_UTF16BE        then
        magic := magicBytes;
      elsif magicBytes = MAGIC_MZ then
        # Read the rest of the 64 byte header.
        magicBytes &:= gets(inFile, 62);
        if length(magicBytes) = 64 then
          offset := bytes2Int(magicBytes[61 fixLen 4], UNSIGNED, LE);
          # offset + 4 bytes are needed and 64 bytes have been read.
          # For an offset below 60 the difference is negative and
          # gets() would raise RANGE_ERROR. In this case the four
          # bytes at the offset are already part of magicBytes.
          if offset > 60 then
            magicBytes &:= gets(inFile, offset - 60);
          end if;
          if length(magicBytes) >= offset + 4 and
              magicBytes[succ(offset) fixLen 4] = MAGIC_PE then
            magic := MAGIC_PE;
          else
            magic := MAGIC_MZ;
          end if;
        else
          magic := MAGIC_MZ;
        end if;
      else
        # Extend the prefix to 3 bytes.
        magicBytes &:= gets(inFile, 1);
        if length(magicBytes) = 3 then
          if  magicBytes = MAGIC_BZIP2 or
              magicBytes = MAGIC_JPEG  or
              magicBytes = MAGIC_UTF8  then
            magic := magicBytes;
          else
            # Extend the prefix to 4 bytes.
            magicBytes &:= gets(inFile, 1);
            if length(magicBytes) = 4 then
              if  magicBytes = MAGIC_ICO              or
                  magicBytes = MAGIC_MACH_O_32_BIT_LE or
                  magicBytes = MAGIC_MACH_O_64_BIT_LE or
                  magicBytes = MAGIC_MACH_O_32_BIT_BE or
                  magicBytes = MAGIC_MACH_O_64_BIT_BE or
                  magicBytes = MAGIC_RPM              or
                  magicBytes = MAGIC_TIFF_LE          or
                  magicBytes = MAGIC_TIFF_BE          or
                  magicBytes = MAGIC_WASM             or
                  magicBytes = MAGIC_ZIP              or
                  magicBytes = MAGIC_ZIP_EMPTY        or
                  magicBytes = MAGIC_ZSTD             then
                magic := magicBytes;
              elsif magicBytes = MAGIC_RIFF then
                # Read up to position 15 to look for "WEBPVP8" at position 9.
                magicBytes &:= gets(inFile, 11);
                if length(magicBytes) = 15 and
                    magicBytes[9 fixLen 7] = MAGIC_WEBP then
                  magic := MAGIC_WEBP;
                else
                  magic := MAGIC_RIFF;
                end if;
              else
                # Extend the prefix to 5 bytes.
                magicBytes &:= gets(inFile, 1);
                if length(magicBytes) = 5 then
                  if  magicBytes = MAGIC_PDF then
                    magic := magicBytes;
                  else
                    # Extend the prefix to 6 bytes.
                    magicBytes &:= gets(inFile, 1);
                    if length(magicBytes) = 6 then
                      if  magicBytes = MAGIC_CPIO_ASCII_OLD or
                          magicBytes = MAGIC_CPIO_ASCII     or
                          magicBytes = MAGIC_CPIO_ASCII_CRC or
                          magicBytes = MAGIC_ELF_32_BIT_LE  or
                          magicBytes = MAGIC_ELF_64_BIT_LE  or
                          magicBytes = MAGIC_ELF_32_BIT_BE  or
                          magicBytes = MAGIC_ELF_64_BIT_BE  or
                          magicBytes = MAGIC_GIF_87         or
                          magicBytes = MAGIC_GIF_89         or
                          magicBytes = MAGIC_RAR            or
                          magicBytes = MAGIC_SEVEN_ZIP      or
                          magicBytes = MAGIC_XZ             then
                        magic := magicBytes;
                      else
                        # Extend the prefix to 8 bytes.
                        magicBytes &:= gets(inFile, 2);
                        if length(magicBytes) = 8 then
                          if  magicBytes = MAGIC_AR  or
                              magicBytes = MAGIC_PNG then
                            magic := magicBytes;
                          else
                            # Extend the prefix to 16 bytes.
                            magicBytes &:= gets(inFile, 8);
                            if length(magicBytes) = 16 then
                              if  magicBytes = MAGIC_SQLITE3 then
                                magic := magicBytes;
                              else
                                # Read up to position 262, where "ustar" ends.
                                magicBytes &:= gets(inFile, 246);
                                if length(magicBytes) = 262 then
                                  if endsWith(magicBytes, MAGIC_TAR) then
                                    magic := MAGIC_TAR;
                                  end if;
                                end if;
                              end if;
                            end if;
                          end if;
                        end if;
                      end if;
                    end if;
                  end if;
                end if;
              end if;
            end if;
          end if;
        end if;
      end if;
    end if;
  end func;


(**
 *  Determine a description corresponding to a magic number (signature).
 *  The parameter ''magic'' is compared with the MAGIC_... constants,
 *  so a value returned by ''getMagic'' can be used directly.
 *  @param magic The magic number (signature) to be described.
 *  @return the description of a magic number, or
 *          "" if ''magic'' is not one of the known magic numbers.
 *)
const func string: magicDescription (in string: magic) is func
  result
    var string: description is "";
  begin
    case magic of
      when {MAGIC_AR}:               description := "current ar archive";
      when {MAGIC_BMP}:              description := "PC bitmap";
      when {MAGIC_BZIP2}:            description := "bzip2 compressed data";
      when {MAGIC_CPIO_BINARY_BE,
            MAGIC_CPIO_BINARY_LE,
            MAGIC_CPIO_ASCII_OLD,
            MAGIC_CPIO_ASCII,
            MAGIC_CPIO_ASCII_CRC}:   description := "cpio archive";
      when {MAGIC_ELF_32_BIT_LE}:    description := "ELF 32-bit LSB executable";
      when {MAGIC_ELF_64_BIT_LE}:    description := "ELF 64-bit LSB executable";
      when {MAGIC_ELF_32_BIT_BE}:    description := "ELF 32-bit MSB executable";
      when {MAGIC_ELF_64_BIT_BE}:    description := "ELF 64-bit MSB executable";
      when {MAGIC_GIF_87,
            MAGIC_GIF_89}:           description := "GIF image data";
      when {MAGIC_GZIP}:             description := "gzip compressed data";
      when {MAGIC_ICO}:              description := "MS Windows icon resource";
      when {MAGIC_JPEG}:             description := "JPEG image data";
      when {MAGIC_MACH_O_32_BIT_LE}: description := "Mach-O 32-bit LSB executable";
      when {MAGIC_MACH_O_64_BIT_LE}: description := "Mach-O 64-bit LSB executable";
      when {MAGIC_MACH_O_32_BIT_BE}: description := "Mach-O 32-bit MSB executable";
      when {MAGIC_MACH_O_64_BIT_BE}: description := "Mach-O 64-bit MSB executable";
      # getMagic() returns MAGIC_MZ for "MZ" data without a PE signature.
      when {MAGIC_MZ}:               description := "MS-DOS executable";
      when {MAGIC_PDF}:              description := "PDF document";
      when {MAGIC_PE}:               description := "PE executable";
      when {MAGIC_PNG}:              description := "PNG image data";
      when {MAGIC_PBM_ASCII}:        description := "Netpbm ASCII bitmap image data";
      when {MAGIC_PGM_ASCII}:        description := "Netpbm ASCII graymap image data";
      when {MAGIC_PPM_ASCII}:        description := "Netpbm ASCII pixmap image data";
      when {MAGIC_PBM_BINARY}:       description := "Netpbm rawbits bitmap image data";
      when {MAGIC_PGM_BINARY}:       description := "Netpbm rawbits graymap image data";
      when {MAGIC_PPM_BINARY}:       description := "Netpbm rawbits pixmap image data";
      when {MAGIC_RAR}:              description := "RAR archive data";
      when {MAGIC_RIFF}:             description := "RIFF data";
      when {MAGIC_RPM}:              description := "RPM v3.0";
      when {MAGIC_SEVEN_ZIP}:        description := "7-zip archive data";
      when {MAGIC_SHEBANG}:          description := "a script";
      when {MAGIC_SQLITE3}:          description := "SQLite 3.x database";
      when {MAGIC_TAR}:              description := "POSIX tar archive";
      when {MAGIC_TIFF_LE,
            MAGIC_TIFF_BE}:          description := "TIFF image data";
      when {MAGIC_UTF8,
            MAGIC_UTF16LE,
            MAGIC_UTF16BE}:          description := "Unicode text";
      when {MAGIC_WASM}:             description := "WebAssembly (wasm) binary module";
      when {MAGIC_WEBP}:             description := "Web/P image data";
      when {MAGIC_ZIP,
            MAGIC_ZIP_EMPTY}:        description := "Zip archive data";
      when {MAGIC_XZ}:               description := "XZ compressed data";
      when {MAGIC_ZSTD}:             description := "Zstandard compressed data";
    end case;
  end func;