(********************************************************************)
(*                                                                  *)
(*  gethttp.s7i   Support to get data with the HTTP protocol        *)
(*  Copyright (C) 2008, 2010, 2011, 2013 - 2015  Thomas Mertes      *)
(*                                                                  *)
(*  This file is part of the Seed7 Runtime Library.                 *)
(*                                                                  *)
(*  The Seed7 Runtime Library is free software; you can             *)
(*  redistribute it and/or modify it under the terms of the GNU     *)
(*  Lesser General Public License as published by the Free Software *)
(*  Foundation; either version 2.1 of the License, or (at your      *)
(*  option) any later version.                                      *)
(*                                                                  *)
(*  The Seed7 Runtime Library is distributed in the hope that it    *)
(*  will be useful, but WITHOUT ANY WARRANTY; without even the      *)
(*  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR *)
(*  PURPOSE.  See the GNU Lesser General Public License for more    *)
(*  details.                                                        *)
(*                                                                  *)
(*  You should have received a copy of the GNU Lesser General       *)
(*  Public License along with this program; if not, write to the    *)
(*  Free Software Foundation, Inc., 51 Franklin Street,             *)
(*  Fifth Floor, Boston, MA  02110-1301, USA.                       *)
(*                                                                  *)
(********************************************************************)


include "scanstri.s7i";
include "socket.s7i";
include "gzip.s7i";
include "charsets.s7i";


var string: proxyServer is "";
var integer: proxyHttpPort is 0;
const integer: httpDefaultPort is 80;
const integer: httpsDefaultPort is 443;


const type: httpLocation is new struct
    var boolean: httpsProtocol is FALSE;
    var string: serverName is "";
    var integer: portNumber is 0;
    var string: hostName is "";
    var string: path is "";
    var string: params is "";
  end struct;


(**
 *  Set the proxy server to be used for getHttp.
 *)
const proc: setProxy (in string: serverName, in integer: portNumber) is func
  begin
    proxyServer := serverName;
    proxyHttpPort := portNumber;
  end func;


const func httpLocation: getHttpLocation (in string: location, in integer: defaultPortNumber) is func
  result
    var httpLocation: locationData is httpLocation.value;
  local
    var integer: slashPos is 0;
    var integer: questionMarkPos is 0;
    var integer: bracketPos is 0;
    var integer: colonPos is 0;
  begin
    # writeln("getHttpLocation: " <& location);
    slashPos := pos(location, "/");
    questionMarkPos := pos(location, "?");
    if slashPos = 0 then
      if questionMarkPos = 0 then
        locationData.hostName := location;
        locationData.path     := "";
        locationData.params   := "";
      else
        locationData.hostName := location[.. pred(questionMarkPos)];
        locationData.path     := "";
        locationData.params   := location[succ(questionMarkPos) ..];
      end if;
    else
      if questionMarkPos = 0 then
        locationData.hostName := location[.. pred(slashPos)];
        locationData.path     := location[succ(slashPos) ..];
        locationData.params   := "";
      elsif slashPos < questionMarkPos then
        locationData.hostName := location[.. pred(slashPos)];
        locationData.path     := location[succ(slashPos) .. pred(questionMarkPos)];
        locationData.params   := location[succ(questionMarkPos) ..];
      else
        locationData.hostName := location[.. pred(questionMarkPos)];
        locationData.path     := "";
        locationData.params   := location[succ(questionMarkPos) ..];
      end if;
    end if;
    bracketPos := pos(locationData.hostName, "]:");
    if bracketPos <> 0 and startsWith(locationData.hostName, "[") and
        isDigitString(locationData.hostName[bracketPos + 2 ..]) then
      locationData.portNumber := integer(locationData.hostName[bracketPos + 2 ..]);
      locationData.hostName := locationData.hostName[2 .. pred(bracketPos)];
    else
      colonPos := pos(locationData.hostName, ":");
      if colonPos <> 0 and
          not isDigitString(locationData.hostName[.. pred(colonPos)]) and
          isDigitString(locationData.hostName[succ(colonPos) ..]) then
        locationData.portNumber := integer(locationData.hostName[succ(colonPos) ..]);
        locationData.hostName := locationData.hostName[.. pred(colonPos)];
      else
        locationData.portNumber := defaultPortNumber;
      end if;
    end if;
    locationData.serverName := locationData.hostName;
  end func;


const func string: toHttpAscii (in string: stri) is func
  result
    var string: encoded is "";
  local
    var string: stri8 is "";
    var integer: pos is 0;
    var integer: start is 1;
    var char: ch is ' ';
  begin
    stri8 := toUtf8(stri);
    for ch key pos range stri8 do
      if ord(ch) >= 127 or ch < ' ' or
          ch in {'%', '/', '?', '&', '=', '+'} then
        encoded &:= stri8[start .. pred(pos)];
        encoded &:= "%" <& ord(ch) RADIX 16 lpad0 2;
        start := succ(pos);
      elsif ch = ' ' then
        encoded &:= stri8[start .. pred(pos)];
        encoded &:= "+";
        start := succ(pos);
      end if;
    end for;
    encoded &:= stri8[start ..];
  end func;


const proc: sendGet (inout file: sock, in httpLocation: location) is func
  local
    var string: address is "";
    var string: request is "";
  begin
    address := "/" & location.path;
    if location.params <> "" then
      address &:= "?" & location.params;
    end if;
    request &:= "GET " <& address <& " HTTP/1.1\r\n";
    request &:= "Host: " <& location.hostName <& "\r\n";
    request &:= "User-Agent: BlackHole" <& "\r\n";
    request &:= "\r\n";
    write(sock, request);
  end func;


const func file: openHttp (in httpLocation: locationData) is func
  result
    var file: sock is STD_NULL;
  begin
    # writeln("openHttp: " <& literal(locationData.serverName) <& " " <& locationData.portNumber);
    # writeln(locationData.hostName <& ":" <& locationData.portNumber <& "/" <& locationData.path);
    # writeln("params=" <& locationData.params);
    if not locationData.httpsProtocol then
      sock := openInetSocket(locationData.serverName, locationData.portNumber);
    end if;
    if sock <> STD_NULL then
      sendGet(sock, locationData);
    end if;
  end func;


const func file: openHttp (in string: serverName, in integer: serverPortNumber,
    in string: location) is func
  result
    var file: sock is STD_NULL;
  local
    var httpLocation: locationData is httpLocation.value;
  begin
    locationData := getHttpLocation(location, httpDefaultPort);
    locationData.serverName := serverName;
    locationData.portNumber := serverPortNumber;
    sock := openHttp(locationData);
  end func;


const func file: openHttp (in string: location) is func
  result
    var file: sock is STD_NULL;
  local
    var httpLocation: locationData is httpLocation.value;
  begin
    locationData := getHttpLocation(location, httpDefaultPort);
    if proxyServer <> "" then
      locationData.serverName := proxyServer;
      locationData.portNumber := proxyHttpPort;
    end if;
    sock := openHttp(locationData);
  end func;


const func string: getHttp (inout file: sock) is func
  result
    var string: data is "";
  local
    var string: line is "";
    var string: lowerCaseLine is "";
    var array string: header is 0 times "";
    var string: transferEncoding is "";
    var string: contentType is "";
    var string: charset is "";
    var string: contentEncoding is "";
    var string: contentLengthStri is "";
    var integer: contentLength is 0;
    var integer: chunkSize is 0;
    var string: buffer is "";
  begin
    line := getln(sock);
    while line <> "" do
      # writeln(line);
      lowerCaseLine := lower(line);
      if startsWith(lowerCaseLine, "transfer-encoding") then
        transferEncoding := trim(lowerCaseLine[succ(pos(lowerCaseLine, ":")) ..]);
      elsif startsWith(lowerCaseLine, "content-type") then
        contentType := trim(line[succ(pos(line, ":")) ..]);
        charset := getValueOfHeaderAttribute(contentType, "charset");
      elsif startsWith(lowerCaseLine, "content-encoding") then
        contentEncoding := trim(lowerCaseLine[succ(pos(lowerCaseLine, ":")) ..]);
      elsif startsWith(lowerCaseLine, "content-length") then
        contentLengthStri := trim(lowerCaseLine[succ(pos(lowerCaseLine, ":")) ..]);
        block
          contentLength := integer(contentLengthStri);
        exception
          catch RANGE_ERROR:
            contentLength := -1;
        end block;
      end if;
      header &:= [] (line);
      line := getln(sock);
    end while;
    if transferEncoding = "chunked" then
      if not eof(sock) then
        line := getln(sock);
        block
          chunkSize := integer(line, 16);
        exception
          catch RANGE_ERROR:
            chunkSize := -1;
        end block;
        while chunkSize > 0 and not eof(sock) do
          repeat
            buffer := gets(sock, chunkSize);
            chunkSize -:= length(buffer);
            data &:= buffer;
          until chunkSize = 0 or eof(sock);
          if not eof(sock) then
            ignore(getln(sock));
            line := getln(sock);
            block
              chunkSize := integer(line, 16);
            exception
              catch RANGE_ERROR:
                chunkSize := -1;
            end block;
          end if;
        end while;
      end if;
    elsif transferEncoding = "identity" or
        transferEncoding = "" then
      if contentLengthStri <> "" then
        while contentLength <> 0 and not eof(sock) do
          buffer := gets(sock, contentLength);
          contentLength -:= length(buffer);
          data &:= buffer;
        end while;
      else
        buffer := gets(sock, 10000000);
        while buffer <> "" do
          data &:= buffer;
          buffer := gets(sock, 10000000);
        end while;
      end if;
    else
      writeln("Unknown Transfer-Encoding: " <& literal(transferEncoding));
      buffer := gets(sock, 10000000);
      while buffer <> "" do
        data &:= buffer;
        buffer := gets(sock, 10000000);
      end while;
    end if;
    # writeln(length(data));
    if contentEncoding = "gzip" then
      data := gunzip(data);
    end if;
    conv2unicodeByName(data, charset);
  end func;


const func string: getHttpStatusCode (inout file: sock) is func
  result
    var string: statusCode is "";
  local
    var string: line is "";
    var string: statusInfo is "";
    var integer: spacePos is 0;
  begin
    line := getln(sock);
    # writeln(line);
    if startsWith(line, "HTTP") then
      spacePos := pos(line, " ");
      if spacePos <> 0 then
        statusInfo := trim(line[spacePos ..]);
        spacePos := pos(statusInfo, " ");
        if spacePos = 0 then
          statusCode := statusInfo;
        else
          statusCode := statusInfo[.. pred(spacePos)];
        end if;
      end if;
    end if;
  end func;


const func httpLocation: getHttpLocation (in httpLocation: currentLocationData,
    inout file: sock) is func
  result
    var httpLocation: locationData is httpLocation.value;
  local
    var string: line is "";
    var string: lowerCaseLine is "";
    var string: location is "";
  begin
    line := getln(sock);
    while line <> "" do
      # writeln(literal(line));
      lowerCaseLine := lower(line);
      if startsWith(lowerCaseLine, "location") then
        location := trim(line[succ(pos(line, ":")) ..]);
        if startsWith(location, "http:") then
          location := trim(location[6 ..]);
          while startsWith(location, "/") do
            location := location[2 ..];
          end while;
          locationData := getHttpLocation(location, httpDefaultPort);
        elsif startsWith(location, "https:") then
          location := trim(location[7 ..]);
          while startsWith(location, "/") do
            location := location[2 ..];
          end while;
          locationData := getHttpLocation(location, httpsDefaultPort);
          locationData.httpsProtocol := TRUE;
        else
          if not startsWith(location, "/") then
            location := currentLocationData.path & "/" & location;
          end if;
          locationData := getHttpLocation(location, currentLocationData.portNumber);
          locationData.httpsProtocol := currentLocationData.httpsProtocol;
          locationData.serverName := currentLocationData.serverName;
          locationData.hostName := currentLocationData.hostName;
        end if;
      end if;
      line := getln(sock);
    end while;
  end func;


const func string: getHttp (in var httpLocation: locationData) is func
  result
    var string: data is "";
  local
    var file: sock is STD_NULL;
    var string: statusCode is "";
    var string: location is "";
    var boolean: okay is TRUE;
    var integer: repeatCount is 0;
  begin
    repeat
      okay := TRUE;
      sock := openHttp(locationData);
      if sock <> STD_NULL then
        statusCode := getHttpStatusCode(sock);
        # writeln("statusCode: " <& statusCode);
        if statusCode = "301" or statusCode = "302" or
            statusCode = "303" or statusCode = "307" then
          locationData := getHttpLocation(locationData, sock);
          close(sock);
          sock := STD_NULL;
          okay := FALSE;
          incr(repeatCount);
        end if;
      end if;
    until okay or repeatCount > 5;
    if sock <> STD_NULL then
      data := getHttp(sock);
      close(sock);
    end if;
  end func;


const func string: getHttp (in string: serverName, in integer: portNumber, in string: location) is func
  result
    var string: data is "";
  local
    var httpLocation: locationData is httpLocation.value;
  begin
    locationData := getHttpLocation(location, httpDefaultPort);
    locationData.serverName := serverName;
    locationData.portNumber := portNumber;
    data := getHttp(locationData);
  end func;


(**
 *  Get data specified by a ''location'' using the HTTP protocol.
 *   getHttp("example.com")
 *   getHttp("www.example.com/index.html")
 *  @param location Url without http:// at the beginning.
 *  @return the string of data found, or "" if nothing was found.
 *)
const func string: getHttp (in string: location) is func
  result
    var string: data is "";
  local
    var httpLocation: locationData is httpLocation.value;
  begin
    locationData := getHttpLocation(location, httpDefaultPort);
    if proxyServer <> "" then
      locationData.serverName := proxyServer;
      locationData.portNumber := proxyHttpPort;
    end if;
    data := getHttp(locationData);
  end func;