include "xmldom.s7i";
include "html_ent.s7i";
include "html.s7i";
const type: htmlDocument is new struct
var string: doctypeName is "";
var array string: doctypeArguments is 0 times "";
var xmlNode: html is xmlNode.value;
end struct;
const func xmlNode: getHtmlRoot (in htmlDocument: document) is
return document.html;
const func string: getDoctypeName (in htmlDocument: document) is
return document.doctypeName;
const func string: getDoctypeParameter (in htmlDocument: document, in integer: index) is
return document.doctypeArguments[index];
const type: htmlDomParserState is new struct
var file: inFile is STD_NULL;
var string: symbol is "";
end struct;
const func xmlNode: readHtmlNode (inout htmlDomParserState: parserState,
in string: parentEndTagHead) is forward;
const func xmlNode: readHtmlContainerSubNodes (inout htmlDomParserState: parserState,
inout xmlContainer: containerElement, in string: parentEndTagHead) is func
result
var xmlNode: node is xmlNode.value;
local
var string: endTagHead is "";
var set of string: alternateEndTags is (set of string).value;
var xmlElement: emptyElement is xmlElement.value;
begin
endTagHead := "</" & containerElement.name;
case containerElement.name of
when {"li"}: alternateEndTags := {"<li"};
if parentEndTagHead in {"</ol", "</ul", "</menu"} then
alternateEndTags |:= {"</ol", "</ul", "</menu"};
end if;
when {"dt", "dd"}: alternateEndTags := {"<dt", "<dd"};
if parentEndTagHead in {"</dl"} then
incl(alternateEndTags, parentEndTagHead);
end if;
when {"td", "th"}: alternateEndTags := {"<td", "<th", "<tr", "<thead", "<tbody", "<tfoot"};
if parentEndTagHead in {"</tr", "</thead", "</tbody", "</tfoot", "</table"} then
alternateEndTags |:= {"</tr", "</thead", "</tbody", "</tfoot", "</table"};
end if;
when {"tr"}: alternateEndTags := {"<tr", "<thead", "<tbody", "<tfoot"};
if parentEndTagHead in {"</thead", "</tbody", "</tfoot", "</table"} then
alternateEndTags |:= {"</thead", "</tbody", "</tfoot", "</table"};
end if;
when {"thead", "tbody", "tfoot"}:
alternateEndTags := {"<thead", "<tbody", "<tfoot"};
if parentEndTagHead in {"</table"} then
incl(alternateEndTags, parentEndTagHead);
end if;
when {"option"}: alternateEndTags := {"<option", "</select"};
when {"p"}: alternateEndTags := {"<address", "<article", "<aside",
"<blockquote", "<details", "<div", "<dl",
"<fieldset", "<figcaption", "<figure", "<footer",
"<form", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6",
"<header", "<hgroup", "<hr", "<menu", "<nav",
"<ol", "<p", "<pre", "<section", "<table", "<ul"};
if parentEndTagHead not in {"</a", "</audio", "</del", "</ins",
"</map", "</noscript", "</video"} then
incl(alternateEndTags, parentEndTagHead);
end if;
otherwise: alternateEndTags := {parentEndTagHead};
end case;
parserState.symbol := getXmlTagHeadOrContent(parserState.inFile);
while parserState.symbol <> "" and lower(parserState.symbol) <> endTagHead and
lower(parserState.symbol) not in alternateEndTags do
containerElement.subNodes &:= [] (readHtmlNode(parserState, endTagHead));
end while;
if lower(parserState.symbol) = endTagHead then
skipXmlTag(parserState.inFile);
parserState.symbol := getXmlTagHeadOrContent(parserState.inFile);
end if;
if length(containerElement.subNodes) = 0 then
emptyElement.name := containerElement.name;
emptyElement.attributes := containerElement.attributes;
node := toInterface(emptyElement);
else
node := toInterface(containerElement);
end if;
end func;
const func xmlNode: readHtmlNode (inout htmlDomParserState: parserState,
in string: parentEndTagHead) is func
result
var xmlNode: node is xmlNode.value;
local
var xmlContainer: containerElement is xmlContainer.value;
var xmlElement: emptyElement is xmlElement.value;
var xmlText: currentText is xmlText.value;
var string: attributeName is "";
var string: attributeValue is "";
begin
if startsWith(parserState.symbol, "<") then
containerElement.name := lower(parserState.symbol[2 ..]);
getNextHtmlAttribute(parserState.inFile, attributeName, attributeValue);
while attributeName <> "" do
containerElement.attributes @:= [lower(attributeName)]
decodeHtmlEntities(attributeValue);
getNextHtmlAttribute(parserState.inFile, attributeName, attributeValue);
end while;
if attributeValue = "/>" or containerElement.name in voidHtmlElements or
startsWith(containerElement.name, "/") then
emptyElement.name := containerElement.name;
emptyElement.attributes := containerElement.attributes;
node := toInterface(emptyElement);
parserState.symbol := getXmlTagHeadOrContent(parserState.inFile);
else
node := readHtmlContainerSubNodes(parserState, containerElement, parentEndTagHead);
end if;
else
currentText.content := decodeHtmlEntities(rtrim(parserState.symbol));
node := toInterface(currentText);
parserState.symbol := getXmlTagHeadOrContent(parserState.inFile);
end if;
end func;
const func htmlDocument: readHtml (inout file: inFile) is func
result
var htmlDocument: document is htmlDocument.value;
local
var string: symbol is "";
var string: argument is "";
var htmlDomParserState: parserState is htmlDomParserState.value;
begin
symbol := getXmlTagHeadOrContent(inFile);
while startsWith(symbol, "<?") do
skipXmlTag(inFile);
symbol := getXmlTagHeadOrContent(inFile);
end while;
if startsWith(symbol, "<!") and upper(symbol) = "<!DOCTYPE" then
document.doctypeName := symbol[2 ..];
argument := getSymbolInXmlTag(inFile);
while argument <> ">" do
if startsWith(argument, "\"") then
document.doctypeArguments &:= argument & "\"";
else
document.doctypeArguments &:= argument;
end if;
argument := getSymbolInXmlTag(inFile);
end while;
symbol := getXmlTagHeadOrContent(inFile);
else
document.doctypeName := "!DOCTYPE";
document.doctypeArguments := [] ("HTML");
end if;
parserState.inFile := inFile;
parserState.symbol := symbol;
document.html := readHtmlNode(parserState, "");
end func;
const func htmlDocument: readHtml (in string: htmlStri) is func
result
var htmlDocument: document is htmlDocument.value;
local
var file: htmlFile is STD_NULL;
begin
htmlFile := openStriFile(htmlStri);
document := readHtml(htmlFile);
end func;
const proc: writeHtml (inout file: outFile, in htmlDocument: document) is func
local
var string: argument is "";
begin
write(outFile, "<" <& document.doctypeName);
for argument range document.doctypeArguments do
write(outFile, " " <& argument);
end for;
writeln(outFile, ">");
writeXml(outFile, document.html);
end func;
const proc: writeHtml (in htmlDocument: document) is func
begin
writeHtml(OUT, document);
end func;