$ include "seed7_05.s7i";
include "stdio.s7i";
include "osfiles.s7i";
include "charsets.s7i";
include "utf8.s7i";
include "console.s7i";
(**
 *  Entry point of toutf8: convert a file from a given codepage to UTF-8.
 *  Command line: toutf8 -codepage infile [outfile]
 *  The first argument names the codepage (a leading "-" is stripped).
 *  With fewer than two arguments, or with the codepage "?", the usage
 *  text is written instead. The whole input file is read into a string,
 *  converted to Unicode and written to outfile with openUtf8, or to
 *  STD_CONSOLE when no outfile is given. When the converted text is empty
 *  (empty input or unsupported codepage) nothing is written.
 *)
const proc: main is func
  local
    var string: conv_name is "";
    var string: in_name is "";
    var string: out_name is "";
    var file: in_file is STD_NULL;
    var file: out_file is STD_NULL;
    var string: stri is "";
  begin
    if length(argv(PROGRAM)) >= 1 then
      conv_name := argv(PROGRAM)[1];
      # Accept the codepage with or without a leading option dash.
      if startsWith(conv_name, "-") then
        conv_name := conv_name[2 ..];
      end if;
    end if;
    if length(argv(PROGRAM)) < 2 or conv_name = "?" then
      writeln("Toutf8 Version 1.0 - Convert a file to UTF-8");
      writeln("Copyright (C) 2006, 2010, 2015 Thomas Mertes");
      writeln("This is free software; see the source for copying conditions. There is NO");
      writeln("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.");
      writeln("Toutf8 is written in the Seed7 programming language");
      writeln("Homepage: http://seed7.sourceforge.net");
      writeln;
      writeln("usage: toutf8 -codepage infile [outfile]");
      writeln;
      writeln("Converts a file encoded with a codepage to UTF-8.");
      writeln("The following codepages are supported:");
      writeln(" 437, 708, 720, 737, 775, 850, 852, 855, 857, 858, 860, 861, 862, 863,");
      writeln(" 864, 865, 866, 869, 874, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,");
      writeln(" 1257, 1258, 8859-1, 8859-2, 8859-3, 8859-4, 8859-5, 8859-6, 8859-7,");
      writeln(" 8859-8, 8859-9, 8859-10, 8859-11, 8859-13, 8859-14, 8859-15, 8859-16,");
      writeln(" latin-1, latin-2, latin-3, latin-4, latin-5, latin-6, latin-7, latin-8,");
      writeln(" latin-9, 037, 273, 277, 280, 285, 297, 500, 1047");
      writeln("The following IANA/MIME charset names are also accepted:");
      writeln(" ANSI_X3.4-1968, ARMSCII-8, ASCII, CP437, CP850, GEOSTD8, IBM437, IBM850,");
      writeln(" ISO_8859-1, ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,");
      writeln(" ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, ISO-8859-11,");
      writeln(" ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16, KOI8-R, KOI8-U,");
      writeln(" MACINTOSH, NS_4551-1, TIS-620, US-ASCII, UTF-16BE, UTF-16LE, UTF-7, UTF-8,");
      writeln(" UTF8, VISCII, WINDOWS-1250, WINDOWS-1251, WINDOWS-1252, WINDOWS-1253,");
      writeln(" WINDOWS-1254, WINDOWS-1255, WINDOWS-1256, WINDOWS-1257, WINDOWS-1258");
    else
      in_name := convDosPath(argv(PROGRAM)[2]);
      if length(argv(PROGRAM)) >= 3 then
        # Convert the output path the same way as the input path, so a
        # DOS/Windows style path is accepted for both arguments.
        out_name := convDosPath(argv(PROGRAM)[3]);
      end if;
      in_file := open(in_name, "r");
      if in_file <> STD_NULL then
        # Read the complete file at once; the conversions work on the whole string.
        stri := gets(in_file, length(in_file));
        close(in_file);
        if conv_name = "437" then
          conv2unicode(stri, cp_437);
        elsif conv_name = "708" then
          conv2unicode(stri, cp_708);
        elsif conv_name = "720" then
          conv2unicode(stri, cp_720);
        elsif conv_name = "737" then
          conv2unicode(stri, cp_737);
        elsif conv_name = "775" then
          conv2unicode(stri, cp_775);
        elsif conv_name = "850" then
          conv2unicode(stri, cp_850);
        elsif conv_name = "852" then
          conv2unicode(stri, cp_852);
        elsif conv_name = "855" then
          conv2unicode(stri, cp_855);
        elsif conv_name = "857" then
          conv2unicode(stri, cp_857);
        elsif conv_name = "858" then
          conv2unicode(stri, cp_858);
        elsif conv_name = "860" then
          conv2unicode(stri, cp_860);
        elsif conv_name = "861" then
          conv2unicode(stri, cp_861);
        elsif conv_name = "862" then
          conv2unicode(stri, cp_862);
        elsif conv_name = "863" then
          conv2unicode(stri, cp_863);
        elsif conv_name = "864" then
          conv2unicode(stri, cp_864);
        elsif conv_name = "865" then
          conv2unicode(stri, cp_865);
        elsif conv_name = "866" then
          conv2unicode(stri, cp_866);
        elsif conv_name = "869" then
          conv2unicode(stri, cp_869);
        elsif conv_name = "874" then
          conv2unicode(stri, cp_874);
        elsif conv_name = "1125" then
          conv2unicode(stri, cp_1125);
        elsif conv_name = "1250" then
          conv2unicode(stri, cp_1250);
        elsif conv_name = "1251" then
          conv2unicode(stri, cp_1251);
        elsif conv_name = "1252" then
          conv2unicode(stri, cp_1252);
        elsif conv_name = "1253" then
          conv2unicode(stri, cp_1253);
        elsif conv_name = "1254" then
          conv2unicode(stri, cp_1254);
        elsif conv_name = "1255" then
          conv2unicode(stri, cp_1255);
        elsif conv_name = "1256" then
          conv2unicode(stri, cp_1256);
        elsif conv_name = "1257" then
          conv2unicode(stri, cp_1257);
        elsif conv_name = "1258" then
          conv2unicode(stri, cp_1258);
        elsif conv_name = "8859-1" or
            conv_name = "latin-1" then
          # No conversion is applied for 8859-1 / latin-1.
          noop;
        elsif conv_name = "8859-2" or
            conv_name = "latin-2" then
          conv2unicode(stri, iso_8859_2);
        elsif conv_name = "8859-3" or
            conv_name = "latin-3" then
          conv2unicode(stri, iso_8859_3);
        elsif conv_name = "8859-4" or
            conv_name = "latin-4" then
          conv2unicode(stri, iso_8859_4);
        elsif conv_name = "8859-5" then
          conv2unicode(stri, iso_8859_5);
        elsif conv_name = "8859-6" then
          conv2unicode(stri, iso_8859_6);
        elsif conv_name = "8859-7" then
          conv2unicode(stri, iso_8859_7);
        elsif conv_name = "8859-8" then
          conv2unicode(stri, iso_8859_8);
        elsif conv_name = "8859-9" or
            conv_name = "latin-5" then
          conv2unicode(stri, iso_8859_9);
        elsif conv_name = "8859-10" or
            conv_name = "latin-6" then
          conv2unicode(stri, iso_8859_10);
        elsif conv_name = "8859-11" then
          conv2unicode(stri, iso_8859_11);
        elsif conv_name = "8859-13" or
            conv_name = "latin-7" then
          conv2unicode(stri, iso_8859_13);
        elsif conv_name = "8859-14" or
            conv_name = "latin-8" then
          conv2unicode(stri, iso_8859_14);
        elsif conv_name = "8859-15" or
            conv_name = "latin-9" then
          conv2unicode(stri, iso_8859_15);
        elsif conv_name = "8859-16" then
          conv2unicode(stri, iso_8859_16);
        elsif conv_name = "037" then
          conv2unicode(stri, cp_037);
        elsif conv_name = "273" then
          conv2unicode(stri, cp_273);
        elsif conv_name = "277" then
          conv2unicode(stri, cp_277);
        elsif conv_name = "280" then
          conv2unicode(stri, cp_280);
        elsif conv_name = "285" then
          conv2unicode(stri, cp_285);
        elsif conv_name = "297" then
          conv2unicode(stri, cp_297);
        elsif conv_name = "500" then
          conv2unicode(stri, cp_500);
        elsif conv_name = "1047" then
          conv2unicode(stri, cp_1047);
        elsif conv_name = "UTF-16BE" then
          # A leading byte pair 254,255 or 255,254 is stripped and selects
          # the decoder, overriding the requested byte order if they differ.
          if startsWith(stri, "\254;\255;") then
            stri := fromUtf16Be(stri[3 ..]);
          elsif startsWith(stri, "\255;\254;") then
            stri := fromUtf16Le(stri[3 ..]);
          else
            stri := fromUtf16Be(stri);
          end if;
        elsif conv_name = "UTF-16LE" then
          # Same leading byte pair handling as for UTF-16BE, with
          # little endian as the default.
          if startsWith(stri, "\255;\254;") then
            stri := fromUtf16Le(stri[3 ..]);
          elsif startsWith(stri, "\254;\255;") then
            stri := fromUtf16Be(stri[3 ..]);
          else
            stri := fromUtf16Le(stri);
          end if;
        elsif conv_name = "UTF-7" then
          stri := fromUtf7(stri);
        else
          # Every other name is looked up by conv2unicodeByName; a
          # RANGE_ERROR from it is reported as an unsupported codepage.
          block
            conv2unicodeByName(stri, conv_name);
          exception
            catch RANGE_ERROR:
              writeln(" *** Unsupported codepage: " <& conv_name);
              writeln("Use the option -? for a list of codepages.");
              # Clearing the text suppresses the output step below.
              stri := "";
          end block;
        end if;
        if stri <> "" then
          if out_name <> "" then
            out_file := openUtf8(out_name, "w");
          else
            out_file := STD_CONSOLE;
          end if;
          if out_file <> STD_NULL then
            write(out_file, stri);
            close(out_file);
          else
            # Report the failure instead of silently dropping the converted text.
            writeln(" *** Cannot open output file: " <& out_name);
          end if;
        end if;
      else
        # Report the failure instead of silently doing nothing.
        writeln(" *** Cannot open input file: " <& in_name);
      end if;
    end if;
  end func;