const func string: toUtf8 (in string: stri) is action "STR_TO_UTF8";
const func string: fromUtf8 (in string: utf8) is action "STR_FROM_UTF8";
const func string: toUtf16Be (in string: stri) is func
result
var string: utf16Be is "";
local
var char: ch is ' ';
var integer: ch1 is 0;
var integer: ch2 is 0;
begin
for ch range stri do
if ch <= '\16#ffff;' then
if ch >= '\16#d800;' and ch <= '\16#dfff;' then
raise RANGE_ERROR;
else
utf16Be &:= chr((ord(ch) >> 8) mod 256);
utf16Be &:= chr( ord(ch) mod 256);
end if;
elsif ch <= '\16#10ffff;' then
ch1 := 16#d800 + ((ord(ch) - 16#10000) >> 10);
ch2 := 16#dc00 + (ord(ch) - 16#10000) mod 16#400;
utf16Be &:= chr((ch1 >> 8) mod 256);
utf16Be &:= chr( ch1 mod 256);
utf16Be &:= chr((ch2 >> 8) mod 256);
utf16Be &:= chr( ch2 mod 256);
else
raise RANGE_ERROR;
end if;
end for;
end func;
const func string: fromUtf16Be (in string: utf16Be) is func
result
var string: stri is "";
local
var integer: index is 0;
var char: byte1 is ' ';
var char: byte2 is ' ';
var char: ch1 is ' ';
var char: ch2 is ' ';
begin
if odd(length(utf16Be)) then
raise RANGE_ERROR;
end if;
for index range 1 to length(utf16Be) step 2 do
byte1 := utf16Be[index];
byte2 := utf16Be[succ(index)];
if byte1 > '\255;' or byte2 > '\255;' then
raise RANGE_ERROR;
else
ch1 := chr(ord(byte1) * 256 + ord(byte2));
if ch1 >= '\16#d800;' and ch1 <= '\16#dfff;' then
if ch1 <= '\16#dbff;' and index < length(utf16Be) - 2 then
index +:= 2;
byte1 := utf16Be[index];
byte2 := utf16Be[succ(index)];
if byte1 > '\255;' or byte2 > '\255;' then
raise RANGE_ERROR;
else
ch2 := chr(ord(byte1) * 256 + ord(byte2));
if ch2 >= '\16#dc00;' and ch2 <= '\16#dfff;' then
stri &:= chr(((ord(ch1) - 16#d800) << 10) +
(ord(ch2) - 16#dc00) + 16#10000);
else
raise RANGE_ERROR;
end if;
end if;
else
raise RANGE_ERROR;
end if;
else
stri &:= ch1;
end if;
end if;
end for;
end func;
const func string: toUtf16Le (in string: stri) is func
result
var string: utf16Le is "";
local
var char: ch is ' ';
var integer: ch1 is 0;
var integer: ch2 is 0;
begin
for ch range stri do
if ch <= '\16#ffff;' then
if ch >= '\16#d800;' and ch <= '\16#dfff;' then
raise RANGE_ERROR;
else
utf16Le &:= chr( ord(ch) mod 256);
utf16Le &:= chr((ord(ch) >> 8) mod 256);
end if;
elsif ch <= '\16#10ffff;' then
ch1 := 16#d800 + ((ord(ch) - 16#10000) >> 10);
ch2 := 16#dc00 + (ord(ch) - 16#10000) mod 16#400;
utf16Le &:= chr( ch1 mod 256);
utf16Le &:= chr((ch1 >> 8) mod 256);
utf16Le &:= chr( ch2 mod 256);
utf16Le &:= chr((ch2 >> 8) mod 256);
else
raise RANGE_ERROR;
end if;
end for;
end func;
const func string: fromUtf16Le (in string: utf16Le) is func
result
var string: stri is "";
local
var integer: index is 0;
var char: byte1 is ' ';
var char: byte2 is ' ';
var char: ch1 is ' ';
var char: ch2 is ' ';
begin
if odd(length(utf16Le)) then
raise RANGE_ERROR;
end if;
for index range 1 to length(utf16Le) step 2 do
byte1 := utf16Le[index];
byte2 := utf16Le[succ(index)];
if byte1 > '\255;' or byte2 > '\255;' then
raise RANGE_ERROR;
else
ch1 := chr(ord(byte2) * 256 + ord(byte1));
if ch1 >= '\16#d800;' and ch1 <= '\16#dfff;' then
if ch1 <= '\16#dbff;' and index < length(utf16Le) - 2 then
index +:= 2;
byte1 := utf16Le[index];
byte2 := utf16Le[succ(index)];
if byte1 > '\255;' or byte2 > '\255;' then
raise RANGE_ERROR;
else
ch2 := chr(ord(byte2) * 256 + ord(byte1));
if ch2 >= '\16#dc00;' and ch2 <= '\16#dfff;' then
stri &:= chr(((ord(ch1) - 16#d800) << 10) +
(ord(ch2) - 16#dc00) + 16#10000);
else
raise RANGE_ERROR;
end if;
end if;
else
raise RANGE_ERROR;
end if;
else
stri &:= ch1;
end if;
end if;
end for;
end func;
const func string: replaceUtf16SurrogatePairs (in string: stri) is func
result
var string: resultStri is "";
local
var integer: index is 0;
var integer: startPos is 1;
var char: ch1 is ' ';
var char: ch2 is ' ';
begin
for index range 1 to length(stri) do
ch1 := stri[index];
if ch1 >= '\16#d800;' and ch1 <= '\16#dfff;' then
if ch1 <= '\16#dbff;' and index < length(stri) then
ch2 := stri[succ(index)];
if ch2 >= '\16#dc00;' and ch2 <= '\16#dfff;' then
resultStri &:= stri[startPos .. pred(index)];
resultStri &:= chr(((ord(ch1) - 16#d800) << 10) +
(ord(ch2) - 16#dc00) + 16#10000);
incr(index);
startPos := succ(index);
else
raise RANGE_ERROR;
end if;
else
raise RANGE_ERROR;
end if;
end if;
end for;
resultStri &:= stri[startPos ..];
end func;
const func string: fromNullTerminatedUtf16Be (in string: stri, in integer: startPos) is func
result
var string: resultStri is "";
local
var integer: pos is 0;
begin
pos := startPos;
while pos < length(stri) and (stri[pos] <> '\0;' or stri[succ(pos)] <> '\0;') do
pos +:= 2;
end while;
if pos >= length(stri) then
resultStri := fromUtf16Be(stri[startPos ..]);
else
resultStri := fromUtf16Be(stri[startPos .. pred(pos)]);
end if;
end func;
const func string: fromNullTerminatedUtf16Le (in string: stri, in integer: startPos) is func
result
var string: resultStri is "";
local
var integer: pos is 0;
begin
pos := startPos;
while pos < length(stri) and (stri[pos] <> '\0;' or stri[succ(pos)] <> '\0;') do
pos +:= 2;
end while;
if pos >= length(stri) then
resultStri := fromUtf16Le(stri[startPos ..]);
else
resultStri := fromUtf16Le(stri[startPos .. pred(pos)]);
end if;
end func;
const func string: getNullTerminatedUtf16Be (in string: stri, inout integer: currPos) is func
result
var string: resultStri is "";
local
var integer: pos is 0;
begin
pos := currPos;
while pos < length(stri) and (stri[pos] <> '\0;' or stri[succ(pos)] <> '\0;') do
pos +:= 2;
end while;
if pos >= length(stri) then
resultStri := fromUtf16Be(stri[currPos ..]);
currPos := succ(length(stri));
else
resultStri := fromUtf16Be(stri[currPos .. pred(pos)]);
currPos := pos + 2;
end if;
end func;
const func string: getNullTerminatedUtf16Be (inout file: inFile) is func
result
var string: resultStri is "";
local
var char: byte1 is ' ';
var char: byte2 is ' ';
var char: ch is ' ';
begin
byte1 := getc(inFile);
byte2 := getc(inFile);
if byte1 > '\255;' or byte2 > '\255;' then
raise RANGE_ERROR;
else
ch := chr(ord(byte1) * 256 + ord(byte2));
while ch <> '\0;' do
resultStri &:= ch;
byte1 := getc(inFile);
byte2 := getc(inFile);
if byte1 > '\255;' or byte2 > '\255;' then
raise RANGE_ERROR;
else
ch := chr(ord(byte1) * 256 + ord(byte2));
end if;
end while;
end if;
resultStri := replaceUtf16SurrogatePairs(resultStri);
end func;
const func string: getNullTerminatedUtf16Le (in string: stri, inout integer: currPos) is func
result
var string: resultStri is "";
local
var integer: pos is 0;
begin
pos := currPos;
while pos < length(stri) and (stri[pos] <> '\0;' or stri[succ(pos)] <> '\0;') do
pos +:= 2;
end while;
if pos >= length(stri) then
resultStri := fromUtf16Le(stri[currPos ..]);
currPos := succ(length(stri));
else
resultStri := fromUtf16Le(stri[currPos .. pred(pos)]);
currPos := pos + 2;
end if;
end func;
const func string: getNullTerminatedUtf16Le (inout file: inFile) is func
result
var string: resultStri is "";
local
var char: byte1 is ' ';
var char: byte2 is ' ';
var char: ch is ' ';
begin
byte1 := getc(inFile);
byte2 := getc(inFile);
if byte1 > '\255;' or byte2 > '\255;' then
raise RANGE_ERROR;
else
ch := chr(ord(byte2) * 256 + ord(byte1));
while ch <> '\0;' do
resultStri &:= ch;
byte1 := getc(inFile);
byte2 := getc(inFile);
if byte1 > '\255;' or byte2 > '\255;' then
raise RANGE_ERROR;
else
ch := chr(ord(byte2) * 256 + ord(byte1));
end if;
end while;
end if;
resultStri := replaceUtf16SurrogatePairs(resultStri);
end func;
const func string: fromUtf7 (in string: stri7) is func
result
var string: resultStri is "";
local
const array integer: decode is [] (
62, -1, -1, -1, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
-1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
-1, -1, -1, -1, -1, -1,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51);
var integer: startPos is 1;
var integer: plusPos is 0;
var integer: minusPos is 0;
var integer: index is 0;
var char: ch is ' ';
var integer: charNum is 0;
var integer: accumulator is 0;
var integer: numBits is 0;
var integer: number is 0;
var boolean: okay is FALSE;
var string: unicodeStri is "";
begin
plusPos := pos(stri7, "+");
while plusPos <> 0 do
resultStri &:= stri7[startPos .. pred(plusPos)];
minusPos := pos(stri7, "-", succ(plusPos));
if minusPos = 0 then
resultStri &:= "+";
minusPos := plusPos;
elsif minusPos = succ(plusPos) then
resultStri &:= "+";
else
okay := TRUE;
unicodeStri := "";
accumulator := 0;
numBits := 0;
for index range succ(plusPos) to pred(minusPos) do
ch := stri7[index];
if ch >= '+' and ch <= 'z' then
number := decode[ord(stri7[index]) - ord(pred('+'))];
if number >= 0 then
accumulator := (accumulator << 6) + number;
numBits +:= 6;
if numBits >= 16 then
numBits -:= 16;
charNum := accumulator >> numBits;
accumulator -:= charNum << numBits;
unicodeStri &:= chr(charNum);
end if;
else
okay := FALSE;
index := minusPos;
end if;
else
okay := FALSE;
index := minusPos;
end if;
end for;
if okay then
if accumulator <> 0 then
raise RANGE_ERROR;
else
resultStri &:= replaceUtf16SurrogatePairs(unicodeStri);
end if;
else
resultStri &:= "+";
minusPos := plusPos;
end if;
end if;
startPos := succ(minusPos);
plusPos := pos(stri7, "+", startPos);
end while;
resultStri &:= stri7[startPos ..];
end func;