(********************************************************************)
(*                                                                  *)
(*  csv.s7i       Comma-separated values (CSV) support library.     *)
(*  Copyright (C) 2019  Thomas Mertes                               *)
(*                                                                  *)
(*  This file is part of the Seed7 Runtime Library.                 *)
(*                                                                  *)
(*  The Seed7 Runtime Library is free software; you can             *)
(*  redistribute it and/or modify it under the terms of the GNU     *)
(*  Lesser General Public License as published by the Free Software *)
(*  Foundation; either version 2.1 of the License, or (at your      *)
(*  option) any later version.                                      *)
(*                                                                  *)
(*  The Seed7 Runtime Library is distributed in the hope that it    *)
(*  will be useful, but WITHOUT ANY WARRANTY; without even the      *)
(*  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR *)
(*  PURPOSE.  See the GNU Lesser General Public License for more    *)
(*  details.                                                        *)
(*                                                                  *)
(*  You should have received a copy of the GNU Lesser General       *)
(*  Public License along with this program; if not, write to the    *)
(*  Free Software Foundation, Inc., 51 Franklin Street,             *)
(*  Fifth Floor, Boston, MA  02110-1301, USA.                       *)
(*                                                                  *)
(********************************************************************)


(**
 *  Convert an array of [[string]]s to a CSV line.
 *  In a CSV line the fields are separated by the ''separator'' character.
 *  Fields that contain ''separator'' characters, double quotes, linefeeds
 *  or carriage returns are enclosed in double quotes ("). Double quotes
 *  inside a double quoted field are represented by doubling them
 *  (e.g.: The double quoted field "a""b" has the value a"b ).
 *  @param data String array to be converted.
 *  @param separator Separator character to be used in the CSV line.
 *  @return the CSV line created from the data array.
*)
const func string: toCsvLine (in array string: data, in char: separator) is func
  result
    var string: csvLine is "";
  local
    var string: field is "";
    var integer: index is 0;
  begin
    for field key index range data do
      # A separator goes in front of every field except the first one.
      # The first field is found via minIdx(data), because a Seed7 array
      # is not required to start at index 1 (e.g. [0] ("a", "b")).
      if index <> minIdx(data) then
        csvLine &:= separator;
      end if;
      # Quoting is only necessary, if the field contains a character
      # with a special meaning in a CSV line.
      if pos(field, "\"") <> 0 or
          pos(field, separator) <> 0 or
          pos(field, '\n') <> 0 or
          pos(field, '\r') <> 0 then
        # Enclose the field in double quotes and double embedded quotes.
        csvLine &:= "\"" & replace(field, "\"", "\"\"") & "\"";
      else
        csvLine &:= field;
      end if;
    end for;
  end func;


(**
 *  Convert a CSV line to an array of [[string]]s.
 *  CSV fields in the CSV line are delimited by the ''separator'' character
 *  or the beginning or the end of the CSV line. A CSV field might be
 *  enclosed in double quotes ("). A double quoted CSV field might contain
 *  ''separator'' characters, double quotes (") or linefeed characters.
 *  Double quotes inside a double quoted field are represented by doubling
 *  them (e.g.: The double quoted field "a""b" has the value a"b ).
 *  This function is intended to be used if the CSV line is already in
 *  a [[string]]. To read CSV lines from a file the function
 *  [[#readCsvLine(inout_file,in_char)|readCsvLine]] should be used
 *  instead of a combination of [[file#getln(inout_file)|getln]] and
 *  [[#fromCsvLine(in_string,in_char)|fromCsvLine]]. The function
 *  [[#readCsvLine(inout_file,in_char)|readCsvLine]] allows reading
 *  CSV fields that contain linefeed characters.
 *  @param csvLine CSV line to be converted.
 *  @param separator Separator character used in the CSV line.
 *  @return the array of CSV fields from the CSV line.
 *  @exception RANGE_ERROR If ''csvLine'' is not in CSV format.
*)
const func array string: fromCsvLine (in string: csvLine, in char: separator) is func
  result
    var array string: data is 0 times "";
  local
    var integer: lineLength is 0;
    var integer: idx is 0;
    var integer: runStart is 0;
    var string: cell is "";
  begin
    lineLength := length(csvLine);
    repeat
      # idx is either 0 (start of line) or at a separator: Step over it.
      idx +:= 1;
      cell := "";
      if idx <= lineLength and csvLine[idx] = '"' then
        # Double quoted field. Every iteration starts at a double quote
        # (the opening one or the second one of a doubled pair).
        repeat
          runStart := succ(idx);
          idx := runStart;
          while idx <= lineLength and csvLine[idx] <> '"' do
            idx +:= 1;
          end while;
          # Take over the run of characters up to the next double quote.
          cell &:= csvLine[runStart .. pred(idx)];
          if idx <= lineLength and csvLine[idx] = '"' then
            idx +:= 1;
            if idx <= lineLength and csvLine[idx] = '"' then
              # A doubled double quote stands for one double quote.
              cell &:= '"';
            end if;
          end if;
        until idx > lineLength or csvLine[idx] <> '"';
      else
        # Unquoted field: It ends at a separator, a linefeed or the
        # end of the line.
        repeat
          runStart := idx;
          while idx <= lineLength and csvLine[idx] <> separator and
              csvLine[idx] <> '\n' and csvLine[idx] <> '\r' do
            idx +:= 1;
          end while;
          cell &:= csvLine[runStart .. pred(idx)];
          if idx <= lineLength and csvLine[idx] = '\r' then
            idx +:= 1;
            if idx <= lineLength and csvLine[idx] <> '\n' then
              # A carriage return that is not followed by a linefeed
              # belongs to the field.
              cell &:= '\r';
            end if;
          end if;
        until idx > lineLength or csvLine[idx] = separator or csvLine[idx] = '\n';
      end if;
      data &:= cell;
    until idx > lineLength or csvLine[idx] <> separator;
    if idx <= lineLength and csvLine[idx] = '\r' then
      idx +:= 1;
    end if;
    # After the last field only a linefeed is accepted.
    if idx <= lineLength and csvLine[idx] <> '\n' then
      raise RANGE_ERROR;
    end if;
  end func;


(**
 *  Read the fields of a CSV line with a given ''separator'' from a file.
 *  CSV fields are delimited by the ''separator'' character or the
 *  beginning or the end of a CSV line. A CSV line is terminated
 *  with '\n', "\r\n" or [[char#EOF|EOF]]. A CSV field might be enclosed
 *  in double quotes ("). A double quoted CSV field might contain
 *  ''separator'' characters, double quotes (") or linefeed characters.
 *  Double quotes inside a double quoted field are represented by doubling
 *  them (e.g.: The double quoted field "a""b" has the value a"b ).
 *  For UTF-8 encoded CSV files there are two possibilities:
 *  * If the CSV file has been opened with [[utf8#openUtf8(in_string,in_string)|openUtf8]] \
 *    the CSV fields will contain Unicode data.
* * If the file has been opened with [[external_file#open(in_string,in_string)|open]] the CSV fields will \ * contain UTF-8 encoded data. In this case the function \ * [[unicode#fromUtf8(in_string)|fromUtf8]] must be used, \ * to convert each CSV field from the result array. * @param inFile File from which the CSV line is read. * @param separator Separator character used in the CSV line. * @return the array of CSV fields from the CSV line. * @exception RANGE_ERROR If the CSV line is not in CSV format. *) const func array string: readCsvLine (inout file: inFile, in char: separator) is func result var array string: data is 0 times ""; local var char: ch is ' '; var string: field is ""; begin repeat ch := getc(inFile); field := ""; if ch = '"' then repeat ch := getc(inFile); while ch <> '"' and ch <> EOF do field &:= ch; ch := getc(inFile); end while; if ch = '"' then ch := getc(inFile); if ch = '"' then field &:= ch; end if; end if; until ch <> '"'; else repeat while ch <> separator and ch <> '\n' and ch <> '\r' and ch <> EOF do field &:= ch; ch := getc(inFile); end while; if ch = '\r' then ch := getc(inFile); if ch <> '\n' then field &:= '\r'; end if; end if; until ch = separator or ch = '\n' or ch = EOF; end if; data &:= field; until ch <> separator; if ch = '\r' then ch := getc(inFile); end if; if ch <> '\n' and ch <> EOF then raise RANGE_ERROR; end if; inFile.bufferChar := ch; end func;