solorice/vscodium/extensions/mechatroner.rainbow-csv-2.2.0/rainbow_utils.js
2022-04-28 21:17:01 +03:00

588 lines
22 KiB
JavaScript

const os = require('os');
const fs = require('fs');
const path = require('path');
const rbql = require('./rbql_core/rbql-js/rbql.js');
const rbql_csv = require('./rbql_core/rbql-js/rbql_csv.js');
const vscode = require('vscode');
const csv_utils = require('./rbql_core/rbql-js/csv_utils.js');
class AssertionError extends Error {}
function assert(condition, message=null) {
if (!condition) {
if (!message) {
message = 'Assertion error';
}
throw new AssertionError(message);
}
}
function get_default_js_udf_content() {
let default_content = `// This file can be used to store RBQL UDFs. Example:
//
// function foo(value) {
// return 'foo ' + String(value.length);
// }
//
// Functions defined in this file can be used in RBQL queries e.g.
// SELECT foo(a1), a2 WHERE foo(a3) != 'foo 5' LIMIT 10
//
// Don't forget to save this file after editing!
//
// Write your own functions bellow this line:
`.replace(new RegExp(/^ */, 'mg'), '');
return default_content;
}
function get_default_python_udf_content() {
let default_content = `# This file can be used to store RBQL UDFs. Example:
#
# def foo(value):
# return 'foo ' + str(len(value))
#
#
# Functions defined in this file can be used in RBQL queries e.g.
# SELECT foo(a1), a2 WHERE foo(a3) != 'foo 5' LIMIT 10
#
# Don't forget to save this file after editing!
#
# Write your own functions bellow this line:
`.replace(new RegExp(/^ */, 'mg'), '');
return default_content;
}
function calc_column_sizes(active_doc, delim, policy) {
let result = [];
let num_lines = active_doc.lineCount;
const config = vscode.workspace.getConfiguration('rainbow_csv');
let comment_prefix = config ? config.get('comment_prefix') : '';
for (let lnum = 0; lnum < num_lines; lnum++) {
let line_text = active_doc.lineAt(lnum).text;
if (comment_prefix && line_text.startsWith(comment_prefix))
continue;
let [fields, warning] = csv_utils.smart_split(line_text, delim, policy, true);
if (warning) {
return [null, lnum + 1];
}
for (let i = 0; i < fields.length; i++) {
if (result.length <= i)
result.push(0);
result[i] = Math.max(result[i], (fields[i].trim()).length);
}
}
return [result, null];
}
function align_columns(active_doc, delim, policy, column_sizes) {
let result_lines = [];
let num_lines = active_doc.lineCount;
let has_edit = false;
const config = vscode.workspace.getConfiguration('rainbow_csv');
let comment_prefix = config ? config.get('comment_prefix') : '';
for (let lnum = 0; lnum < num_lines; lnum++) {
let line_text = active_doc.lineAt(lnum).text;
if (comment_prefix && line_text.startsWith(comment_prefix)) {
result_lines.push(line_text);
continue;
}
let fields = csv_utils.smart_split(line_text, delim, policy, true)[0];
for (let i = 0; i < fields.length - 1; i++) {
if (i >= column_sizes.length) // Safeguard against async doc edit.
break;
let adjusted = fields[i].trim();
let delta_len = column_sizes[i] - adjusted.length;
if (delta_len >= 0) { // Safeguard against async doc edit.
adjusted += ' '.repeat(delta_len + 1);
}
if (fields[i] != adjusted) {
fields[i] = adjusted;
has_edit = true;
}
}
result_lines.push(fields.join(delim));
}
if (!has_edit)
return null;
return result_lines.join('\n');
}
function shrink_columns(active_doc, delim, policy) {
let result_lines = [];
let num_lines = active_doc.lineCount;
let has_edit = false;
const config = vscode.workspace.getConfiguration('rainbow_csv');
let comment_prefix = config ? config.get('comment_prefix') : '';
for (let lnum = 0; lnum < num_lines; lnum++) {
let line_text = active_doc.lineAt(lnum).text;
if (comment_prefix && line_text.startsWith(comment_prefix)) {
result_lines.push(line_text);
continue;
}
let [fields, warning] = csv_utils.smart_split(line_text, delim, policy, true);
if (warning) {
return [null, lnum + 1];
}
for (let i = 0; i < fields.length; i++) {
let adjusted = fields[i].trim();
if (fields[i].length != adjusted.length) {
fields[i] = adjusted;
has_edit = true;
}
}
result_lines.push(fields.join(delim));
}
if (!has_edit)
return [null, null];
return [result_lines.join('\n'), null];
}
function get_last(arr) {
return arr[arr.length - 1];
}
function populate_optimistic_rfc_csv_record_map(document, requested_end_record, dst_record_map, comment_prefix=null) {
let num_lines = document.lineCount;
let record_begin = null;
let start_line_idx = dst_record_map.length ? get_last(dst_record_map)[1] : 0;
for (let lnum = start_line_idx; lnum < num_lines && dst_record_map.length < requested_end_record; ++lnum) {
let line_text = document.lineAt(lnum).text;
if (lnum + 1 >= num_lines && line_text == "")
break; // Skip the last empty line.
if (comment_prefix && line_text.startsWith(comment_prefix))
continue;
let match_list = line_text.match(/"/g);
let has_unbalanced_double_quote = match_list && match_list.length % 2 == 1;
if (record_begin === null && !has_unbalanced_double_quote) {
dst_record_map.push([lnum, lnum + 1]);
} else if (record_begin === null && has_unbalanced_double_quote) {
record_begin = lnum;
} else if (!has_unbalanced_double_quote) {
continue;
} else {
dst_record_map.push([record_begin, lnum + 1]);
record_begin = null;
}
}
if (record_begin !== null) {
dst_record_map.push([record_begin, num_lines]);
}
}
function make_table_name_key(file_path) {
return 'rbql_table_name:' + file_path;
}
function expanduser(filepath) {
if (filepath.charAt(0) === '~') {
return path.join(os.homedir(), filepath.slice(1));
}
return filepath;
}
function find_table_path(vscode_global_state, main_table_dir, table_id) {
// If table_id is a relative path it could be relative either to the current directory or to the main table dir.
var candidate_path = expanduser(table_id);
if (fs.existsSync(candidate_path)) {
return candidate_path;
}
if (main_table_dir && !path.isAbsolute(candidate_path)) {
candidate_path = path.join(main_table_dir, candidate_path);
if (fs.existsSync(candidate_path)) {
return candidate_path;
}
}
let table_path = vscode_global_state ? vscode_global_state.get(make_table_name_key(table_id)) : null;
if (table_path && fs.existsSync(table_path)) {
return table_path;
}
return null;
}
async function read_header(table_path, encoding) {
if (encoding == 'latin-1')
encoding = 'binary';
let readline = require('readline');
let input_reader = readline.createInterface({ input: fs.createReadStream(table_path, {encoding: encoding}) });
let closed = false;
let promise_resolve = null;
let promise_reject = null;
let output_promise = new Promise(function(resolve, reject) {
promise_resolve = resolve;
promise_reject = reject;
});
input_reader.on('line', line => {
if (!closed) {
closed = true;
input_reader.close();
promise_resolve(line);
}
});
input_reader.on('error', error => {
promise_reject(error);
});
return output_promise;
}
function get_header_line(document, comment_prefix) {
const num_lines = document.lineCount;
for (let lnum = 0; lnum < num_lines; ++lnum) {
const line_text = document.lineAt(lnum).text;
if (!comment_prefix || !line_text.startsWith(comment_prefix)) {
return line_text;
}
}
return null;
}
function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
let keys = Object.keys(inconsistent_records_info);
let entries = [];
for (let i = 0; i < keys.length; i++) {
let key = keys[i];
let record_id = inconsistent_records_info[key];
entries.push([record_id, key]);
}
entries.sort(function(a, b) { return a[0] - b[0]; });
assert(entries.length > 1);
let [record_1, num_fields_1] = entries[0];
let [record_2, num_fields_2] = entries[1];
let warn_msg = `Number of fields in "${table_name}" table is not consistent: `;
warn_msg += `e.g. record ${record_1} -> ${num_fields_1} fields, record ${record_2} -> ${num_fields_2} fields`;
return warn_msg;
}
class RbqlIOHandlingError extends Error {}
class VSCodeRecordIterator extends rbql.RBQLInputIterator {
constructor(document, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
// We could have done a hack here actually: convert the document to stream/buffer and then use the standard reader.
super();
this.document = document;
this.delim = delim;
this.policy = policy;
this.has_header = has_header;
this.comment_prefix = comment_prefix;
this.table_name = table_name;
this.variable_prefix = variable_prefix;
this.NR = 0; // Record number.
this.NL = 0; // Line number (NL != NR when the CSV file has comments or multiline fields).
this.fields_info = new Object();
this.first_defective_line = null;
this.first_record = this.get_first_record();
}
stop() {
}
get_first_record() {
let header_line = get_header_line(this.document, this.comment_prefix);
let first_record = csv_utils.smart_split(header_line, this.delim, this.policy, /*preserve_quotes_and_whitespaces=*/false)[0];
return first_record;
}
async get_variables_map(query_text) {
let variable_map = new Object();
rbql.parse_basic_variables(query_text, this.variable_prefix, variable_map);
rbql.parse_array_variables(query_text, this.variable_prefix, variable_map);
let header_line = get_header_line(this.document, this.comment_prefix);
let first_record = csv_utils.smart_split(header_line, this.delim, this.policy, /*preserve_quotes_and_whitespaces=*/false)[0];
if (this.has_header) {
rbql.parse_attribute_variables(query_text, this.variable_prefix, first_record, 'CSV header line', variable_map);
rbql.parse_dictionary_variables(query_text, this.variable_prefix, first_record, variable_map);
}
return variable_map;
}
async get_header() {
return this.has_header ? this.first_record : null;
}
get_line_rfc() {
let rfc_line_buffer = [];
const num_lines = this.document.lineCount;
while (this.NL < num_lines) {
let line = this.document.lineAt(this.NL).text;
this.NL += 1;
if (this.NL == num_lines && line.length == 0)
return null; // Skip the last line if it is empty - this can happen due to trailing newline.
let record_line = csv_utils.accumulate_rfc_line_into_record(rfc_line_buffer, line, this.comment_prefix);
if (record_line !== null)
return record_line;
}
return null;
}
get_line_simple() {
const num_lines = this.document.lineCount;
while (this.NL < num_lines) {
let line = this.document.lineAt(this.NL).text;
this.NL += 1;
if (this.NL == num_lines && line.length == 0)
return null; // Skip the last line if it is empty - this can happen due to trailing newline.
if (this.comment_prefix === null || !line.startsWith(this.comment_prefix))
return line;
}
return null;
}
do_get_record() {
let line = (this.policy == 'quoted_rfc') ? this.get_line_rfc() : this.get_line_simple();
if (line === null)
return null;
let [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, /*preserve_quotes_and_whitespaces=*/false);
if (warning) {
if (this.first_defective_line === null) {
this.first_defective_line = this.NL;
if (this.policy == 'quoted_rfc')
throw new RbqlIOHandlingError(`Inconsistent double quote escaping in ${this.table_name} table at record ${this.NR}, line ${this.NL}`);
}
}
let num_fields = record.length;
if (!this.fields_info.hasOwnProperty(num_fields))
this.fields_info[num_fields] = this.NR;
return record;
}
async get_record() {
if (this.NR == 0 && this.has_header) {
this.do_get_record(); // Skip the header record.
}
this.NR += 1;
let record = this.do_get_record();
return record;
}
get_warnings() {
let result = [];
if (this.first_defective_line !== null)
result.push(`Inconsistent double quote escaping in ${this.table_name} table. E.g. at line ${this.first_defective_line}`);
if (Object.keys(this.fields_info).length > 1)
result.push(make_inconsistent_num_fields_warning(this.table_name, this.fields_info));
return result;
}
}
class VSCodeWriter extends rbql.RBQLOutputWriter {
constructor(delim, policy) {
super();
this.delim = delim;
this.policy = policy;
this.header_len = null;
this.null_in_output = false;
this.delim_in_simple_output = false;
this.output_lines = [];
if (policy == 'simple') {
this.polymorphic_join = this.simple_join;
} else if (policy == 'quoted') {
this.polymorphic_join = this.quoted_join;
} else if (policy == 'quoted_rfc') {
this.polymorphic_join = this.quoted_join_rfc;
} else if (policy == 'monocolumn') {
this.polymorphic_join = this.mono_join;
} else if (policy == 'whitespace') {
this.polymorphic_join = this.simple_join;
} else {
throw new RbqlIOHandlingError('Unknown output csv policy');
}
}
set_header(header) {
if (header !== null) {
this.header_len = header.length;
this.write(header);
}
}
quoted_join(fields) {
let delim = this.delim;
var quoted_fields = fields.map(function(v) { return csv_utils.quote_field(String(v), delim); });
return quoted_fields.join(this.delim);
};
quoted_join_rfc(fields) {
let delim = this.delim;
var quoted_fields = fields.map(function(v) { return csv_utils.rfc_quote_field(String(v), delim); });
return quoted_fields.join(this.delim);
};
mono_join(fields) {
if (fields.length > 1) {
throw new RbqlIOHandlingError('Unable to use "Monocolumn" output format: some records have more than one field');
}
return fields[0];
};
simple_join(fields) {
var res = fields.join(this.delim);
if (fields.join('').indexOf(this.delim) != -1) {
this.delim_in_simple_output = true;
}
return res;
};
normalize_fields(out_fields) {
for (var i = 0; i < out_fields.length; i++) {
if (out_fields[i] == null) {
this.null_in_output = true;
out_fields[i] = '';
} else if (Array.isArray(out_fields[i])) {
this.normalize_fields(out_fields[i]);
out_fields[i] = out_fields[i].join(this.sub_array_delim);
}
}
};
write(fields) {
if (this.header_len !== null && fields.length != this.header_len)
throw new RbqlIOHandlingError(`Inconsistent number of columns in output header and the current record: ${this.header_len} != ${fields.length}`);
this.normalize_fields(fields);
this.output_lines.push(this.polymorphic_join(fields));
return true;
};
async finish() {
}
get_warnings() {
let result = [];
if (this.null_in_output)
result.push('null values in output were replaced by empty strings');
if (this.delim_in_simple_output)
result.push('Some output fields contain separator');
return result;
};
}
class VSCodeTableRegistry {
constructor(){}
get_iterator_by_table_id(_table_id) {
throw new RbqlIOHandlingError("JOIN queries are currently not supported in vscode.dev web version.");
}
get_warnings() {
return [];
};
}
async function rbql_query_web(query_text, input_document, input_delim, input_policy, output_delim, output_policy, output_warnings, with_headers, comment_prefix=null) {
let user_init_code = ''; // TODO find a way to have init code.
let join_tables_registry = new VSCodeTableRegistry(); // TODO find a way to have join registry.
let input_iterator = new VSCodeRecordIterator(input_document, input_delim, input_policy, with_headers, comment_prefix);
let output_writer = new VSCodeWriter(output_delim, output_policy);
await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
return output_writer.output_lines;
}
class VSCodeFileSystemCSVRegistry extends rbql.RBQLTableRegistry {
constructor(vscode_global_state, input_file_dir, delim, policy, encoding, has_header=false, comment_prefix=null, options=null) {
super();
this.vscode_global_state = vscode_global_state;
this.input_file_dir = input_file_dir;
this.delim = delim;
this.policy = policy;
this.encoding = encoding;
this.has_header = has_header;
this.comment_prefix = comment_prefix;
this.stream = null;
this.record_iterator = null;
this.options = options;
this.bulk_input_path = null;
this.table_path = null;
}
get_iterator_by_table_id(table_id) {
this.table_path = find_table_path(this.vscode_global_state, this.input_file_dir, table_id);
if (this.table_path === null) {
throw new RbqlIOHandlingError(`Unable to find join table "${table_id}"`);
}
if (this.options && this.options['bulk_read']) {
this.bulk_input_path = this.table_path;
} else {
this.stream = fs.createReadStream(this.table_path);
}
this.record_iterator = new rbql_csv.CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
return this.record_iterator;
};
get_warnings(output_warnings) {
if (this.record_iterator && this.has_header) {
output_warnings.push(`The first record in JOIN file ${path.basename(this.table_path)} was also treated as header (and skipped)`);
}
}
}
async function rbql_query_node(vscode_global_state, query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers=false, comment_prefix=null, user_init_code='', options=null) {
let input_stream = null;
let bulk_input_path = null;
if (options && options['bulk_read'] && input_path) {
bulk_input_path = input_path;
} else {
input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
}
let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
if (input_delim == '"' && input_policy == 'quoted')
throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
if (csv_encoding == 'latin-1')
csv_encoding = 'binary';
if (!rbql_csv.is_ascii(query_text) && csv_encoding == 'binary')
throw new RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary');
if ((!rbql_csv.is_ascii(input_delim) || !rbql_csv.is_ascii(output_delim)) && csv_encoding == 'binary')
throw new RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary');
let default_init_source_path = path.join(os.homedir(), '.rbql_init_source.js');
if (user_init_code == '' && fs.existsSync(default_init_source_path)) {
user_init_code = rbql_csv.read_user_init_code(default_init_source_path);
}
let input_file_dir = input_path ? path.dirname(input_path) : null;
let join_tables_registry = new VSCodeFileSystemCSVRegistry(vscode_global_state, input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
let input_iterator = new rbql_csv.CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
let output_writer = new rbql_csv.CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);
await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
join_tables_registry.get_warnings(output_warnings);
}
module.exports.make_table_name_key = make_table_name_key;
module.exports.find_table_path = find_table_path;
module.exports.read_header = read_header;
module.exports.rbql_query_web = rbql_query_web;
module.exports.rbql_query_node = rbql_query_node;
module.exports.get_header_line = get_header_line;
module.exports.populate_optimistic_rfc_csv_record_map = populate_optimistic_rfc_csv_record_map;
module.exports.get_default_js_udf_content = get_default_js_udf_content;
module.exports.get_default_python_udf_content = get_default_python_udf_content;
module.exports.align_columns = align_columns;
module.exports.shrink_columns = shrink_columns;
module.exports.calc_column_sizes = calc_column_sizes;