mirror of
https://github.com/kristoferssolo/solorice.git
synced 2025-10-21 20:10:34 +00:00
676 lines
26 KiB
JavaScript
676 lines
26 KiB
JavaScript
const os = require('os');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
const rbql = require('./rbql_core/rbql-js/rbql.js');
|
|
const rbql_csv = require('./rbql_core/rbql-js/rbql_csv.js');
|
|
const csv_utils = require('./rbql_core/rbql-js/csv_utils.js');
|
|
|
|
// Marker written into the integer/fractional width slots of a column's stats once the column is known to contain non-numeric data.
const non_numeric_sentinel = -1;
|
|
// Matches an unsigned decimal number: group 1 is the integer digits, optional group 2 is the fractional part including its leading dot.
const number_regex = /^([0-9]+)(\.[0-9]+)?$/;
|
|
|
|
// Error type thrown by the local assert() helper when a condition does not hold.
class AssertionError extends Error {}
|
|
|
|
/**
 * Throw an AssertionError unless `condition` is truthy.
 *
 * @param {*} condition - Value that is expected to be truthy.
 * @param {?string} message - Optional error text; any falsy value falls back to 'Assertion error'.
 * @throws {AssertionError} When `condition` is falsy.
 */
function assert(condition, message=null) {
    if (condition) {
        return;
    }
    throw new AssertionError(message ? message : 'Assertion error');
}
|
|
|
|
|
|
/**
 * Build the placeholder text for a new JavaScript RBQL UDF file.
 *
 * @returns {string} Comment-only template text ending with a newline.
 */
function get_default_js_udf_content() {
    const template = `// This file can be used to store RBQL UDFs. Example:
    //
    // function foo(value) {
    // return 'foo ' + String(value.length);
    // }
    //
    // Functions defined in this file can be used in RBQL queries e.g.
    // SELECT foo(a1), a2 WHERE foo(a3) != 'foo 5' LIMIT 10
    //
    // Don't forget to save this file after editing!
    //
    // Write your own functions bellow this line:
    `;
    // The template is indented together with the code; strip leading spaces from every line.
    return template.replace(/^ */mg, '');
}
|
|
|
|
|
|
/**
 * Build the placeholder text for a new Python RBQL UDF file.
 *
 * The example function body is indented under its `def` line so that the
 * sample stays valid Python once the comment markers are removed.
 *
 * @returns {string} Comment-only template text ending with a newline.
 */
function get_default_python_udf_content() {
    const template = `# This file can be used to store RBQL UDFs. Example:
    #
    # def foo(value):
    #     return 'foo ' + str(len(value))
    #
    #
    # Functions defined in this file can be used in RBQL queries e.g.
    # SELECT foo(a1), a2 WHERE foo(a3) != 'foo 5' LIMIT 10
    #
    # Don't forget to save this file after editing!
    #
    # Write your own functions bellow this line:
    `;
    // Only the leading spaces of each line are removed, so the indentation after "#" survives.
    return template.replace(/^ */mg, '');
}
|
|
|
|
|
|
/**
 * Fold one field into the running width statistics of its column.
 *
 * @param {string} field - Field text to measure.
 * @param {boolean} is_first_line - True while the first data line (a possible header) is processed.
 * @param {number[]} max_field_components_lens - Mutated in place:
 *     [max_field_length, max_integer_part_length, max_fractional_part_length].
 *     The fractional length includes the leading dot.
 */
function update_subcomponent_stats(field, is_first_line, max_field_components_lens) {
    const stats = max_field_components_lens;
    if (stats[0] < field.length) {
        stats[0] = field.length;
    }
    if (stats[1] == non_numeric_sentinel) {
        // The column is already known to be non-numeric, only the overall width matters.
        return;
    }
    const parsed = number_regex.exec(field);
    if (parsed !== null) {
        const [, integer_part, fractional_part] = parsed;
        stats[1] = Math.max(stats[1], integer_part.length);
        stats[2] = Math.max(stats[2], fractional_part === undefined ? 0 : fractional_part.length);
        return;
    }
    // A non-number on the first line may just be a header, and empty fields are allowed in
    // numeric columns, so the column is demoted only by a non-empty field on a later line.
    if (!is_first_line && field.length) {
        stats[1] = non_numeric_sentinel;
        stats[2] = non_numeric_sentinel;
    }
}
|
|
|
|
|
|
/**
 * Collect per-column width statistics for every non-comment line of a document.
 *
 * @param {object} active_doc - Object exposing `lineCount` and `lineAt(i).text`.
 * @param {string} delim - Field delimiter passed to `csv_utils.smart_split`.
 * @param {string} policy - Split policy passed to `csv_utils.smart_split`.
 * @param {?string} comment_prefix - Lines starting with this prefix are skipped; falsy disables the check.
 * @returns {Array} `[column_stats, null]` on success, or `[null, line_number]` (1-based)
 *     for the first line where the splitter reported a warning.
 */
function calc_column_stats(active_doc, delim, policy, comment_prefix) {
    const stats_per_column = [];
    const total_lines = active_doc.lineCount;
    let header_candidate = true;
    for (let line_idx = 0; line_idx < total_lines; line_idx++) {
        const text = active_doc.lineAt(line_idx).text;
        if (comment_prefix && text.startsWith(comment_prefix)) {
            continue;
        }
        const split_result = csv_utils.smart_split(text, delim, policy, true);
        if (split_result[1]) {
            return [null, line_idx + 1];
        }
        split_result[0].forEach((raw_field, col_idx) => {
            if (stats_per_column.length <= col_idx) {
                stats_per_column.push([0, 0, 0]);
            }
            update_subcomponent_stats(raw_field.trim(), header_candidate, stats_per_column[col_idx]);
        });
        // Only the first non-comment line is treated as a potential header.
        header_candidate = false;
    }
    return [stats_per_column, null];
}
|
|
|
|
|
|
/**
 * Make the numeric component widths of every column consistent with its overall width.
 *
 * Each stat triple is modified in place and also collected into the returned array.
 *
 * @param {number[][]} column_stats - Triples of [max_width, integer_width, fractional_width].
 * @returns {?number[][]} The adjusted triples, or null if the final consistency check fails.
 */
function adjust_column_stats(column_stats) {
    const adjusted_stats = [];
    for (const stat of column_stats) {
        if (stat[1] <= 0) {
            // No integer part was ever seen: mark the column as non-numeric.
            stat[1] = -1;
            stat[2] = -1;
        } else if (stat[1] > 0) {
            // Integer and fractional maxima can come from different rows (e.g. "0.12" and
            // "1234"), so their sum may exceed the widest single field.
            const components_width = stat[1] + stat[2];
            if (components_width > stat[0]) {
                stat[0] = components_width;
            }
            // A header wider than the numeric parts gives the extra room to the integer part.
            const integer_room = stat[0] - stat[2];
            if (integer_room > stat[1]) {
                stat[1] = integer_room;
            }
            // Sanity check, not expected to trigger.
            if (stat[0] != stat[1] + stat[2]) {
                return null;
            }
        }
        adjusted_stats.push(stat);
    }
    return adjusted_stats;
}
|
|
|
|
|
|
/**
 * Pad a single field so that it lines up with the other fields of its column.
 *
 * Text columns (and non-numeric header cells of numeric columns) are left-aligned
 * to the column width. Numeric fields are aligned on the decimal dot: left-padded
 * to the widest integer part and right-padded to the widest fractional part.
 * Fields of the last column never receive trailing whitespace.
 *
 * Every padding delta is clamped to zero, so a field wider than the collected
 * stats (possible if the document changed after the stats were calculated) is
 * returned unpadded instead of raising a RangeError in `String.prototype.repeat`.
 *
 * @param {string} field - Raw field text; surrounding whitespace is trimmed first.
 * @param {boolean} is_first_line - True for the first data line (a possible header).
 * @param {number[]} max_field_components_lens - [max_width, integer_width, fractional_width];
 *     the fractional width includes the leading dot.
 * @param {boolean} is_last_column - True if the field belongs to the last column.
 * @returns {string} The aligned field.
 */
function align_field(field, is_first_line, max_field_components_lens, is_last_column) {
    const extra_readability_whitespace_length = 1;
    const [max_field_length, max_integer_part_length, max_fractional_part_length] = max_field_components_lens;
    field = field.trim();

    const is_text_column = max_integer_part_length == non_numeric_sentinel;
    if (is_text_column || (is_first_line && number_regex.exec(field) === null)) {
        // Non-numeric column or a header cell: plain left alignment by the max width.
        if (is_last_column) {
            return field;
        }
        const delta_length = Math.max(max_field_length - field.length, 0);
        return field + ' '.repeat(delta_length + extra_readability_whitespace_length);
    }

    const dot_pos = field.indexOf('.');
    const cur_integer_part_length = dot_pos == -1 ? field.length : dot_pos;
    // Here cur_fractional_part_length includes the leading dot too.
    const cur_fractional_part_length = dot_pos == -1 ? 0 : field.length - dot_pos;
    const integer_delta_length = Math.max(max_integer_part_length - cur_integer_part_length, 0);
    const fractional_delta_length = Math.max(max_fractional_part_length - cur_fractional_part_length, 0);
    const trailing_spaces = is_last_column ? '' : ' '.repeat(fractional_delta_length + extra_readability_whitespace_length);
    return ' '.repeat(integer_delta_length) + field + trailing_spaces;
}
|
|
|
|
|
|
/**
 * Produce the aligned text of a whole document using precomputed column stats.
 *
 * Comment lines and a trailing empty line are copied through unchanged.
 *
 * @param {object} active_doc - Object exposing `lineCount` and `lineAt(i).text`.
 * @param {string} delim - Field delimiter, also used to re-join the aligned fields.
 * @param {string} policy - Split policy passed to `csv_utils.smart_split`.
 * @param {?string} comment_prefix - Prefix of comment lines; falsy disables the check.
 * @param {number[][]} column_stats - One stats triple per column, passed on to `align_field`.
 * @returns {?string} The aligned text joined with '\n', or null if no field changed.
 */
function align_columns(active_doc, delim, policy, comment_prefix, column_stats) {
    const aligned_lines = [];
    const total_lines = active_doc.lineCount;
    let changed = false;
    let header_candidate = true;
    for (let line_idx = 0; line_idx < total_lines; line_idx++) {
        const text = active_doc.lineAt(line_idx).text;
        const is_comment = comment_prefix && text.startsWith(comment_prefix);
        // The last empty line corresponds to the trailing newline character.
        const is_trailing_empty_line = line_idx + 1 == total_lines && text == '';
        if (is_comment || is_trailing_empty_line) {
            aligned_lines.push(text);
            continue;
        }
        const fields = csv_utils.smart_split(text, delim, policy, true)[0];
        // Fields beyond the known columns stay untouched (safeguard against an async doc edit).
        const num_alignable = Math.min(fields.length, column_stats.length);
        for (let col_idx = 0; col_idx < num_alignable; col_idx++) {
            const is_last_column = col_idx + 1 == column_stats.length;
            const aligned = align_field(fields[col_idx], header_candidate, column_stats[col_idx], is_last_column);
            if (aligned != fields[col_idx]) {
                fields[col_idx] = aligned;
                changed = true;
            }
        }
        header_candidate = false;
        aligned_lines.push(fields.join(delim));
    }
    return changed ? aligned_lines.join('\n') : null;
}
|
|
|
|
|
|
/**
 * Produce the text of a document with surrounding whitespace removed from every field.
 *
 * Comment lines are copied through unchanged.
 *
 * @param {object} active_doc - Object exposing `lineCount` and `lineAt(i).text`.
 * @param {string} delim - Field delimiter, also used to re-join the trimmed fields.
 * @param {string} policy - Split policy passed to `csv_utils.smart_split`.
 * @param {?string} comment_prefix - Prefix of comment lines; falsy disables the check.
 * @returns {Array} `[text, null]` when something was trimmed, `[null, null]` when nothing
 *     changed, or `[null, line_number]` (1-based) for the first line with a split warning.
 */
function shrink_columns(active_doc, delim, policy, comment_prefix) {
    const shrunk_lines = [];
    const total_lines = active_doc.lineCount;
    let changed = false;
    for (let line_idx = 0; line_idx < total_lines; line_idx++) {
        const text = active_doc.lineAt(line_idx).text;
        if (comment_prefix && text.startsWith(comment_prefix)) {
            shrunk_lines.push(text);
            continue;
        }
        const [raw_fields, warning] = csv_utils.smart_split(text, delim, policy, true);
        if (warning) {
            return [null, line_idx + 1];
        }
        const trimmed_fields = raw_fields.map((raw_field) => raw_field.trim());
        if (trimmed_fields.some((trimmed, col_idx) => trimmed.length != raw_fields[col_idx].length)) {
            changed = true;
        }
        shrunk_lines.push(trimmed_fields.join(delim));
    }
    return changed ? [shrunk_lines.join('\n'), null] : [null, null];
}
|
|
|
|
|
|
/**
 * Return the last element of an array-like value.
 *
 * @param {Array} arr - Value with a `length` and index access.
 * @returns {*} The element at index `length - 1` (undefined for an empty array).
 */
function get_last(arr) {
    const last_index = arr.length - 1;
    return arr[last_index];
}
|
|
|
|
|
|
/**
 * Extend a record -> line-range map of a document, guessing multi-line records by quote parity.
 *
 * A line with an odd number of double quotes is taken to open a multi-line record, or to
 * close the one currently open. Scanning resumes after the last mapped record and stops
 * once `requested_end_record` entries exist or the document ends.
 *
 * @param {object} document - Object exposing `lineCount` and `lineAt(i).text`.
 * @param {number} requested_end_record - Stop as soon as the map holds this many records.
 * @param {number[][]} dst_record_map - Mutated in place; each entry is [begin_line, end_line)
 *     with 0-based line numbers.
 * @param {?string} comment_prefix - Lines starting with this prefix are skipped; falsy disables the check.
 */
function populate_optimistic_rfc_csv_record_map(document, requested_end_record, dst_record_map, comment_prefix=null) {
    const total_lines = document.lineCount;
    const first_line_idx = dst_record_map.length ? get_last(dst_record_map)[1] : 0;
    let open_record_start = null;
    for (let line_idx = first_line_idx; line_idx < total_lines && dst_record_map.length < requested_end_record; ++line_idx) {
        const text = document.lineAt(line_idx).text;
        if (line_idx + 1 >= total_lines && text == "") {
            break; // Skip the last empty line.
        }
        if (comment_prefix && text.startsWith(comment_prefix)) {
            continue;
        }
        const quotes = text.match(/"/g);
        const has_odd_quote_count = quotes && quotes.length % 2 == 1;
        if (has_odd_quote_count) {
            if (open_record_start === null) {
                // The line opens a multi-line record.
                open_record_start = line_idx;
            } else {
                // The line closes the currently open record.
                dst_record_map.push([open_record_start, line_idx + 1]);
                open_record_start = null;
            }
        } else if (open_record_start === null) {
            // A regular single-line record; balanced lines inside an open record are just consumed.
            dst_record_map.push([line_idx, line_idx + 1]);
        }
    }
    if (open_record_start !== null) {
        // The record was never closed: let it span to the end of the document.
        dst_record_map.push([open_record_start, total_lines]);
    }
}
|
|
|
|
|
|
/**
 * Build the storage key under which a table path is looked up.
 *
 * @param {string} file_path - Table identifier or path.
 * @returns {string} The key: 'rbql_table_name:' followed by `file_path`.
 */
function make_table_name_key(file_path) {
    const key_prefix = 'rbql_table_name:';
    return key_prefix + file_path;
}
|
|
|
|
|
|
/**
 * Expand a leading "~" to the home directory of the current user.
 *
 * Only a bare "~" or a "~" directly followed by a path separator is expanded.
 * Other names that merely start with a tilde (e.g. "~backup.csv" or "~user/x")
 * are returned unchanged instead of being redirected into the home directory.
 *
 * @param {string} filepath - Path that may start with "~".
 * @returns {string} The expanded path, or `filepath` itself when nothing is expanded.
 */
function expanduser(filepath) {
    if (filepath.charAt(0) !== '~') {
        return filepath;
    }
    const remainder = filepath.slice(1);
    const next_char = remainder.charAt(0);
    if (remainder !== '' && next_char !== '/' && next_char !== path.sep) {
        return filepath;
    }
    return path.join(os.homedir(), remainder);
}
|
|
|
|
|
|
/**
 * Resolve a table identifier to the path of an existing file.
 *
 * Candidates are checked in this order: the (tilde-expanded) id itself, the id relative
 * to `main_table_dir`, and finally the path stored in `vscode_global_state` under the
 * table-name key of `table_id`.
 *
 * @param {?object} vscode_global_state - Object with a `get(key)` method, or a falsy value.
 * @param {?string} main_table_dir - Directory used to resolve a relative id; falsy to skip.
 * @param {string} table_id - Table path or name.
 * @returns {?string} The first existing candidate, or null if none exists.
 */
function find_table_path(vscode_global_state, main_table_dir, table_id) {
    // If table_id is a relative path it could be relative either to the current directory or to the main table dir.
    const expanded_id = expanduser(table_id);
    const candidates = [expanded_id];
    if (main_table_dir && !path.isAbsolute(expanded_id)) {
        candidates.push(path.join(main_table_dir, expanded_id));
    }
    for (const candidate of candidates) {
        if (fs.existsSync(candidate)) {
            return candidate;
        }
    }
    const remembered_path = vscode_global_state ? vscode_global_state.get(make_table_name_key(table_id)) : null;
    return remembered_path && fs.existsSync(remembered_path) ? remembered_path : null;
}
|
|
|
|
|
|
/**
 * Read the first line of a file.
 *
 * The reader and the underlying file stream are both released as soon as the result is
 * known, so the file descriptor is not left open after the first line. An empty file
 * resolves with an empty string instead of leaving the promise pending forever.
 *
 * @param {string} table_path - Path of the file to read.
 * @param {string} encoding - Stream encoding; 'latin-1' is mapped to 'binary'.
 * @returns {Promise<string>} The first line without its line terminator, or '' for an empty file.
 *     The promise is rejected with the stream/reader error if the file cannot be read.
 */
async function read_header(table_path, encoding) {
    if (encoding == 'latin-1')
        encoding = 'binary';
    const readline = require('readline');
    const input_stream = fs.createReadStream(table_path, {encoding: encoding});
    const input_reader = readline.createInterface({ input: input_stream });
    return new Promise(function(resolve, reject) {
        let settled = false;
        // Settle the promise exactly once and release the reader and the file stream.
        const settle = (callback, value) => {
            if (settled)
                return;
            settled = true;
            input_reader.close();
            input_stream.destroy();
            callback(value);
        };
        input_reader.on('line', line => settle(resolve, line));
        // 'close' without a preceding 'line' event means that the file has no content.
        input_reader.on('close', () => settle(resolve, ''));
        input_reader.on('error', error => settle(reject, error));
        // Listen on the stream too, so that open/read failures always reject the promise.
        input_stream.on('error', error => settle(reject, error));
    });
}
|
|
|
|
|
|
/**
 * Return the text of the first line of a document that is not a comment.
 *
 * @param {object} document - Object exposing `lineCount` and `lineAt(i).text`.
 * @param {?string} comment_prefix - Prefix of comment lines; falsy means no line is a comment.
 * @returns {?string} The line text, or null if every line is a comment (or there are no lines).
 */
function get_header_line(document, comment_prefix) {
    const total_lines = document.lineCount;
    let line_idx = 0;
    while (line_idx < total_lines) {
        const text = document.lineAt(line_idx).text;
        const is_comment = comment_prefix && text.startsWith(comment_prefix);
        if (!is_comment) {
            return text;
        }
        line_idx += 1;
    }
    return null;
}
|
|
|
|
|
|
/**
 * Format a warning about records that have different numbers of fields.
 *
 * @param {string} table_name - Table name shown in the message.
 * @param {object} inconsistent_records_info - Maps a field count to a record number;
 *     must contain at least two entries.
 * @returns {string} Message citing the two entries with the smallest record numbers.
 */
function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
    const entries = Object.keys(inconsistent_records_info).map(
        (num_fields) => [inconsistent_records_info[num_fields], num_fields]
    );
    entries.sort((lhs, rhs) => lhs[0] - rhs[0]);
    assert(entries.length > 1);
    const [[first_record, first_num_fields], [second_record, second_num_fields]] = entries;
    const intro = `Number of fields in "${table_name}" table is not consistent: `;
    const examples = `e.g. record ${first_record} -> ${first_num_fields} fields, record ${second_record} -> ${second_num_fields} fields`;
    return intro + examples;
}
|
|
|
|
|
|
|
|
// Error type thrown in this file for input/output problems of RBQL queries, e.g. an unknown output policy, a missing join table or broken quoting.
class RbqlIOHandlingError extends Error {}
|
|
|
|
// RBQL input iterator which reads records directly from an editor document
// (an object exposing `lineCount` and `lineAt(i).text`).
class VSCodeRecordIterator extends rbql.RBQLInputIterator {
    constructor(document, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
        // We could have done a hack here actually: convert the document to stream/buffer and then use the standard reader.
        super();
        Object.assign(this, {document, delim, policy, has_header, comment_prefix, table_name, variable_prefix});
        this.NR = 0; // Record number.
        this.NL = 0; // Line number (NL != NR when the CSV file has comments or multiline fields).
        this.fields_info = {}; // Maps a field count to the number of the first record which has it.
        this.first_defective_line = null;
        this.first_record = this.get_first_record();
    }

    // Nothing to release: the iterator holds no resources of its own.
    stop() {
    }

    // Split the first non-comment line of the document into fields.
    get_first_record() {
        const header_line = get_header_line(this.document, this.comment_prefix);
        return csv_utils.smart_split(header_line, this.delim, this.policy, /*preserve_quotes_and_whitespaces=*/false)[0];
    }

    // Collect the variables referenced by the query; header-based variables are added only when the table has a header.
    async get_variables_map(query_text) {
        const variable_map = {};
        rbql.parse_basic_variables(query_text, this.variable_prefix, variable_map);
        rbql.parse_array_variables(query_text, this.variable_prefix, variable_map);
        const header_fields = this.get_first_record();
        if (this.has_header) {
            rbql.parse_attribute_variables(query_text, this.variable_prefix, header_fields, 'CSV header line', variable_map);
            rbql.parse_dictionary_variables(query_text, this.variable_prefix, header_fields, variable_map);
        }
        return variable_map;
    }

    // Header fields, or null when the table has no header.
    async get_header() {
        if (!this.has_header) {
            return null;
        }
        return this.first_record;
    }

    // Consume and return the next document line. Returns null at the end of the document,
    // including the final empty line which can appear due to a trailing newline.
    read_next_line() {
        const total_lines = this.document.lineCount;
        if (this.NL >= total_lines) {
            return null;
        }
        const line = this.document.lineAt(this.NL).text;
        this.NL += 1;
        if (this.NL == total_lines && line.length == 0) {
            return null;
        }
        return line;
    }

    // Next record text for the 'quoted_rfc' policy, where one record may span several lines.
    get_line_rfc() {
        const rfc_line_buffer = [];
        for (let line = this.read_next_line(); line !== null; line = this.read_next_line()) {
            const record_line = csv_utils.accumulate_rfc_line_into_record(rfc_line_buffer, line, this.comment_prefix);
            if (record_line !== null) {
                return record_line;
            }
        }
        return null;
    }

    // Next non-comment line for the single-line policies.
    get_line_simple() {
        for (let line = this.read_next_line(); line !== null; line = this.read_next_line()) {
            if (this.comment_prefix === null || !line.startsWith(this.comment_prefix)) {
                return line;
            }
        }
        return null;
    }

    // Read and split the next record; returns null when the input is exhausted.
    do_get_record() {
        const line = this.policy == 'quoted_rfc' ? this.get_line_rfc() : this.get_line_simple();
        if (line === null) {
            return null;
        }
        const [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, /*preserve_quotes_and_whitespaces=*/false);
        if (warning && this.first_defective_line === null) {
            // Only the first defective line is remembered; for the rfc policy it is a hard error.
            this.first_defective_line = this.NL;
            if (this.policy == 'quoted_rfc') {
                throw new RbqlIOHandlingError(`Inconsistent double quote escaping in ${this.table_name} table at record ${this.NR}, line ${this.NL}`);
            }
        }
        const num_fields = record.length;
        if (!Object.prototype.hasOwnProperty.call(this.fields_info, num_fields)) {
            this.fields_info[num_fields] = this.NR;
        }
        return record;
    }

    // Return the next data record; the header record is skipped on the first call.
    async get_record() {
        const must_skip_header = this.NR == 0 && this.has_header;
        if (must_skip_header) {
            this.do_get_record();
        }
        this.NR += 1;
        return this.do_get_record();
    }

    // Warnings accumulated while reading: quoting defects and inconsistent field counts.
    get_warnings() {
        const warnings = [];
        if (this.first_defective_line !== null) {
            warnings.push(`Inconsistent double quote escaping in ${this.table_name} table. E.g. at line ${this.first_defective_line}`);
        }
        if (Object.keys(this.fields_info).length > 1) {
            warnings.push(make_inconsistent_num_fields_warning(this.table_name, this.fields_info));
        }
        return warnings;
    }
}
|
|
|
|
|
|
// RBQL output writer which collects the formatted output lines in memory (`output_lines`).
class VSCodeWriter extends rbql.RBQLOutputWriter {
    // `delim` is the output field separator, `policy` selects the join method:
    // 'simple', 'quoted', 'quoted_rfc', 'monocolumn' or 'whitespace'.
    // Throws RbqlIOHandlingError for any other policy.
    constructor(delim, policy) {
        super();
        this.delim = delim;
        this.policy = policy;
        // Separator used by normalize_fields() to flatten array-valued fields. It was never
        // initialised before, so join() silently fell back to ",".
        // NOTE(review): '|' is meant to mirror the sub-array separator of RBQL's CSV writer - confirm.
        this.sub_array_delim = '|';
        this.header_len = null;
        this.null_in_output = false;
        this.delim_in_simple_output = false;
        this.output_lines = [];

        if (policy == 'simple') {
            this.polymorphic_join = this.simple_join;
        } else if (policy == 'quoted') {
            this.polymorphic_join = this.quoted_join;
        } else if (policy == 'quoted_rfc') {
            this.polymorphic_join = this.quoted_join_rfc;
        } else if (policy == 'monocolumn') {
            this.polymorphic_join = this.mono_join;
        } else if (policy == 'whitespace') {
            this.polymorphic_join = this.simple_join;
        } else {
            throw new RbqlIOHandlingError('Unknown output csv policy');
        }
    }

    // Write the header (when it is not null) and remember its length for the consistency check in write().
    set_header(header) {
        if (header !== null) {
            this.header_len = header.length;
            this.write(header);
        }
    }

    // Join fields, quoting each of them with csv_utils.quote_field.
    quoted_join(fields) {
        const delim = this.delim;
        const quoted_fields = fields.map((value) => csv_utils.quote_field(String(value), delim));
        return quoted_fields.join(delim);
    }

    // Join fields, quoting each of them with csv_utils.rfc_quote_field.
    quoted_join_rfc(fields) {
        const delim = this.delim;
        const quoted_fields = fields.map((value) => csv_utils.rfc_quote_field(String(value), delim));
        return quoted_fields.join(delim);
    }

    // Single-column output: more than one field per record is an error.
    mono_join(fields) {
        if (fields.length > 1) {
            throw new RbqlIOHandlingError('Unable to use "Monocolumn" output format: some records have more than one field');
        }
        return fields[0];
    }

    // Join fields without quoting; remember if a field itself contains the separator.
    simple_join(fields) {
        const res = fields.join(this.delim);
        if (fields.join('').indexOf(this.delim) != -1) {
            this.delim_in_simple_output = true;
        }
        return res;
    }

    // In place: replace null/undefined with '' and flatten nested arrays into strings.
    normalize_fields(out_fields) {
        for (let i = 0; i < out_fields.length; i++) {
            if (out_fields[i] == null) {
                this.null_in_output = true;
                out_fields[i] = '';
            } else if (Array.isArray(out_fields[i])) {
                this.normalize_fields(out_fields[i]);
                out_fields[i] = out_fields[i].join(this.sub_array_delim);
            }
        }
    }

    // Format one record and append it to output_lines. Throws RbqlIOHandlingError when the
    // number of fields differs from the header length. Always returns true.
    write(fields) {
        if (this.header_len !== null && fields.length != this.header_len)
            throw new RbqlIOHandlingError(`Inconsistent number of columns in output header and the current record: ${this.header_len} != ${fields.length}`);
        this.normalize_fields(fields);
        this.output_lines.push(this.polymorphic_join(fields));
        return true;
    }

    // Nothing to flush: the output is kept in memory.
    async finish() {
    }

    // Warnings accumulated while writing.
    get_warnings() {
        const result = [];
        if (this.null_in_output)
            result.push('null values in output were replaced by empty strings');
        if (this.delim_in_simple_output)
            result.push('Some output fields contain separator');
        return result;
    }
}
|
|
|
|
// Join table registry stub for the web version, where JOIN queries are not available.
class VSCodeTableRegistry {
    // Always throws: join tables cannot be provided in the web version.
    get_iterator_by_table_id(_table_id) {
        const message = "JOIN queries are currently not supported in vscode.dev web version.";
        throw new RbqlIOHandlingError(message);
    }

    // The stub never produces warnings.
    get_warnings() {
        const no_warnings = [];
        return no_warnings;
    }
}
|
|
|
|
/**
 * Run an RBQL query against an editor document and return the output lines.
 *
 * Init code and JOIN tables are not available here: the init code is empty and the
 * join registry rejects every table id.
 *
 * @param {string} query_text - RBQL query.
 * @param {object} input_document - Document passed to VSCodeRecordIterator.
 * @param {string} input_delim - Input field separator.
 * @param {string} input_policy - Input split policy.
 * @param {string} output_delim - Output field separator.
 * @param {string} output_policy - Output join policy.
 * @param {string[]} output_warnings - Passed to `rbql.query` to collect warnings.
 * @param {boolean} with_headers - Whether the first record is a header.
 * @param {?string} comment_prefix - Prefix of comment lines in the input.
 * @returns {Promise<string[]>} Formatted output lines of the query.
 */
async function rbql_query_web(query_text, input_document, input_delim, input_policy, output_delim, output_policy, output_warnings, with_headers, comment_prefix=null) {
    const init_code = ''; // TODO find a way to have init code.
    const tables_registry = new VSCodeTableRegistry(); // TODO find a way to have join registry.
    const records_reader = new VSCodeRecordIterator(input_document, input_delim, input_policy, with_headers, comment_prefix);
    const lines_writer = new VSCodeWriter(output_delim, output_policy);
    await rbql.query(query_text, records_reader, lines_writer, output_warnings, tables_registry, init_code);
    const { output_lines } = lines_writer;
    return output_lines;
}
|
|
|
|
|
|
// Join table registry which resolves table ids to CSV files on disk.
class VSCodeFileSystemCSVRegistry extends rbql.RBQLTableRegistry {
    constructor(vscode_global_state, input_file_dir, delim, policy, encoding, has_header=false, comment_prefix=null, options=null) {
        super();
        Object.assign(this, {vscode_global_state, input_file_dir, delim, policy, encoding, has_header, comment_prefix});
        // Filled in by get_iterator_by_table_id().
        this.stream = null;
        this.record_iterator = null;

        this.options = options;
        this.bulk_input_path = null;
        this.table_path = null;
    }

    // Locate the join table and create a CSV record iterator for it (variable prefix 'b').
    // Throws RbqlIOHandlingError if the table file cannot be found.
    get_iterator_by_table_id(table_id) {
        this.table_path = find_table_path(this.vscode_global_state, this.input_file_dir, table_id);
        if (this.table_path === null) {
            throw new RbqlIOHandlingError(`Unable to find join table "${table_id}"`);
        }
        const use_bulk_read = this.options && this.options['bulk_read'];
        if (use_bulk_read) {
            // The iterator gets the file path instead of a stream.
            this.bulk_input_path = this.table_path;
        } else {
            this.stream = fs.createReadStream(this.table_path);
        }
        this.record_iterator = new rbql_csv.CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
        return this.record_iterator;
    }

    // Append the registry warnings to `output_warnings`; nothing is returned.
    get_warnings(output_warnings) {
        const join_header_was_skipped = this.record_iterator && this.has_header;
        if (join_header_was_skipped) {
            output_warnings.push(`The first record in JOIN file ${path.basename(this.table_path)} was also treated as header (and skipped)`);
        }
    }
}
|
|
|
|
|
|
/**
 * Run an RBQL query on a CSV file (or stdin) and write the result to a file (or stdout).
 *
 * All arguments are validated before any stream is opened, so a rejected call neither
 * creates/truncates the output file nor leaves dangling streams behind.
 *
 * @param {?object} vscode_global_state - State object used to look up join tables.
 * @param {string} query_text - RBQL query.
 * @param {?string} input_path - Input file path; null reads from stdin.
 * @param {string} input_delim - Input field separator.
 * @param {string} input_policy - Input split policy.
 * @param {?string} output_path - Output file path; null writes to stdout.
 * @param {string} output_delim - Output field separator.
 * @param {string} output_policy - Output join policy.
 * @param {string} csv_encoding - File encoding; 'latin-1' is mapped to 'binary'.
 * @param {string[]} output_warnings - Receives the warnings of the query.
 * @param {boolean} with_headers - Whether the first record is a header.
 * @param {?string} comment_prefix - Prefix of comment lines in the input.
 * @param {string} user_init_code - Init code; when empty, `~/.rbql_init_source.js` is used if it exists.
 * @param {?object} options - Optional settings; `bulk_read` passes the input path instead of a stream.
 * @throws {RbqlIOHandlingError} For incompatible delimiter/policy/encoding combinations.
 */
async function rbql_query_node(vscode_global_state, query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers=false, comment_prefix=null, user_init_code='', options=null) {
    if (input_delim == '"' && input_policy == 'quoted')
        throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
    if (csv_encoding == 'latin-1')
        csv_encoding = 'binary';
    if (!rbql_csv.is_ascii(query_text) && csv_encoding == 'binary')
        throw new RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary');
    if ((!rbql_csv.is_ascii(input_delim) || !rbql_csv.is_ascii(output_delim)) && csv_encoding == 'binary')
        throw new RbqlIOHandlingError('To use non-ascii separators enable UTF-8 encoding instead of latin-1/binary');

    const default_init_source_path = path.join(os.homedir(), '.rbql_init_source.js');
    if (user_init_code == '' && fs.existsSync(default_init_source_path)) {
        user_init_code = rbql_csv.read_user_init_code(default_init_source_path);
    }

    // Open the streams only after everything that can fail up front has passed.
    let input_stream = null;
    let bulk_input_path = null;
    if (options && options['bulk_read'] && input_path) {
        bulk_input_path = input_path;
    } else {
        input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
    }
    const [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];

    const input_file_dir = input_path ? path.dirname(input_path) : null;
    const join_tables_registry = new VSCodeFileSystemCSVRegistry(vscode_global_state, input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
    const input_iterator = new rbql_csv.CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
    const output_writer = new rbql_csv.CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);

    await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
    join_tables_registry.get_warnings(output_warnings);
}
|
|
|
|
|
|
// Public API of the module.
Object.assign(module.exports, {
    make_table_name_key,
    find_table_path,
    read_header,
    rbql_query_web,
    rbql_query_node,
    get_header_line,
    populate_optimistic_rfc_csv_record_map,
    get_default_js_udf_content,
    get_default_python_udf_content,
    align_columns,
    shrink_columns,
    calc_column_stats,
    adjust_column_stats,
    update_subcomponent_stats,
    align_field,
});
|