solorice/vscodium/extensions/mechatroner.rainbow-csv-2.3.0/rainbow_utils.js

const os = require('os');
const fs = require('fs');
const path = require('path');

const rbql = require('./rbql_core/rbql-js/rbql.js');
const rbql_csv = require('./rbql_core/rbql-js/rbql_csv.js');
const csv_utils = require('./rbql_core/rbql-js/csv_utils.js');

const non_numeric_sentinel = -1;
const number_regex = /^([0-9]+)(\.[0-9]+)?$/;

class AssertionError extends Error {}

function assert(condition, message=null) {
    if (!condition) {
        if (!message) {
            message = 'Assertion error';
        }
        throw new AssertionError(message);
    }
}


function get_default_js_udf_content() {
    let default_content = `// This file can be used to store RBQL UDFs. Example:
    //
    // function foo(value) {
    //     return 'foo ' + String(value.length);
    // }
    //
    // Functions defined in this file can be used in RBQL queries e.g.
    // SELECT foo(a1), a2 WHERE foo(a3) != 'foo 5' LIMIT 10
    //
    // Don't forget to save this file after editing!
    //
    // Write your own functions bellow this line:
    `.replace(new RegExp(/^  */, 'mg'), '');
    return default_content;
}


function get_default_python_udf_content() {
    let default_content = `# This file can be used to store RBQL UDFs. Example:
    #
    # def foo(value):
    #     return 'foo ' + str(len(value))
    #
    #
    # Functions defined in this file can be used in RBQL queries e.g.
    # SELECT foo(a1), a2 WHERE foo(a3) != 'foo 5' LIMIT 10
    #
    # Don't forget to save this file after editing!
    #
    # Write your own functions bellow this line:
    `.replace(new RegExp(/^  */, 'mg'), '');
    return default_content;
}


function update_subcomponent_stats(field, is_first_line, max_field_components_lens) {
    // Extract overall field length and length of integer and fractional parts of the field if it represents a number.
    // Here `max_field_components_lens` is a tuple: (max_field_length, max_integer_part_length, max_fractional_part_length)
    if (field.length > max_field_components_lens[0]) {
        max_field_components_lens[0] = field.length;
    }
    if (max_field_components_lens[1] == non_numeric_sentinel) {
        // Column is not a number, early return.
        return;
    }
    let match_result = number_regex.exec(field);
    if (match_result === null) {
        if (!is_first_line && field.length) { // Checking field_length here allows numeric columns to have some of the fields empty.
            // We only mark the column as non-header if we know that this is not a header line.
            max_field_components_lens[1] = non_numeric_sentinel;
            max_field_components_lens[2] = non_numeric_sentinel;
        }
        return;
    }
    let cur_integer_part_length = match_result[1].length;
    max_field_components_lens[1] = Math.max(max_field_components_lens[1], cur_integer_part_length);
    let cur_fractional_part_length = match_result[2] === undefined ? 0 : match_result[2].length;
    max_field_components_lens[2] = Math.max(max_field_components_lens[2], cur_fractional_part_length);
}


function calc_column_stats(active_doc, delim, policy, comment_prefix) {
    let column_stats = [];
    let num_lines = active_doc.lineCount;
    let is_first_line = true;
    for (let lnum = 0; lnum < num_lines; lnum++) {
        let line_text = active_doc.lineAt(lnum).text;
        if (comment_prefix && line_text.startsWith(comment_prefix))
            continue;
        let [fields, warning] = csv_utils.smart_split(line_text, delim, policy, true);
        if (warning) {
            return [null, lnum + 1];
        }
        for (let fnum = 0; fnum < fields.length; fnum++) {
            let field = fields[fnum].trim();
            if (column_stats.length <= fnum) {
                column_stats.push([0, 0, 0]);
            }
            update_subcomponent_stats(field, is_first_line, column_stats[fnum]);
        }
        is_first_line = false;
    }
    return [column_stats, null];
}


function adjust_column_stats(column_stats) {
    // Ensure that numeric components max widths are consistent with non-numeric (header) width.
    let adjusted_stats = [];
    for (let column_stat of column_stats) {
        if (column_stat[1] <= 0) {
            column_stat[1] = -1;
            column_stat[2] = -1;
        }
        if (column_stat[1] > 0) {
            // The sum of integer and float parts can be bigger than the max width, e.g. here:
            // value
            // 0.12
            // 1234
            if (column_stat[1] + column_stat[2] > column_stat[0]) {
                column_stat[0] = column_stat[1] + column_stat[2];
            }
            // This is needed when the header is wider than numeric components and/or their sum.
            if (column_stat[0] - column_stat[2] > column_stat[1]) {
                column_stat[1] = column_stat[0] - column_stat[2];
            }
            // Sanity check.
            if (column_stat[0] != column_stat[1] + column_stat[2]) {
                // Assertion Error, this can never happen.
                return null;
            }
        }
        adjusted_stats.push(column_stat);
    }
    return adjusted_stats;
}


function align_field(field, is_first_line, max_field_components_lens, is_last_column) {
    // Align field, use Math.max() to avoid negative delta_length which can happen theorethically due to async doc edit.
    const extra_readability_whitespace_length = 1;
    field = field.trim();
    if (max_field_components_lens[1] == non_numeric_sentinel) {
        let delta_length = Math.max(max_field_components_lens[0] - field.length, 0);
        return is_last_column ? field : field + ' '.repeat(delta_length + extra_readability_whitespace_length);
    }
    if (is_first_line) {
        if (number_regex.exec(field) === null) {
            // The line must be a header - align it using max_width rule.
            let delta_length = Math.max(max_field_components_lens[0] - field.length, 0);
            return is_last_column ? field : field + ' '.repeat(delta_length + extra_readability_whitespace_length);
        }
    }
    let dot_pos = field.indexOf('.');
    let cur_integer_part_length = dot_pos == -1 ? field.length : dot_pos;
    // Here cur_fractional_part_length includes the leading dot too.
    let cur_fractional_part_length = dot_pos == -1 ? 0 : field.length - dot_pos;
    let integer_delta_length = Math.max(max_field_components_lens[1] - cur_integer_part_length, 0);
    let fractional_delta_length = Math.max(max_field_components_lens[2] - cur_fractional_part_length);
    let trailing_spaces = is_last_column ? '' : ' '.repeat(fractional_delta_length + extra_readability_whitespace_length);
    return ' '.repeat(integer_delta_length) + field + trailing_spaces;
}


function align_columns(active_doc, delim, policy, comment_prefix, column_stats) {
    let result_lines = [];
    let num_lines = active_doc.lineCount;
    let has_edit = false;
    let is_first_line = true;
    for (let lnum = 0; lnum < num_lines; lnum++) {
        let line_text = active_doc.lineAt(lnum).text;
        if (comment_prefix && line_text.startsWith(comment_prefix)) {
            result_lines.push(line_text);
            continue;
        }
        if (lnum + 1 == num_lines && line_text == '') {
            // Skip the last empty line which corresponds to the trailing newline character.
            result_lines.push(line_text);
            continue;
        }
        let fields = csv_utils.smart_split(line_text, delim, policy, true)[0];
        for (let fnum = 0; fnum < fields.length; fnum++) {
            if (fnum >= column_stats.length) // Safeguard against async doc edit, should never happen.
                break;
            let is_last_column = fnum + 1 == column_stats.length;
            let adjusted = align_field(fields[fnum], is_first_line, column_stats[fnum], is_last_column);
            if (fields[fnum] != adjusted) {
                fields[fnum] = adjusted;
                has_edit = true;
            }
        }
        is_first_line = false;
        result_lines.push(fields.join(delim));
    }
    if (!has_edit)
        return null;
    return result_lines.join('\n');
}


function shrink_columns(active_doc, delim, policy, comment_prefix) {
    let result_lines = [];
    let num_lines = active_doc.lineCount;
    let has_edit = false;
    for (let lnum = 0; lnum < num_lines; lnum++) {
        let line_text = active_doc.lineAt(lnum).text;
        if (comment_prefix && line_text.startsWith(comment_prefix)) {
            result_lines.push(line_text);
            continue;
        }
        let [fields, warning] = csv_utils.smart_split(line_text, delim, policy, true);
        if (warning) {
            return [null, lnum + 1];
        }
        for (let i = 0; i < fields.length; i++) {
            let adjusted = fields[i].trim();
            if (fields[i].length != adjusted.length) {
                fields[i] = adjusted;
                has_edit = true;
            }
        }
        result_lines.push(fields.join(delim));
    }
    if (!has_edit)
        return [null, null];
    return [result_lines.join('\n'), null];
}


function get_last(arr) {
    return arr[arr.length - 1];
}


function populate_optimistic_rfc_csv_record_map(document, requested_end_record, dst_record_map, comment_prefix=null) {
    let num_lines = document.lineCount;
    let record_begin = null;
    let start_line_idx = dst_record_map.length ? get_last(dst_record_map)[1] : 0;
    for (let lnum = start_line_idx; lnum < num_lines && dst_record_map.length < requested_end_record; ++lnum) {
        let line_text = document.lineAt(lnum).text;
        if (lnum + 1 >= num_lines && line_text == "")
            break; // Skip the last empty line.
        if (comment_prefix && line_text.startsWith(comment_prefix))
            continue;
        let match_list = line_text.match(/"/g);
        let has_unbalanced_double_quote = match_list && match_list.length % 2 == 1;
        if (record_begin === null && !has_unbalanced_double_quote) {
            dst_record_map.push([lnum, lnum + 1]);
        } else if (record_begin === null && has_unbalanced_double_quote) {
            record_begin = lnum;
        } else if (!has_unbalanced_double_quote) {
            continue;
        } else {
            dst_record_map.push([record_begin, lnum + 1]);
            record_begin = null;
        }
    }
    if (record_begin !== null) {
        dst_record_map.push([record_begin, num_lines]);
    }
}


function make_table_name_key(file_path) {
    return 'rbql_table_name:' + file_path;
}


function expanduser(filepath) {
    if (filepath.charAt(0) === '~') {
        return path.join(os.homedir(), filepath.slice(1));
    }
    return filepath;
}


function find_table_path(vscode_global_state, main_table_dir, table_id) {
    // If table_id is a relative path it could be relative either to the current directory or to the main table dir.
    var candidate_path = expanduser(table_id);
    if (fs.existsSync(candidate_path)) {
        return candidate_path;
    }
    if (main_table_dir && !path.isAbsolute(candidate_path)) {
        candidate_path = path.join(main_table_dir, candidate_path);
        if (fs.existsSync(candidate_path)) {
            return candidate_path;
        }
    }
    let table_path = vscode_global_state ? vscode_global_state.get(make_table_name_key(table_id)) : null;
    if (table_path && fs.existsSync(table_path)) {
        return table_path;
    }
    return null;
}


async function read_header(table_path, encoding) {
    if (encoding == 'latin-1')
        encoding = 'binary';
    let readline = require('readline');
    let input_reader = readline.createInterface({ input: fs.createReadStream(table_path, {encoding: encoding}) });
    let closed = false;
    let promise_resolve = null;
    let promise_reject = null;
    let output_promise = new Promise(function(resolve, reject) {
        promise_resolve = resolve;
        promise_reject = reject;
    });
    input_reader.on('line', line => {
        if (!closed) {
            closed = true;
            input_reader.close();
            promise_resolve(line);
        }
    });
    input_reader.on('error', error => {
        promise_reject(error);
    });
    return output_promise;
}


function get_header_line(document, comment_prefix) {
    const num_lines = document.lineCount;
    for (let lnum = 0; lnum < num_lines; ++lnum) {
        const line_text = document.lineAt(lnum).text;
        if (!comment_prefix || !line_text.startsWith(comment_prefix)) {
            return line_text;
        }
    }
    return null;
}


function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
    let keys = Object.keys(inconsistent_records_info);
    let entries = [];
    for (let i = 0; i < keys.length; i++) {
        let key = keys[i];
        let record_id = inconsistent_records_info[key];
        entries.push([record_id, key]);
    }
    entries.sort(function(a, b) { return a[0] - b[0]; });
    assert(entries.length > 1);
    let [record_1, num_fields_1] = entries[0];
    let [record_2, num_fields_2] = entries[1];
    let warn_msg = `Number of fields in "${table_name}" table is not consistent: `;
    warn_msg += `e.g. record ${record_1} -> ${num_fields_1} fields, record ${record_2} -> ${num_fields_2} fields`;
    return warn_msg;
}


class RbqlIOHandlingError extends Error {}

class VSCodeRecordIterator extends rbql.RBQLInputIterator {
    constructor(document, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
        // We could have done a hack here actually: convert the document to stream/buffer and then use the standard reader.
        super();
        this.document = document;
        this.delim = delim;
        this.policy = policy;
        this.has_header = has_header;
        this.comment_prefix = comment_prefix;
        this.table_name = table_name;
        this.variable_prefix = variable_prefix;
        this.NR = 0; // Record number.
        this.NL = 0; // Line number (NL != NR when the CSV file has comments or multiline fields).
        this.fields_info = new Object();
        this.first_defective_line = null;
        this.first_record = this.get_first_record();
    }

    stop() {
    }

    get_first_record() {
        let header_line = get_header_line(this.document, this.comment_prefix);
        let first_record = csv_utils.smart_split(header_line, this.delim, this.policy, /*preserve_quotes_and_whitespaces=*/false)[0];
        return first_record;
    }

    async get_variables_map(query_text) {
        let variable_map = new Object();
        rbql.parse_basic_variables(query_text, this.variable_prefix, variable_map);
        rbql.parse_array_variables(query_text, this.variable_prefix, variable_map);
        let header_line = get_header_line(this.document, this.comment_prefix);
        let first_record = csv_utils.smart_split(header_line, this.delim, this.policy, /*preserve_quotes_and_whitespaces=*/false)[0];
        if (this.has_header) {
            rbql.parse_attribute_variables(query_text, this.variable_prefix, first_record, 'CSV header line', variable_map);
            rbql.parse_dictionary_variables(query_text, this.variable_prefix, first_record, variable_map);
        }
        return variable_map;
    }

    async get_header() {
        return this.has_header ? this.first_record : null;
    }

    get_line_rfc() {
        let rfc_line_buffer = [];
        const num_lines = this.document.lineCount;
        while (this.NL < num_lines) {
            let line = this.document.lineAt(this.NL).text;
            this.NL += 1;
            if (this.NL == num_lines && line.length == 0)
                return null; // Skip the last line if it is empty - this can happen due to trailing newline.
            let record_line = csv_utils.accumulate_rfc_line_into_record(rfc_line_buffer, line, this.comment_prefix);
            if (record_line !== null)
                return record_line;
        }
        return null;
    }

    get_line_simple() {
        const num_lines = this.document.lineCount;
        while (this.NL < num_lines) {
            let line = this.document.lineAt(this.NL).text;
            this.NL += 1;
            if (this.NL == num_lines && line.length == 0)
                return null; // Skip the last line if it is empty - this can happen due to trailing newline.
            if (this.comment_prefix === null || !line.startsWith(this.comment_prefix))
                return line;
        }
        return null;
    }

    do_get_record() {
        let line = (this.policy == 'quoted_rfc') ? this.get_line_rfc() : this.get_line_simple();
        if (line === null)
            return null;
        let [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, /*preserve_quotes_and_whitespaces=*/false);
        if (warning) {
            if (this.first_defective_line === null) {
                this.first_defective_line = this.NL;
                if (this.policy == 'quoted_rfc')
                    throw new RbqlIOHandlingError(`Inconsistent double quote escaping in ${this.table_name} table at record ${this.NR}, line ${this.NL}`);
            }
        }
        let num_fields = record.length;
        if (!this.fields_info.hasOwnProperty(num_fields))
            this.fields_info[num_fields] = this.NR;
        return record;
    }

    async get_record() {
        if (this.NR == 0 && this.has_header) {
            this.do_get_record(); // Skip the header record.
        }
        this.NR += 1;
        let record = this.do_get_record();
        return record;
    }

    get_warnings() {
        let result = [];
        if (this.first_defective_line !== null)
            result.push(`Inconsistent double quote escaping in ${this.table_name} table. E.g. at line ${this.first_defective_line}`);
        if (Object.keys(this.fields_info).length > 1)
            result.push(make_inconsistent_num_fields_warning(this.table_name, this.fields_info));
        return result;
    }
}


class VSCodeWriter extends rbql.RBQLOutputWriter {
    constructor(delim, policy) {
        super();
        this.delim = delim;
        this.policy = policy;
        this.header_len = null;
        this.null_in_output = false;
        this.delim_in_simple_output = false;
        this.output_lines = [];

        if (policy == 'simple') {
            this.polymorphic_join = this.simple_join;
        } else if (policy == 'quoted') {
            this.polymorphic_join = this.quoted_join;
        } else if (policy == 'quoted_rfc') {
            this.polymorphic_join = this.quoted_join_rfc;
        } else if (policy == 'monocolumn') {
            this.polymorphic_join = this.mono_join;
        } else if (policy == 'whitespace') {
            this.polymorphic_join = this.simple_join;
        } else {
            throw new RbqlIOHandlingError('Unknown output csv policy');
        }
    }

    set_header(header) {
        if (header !== null) {
            this.header_len = header.length;
            this.write(header);
        }
    }

    quoted_join(fields) {
        let delim = this.delim;
        var quoted_fields = fields.map(function(v) { return csv_utils.quote_field(String(v), delim); });
        return quoted_fields.join(this.delim);
    };


    quoted_join_rfc(fields) {
        let delim = this.delim;
        var quoted_fields = fields.map(function(v) { return csv_utils.rfc_quote_field(String(v), delim); });
        return quoted_fields.join(this.delim);
    };


    mono_join(fields) {
        if (fields.length > 1) {
            throw new RbqlIOHandlingError('Unable to use "Monocolumn" output format: some records have more than one field');
        }
        return fields[0];
    };


    simple_join(fields) {
        var res = fields.join(this.delim);
        if (fields.join('').indexOf(this.delim) != -1) {
            this.delim_in_simple_output = true;
        }
        return res;
    };


    normalize_fields(out_fields) {
        for (var i = 0; i < out_fields.length; i++) {
            if (out_fields[i] == null) {
                this.null_in_output = true;
                out_fields[i] = '';
            } else if (Array.isArray(out_fields[i])) {
                this.normalize_fields(out_fields[i]);
                out_fields[i] = out_fields[i].join(this.sub_array_delim);
            }
        }
    };


    write(fields) {
        if (this.header_len !== null && fields.length != this.header_len)
            throw new RbqlIOHandlingError(`Inconsistent number of columns in output header and the current record: ${this.header_len} != ${fields.length}`);
        this.normalize_fields(fields);
        this.output_lines.push(this.polymorphic_join(fields));
        return true;
    };

    async finish() {
    }

    get_warnings() {
        let result = [];
        if (this.null_in_output)
            result.push('null values in output were replaced by empty strings');
        if (this.delim_in_simple_output)
            result.push('Some output fields contain separator');
        return result;
    };
}

class VSCodeTableRegistry {
    constructor(){}

    get_iterator_by_table_id(_table_id) {
        throw new RbqlIOHandlingError("JOIN queries are currently not supported in vscode.dev web version.");
    }

    get_warnings() {
        return [];
    };
}

async function rbql_query_web(query_text, input_document, input_delim, input_policy, output_delim, output_policy, output_warnings, with_headers, comment_prefix=null) {
    let user_init_code = ''; // TODO find a way to have init code.
    let join_tables_registry = new VSCodeTableRegistry(); // TODO find a way to have join registry.
    let input_iterator = new VSCodeRecordIterator(input_document, input_delim, input_policy, with_headers, comment_prefix);
    let output_writer = new VSCodeWriter(output_delim, output_policy);
    await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
    return output_writer.output_lines;
}


class VSCodeFileSystemCSVRegistry extends rbql.RBQLTableRegistry {
    constructor(vscode_global_state, input_file_dir, delim, policy, encoding, has_header=false, comment_prefix=null, options=null) {
        super();
        this.vscode_global_state = vscode_global_state;
        this.input_file_dir = input_file_dir;
        this.delim = delim;
        this.policy = policy;
        this.encoding = encoding;
        this.has_header = has_header;
        this.comment_prefix = comment_prefix;
        this.stream = null;
        this.record_iterator = null;

        this.options = options;
        this.bulk_input_path = null;
        this.table_path = null;
    }

    get_iterator_by_table_id(table_id) {
        this.table_path = find_table_path(this.vscode_global_state, this.input_file_dir, table_id);
        if (this.table_path === null) {
            throw new RbqlIOHandlingError(`Unable to find join table "${table_id}"`);
        }
        if (this.options && this.options['bulk_read']) {
            this.bulk_input_path = this.table_path;
        } else {
            this.stream = fs.createReadStream(this.table_path);
        }
        this.record_iterator = new rbql_csv.CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
        return this.record_iterator;
    };

    get_warnings(output_warnings) {
        if (this.record_iterator && this.has_header) {
            output_warnings.push(`The first record in JOIN file ${path.basename(this.table_path)} was also treated as header (and skipped)`);
        }
    }
}


async function rbql_query_node(vscode_global_state, query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers=false, comment_prefix=null, user_init_code='', options=null) {
    let input_stream = null;
    let bulk_input_path = null;
    if (options && options['bulk_read'] && input_path) {
        bulk_input_path = input_path;
    } else {
        input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
    }
    let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];
    if (input_delim == '"' && input_policy == 'quoted')
        throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
    if (csv_encoding == 'latin-1')
        csv_encoding = 'binary';
    if (!rbql_csv.is_ascii(query_text) && csv_encoding == 'binary')
        throw new RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary');
    if ((!rbql_csv.is_ascii(input_delim) || !rbql_csv.is_ascii(output_delim)) && csv_encoding == 'binary')
        throw new RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary');

    let default_init_source_path = path.join(os.homedir(), '.rbql_init_source.js');
    if (user_init_code == '' && fs.existsSync(default_init_source_path)) {
        user_init_code = rbql_csv.read_user_init_code(default_init_source_path);
    }
    let input_file_dir = input_path ? path.dirname(input_path) : null;
    let join_tables_registry = new VSCodeFileSystemCSVRegistry(vscode_global_state, input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
    let input_iterator = new rbql_csv.CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
    let output_writer = new rbql_csv.CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);

    await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
    join_tables_registry.get_warnings(output_warnings);
}


module.exports.make_table_name_key = make_table_name_key;
module.exports.find_table_path = find_table_path;
module.exports.read_header = read_header;
module.exports.rbql_query_web = rbql_query_web;
module.exports.rbql_query_node = rbql_query_node;
module.exports.get_header_line = get_header_line;
module.exports.populate_optimistic_rfc_csv_record_map = populate_optimistic_rfc_csv_record_map;
module.exports.get_default_js_udf_content = get_default_js_udf_content;
module.exports.get_default_python_udf_content = get_default_python_udf_content;
module.exports.align_columns = align_columns;
module.exports.shrink_columns = shrink_columns;
module.exports.calc_column_stats = calc_column_stats;
module.exports.adjust_column_stats = adjust_column_stats;
module.exports.update_subcomponent_stats = update_subcomponent_stats;
module.exports.align_field = align_field;