mirror of
https://github.com/kristoferssolo/solorice.git
synced 2025-10-21 20:10:34 +00:00
Minor changes
This commit is contained in:
parent
62035b9b7b
commit
314bc1f53f
@ -235,7 +235,7 @@ awful.screen.connect_for_each_screen(function(s)
|
||||
s.mytasklist, -- Middle widget
|
||||
{ -- Right widgets
|
||||
layout = wibox.layout.fixed.horizontal,
|
||||
wibox.widget.systray(),
|
||||
wibox.widget.systray(),
|
||||
cpu_widget(),
|
||||
ram_widget(),
|
||||
net_speed_widget(),
|
||||
@ -253,7 +253,7 @@ awful.screen.connect_for_each_screen(function(s)
|
||||
--widget_type = 'arc',
|
||||
--mixer_cmd = 'pulsemixer',
|
||||
--},
|
||||
logout_menu_widget(),
|
||||
logout_menu_widget(),
|
||||
mytextclock,
|
||||
s.mylayoutbox,
|
||||
},
|
||||
@ -552,7 +552,7 @@ awful.rules.rules = {
|
||||
{ rule_any = {type = { "normal", "dialog" }
|
||||
}, properties = { titlebars_enabled = false }
|
||||
},
|
||||
|
||||
|
||||
{
|
||||
rule_any = {
|
||||
class = { "discord", "telegram-desktop", "TelegramDesktop" }
|
||||
@ -564,7 +564,7 @@ awful.rules.rules = {
|
||||
class = { "firefox" }
|
||||
}, properties = { screen = 1, tag = "7" }
|
||||
},
|
||||
|
||||
|
||||
{
|
||||
rule_any = {
|
||||
class = { "spotify", "Spotify" },
|
||||
@ -572,11 +572,6 @@ awful.rules.rules = {
|
||||
}, properties = { screen = 2, tag = "9" }
|
||||
},
|
||||
|
||||
{
|
||||
rule_any = {
|
||||
name = { "Apex Legends" }
|
||||
}, properties = { screen = 1, tag = "5", fullscreen = true }
|
||||
},
|
||||
}
|
||||
|
||||
-- {{{ Signals
|
||||
@ -653,10 +648,10 @@ awful.spawn.with_shell("xinput --set-prop 'COOLERMASTER CM310' 'libinput Accel S
|
||||
--awful.spawn.with_shell("xrandr --output DP-0 --mode 1920x1080 --output HDMI-0 --mode 1920x1080 --right-of DP-0")
|
||||
--awful.spawn.with_shell("xrandr --output DP-0 --mode 1920x1080 --output HDMI-0 --mode 1920x1080 --left-of DP-0")
|
||||
awful.spawn.with_shell("dropbox")
|
||||
awful.spawn.with_shell("discord")
|
||||
awful.spawn.with_shell("telegram-desktop")
|
||||
awful.spawn.with_shell("flameshot")
|
||||
--awful.spawn.with_shell("discord-development")
|
||||
awful.spawn.with_shell("discord")
|
||||
-- awful.spawn.with_shell("discord-development")
|
||||
--awful.spawn.with_shell("xrandr --output HDMI-0 --off")
|
||||
awful.spawn.with_shell("setxkbmap lv")
|
||||
|
||||
|
||||
@ -26,7 +26,7 @@
|
||||
# List available modules with "fastfetch --list-modules".
|
||||
# Get the default structure with "fastfetch --print-structure".
|
||||
# --structure Title:Separator:OS:Host:Kernel:Uptime:Packages:Shell:Resolution:DE:WM:WMTheme:Theme:Icons:Font:Cursor:Terminal:TerminalFont:CPU:GPU:Memory:Disk:Battery:Locale:Break:Colors
|
||||
--structure Title:Separator:OS:Host:Kernel:Uptime:Packages:Resolution:DE:WM:WMTheme:Theme:Icons:CPU:GPU:Memory:Disk:Battery:Player:Song:Break
|
||||
--structure Title:Separator:OS:Host:Kernel:Uptime:Packages:Resolution:DE:WM:WMTheme:Theme:Icons:CPU:GPU:Memory:Disk:Battery:Song:Break
|
||||
|
||||
# Multithreading option:
|
||||
# Sets if fastfetch should use multiple threads to detect the values.
|
||||
|
||||
@ -1 +1 @@
|
||||
{"ms-vscode.references-view-0.0.86":true,"ms-vscode.js-debug-companion-1.0.16":true}
|
||||
{"ms-vscode.references-view-0.0.86":true,"ms-vscode.js-debug-companion-1.0.16":true,"mechatroner.rainbow-csv-2.2.0":true}
|
||||
@ -0,0 +1,43 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<PackageManifest Version="2.0.0" xmlns="http://schemas.microsoft.com/developer/vsx-schema/2011" xmlns:d="http://schemas.microsoft.com/developer/vsx-schema-design/2011">
|
||||
<Metadata>
|
||||
<Identity Language="en-US" Id="rainbow-csv" Version="2.3.0" Publisher="mechatroner" />
|
||||
<DisplayName>Rainbow CSV</DisplayName>
|
||||
<Description xml:space="preserve">Highlight CSV and TSV files, Run SQL-like queries</Description>
|
||||
<Tags>csv,tsv,highlight,CSV,__ext_csv,TSV,__ext_tsv,__ext_tab,csv (pipe),CSV (pipe),csv (tilde),CSV (tilde),csv (whitespace),CSV (whitespace),csv (caret),CSV (caret),csv (colon),CSV (colon),csv (double quote),CSV (double quote),csv (equals),CSV (equals),csv (dot),CSV (dot),csv (hyphen),CSV (hyphen),csv (semicolon),CSV (semicolon),rainbow hover markup,plaintext,sql,__web_extension</Tags>
|
||||
<Categories>Data Science,Other,Programming Languages</Categories>
|
||||
<GalleryFlags>Public</GalleryFlags>
|
||||
|
||||
<Properties>
|
||||
<Property Id="Microsoft.VisualStudio.Code.Engine" Value="^1.62.0" />
|
||||
<Property Id="Microsoft.VisualStudio.Code.ExtensionDependencies" Value="" />
|
||||
<Property Id="Microsoft.VisualStudio.Code.ExtensionPack" Value="" />
|
||||
<Property Id="Microsoft.VisualStudio.Code.ExtensionKind" Value="workspace,web" />
|
||||
<Property Id="Microsoft.VisualStudio.Code.LocalizedLanguages" Value="" />
|
||||
|
||||
<Property Id="Microsoft.VisualStudio.Services.Links.Source" Value="https://github.com/mechatroner/vscode_rainbow_csv.git" />
|
||||
<Property Id="Microsoft.VisualStudio.Services.Links.Getstarted" Value="https://github.com/mechatroner/vscode_rainbow_csv.git" />
|
||||
<Property Id="Microsoft.VisualStudio.Services.Links.GitHub" Value="https://github.com/mechatroner/vscode_rainbow_csv.git" />
|
||||
<Property Id="Microsoft.VisualStudio.Services.Links.Support" Value="https://github.com/mechatroner/vscode_rainbow_csv/issues" />
|
||||
<Property Id="Microsoft.VisualStudio.Services.Links.Learn" Value="https://github.com/mechatroner/vscode_rainbow_csv#readme" />
|
||||
|
||||
|
||||
<Property Id="Microsoft.VisualStudio.Services.GitHubFlavoredMarkdown" Value="true" />
|
||||
|
||||
|
||||
</Properties>
|
||||
<License>extension/LICENSE.txt</License>
|
||||
<Icon>extension/rainbow_csv_logo.png</Icon>
|
||||
</Metadata>
|
||||
<Installation>
|
||||
<InstallationTarget Id="Microsoft.VisualStudio.Code"/>
|
||||
</Installation>
|
||||
<Dependencies/>
|
||||
<Assets>
|
||||
<Asset Type="Microsoft.VisualStudio.Code.Manifest" Path="extension/package.json" Addressable="true" />
|
||||
<Asset Type="Microsoft.VisualStudio.Services.Content.Details" Path="extension/README.md" Addressable="true" />
|
||||
<Asset Type="Microsoft.VisualStudio.Services.Content.Changelog" Path="extension/CHANGELOG.md" Addressable="true" />
|
||||
<Asset Type="Microsoft.VisualStudio.Services.Content.License" Path="extension/LICENSE.txt" Addressable="true" />
|
||||
<Asset Type="Microsoft.VisualStudio.Services.Icons.Default" Path="extension/rainbow_csv_logo.png" Addressable="true" />
|
||||
</Assets>
|
||||
</PackageManifest>
|
||||
@ -0,0 +1,96 @@
|
||||
# Rainbow CSV for Visual Studio Code Change Log
|
||||
|
||||
## 2.3.0
|
||||
* Improve alignment algorithm: special handling of numeric columns, see [#106](https://github.com/mechatroner/vscode_rainbow_csv/issues/106).
|
||||
* Show alignment progress indicator which is very nice for large files.
|
||||
|
||||
## 2.2.0
|
||||
* UI and UX improvements by [@anthroid](https://github.com/anthroid).
|
||||
|
||||
## 2.1.0
|
||||
* Support RBQL and column edit mode in web version of VSCode.
|
||||
* Support RBQL result set output dir customization [#101](https://github.com/mechatroner/vscode_rainbow_csv/issues/101).
|
||||
* Slightly reduce startup time by moving non-critical code into a lazy-loaded module.
|
||||
* Internal code refactoring.
|
||||
|
||||
## 2.0.0
|
||||
* Enable web/browser version for vscode.dev
|
||||
* RBQL: improve join table path handling.
|
||||
|
||||
## 1.10.0
|
||||
* RBQL update: improved console UI.
|
||||
|
||||
## 1.9.0
|
||||
* RBQL update: improved CSV header support.
|
||||
|
||||
## 1.8.1
|
||||
* Minor RBQL update
|
||||
|
||||
## 1.8.0
|
||||
* New command: "SetHeaderLine" by @WetDesertRock, see [#71](https://github.com/mechatroner/vscode_rainbow_csv/issues/71)
|
||||
* Updated RBQL
|
||||
* Added integration tests
|
||||
|
||||
|
||||
## 1.7.0
|
||||
* Updated RBQL
|
||||
* Improved RBQL UI
|
||||
|
||||
|
||||
## 1.6.0
|
||||
* Updated RBQL
|
||||
|
||||
|
||||
## 1.5.0
|
||||
* Highlight column info tooltip with the same color as the column itself
|
||||
|
||||
|
||||
## 1.4.0
|
||||
* Run CSV autodetection whenever a text chunk is copied into a new untitled buffer
|
||||
* Improve startup performance
|
||||
* RBQL: Support column names as variables
|
||||
* RBQL: Support newlines in double-quoted CSV fields
|
||||
* RBQL: Change default encoding to UTF-8
|
||||
* RBQL: Enable for untitled buffers
|
||||
* RBQL: Improve UI/UX, add history, built-in docs
|
||||
|
||||
|
||||
## 1.3.0
|
||||
* Updated RBQL to version 0.9.0
|
||||
* Restricted usage of Align/Shrink commands in files with unbalanced double quotes
|
||||
* Fixed incorrect dialect name: "tilda" -> "tilde", see [#40](https://github.com/mechatroner/vscode_rainbow_csv/issues/40)
|
||||
* Added an error message when RBQL console is used with unsaved file [#41](https://github.com/mechatroner/vscode_rainbow_csv/issues/41)
|
||||
|
||||
|
||||
## 1.2.0
|
||||
* Added frequency-based fallback content-based autodetection algorithm for .csv files
|
||||
* Adjusted default parameters: added '|' to the list of autodetected separators
|
||||
* Fixed "Align/Shrink" button logic [#38](https://github.com/mechatroner/vscode_rainbow_csv/issues/38)
|
||||
* Fixed bug: incorrect RBQL result set dialect when output dialect doesn't match input
|
||||
* Improved documentation
|
||||
|
||||
|
||||
## 1.1.0
|
||||
* Special treatment of comment lines by [@larsonmars](https://github.com/larsonmars)
|
||||
* RBQL encoding customization by [@mandel59](https://github.com/mandel59)
|
||||
* Implemented Whitespace-separated dialect
|
||||
* Linter: detect trailing whitespaces in fields [#15](https://github.com/mechatroner/vscode_rainbow_csv/issues/15)
|
||||
* Added commands: remove trailing whitespaces from all fields and align columns with trailing whitespaces
|
||||
* Implemented RBQL result set copy-back command
|
||||
* Improved RBQL console UI
|
||||
* Customizable "Preview big CSV: head/tail" context menu options [#32](https://github.com/mechatroner/vscode_rainbow_csv/issues/32)
|
||||
* Improved autodetection algorithm for files with multiple candidate separators
|
||||
|
||||
|
||||
## 0.8.0
|
||||
* Large files preview functionality implemented by [@neilsustc](https://github.com/neilsustc) see [#24](https://github.com/mechatroner/vscode_rainbow_csv/issues/24)
|
||||
* Fix single-autodetection per file limit, see [#26](https://github.com/mechatroner/vscode_rainbow_csv/issues/26)
|
||||
* Enable content-based autodetection for .csv files
|
||||
* Support tooltip message customizations, see [#12](https://github.com/mechatroner/vscode_rainbow_csv/issues/12)
|
||||
* Fix RBQL warnings
|
||||
* Various minor improvements
|
||||
|
||||
|
||||
## 0.7.1
|
||||
* Fix: Added safety check to RBQL that would prevent accidental usage of assignment operator "=" instead of comparison "==" or "===" in JS (In Python this was not possible before the fix due to Python's own syntactic checker).
|
||||
* Added "Rainbow CSV" category to all extension commands by [@yozlet](https://github.com/yozlet) request, see [#22](https://github.com/mechatroner/vscode_rainbow_csv/issues/22)
|
||||
120
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/DEV_README.md
Normal file
120
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/DEV_README.md
Normal file
@ -0,0 +1,120 @@
|
||||
## Instructions
|
||||
|
||||
### Debugging the extension:
|
||||
#### For standard VSCode:
|
||||
1. Open rainbow_csv directory in VSCode
|
||||
2. Make sure you have "Extension" run mode enabled
|
||||
3. Click "Run" or F5
|
||||
|
||||
|
||||
#### For web-based VSCode:
|
||||
1. Run `npm install --only=dev` - OK to run in WSL
|
||||
2. Run `npm run compile-web && npm run start-web-server` - OK to run in WSL. - This should start a local server at http://localhost:3000/
|
||||
3. Point your browser to http://localhost:3000/
|
||||
It is possible to run this in windows cmd too, but it could be that the node_modules dir has to be deleted and installed from scratch.
|
||||
The difference between running in win and in WSL is that in WSL it would only run with `--browser=none` option and this option doesn't run unit tests automatically which could be an issue if you want to run tests instead of manual debugging.
|
||||
|
||||
|
||||
### Running unit tests for the extension inside VSCode:
|
||||
#### For standard VSCode:
|
||||
1. **IMPORTANT** Make sure you have no open VSCode instances running, all VSCode windows are closed!
|
||||
2. run `npm install --only=dev` (If you have WSL - run in Windows, don't run in WSL).
|
||||
3. run `npm run test` in Windows (If you have WSL - run in Windows, don't run in WSL). Make sure that the tests are successful.
|
||||
|
||||
#### For web-based VSCode:
|
||||
1. run `npm install` (If you have WSL - run in Windows, don't run in WSL).
|
||||
2. run `npm run compile-web` (If you have WSL - run in Windows, don't run in WSL). This will combine all scripts into a single web script and put it into the `dist` folder.
|
||||
3. run `npm run test-in-browser` (If you have WSL - run in Windows, don't run in WSL). This will open a new browser window and run all the unit tests. Make sure that the tests are successful.
|
||||
|
||||
|
||||
### Running unit tests for the extension inside VSCode:
|
||||
1. In console in rainbow_csv directory run `npm install --only=dev` - OK to run the command in WSL while launching in Windows. This will install the dependencies, including `vscode/lib/testrunner`
|
||||
2. Open rainbow_csv directory in VSCode switch to "Extension Tests" mode and click run
|
||||
|
||||
Example of minimalistic test setup:
|
||||
https://github.com/microsoft/vscode-extension-samples/tree/main/helloworld-test-sample
|
||||
|
||||
|
||||
|
||||
#### Debugging
|
||||
Looks like it is possible to directly run scripts from package.json with `npx` like this:
|
||||
```
|
||||
npx vscode-test-web --help
|
||||
```
|
||||
And apparently another option to execute this command is (never tested):
|
||||
```
|
||||
npx @vscode/test-web --extensionDevelopmentPath=$extensionFolderPath $testDataPath
|
||||
```
|
||||
|
||||
Options available for vscode-test-web
|
||||
* version
|
||||
`'insiders' | 'stable' | 'sources' [Optional, default 'insiders']`
|
||||
|
||||
* browser
|
||||
`'chromium' | 'firefox' | 'webkit' | 'none': The browser to launch. [Optional, defaults to 'chromium']`
|
||||
If `none` is provided it wouldn't run unit test and it wouldn't kill the server when the browser window is closed.
|
||||
|
||||
If an exception happens in extension.js you will be able to see it in the browser console (but the line number would be wrong).
|
||||
|
||||
|
||||
#### Issues
|
||||
* FS mount not working: https://github.com/microsoft/vscode-test-web/issues/16
|
||||
|
||||
|
||||
### Running the browser version for vscode.dev
|
||||
The npx command `npx serve --cors -l 5000` failed in WSL with `cb.apply is not a function` error.
|
||||
The same command worked as expected from Windows cmd.
|
||||
Steps:
|
||||
1. Run `npx serve --cors -l 5000` - this may not work in WSL, in this case run in windows cmd. This local server uses `http` instead of `https` and because of that VSCode will not work with it directly, although the docs say otherwise (https://code.visualstudio.com/api/extension-guides/web-extensions#test-your-web-extension-in-on-vscode.dev) - it will just produce some cors/wss content security policy related errors in the log. So you need to do step 2.
|
||||
2. In another cmd tab run another command: `npx localtunnel -p 5000` - this will create a "tunnel" server pointing to the server from the first command - this will produce a link like `https://rotten-snake-42.loca.lt/`
|
||||
3. Follow the `https://rotten-snake-42.loca.lt/` link and press the button - this will show the content of your extension folder - https server is working.
|
||||
4. Go to vscode.dev -> Ctrl+Shift+P -> run Developer: Install Web Extension... -> Copy the `https://rotten-snake-42.loca.lt/` link. In my experience this will work only with https urls. If you use http, the extension will be sort of "installed" - it will be listed in the installed extension panel but the main extension.js won't be loaded so all the logic will be missing from it.
|
||||
|
||||
|
||||
### Publishing
|
||||
1. Make sure you have webpack installed: run `npm install --only=dev` (Better to avoid running this in WSL).
|
||||
Although this would create `node_modules/` and `package-lock.json` file this is not a problem because they are excluded from final package via the `.vscodeignore` file.
|
||||
2. Run vsce publish as usual. vsce will also automatically run `vscode:prepublish` / `npm run package-web` command.
|
||||
|
||||
|
||||
### Generating documentation with showdown
|
||||
In order to generate RBQL documentation use showdown - based markdown_to_html.js script from junk/rainbow_stuff
|
||||
Usage: `node markdown_to_html.js ~/vscode_rainbow_csv/rbql_core/README.md out.html`
|
||||
|
||||
|
||||
## TODO LIST
|
||||
* Improve RBQL encoding handling logic when VScode encoding info API is implemented, see https://github.com/microsoft/vscode/issues/824.
|
||||
|
||||
* Consider keeping only one open RBQL console at any time - if another one opens automatically close the previous one.
|
||||
|
||||
* DEBUG: Add a huge no-op loop on startup in order to reproduce/emulate high-cpu load error from #55.
|
||||
|
||||
* Support virtual header for rbql_csv.
|
||||
|
||||
* Consider replacing the RBQL query text input with scrollable textarea - it has a drawback that on enter it will go to the next line instead running the query.
|
||||
|
||||
* Figure out if it is possible to convert to a web extension.
|
||||
|
||||
* Make the `is_web_ext` check more reliable and explicit.
|
||||
|
||||
* Replace callbacks with async where possible
|
||||
|
||||
* Store VSCode documents instead of file paths in result_set_parent_map so that the map can be used in web version. And the autodetection_stoplist also should be doc based to work in web.
|
||||
|
||||
* Support JOIN queries in web version.
|
||||
|
||||
* Get rid of typescript files and infra, but add comments explaining how you obtained the js versions of the files.
|
||||
|
||||
* Try to get rid of `@types/vscode` dev dependency.
|
||||
|
||||
* Support all commands in web version
|
||||
|
||||
* Use `await save_to_global_state()` everywhere.
|
||||
|
||||
* Use `await vscode.workspace.openTextDocument` everywhere.
|
||||
|
||||
* Get rid of `then` entirely
|
||||
|
||||
* Merge rbql_query_web and rbql_query_node
|
||||
|
||||
* Add comment prefix handling in RBQL, unit tests (and web_ui entry?)
|
||||
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017 Dmitry Ignatovich
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
175
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/README.md
Normal file
175
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/README.md
Normal file
@ -0,0 +1,175 @@
|
||||
# Rainbow CSV
|
||||
|
||||
## Main Features
|
||||
* Highlight columns in comma (.csv), tab (.tsv), semicolon and pipe - separated files in different colors
|
||||
* Transform and filter tables using built-in SQL-like query language
|
||||
* Provide info about column on hover
|
||||
* Automatic consistency check for csv files (CSVLint)
|
||||
* Multi-cursor column edit
|
||||
* Align columns with spaces and Shrink (trim spaces from fields)
|
||||
* Works in browser (vscode.dev) with limited functionality
|
||||
|
||||

|
||||
|
||||
## Usage
|
||||
|
||||
If your csv, semicolon-separated or tab-separated file doesn't have .csv or .tsv extension, you can manually enable highlighting by clicking on the current language label mark in the right bottom corner and then choosing "CSV", "TSV", "CSV (semicolon)" or "CSV (pipe)" depending on the file content, see this [screenshot](https://stackoverflow.com/a/30776845/2898283)
|
||||
Another way to do this: select one separator character with mouse cursor -> right click -> "Set as Rainbow separator"
|
||||
|
||||
#### Supported separators
|
||||
|
||||
|Language name | Separator | Extension | Properties |
|
||||
|-----------------|----------------------|-----------|-------------------------------------|
|
||||
|csv | , (comma) | .csv | Ignored inside double-quoted fields |
|
||||
|tsv | \t (TAB) | .tsv .tab | |
|
||||
|csv (semicolon) | ; (semicolon) | | Ignored inside double-quoted fields |
|
||||
|csv (whitespace) | whitespace | | Consecutive whitespaces are merged |
|
||||
|csv (pipe) | | (pipe) | | |
|
||||
|csv (...) | ~ ^ : " = . - | | |
|
||||
|
||||
|
||||
#### Content-based separator autodetection
|
||||
Rainbow CSV runs table autodetection algorithm for all "Plain Text" and "*.csv" files. In most cases this is a very cheap operation because autodetection usually stops after checking only 1 or 2 topmost lines.
|
||||
Autodetection can be disabled at the extension settings page.
|
||||
The autodetection algorithm skips files that have less than N=10 non-comment lines; value of N can be adjusted in the settings.
|
||||
By default only comma, tab, semicolon and pipe are tried during autodetection, but you can adjust the list of candidate separators: add the following line to your VSCode config and edit it by removing or including any of the supported separators:
|
||||
```
|
||||
"rainbow_csv.autodetect_separators": ["\t", ",", ";", "|"],
|
||||
```
|
||||
If the autodetection algorithm makes an error and highlights a non-csv file, you can press "Rainbow OFF" button inside the status line.
|
||||
|
||||
|
||||
#### Customizing file extension - separator association
|
||||
If you often work with spreadsheet files with one specific extension (e.g. ".dat") and you don't want to rely on the autodetection algorithm, you can associate that extension with one of the supported separators.
|
||||
For example to associate ".dat" extension with pipe-separated files and ".csv" with semicolon-separated files add the following lines to your VS Code json config:
|
||||
|
||||
```
|
||||
"files.associations": {
|
||||
"*.dat": "csv (pipe)",
|
||||
"*.csv": "csv (semicolon)"
|
||||
},
|
||||
```
|
||||
|
||||
Important: language identifiers in the config must be specified in **lower case**! E.g. use `csv (semicolon)`, not `CSV (semicolon)`.
|
||||
List of supported language ids: `"csv", "tsv", "csv (semicolon)", "csv (pipe)", "csv (whitespace)", "csv (tilde)", "csv (caret)", "csv (colon)", "csv (double quote)", "csv (equals)", "csv (dot)", "csv (hyphen)"`
|
||||
|
||||
|
||||
#### CSVLint consistency check
|
||||
|
||||
The linter checks the following:
|
||||
* consistency of double quotes usage in CSV rows
|
||||
* consistency of number of fields per CSV row
|
||||
|
||||
To recheck a csv file click on "CSVLint" button.
|
||||
|
||||
|
||||
#### Working with large files
|
||||
To enable Rainbow CSV for very big files (more than 300K lines or 20MB) disable "Editor:Large File Optimizations" option in VS Code settings.
|
||||
You can preview huge files by clicking "Preview... " option in VS Code File Explorer context menu.
|
||||
All Rainbow CSV features would be disabled by VSCode if file is bigger than 50MB.
|
||||
|
||||
|
||||
#### Working with CSV files with comments
|
||||
Some CSV files can contain comment lines e.g. metadata before the header line.
|
||||
To allow CSVLint, content-based autodetection algorithms and _Align_, _Shrink_, _ColumnEdit_ commands to work properly with such files you need to adjust your settings.
|
||||
|
||||
|
||||
#### Aligning/Shrinking table
|
||||
You can align columns in CSV files by clicking "Align" statusline button or use _Align_ command
|
||||
To shrink the table, i.e. remove leading and trailing whitespaces, click "Shrink" statusline button or use _Shrink_ command
|
||||
|
||||
|
||||
### Settings
|
||||
You can customize Rainbow CSV at the extension settings section of VSCode settings.
|
||||
There you can find the list of available options and their description.
|
||||
|
||||
|
||||
### Commands:
|
||||
|
||||
#### RBQL
|
||||
Enter RBQL - SQL-like language query editing mode.
|
||||
|
||||
#### Align, Shrink
|
||||
Align columns with whitespaces or shrink them (remove leading/trailing whitespaces)
|
||||
|
||||
#### ColumnEditBefore, ColumnEditAfter, ColumnEditSelect
|
||||
Activate multi-cursor column editing for column under the cursor. Works only for files with less than 10000 lines. For larger files you can use an RBQL query.
|
||||
**WARNING**: This is a dangerous mode. It is possible to accidentally corrupt table structure by incorrectly using "Backspace" or entering separator or double quote characters. Use RBQL if you are not sure.
|
||||
To remove cursor/selection from the header line use "Alt+Click" on it.
|
||||
|
||||
#### SetVirtualHeader
|
||||
Input a comma-separated string with column names to adjust column names displayed in hover tooltips. Actual header line and file content won't be affected.
|
||||
"Virtual" header is persistent and will be associated with the parent file across VSCode sessions.
|
||||
|
||||
#### SetHeaderLine
|
||||
Uses the current line to adjust column names displayed in hover tooltips. Actual header line and file content won't be affected.
|
||||
This is a "Virtual" header and will be persistent and will be associated with the parent file across VSCode sessions.
|
||||
|
||||
#### SetJoinTableName
|
||||
Set a custom name for the current file so you can use it instead of the file path in RBQL JOIN queries
|
||||
|
||||
|
||||
### Colors customization
|
||||
You can customize Rainbow CSV colors to increase contrast. [Instructions](https://github.com/mechatroner/vscode_rainbow_csv/blob/HEAD/test/color_customization_example.md#colors-customization)
|
||||
|
||||
## SQL-like "RBQL" query language
|
||||
|
||||
Rainbow CSV has built-in RBQL query language interpreter that allows you to run SQL-like queries using a1, a2, a3, ... column names.
|
||||
Example:
|
||||
```
|
||||
SELECT a1, a2 * 10 WHERE a1 == "Buy" && a4.indexOf('oil') != -1 ORDER BY parseInt(a2), a4 LIMIT 100
|
||||
```
|
||||
To enter query-editing mode, execute _RBQL_ VSCode command.
|
||||
RBQL is a very simple and powerful tool which would allow you to quickly and easily perform most common data-manipulation tasks and convert your csv tables to bash scripts, single-lines json, single-line xml files, etc.
|
||||
It is very easy to start using RBQL even if you don't know SQL. For example to cut out third and first columns use `SELECT a3, a1`
|
||||
You can use RBQL command for all possible types of files (e.g. .js, .xml, .html), but for non-table files only two variables: _NR_ and _a1_ would be available.
|
||||
|
||||
[Full Documentation](https://github.com/mechatroner/vscode_rainbow_csv/blob/master/rbql_core/README.md#rbql-rainbow-query-language-description)
|
||||
|
||||
|
||||
Screenshot of RBQL Console:
|
||||

|
||||
|
||||
|
||||
## Other
|
||||
### Comparison of Rainbow CSV technology with traditional graphical column alignment
|
||||
|
||||
#### Advantages:
|
||||
|
||||
* WYSIWYG
|
||||
* Familiar editing environment of your favorite text editor
|
||||
* Zero-cost abstraction: Syntax highlighting is essentially free, while graphical column alignment can be computationally expensive
|
||||
* High information density: Rainbow CSV shows more data per screen because it doesn't insert column-aligning whitespaces.
|
||||
* Color -> column association allows you to locate the column of interest more quickly when looking back and forth between the data and other objects on the screen (with column alignment one has to locate the header or count the columns to find the right one)
|
||||
* Ability to visually associate two same-colored columns from two different windows. This is not possible with graphical column alignment
|
||||
|
||||
#### Disadvantages:
|
||||
|
||||
* Rainbow CSV may be less effective for CSV files with many (> 10) columns.
|
||||
* Rainbow CSV can't correctly handle newlines inside double-quoted CSV fields (well, theoretically it can, but only under specific conditions)
|
||||
|
||||
|
||||
### References
|
||||
|
||||
#### Related VSCode extensions
|
||||
These extensions can work well together with Rainbow CSV and provide additional functionality e.g. export to Excel format:
|
||||
* [Excel Viewer](https://marketplace.visualstudio.com/items?itemName=GrapeCity.gc-excelviewer)
|
||||
* [Edit CSV](https://marketplace.visualstudio.com/items?itemName=janisdd.vscode-edit-csv)
|
||||
* [Data Preview](https://marketplace.visualstudio.com/items?itemName=RandomFractalsInc.vscode-data-preview)
|
||||
|
||||
|
||||
#### Rainbow CSV and similar plugins in other editors:
|
||||
|
||||
* Rainbow CSV extension in [Vim](https://github.com/mechatroner/rainbow_csv)
|
||||
* rainbow-csv package in [Atom](https://atom.io/packages/rainbow-csv)
|
||||
* rainbow_csv plugin in [Sublime Text](https://packagecontrol.io/packages/rainbow_csv)
|
||||
* rainbow_csv plugin in [gedit](https://github.com/mechatroner/gtk_gedit_rainbow_csv) - doesn't support quoted commas in csv
|
||||
* rainbow_csv_4_nedit in [NEdit](https://github.com/DmitTrix/rainbow_csv_4_nedit)
|
||||
* CSV highlighting in [Nano](https://github.com/scopatz/nanorc)
|
||||
* Rainbow CSV in [IntelliJ IDEA](https://plugins.jetbrains.com/plugin/12896-rainbow-csv/)
|
||||
|
||||
#### RBQL
|
||||
* [RBQL](https://github.com/mechatroner/RBQL)
|
||||
* Library and CLI App for Python [RBQL](https://pypi.org/project/rbql/)
|
||||
* Library and CLI App for JavaScript [RBQL](https://www.npmjs.com/package/rbql)
|
||||
|
||||
@ -0,0 +1,9 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Jonathan Ong me@jongleberry.com
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
@ -0,0 +1,155 @@
|
||||
/* jshint browser: true */
|
||||
|
||||
(function () {
|
||||
|
||||
// We'll copy the properties below into the mirror div.
|
||||
// Note that some browsers, such as Firefox, do not concatenate properties
|
||||
// into their shorthand (e.g. padding-top, padding-bottom etc. -> padding),
|
||||
// so we have to list every single property explicitly.
|
||||
var properties = [
|
||||
'direction', // RTL support
|
||||
'boxSizing',
|
||||
'width', // on Chrome and IE, exclude the scrollbar, so the mirror div wraps exactly as the textarea does
|
||||
'height',
|
||||
'overflowX',
|
||||
'overflowY', // copy the scrollbar for IE
|
||||
|
||||
'borderTopWidth',
|
||||
'borderRightWidth',
|
||||
'borderBottomWidth',
|
||||
'borderLeftWidth',
|
||||
'borderStyle',
|
||||
|
||||
'paddingTop',
|
||||
'paddingRight',
|
||||
'paddingBottom',
|
||||
'paddingLeft',
|
||||
|
||||
// https://developer.mozilla.org/en-US/docs/Web/CSS/font
|
||||
'fontStyle',
|
||||
'fontVariant',
|
||||
'fontWeight',
|
||||
'fontStretch',
|
||||
'fontSize',
|
||||
'fontSizeAdjust',
|
||||
'lineHeight',
|
||||
'fontFamily',
|
||||
|
||||
'textAlign',
|
||||
'textTransform',
|
||||
'textIndent',
|
||||
'textDecoration', // might not make a difference, but better be safe
|
||||
|
||||
'letterSpacing',
|
||||
'wordSpacing',
|
||||
|
||||
'tabSize',
|
||||
'MozTabSize'
|
||||
|
||||
];
|
||||
|
||||
var isBrowser = (typeof window !== 'undefined');
|
||||
var isFirefox = (isBrowser && window.mozInnerScreenX != null);
|
||||
|
||||
/**
 * Measure where the caret would be drawn inside a textarea or text input.
 *
 * A temporary "mirror" <div> is appended to document.body, given the computed
 * styles listed in `properties`, and filled with the element's text up to
 * `position`. A <span> holding the remaining text is appended after it, and the
 * span's offsets are read back as the caret coordinates.
 *
 * @param {Element} element - the element whose `value` and computed style are read.
 * @param {number} position - character index into `element.value` at which to measure.
 * @param {Object} [options] - when `options.debug` is truthy the mirror div is left
 *   in the DOM (and its span is given a grey background) instead of being removed.
 * @returns {{top: number, left: number, height: number}} span offsets plus the
 *   element's top/left border widths, and the parsed computed line height.
 * @throws {Error} when called outside a browser (`isBrowser` is false).
 */
function getCaretCoordinates(element, position, options) {
  if (!isBrowser) {
    throw new Error('textarea-caret-position#getCaretCoordinates should only be called in a browser');
  }

  var debug = options && options.debug || false;
  if (debug) {
    // Remove a mirror div left behind by a previous debug call so that only one exists.
    var el = document.querySelector('#input-textarea-caret-position-mirror-div');
    if (el) el.parentNode.removeChild(el);
  }

  // The mirror div will replicate the textarea's style
  var div = document.createElement('div');
  div.id = 'input-textarea-caret-position-mirror-div';
  document.body.appendChild(div);

  var style = div.style;
  var computed = window.getComputedStyle ? window.getComputedStyle(element) : element.currentStyle; // currentStyle for IE < 9
  var isInput = element.nodeName === 'INPUT';

  // Default textarea styles
  style.whiteSpace = 'pre-wrap';
  if (!isInput)
    style.wordWrap = 'break-word'; // only for textarea-s

  // Position off-screen
  style.position = 'absolute'; // required to return coordinates properly
  if (!debug)
    style.visibility = 'hidden'; // not 'display: none' because we want rendering

  // Transfer the element's properties to the div
  properties.forEach(function (prop) {
    if (isInput && prop === 'lineHeight') {
      // Special case for <input>s because text is rendered centered and line height may be != height
      if (computed.boxSizing === "border-box") {
        var height = parseInt(computed.height);
        // Vertical padding plus vertical border widths of the element.
        var outerHeight =
          parseInt(computed.paddingTop) +
          parseInt(computed.paddingBottom) +
          parseInt(computed.borderTopWidth) +
          parseInt(computed.borderBottomWidth);
        var targetHeight = outerHeight + parseInt(computed.lineHeight);
        if (height > targetHeight) {
          // Taller than padding + border + one line: use everything inside padding/border as the line height.
          style.lineHeight = height - outerHeight + "px";
        } else if (height === targetHeight) {
          style.lineHeight = computed.lineHeight;
        } else {
          style.lineHeight = 0;
        }
      } else {
        style.lineHeight = computed.height;
      }
    } else {
      style[prop] = computed[prop];
    }
  });

  // This runs after the property copy above, so it overrides the overflow values copied from the element.
  if (isFirefox) {
    // Firefox lies about the overflow property for textareas: https://bugzilla.mozilla.org/show_bug.cgi?id=984275
    if (element.scrollHeight > parseInt(computed.height))
      style.overflowY = 'scroll';
  } else {
    style.overflow = 'hidden'; // for Chrome to not render a scrollbar; IE keeps overflowY = 'scroll'
  }

  div.textContent = element.value.substring(0, position);
  // The second special handling for input type="text" vs textarea:
  // spaces need to be replaced with non-breaking spaces - http://stackoverflow.com/a/13402035/1269037
  if (isInput)
    div.textContent = div.textContent.replace(/\s/g, '\u00a0');

  var span = document.createElement('span');
  // Wrapping must be replicated *exactly*, including when a long word gets
  // onto the next line, with whitespace at the end of the line before (#7).
  // The *only* reliable way to do that is to copy the *entire* rest of the
  // textarea's content into the <span> created at the caret position.
  // For inputs, just '.' would be enough, but no need to bother.
  span.textContent = element.value.substring(position) || '.'; // || because a completely empty faux span doesn't render at all
  div.appendChild(span);

  // NOTE(review): parseInt() yields NaN for a non-numeric string, so `height` would be NaN
  // if the computed lineHeight is a keyword such as 'normal' - confirm whether callers handle that.
  var coordinates = {
    top: span.offsetTop + parseInt(computed['borderTopWidth']),
    left: span.offsetLeft + parseInt(computed['borderLeftWidth']),
    height: parseInt(computed['lineHeight'])
  };

  if (debug) {
    span.style.backgroundColor = '#aaa';
  } else {
    document.body.removeChild(div);
  }

  return coordinates;
}
|
||||
|
||||
if (typeof module != 'undefined' && typeof module.exports != 'undefined') {
|
||||
module.exports = getCaretCoordinates;
|
||||
} else if(isBrowser) {
|
||||
window.getCaretCoordinates = getCaretCoordinates;
|
||||
}
|
||||
|
||||
}());
|
||||
1
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/dist/web/extension.js
vendored
Normal file
1
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/dist/web/extension.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/dist/web/extension.js.map
vendored
Normal file
1
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/dist/web/extension.js.map
vendored
Normal file
File diff suppressed because one or more lines are too long
2
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/dist/web/test/suite/index.js
vendored
Normal file
2
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/dist/web/test/suite/index.js
vendored
Normal file
File diff suppressed because one or more lines are too long
6
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/dist/web/test/suite/index.js.LICENSE.txt
vendored
Normal file
6
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/dist/web/test/suite/index.js.LICENSE.txt
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
/*!
|
||||
* The buffer module from node.js, for the browser.
|
||||
*
|
||||
* @author Feross Aboukhadijeh <feross@feross.org> <http://feross.org>
|
||||
* @license MIT
|
||||
*/
|
||||
1
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/dist/web/test/suite/index.js.map
vendored
Normal file
1
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/dist/web/test/suite/index.js.map
vendored
Normal file
File diff suppressed because one or more lines are too long
1586
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/extension.js
Normal file
1586
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/extension.js
Normal file
File diff suppressed because it is too large
Load Diff
433
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/package.json
Normal file
433
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/package.json
Normal file
@ -0,0 +1,433 @@
|
||||
{
|
||||
"name": "rainbow-csv",
|
||||
"displayName": "Rainbow CSV",
|
||||
"description": "Highlight CSV and TSV files, Run SQL-like queries",
|
||||
"version": "2.3.0",
|
||||
"publisher": "mechatroner",
|
||||
"license": "MIT",
|
||||
"icon": "rainbow_csv_logo.png",
|
||||
"engines": {
|
||||
"vscode": "^1.62.0"
|
||||
},
|
||||
"keywords": [
|
||||
"csv",
|
||||
"tsv",
|
||||
"highlight"
|
||||
],
|
||||
"categories": [
|
||||
"Data Science",
|
||||
"Other",
|
||||
"Programming Languages"
|
||||
],
|
||||
"activationEvents": [
|
||||
"onLanguage:csv",
|
||||
"onLanguage:tsv",
|
||||
"onLanguage:csv (semicolon)",
|
||||
"onLanguage:csv (pipe)",
|
||||
"onLanguage:csv (tilde)",
|
||||
"onLanguage:csv (whitespace)",
|
||||
"onLanguage:csv (caret)",
|
||||
"onLanguage:csv (colon)",
|
||||
"onLanguage:csv (double quote)",
|
||||
"onLanguage:csv (equals)",
|
||||
"onLanguage:csv (dot)",
|
||||
"onLanguage:csv (hyphen)",
|
||||
"onLanguage:plaintext",
|
||||
"onCommand:rainbow-csv.RBQL",
|
||||
"onCommand:rainbow-csv.RainbowSeparator",
|
||||
"onCommand:rainbow-csv.SampleHead",
|
||||
"onCommand:rainbow-csv.SampleTail"
|
||||
],
|
||||
"main": "./extension",
|
||||
"browser": "./dist/web/extension.js",
|
||||
"contributes": {
|
||||
"configuration": {
|
||||
"type": "object",
|
||||
"title": "Rainbow CSV",
|
||||
"properties": {
|
||||
"rainbow_csv.enable_separator_autodetection": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable automatic content based separator autodetection"
|
||||
},
|
||||
"rainbow_csv.autodetection_min_line_count": {
|
||||
"type": "number",
|
||||
"default": 10,
|
||||
"description": "Minimum number of non-comment lines in file for content-based autodetection"
|
||||
},
|
||||
"rainbow_csv.autodetect_separators": {
|
||||
"type": "array",
|
||||
"default": [
|
||||
"\t",
|
||||
",",
|
||||
";",
|
||||
"|"
|
||||
],
|
||||
"description": "Enable automatic content-based separator autodetection for the specified list of separators",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"description": "Separator"
|
||||
}
|
||||
},
|
||||
"rainbow_csv.enable_tooltip": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable column-info tooltip on hover"
|
||||
},
|
||||
"rainbow_csv.enable_tooltip_column_names": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Show column names in tooltip"
|
||||
},
|
||||
"rainbow_csv.rbql_with_headers_by_default": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "RBQL treats the first line as header by default"
|
||||
},
|
||||
"rainbow_csv.rbql_output_dir": {
|
||||
"type": "string",
|
||||
"default": "TMP",
|
||||
"description": "Output directory for RBQL result sets, can be `TMP`, `INPUT` or a custom absolute path. `TMP` - output in system tmp dir(default), `INPUT` - output in the same dir as input file, otherwise use provided path as the output directory e.g. `/path/to/custom/dir`"
|
||||
},
|
||||
"rainbow_csv.enable_tooltip_warnings": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Show warnings in tooltip"
|
||||
},
|
||||
"rainbow_csv.enable_auto_csv_lint": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable automatic linting/checking of opened csv files"
|
||||
},
|
||||
"rainbow_csv.csv_lint_detect_trailing_spaces": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "CSV Lint: detect leading and trailing whitespaces in fields and show warning"
|
||||
},
|
||||
"rainbow_csv.comment_prefix": {
|
||||
"type": "string",
|
||||
"default": "",
|
||||
"description": "Comment lines prefix, e.g. \"#\". Set to empty string to disable"
|
||||
},
|
||||
"rainbow_csv.enable_context_menu_head": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Enable \"Preview CSV head\" option in File Explorer context menu"
|
||||
},
|
||||
"rainbow_csv.enable_context_menu_tail": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Enable \"Preview CSV tail\" option in File Explorer context menu"
|
||||
}
|
||||
}
|
||||
},
|
||||
"menus": {
|
||||
"editor/context": [
|
||||
{
|
||||
"when": "editorHasSelection && editorLangId == plaintext",
|
||||
"command": "rainbow-csv.RainbowSeparator",
|
||||
"group": "rainbow_csv"
|
||||
},
|
||||
{
|
||||
"when": "editorTextFocus && editorLangId =~ /^[ct]sv/",
|
||||
"command": "rainbow-csv.SetHeaderLine",
|
||||
"group": "rainbow_csv"
|
||||
}
|
||||
],
|
||||
"explorer/context": [
|
||||
{
|
||||
"command": "rainbow-csv.SampleHead",
|
||||
"when": "config.rainbow_csv.enable_context_menu_head",
|
||||
"group": "rainbow_csv"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.SampleTail",
|
||||
"when": "config.rainbow_csv.enable_context_menu_tail",
|
||||
"group": "rainbow_csv"
|
||||
}
|
||||
]
|
||||
},
|
||||
"languages": [
|
||||
{
|
||||
"id": "csv",
|
||||
"aliases": [
|
||||
"CSV",
|
||||
"csv"
|
||||
],
|
||||
"extensions": [
|
||||
".csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "tsv",
|
||||
"aliases": [
|
||||
"TSV",
|
||||
"tsv"
|
||||
],
|
||||
"extensions": [
|
||||
".tsv",
|
||||
".tab"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "csv (pipe)",
|
||||
"aliases": [
|
||||
"CSV (pipe)",
|
||||
"csv (pipe)"
|
||||
],
|
||||
"extensions": []
|
||||
},
|
||||
{
|
||||
"id": "csv (tilde)",
|
||||
"aliases": [
|
||||
"CSV (tilde)",
|
||||
"csv (tilde)"
|
||||
],
|
||||
"extensions": []
|
||||
},
|
||||
{
|
||||
"id": "csv (whitespace)",
|
||||
"aliases": [
|
||||
"CSV (whitespace)",
|
||||
"csv (whitespace)"
|
||||
],
|
||||
"extensions": []
|
||||
},
|
||||
{
|
||||
"id": "csv (caret)",
|
||||
"aliases": [
|
||||
"CSV (caret)",
|
||||
"csv (caret)"
|
||||
],
|
||||
"extensions": []
|
||||
},
|
||||
{
|
||||
"id": "csv (colon)",
|
||||
"aliases": [
|
||||
"CSV (colon)",
|
||||
"csv (colon)"
|
||||
],
|
||||
"extensions": []
|
||||
},
|
||||
{
|
||||
"id": "csv (double quote)",
|
||||
"aliases": [
|
||||
"CSV (double quote)",
|
||||
"csv (double quote)"
|
||||
],
|
||||
"extensions": []
|
||||
},
|
||||
{
|
||||
"id": "csv (equals)",
|
||||
"aliases": [
|
||||
"CSV (equals)",
|
||||
"csv (equals)"
|
||||
],
|
||||
"extensions": []
|
||||
},
|
||||
{
|
||||
"id": "csv (dot)",
|
||||
"aliases": [
|
||||
"CSV (dot)",
|
||||
"csv (dot)"
|
||||
],
|
||||
"extensions": []
|
||||
},
|
||||
{
|
||||
"id": "csv (hyphen)",
|
||||
"aliases": [
|
||||
"CSV (hyphen)",
|
||||
"csv (hyphen)"
|
||||
],
|
||||
"extensions": []
|
||||
},
|
||||
{
|
||||
"id": "csv (semicolon)",
|
||||
"aliases": [
|
||||
"CSV (semicolon)",
|
||||
"csv (semicolon)"
|
||||
],
|
||||
"extensions": []
|
||||
},
|
||||
{
|
||||
"id": "rainbow hover markup",
|
||||
"extensions": []
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"command": "rainbow-csv.CSVLint",
|
||||
"title": "CSV Lint"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.SetVirtualHeader",
|
||||
"category": "Rainbow CSV",
|
||||
"title": "Set virtual header"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.SetHeaderLine",
|
||||
"category": "Rainbow CSV",
|
||||
"title": "Set header line"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.RBQL",
|
||||
"category": "Rainbow CSV",
|
||||
"title": "RBQL"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.SetJoinTableName",
|
||||
"category": "Rainbow CSV",
|
||||
"title": "Set join table name for RBQL"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.ColumnEditBefore",
|
||||
"category": "Rainbow CSV",
|
||||
"title": "Column edit before"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.ColumnEditAfter",
|
||||
"category": "Rainbow CSV",
|
||||
"title": "Column edit after"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.ColumnEditSelect",
|
||||
"category": "Rainbow CSV",
|
||||
"title": "Column edit select"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.RainbowSeparator",
|
||||
"title": "Set as Rainbow separator"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.RainbowSeparatorOff",
|
||||
"title": "RainbowSeparatorOff"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.Align",
|
||||
"category": "Rainbow CSV",
|
||||
"title": "Align CSV Columns"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.Shrink",
|
||||
"category": "Rainbow CSV",
|
||||
"title": "Shrink CSV table: Remove leading and trailing whitespaces from all fields"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.CopyBack",
|
||||
"title": "CopyBack"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.SampleHead",
|
||||
"title": "Preview big CSV: head"
|
||||
},
|
||||
{
|
||||
"command": "rainbow-csv.SampleTail",
|
||||
"title": "Preview big CSV: tail"
|
||||
}
|
||||
],
|
||||
"grammars": [
|
||||
{
|
||||
"language": "csv",
|
||||
"scopeName": "text.csv",
|
||||
"path": "./syntaxes/csv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "tsv",
|
||||
"scopeName": "text.tsv",
|
||||
"path": "./syntaxes/tsv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "csv (pipe)",
|
||||
"scopeName": "text.psv",
|
||||
"path": "./syntaxes/pipe_simple.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "csv (tilde)",
|
||||
"scopeName": "text.tldsv",
|
||||
"path": "./syntaxes/tldsv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "csv (whitespace)",
|
||||
"scopeName": "text.wspcsv",
|
||||
"path": "./syntaxes/wspcsv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "csv (caret)",
|
||||
"scopeName": "text.crtsv",
|
||||
"path": "./syntaxes/crtsv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "csv (colon)",
|
||||
"scopeName": "text.clnsv",
|
||||
"path": "./syntaxes/clnsv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "csv (double quote)",
|
||||
"scopeName": "text.dbqsv",
|
||||
"path": "./syntaxes/dbqsv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "csv (equals)",
|
||||
"scopeName": "text.eqlsv",
|
||||
"path": "./syntaxes/eqlsv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "csv (dot)",
|
||||
"scopeName": "text.dotsv",
|
||||
"path": "./syntaxes/dotsv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "csv (hyphen)",
|
||||
"scopeName": "text.hypsv",
|
||||
"path": "./syntaxes/hypsv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "csv (semicolon)",
|
||||
"scopeName": "text.scsv",
|
||||
"path": "./syntaxes/scsv.tmLanguage.json"
|
||||
},
|
||||
{
|
||||
"language": "rainbow hover markup",
|
||||
"scopeName": "text.rbhover",
|
||||
"path": "./syntaxes/rb_hover.tmLanguage.json"
|
||||
}
|
||||
]
|
||||
},
|
||||
"capabilities": {
|
||||
"hoverProvider": "true"
|
||||
},
|
||||
"scripts": {
|
||||
"vscode:prepublish": "npm run package-web",
|
||||
"compile-web": "webpack",
|
||||
"lint": "eslint rainbow_utils.js extension.js test/runTest.js && eslint -c .eslintrc_browser.json rainbow_utils.js rbql_client.js",
|
||||
"watch-web": "webpack --watch",
|
||||
"package-web": "webpack --mode production --devtool hidden-source-map",
|
||||
"start-web-server": "vscode-test-web --version stable --browser=none --extensionDevelopmentPath=. .",
|
||||
"test-in-browser": "vscode-test-web --version stable --browser=chromium --extensionDevelopmentPath=. --extensionTestsPath=dist/web/test/suite/index.js .",
|
||||
"test": "node ./test/runTest.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/vscode": "^1.62.0",
|
||||
"@vscode/test-web": "^0.0.22",
|
||||
"@vscode/test-electron": "^1.6.1",
|
||||
"webpack": "^5.64.0",
|
||||
"webpack-cli": "^4.9.1",
|
||||
"assert": "^2.0.0",
|
||||
"process": "^0.11.10",
|
||||
"eslint": "^7.21.0"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/mechatroner/vscode_rainbow_csv"
|
||||
},
|
||||
"__metadata": {
|
||||
"id": "3792588c-3d35-442d-91ea-fe6a755e8155",
|
||||
"publisherId": "0d5438b6-325a-4f88-aa28-6192aa2cf2a6",
|
||||
"publisherDisplayName": "mechatroner",
|
||||
"targetPlatform": "undefined",
|
||||
"updated": true,
|
||||
"isPreReleaseVersion": false,
|
||||
"preRelease": false,
|
||||
"installedTimestamp": 1651406513766
|
||||
}
|
||||
}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 2.5 KiB |
@ -0,0 +1,675 @@
|
||||
const os = require('os');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const rbql = require('./rbql_core/rbql-js/rbql.js');
|
||||
const rbql_csv = require('./rbql_core/rbql-js/rbql_csv.js');
|
||||
const csv_utils = require('./rbql_core/rbql-js/csv_utils.js');
|
||||
|
||||
const non_numeric_sentinel = -1;
|
||||
const number_regex = /^([0-9]+)(\.[0-9]+)?$/;
|
||||
|
||||
class AssertionError extends Error {}
|
||||
|
||||
function assert(condition, message=null) {
|
||||
if (!condition) {
|
||||
if (!message) {
|
||||
message = 'Assertion error';
|
||||
}
|
||||
throw new AssertionError(message);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Return the default text for a file of JavaScript RBQL UDFs (user defined functions).
// The text is a block of `//` comment lines containing a sample function and a sample query.
// The template literal spans several source lines; the trailing replace() removes the
// leading spaces from every line of it, so the source indentation of those lines does not end up in the result.
function get_default_js_udf_content() {
    let default_content = `// This file can be used to store RBQL UDFs. Example:
//
// function foo(value) {
// return 'foo ' + String(value.length);
// }
//
// Functions defined in this file can be used in RBQL queries e.g.
// SELECT foo(a1), a2 WHERE foo(a3) != 'foo 5' LIMIT 10
//
// Don't forget to save this file after editing!
//
// Write your own functions bellow this line:
`.replace(new RegExp(/^ */, 'mg'), '');
    return default_content;
}
|
||||
|
||||
|
||||
// Return the default text for a file of Python RBQL UDFs (user defined functions).
// The text is a block of `#` comment lines containing a sample function and a sample query.
// The template literal spans several source lines; the trailing replace() removes the
// leading spaces from every line of it, so the source indentation of those lines does not end up in the result.
function get_default_python_udf_content() {
    let default_content = `# This file can be used to store RBQL UDFs. Example:
#
# def foo(value):
# return 'foo ' + str(len(value))
#
#
# Functions defined in this file can be used in RBQL queries e.g.
# SELECT foo(a1), a2 WHERE foo(a3) != 'foo 5' LIMIT 10
#
# Don't forget to save this file after editing!
#
# Write your own functions bellow this line:
`.replace(new RegExp(/^ */, 'mg'), '');
    return default_content;
}
|
||||
|
||||
|
||||
// Fold one field into the running width statistics of its column.
// `max_field_components_lens` is updated in place and is a tuple:
// (max_field_length, max_integer_part_length, max_fractional_part_length).
// The fractional part length counts the leading dot, because the dot is part of the regex group.
// Once the column is marked non-numeric (index 1 equals non_numeric_sentinel) only the overall width is tracked.
function update_subcomponent_stats(field, is_first_line, max_field_components_lens) {
    const stats = max_field_components_lens;
    const field_length = field.length;
    if (stats[0] < field_length) {
        stats[0] = field_length;
    }
    if (stats[1] == non_numeric_sentinel) {
        // The column is already known to be non-numeric, nothing else to track.
        return;
    }
    const parsed = number_regex.exec(field);
    if (parsed !== null) {
        const [, integer_part, fractional_part] = parsed;
        stats[1] = Math.max(stats[1], integer_part.length);
        stats[2] = Math.max(stats[2], fractional_part === undefined ? 0 : fractional_part.length);
        return;
    }
    // The field is not a number. An empty field is tolerated so that numeric columns may have gaps,
    // and the first line is tolerated because it could be a header.
    if (!is_first_line && field_length) {
        stats[1] = non_numeric_sentinel;
        stats[2] = non_numeric_sentinel;
    }
}
|
||||
|
||||
|
||||
// Collect per-column width statistics for the whole document.
// Lines starting with `comment_prefix` (when one is set) are skipped.
// Returns [column_stats, null] on success, where each entry is the tuple maintained by update_subcomponent_stats().
// Returns [null, line_number] (1-based) as soon as smart_split() reports a warning for a line.
function calc_column_stats(active_doc, delim, policy, comment_prefix) {
    const column_stats = [];
    const num_lines = active_doc.lineCount;
    let seen_data_line = false;
    for (let lnum = 0; lnum < num_lines; lnum++) {
        const line_text = active_doc.lineAt(lnum).text;
        if (comment_prefix && line_text.startsWith(comment_prefix))
            continue;
        const split_result = csv_utils.smart_split(line_text, delim, policy, true);
        const fields = split_result[0];
        if (split_result[1]) {
            return [null, lnum + 1];
        }
        for (const [fnum, raw_field] of fields.entries()) {
            const field = raw_field.trim();
            if (column_stats.length <= fnum) {
                // First time this column index is seen: start its stats from zero.
                column_stats.push([0, 0, 0]);
            }
            update_subcomponent_stats(field, !seen_data_line, column_stats[fnum]);
        }
        seen_data_line = true;
    }
    return [column_stats, null];
}
|
||||
|
||||
|
||||
// Make the numeric component widths of every column consistent with its overall (header) width.
// Each entry is the tuple (max_field_length, max_integer_part_length, max_fractional_part_length);
// the entries are modified in place and the same objects are returned in a new array.
// Columns whose integer part width is <= 0 get both numeric widths set to -1.
// Returns null if the final consistency check fails for some column.
function adjust_column_stats(column_stats) {
    const adjusted_stats = [];
    for (const stat of column_stats) {
        if (stat[1] <= 0) {
            stat[1] = -1;
            stat[2] = -1;
        } else if (stat[1] > 0) {
            // The integer and fractional maxima may come from different rows, so their sum
            // can exceed the widest field seen, e.g. for the values `0.12` and `1234`.
            const numeric_width = stat[1] + stat[2];
            if (numeric_width > stat[0]) {
                stat[0] = numeric_width;
            }
            // The header can be wider than the numeric parts: give the spare width to the integer part.
            const integer_room = stat[0] - stat[2];
            if (integer_room > stat[1]) {
                stat[1] = integer_room;
            }
            // Sanity check: at this point the total width must equal the sum of both parts.
            if (stat[0] != stat[1] + stat[2]) {
                return null;
            }
        }
        adjusted_stats.push(stat);
    }
    return adjusted_stats;
}
|
||||
|
||||
|
||||
// Pad a single field with spaces so that it lines up with the other fields of its column.
//
// field - raw field text, it is trimmed before alignment.
// is_first_line - true for the first data line; a non-numeric field on that line is treated as a header.
// max_field_components_lens - column tuple (max_field_length, max_integer_part_length, max_fractional_part_length).
// is_last_column - when true no trailing padding is appended.
//
// Non-numeric columns and header fields are left-aligned and padded on the right up to the column width.
// Fields of numeric columns are aligned by the position where the fractional part starts:
// padded on the left by the integer part and on the right by the fractional part.
// Every padded field that is not in the last column gets one extra space for readability.
function align_field(field, is_first_line, max_field_components_lens, is_last_column) {
    // Every delta below is clamped with Math.max(..., 0): a negative delta can theoretically happen
    // due to an async doc edit, and String.prototype.repeat() throws RangeError on a negative count.
    const extra_readability_whitespace_length = 1;
    field = field.trim();
    if (max_field_components_lens[1] == non_numeric_sentinel) {
        let delta_length = Math.max(max_field_components_lens[0] - field.length, 0);
        return is_last_column ? field : field + ' '.repeat(delta_length + extra_readability_whitespace_length);
    }
    if (is_first_line) {
        if (number_regex.exec(field) === null) {
            // The line must be a header - align it using max_width rule.
            let delta_length = Math.max(max_field_components_lens[0] - field.length, 0);
            return is_last_column ? field : field + ' '.repeat(delta_length + extra_readability_whitespace_length);
        }
    }
    let dot_pos = field.indexOf('.');
    let cur_integer_part_length = dot_pos == -1 ? field.length : dot_pos;
    // Here cur_fractional_part_length includes the leading dot too.
    let cur_fractional_part_length = dot_pos == -1 ? 0 : field.length - dot_pos;
    let integer_delta_length = Math.max(max_field_components_lens[1] - cur_integer_part_length, 0);
    // Fix: the lower bound 0 was missing here, so Math.max() was a no-op and a negative value could reach repeat().
    let fractional_delta_length = Math.max(max_field_components_lens[2] - cur_fractional_part_length, 0);
    let trailing_spaces = is_last_column ? '' : ' '.repeat(fractional_delta_length + extra_readability_whitespace_length);
    return ' '.repeat(integer_delta_length) + field + trailing_spaces;
}
|
||||
|
||||
|
||||
// Produce the aligned text of the whole document using precomputed `column_stats`.
// Comment lines and a trailing empty last line are copied unchanged.
// Every other line is split with smart_split(), each field is passed through align_field(),
// and the fields are joined back with `delim`.
// Returns the resulting lines joined with '\n', or null when no field was changed.
function align_columns(active_doc, delim, policy, comment_prefix, column_stats) {
    const num_lines = active_doc.lineCount;
    const result_lines = [];
    let is_first_line = true;
    let has_edit = false;
    for (let lnum = 0; lnum < num_lines; lnum++) {
        const line_text = active_doc.lineAt(lnum).text;
        const is_comment = comment_prefix && line_text.startsWith(comment_prefix);
        // The last empty line corresponds to the trailing newline character.
        const is_trailing_empty_line = lnum + 1 == num_lines && line_text == '';
        if (is_comment || is_trailing_empty_line) {
            result_lines.push(line_text);
            continue;
        }
        const fields = csv_utils.smart_split(line_text, delim, policy, true)[0];
        // Fields beyond the known columns are left untouched: a safeguard against async doc edit.
        const num_alignable = Math.min(fields.length, column_stats.length);
        for (let fnum = 0; fnum < num_alignable; fnum++) {
            const is_last_column = fnum + 1 == column_stats.length;
            const aligned = align_field(fields[fnum], is_first_line, column_stats[fnum], is_last_column);
            if (fields[fnum] != aligned) {
                fields[fnum] = aligned;
                has_edit = true;
            }
        }
        is_first_line = false;
        result_lines.push(fields.join(delim));
    }
    return has_edit ? result_lines.join('\n') : null;
}
|
||||
|
||||
|
||||
// Produce the text of the whole document with leading and trailing whitespace removed from every field.
// Comment lines are copied unchanged.
// Returns [text, null] when at least one field was trimmed, [null, null] when nothing changed,
// and [null, line_number] (1-based) as soon as smart_split() reports a warning for a line.
function shrink_columns(active_doc, delim, policy, comment_prefix) {
    const num_lines = active_doc.lineCount;
    const result_lines = [];
    let has_edit = false;
    for (let lnum = 0; lnum < num_lines; lnum++) {
        const line_text = active_doc.lineAt(lnum).text;
        if (comment_prefix && line_text.startsWith(comment_prefix)) {
            result_lines.push(line_text);
            continue;
        }
        const split_result = csv_utils.smart_split(line_text, delim, policy, true);
        const fields = split_result[0];
        if (split_result[1]) {
            return [null, lnum + 1];
        }
        for (const [fnum, raw_field] of fields.entries()) {
            const trimmed = raw_field.trim();
            // trim() only removes characters, so a length difference means the field changed.
            if (raw_field.length != trimmed.length) {
                fields[fnum] = trimmed;
                has_edit = true;
            }
        }
        result_lines.push(fields.join(delim));
    }
    return has_edit ? [result_lines.join('\n'), null] : [null, null];
}
|
||||
|
||||
|
||||
// Return the element stored at the last index of `arr` (undefined for an empty array).
function get_last(arr) {
    const last_index = arr.length - 1;
    return arr[last_index];
}
|
||||
|
||||
|
||||
// Extend `dst_record_map` with [first_line, end_line) line ranges of CSV records that may span several lines.
// Scanning resumes from the end line of the last entry already present in `dst_record_map` (or from line 0)
// and stops once the map holds `requested_end_record` entries or the document ends.
// "Optimistic" parsing: only the parity of double quotes in a line is inspected - a line with an odd number
// of double quotes opens a multiline record, or closes the one that is currently open.
// Comment lines are skipped and an empty last line is ignored.
// A record which is still open when scanning stops is stored as extending to the end of the document.
function populate_optimistic_rfc_csv_record_map(document, requested_end_record, dst_record_map, comment_prefix=null) {
    const num_lines = document.lineCount;
    let open_record_start = null;
    let lnum = dst_record_map.length ? get_last(dst_record_map)[1] : 0;
    while (lnum < num_lines && dst_record_map.length < requested_end_record) {
        const cur_lnum = lnum++;
        const line_text = document.lineAt(cur_lnum).text;
        if (cur_lnum + 1 >= num_lines && line_text == "")
            break; // Skip the last empty line.
        if (comment_prefix && line_text.startsWith(comment_prefix))
            continue;
        const quotes = line_text.match(/"/g);
        const toggles_quoting = quotes !== null && quotes.length % 2 == 1;
        if (open_record_start === null) {
            if (toggles_quoting) {
                open_record_start = cur_lnum;
            } else {
                dst_record_map.push([cur_lnum, cur_lnum + 1]);
            }
        } else if (toggles_quoting) {
            dst_record_map.push([open_record_start, cur_lnum + 1]);
            open_record_start = null;
        }
        // Otherwise the line is in the middle of an open multiline record: nothing to do.
    }
    if (open_record_start !== null) {
        dst_record_map.push([open_record_start, num_lines]);
    }
}
|
||||
|
||||
|
||||
// Build the storage key for `file_path`: the fixed prefix 'rbql_table_name:' followed by the path.
function make_table_name_key(file_path) {
    const key_prefix = 'rbql_table_name:';
    return key_prefix + file_path;
}
|
||||
|
||||
|
||||
// Replace a leading '~' in `filepath` with the home directory of the current user.
// Paths which do not start with '~' are returned unchanged.
function expanduser(filepath) {
    const starts_with_tilde = filepath.charAt(0) === '~';
    return starts_with_tilde ? path.join(os.homedir(), filepath.slice(1)) : filepath;
}
|
||||
|
||||
|
||||
// Resolve `table_id` to the path of an existing file, or return null when nothing is found.
// Candidates are tried in this order:
//   1. `table_id` itself after '~' expansion;
//   2. the same path joined to `main_table_dir`, when that dir is set and the path is not absolute;
//   3. the path stored in `vscode_global_state` under the key built by make_table_name_key(table_id).
function find_table_path(vscode_global_state, main_table_dir, table_id) {
    const expanded_path = expanduser(table_id);
    if (fs.existsSync(expanded_path)) {
        return expanded_path;
    }
    // A relative table_id could be relative either to the current directory or to the main table dir.
    if (main_table_dir && !path.isAbsolute(expanded_path)) {
        const path_in_main_dir = path.join(main_table_dir, expanded_path);
        if (fs.existsSync(path_in_main_dir)) {
            return path_in_main_dir;
        }
    }
    if (!vscode_global_state) {
        return null;
    }
    const stored_path = vscode_global_state.get(make_table_name_key(table_id));
    return stored_path && fs.existsSync(stored_path) ? stored_path : null;
}
|
||||
|
||||
|
||||
// Read the first line of the file at `table_path` using the given encoding ('latin-1' is opened as 'binary').
// Returns a promise which is resolved with the first line emitted by the line reader;
// the reader is closed as soon as that line arrives.
// The promise is rejected when the line reader emits an 'error' event.
// NOTE(review): nothing here settles the promise if the reader never emits 'line' or 'error'
// (presumably the case for an empty file) - confirm that callers can not hang on it.
async function read_header(table_path, encoding) {
    const stream_encoding = encoding == 'latin-1' ? 'binary' : encoding;
    const readline = require('readline');
    const line_reader = readline.createInterface({ input: fs.createReadStream(table_path, {encoding: stream_encoding}) });
    return new Promise((resolve, reject) => {
        let first_line_delivered = false;
        line_reader.on('line', (line) => {
            // More 'line' events may still arrive after close(), only the first one counts.
            if (first_line_delivered)
                return;
            first_line_delivered = true;
            line_reader.close();
            resolve(line);
        });
        line_reader.on('error', (error) => {
            reject(error);
        });
    });
}
|
||||
|
||||
|
||||
// Return the text of the first line of `document` that is not a comment line.
//
// document: object exposing `lineCount` and `lineAt(n).text`.
// comment_prefix: lines starting with this prefix are skipped; a falsy prefix disables skipping.
// Returns null when every line is a comment or the document has no lines.
function get_header_line(document, comment_prefix) {
    const total_lines = document.lineCount;
    let line_index = 0;
    while (line_index < total_lines) {
        const candidate = document.lineAt(line_index).text;
        const is_comment = Boolean(comment_prefix) && candidate.startsWith(comment_prefix);
        if (!is_comment) {
            return candidate;
        }
        line_index += 1;
    }
    return null;
}
|
||||
|
||||
|
||||
function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
|
||||
let keys = Object.keys(inconsistent_records_info);
|
||||
let entries = [];
|
||||
for (let i = 0; i < keys.length; i++) {
|
||||
let key = keys[i];
|
||||
let record_id = inconsistent_records_info[key];
|
||||
entries.push([record_id, key]);
|
||||
}
|
||||
entries.sort(function(a, b) { return a[0] - b[0]; });
|
||||
assert(entries.length > 1);
|
||||
let [record_1, num_fields_1] = entries[0];
|
||||
let [record_2, num_fields_2] = entries[1];
|
||||
let warn_msg = `Number of fields in "${table_name}" table is not consistent: `;
|
||||
warn_msg += `e.g. record ${record_1} -> ${num_fields_1} fields, record ${record_2} -> ${num_fields_2} fields`;
|
||||
return warn_msg;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Error type thrown by the record iterators, writers, table registries and query helpers
// in this file when an input/output related problem is detected.
class RbqlIOHandlingError extends Error {}
|
||||
|
||||
// Record iterator that reads CSV records directly from an open VS Code text document
// (an object exposing `lineCount` and `lineAt(n).text`) instead of from a stream.
class VSCodeRecordIterator extends rbql.RBQLInputIterator {
    // document: the text document to read.
    // delim, policy: passed through to csv_utils.smart_split for every line.
    // has_header: when true the first record is treated as a header and skipped by get_record().
    // comment_prefix: lines starting with this prefix are skipped; null disables skipping.
    // table_name: used in warning and error messages.
    // variable_prefix: prefix of the column variables recognized in the query text.
    constructor(document, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
        // We could have done a hack here actually: convert the document to stream/buffer and then use the standard reader.
        super();
        this.document = document;
        this.delim = delim;
        this.policy = policy;
        this.has_header = has_header;
        this.comment_prefix = comment_prefix;
        this.table_name = table_name;
        this.variable_prefix = variable_prefix;
        this.NR = 0; // Record number.
        this.NL = 0; // Line number (NL != NR when the CSV file has comments or multiline fields).
        this.fields_info = {}; // Maps a field count to the number of the first record that had it.
        this.first_defective_line = null;
        this.first_record = this.get_first_record();
    }


    // Nothing to release: the iterator holds no stream or other resource.
    stop() {
    }


    // Split the first non-comment line of the document into fields.
    // Returns an empty array when the document has no such line, so that an empty or
    // comment-only document does not pass null into smart_split.
    get_first_record() {
        let header_line = get_header_line(this.document, this.comment_prefix);
        if (header_line === null)
            return [];
        return csv_utils.smart_split(header_line, this.delim, this.policy, /*preserve_quotes_and_whitespaces=*/false)[0];
    }


    // Collect the column variables referenced by `query_text`.
    // Header-based (attribute/dictionary) variables are only parsed when has_header is set.
    async get_variables_map(query_text) {
        let variable_map = {};
        rbql.parse_basic_variables(query_text, this.variable_prefix, variable_map);
        rbql.parse_array_variables(query_text, this.variable_prefix, variable_map);
        if (this.has_header) {
            // Re-read the header from the document instead of duplicating the splitting logic here.
            let first_record = this.get_first_record();
            rbql.parse_attribute_variables(query_text, this.variable_prefix, first_record, 'CSV header line', variable_map);
            rbql.parse_dictionary_variables(query_text, this.variable_prefix, first_record, variable_map);
        }
        return variable_map;
    }


    // Returns the header record captured at construction time, or null when has_header is false.
    async get_header() {
        return this.has_header ? this.first_record : null;
    }


    // Return the text of the next record for the 'quoted_rfc' policy, where one record may
    // span several document lines. Returns null when the document is exhausted.
    get_line_rfc() {
        let rfc_line_buffer = [];
        const num_lines = this.document.lineCount;
        while (this.NL < num_lines) {
            let line = this.document.lineAt(this.NL).text;
            this.NL += 1;
            if (this.NL == num_lines && line.length == 0)
                break; // Skip the last line if it is empty - this can happen due to trailing newline.
            let record_line = csv_utils.accumulate_rfc_line_into_record(rfc_line_buffer, line, this.comment_prefix);
            if (record_line !== null)
                return record_line;
        }
        // The document ended while a record was still being accumulated (e.g. an unterminated
        // quoted field). Return the pending text instead of silently dropping it, so that the
        // caller can report the defect.
        // NOTE(review): assumes accumulate_rfc_line_into_record() keeps the lines of an unfinished
        // record in rfc_line_buffer - confirm against csv_utils. If it does not, this is a no-op.
        if (rfc_line_buffer.length > 0)
            return rfc_line_buffer.join('\n');
        return null;
    }


    // Return the next non-comment line for single-line policies, or null when exhausted.
    get_line_simple() {
        const num_lines = this.document.lineCount;
        while (this.NL < num_lines) {
            let line = this.document.lineAt(this.NL).text;
            this.NL += 1;
            if (this.NL == num_lines && line.length == 0)
                return null; // Skip the last line if it is empty - this can happen due to trailing newline.
            if (this.comment_prefix === null || !line.startsWith(this.comment_prefix))
                return line;
        }
        return null;
    }


    // Read and split the next record. Returns the array of fields, or null when exhausted.
    // Throws RbqlIOHandlingError on the first quoting defect when the policy is 'quoted_rfc';
    // for other policies the first defective line is only remembered for get_warnings().
    do_get_record() {
        let line = (this.policy == 'quoted_rfc') ? this.get_line_rfc() : this.get_line_simple();
        if (line === null)
            return null;
        let [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, /*preserve_quotes_and_whitespaces=*/false);
        if (warning) {
            if (this.first_defective_line === null) {
                // NL has already been advanced past the line, so it is a 1-based line number here.
                this.first_defective_line = this.NL;
                if (this.policy == 'quoted_rfc')
                    throw new RbqlIOHandlingError(`Inconsistent double quote escaping in ${this.table_name} table at record ${this.NR}, line ${this.NL}`);
            }
        }
        let num_fields = record.length;
        if (!this.fields_info.hasOwnProperty(num_fields))
            this.fields_info[num_fields] = this.NR;
        return record;
    }


    // Return the next data record, or null when the document is exhausted.
    async get_record() {
        if (this.NR == 0 && this.has_header) {
            this.do_get_record(); // Skip the header record.
        }
        this.NR += 1;
        let record = this.do_get_record();
        return record;
    }


    // Return the warnings collected while iterating: quoting defects and inconsistent field counts.
    get_warnings() {
        let result = [];
        if (this.first_defective_line !== null)
            result.push(`Inconsistent double quote escaping in ${this.table_name} table. E.g. at line ${this.first_defective_line}`);
        if (Object.keys(this.fields_info).length > 1)
            result.push(make_inconsistent_num_fields_warning(this.table_name, this.fields_info));
        return result;
    }
}
|
||||
|
||||
|
||||
// Output writer that formats records as CSV lines and collects them in memory
// (`output_lines`) instead of writing them to a stream.
class VSCodeWriter extends rbql.RBQLOutputWriter {
    // delim: output field separator.
    // policy: one of 'simple', 'quoted', 'quoted_rfc', 'monocolumn', 'whitespace'.
    // Throws RbqlIOHandlingError for any other policy.
    constructor(delim, policy) {
        super();
        this.delim = delim;
        this.policy = policy;
        this.header_len = null;
        this.null_in_output = false;
        this.delim_in_simple_output = false;
        this.output_lines = [];
        // normalize_fields() joins nested arrays with this separator. It was never initialized
        // in this class, so Array.prototype.join(undefined) silently fell back to ','.
        // A separator different from the field delimiter keeps a joined sub-array inside one field.
        // NOTE(review): only set when the base class has not already provided a value - confirm
        // the chosen '|' / ';' matches what the stream-based CSV writer uses.
        if (this.sub_array_delim === undefined) {
            this.sub_array_delim = delim == '|' ? ';' : '|';
        }

        const join_by_policy = {
            'simple': this.simple_join,
            'quoted': this.quoted_join,
            'quoted_rfc': this.quoted_join_rfc,
            'monocolumn': this.mono_join,
            'whitespace': this.simple_join,
        };
        if (!Object.prototype.hasOwnProperty.call(join_by_policy, policy)) {
            throw new RbqlIOHandlingError('Unknown output csv policy');
        }
        this.polymorphic_join = join_by_policy[policy];
    }


    // Write the header (when not null) as the first output line and remember its length,
    // so that write() can reject records with a different number of columns.
    set_header(header) {
        if (header !== null) {
            this.header_len = header.length;
            this.write(header);
        }
    }


    // Join fields for the 'quoted' policy.
    quoted_join(fields) {
        let delim = this.delim;
        var quoted_fields = fields.map(function(v) { return csv_utils.quote_field(String(v), delim); });
        return quoted_fields.join(this.delim);
    }


    // Join fields for the 'quoted_rfc' policy.
    quoted_join_rfc(fields) {
        let delim = this.delim;
        var quoted_fields = fields.map(function(v) { return csv_utils.rfc_quote_field(String(v), delim); });
        return quoted_fields.join(this.delim);
    }


    // Return the single field of a record for the 'monocolumn' policy.
    // Throws RbqlIOHandlingError when the record has more than one field.
    // An empty record yields an empty line instead of `undefined`.
    mono_join(fields) {
        if (fields.length > 1) {
            throw new RbqlIOHandlingError('Unable to use "Monocolumn" output format: some records have more than one field');
        }
        return fields.length ? String(fields[0]) : '';
    }


    // Join fields with the delimiter without any quoting ('simple' and 'whitespace' policies).
    // Remembers whether any individual field contains the delimiter so that get_warnings()
    // can report it. Each field is checked on its own: checking the concatenation of all
    // fields would report a false positive when a multi-character delimiter happens to be
    // formed across the boundary of two adjacent fields.
    simple_join(fields) {
        const delim = this.delim;
        const has_delim_inside = fields.some(function(v) { return String(v).indexOf(delim) != -1; });
        if (has_delim_inside) {
            this.delim_in_simple_output = true;
        }
        return fields.join(delim);
    }


    // Normalize a record in place: null/undefined become empty strings (and are remembered
    // for get_warnings()); nested arrays are normalized recursively and joined into one string.
    normalize_fields(out_fields) {
        for (var i = 0; i < out_fields.length; i++) {
            if (out_fields[i] == null) { // Loose comparison: matches both null and undefined.
                this.null_in_output = true;
                out_fields[i] = '';
            } else if (Array.isArray(out_fields[i])) {
                this.normalize_fields(out_fields[i]);
                out_fields[i] = out_fields[i].join(this.sub_array_delim);
            }
        }
    }


    // Format one record and append it to `output_lines`. Note that `fields` is modified in place.
    // Throws RbqlIOHandlingError when a header was set and the record length differs from it.
    // Always returns true.
    write(fields) {
        if (this.header_len !== null && fields.length != this.header_len)
            throw new RbqlIOHandlingError(`Inconsistent number of columns in output header and the current record: ${this.header_len} != ${fields.length}`);
        this.normalize_fields(fields);
        this.output_lines.push(this.polymorphic_join(fields));
        return true;
    }


    // Nothing to flush: all output is kept in memory.
    async finish() {
    }


    // Return the warnings collected while writing.
    get_warnings() {
        let result = [];
        if (this.null_in_output)
            result.push('null values in output were replaced by empty strings');
        if (this.delim_in_simple_output)
            result.push('Some output fields contain separator');
        return result;
    }
}
|
||||
|
||||
// Join-table registry used by rbql_query_web: it cannot provide any table,
// so every lookup fails with an explanatory error.
class VSCodeTableRegistry {
    // Always throws RbqlIOHandlingError; the requested table id is ignored.
    get_iterator_by_table_id(_table_id) {
        const message = "JOIN queries are currently not supported in vscode.dev web version.";
        throw new RbqlIOHandlingError(message);
    }

    // This registry never produces warnings.
    get_warnings() {
        const no_warnings = [];
        return no_warnings;
    }
}
|
||||
|
||||
// Run an RBQL query against an open text document and return the formatted output lines.
//
// The input is read with VSCodeRecordIterator and the output is collected in memory by
// VSCodeWriter. JOIN tables and user init code are not available in this entry point.
// Warnings are reported through the `output_warnings` argument handed to rbql.query.
async function rbql_query_web(query_text, input_document, input_delim, input_policy, output_delim, output_policy, output_warnings, with_headers, comment_prefix=null) {
    const record_source = new VSCodeRecordIterator(input_document, input_delim, input_policy, with_headers, comment_prefix);
    const line_collector = new VSCodeWriter(output_delim, output_policy);
    const join_registry = new VSCodeTableRegistry(); // TODO find a way to have join registry.
    const init_code = ''; // TODO find a way to have init code.
    await rbql.query(query_text, record_source, line_collector, output_warnings, join_registry, init_code);
    return line_collector.output_lines;
}
|
||||
|
||||
|
||||
// Join-table registry that resolves table ids to CSV files on disk (see find_table_path)
// and exposes them through rbql_csv.CSVRecordIterator.
class VSCodeFileSystemCSVRegistry extends rbql.RBQLTableRegistry {
    // vscode_global_state, input_file_dir: forwarded to find_table_path when resolving a table id.
    // delim, policy, encoding, has_header, comment_prefix: forwarded to the record iterator.
    // options: optional object; a truthy `bulk_read` entry makes the iterator receive the
    //     file path instead of a read stream.
    constructor(vscode_global_state, input_file_dir, delim, policy, encoding, has_header=false, comment_prefix=null, options=null) {
        super();
        Object.assign(this, {vscode_global_state, input_file_dir, delim, policy, encoding, has_header, comment_prefix, options});

        // Filled in by get_iterator_by_table_id().
        this.stream = null;
        this.record_iterator = null;
        this.bulk_input_path = null;
        this.table_path = null;
    }


    // Create the record iterator for the join table `table_id`.
    // Throws RbqlIOHandlingError when no matching file can be found.
    get_iterator_by_table_id(table_id) {
        const resolved_path = find_table_path(this.vscode_global_state, this.input_file_dir, table_id);
        this.table_path = resolved_path;
        if (resolved_path === null) {
            throw new RbqlIOHandlingError(`Unable to find join table "${table_id}"`);
        }

        const use_bulk_read = this.options && this.options['bulk_read'];
        if (use_bulk_read) {
            this.bulk_input_path = resolved_path;
        } else {
            this.stream = fs.createReadStream(resolved_path);
        }

        this.record_iterator = new rbql_csv.CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
        return this.record_iterator;
    }


    // Append this registry's warnings to `output_warnings` (nothing is returned).
    // A warning is added only when a join table was opened and headers are enabled.
    get_warnings(output_warnings) {
        if (!this.record_iterator || !this.has_header) {
            return;
        }
        const join_file_name = path.basename(this.table_path);
        output_warnings.push(`The first record in JOIN file ${join_file_name} was also treated as header (and skipped)`);
    }
}
|
||||
|
||||
|
||||
// Run an RBQL query over CSV input and write the result as CSV.
//
// input_path: path of the input file; null reads from process.stdin.
// output_path: path of the output file; null writes to process.stdout.
// csv_encoding: 'latin-1' is mapped to Node's 'binary' encoding name.
// output_warnings: array that receives warnings from the query and the join registry.
// user_init_code: when empty, ~/.rbql_init_source.js is used if it exists.
// options: optional object; a truthy `bulk_read` entry passes the input path to the
//     iterator instead of a read stream.
//
// Throws RbqlIOHandlingError for incompatible delimiter/policy/encoding combinations.
async function rbql_query_node(vscode_global_state, query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers=false, comment_prefix=null, user_init_code='', options=null) {
    // Validate the arguments before touching the file system: opening the streams first
    // would create/truncate the output file and leave both streams open on a validation error.
    if (input_delim == '"' && input_policy == 'quoted')
        throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
    if (csv_encoding == 'latin-1')
        csv_encoding = 'binary';
    if (!rbql_csv.is_ascii(query_text) && csv_encoding == 'binary')
        throw new RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary');
    if ((!rbql_csv.is_ascii(input_delim) || !rbql_csv.is_ascii(output_delim)) && csv_encoding == 'binary')
        throw new RbqlIOHandlingError('To use non-ascii separators enable UTF-8 encoding instead of latin-1/binary');

    let default_init_source_path = path.join(os.homedir(), '.rbql_init_source.js');
    if (user_init_code == '' && fs.existsSync(default_init_source_path)) {
        user_init_code = rbql_csv.read_user_init_code(default_init_source_path);
    }

    let input_stream = null;
    let bulk_input_path = null;
    if (options && options['bulk_read'] && input_path) {
        bulk_input_path = input_path;
    } else {
        input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
    }
    let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];

    let input_file_dir = input_path ? path.dirname(input_path) : null;
    let join_tables_registry = new VSCodeFileSystemCSVRegistry(vscode_global_state, input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
    let input_iterator = new rbql_csv.CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
    let output_writer = new rbql_csv.CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);

    await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
    join_tables_registry.get_warnings(output_warnings);
}
|
||||
|
||||
|
||||
// Public API of this module.
Object.assign(module.exports, {
    make_table_name_key,
    find_table_path,
    read_header,
    rbql_query_web,
    rbql_query_node,
    get_header_line,
    populate_optimistic_rfc_csv_record_map,
    get_default_js_udf_content,
    get_default_python_udf_content,
    align_columns,
    shrink_columns,
    calc_column_stats,
    adjust_column_stats,
    update_subcomponent_stats,
    align_field,
});
|
||||
@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import random
|
||||
import tempfile
|
||||
import json
|
||||
import time
|
||||
|
||||
|
||||
def parse_params(param_string):
    """Parse a mock parameter string of the form ``test key:value,key:value``.

    The string must start with the ``test `` marker (checked with an assertion).
    The rest is split on commas; chunks without a colon are ignored. Each remaining
    chunk is split on its FIRST colon only, so a value may itself contain colons
    (e.g. ``stdout:a:b`` yields ``{'stdout': 'a:b'}``) instead of raising ValueError.
    Values cannot contain commas, since the comma is the pair separator.

    Returns a dict mapping each key to its string value.
    """
    test_marker = 'test '
    assert param_string.startswith(test_marker)
    result = dict()
    for pair in param_string[len(test_marker):].split(','):
        key, separator, value = pair.partition(':')
        if not separator:
            continue
        result[key] = value
    return result
|
||||
|
||||
|
||||
class MockException(RuntimeError):
    """Exception raised by this mock script to simulate handled and unhandled failures."""
|
||||
|
||||
|
||||
def main():
    """Entry point of the mock script.

    Reads the parameter string from ``sys.argv[1]``, writes it ten times (one per line)
    into a fresh temporary file and prints a JSON report containing ``result_path``.
    The parsed parameters drive the simulated behavior:

    - ``sleep``: sleep for the given number of seconds;
    - ``error``: add ``error_type``/``error_details`` to the report;
    - ``warning``: add a ``warnings`` list (split on ``;``) to the report;
    - ``unhandled_exception``: raise MockException;
    - ``handled_exception``: raise and catch MockException, recording it in the report;
    - ``stderr``: write the value to stderr;
    - ``stdout``: write the value to stdout INSTEAD of the JSON report;
    - ``return_code``: exit with the given integer code.
    """
    param_string = sys.argv[1]
    params = parse_params(param_string)

    # mkstemp creates a uniquely named file atomically, so two concurrent runs cannot
    # end up sharing a file the way a randomly numbered name could.
    fd, tmp_file = tempfile.mkstemp(prefix='rnd_mock.', suffix='.txt')
    with os.fdopen(fd, 'w') as f:
        for _ in range(10):
            f.write(param_string + '\n')
    report = {'result_path': tmp_file}

    if 'sleep' in params:
        sleep_time = float(params['sleep'])
        time.sleep(sleep_time)

    if 'error' in params:
        report['error_type'] = 'Mock Error'
        report['error_details'] = params['error']

    if 'warning' in params:
        report['warnings'] = params['warning'].split(';')

    if 'unhandled_exception' in params:
        raise MockException('Unhandled Mock Exception')

    if 'handled_exception' in params:
        try:
            raise MockException('Handled Mock Exception')
        except Exception as e:
            report['error_type'] = 'Exception'
            report['error_details'] = str(e)

    if 'stderr' in params:
        sys.stderr.write(params['stderr'])

    if 'stdout' in params:
        sys.stdout.write(params['stdout'])
    else:
        sys.stdout.write(json.dumps(report))

    if 'return_code' in params:
        return_code = int(params['return_code'])
        sys.exit(return_code)
|
||||
|
||||
|
||||
# Run the mock only when this file is executed as a script, not when it is imported.
# main() returns None on the normal path, so this exits with status 0 unless main()
# itself calls sys.exit() or raises.
if __name__ == '__main__':
    sys.exit(main())
|
||||
@ -0,0 +1,620 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
|
||||
<style>
|
||||
|
||||
:root {
|
||||
--inputwidth: 80%;
|
||||
}
|
||||
|
||||
html * {
|
||||
font-size: 16px !important;
|
||||
}
|
||||
|
||||
body {
|
||||
background-color: var(--vscode-editor-background);
|
||||
color: var(--vscode-editor-foreground);
|
||||
}
|
||||
|
||||
th {
|
||||
padding: 3px 8px;
|
||||
border-right: 1px solid;
|
||||
border-top: 1px solid;
|
||||
border-bottom: 1px solid;
|
||||
border-right-color: var(--vscode-window-activeBorder);
|
||||
border-top-color: var(--vscode-window-activeBorder);
|
||||
border-bottom-color: var(--vscode-window-activeBorder);
|
||||
position: sticky;
|
||||
top: 0;
|
||||
background-color: var(--vscode-editor-background);
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
td {
|
||||
padding: 3px 8px;
|
||||
background-color: var(--vscode-editor-background);
|
||||
border-right: 1px solid;
|
||||
border-bottom: 1px solid;
|
||||
border-right-color: var(--vscode-window-activeBorder);
|
||||
border-bottom-color: var(--vscode-window-activeBorder);
|
||||
}
|
||||
|
||||
table {
|
||||
display: block;
|
||||
white-space: nowrap;
|
||||
border-collapse: separate;
|
||||
background-color: var(--vscode-editor-background);
|
||||
}
|
||||
|
||||
|
||||
table th:first-child, table td:first-child {
|
||||
/* Apply a left border on the first <td> or <th> in a row */
|
||||
border-left: 1px solid;
|
||||
border-left-color: var(--vscode-window-activeBorder);
|
||||
}
|
||||
|
||||
|
||||
.padded_label {
|
||||
padding: 5px;
|
||||
}
|
||||
|
||||
.code_sample {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
|
||||
#rbql_console {
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
.checkbox_input {
|
||||
background-color: var(--vscode-input-background);
|
||||
color: var(--vscode-input-foreground);
|
||||
margin-bottom: 6px;
|
||||
font-size: 14px !important;
|
||||
font-family: Consolas, Monaco, monospace;
|
||||
}
|
||||
|
||||
|
||||
.select_input {
|
||||
background-color: var(--vscode-input-background);
|
||||
color: var(--vscode-input-foreground);
|
||||
margin-bottom: 6px;
|
||||
width: 300px;
|
||||
font-size: 14px !important;
|
||||
font-family: Consolas, Monaco, monospace;
|
||||
}
|
||||
|
||||
#rbql_input {
|
||||
margin: 0px;
|
||||
height: 24px !important;
|
||||
font-family: Consolas, Monaco, monospace;
|
||||
font-size: 14px !important;
|
||||
padding-left: 2px;
|
||||
background-color: var(--vscode-input-background);
|
||||
caret-color: var(--vscode-input-foreground);
|
||||
color: var(--vscode-input-foreground);
|
||||
display: inline-block;
|
||||
box-sizing: border-box; /* See css-submit-button-looks-smaller-than-text-input-and-textarea */
|
||||
vertical-align:middle;
|
||||
width: var(--inputwidth);
|
||||
}
|
||||
|
||||
#toggle_history_btn {
|
||||
margin: 0px;
|
||||
width: 28px;
|
||||
background-color: var(--vscode-button-secondaryBackground);
|
||||
color: var(--vscode-button-secondaryForeground);
|
||||
height: 24px !important;
|
||||
padding: 0px;
|
||||
display: inline-block;
|
||||
border-width: 0px;
|
||||
outline:none;
|
||||
vertical-align:middle;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
#toggle_history_btn:hover {
|
||||
background-color: var(--vscode-button-secondaryHoverBackground);
|
||||
}
|
||||
|
||||
|
||||
#rbql_run_btn {
|
||||
margin: 0px;
|
||||
width: 70px;
|
||||
background-color: var(--vscode-button-background);
|
||||
color: var(--vscode-button-foreground);
|
||||
height: 24px !important;
|
||||
padding: 0px;
|
||||
display: inline-block;
|
||||
outline:none;
|
||||
vertical-align:middle;
|
||||
box-sizing: border-box;
|
||||
border-width: 0px;
|
||||
}
|
||||
|
||||
#rbql_run_btn:hover {
|
||||
background-color: var(--vscode-button-hoverBackground);
|
||||
}
|
||||
|
||||
|
||||
#help_btn {
|
||||
margin: 0px;
|
||||
width: 28px;
|
||||
background-color: var(--vscode-button-secondaryBackground);
|
||||
color: var(--vscode-button-secondaryForeground);
|
||||
height: 24px !important;
|
||||
padding: 0px;
|
||||
display: inline-block;
|
||||
outline:none;
|
||||
vertical-align:middle;
|
||||
box-sizing: border-box;
|
||||
border-width: 0px;
|
||||
}
|
||||
|
||||
#help_btn:hover {
|
||||
background-color: var(--vscode-button-secondaryHoverBackground);
|
||||
}
|
||||
|
||||
|
||||
#clear_history_btn {
|
||||
margin: 0px;
|
||||
background-color: var(--vscode-button-background);
|
||||
color: var(--vscode-button-foreground);
|
||||
height: 24px !important;
|
||||
padding: 0px 5px 0px 5px;
|
||||
display: inline-block;
|
||||
float: right;
|
||||
outline:none;
|
||||
vertical-align:middle;
|
||||
border:none;
|
||||
}
|
||||
|
||||
#clear_history_btn:hover {
|
||||
background-color: var(--vscode-button-hoverBackground);
|
||||
}
|
||||
|
||||
.history_button {
|
||||
margin: 0px;
|
||||
width: 100%;
|
||||
background-color: var(--vscode-button-secondaryBackground);
|
||||
color: var(--vscode-button-secondaryForeground);
|
||||
height: 24px !important;
|
||||
font-family: Consolas, Monaco, monospace;
|
||||
font-size: 14px !important;
|
||||
border-width: 0px;
|
||||
outline: none;
|
||||
display: block;
|
||||
text-align: left;
|
||||
padding-left: 2px;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.history_button:hover {
|
||||
background-color: var(--vscode-button-secondaryHoverBackground);
|
||||
}
|
||||
|
||||
.navigation_button {
|
||||
background-color: var(--vscode-button-secondaryBackground);
|
||||
color: var(--vscode-button-secondaryForeground);
|
||||
width: 28px;
|
||||
font-size: 18px !important;
|
||||
height: 28px;
|
||||
font-weight: bold;
|
||||
border-width: 0px;
|
||||
}
|
||||
|
||||
.navigation_button:hover {
|
||||
background-color: var(--vscode-button-secondaryHoverBackground);
|
||||
}
|
||||
|
||||
|
||||
#udf_button {
|
||||
background-color: var(--vscode-button-secondaryBackground);
|
||||
color: var(--vscode-button-secondaryForeground);
|
||||
font-size: 14px !important;
|
||||
border-width: 0px;
|
||||
height: 24px !important;
|
||||
outline:none;
|
||||
vertical-align:middle;
|
||||
padding: 0px;
|
||||
padding-left: 12px;
|
||||
padding-right: 12px;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
#udf_button:hover {
|
||||
background-color: var(--vscode-button-secondaryHoverBackground);
|
||||
}
|
||||
|
||||
#enable_rfc_newlines_section {
|
||||
display: none;
|
||||
}
|
||||
|
||||
#rbql_help {
|
||||
width: 80%;
|
||||
top: 10%;
|
||||
left: 10%;
|
||||
height: 80%;
|
||||
z-index: 1000000;
|
||||
position: fixed;
|
||||
display: none;
|
||||
overflow: scroll;
|
||||
}
|
||||
|
||||
#close_help {
|
||||
height: 28px;
|
||||
width: 28px;
|
||||
top: 10%;
|
||||
left: 90%;
|
||||
z-index: 1000010;
|
||||
position: fixed;
|
||||
font-size: 18px !important;
|
||||
background-color: var(--vscode-button-background);
|
||||
color: var(--vscode-button-foreground);
|
||||
font-weight: bold;
|
||||
display: none;
|
||||
border-width: 0px;
|
||||
|
||||
}
|
||||
#close_help:hover {
|
||||
background-color: var(--vscode-button-hoverBackground);
|
||||
}
|
||||
|
||||
|
||||
#rbql_help_text {
|
||||
margin-left: 2px;
|
||||
}
|
||||
|
||||
|
||||
#query_history {
|
||||
font-family: Consolas, Monaco, monospace;
|
||||
font-size: 14px !important;
|
||||
width: var(--inputwidth);
|
||||
z-index: 1000000;
|
||||
background-color: var(--vscode-notifications-background);
|
||||
color: var(--vscode-notifications-foreground);
|
||||
position: fixed;
|
||||
display: none;
|
||||
border-top: 1px solid;
|
||||
border-left: 1px solid;
|
||||
border-right: 1px solid;
|
||||
border-color: var(--vscode-focusBorder);
|
||||
}
|
||||
|
||||
|
||||
/* Suggestion dropdown: hidden by default, stacked above the error popup (higher z-index). */
#query_suggest {
    display: none;
    position: fixed;
    z-index: 1000010;
    width: 200px;
    max-height: 300px;
    overflow-x: hidden;
    overflow-y: auto;
    white-space: nowrap;
    color: var(--vscode-notifications-foreground);
    background-color: var(--vscode-notifications-background);
    /* Only the top and side edges are outlined; border-color must follow the shorthands. */
    border-top: 1px solid;
    border-left: 1px solid;
    border-right: 1px solid;
    border-color: var(--vscode-focusBorder);
}

/* Scrollable list of previously executed queries, one per line. */
#history_entries {
    margin: 0px;
    max-height: 300px;
    overflow-x: auto;
    overflow-y: auto;
    white-space: nowrap;
    font-family: Consolas, Monaco, monospace;
    font-size: 14px !important;
}

/* Title bar of the query history panel. */
#query_history_header {
    margin: 0px;
    height: 24px;
    font-size: 18px !important;
}

/* Viewport around the preview table: shrinks to the table, scrolls when it overflows. */
#table_window {
    width: fit-content;
    max-width: 95%;
    max-height: 350px;
    overflow: scroll;
    /* border: 2px solid black; (disabled) */
}

/* Error popup: hidden by default, fixed position, half of the viewport width. */
#rbql_error_message {
    display: none;
    position: fixed;
    top: 20%;
    left: 25%;
    width: 50%;
    z-index: 1000000;
    color: var(--vscode-notifications-foreground);
    background-color: var(--vscode-notifications-background);
}

/* Header strip of the error popup, using the editor's error colors. */
#rbql_error_message_header {
    height: 26px;
    font-size: 18px !important;
    font-weight: bold;
    color: var(--vscode-inputValidation-errorForeground);
    background-color: var(--vscode-inputValidation-errorBackground);
}

/* "OK" button that dismisses the error popup. */
#ack_error {
    display: inline-block;
    width: 70px;
    height: 28px;
    border: none;
    font-size: 18px !important;
    color: var(--vscode-button-foreground);
    background-color: var(--vscode-button-background);
}

#ack_error:hover {
    background-color: var(--vscode-button-hoverBackground);
}

/* Error details box: preformatted text, scrolls when the message is long. */
#error_message_details {
    height: 100px;
    margin: 5px;
    overflow: auto;
    white-space: pre;
    font-family: Consolas, Monaco, monospace;
    color: var(--vscode-notifications-foreground);
    background-color: var(--vscode-notifications-background);
    border: 1px solid;
    border-color: var(--vscode-focusBorder);
}

/* Uniform 20x20 checkboxes aligned with their label text. */
[type="checkbox"] {
    width: 20px;
    height: 20px;
    margin-left: 0px;
    vertical-align: middle;
}
|
||||
|
||||
</style>
|
||||
|
||||
|
||||
<script src="contrib/textarea-caret-position/index.js"></script>
|
||||
<script src="rbql_suggest.js"></script>
|
||||
<script src="rbql_client.js"></script>
|
||||
|
||||
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
|
||||
<div id="rbql_console">
|
||||
<div id="table_group">
|
||||
<div id="table_window">
|
||||
<table id="preview_table">
|
||||
</table>
|
||||
</div>
|
||||
<div id="navig_bar">
|
||||
<button class="navigation_button" title="Begin" id="go_begin">«</button>
|
||||
<button class="navigation_button" title="Back" id="go_backward">‹</button>
|
||||
<button class="navigation_button" title="Forward" id="go_forward">›</button>
|
||||
<button class="navigation_button" title="End" id="go_end">»</button>
|
||||
</div>
|
||||
</div>
|
||||
<div id="query_group" style="margin-top: 10px;">
|
||||
<!-- Do not remove the weird comments below, we need them to avoid spaces between inline elements, see https://css-tricks.com/fighting-the-space-between-inline-block-elements/ -->
|
||||
<input type="text" id="rbql_input" placeholder="Example: SELECT a1, a2 WHERE a2 != 'foobar' ORDER BY a1 LIMIT 10" autofocus><!--
|
||||
--><button id="toggle_history_btn" title="Query history">▲</button><!--
|
||||
--><button id="rbql_run_btn" title="Run query and open result set in a new tab">Run</button><!--
|
||||
--><button id="help_btn" title="Help">?</button>
|
||||
</div>
|
||||
<h3 style="margin-bottom: 6px; margin-top: 10px;">⚙ Query settings</h3>
|
||||
<div style="margin-left: 30px;">
|
||||
<div id="with_headers_section">
|
||||
<label style="word-wrap:break-word" title="Indicate that the input (and join) table has header in the first line"><input class="checkbox_input" id="with_headers" type="checkbox"/>Input table has header</label>
|
||||
</div>
|
||||
<div id="enable_rfc_newlines_section">
|
||||
<label style="word-wrap:break-word" title="This setting will not fix Rainbow syntax highlighting, but will allow RBQL to correctly handle multi-line (RFC-4180 - compatible) fields"><input class="checkbox_input" id="enable_rfc_newlines" type="checkbox"/>Enable double-quoted multiline fields</label>
|
||||
</div>
|
||||
<div>
|
||||
<div>
|
||||
<span class="select_info">Output format</span>
|
||||
</div>
|
||||
<select class="select_input" id="select_output_format" title="Output table format">
|
||||
<option class="select_input" value="input">Same as input (Default)</option>
|
||||
<option class="select_input" value="csv">CSV</option>
|
||||
<option class="select_input" value="tsv">TSV</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<div>
|
||||
<span class="select_info">Encoding</span>
|
||||
</div>
|
||||
<select class="select_input" id="select_encoding" title="Input and output encoding">
|
||||
<option class="select_input" value="utf-8">UTF-8</option>
|
||||
<option class="select_input" value="latin-1">Binary/Latin-1</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<div>
|
||||
<span class="select_info">Backend language for RBQL</span>
|
||||
</div>
|
||||
<!-- The selected option value is what rbql_client.js reads and posts as the backend language,
     and what the handshake handler assigns back to this control, so values must be exact. -->
<select class="select_input" id="select_backend_language">
    <option class="select_input" value="js">JavaScript</option>
    <option class="select_input" value="python">Python (Requires python installed)</option>
</select>
|
||||
</div>
|
||||
<div>
|
||||
<button id="udf_button" title="Edit UDF (User Defined Functions) - will open a new tab with UDF file">Edit UDF...</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div id="query_history">
|
||||
<div id="query_history_header">
|
||||
<span class="padded_label">Query history</span>
|
||||
<button id="clear_history_btn">Clear history</button>
|
||||
</div>
|
||||
<div id="history_entries">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div id="query_suggest">
|
||||
</div>
|
||||
|
||||
|
||||
<div id="rbql_error_message">
|
||||
<div id="rbql_error_message_header">
|
||||
<span class="padded_label">⚠ Error while executing RBQL query!</span>
|
||||
</div>
|
||||
<br>
|
||||
<div>
|
||||
<span class="padded_label" id="error_message_header"></span>
|
||||
</div>
|
||||
<br>
|
||||
<div>
|
||||
<span class="padded_label">Details:</span>
|
||||
<br>
|
||||
<div id="error_message_details"></div>
|
||||
</div>
|
||||
<div style="display: flex; justify-content:center;">
|
||||
<button id="ack_error">OK</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<button id="close_help">X</button>
|
||||
<div id="rbql_help">
|
||||
<a href="https://rbql.org"><img src="rbql_logo.svg" alt="RBQL" width="200px" style="background-color:white;"/></a>
|
||||
<p><a href="https://github.com/mechatroner/vscode_rainbow_csv/blob/master/rbql_core/README.md#rbql-rainbow-query-language-description">Read RBQL documentation on github</a></p>
|
||||
<div id="rbql_help_text">
|
||||
<h1 id="rbql-rainbow-query-language">RBQL: Rainbow Query Language</h1>
|
||||
<p>RBQL is an eval-based SQL-like query engine for (not only) CSV file processing. It provides SQL-like language that supports SELECT queries with Python or JavaScript expressions. <br />
|
||||
RBQL is best suited for data transformation, data cleaning, and analytical queries. <br />
|
||||
RBQL is distributed with CLI apps, text editor plugins, Python and JS libraries. </p>
|
||||
<h3 id="main-features">Main Features</h3>
|
||||
<ul>
|
||||
<li>Use Python or JavaScript expressions inside <em>SELECT</em>, <em>UPDATE</em>, <em>WHERE</em> and <em>ORDER BY</em> statements</li>
|
||||
<li>Supports multiple input formats</li>
|
||||
<li>Result set of any query immediately becomes a first-class table on its own</li>
|
||||
<li>No need to provide FROM statement in the query - input table is defined by the current context</li>
|
||||
<li>Supports all main SQL keywords</li>
|
||||
<li>Supports aggregate functions and GROUP BY queries</li>
|
||||
<li>Supports user-defined functions (UDF)</li>
|
||||
<li>Provides some new useful query modes which traditional SQL engines do not have</li>
|
||||
<li>Lightweight, dependency-free, works out of the box</li>
|
||||
</ul>
|
||||
<h4 id="limitations">Limitations:</h4>
|
||||
<ul>
|
||||
<li>RBQL doesn't support nested queries, but they can be emulated with consecutive queries</li>
|
||||
<li>Number of tables in all JOIN queries is always 2 (input table and join table), use consecutive queries to join 3 or more tables</li>
|
||||
</ul>
|
||||
<h3 id="supported-sql-keywords-keywords-are-case-insensitive">Supported SQL Keywords (Keywords are case insensitive)</h3>
|
||||
<ul>
|
||||
<li>SELECT</li>
|
||||
<li>UPDATE</li>
|
||||
<li>WHERE</li>
|
||||
<li>ORDER BY … [ DESC | ASC ]</li>
|
||||
<li>[ LEFT | INNER ] JOIN</li>
|
||||
<li>DISTINCT</li>
|
||||
<li>GROUP BY</li>
|
||||
<li>TOP <em>N</em></li>
|
||||
<li>LIMIT <em>N</em></li>
|
||||
</ul>
|
||||
<p>All keywords have the same meaning as in SQL queries. You can check them <a href="https://www.w3schools.com/sql/default.asp">online</a> </p>
|
||||
<h3 id="rbql-variables">RBQL variables</h3>
|
||||
<p>RBQL for CSV files provides the following variables which you can use in your queries:</p>
|
||||
<ul>
|
||||
<li><em>a1</em>, <em>a2</em>,…, <em>a{N}</em> <br />
|
||||
Variable type: <strong>string</strong> <br />
|
||||
Description: value of i-th field in the current record in input table </li>
|
||||
<li><em>b1</em>, <em>b2</em>,…, <em>b{N}</em> <br />
|
||||
Variable type: <strong>string</strong> <br />
|
||||
Description: value of i-th field in the current record in join table B </li>
|
||||
<li><em>NR</em> <br />
|
||||
Variable type: <strong>integer</strong> <br />
|
||||
Description: Record number (1-based) </li>
|
||||
<li><em>NF</em> <br />
|
||||
Variable type: <strong>integer</strong> <br />
|
||||
Description: Number of fields in the current record </li>
|
||||
<li><em>a.name</em>, <em>b.Person_age</em>, … <em>a.{Good_alphanumeric_column_name}</em> <br />
|
||||
Variable type: <strong>string</strong> <br />
|
||||
Description: Value of the field referenced by its "name". You can use this notation if the field in the header has a "good" alphanumeric name </li>
|
||||
<li><em>a["object id"]</em>, <em>a['9.12341234']</em>, <em>b["%$ !! 10 20"]</em> … <em>a["Arbitrary column name!"]</em> <br />
|
||||
Variable type: <strong>string</strong> <br />
|
||||
Description: Value of the field referenced by its "name". You can use this notation to reference fields by arbitrary values in the header</li>
|
||||
</ul>
|
||||
<h3 id="update-statement">UPDATE statement</h3>
|
||||
<p><em>UPDATE</em> query produces a new table where original values are replaced according to the UPDATE expression, so it can also be considered a special type of SELECT query. This prevents accidental data loss from poorly written queries. <br />
|
||||
<em>UPDATE SET</em> is a synonym for <em>UPDATE</em>, because in RBQL there is no need to specify the source table. </p>
|
||||
<h3 id="aggregate-functions-and-queries">Aggregate functions and queries</h3>
|
||||
<p>RBQL supports the following aggregate functions, which can also be used with <em>GROUP BY</em> keyword: <br />
|
||||
<em>COUNT</em>, <em>ARRAY_AGG</em>, <em>MIN</em>, <em>MAX</em>, <em>SUM</em>, <em>AVG</em>, <em>VARIANCE</em>, <em>MEDIAN</em> </p>
|
||||
<p>Limitation: aggregate functions inside Python (or JS) expressions are not supported. Although you can use expressions inside aggregate functions. <br />
|
||||
E.g. <code>MAX(float(a1) / 1000)</code> - valid; <code>MAX(a1) / 1000</code> - invalid. <br />
|
||||
There is a workaround for the limitation above for <em>ARRAY_AGG</em> function which supports an optional parameter - a callback function that can do something with the aggregated array. Example: <br />
|
||||
<code>select a2, ARRAY_AGG(a1, lambda v: sorted(v)[:5]) group by a2</code> - Python; <code>select a2, ARRAY_AGG(a1, v => v.sort().slice(0, 5)) group by a2</code> - JS</p>
|
||||
<h3 id="join-statements">JOIN statements</h3>
|
||||
<p>Join table B can be referenced either by its file path or by its name - an arbitrary string which the user should provide before executing the JOIN query. <br />
|
||||
RBQL supports <em>STRICT LEFT JOIN</em> which is like <em>LEFT JOIN</em>, but generates an error if any key in the left table "A" doesn't have exactly one matching key in the right table "B". <br />
|
||||
Limitation: <em>JOIN</em> statements can't contain Python/JS expressions and must have the following form: <em><JOIN_KEYWORD> (/path/to/table.tsv | table_name ) ON a… == b… [AND a… == b… [AND … ]]</em></p>
|
||||
<h3 id="select-except-statement">SELECT EXCEPT statement</h3>
|
||||
<p>SELECT EXCEPT can be used to select everything except specific columns. E.g. to select everything but columns 2 and 4, run: <code>SELECT * EXCEPT a2, a4</code> <br />
|
||||
Traditional SQL engines do not support this query mode.</p>
|
||||
<h3 id="unnest-operator">UNNEST() operator</h3>
|
||||
<p>UNNEST(list) takes a list/array as an argument and repeats the output record multiple times - one time for each value from the list argument. <br />
|
||||
Example: <code>SELECT a1, UNNEST(a2.split(';'))</code> </p>
|
||||
<h3 id="like-function">LIKE() function</h3>
|
||||
<p>RBQL does not support LIKE operator, instead it provides "like()" function which can be used like this:<br />
|
||||
<code>SELECT * where like(a1, 'foo%bar')</code></p>
|
||||
<h3 id="with-header-and-with-noheader-statements">WITH (header) and WITH (noheader) statements</h3>
|
||||
<p>You can set whether the input (and join) CSV file has a header or not using the environment configuration parameters which could be <code>--with_headers</code> CLI flag or GUI checkbox or something else.<br />
|
||||
But it is also possible to override this selection directly in the query by adding either <code>WITH (header)</code> or <code>WITH (noheader)</code> statement at the end of the query.<br />
|
||||
Example: <code>select top 5 NR, * with (header)</code></p>
|
||||
<h3 id="user-defined-functions-udf">User Defined Functions (UDF)</h3>
|
||||
<p>RBQL supports User Defined Functions <br />
|
||||
You can define custom functions and/or import libraries in two special files: </p>
|
||||
<ul>
|
||||
<li><code>~/.rbql_init_source.py</code> - for Python</li>
|
||||
<li><code>~/.rbql_init_source.js</code> - for JavaScript</li>
|
||||
</ul>
|
||||
<h2 id="examples-of-rbql-queries">Examples of RBQL queries</h2>
|
||||
<h4 id="with-python-expressions">With Python expressions</h4>
|
||||
<ul>
|
||||
<li><code>select top 100 a1, int(a2) * 10, len(a4) where a1 == "Buy" order by int(a2) desc</code></li>
|
||||
<li><code>select * order by random.random()</code> - random sort</li>
|
||||
<li><code>select len(a.vehicle_price) / 10, a2 where int(a.vehicle_price) < 500 and a['Vehicle type'] in ["car", "plane", "boat"] limit 20</code> - referencing columns by names from header and using Python's "in" to emulate SQL's "in"</li>
|
||||
<li><code>update set a3 = 'NPC' where a3.find('Non-playable character') != -1</code></li>
|
||||
<li><code>select NR, *</code> - enumerate records, NR is 1-based</li>
|
||||
<li><code>select * where re.match(".*ab.*", a1) is not None</code> - select entries where first column has "ab" pattern</li>
|
||||
<li><code>select a1, b1, b2 inner join ./countries.txt on a2 == b1 order by a1, a3</code> - example of join query</li>
|
||||
<li><code>select MAX(a1), MIN(a1) where a.Name != 'John' group by a2, a3</code> - example of aggregate query</li>
|
||||
<li><code>select *a1.split(':')</code> - Using Python3 unpack operator to split one column into many. Do not try this with other SQL engines!</li>
|
||||
</ul>
|
||||
<h4 id="with-javascript-expressions">With JavaScript expressions</h4>
|
||||
<ul>
|
||||
<li><code>select top 100 a1, a2 * 10, a4.length where a1 == "Buy" order by parseInt(a2) desc</code></li>
|
||||
<li><code>select * order by Math.random()</code> - random sort</li>
|
||||
<li><code>select top 20 a.vehicle_price.length / 10, a2 where parseInt(a.vehicle_price) < 500 && ["car", "plane", "boat"].indexOf(a['Vehicle type']) > -1 limit 20</code> - referencing columns by names from header</li>
|
||||
<li><code>update set a3 = 'NPC' where a3.indexOf('Non-playable character') != -1</code></li>
|
||||
<li><code>select NR, *</code> - enumerate records, NR is 1-based</li>
|
||||
<li><code>select a1, b1, b2 inner join ./countries.txt on a2 == b1 order by a1, a3</code> - example of join query</li>
|
||||
<li><code>select MAX(a1), MIN(a1) where a.Name != 'John' group by a2, a3</code> - example of aggregate query</li>
|
||||
<li><code>select ...a1.split(':')</code> - Using JS "destructuring assignment" syntax to split one column into many. Do not try this with other SQL engines!</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</body>
|
||||
|
||||
</html>
|
||||
437
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/rbql_client.js
Normal file
437
vscodium/extensions/mechatroner.rainbow-csv-2.3.0/rbql_client.js
Normal file
@ -0,0 +1,437 @@
|
||||
// Handle used to exchange messages with the extension host.
const vscode = acquireVsCodeApi();

// True between sending a 'run' request and receiving the matching 'rbql_report'.
var rbql_running = false;

// Set once the first 'handshake' message has been processed; later handshakes are ignored.
var handshake_completed = false;

// Previously executed queries, rendered in the history panel.
var query_history = [];

// Computed style of <body>, used to look up VS Code theme colors.
var global_css_style = null;

// Most recent message carrying preview records; the preview table is rebuilt from it.
var last_preview_message = null;

// Callback that receives the join table header once the extension host returns it.
var adjust_join_table_header_callback = null;

// Header of the input table as reported by the handshake message.
var global_header = null;

// Value of 'is_web_ext' from the handshake message.
var is_web_ext = null;
|
||||
|
||||
// Reports the newly selected backend language to the extension host and
// refreshes the tooltip of the language selector.
function report_backend_language_change() {
    const selected_language = document.getElementById('select_backend_language').value;
    const notification = {
        'msg_type': 'global_param_change',
        'key': 'rbql_backend_language',
        'value': selected_language
    };
    vscode.postMessage(notification);
    assign_backend_lang_selection_title();
}
|
||||
|
||||
|
||||
// Reports the newly selected encoding to the extension host.
function report_encoding_change() {
    const selected_encoding = document.getElementById('select_encoding').value;
    const notification = {
        'msg_type': 'global_param_change',
        'key': 'rbql_encoding',
        'value': selected_encoding
    };
    vscode.postMessage(notification);
}
|
||||
|
||||
|
||||
// Reports the state of the "multiline fields" checkbox to the extension host.
function report_rfc_fields_policy_change() {
    const rfc_checkbox = document.getElementById('enable_rfc_newlines');
    vscode.postMessage({
        'msg_type': 'newlines_policy_change',
        'enable_rfc_newlines': rfc_checkbox.checked
    });
}
|
||||
|
||||
|
||||
// Detaches every child of root_node, first child first, until none remain.
function remove_children(root_node) {
    for (let child = root_node.firstChild; child; child = root_node.firstChild) {
        root_node.removeChild(child);
    }
}
|
||||
|
||||
|
||||
// Returns the largest field count among `records`; when `with_headers` is set and a
// non-empty global header is known, the header width is taken into account as well.
function get_max_num_columns(records, with_headers) {
    let widest = 0;
    for (const record of records) {
        widest = Math.max(widest, record.length);
    }
    const header_is_usable = with_headers && global_header && global_header.length;
    if (header_is_usable) {
        widest = Math.max(widest, global_header.length);
    }
    return widest;
}
|
||||
|
||||
|
||||
// Creates a <th> holding `cell_text` and appends it to the given row element.
function add_header_cell_with_text(cell_text, dst_row_elem) {
    const header_cell = Object.assign(document.createElement('th'), {textContent: cell_text});
    dst_row_elem.appendChild(header_cell);
}
|
||||
|
||||
|
||||
// Appends the preview header row to `table`: an 'NR' cell followed by one cell per column.
// Each column cell shows the positional variable (a1, a2, ...) and, when header names are
// available, the name-based variable on a second line.
function add_header_row(max_num_columns, with_headers, table) {
    const header_row = document.createElement('tr');
    add_header_cell_with_text('NR', header_row);

    const header_is_usable = with_headers && global_header && global_header.length;
    const named_vars = header_is_usable ? rbql_suggest.convert_header_to_rbql_variables(global_header, 'a') : [];

    for (let col = 0; col < max_num_columns; col++) {
        let label = `a${col + 1}`;
        if (col < named_vars.length) {
            const named = named_vars[col];
            // Prefer the dot notation (a.name); fall back to the quoted form (a['name']).
            label = label + '\r\n' + (named.dot_var ? named.dot_var : named.single_q_var);
        }
        add_header_cell_with_text(label, header_row);
    }
    table.appendChild(header_row);
}
|
||||
|
||||
|
||||
// Builds a <td> for one field value. Each line of the field goes into its own <span>;
// embedded newlines are shown as a highlighted literal "\n" marker, and a field that
// ends with the UI trim marker gets a highlighted " ..." suffix instead of the marker.
function make_data_cell(cell_text) {
    const trim_marker = '###UI_STRING_TRIM_MARKER###';
    const make_span = (text) => {
        const span = document.createElement('span');
        span.textContent = text;
        return span;
    };
    const make_warning_span = (text, tooltip) => {
        const span = make_span(text);
        span.style.color = global_css_style.getPropertyValue('--vscode-editorWarning-foreground');
        span.title = tooltip;
        return span;
    };

    const cell = document.createElement('td');
    const was_trimmed = cell_text.endsWith(trim_marker);
    const visible_text = was_trimmed ? cell_text.slice(0, cell_text.length - trim_marker.length) : cell_text;

    const field_lines = visible_text.split('\n');
    const last_idx = field_lines.length - 1;
    for (const [idx, line] of field_lines.entries()) {
        cell.appendChild(make_span(line));
        if (idx < last_idx) {
            cell.appendChild(make_warning_span('\\n', 'new line'));
        }
    }
    if (was_trimmed) {
        cell.appendChild(make_warning_span(' ...', 'value too long to display'));
    }
    return cell;
}
|
||||
|
||||
|
||||
// Builds a plain <td> showing the record number text.
function make_nr_cell(cell_text) {
    return Object.assign(document.createElement('td'), {textContent: cell_text});
}
|
||||
|
||||
|
||||
// Rebuilds the preview table from `last_preview_message`.
// Does nothing until a preview message has arrived. If the message carries a sampling
// error, the table shows only that error; otherwise it shows a header row followed by
// one numbered row per preview record.
function make_preview_table() {
    if (!last_preview_message) {
        return;
    }
    const records = last_preview_message.preview_records;
    const first_record_idx = last_preview_message.start_record_zero_based;
    const sampling_error = last_preview_message.preview_sampling_error;

    const table = document.getElementById('preview_table');
    remove_children(table);

    if (sampling_error) {
        const error_row = document.createElement('tr');
        table.appendChild(error_row);
        const error_color = global_css_style.getPropertyValue('--vscode-inputValidation-errorForeground');
        const error_lines = ['Unable to display preview table and run RBQL query:', sampling_error];
        for (const [idx, line] of error_lines.entries()) {
            if (idx > 0) {
                error_row.appendChild(document.createElement('br'));
            }
            const span = document.createElement('span');
            span.style.color = error_color;
            span.textContent = line;
            error_row.appendChild(span);
        }
        return;
    }

    const with_headers = document.getElementById('with_headers').checked;
    add_header_row(get_max_num_columns(records, with_headers), with_headers, table);

    for (const [idx, record] of records.entries()) {
        let record_number = idx + first_record_idx + 1;
        if (with_headers) {
            // The header line does not count as a record: shift numbering and skip it.
            record_number -= 1;
            if (record_number == 0) {
                continue;
            }
        }
        const row = document.createElement('tr');
        row.appendChild(make_nr_cell(String(record_number)));
        for (const field of record) {
            row.appendChild(make_data_cell(field));
        }
        table.appendChild(row);
    }
}
|
||||
|
||||
|
||||
// Sends a 'navigate' request with the given direction to the extension host.
function navigate_preview(direction) {
    const request = {'msg_type': 'navigate', 'direction': direction};
    vscode.postMessage(request);
}
|
||||
|
||||
|
||||
// Click handler: requests preview navigation in the 'backward' direction.
function preview_backward() {
    const direction = 'backward';
    navigate_preview(direction);
}
|
||||
|
||||
|
||||
// Click handler: requests preview navigation in the 'forward' direction.
function preview_forward() {
    const direction = 'forward';
    navigate_preview(direction);
}
|
||||
|
||||
|
||||
// Click handler: requests preview navigation to 'begin'.
function preview_begin() {
    const direction = 'begin';
    navigate_preview(direction);
}
|
||||
|
||||
|
||||
// Click handler: requests preview navigation to 'end'.
function preview_end() {
    const direction = 'end';
    navigate_preview(direction);
}
|
||||
|
||||
|
||||
// Passed to rbql_suggest as the "apply" callback: forwards the resulting query
// text to the extension host as an 'update_query' message.
function apply_suggest_callback(query) {
    const update = {'msg_type': 'update_query', 'query': query};
    vscode.postMessage(update);
}
|
||||
|
||||
|
||||
// Asks the extension host for the header of the join table `join_table_id`.
// The supplied callback is stored so the 'fetch_table_header_response' handler
// can hand the header over once it arrives.
function fetch_join_header_callback(join_table_id, adjust_join_table_headers) {
    adjust_join_table_header_callback = adjust_join_table_headers;
    const request = {
        'msg_type': 'fetch_table_header',
        'table_id': join_table_id,
        'encoding': document.getElementById('select_encoding').value
    };
    vscode.postMessage(request);
}
|
||||
|
||||
|
||||
// Reacts to the "Input table has header" checkbox: notifies the extension host,
// re-initializes suggestions with or without the header, and redraws the preview.
function process_with_headers_change() {
    const header_enabled = document.getElementById('with_headers').checked;
    // We need to send it to remember preview state
    vscode.postMessage({'msg_type': 'with_headers_change', 'with_headers': header_enabled});
    const suggest_header = header_enabled ? global_header : null;
    rbql_suggest.initialize_suggest(
        'rbql_input',
        'query_suggest',
        'history_button',
        apply_suggest_callback,
        suggest_header,
        fetch_join_header_callback
    );
    make_preview_table();
}
|
||||
|
||||
|
||||
// Shows the error popup with the given error type and message, then moves focus
// to the "OK" button.
//   error_type - short category label, shown in quotes in the popup header line.
//   error_msg  - detailed text, shown in the preformatted details box.
function show_error(error_type, error_msg) {
    // A string pattern is matched literally by String.prototype.replace, so the previous
    // '\r?\n' pattern never matched; a global regex is required to normalize every
    // line break to CRLF. String() guards against a non-string message.
    const normalized_msg = String(error_msg).replace(/\r?\n/g, '\r\n');
    document.getElementById('error_message_header').textContent = 'Error type: "' + error_type + '"';
    document.getElementById('error_message_details').textContent = normalized_msg;
    document.getElementById('rbql_error_message').style.display = 'block';
    document.getElementById('ack_error').focus();
}
|
||||
|
||||
|
||||
// Dismisses the error popup and returns keyboard focus to the query input.
function hide_error_msg() {
    const error_popup = document.getElementById('rbql_error_message');
    const query_input = document.getElementById("rbql_input");
    error_popup.style.display = 'none';
    query_input.focus();
}
|
||||
|
||||
|
||||
// Shows or hides the help panel together with its close button.
// When the panel is about to be shown, its background is set to the theme's
// notification background color.
function toggle_help_msg() {
    const panel_background = global_css_style.getPropertyValue('--vscode-notifications-background');
    const help_panel = document.getElementById('rbql_help');
    const currently_shown = help_panel.style.display == 'block';
    const next_display = currently_shown ? 'none' : 'block';
    if (!currently_shown) {
        help_panel.style.backgroundColor = panel_background;
    }
    help_panel.style.display = next_display;
    document.getElementById('close_help').style.display = next_display;
}
|
||||
|
||||
|
||||
// Makes a click on `button_element` copy `query` into the query input box.
function register_history_callback(button_element, query) {
    const restore_query = () => {
        document.getElementById('rbql_input').value = query;
    };
    button_element.addEventListener("click", restore_query);
}
|
||||
|
||||
|
||||
// Shows or hides the query history panel. The entry list is rebuilt from
// `query_history` on every call, the toggle button arrow is flipped, and the panel
// is positioned so that its bottom edge meets the top of the query input.
function toggle_history() {
    const history_panel = document.getElementById('query_history');
    const will_show = history_panel.style.display != 'block';
    const next_display = will_show ? 'block' : 'none';
    document.getElementById('toggle_history_btn').textContent = will_show ? '\u25BC' : '\u25B2';

    const entries_container = document.getElementById('history_entries');
    remove_children(entries_container);
    for (const past_query of query_history) {
        const entry_button = document.createElement('button');
        entry_button.className = 'history_button';
        entry_button.textContent = past_query;
        register_history_callback(entry_button, past_query);
        entries_container.appendChild(entry_button);
    }

    // The height is read only after the new display value is applied.
    history_panel.style.display = next_display;
    const panel_height = history_panel.offsetHeight;
    const input_rect = document.getElementById('rbql_input').getBoundingClientRect();
    history_panel.style.left = input_rect.left + 'px';
    history_panel.style.top = (input_rect.top - panel_height) + 'px';
}
|
||||
|
||||
|
||||
// Empties the in-memory query history, toggles the history panel (which also
// re-renders the now empty list), and tells the extension host to store an empty history.
function clear_history() {
    query_history = [];
    toggle_history();
    const reset_notification = {
        'msg_type': 'global_param_change',
        'key': 'rbql_query_history',
        'value': []
    };
    vscode.postMessage(reset_notification);
}
|
||||
|
||||
|
||||
// Sends the current query and all query settings to the extension host as a 'run'
// request. Does nothing when the query is empty or a query is already running.
// While running, the Run button shows an hourglass.
function start_rbql() {
    const query_text = document.getElementById('rbql_input').value;
    if (rbql_running || !query_text) {
        return;
    }
    rbql_running = true;
    document.getElementById('rbql_run_btn').textContent = "\u231B";

    const value_of = (element_id) => document.getElementById(element_id).value;
    const is_checked = (element_id) => document.getElementById(element_id).checked;
    vscode.postMessage({
        'msg_type': 'run',
        'query': query_text,
        'backend_language': value_of('select_backend_language'),
        'output_dialect': value_of('select_output_format'),
        'encoding': value_of('select_encoding'),
        'enable_rfc_newlines': is_checked('enable_rfc_newlines'),
        'with_headers': is_checked('with_headers')
    });
}
|
||||
|
||||
|
||||
// Dispatches a message received from the extension host, based on its 'msg_type':
//   'handshake'                    - one-time initialization of the UI state and preview
//   'fetch_table_header_response'  - forwards the join table header to the pending callback
//   'navigate' / 'resample'        - stores the new preview data and redraws the table
//   'rbql_report'                  - query finished: shows an error (if any), restores the Run button
// msg_event is the DOM 'message' event; the payload is taken from its `data` property.
function handle_message(msg_event) {
    var message = msg_event.data;
    // Log the payload, not the event: JSON.stringify() of a MessageEvent only sees its own
    // enumerable properties and therefore prints just {"isTrusted":true}.
    console.log('message received at client: ' + JSON.stringify(message));
    if (!message) {
        // Nothing to dispatch on; avoids a TypeError when reading 'msg_type' below.
        return;
    }
    let message_type = message['msg_type'];

    if (message_type == 'handshake') {
        if (handshake_completed)
            return;
        handshake_completed = true;
        if (message.hasOwnProperty('last_query')) {
            document.getElementById('rbql_input').value = message['last_query'];
        }
        if (message.hasOwnProperty('query_history')) {
            query_history = message['query_history'];
        }
        global_header = message['header'];
        is_web_ext = message['is_web_ext'];
        let with_headers = message['with_headers'];
        let header = with_headers ? global_header : null;
        rbql_suggest.initialize_suggest('rbql_input', 'query_suggest', 'history_button', apply_suggest_callback, header, fetch_join_header_callback);
        let enable_rfc_newlines = message['enable_rfc_newlines'];
        // The handshake message doubles as the first preview message.
        last_preview_message = message;
        document.getElementById("select_backend_language").value = message['backend_language'];
        assign_backend_lang_selection_title();
        document.getElementById("select_encoding").value = message['encoding'];
        document.getElementById("enable_rfc_newlines").checked = enable_rfc_newlines;
        document.getElementById("with_headers").checked = with_headers;
        if (message['policy'] == 'quoted') {
            // The multiline-fields option is revealed only for the 'quoted' policy.
            document.getElementById('enable_rfc_newlines_section').style.display = 'block';
        }
        make_preview_table();

        // Integration test hook: when both fields are present, the settings are
        // overridden and the query is started automatically after a 2 second delay.
        let integration_test_query = message['integration_test_query'];
        let integration_test_language = message['integration_test_language'];
        if (integration_test_query && integration_test_language) {
            if (message['integration_test_enable_rfc_newlines']) {
                document.getElementById("enable_rfc_newlines").checked = true;
                report_rfc_fields_policy_change();
            }
            if (message['integration_test_with_headers']) {
                document.getElementById("with_headers").checked = true;
                process_with_headers_change();
            }
            document.getElementById("select_backend_language").value = integration_test_language;
            assign_backend_lang_selection_title();
            document.getElementById('rbql_input').value = integration_test_query;
            setTimeout(function() {
                start_rbql();
            }, 2000);
        }
    }

    if (message_type == 'fetch_table_header_response') {
        if (adjust_join_table_header_callback && message['header']) {
            adjust_join_table_header_callback(message['header']);
        }
    }

    if (message_type == 'navigate' || message_type == 'resample') {
        last_preview_message = message;
        make_preview_table();
    }

    if (message_type == 'rbql_report') {
        rbql_running = false;
        if (message.hasOwnProperty('error_type') || message.hasOwnProperty('error_msg')) {
            // Either field may be missing; fall back to generic placeholders.
            let error_type = message.hasOwnProperty('error_type') ? message['error_type'] : 'Unexpected';
            let error_msg = message.hasOwnProperty('error_msg') ? message['error_msg'] : 'Unknown Error';
            show_error(error_type, error_msg);
        }
        document.getElementById('rbql_run_btn').textContent = "Run";
    }
}
|
||||
|
||||
|
||||
// Tells whether `keycode` is the space key or lies strictly inside one of the
// key code ranges listed below.
function is_printable_key_code(keycode) {
    // Taken from here: https://stackoverflow.com/a/12467610/2898283
    const SPACE_KEY_CODE = 32;
    // Each pair is an exclusive (low, high) bound.
    const exclusive_ranges = [
        [47, 58],   // 48..57
        [64, 91],   // 65..90
        [185, 193], // 186..192
        [218, 223]  // 219..222
    ];
    return keycode == SPACE_KEY_CODE || exclusive_ranges.some(([low, high]) => keycode > low && keycode < high);
}
|
||||
|
||||
|
||||
// 'keyup' handler of the query input: lets rbql_suggest process the event first, then
// reports the current query text to the extension host when the released key was a
// printable key or Backspace.
function handle_input_keyup(event) {
    rbql_suggest.handle_input_keyup(event);
    const BACKSPACE_KEY_CODE = 8;
    const query_may_have_changed = is_printable_key_code(event.keyCode) || event.keyCode == BACKSPACE_KEY_CODE;
    if (!query_may_have_changed) {
        return;
    }
    const query_text = document.getElementById('rbql_input').value;
    vscode.postMessage({'msg_type': 'update_query', 'query': query_text});
}
|
||||
|
||||
|
||||
// 'keydown' handler of the query input: key code 13 (Enter) runs the query when no
// suggestion is currently active; every other case is delegated to rbql_suggest.
function handle_input_keydown(event) {
    if (event.keyCode != 13 || rbql_suggest.active_suggest_idx !== null) {
        rbql_suggest.handle_input_keydown(event);
        return;
    }
    start_rbql();
}
|
||||
|
||||
|
||||
// Sets the tooltip of the backend language selector to a hint with example
// expressions: the JS hint when 'js' is selected, the Python hint otherwise.
function assign_backend_lang_selection_title() {
    const language_selector = document.getElementById('select_backend_language');
    const js_selected = language_selector.value == 'js';
    language_selector.title = js_selected
        ? 'Allows to use JS expressions such as: `Math.sqrt(a1)`, `a2.substring(1, 5)`, `a3.toUpperCase()`, etc'
        : 'Allows to use Python expressions such as: `math.sqrt(float(a1))`, `a2[1:5]`, `a3.upper()`, etc';
}
|
||||
|
||||
|
||||
// Click handler of the "Edit UDF..." button: sends an 'edit_udf' request for the
// currently selected backend language to the extension host.
function handle_udf_edit() {
    const selected_language = document.getElementById('select_backend_language').value;
    const request = {'msg_type': 'edit_udf', 'backend_language': selected_language};
    vscode.postMessage(request);
}
|
||||
|
||||
|
||||
// Entry point of the page script: captures the computed body style, subscribes to incoming
// messages, posts the 'handshake' message and wires every UI control to its handler.
function main() {
    // Registers `handler` for `event_type` on the element with the given id.
    const listen = (element_id, event_type, handler) => {
        document.getElementById(element_id).addEventListener(event_type, handler);
    };

    global_css_style = getComputedStyle(document.body);
    assign_backend_lang_selection_title();

    // The message listener is attached before the handshake is posted.
    window.addEventListener('message', handle_message);
    vscode.postMessage({'msg_type': 'handshake'});

    // Query execution and settings controls.
    listen("rbql_run_btn", "click", start_rbql);
    listen("select_backend_language", "change", report_backend_language_change);
    listen("select_encoding", "change", report_encoding_change);
    listen("enable_rfc_newlines", "click", report_rfc_fields_policy_change);
    listen("with_headers", "click", process_with_headers_change);

    // Error, help and history panels.
    listen("ack_error", "click", hide_error_msg);
    listen("help_btn", "click", toggle_help_msg);
    listen("close_help", "click", toggle_help_msg);
    listen("toggle_history_btn", "click", toggle_history);
    listen("clear_history_btn", "click", clear_history);

    // Preview navigation buttons.
    listen("go_begin", "click", preview_begin);
    listen("go_backward", "click", preview_backward);
    listen("go_forward", "click", preview_forward);
    listen("go_end", "click", preview_end);

    // Query input and UDF button.
    listen("rbql_input", "keyup", handle_input_keyup);
    listen("rbql_input", "keydown", handle_input_keydown);
    listen("udf_button", "click", handle_udf_edit);

    document.getElementById("rbql_input").focus();
}
|
||||
|
||||
|
||||
// Run main() once the browser fires DOMContentLoaded on the document.
document.addEventListener("DOMContentLoaded", (_event) => {
    main();
});
|
||||
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017 Dmitry Ignatovich
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@ -0,0 +1,200 @@
|
||||

|
||||
|
||||
# RBQL: Rainbow Query Language
|
||||
|
||||
RBQL is an eval-based SQL-like query engine for (not only) CSV file processing. It provides SQL-like language that supports SELECT queries with Python or JavaScript expressions.
|
||||
RBQL is best suited for data transformation, data cleaning, and analytical queries.
|
||||
RBQL is distributed with CLI apps, text editor plugins, IPython/Jupyter magic command, Python and JS libraries.
|
||||
|
||||
[Official Site](https://rbql.org/)
|
||||
|
||||
#### Supported formats
|
||||
|
||||
Matrix of data formats that RBQL supports out of the box. R=Read, W=Write
|
||||
|
||||
|Data Format | Python | JS |
|
||||
|-----------------------|----------|---------|
|
||||
|CSV, TSV, etc | **RW** | **RW** |
|
||||
|Native 2D arrays/lists | **RW** | **RW** |
|
||||
|Pandas dataframe | **RW** | |
|
||||
|Sqlite databases | **R** | |
|
||||
|
||||
If you use RBQL as a library you can write implementation for a couple of classes to support additional formats.
|
||||
|
||||
### Main Features
|
||||
|
||||
* Use Python or JavaScript expressions inside _SELECT_, _UPDATE_, _WHERE_ and _ORDER BY_ statements
|
||||
* Supports multiple input formats
|
||||
* Result set of any query immediately becomes a first-class table on its own
|
||||
* No need to provide FROM statement in the query when the input table is defined by the current context.
|
||||
* Supports all main SQL keywords
|
||||
* Supports aggregate functions and GROUP BY queries
|
||||
* Supports user-defined functions (UDF)
|
||||
* Provides some new useful query modes which traditional SQL engines do not have
|
||||
* Lightweight, dependency-free, works out of the box
|
||||
|
||||
#### Limitations:
|
||||
|
||||
* RBQL doesn't support nested queries, but they can be emulated with consecutive queries
|
||||
* Number of tables in all JOIN queries is always 2 (input table and join table), use consecutive queries to join 3 or more tables
|
||||
|
||||
### Supported SQL Keywords (Keywords are case insensitive)
|
||||
|
||||
* SELECT
|
||||
* UPDATE
|
||||
* WHERE
|
||||
* ORDER BY ... [ DESC | ASC ]
|
||||
* [ LEFT | INNER ] JOIN
|
||||
* DISTINCT
|
||||
* GROUP BY
|
||||
* TOP _N_
|
||||
* LIMIT _N_
|
||||
|
||||
All keywords have the same meaning as in SQL queries. You can check them [online](https://www.w3schools.com/sql/default.asp)
|
||||
|
||||
|
||||
### RBQL variables
|
||||
RBQL for CSV files provides the following variables which you can use in your queries:
|
||||
|
||||
* _a1_, _a2_,..., _a{N}_
|
||||
Variable type: **string**
|
||||
Description: value of i-th field in the current record in input table
|
||||
* _b1_, _b2_,..., _b{N}_
|
||||
Variable type: **string**
|
||||
Description: value of i-th field in the current record in join table B
|
||||
* _NR_
|
||||
Variable type: **integer**
|
||||
Description: Record number (1-based)
|
||||
* _NF_
|
||||
Variable type: **integer**
|
||||
Description: Number of fields in the current record
|
||||
* _a.name_, _b.Person_age_, ... _a.{Good_alphanumeric_column_name}_
|
||||
Variable type: **string**
|
||||
Description: Value of the field referenced by its "name". You can use this notation if the field in the header has a "good" alphanumeric name
|
||||
* _a["object id"]_, _a['9.12341234']_, _b["%$ !! 10 20"]_ ... _a["Arbitrary column name!"]_
|
||||
Variable type: **string**
|
||||
Description: Value of the field referenced by its "name". You can use this notation to reference fields by arbitrary values in the header
|
||||
|
||||
|
||||
### UPDATE statement
|
||||
|
||||
_UPDATE_ query produces a new table where original values are replaced according to the UPDATE expression, so it can also be considered a special type of SELECT query.
|
||||
|
||||
### Aggregate functions and queries
|
||||
|
||||
RBQL supports the following aggregate functions, which can also be used with _GROUP BY_ keyword:
|
||||
_COUNT_, _ARRAY_AGG_, _MIN_, _MAX_, _SUM_, _AVG_, _VARIANCE_, _MEDIAN_
|
||||
|
||||
Limitation: aggregate functions inside Python (or JS) expressions are not supported, although you can use expressions inside aggregate functions.
|
||||
E.g. `MAX(float(a1) / 1000)` - valid; `MAX(a1) / 1000` - invalid.
|
||||
There is a workaround for the limitation above for _ARRAY_AGG_ function which supports an optional parameter - a callback function that can do something with the aggregated array. Example:
|
||||
`select a2, ARRAY_AGG(a1, lambda v: sorted(v)[:5]) group by a2` - Python; `select a2, ARRAY_AGG(a1, v => v.sort().slice(0, 5)) group by a2` - JS
|
||||
|
||||
|
||||
### JOIN statements
|
||||
|
||||
Join table B can be referenced either by its file path or by its name - an arbitrary string which the user should provide before executing the JOIN query.
|
||||
RBQL supports _STRICT LEFT JOIN_ which is like _LEFT JOIN_, but generates an error if any key in the left table "A" doesn't have exactly one matching key in the right table "B".
|
||||
Table B path can be either relative to the working dir, relative to the main table or absolute.
|
||||
Limitation: _JOIN_ statements can't contain Python/JS expressions and must have the following form: _<JOIN\_KEYWORD> (/path/to/table.tsv | table_name ) ON a... == b... [AND a... == b... [AND ... ]]_
|
||||
|
||||
### SELECT EXCEPT statement
|
||||
|
||||
SELECT EXCEPT can be used to select everything except specific columns. E.g. to select everything but columns 2 and 4, run: `SELECT * EXCEPT a2, a4`
|
||||
Traditional SQL engines do not support this query mode.
|
||||
|
||||
|
||||
### UNNEST() operator
|
||||
UNNEST(list) takes a list/array as an argument and repeats the output record multiple times - one time for each value from the list argument.
|
||||
Example: `SELECT a1, UNNEST(a2.split(';'))`
|
||||
|
||||
|
||||
### LIKE() function
|
||||
RBQL does not support the LIKE operator; instead it provides a "like()" function which can be used like this:
|
||||
`SELECT * where like(a1, 'foo%bar')`
|
||||
|
||||
|
||||
### WITH (header) and WITH (noheader) statements
|
||||
You can set whether the input (and join) CSV file has a header or not using the environment configuration parameters which could be `--with_headers` CLI flag or GUI checkbox or something else.
|
||||
But it is also possible to override this selection directly in the query by adding either `WITH (header)` or `WITH (noheader)` statement at the end of the query.
|
||||
Example: `select top 5 NR, * with (header)`
|
||||
|
||||
|
||||
### User Defined Functions (UDF)
|
||||
|
||||
RBQL supports User Defined Functions
|
||||
You can define custom functions and/or import libraries in two special files:
|
||||
* `~/.rbql_init_source.py` - for Python
|
||||
* `~/.rbql_init_source.js` - for JavaScript
|
||||
|
||||
|
||||
## Examples of RBQL queries
|
||||
|
||||
#### With Python expressions
|
||||
|
||||
* `select top 100 a1, int(a2) * 10, len(a4) where a1 == "Buy" order by int(a2) desc`
|
||||
* `select * order by random.random()` - random sort
|
||||
* `select len(a.vehicle_price) / 10, a2 where int(a.vehicle_price) < 500 and a['Vehicle type'] in ["car", "plane", "boat"] limit 20` - referencing columns by names from header and using Python's "in" to emulate SQL's "in"
|
||||
* `update set a3 = 'NPC' where a3.find('Non-playable character') != -1`
|
||||
* `select NR, *` - enumerate records, NR is 1-based
|
||||
* `select * where re.match(".*ab.*", a1) is not None` - select entries where first column has "ab" pattern
|
||||
* `select a1, b1, b2 inner join ./countries.txt on a2 == b1 order by a1, a3` - example of join query
|
||||
* `select MAX(a1), MIN(a1) where a.Name != 'John' group by a2, a3` - example of aggregate query
|
||||
* `select *a1.split(':')` - Using Python3 unpack operator to split one column into many. Do not try this with other SQL engines!
|
||||
|
||||
#### With JavaScript expressions
|
||||
|
||||
* `select top 100 a1, a2 * 10, a4.length where a1 == "Buy" order by parseInt(a2) desc`
|
||||
* `select * order by Math.random()` - random sort
|
||||
* `select top 20 a.vehicle_price.length / 10, a2 where parseInt(a.vehicle_price) < 500 && ["car", "plane", "boat"].indexOf(a['Vehicle type']) > -1 limit 20` - referencing columns by names from header
|
||||
* `update set a3 = 'NPC' where a3.indexOf('Non-playable character') != -1`
|
||||
* `select NR, *` - enumerate records, NR is 1-based
|
||||
* `select a1, b1, b2 inner join ./countries.txt on a2 == b1 order by a1, a3` - example of join query
|
||||
* `select MAX(a1), MIN(a1) where a.Name != 'John' group by a2, a3` - example of aggregate query
|
||||
* `select ...a1.split(':')` - Using JS "spread" syntax to split one column into many. Do not try this with other SQL engines!
|
||||
|
||||
|
||||
## RBQL design principles and architecture
|
||||
RBQL core idea is based on dynamic code generation and execution with [exec](https://docs.python.org/3/library/functions.html#exec) and [eval](https://www.w3schools.com/jsref/jsref_eval.asp) functions.
|
||||
Here are the main steps that RBQL engine performs when processing a query:
|
||||
1. Shallow parsing: split the query into logical expressions such as "SELECT", "WHERE", "ORDER BY", etc.
|
||||
2. Embed the expression segments into the main loop template code
|
||||
3. Execute the hydrated loop code
|
||||
|
||||
Here you can find a very basic working script (only 15 lines of Python code) which implements this idea: [mini_rbql.py](https://github.com/mechatroner/mini-rbql/blob/master/mini_rbql.py)
|
||||
|
||||
The diagram below gives an overview of the main RBQL components and data flow:
|
||||

|
||||
|
||||
|
||||
### Advantages of RBQL over traditional SQL engines
|
||||
* Provides power and flexibility of general purpose Python and JS languages in relational expressions (including regexp, math, file system, json, xml, random and many other libraries that these languages provide)
|
||||
* Can work with different data sources including CSV files, sqlite tables, native 2D arrays/lists (traditional SQL engines are usually tightly coupled with their databases)
|
||||
* Result set of any query immediately becomes a first-class table on its own
|
||||
* Supports both TOP and LIMIT keywords
|
||||
* Provides additional NR (record number) variable which is especially useful for input sources where record order is well defined (such as CSV files)
|
||||
* Supports input tables with inconsistent number of fields per record
|
||||
* Allows generating result sets with a variable number of fields per record, e.g. by using the split() function and the unpack operator (Python) / spread syntax (JS)
|
||||
* UPDATE is a special case of SELECT query - this prevents accidental data loss
|
||||
* No need to use FROM statement when the table name is defined by the context. This improves query typing speed and allows immediate autocomplete for variables inside SELECT statement (in traditional SQL engines autocomplete will not work until you write FROM statement, which goes after SELECT statement)
|
||||
* SELECT, WHERE, ORDER BY, and other statements can be rearranged in any way you like
|
||||
* Supports EXCEPT statement
|
||||
* Provides a fully-functional client-side browser demo application
|
||||
* Almost nonexistent entry barrier both for SQL users and JS/Python users
|
||||
* Integration with popular text editors (VSCode, Vim, Sublime Text, Atom)
|
||||
* Small, maintainable, dependency-free, eco-friendly and hackable code base: RBQL engine fits into a single file with less than 2000 LOC
|
||||
|
||||
### Disadvantages of RBQL compared to traditional SQL engines
|
||||
* Not suitable for transactional workload
|
||||
* RBQL doesn't support nested queries, but they can be emulated with consecutive queries
|
||||
* Number of tables in all JOIN queries is always 2 (input table and join table), use consecutive queries to join 3 or more tables
|
||||
* Does not support HAVING statement
|
||||
|
||||
|
||||
### References
|
||||
|
||||
* [RBQL: Official Site](https://rbql.org/)
|
||||
* RBQL is integrated with Rainbow CSV extensions in [Vim](https://github.com/mechatroner/rainbow_csv), [VSCode](https://marketplace.visualstudio.com/items?itemName=mechatroner.rainbow-csv), [Sublime Text](https://packagecontrol.io/packages/rainbow_csv) and [Atom](https://atom.io/packages/rainbow-csv) editors.
|
||||
* [Demo Google Colab notebook](https://colab.research.google.com/drive/1_cFPtnQUxILP0RE2_DBlqIfXaEzT-oZ6?usp=sharing)
|
||||
* [RBQL in npm](https://www.npmjs.com/package/rbql): `$ npm install -g rbql`
|
||||
* [RBQL in PyPI](https://pypi.org/project/rbql/): `$ pip install rbql` - the module also provides `%rbql` magic command for IPython/Jupyter.
|
||||
@ -0,0 +1,26 @@
|
||||
{
|
||||
"env": {
|
||||
"browser": false,
|
||||
"commonjs": true,
|
||||
"es6": true,
|
||||
"node": true
|
||||
},
|
||||
"parserOptions": {
|
||||
"ecmaFeatures": {
|
||||
"jsx": true
|
||||
},
|
||||
"sourceType": "module",
|
||||
"ecmaVersion": 2018
|
||||
},
|
||||
"rules": {
|
||||
"no-const-assign": "warn",
|
||||
"no-this-before-super": "warn",
|
||||
"no-undef": "warn",
|
||||
"semi": [2, "always"],
|
||||
"no-unreachable": "warn",
|
||||
"no-unused-vars": "warn",
|
||||
"constructor-super": "warn",
|
||||
"no-trailing-spaces": "error",
|
||||
"valid-typeof": "warn"
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,74 @@
|
||||
// Prints "Error: <error_msg>" to stderr and terminates the process with exit code 1.
function die(error_msg) {
    const exit_code = 1;
    console.error(`Error: ${error_msg}`);
    process.exit(exit_code);
}
|
||||
|
||||
|
||||
// Strips every leading '-' from a CLI option name, e.g. '--out-format' becomes 'out-format'.
// Dashes that are not at the start of the string are kept.
function normalize_cli_key(cli_key) {
    let first_kept = 0;
    while (cli_key.charAt(first_kept) === '-') {
        first_kept += 1;
    }
    return cli_key.slice(first_kept);
}
|
||||
|
||||
|
||||
// Prints the usage text to stdout.
//   scheme      - object mapping option names to descriptors; the fields read here are
//                 'hidden', 'metavar', 'default' and 'help'
//   description - optional text printed before the option list
//   epilog      - optional text printed after the option list
function show_help(scheme, description, epilog) {
    if (description) {
        console.log(description);
    }
    console.log('Options:');
    for (const option_name in scheme) {
        const option_info = scheme[option_name];
        // Options carrying a 'hidden' property are left out of the listing.
        if (option_info.hasOwnProperty('hidden')) {
            continue;
        }
        const metavar = option_info.hasOwnProperty('metavar') ? ' ' + option_info['metavar'] : '';
        console.log(' ' + option_name + metavar);
        if (option_info.hasOwnProperty('default')) {
            console.log(' Default: "' + option_info['default'] + '"');
        }
        console.log(' ' + option_info['help']);
        // Empty line between options.
        console.log();
    }
    if (epilog) {
        console.log(epilog);
    }
}
|
||||
|
||||
|
||||
// Parses command line arguments according to `scheme`.
//   cmd_args    - full argv-style array; its first two entries are skipped
//   scheme      - object mapping option names (as typed on the command line) to descriptors
//   description - passed to show_help() when '--help' or '-h' is encountered
//   epilog      - passed to show_help() when '--help' or '-h' is encountered
// Returns an object keyed by option names with leading dashes removed.
// Unknown options and options missing their value are reported through die().
function parse_cmd_args(cmd_args, scheme, description=null, epilog=null) {
    const parsed = {};

    // Pre-fill defaults; options marked 'boolean' start as false.
    for (const scheme_key in scheme) {
        const scheme_info = scheme[scheme_key];
        if (scheme_info.hasOwnProperty('default')) {
            parsed[normalize_cli_key(scheme_key)] = scheme_info['default'];
        }
        if (scheme_info.hasOwnProperty('boolean')) {
            parsed[normalize_cli_key(scheme_key)] = false;
        }
    }

    const tokens = cmd_args.slice(2);
    for (let pos = 0; pos < tokens.length; ) {
        const option = tokens[pos];
        if (option == '--help' || option == '-h') {
            show_help(scheme, description, epilog);
            process.exit(0);
        }
        pos += 1;
        if (!scheme.hasOwnProperty(option)) {
            die(`unknown argument: ${option}`);
        }
        const option_info = scheme[option];
        const result_key = normalize_cli_key(option);
        if (option_info['boolean']) {
            // Flags take no value: their presence alone switches them on.
            parsed[result_key] = true;
            continue;
        }
        if (pos >= tokens.length) {
            die(`no CLI value for key: ${option}`);
        }
        // The token right after a non-boolean option is consumed as its value.
        parsed[result_key] = tokens[pos];
        pos += 1;
    }
    return parsed;
}
|
||||
|
||||
|
||||
|
||||
module.exports.parse_cmd_args = parse_cmd_args;
|
||||
@ -0,0 +1,387 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
const fs = require('fs');
|
||||
const readline = require('readline');
|
||||
|
||||
const rbql = require('./rbql.js');
|
||||
const rbql_csv = require('./rbql_csv.js');
|
||||
const csv_utils = require('./csv_utils.js');
|
||||
const cli_parser = require('./cli_parser.js');
|
||||
|
||||
let out_format_names = ['csv', 'tsv', 'monocolumn', 'input'];
|
||||
|
||||
var error_format = 'hr';
|
||||
var interactive_mode = false;
|
||||
|
||||
|
||||
// TODO implement colored output like in Python version
|
||||
// TODO implement query history like in the Python version. The "readline" module allows doing that, see the "completer" parameter.
|
||||
|
||||
// FIXME test readline on Win: disable interactive mode?
|
||||
|
||||
// FIXME handle broken pipe error and add tests. See Python version.
|
||||
|
||||
|
||||
// Error type thrown by run_with_js() when the RBQL query is empty.
class RbqlParsingError extends Error {}
|
||||
// Error type thrown by this CLI for invalid option combinations and for failed delimiter autodetection.
class GenericError extends Error {}
|
||||
|
||||
|
||||
// Reports an error as human-readable text.
// In interactive mode the message goes to stdout with an ANSI-highlighted prefix,
// otherwise it goes to stderr without any escape codes.
function show_error_plain_text(error_type, error_msg) {
    if (!interactive_mode) {
        console.error(`Error [${error_type}]: ${error_msg}`);
        return;
    }
    console.log(`\x1b[31;1mError [${error_type}]:\x1b[0m ${error_msg}`);
}
|
||||
|
||||
|
||||
// Writes the error to stderr as a JSON object with the keys "error_type" and "error".
// No trailing newline is added.
function report_error_json(error_type, error_msg) {
    const serialized_report = JSON.stringify({error_type: error_type, error: error_msg});
    process.stderr.write(serialized_report);
}
|
||||
|
||||
|
||||
// Converts an exception into an (error type, error message) pair with rbql.exception_to_error_info()
// and reports it as plain text when error_format is 'hr', as JSON otherwise.
function show_exception(e) {
    const [error_type, error_msg] = rbql.exception_to_error_info(e);
    const report = error_format == 'hr' ? show_error_plain_text : report_error_json;
    report(error_type, error_msg);
}
|
||||
|
||||
|
||||
// Prints a warning: to stdout with an ANSI-highlighted prefix in interactive mode,
// to stderr as plain text otherwise.
function show_warning(msg) {
    if (!interactive_mode) {
        console.error('Warning: ' + msg);
        return;
    }
    console.log('\x1b[33;1mWarning:\x1b[0m ' + msg);
}
|
||||
|
||||
|
||||
// Maps the textual tab aliases 'TAB' and '\t' (backslash followed by "t") to a real tab character.
// Any other value is returned unchanged.
function normalize_delim(delim) {
    const is_tab_alias = delim == 'TAB' || delim == '\\t';
    return is_tab_alias ? '\t' : delim;
}
|
||||
|
||||
|
||||
// Returns src[key] when `key` is an own property of `src`, and `default_val` otherwise.
// Inherited properties do not count as present.
function get_default(src, key, default_val) {
    if (src.hasOwnProperty(key)) {
        return src[key];
    }
    return default_val;
}
|
||||
|
||||
|
||||
// Prompts for a query through user_input_reader.question() and resolves with the text
// that the reader hands to the callback.
async function read_user_query(user_input_reader) {
    const prompt = 'Input SQL-like RBQL query and press Enter:\n> ';
    const answer = await new Promise((resolve) => {
        user_input_reader.question(prompt, (typed_query) => resolve(typed_query));
    });
    return answer;
}
|
||||
|
||||
|
||||
// Picks the split policy used when only a delimiter is given:
// 'quoted' for ';' and ',', 'whitespace' for a single space, 'simple' for everything else.
function get_default_policy(delim) {
    if (delim === ';' || delim === ',') {
        return 'quoted';
    }
    if (delim == ' ') {
        return 'whitespace';
    }
    return 'simple';
}
|
||||
|
||||
|
||||
// Checks whether the sampled lines look like a table in the given dialect.
// The answer is true only when at least 10 lines were sampled, every line splits without a
// warning, and all lines have the same number of fields, which must be 2 or more.
function is_delimited_table(sampled_lines, delim, policy) {
    if (sampled_lines.length < 10) {
        return false;
    }
    let expected_num_fields = null;
    for (const line of sampled_lines) {
        const [fields, warning] = csv_utils.smart_split(line, delim, policy, true);
        if (warning) {
            return false;
        }
        // The first line fixes the field count that the remaining lines must match.
        if (expected_num_fields === null) {
            expected_num_fields = fields.length;
        }
        if (expected_num_fields < 2 || fields.length != expected_num_fields) {
            return false;
        }
    }
    return true;
}
|
||||
|
||||
|
||||
async function sample_lines(table_path) {
|
||||
let finish_promise = new Promise(function(resolve, reject) {
|
||||
let input_reader = readline.createInterface({ input: fs.createReadStream(table_path) });
|
||||
let sampled_lines = [];
|
||||
input_reader.on('line', line => {
|
||||
if (sampled_lines.length < 10) {
|
||||
sampled_lines.push(line);
|
||||
} else {
|
||||
input_reader.close();
|
||||
}
|
||||
});
|
||||
input_reader.on('close', () => { resolve(sampled_lines); });
|
||||
});
|
||||
let sampled_lines = await finish_promise;
|
||||
return sampled_lines;
|
||||
}
|
||||
|
||||
|
||||
// Reads records from the file at `table_path` through rbql_csv.CSVRecordIterator, passing 10 to
// get_all_records(). Returns a [records, warnings] pair, where warnings come from the
// iterator's get_warnings().
async function sample_records(table_path, encoding, delim, policy) {
    const record_iterator = new rbql_csv.CSVRecordIterator(fs.createReadStream(table_path), null, encoding, delim, policy);
    const records = await record_iterator.get_all_records(10);
    // Warnings are collected only after the records have been read.
    return [records, record_iterator.get_warnings()];
}
|
||||
|
||||
|
||||
// Guesses the [delimiter, policy] pair of a table file.
// Candidate dialects are tried in a fixed order against sampled lines; when none fits, the
// file extension decides ('.csv' or '.tsv'). Returns [null, null] when nothing matches.
async function autodetect_delim_policy(table_path) {
    const sampled_lines = await sample_lines(table_path);
    const candidate_dialects = [['\t', 'simple'], [',', 'quoted'], [';', 'quoted'], ['|', 'simple']];
    const matching_dialect = candidate_dialects.find(
        ([delim, policy]) => is_delimited_table(sampled_lines, delim, policy)
    );
    if (matching_dialect !== undefined) {
        return matching_dialect;
    }
    // Content-based detection failed: fall back to the file extension.
    if (table_path.endsWith('.csv')) {
        return [',', 'quoted'];
    }
    return table_path.endsWith('.tsv') ? ['\t', 'simple'] : [null, null];
}
|
||||
|
||||
|
||||
// Prints records to stdout, one line per record, with each column wrapped in an ANSI color.
//   records           - array of records, each record being an array of field values
//   delim             - string placed between the fields of a record
//   show_column_names - when truthy, fields are prefixed with "a<column number>:"
//   with_headers      - when truthy, the first record is printed without that prefix
function print_colorized(records, delim, show_column_names, with_headers) {
    const reset_color_code = '\x1b[0m';
    // Colors are reused cyclically once a record has more columns than there are codes.
    const color_codes = ['\x1b[0m', '\x1b[31m', '\x1b[32m', '\x1b[33m', '\x1b[34m', '\x1b[35m', '\x1b[36m', '\x1b[31;1m', '\x1b[32;1m', '\x1b[33;1m'];
    for (let row = 0; row < records.length; row++) {
        const plain_row = !show_column_names || (with_headers && row == 0);
        const colored_fields = Array.from(records[row], (field, col) => {
            const color_code = color_codes[col % color_codes.length];
            if (plain_row) {
                return color_code + field;
            }
            return `${color_code}a${col + 1}:${field}`;
        });
        console.log(colored_fields.join(delim) + reset_color_code);
    }
}
|
||||
|
||||
|
||||
// Reports the outcome of a successfully executed query.
// With the 'hr' error format every warning is printed and, in interactive mode, a preview of
// the output table is shown. With any other error format the warnings, if any, are written to
// stderr as a JSON object with a "warnings" key.
async function handle_query_success(warnings, output_path, encoding, delim, policy) {
    if (error_format != 'hr') {
        if (warnings !== null && warnings.length) {
            process.stderr.write(JSON.stringify({'warnings': warnings}));
        }
        return;
    }

    if (warnings !== null) {
        for (const warning of warnings) {
            show_warning(warning);
        }
    }
    if (!interactive_mode) {
        return;
    }

    // Only the records are used here; warnings produced while sampling the output are ignored.
    const [preview_records] = await sample_records(output_path, encoding, delim, policy);
    const ruler = '====================================';
    console.log('\nOutput table preview:');
    console.log(ruler);
    print_colorized(preview_records, delim, false, false);
    console.log(ruler);
    console.log('Success! Result table was saved to: ' + output_path);
}
|
||||
|
||||
|
||||
// Executes a single RBQL query described by the parsed CLI arguments.
// Returns true on success. When the query fails, the exception is rethrown in non-interactive
// mode; in interactive mode it is reported with show_exception() and false is returned.
// Throws RbqlParsingError when args['query'] is empty.
async function run_with_js(args) {
    const delim = normalize_delim(args['delim']);
    const policy = args['policy'] || get_default_policy(delim);
    const query = args['query'];
    if (!query) {
        throw new RbqlParsingError('RBQL query is empty');
    }

    const input_path = get_default(args, 'input', null);
    const output_path = get_default(args, 'output', null);
    const csv_encoding = args['encoding'];
    const with_headers = args['with-headers'];
    const comment_prefix = args['comment-prefix'];
    const init_source_file = get_default(args, 'init-source-file', null);
    const output_format = args['out-format'];

    // Without an explicit output delimiter the output dialect is derived from the output format:
    // 'input' reuses the input dialect, any other name is resolved by rbql_csv.
    let output_delim = get_default(args, 'out-delim', null);
    let output_policy = get_default(args, 'out-policy', null);
    if (output_delim === null) {
        const derived_dialect = output_format == 'input' ? [delim, policy] : rbql_csv.interpret_named_csv_format(output_format);
        [output_delim, output_policy] = derived_dialect;
    }

    const user_init_code = init_source_file !== null ? rbql_csv.read_user_init_code(init_source_file) : '';
    const warnings = [];
    try {
        // The bulk_read option ({'bulk_read': true}) is deliberately not passed here because:
        // * Bulk read can't handle large files since node is unable to read the whole file into a string, see https://github.com/mechatroner/rainbow_csv/issues/19
        // * For stdin input the util.TextDecoder would have to be used anyway
        // * binary/latin-1 do not require the decoder
        // * This is a CLI, so we are not in the Electron environment which can't use the TextDecoder
        // * Streaming mode works a little faster (no manual validation is needed)
        // TODO check if the current node installation doesn't have ICU enabled (which is typically provided by Node.js by default, see https://nodejs.org/api/intl.html) and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
        await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code);
        await handle_query_success(warnings, output_path, csv_encoding, output_delim, output_policy);
    } catch (e) {
        if (!interactive_mode) {
            throw e;
        }
        show_exception(e);
        return false;
    }
    return true;
}
|
||||
|
||||
|
||||
// Builds the default output path by appending an extension to the input path:
// '.csv' for a comma delimiter, '.tsv' for a tab delimiter and '.txt' for anything else.
function get_default_output_path(input_path, delim) {
    const extension_by_delim = {',': '.csv', '\t': '.tsv'};
    const extension = extension_by_delim.hasOwnProperty(delim) ? extension_by_delim[delim] : '.txt';
    return input_path + extension;
}
|
||||
|
||||
|
||||
// Prints a colorized preview of the input table, framed by separator lines, followed by
// the warnings collected while sampling it.
async function show_preview(input_path, encoding, delim, policy, with_headers) {
    const ruler = '====================================';
    const [preview_records, sampling_warnings] = await sample_records(input_path, encoding, delim, policy);
    console.log('Input table preview:');
    console.log(ruler);
    print_colorized(preview_records, delim, true, with_headers);
    // The closing separator is followed by an extra empty line.
    console.log(ruler + '\n');
    for (const sampling_warning of sampling_warnings) {
        show_warning(sampling_warning);
    }
}
|
||||
|
||||
|
||||
// Interactive mode: shows a preview of the input table, then keeps prompting for a query
// until one of them runs successfully.
// Throws GenericError when no input file is given, when error_format is not 'hr', or when
// the delimiter is neither provided nor autodetected.
// Side effects on `args`: delim, policy and query are assigned, and output too when it was empty.
async function run_interactive_loop(args) {
    const input_path = get_default(args, 'input', null);
    if (!input_path) {
        throw new GenericError('Input file must be provided in interactive mode. You can use stdin input only in non-interactive mode');
    }
    if (error_format != 'hr') {
        throw new GenericError('Only default "hr" error format is supported in interactive mode');
    }

    let delim = get_default(args, 'delim', null);
    let policy = null;
    if (delim === null) {
        // No delimiter on the command line: try to infer the dialect from the file itself.
        [delim, policy] = await autodetect_delim_policy(input_path);
        if (!delim) {
            throw new GenericError('Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option');
        }
    } else {
        delim = normalize_delim(delim);
        policy = args['policy'] || get_default_policy(delim);
    }

    await show_preview(input_path, args['encoding'], delim, policy, args['with-headers']);
    args.delim = delim;
    args.policy = policy;
    if (!args.output) {
        args.output = get_default_output_path(input_path, delim);
        show_warning('Output path was not provided. Result set will be saved as: ' + args.output);
    }

    const user_input_reader = readline.createInterface({ input: process.stdin, output: process.stdout });
    try {
        let query_succeeded = false;
        while (!query_succeeded) {
            args.query = await read_user_query(user_input_reader);
            query_succeeded = await run_with_js(args);
        }
    } finally {
        // The reader is closed even when the loop exits through an exception.
        user_input_reader.close();
    }
}
|
||||
|
||||
|
||||
let tool_description = `rbql-js
|
||||
|
||||
Run RBQL queries against CSV files and data streams
|
||||
|
||||
rbql-js supports two modes: non-interactive (with "--query" option) and interactive (without "--query" option)
|
||||
Interactive mode shows source table preview which makes query editing much easier. Usage example:
|
||||
$ rbql-js --input input.csv
|
||||
Non-interactive mode supports source tables in stdin. Usage example:
|
||||
$ rbql-js --query "select a1, a2 order by a1" --delim , < input.csv
|
||||
`;
|
||||
|
||||
let epilog = `
|
||||
Description of the available CSV split policies:
|
||||
* "simple" - RBQL uses simple split() function and doesn't perform special handling of double quote characters
|
||||
* "quoted" - Separator can be escaped inside double-quoted fields. Double quotes inside double-quoted fields must be doubled
|
||||
* "quoted_rfc" - Same as "quoted", but also allows newlines inside double-quoted fields, see RFC-4180: https://tools.ietf.org/html/rfc4180
|
||||
* "whitespace" - Works only with whitespace separator, multiple consecutive whitespaces are treated as a single whitespace
|
||||
* "monocolumn" - RBQL doesn't perform any split at all, each line is a single-element record, i.e. only "a1" and "NR" are available
|
||||
`;
|
||||
|
||||
|
||||
/**
 * Validate the parsed command line arguments and run the tool in the matching mode.
 *
 * Prints the version and exits when "--version" is set. Otherwise normalizes
 * the arguments (monocolumn policy implies an empty delimiter, "latin-1" is
 * mapped to "binary"), stores the error format and the mode in the
 * module-level `error_format` / `interactive_mode` variables, and then either
 * runs the single query given with "--query" or starts the interactive loop.
 *
 * Throws GenericError for inconsistent option combinations.
 */
async function do_main(args) {
    if (args['version']) {
        console.log(rbql.version);
        process.exit(0);
    }

    const policy_provided = args.hasOwnProperty('policy');
    if (policy_provided && args['policy'] === 'monocolumn')
        args['delim'] = '';

    // Checked after the monocolumn shortcut above, which supplies the delimiter itself.
    if (policy_provided && !args.hasOwnProperty('delim'))
        throw new GenericError('Using "--policy" without "--delim" is not allowed');

    if (args.encoding == 'latin-1')
        args.encoding = 'binary';

    error_format = args['error-format'];
    interactive_mode = !args.hasOwnProperty('query');

    if (interactive_mode) {
        if (error_format == 'json') {
            throw new GenericError('json error format is not compatible with interactive mode');
        }
        await run_interactive_loop(args);
        return;
    }

    if (!args.hasOwnProperty('delim')) {
        throw new GenericError('Separator must be provided with "--delim" option in non-interactive mode');
    }
    await run_with_js(args);
}
|
||||
|
||||
|
||||
/**
 * Command line entry point: declares the supported options, parses
 * process.argv and hands the result to do_main().
 * Any error rejected by do_main() is reported with show_exception() and the
 * process exits with status 1.
 */
function main() {
    const cli_scheme = {
        '--input': {help: 'Read csv table from FILE instead of stdin. Required in interactive mode', metavar: 'FILE'},
        '--query': {help: 'Query string in rbql. Run in interactive mode if empty', metavar: 'QUERY'},
        '--output': {help: 'Write output table to FILE instead of stdout', metavar: 'FILE'},
        '--delim': {help: 'Delimiter character or multicharacter string, e.g. "," or "###". Can be autodetected in interactive mode', metavar: 'DELIM'},
        '--policy': {help: 'Split policy, see the explanation below. Supported values: "simple", "quoted", "quoted_rfc", "whitespace", "monocolumn". Can be autodetected in interactive mode', metavar: 'POLICY'},
        '--with-headers': {boolean: true, help: 'Indicates that input (and join) table has header'},
        '--comment-prefix': {help: 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"', metavar: 'PREFIX'},
        '--encoding': {default: 'utf-8', help: 'Manually set csv encoding', metavar: 'ENCODING'},
        '--out-format': {default: 'input', help: 'Output format. Supported values: ' + out_format_names.map(v => `"${v}"`).join(', '), metavar: 'FORMAT'},
        '--out-delim': {help: 'Output delim. Use with "out-policy". Overrides out-format', metavar: 'DELIM'},
        '--out-policy': {help: 'Output policy. Use with "out-delim". Overrides out-format', metavar: 'POLICY'},
        '--error-format': {default: 'hr', help: 'Errors and warnings format. [hr|json]', hidden: true},
        '--version': {boolean: true, help: 'Print RBQL version and exit'},
        '--init-source-file': {help: 'Path to init source file to use instead of ~/.rbql_init_source.js', hidden: true}
    };
    const args = cli_parser.parse_cmd_args(process.argv, cli_scheme, tool_description, epilog);
    do_main(args).catch((error_info) => {
        show_exception(error_info);
        process.exit(1);
    });
}
|
||||
|
||||
|
||||
// Run the command line interface only when this file is executed directly, not when it is required as a module.
const executed_directly = require.main === module;
if (executed_directly)
    main();
|
||||
|
||||
|
||||
@ -0,0 +1,151 @@
|
||||
// Pattern source for one double-quoted field. Group 1 captures the text between the outer quotes,
// in which every embedded double quote must appear doubled ("").
let field_regular_expression = '"((?:[^"]*"")*[^"]*)"';
|
||||
// Matches a double-quoted field located exactly at the start of the tested string.
let field_rgx = new RegExp('^' + field_regular_expression);
|
||||
// Same as field_rgx, but also accepts (and consumes) spaces before and after the quoted field.
let field_rgx_external_whitespaces = new RegExp('^ *' + field_regular_expression + ' *');
|
||||
|
||||
|
||||
// TODO consider making this file (and rbql.js) both node and browser compatible: https://caolan.org/posts/writing_for_node_and_the_browser.html
|
||||
|
||||
|
||||
/**
 * Split text into lines on any of the three line break forms: CRLF, lone CR or lone LF.
 * Line breaks are not included in the returned strings.
 */
function split_lines(text) {
    const line_break_rgx = /\r\n|\r|\n/;
    return text.split(line_break_rgx);
}
|
||||
|
||||
|
||||
/**
 * Extract the field of `src` that starts at offset `cidx` and append it to `result`.
 *
 * A properly double-quoted field is accepted only when it is directly followed by the
 * delimiter or by the end of `src`; otherwise the text up to the next delimiter is taken
 * verbatim and a warning is reported.
 *
 * The delimiter may consist of several characters: it is compared as a whole string and
 * the cursor moves past its full length.
 *
 * Returns [offset of the next field, warning flag].
 */
function extract_next_field(src, dlm, preserve_quotes_and_whitespaces, allow_external_whitespaces, cidx, result) {
    // An empty delimiter still has to move the cursor forward, otherwise the caller would never terminate.
    let dlm_step = dlm.length || 1;
    var warning = false;
    let src_cur = src.substring(cidx);
    let rgx = allow_external_whitespaces ? field_rgx_external_whitespaces : field_rgx;
    let match_obj = rgx.exec(src_cur);
    if (match_obj !== null) {
        let match_end = cidx + match_obj[0].length;
        let followed_by_delim = dlm.length > 0 && src.startsWith(dlm, match_end);
        if (match_end == src.length || followed_by_delim) {
            if (preserve_quotes_and_whitespaces) {
                result.push(match_obj[0]);
            } else {
                result.push(match_obj[1].replace(/""/g, '"'));
            }
            return [match_end + dlm_step, false];
        }
        // Something other than the delimiter follows the closing quote: the field is malformed.
        warning = true;
    }
    var uidx = src.indexOf(dlm, cidx);
    if (uidx == -1)
        uidx = src.length;
    var field = src.substring(cidx, uidx);
    warning = warning || field.indexOf('"') != -1;
    result.push(field);
    return [uidx + dlm_step, warning];
}


/**
 * Split a record into fields, honouring double-quoted fields that may contain the delimiter.
 *
 * src - record text; dlm - delimiter (one or more characters);
 * preserve_quotes_and_whitespaces - keep quoted fields exactly as written instead of unquoting them.
 *
 * Returns [fields, warning], where warning is true when inconsistent quoting was found.
 */
function split_quoted_str(src, dlm, preserve_quotes_and_whitespaces=false) {
    // This function is newline-agnostic i.e. it can also split records with multiline fields.
    if (src.indexOf('"') == -1) // Optimization for most common case
        return [src.split(dlm), false];
    var result = [];
    var cidx = 0;
    var warning = false;
    let allow_external_whitespaces = dlm != ' ';
    while (cidx < src.length) {
        var extraction_report = extract_next_field(src, dlm, preserve_quotes_and_whitespaces, allow_external_whitespaces, cidx, result);
        cidx = extraction_report[0];
        warning = warning || extraction_report[1];
    }
    // A record that ends with the delimiter has one more (empty) field after it.
    // For multi-character delimiters the cursor position is used, because the tail of the record
    // may merely look like a delimiter while overlapping the previous one (e.g. "###" with "##").
    let ends_with_delim = dlm.length > 1 ? cidx == src.length : src.charAt(src.length - 1) == dlm;
    if (ends_with_delim)
        result.push('');
    return [result, warning];
}
|
||||
|
||||
|
||||
/**
 * Return `src` ready to be written as one field of a "quoted" CSV line.
 * The field is wrapped in double quotes (with inner quotes doubled) only when it
 * contains the delimiter or a double quote; otherwise it is returned unchanged.
 */
function quote_field(src, delim) {
    const has_delim = src.indexOf(delim) != -1;
    const needs_quoting = has_delim || src.indexOf('"') != -1;
    if (!needs_quoting)
        return src;
    return '"' + src.replace(/"/g, '""') + '"';
}
|
||||
|
||||
|
||||
/**
 * Return `src` ready to be written as one field of a "quoted_rfc" CSV line.
 * In addition to the delimiter and the double quote, a CR or LF inside the
 * field also forces quoting. Inner double quotes are doubled.
 */
function rfc_quote_field(src, delim) {
    const special_tokens = [delim, '"', '\n', '\r'];
    const needs_quoting = special_tokens.some((token) => src.indexOf(token) != -1);
    if (!needs_quoting)
        return src;
    return '"' + src.replace(/"/g, '""') + '"';
}
|
||||
|
||||
|
||||
/**
 * Strip the enclosing double quotes (and any spaces around them) from a field and
 * collapse doubled inner quotes. A field that is not a single well-formed quoted
 * value is returned unchanged.
 */
function unquote_field(field) {
    const whole_field_rgx = new RegExp('^' + ' *' + field_regular_expression + ' *$');
    const found = whole_field_rgx.exec(field);
    if (found === null)
        return field;
    return found[1].replace(/""/g, '"');
}
|
||||
|
||||
|
||||
/**
 * Apply unquote_field() to every element and return the results as a new array.
 */
function unquote_fields(fields) {
    const unquoted = [];
    fields.forEach((field, idx, all_fields) => {
        unquoted.push(unquote_field(field, idx, all_fields));
    });
    return unquoted;
}
|
||||
|
||||
|
||||
/**
 * Split a record on runs of spaces.
 *
 * With preserve_whitespaces=false the result contains only the non-space tokens.
 * With preserve_whitespaces=true every token keeps its surrounding spaces, except that
 * one trailing space is removed from every token but the last one.
 */
function split_whitespace_separated_str(src, preserve_whitespaces=false) {
    const token_rgx = preserve_whitespaces ? new RegExp(' *[^ ]+ *', 'g') : new RegExp('[^ ]+', 'g');
    const tokens = src.match(token_rgx) || [];
    if (!preserve_whitespaces)
        return tokens;
    const last_idx = tokens.length - 1;
    return tokens.map((token, idx) => (idx < last_idx ? token.slice(0, -1) : token));
}
|
||||
|
||||
|
||||
/**
 * Split a record according to the named split policy.
 *
 * 'simple' uses a plain String.split(), 'whitespace' splits on runs of spaces,
 * 'monocolumn' returns the whole record as one field, and every other policy
 * value is handled by split_quoted_str().
 *
 * Returns [fields, warning]; only the quoted splitter can report a warning.
 */
function smart_split(src, dlm, policy, preserve_quotes_and_whitespaces) {
    switch (policy) {
        case 'simple':
            return [src.split(dlm), false];
        case 'whitespace':
            return [split_whitespace_separated_str(src, preserve_quotes_and_whitespaces), false];
        case 'monocolumn':
            return [[src], false];
        default:
            return split_quoted_str(src, dlm, preserve_quotes_and_whitespaces);
    }
}
|
||||
|
||||
|
||||
/**
 * Feed one physical line into the assembly of a (possibly multiline) RFC-4180 record.
 *
 * external_rfc_line_buffer - caller-owned array holding the lines of a record that is
 *     still open; it is modified in place and emptied when the record is completed.
 * current_line - the next physical line (without its line break).
 * comment_prefix - when not null, a line starting with it is skipped, but only while
 *     no record is open.
 *
 * Returns the complete record text when this line finishes a record, otherwise null.
 * A line with an odd number of double quotes opens a multiline record or closes the open one.
 */
function accumulate_rfc_line_into_record(external_rfc_line_buffer, current_line, comment_prefix=null) {
    const no_open_record = external_rfc_line_buffer.length == 0;
    if (comment_prefix !== null && no_open_record && current_line.startsWith(comment_prefix))
        return null;

    const quote_matches = current_line.match(/"/g);
    const toggles_quoting = quote_matches && quote_matches.length % 2 == 1;

    // A balanced line outside of an open record is a complete record by itself.
    if (no_open_record && !toggles_quoting)
        return current_line;

    external_rfc_line_buffer.push(current_line);
    // The line either opened a record or is a balanced continuation of the open one.
    if (no_open_record || !toggles_quoting)
        return null;

    // The line closed the open record: emit it and reset the caller's buffer.
    const multiline_row = external_rfc_line_buffer.join('\n');
    external_rfc_line_buffer.length = 0;
    return multiline_row;
}
|
||||
|
||||
|
||||
module.exports.split_quoted_str = split_quoted_str;
|
||||
module.exports.split_whitespace_separated_str = split_whitespace_separated_str;
|
||||
module.exports.smart_split = smart_split;
|
||||
module.exports.quote_field = quote_field;
|
||||
module.exports.rfc_quote_field = rfc_quote_field;
|
||||
module.exports.unquote_field = unquote_field;
|
||||
module.exports.unquote_fields = unquote_fields;
|
||||
module.exports.split_lines = split_lines;
|
||||
module.exports.accumulate_rfc_line_into_record = accumulate_rfc_line_into_record;
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,731 @@
|
||||
const fs = require('fs');
|
||||
const os = require('os');
|
||||
const path = require('path');
|
||||
const util = require('util');
|
||||
|
||||
const rbql = require('./rbql.js');
|
||||
const csv_utils = require('./csv_utils.js');
|
||||
|
||||
|
||||
// Message of the error reported when the input bytes are not valid UTF-8.
const utf_decoding_error = 'Unable to decode input table as UTF-8. Use binary (latin-1) encoding instead';
|
||||
|
||||
|
||||
// Error type for problems with reading or writing CSV data (unknown format names, decoding failures,
// inconsistent quoting, missing join tables, unusable output policies).
class RbqlIOHandlingError extends Error {}
|
||||
// Error type thrown by the assert() helper of this file when a checked condition does not hold.
class AssertionError extends Error {}
|
||||
|
||||
|
||||
// TODO performance improvement: replace smart_split() with polymorphic_split()
|
||||
|
||||
|
||||
/**
 * Throw an AssertionError unless `condition` is truthy.
 * The text 'Assertion error' is used when no (or an empty) message is supplied.
 */
function assert(condition, message=null) {
    if (condition)
        return;
    throw new AssertionError(message ? message : 'Assertion error');
}
|
||||
|
||||
|
||||
/**
 * Translate a format name (case-insensitive) into a [delimiter, policy] pair:
 * 'monocolumn' -> ['', 'monocolumn'], 'csv' -> [',', 'quoted'], 'tsv' -> ['\t', 'simple'].
 * Throws RbqlIOHandlingError for any other name.
 */
function interpret_named_csv_format(format_name) {
    format_name = format_name.toLowerCase();
    switch (format_name) {
        case 'monocolumn':
            return ['', 'monocolumn'];
        case 'csv':
            return [',', 'quoted'];
        case 'tsv':
            return ['\t', 'simple'];
        default:
            throw new RbqlIOHandlingError(`Unknown format name: "${format_name}"`);
    }
}
|
||||
|
||||
|
||||
|
||||
/**
 * Tell whether every character of `str` has a code in the range 0x00-0x7F.
 * An empty string is considered ASCII.
 */
function is_ascii(str) {
    const ascii_only_rgx = /^[\x00-\x7F]*$/;
    return ascii_only_rgx.test(str);
}
|
||||
|
||||
|
||||
/**
 * Synchronously read the user's init source file and return its content decoded as UTF-8.
 * Errors raised by fs.readFileSync (e.g. a missing file) are not caught here.
 */
function read_user_init_code(rbql_init_source_path) {
    const init_code = fs.readFileSync(rbql_init_source_path, 'utf-8');
    return init_code;
}
|
||||
|
||||
|
||||
/**
 * Remove a leading UTF-8 byte order mark from `line`, if there is one.
 *
 * When the data was decoded as 'binary' the mark shows up as the three characters
 * 0xEF 0xBB 0xBF; when it was decoded as 'utf-8' it is the single character U+FEFF.
 * For any other encoding, or when no mark is present, the line is returned unchanged.
 */
function remove_utf8_bom(line, assumed_source_encoding) {
    if (assumed_source_encoding == 'binary') {
        const raw_bom_codes = [0xEF, 0xBB, 0xBF];
        const has_raw_bom = line.length >= 3 && raw_bom_codes.every((code, pos) => line.charCodeAt(pos) === code);
        if (has_raw_bom)
            return line.substring(3);
    }
    if (assumed_source_encoding == 'utf-8') {
        const has_decoded_bom = line.length >= 1 && line.charCodeAt(0) === 0xFEFF;
        if (has_decoded_bom)
            return line.substring(1);
    }
    return line;
}
|
||||
|
||||
|
||||
/**
 * Build the warning text about records with differing numbers of fields.
 *
 * inconsistent_records_info maps a field count to the number of the record stored for it.
 * The two entries with the smallest record numbers are quoted in the message as examples.
 * At least two distinct field counts must be present.
 */
function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
    const entries = Object.keys(inconsistent_records_info).map((num_fields) => [inconsistent_records_info[num_fields], num_fields]);
    entries.sort((left, right) => left[0] - right[0]);
    assert(entries.length > 1);
    const [[record_1, num_fields_1], [record_2, num_fields_2]] = entries;
    const headline = `Number of fields in "${table_name}" table is not consistent: `;
    const examples = `e.g. record ${record_1} -> ${num_fields_1} fields, record ${record_2} -> ${num_fields_2} fields`;
    return headline + examples;
}
|
||||
|
||||
|
||||
/**
 * Replace a leading "~" that refers to the current user's home directory with its real path.
 *
 * Only "~" alone or "~" followed by a path separator is expanded. Other names that merely
 * start with a tilde (e.g. a file called "~backup.csv") are returned unchanged, so they can
 * still be found relative to the current directory.
 */
function expanduser(filepath) {
    let refers_to_home = filepath === '~' || filepath.startsWith('~/') || filepath.startsWith('~' + path.sep);
    if (refers_to_home) {
        return path.join(os.homedir(), filepath.slice(1));
    }
    return filepath;
}
|
||||
|
||||
|
||||
/**
 * Read a tab-separated index file and return its records as arrays of columns.
 *
 * Empty lines are skipped. Both LF and CRLF line endings are accepted, so the last
 * column of a record never carries a stray carriage return.
 * This is a best-effort read: when the file cannot be read an empty list is returned.
 */
function try_read_index(index_path) {
    var content = null;
    try {
        content = fs.readFileSync(index_path, 'utf-8');
    } catch (e) {
        // Deliberate: a missing or unreadable index simply means "no records".
        return [];
    }
    return content
        .split(/\r?\n/)
        .filter((line) => line.length > 0)
        .map((line) => line.split('\t'));
}
|
||||
|
||||
|
||||
/**
 * Return the first record of the index file whose first column equals `key`,
 * or null when there is no such record.
 */
function get_index_record(index_path, key) {
    const matched = try_read_index(index_path).find((record) => record.length && record[0] == key);
    return matched === undefined ? null : matched;
}
|
||||
|
||||
|
||||
/**
 * Resolve a table id to the path of an existing file, or return null.
 *
 * Lookup order:
 *   1. the id itself (after "~" expansion), i.e. absolute or relative to the current directory;
 *   2. the id relative to main_table_dir, when that is set and the id is not absolute;
 *   3. the path registered for the id in ~/.rbql_table_names.
 */
function find_table_path(main_table_dir, table_id) {
    const direct_path = expanduser(table_id);
    if (fs.existsSync(direct_path))
        return direct_path;

    if (main_table_dir && !path.isAbsolute(direct_path)) {
        const sibling_path = path.join(main_table_dir, direct_path);
        if (fs.existsSync(sibling_path))
            return sibling_path;
    }

    const names_index_path = path.join(os.homedir(), '.rbql_table_names');
    const name_record = get_index_record(names_index_path, table_id);
    const is_registered = name_record && name_record.length > 1 && fs.existsSync(name_record[1]);
    return is_registered ? name_record[1] : null;
}
|
||||
|
||||
|
||||
/**
 * FIFO queue built from two arrays: new records are pushed onto one array, and when the
 * other one runs empty the pushed records are reversed into it so that pop() yields the
 * oldest record first.
 */
class RecordQueue {
    // TODO compare performance with a linked list
    constructor() {
        this.push_stack = [];
        this.pull_stack = [];
    }

    /** Append a record to the end of the queue. */
    enqueue(record) {
        this.push_stack.push(record);
    }

    /** Remove and return the oldest record, or null when the queue is empty. */
    dequeue() {
        if (this.pull_stack.length == 0) {
            if (this.push_stack.length == 0)
                return null;
            // Move everything that was pushed so far to the pull side, oldest record on top.
            [this.pull_stack, this.push_stack] = [this.push_stack.reverse(), []];
        }
        return this.pull_stack.pop();
    }
}
|
||||
|
||||
|
||||
/**
 * Asynchronous iterator over the records of a CSV table.
 *
 * The table is read either from a readable stream (`stream`) or in one piece from a file
 * (`csv_path`); exactly one of the two must be non-null.
 *
 * Changes compared to the previous revision:
 *   - UTF-8 stream input is decoded in streaming mode, so a multi-byte character that is
 *     split between two chunks no longer triggers a false decoding error; input that ends
 *     inside a character is reported when the stream ends.
 *   - An empty decoded chunk no longer resets the "previous chunk ended with CR" state.
 *   - Errors emitted by the input stream are delivered to the consumer of get_record().
 */
class CSVRecordIterator extends rbql.RBQLInputIterator {
    // CSVRecordIterator implements a typical async producer-consumer model with an internal buffer:
    // get_record() - consumer
    // stream.on('data') - producer
    constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
        super();
        this.stream = stream;
        this.csv_path = csv_path;
        assert((this.stream === null) != (this.csv_path === null));
        this.encoding = encoding;
        this.delim = delim;
        this.policy = policy;

        this.has_header = has_header;
        this.first_record = null;
        this.first_record_should_be_emitted = !has_header;
        this.header_preread_complete = false;

        this.table_name = table_name;
        this.variable_prefix = variable_prefix;
        this.comment_prefix = (comment_prefix !== null && comment_prefix.length) ? comment_prefix : null;

        this.decoder = null;
        if (encoding == 'utf-8' && this.csv_path === null) {
            // Unfortunately util.TextDecoder has serious flaws:
            // 1. It doesn't work in Node without ICU: https://nodejs.org/api/util.html#util_new_textdecoder_encoding_options
            // 2. It is broken in Electron: https://github.com/electron/electron/issues/18733

            // Technically we can implement our own custom streaming text decoder, using the 3 following technologies:
            // 1. decode-encode validation method from https://stackoverflow.com/a/32279283/2898283
            // 2. Scanning buffer chunks for non-continuation utf-8 bytes from the end of the buffer:
            //    src_buffer -> (buffer_before, buffer_after) where buffer_after is very small(a couple of bytes) and buffer_before is large and ends with a non-continuation bytes
            // 3. Internal buffer to store small tail part from the previous buffer

            // "stream" is not a constructor option: streaming mode is requested per decode() call, see process_data_stream_chunk().
            this.decoder = new util.TextDecoder(encoding, {fatal: true});
        }

        this.input_exhausted = false;
        this.started = false;

        this.utf8_bom_removed = false; // BOM doesn't get automatically removed by the decoder when utf-8 file is treated as latin-1
        this.first_defective_line = null;

        this.fields_info = new Object();
        this.NR = 0; // Record number
        this.NL = 0; // Line number (NL != NR when the CSV file has comments or multiline fields)

        this.rfc_line_buffer = [];

        this.partially_decoded_line = '';
        this.partially_decoded_line_ends_with_cr = false;

        // Holds an external "resolve" function which is called when everything is fine.
        this.resolve_current_record = null;
        // Holds an external "reject" function which is called when error has occurred.
        this.reject_current_record = null;
        // Holds last exception if we don't have any reject callbacks from clients yet.
        this.current_exception = null;

        this.produced_records_queue = new RecordQueue();

        this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.process_record_line;
    }


    /** Switch header handling on or off according to a query modifier. */
    handle_query_modifier(modifier) {
        // For `... WITH (header) ...` syntax
        if (['header', 'headers'].indexOf(modifier) != -1) {
            this.has_header = true;
            this.first_record_should_be_emitted = false;
        }
        if (['noheader', 'noheaders'].indexOf(modifier) != -1) {
            this.has_header = false;
            this.first_record_should_be_emitted = true;
        }
    }


    reset_external_callbacks() {
        // Drop external callbacks simultaneously since promises can only resolve once, see: https://stackoverflow.com/a/18218542/2898283
        this.reject_current_record = null;
        this.resolve_current_record = null;
    }


    /** Reject the pending get_record() promise with the stored exception, if both exist. */
    try_propagate_exception() {
        if (this.current_exception && this.reject_current_record) {
            let reject = this.reject_current_record;
            let exception = this.current_exception;
            this.reset_external_callbacks();
            this.current_exception = null;
            reject(exception);
        }
    }


    /** Remember the exception (unless an earlier one is still unreported) and try to deliver it. */
    store_or_propagate_exception(exception) {
        if (this.current_exception === null) {
            // Ignore subsequent exceptions if we already have an unreported error. This way we prioritize earlier errors over the more recent ones.
            this.current_exception = exception;
        }
        this.try_propagate_exception();
    }


    /** Read the first record once and keep a copy of it in this.first_record. */
    async preread_first_record() {
        if (this.header_preread_complete)
            return;
        this.first_record = await this.get_record();
        this.header_preread_complete = true; // We must set header_preread_complete to true after calling get_record(), because get_record() uses it internally.
        if (this.first_record === null) {
            return;
        }
        if (this.stream)
            this.stream.pause();
        this.first_record = this.first_record.slice();
    }


    /** Collect the variables referenced by the query text for this table. */
    async get_variables_map(query_text) {
        let variable_map = new Object();
        rbql.parse_basic_variables(query_text, this.variable_prefix, variable_map);
        rbql.parse_array_variables(query_text, this.variable_prefix, variable_map);

        await this.preread_first_record();
        if (this.has_header && this.first_record) {
            rbql.parse_attribute_variables(query_text, this.variable_prefix, this.first_record, 'CSV header line', variable_map);
            rbql.parse_dictionary_variables(query_text, this.variable_prefix, this.first_record, variable_map);
        }
        return variable_map;
    }


    /** Return the header record, or null when the table has no header. */
    async get_header() {
        await this.preread_first_record();
        return this.has_header ? this.first_record : null;
    }


    /** Settle the pending get_record() promise when an error, a record or the end of input is available. */
    try_resolve_next_record() {
        this.try_propagate_exception();
        if (this.resolve_current_record === null)
            return;

        let record = null;
        if (this.first_record_should_be_emitted && this.header_preread_complete) {
            this.first_record_should_be_emitted = false;
            record = this.first_record;
        } else {
            record = this.produced_records_queue.dequeue();
        }

        // Nothing buffered yet, but more input may arrive: keep the promise pending.
        if (record === null && !this.input_exhausted)
            return;
        let resolve = this.resolve_current_record;
        this.reset_external_callbacks();
        resolve(record);
    }


    /** Return a promise for the next record; it resolves to null when the input is exhausted. */
    async get_record() {
        if (!this.started)
            await this.start();
        if (this.stream && this.stream.isPaused())
            this.stream.resume();

        let parent_iterator = this;
        let current_record_promise = new Promise(function(resolve, reject) {
            parent_iterator.resolve_current_record = resolve;
            parent_iterator.reject_current_record = reject;
        });
        this.try_resolve_next_record();
        return current_record_promise;
    }


    /** Read all records, or stop the input after `num_records` records when that is set. */
    async get_all_records(num_records=null) {
        let records = [];
        while (true) {
            let record = await this.get_record();
            if (record === null)
                break;
            records.push(record);
            if (num_records && records.length >= num_records) {
                this.stop();
                break;
            }
        }
        return records;
    }


    /** Split one complete record line into fields and queue the record for the consumer. */
    process_record_line(line) {
        if (this.comment_prefix !== null && line.startsWith(this.comment_prefix))
            return; // Just skip the line
        this.NR += 1;
        var [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, false);
        if (warning) {
            if (this.first_defective_line === null) {
                this.first_defective_line = this.NL;
                if (this.policy == 'quoted_rfc')
                    this.store_or_propagate_exception(new RbqlIOHandlingError(`Inconsistent double quote escaping in ${this.table_name} table at record ${this.NR}, line ${this.NL}`));
            }
        }
        let num_fields = record.length;
        if (!this.fields_info.hasOwnProperty(num_fields))
            this.fields_info[num_fields] = this.NR;
        this.produced_records_queue.enqueue(record);
        this.try_resolve_next_record();
    }


    /** Feed one physical line into the multiline record assembly used by the 'quoted_rfc' policy. */
    process_partial_rfc_record_line(line) {
        let record_line = csv_utils.accumulate_rfc_line_into_record(this.rfc_line_buffer, line, this.comment_prefix);
        if (record_line !== null)
            this.process_record_line(record_line);
    }


    /** Count the physical line, strip a BOM from the first one and pass the line to the policy handler. */
    process_line(line) {
        this.NL += 1;
        if (this.NL === 1) {
            var clean_line = remove_utf8_bom(line, this.encoding);
            if (clean_line != line) {
                line = clean_line;
                this.utf8_bom_removed = true;
            }
        }
        this.process_line_polymorphic(line);
    }


    /** Decode one chunk of stream data and process every complete line it contains. */
    process_data_stream_chunk(data_chunk) {
        let decoded_string = null;
        if (this.decoder) {
            try {
                // Streaming mode keeps an incomplete trailing multi-byte sequence inside the decoder until the next chunk arrives.
                decoded_string = this.decoder.decode(data_chunk, {stream: true});
            } catch (e) {
                if (e instanceof TypeError) {
                    this.store_or_propagate_exception(new RbqlIOHandlingError(utf_decoding_error));
                } else {
                    this.store_or_propagate_exception(e);
                }
                return;
            }
        } else {
            decoded_string = data_chunk.toString(this.encoding);
        }
        // Nothing was decoded (e.g. the chunk held only a part of one character): keep the line state untouched.
        if (decoded_string.length == 0)
            return;
        let line_starts_with_lf = decoded_string[0] == '\n';
        // A CRLF pair split between two chunks must not produce an extra empty line.
        let first_line_index = line_starts_with_lf && this.partially_decoded_line_ends_with_cr ? 1 : 0;
        this.partially_decoded_line_ends_with_cr = decoded_string[decoded_string.length - 1] == '\r';
        let lines = csv_utils.split_lines(decoded_string);
        lines[0] = this.partially_decoded_line + lines[0];
        assert(first_line_index == 0 || lines[0].length == 0);
        this.partially_decoded_line = lines.pop();
        for (let i = first_line_index; i < lines.length; i++) {
            this.process_line(lines[i]);
        }
    }


    /** Decode the whole content of a file at once and process all of its lines. */
    process_data_bulk(data_chunk) {
        let decoded_string = data_chunk.toString(this.encoding);
        if (this.encoding == 'utf-8') {
            // Using hacky comparison method from here: https://stackoverflow.com/a/32279283/2898283
            // TODO get rid of this once TextDecoder is really fixed or when alternative method of reliable decoding appears
            let control_buffer = Buffer.from(decoded_string, 'utf-8');
            if (Buffer.compare(data_chunk, control_buffer) != 0) {
                this.store_or_propagate_exception(new RbqlIOHandlingError(utf_decoding_error));
                return;
            }
        }
        let lines = csv_utils.split_lines(decoded_string);
        if (lines.length && lines[lines.length - 1].length == 0)
            lines.pop();
        for (let i = 0; i < lines.length; i++) {
            this.process_line(lines[i]);
        }
        if (this.rfc_line_buffer.length > 0) {
            this.process_record_line(this.rfc_line_buffer.join('\n'));
        }
        this.input_exhausted = true;
        this.try_resolve_next_record(); // Should be a NOOP here?
    }


    /** Handle the end of the stream: flush the decoder and the pending line, then settle the consumer. */
    process_data_stream_end() {
        this.input_exhausted = true;
        if (this.decoder) {
            try {
                // Final call without the stream flag: throws when the input ended inside a multi-byte sequence.
                this.partially_decoded_line += this.decoder.decode();
            } catch (e) {
                if (e instanceof TypeError) {
                    this.store_or_propagate_exception(new RbqlIOHandlingError(utf_decoding_error));
                } else {
                    this.store_or_propagate_exception(e);
                }
            }
        }
        if (this.partially_decoded_line.length) {
            let last_line = this.partially_decoded_line;
            this.partially_decoded_line = '';
            this.process_line(last_line);
        }
        if (this.rfc_line_buffer.length > 0) {
            this.process_record_line(this.rfc_line_buffer.join('\n'));
        }
        this.try_resolve_next_record();
    }


    /** Stop reading from the input stream (no effect for bulk file input). */
    stop() {
        if (this.stream)
            this.stream.destroy(); // TODO consider using pause() instead
    }


    /** Start producing records: attach the stream handlers, or read and process the whole file. */
    async start() {
        if (this.started)
            return;
        this.started = true;
        if (this.stream) {
            this.stream.on('data', (data_chunk) => { this.process_data_stream_chunk(data_chunk); });
            this.stream.on('end', () => { this.process_data_stream_end(); });
            // Without a listener a stream error (e.g. unreadable file) would be thrown as an uncaught exception.
            this.stream.on('error', (error_obj) => { this.store_or_propagate_exception(error_obj); });
        } else {
            let parent_iterator = this;
            return new Promise(function(resolve, reject) {
                fs.readFile(parent_iterator.csv_path, (err, data_chunk) => {
                    if (err) {
                        reject(err);
                    } else {
                        parent_iterator.process_data_bulk(data_chunk);
                        resolve();
                    }
                });
            });
        }
    }


    /** Return the list of warning messages collected while reading the table. */
    get_warnings() {
        let result = [];
        if (this.first_defective_line !== null)
            result.push(`Inconsistent double quote escaping in ${this.table_name} table. E.g. at line ${this.first_defective_line}`);
        if (this.utf8_bom_removed)
            result.push(`UTF-8 Byte Order Mark (BOM) was found and skipped in ${this.table_name} table`);
        if (Object.keys(this.fields_info).length > 1)
            result.push(make_inconsistent_num_fields_warning(this.table_name, this.fields_info));
        return result;
    }
}
|
||||
|
||||
|
||||
/**
 * Writes output records to a writable stream as delimiter-separated lines.
 *
 * The join method is chosen once from the output policy ('simple', 'quoted', 'quoted_rfc',
 * 'monocolumn' or 'whitespace'); any other policy makes the constructor throw RbqlIOHandlingError.
 *
 * Changes compared to the previous revision:
 *   - mono_join() always returns a string, so a numeric (or other non-string) value no longer
 *     makes stream.write() throw.
 *   - simple_join() looks for the separator inside each field separately; a multi-character
 *     separator that only appears across the boundary of two fields is not reported anymore.
 */
class CSVWriter extends rbql.RBQLOutputWriter {
    constructor(stream, close_stream_on_finish, encoding, delim, policy, line_separator='\n') {
        super();
        this.stream = stream;
        this.encoding = encoding;
        if (encoding)
            this.stream.setDefaultEncoding(encoding);
        this.stream.on('error', (error_obj) => { this.store_first_error(error_obj); });
        this.delim = delim;
        this.policy = policy;
        this.line_separator = line_separator;
        // Elements of array values are joined with '|', or with ';' when '|' is the field delimiter itself.
        this.sub_array_delim = delim == '|' ? ';' : '|';

        this.close_stream_on_finish = close_stream_on_finish;

        this.null_in_output = false;
        this.delim_in_simple_output = false;
        this.header_len = null;
        this.first_error = null;

        if (policy == 'simple') {
            this.polymorphic_join = this.simple_join;
        } else if (policy == 'quoted') {
            this.polymorphic_join = this.quoted_join;
        } else if (policy == 'quoted_rfc') {
            this.polymorphic_join = this.quoted_join_rfc;
        } else if (policy == 'monocolumn') {
            this.polymorphic_join = this.mono_join;
        } else if (policy == 'whitespace') {
            this.polymorphic_join = this.simple_join;
        } else {
            throw new RbqlIOHandlingError('Unknown output csv policy');
        }
    }


    store_first_error(error_obj) {
        // Store only first error because it is typically more important than the subsequent ones.
        if (this.first_error === null)
            this.first_error = error_obj;
    }


    /** Write the header record (if any) and remember its length for the consistency check in write(). */
    set_header(header) {
        if (header !== null) {
            this.header_len = header.length;
            this.write(header);
        }
    }


    /** Join fields using the 'quoted' policy. */
    quoted_join(fields) {
        let delim = this.delim;
        var quoted_fields = fields.map(function(v) { return csv_utils.quote_field(String(v), delim); });
        return quoted_fields.join(this.delim);
    }


    /** Join fields using the 'quoted_rfc' policy. */
    quoted_join_rfc(fields) {
        let delim = this.delim;
        var quoted_fields = fields.map(function(v) { return csv_utils.rfc_quote_field(String(v), delim); });
        return quoted_fields.join(this.delim);
    }


    /** Return the only field of the record as a string; throws when the record has several fields. */
    mono_join(fields) {
        if (fields.length > 1) {
            throw new RbqlIOHandlingError('Unable to use "Monocolumn" output format: some records have more than one field');
        }
        // stream.write() accepts strings and buffers only, hence the explicit conversion.
        return fields.length ? String(fields[0]) : '';
    }


    /** Join fields with the delimiter as is; records a warning flag when a field contains the delimiter. */
    simple_join(fields) {
        var res = fields.join(this.delim);
        let delim = this.delim;
        // null/undefined are rendered as empty strings, the same way Array.join() renders them.
        let delim_inside_field = fields.some(function(v) { return (v == null ? '' : String(v)).indexOf(delim) != -1; });
        if (delim_inside_field) {
            this.delim_in_simple_output = true;
        }
        return res;
    }


    /** Replace null/undefined values with '' and flatten array values, modifying `out_fields` in place. */
    normalize_fields(out_fields) {
        for (var i = 0; i < out_fields.length; i++) {
            if (out_fields[i] == null) {
                this.null_in_output = true;
                out_fields[i] = '';
            } else if (Array.isArray(out_fields[i])) {
                this.normalize_fields(out_fields[i]);
                out_fields[i] = out_fields[i].join(this.sub_array_delim);
            }
        }
    }


    /**
     * Write one record followed by the line separator.
     * The returned promise rejects when the record length differs from the header length or
     * when a stream error has been recorded before; otherwise it resolves to true.
     */
    async write(fields) {
        if (this.header_len !== null && fields.length != this.header_len)
            throw new RbqlIOHandlingError(`Inconsistent number of columns in output header and the current record: ${this.header_len} != ${fields.length}`);
        this.normalize_fields(fields);
        this.stream.write(this.polymorphic_join(fields));
        this.stream.write(this.line_separator);
        let writer_error = this.first_error;
        return new Promise(function(resolve, reject) {
            if (writer_error !== null) {
                reject(writer_error);
            } else {
                resolve(true);
            }
        });
    }


    /** Write every record of `table` without waiting for the individual write() promises. */
    _write_all(table) {
        for (let i = 0; i < table.length; i++) {
            this.write(table[i]);
        }
    }


    /**
     * Finish writing. Ends the stream when close_stream_on_finish is set.
     * The returned promise rejects when a stream error was recorded before the call.
     */
    async finish() {
        let close_stream_on_finish = this.close_stream_on_finish;
        let output_stream = this.stream;
        let output_encoding = this.encoding;
        let writer_error = this.first_error;
        let finish_promise = new Promise(function(resolve, reject) {
            if (writer_error !== null) {
                reject(writer_error);
            }
            // The stream is still closed after a rejection; the later resolve() call is then a no-op.
            if (close_stream_on_finish) {
                output_stream.end('', output_encoding, () => { resolve(); });
            } else {
                setTimeout(() => { resolve(); }, 0);
            }
        });
        return finish_promise;
    }


    /** Return the list of warning messages collected while writing. */
    get_warnings() {
        let result = [];
        if (this.null_in_output)
            result.push('null values in output were replaced by empty strings');
        if (this.delim_in_simple_output)
            result.push('Some output fields contain separator');
        return result;
    }

}
|
||||
|
||||
|
||||
// Table registry that resolves JOIN table ids to CSV files on the local file system
// and hands out CSVRecordIterator instances for them.
class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
    constructor(input_file_dir, delim, policy, encoding, has_header=false, comment_prefix=null, options=null) {
        super();
        // Configuration applied to every join table opened through this registry.
        Object.assign(this, {input_file_dir, delim, policy, encoding, has_header, comment_prefix, options});
        // State filled in by get_iterator_by_table_id().
        this.stream = null;
        this.record_iterator = null;
        this.bulk_input_path = null;
        this.table_path = null;
    }

    // Locate the table file for `table_id` and return a record iterator over it.
    // Throws RbqlIOHandlingError when find_table_path() returns null.
    get_iterator_by_table_id(table_id) {
        const resolved_path = find_table_path(this.input_file_dir, table_id);
        this.table_path = resolved_path;
        if (resolved_path === null) {
            throw new RbqlIOHandlingError(`Unable to find join table "${table_id}"`);
        }
        const bulk_read = this.options && this.options['bulk_read'];
        if (bulk_read) {
            // The iterator receives the path instead of a stream.
            this.bulk_input_path = resolved_path;
        } else {
            this.stream = fs.createReadStream(resolved_path);
        }
        this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
        return this.record_iterator;
    }

    // Append registry-level warnings to the caller-provided `output_warnings` array.
    get_warnings(output_warnings) {
        if (!this.record_iterator || !this.has_header) {
            return;
        }
        output_warnings.push(`The first record in JOIN file ${path.basename(this.table_path)} was also treated as header (and skipped)`);
    }
}
|
||||
|
||||
|
||||
// Run an RBQL query against a CSV input and write the result as CSV.
//
// input_path / output_path: file paths; `null` selects process.stdin / process.stdout.
// csv_encoding: 'latin-1' is treated as 'binary'.
// output_warnings: array that receives warnings (passed to rbql.query and to the join registry).
// options: optional object; when options['bulk_read'] is truthy and input_path is set, the input path
//          is handed to the iterator instead of a read stream.
// Rejects with RbqlIOHandlingError for invalid delimiter/policy/encoding combinations.
async function query_csv(query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers=false, comment_prefix=null, user_init_code='', options=null) {
    // Validate arguments BEFORE touching the file system: creating the write stream first would
    // create/truncate the output file even for a call that is rejected right away.
    if (input_delim == '"' && input_policy == 'quoted')
        throw new RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy');
    if (csv_encoding == 'latin-1')
        csv_encoding = 'binary';
    if (!is_ascii(query_text) && csv_encoding == 'binary')
        throw new RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary');
    if ((!is_ascii(input_delim) || !is_ascii(output_delim)) && csv_encoding == 'binary')
        throw new RbqlIOHandlingError('To use non-ascii separators enable UTF-8 encoding instead of latin-1/binary');

    // Fall back to the init code from the user's home directory when none was passed explicitly.
    let default_init_source_path = path.join(os.homedir(), '.rbql_init_source.js');
    if (user_init_code == '' && fs.existsSync(default_init_source_path)) {
        user_init_code = read_user_init_code(default_init_source_path);
    }

    let input_stream = null;
    let bulk_input_path = null;
    if (options && options['bulk_read'] && input_path) {
        bulk_input_path = input_path;
    } else {
        input_stream = input_path === null ? process.stdin : fs.createReadStream(input_path);
    }
    // Only a stream opened here is closed by the writer on finish; stdout is left open.
    let [output_stream, close_output_on_finish] = output_path === null ? [process.stdout, false] : [fs.createWriteStream(output_path), true];

    let input_file_dir = input_path ? path.dirname(input_path) : null;
    let join_tables_registry = new FileSystemCSVRegistry(input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
    let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
    let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);

    await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
    join_tables_registry.get_warnings(output_warnings);
}
|
||||
|
||||
|
||||
// Public API of this module.
Object.assign(module.exports, {
    is_ascii,
    CSVRecordIterator,
    CSVWriter,
    FileSystemCSVRegistry,
    interpret_named_csv_format,
    read_user_init_code,
    query_csv,
    RecordQueue,
    exception_to_error_info: rbql.exception_to_error_info,
});
|
||||
@ -0,0 +1,11 @@
|
||||
from .rbql_engine import query
|
||||
from .rbql_engine import query_table
|
||||
from .rbql_engine import exception_to_error_info
|
||||
|
||||
from ._version import __version__
|
||||
|
||||
from .rbql_csv import query_csv
|
||||
|
||||
from .rbql_pandas import query_dataframe as query_pandas_dataframe
|
||||
|
||||
from .rbql_ipython import load_ipython_extension
|
||||
@ -0,0 +1,2 @@
|
||||
from .rbql_main import main
|
||||
main()
|
||||
@ -0,0 +1,3 @@
|
||||
# Explanation of this file purpose: https://stackoverflow.com/a/16084844/2898283
|
||||
__version__ = '0.25.0'
|
||||
|
||||
@ -0,0 +1,114 @@
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
import re
|
||||
|
||||
|
||||
newline_rgx = re.compile('(?:\r\n)|\r|\n')
|
||||
|
||||
field_regular_expression = '"((?:[^"]*"")*[^"]*)"'
|
||||
field_rgx = re.compile(field_regular_expression)
|
||||
field_rgx_external_whitespaces = re.compile(' *' + field_regular_expression + ' *')
|
||||
|
||||
|
||||
def extract_next_field(src, dlm, preserve_quotes_and_whitespaces, allow_external_whitespaces, cidx, result):
    """Parse one field of `src` starting at index `cidx` and append it to `result`.

    A double-quoted field is accepted only when it is immediately followed by `dlm` or by the
    end of `src`; otherwise the text up to the next `dlm` is taken verbatim.

    Returns a tuple (index_after_field_and_delimiter, warning). `warning` is True when a quoted
    field was not properly terminated or when a verbatim field contains a double quote.
    """
    pattern = field_rgx_external_whitespaces if allow_external_whitespaces else field_rgx
    quoted = pattern.match(src, cidx)
    malformed_quoting = False
    if quoted is not None:
        quoted_end = quoted.end()
        if quoted_end == len(src) or src[quoted_end] == dlm:
            if preserve_quotes_and_whitespaces:
                value = quoted.group(0)
            else:
                # Drop the outer quotes and collapse escaped ("") quotes.
                value = quoted.group(1).replace('""', '"')
            result.append(value)
            return (quoted_end + 1, False)
        malformed_quoting = True
    field_end = src.find(dlm, cidx)
    if field_end == -1:
        field_end = len(src)
    raw_field = src[cidx:field_end]
    result.append(raw_field)
    return (field_end + 1, malformed_quoting or '"' in raw_field)
|
||||
|
||||
|
||||
|
||||
def split_quoted_str(src, dlm, preserve_quotes_and_whitespaces=False):
    """Split `src` by `dlm`, honouring double-quoted fields.

    This function is newline-agnostic, i.e. it can also split records with multiline fields.
    Returns a tuple (fields, warning); `warning` is True when any field reported a quoting problem.
    """
    assert dlm != '"'
    if '"' not in src:
        # Optimization for the most common case: nothing is quoted.
        return (src.split(dlm), False)
    fields = []
    has_warning = False
    # Spaces around a quoted field are tolerated unless space itself is the delimiter.
    spaces_allowed = dlm != ' '
    position = 0
    total_len = len(src)
    while position < total_len:
        position, field_warning = extract_next_field(src, dlm, preserve_quotes_and_whitespaces, spaces_allowed, position, fields)
        has_warning = has_warning or field_warning
    if src[-1] == dlm:
        # A trailing delimiter denotes one more (empty) field.
        fields.append('')
    return (fields, has_warning)
|
||||
|
||||
|
||||
def split_whitespace_separated_str(src, preserve_whitespaces=False):
    """Split `src` into fields separated by runs of spaces.

    With `preserve_whitespaces` each field keeps its surrounding spaces, except that every field
    but the last gives up its final character.
    """
    if not preserve_whitespaces:
        return re.findall("[^ ]+", src)
    tokens = re.findall(" *[^ ]+ *", src)
    return [token[:-1] for token in tokens[:-1]] + tokens[-1:]
|
||||
|
||||
|
||||
def smart_split(src, dlm, policy, preserve_quotes_and_whitespaces):
    """Split the record `src` according to `policy`.

    Returns a tuple (fields, warning). Only the fallback (quoted) splitter can report a warning;
    the 'simple', 'whitespace' and 'monocolumn' policies always return False.
    """
    if policy == 'simple':
        fields = src.split(dlm)
    elif policy == 'whitespace':
        fields = split_whitespace_separated_str(src, preserve_quotes_and_whitespaces)
    elif policy == 'monocolumn':
        fields = [src]
    else:
        return split_quoted_str(src, dlm, preserve_quotes_and_whitespaces)
    return (fields, False)
|
||||
|
||||
|
||||
def extract_line_from_data(data):
    """Cut `data` at the first line break found by `newline_rgx`.

    Returns (text_before, line_break, text_after), or (None, None, data) when there is no line break.
    """
    line_break = newline_rgx.search(data)
    if line_break is None:
        return (None, None, data)
    return (data[:line_break.start()], line_break.group(0), data[line_break.end():])
|
||||
|
||||
|
||||
def quote_field(src, delim):
    """Return `src` wrapped in double quotes when it contains a double quote or `delim`.

    Embedded double quotes are doubled. Fields that need no quoting are returned unchanged.
    """
    needs_quoting = '"' in src or delim in src
    if not needs_quoting:
        return src
    # replace() is a no-op when the field was quoted only because of the delimiter.
    return '"{}"'.format(src.replace('"', '""'))
|
||||
|
||||
|
||||
def rfc_quote_field(src, delim):
    """Like quote_field(), but also quotes fields containing '\\n' or '\\r'.

    A single regexp can be used to find all 4 characters simultaneously, but this approach doesn't
    significantly improve performance according to the original author's tests.
    """
    if '"' in src:
        return '"{}"'.format(src.replace('"', '""'))
    special_found = any(marker in src for marker in (delim, '\n', '\r'))
    return '"{}"'.format(src) if special_found else src
|
||||
|
||||
|
||||
def unquote_field(field):
    """Strip the outer double quotes (and spaces around them) from a fully quoted field.

    Escaped quotes ("") inside are collapsed to a single quote. A field that is not a single
    quoted value is returned unchanged.
    """
    whole_field_match = re.match('^ *'+ field_regular_expression + ' *$', field)
    if whole_field_match is None:
        return field
    return whole_field_match.group(1).replace('""', '"')
|
||||
|
||||
|
||||
def unquote_fields(fields):
    """Apply unquote_field() to every item of `fields` and return the results as a new list."""
    return list(map(unquote_field, fields))
|
||||
|
||||
|
||||
@ -0,0 +1,584 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
import codecs
|
||||
import io
|
||||
from errno import EPIPE
|
||||
|
||||
from . import rbql_engine
|
||||
from . import csv_utils
|
||||
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
polymorphic_xrange = range if PY3 else xrange
|
||||
|
||||
default_csv_encoding = 'utf-8'
|
||||
ansi_reset_color_code = '\u001b[0m'
|
||||
|
||||
debug_mode = False
|
||||
|
||||
try:
|
||||
broken_pipe_exception = BrokenPipeError
|
||||
except NameError: # Python 2
|
||||
broken_pipe_exception = IOError
|
||||
|
||||
|
||||
def is_ascii(s):
    """Return True when every character of `s` has a code point below 128 (True for an empty string)."""
    for char in s:
        if ord(char) >= 128:
            return False
    return True
|
||||
|
||||
|
||||
def read_user_init_code(rbql_init_source_path):
    """Return the whole content of the file at `rbql_init_source_path` as text."""
    source_file = open(rbql_init_source_path)
    try:
        return source_file.read()
    finally:
        source_file.close()
|
||||
|
||||
|
||||
def normalize_delim(delim):
    """Map the spellings 'TAB' and the two-character sequence backslash-t to a real tab; return anything else as is."""
    tab_aliases = ('TAB', r'\t')
    return '\t' if delim in tab_aliases else delim
|
||||
|
||||
|
||||
def interpret_named_csv_format(format_name):
    """Translate a case-insensitive format name into a (delimiter, policy) tuple.

    Known names: 'monocolumn', 'csv', 'tsv'. Raises RuntimeError for any other name.
    """
    known_formats = {
        'monocolumn': ('', 'monocolumn'),
        'csv': (',', 'quoted'),
        'tsv': ('\t', 'simple'),
    }
    format_name = format_name.lower()
    dialect = known_formats.get(format_name)
    if dialect is None:
        raise RuntimeError('Unknown format name: "{}"'.format(format_name))
    return dialect
|
||||
|
||||
|
||||
|
||||
def encode_input_stream(stream, encoding):
    """Wrap `stream` into a text reader that decodes with `encoding`.

    `stream` is returned untouched when `encoding` is None.
    """
    if encoding is None:
        return stream
    if not PY3:
        # Reference: https://stackoverflow.com/a/27425797/2898283
        # Python 2 streams don't have stream.buffer and therefore we can't use io.TextIOWrapper. Instead we use codecs
        return codecs.getreader(encoding)(stream)
    # Reference: https://stackoverflow.com/a/16549381/2898283
    # typical stream (e.g. sys.stdin) in Python 3 is actually a io.TextIOWrapper but with some unknown encoding
    try:
        wrapped = io.TextIOWrapper(stream.buffer, encoding=encoding)
    except AttributeError:
        # BytesIO doesn't have "buffer"
        wrapped = io.TextIOWrapper(stream, encoding=encoding)
    return wrapped
|
||||
|
||||
|
||||
def encode_output_stream(stream, encoding):
    """Wrap `stream` into a text writer that encodes with `encoding`.

    `stream` is returned untouched when `encoding` is None.
    """
    if encoding is None:
        return stream
    if not PY3:
        return codecs.getwriter(encoding)(stream)
    try:
        wrapped = io.TextIOWrapper(stream.buffer, encoding=encoding)
    except AttributeError:
        # BytesIO doesn't have "buffer"
        wrapped = io.TextIOWrapper(stream, encoding=encoding)
    return wrapped
|
||||
|
||||
|
||||
def remove_utf8_bom(line, assumed_source_encoding):
    """Return `line` without a leading UTF-8 byte order mark.

    The BOM looks different depending on how the bytes were decoded: three characters for
    'latin-1', a single U+FEFF character for 'utf-8'. For any other encoding value the line is
    returned unchanged.
    """
    if assumed_source_encoding == 'latin-1':
        bom = '\xef\xbb\xbf'
    elif assumed_source_encoding == 'utf-8':
        # TODO consider replacing "utf-8" with "utf-8-sig" to automatically remove BOM, see https://stackoverflow.com/a/44573867/2898283
        bom = u'\ufeff'
    else:
        return line
    bom_len = len(bom)
    if line[:bom_len] == bom:
        return line[bom_len:]
    return line
|
||||
|
||||
|
||||
def try_read_index(index_path):
    """Read a tab-separated index file and return its records as a list of lists of strings.

    Any exception raised while opening or reading the file yields an empty list.
    """
    try:
        with open(index_path) as index_file:
            raw_lines = index_file.readlines()
    except Exception:
        return []
    return [raw_line.rstrip('\r\n').split('\t') for raw_line in raw_lines]
|
||||
|
||||
|
||||
def get_index_record(index_path, key):
    """Return the first record of the index file whose first column equals `key`, or None."""
    matching = (record for record in try_read_index(index_path) if record and record[0] == key)
    return next(matching, None)
|
||||
|
||||
|
||||
def find_table_path(main_table_dir, table_id):
    """Resolve `table_id` to an existing file path, or return None.

    Lookup order:
      1. `table_id` itself (with '~' expanded);
      2. `table_id` relative to `main_table_dir`, when that directory is given and the id is not absolute;
      3. the path registered for `table_id` in the '.rbql_table_names' file in the user's home directory.
    """
    # If table_id is a relative path it could be relative either to the current directory or to the main table dir.
    expanded = os.path.expanduser(table_id)
    if os.path.exists(expanded):
        return expanded
    if main_table_dir and not os.path.isabs(expanded):
        expanded = os.path.join(main_table_dir, expanded)
        if os.path.exists(expanded):
            return expanded
    names_index_path = os.path.join(os.path.expanduser('~'), '.rbql_table_names')
    registered = get_index_record(names_index_path, table_id)
    if registered is None or len(registered) <= 1:
        return None
    registered_path = registered[1]
    return registered_path if os.path.exists(registered_path) else None
|
||||
|
||||
|
||||
def make_inconsistent_num_fields_warning(table_name, inconsistent_records_info):
    """Build the warning text for a table whose records have differing numbers of fields.

    `inconsistent_records_info` maps a field count to a record number; it must contain at least
    two entries. The two entries with the smallest record numbers are used as examples.
    """
    assert len(inconsistent_records_info) > 1
    ordered_by_record = sorted(inconsistent_records_info.items(), key=lambda entry: entry[1])
    (first_count, first_record), (second_count, second_record) = ordered_by_record[:2]
    prefix = 'Number of fields in "{}" table is not consistent: '.format(table_name)
    examples = 'e.g. record {} -> {} fields, record {} -> {} fields'.format(first_record, first_count, second_record, second_count)
    return prefix + examples
|
||||
|
||||
|
||||
def init_ansi_terminal_colors():
    """Build the list of ANSI escape sequences used to colorize output columns.

    The list starts with the reset code, continues with the plain foreground colors 31..37 and
    ends with foreground/background combinations (backgrounds 41..47).
    """
    low_contrast_hues = (2, 6)
    foreground_codes = list(range(31, 38))
    background_codes = list(range(41, 48))
    palette = [ansi_reset_color_code]
    palette.extend('\u001b[{}m'.format(fg) for fg in foreground_codes)
    for fg in foreground_codes:
        fg_hue = fg % 10
        for bg in background_codes:
            bg_hue = bg % 10
            if fg_hue == bg_hue:
                # Same color for text and background.
                continue
            if fg_hue in low_contrast_hues and bg_hue in low_contrast_hues:
                # Skipping green - cyan pair cause they might have low contrast
                continue
            palette.append('\u001b[{};{}m'.format(fg, bg))
    return palette
|
||||
|
||||
|
||||
|
||||
class CSVWriter(rbql_engine.RBQLOutputWriter):
    """Output writer that serializes records as delimited text.

    The `policy` ('simple', 'whitespace', 'quoted', 'quoted_rfc' or 'monocolumn') selects how fields
    are pre-processed and joined. Records passed to write() are modified in place.
    """

    def __init__(self, stream, close_stream_on_finish, encoding, delim, policy, line_separator='\n', colorize_output=False):
        assert encoding in ['utf-8', 'latin-1', None]
        self.stream = encode_output_stream(stream, encoding)
        self.line_separator = line_separator
        self.delim = delim
        # Separator used to flatten list-valued fields; must differ from the main delimiter.
        self.sub_array_delim = '|' if delim != '|' else ';'
        self.broken_pipe = False
        self.close_stream_on_finish = close_stream_on_finish
        self.polymorphic_preprocess = None
        self.polymorphic_join = self.join_by_delim
        self.check_separators_after_join = False
        self.colors = None
        if policy == 'simple' or policy == 'whitespace':
            if colorize_output:
                # Color codes are added before joining, so the separator check has to look at the raw fields.
                self.polymorphic_preprocess = self.check_separators_in_fields_before_join
            else:
                self.check_separators_after_join = True
        elif policy == 'quoted':
            self.polymorphic_preprocess = self.quote_fields
        elif policy == 'quoted_rfc':
            self.polymorphic_preprocess = self.quote_fields_rfc
        elif policy == 'monocolumn':
            colorize_output = False
            self.polymorphic_preprocess = self.ensure_single_field
            self.polymorphic_join = self.monocolumn_join
        else:
            raise RuntimeError('unknown output csv policy')

        if colorize_output:
            self.colors = init_ansi_terminal_colors()

        self.none_in_output = False
        self.delim_in_simple_output = False
        self.header_len = None


    def set_header(self, header):
        """Remember the header length and write the header as the first record (no-op for None)."""
        if header is not None:
            self.header_len = len(header)
            self.write(header)


    def monocolumn_join(self, fields):
        return fields[0]


    def check_separators_in_fields_before_join(self, fields):
        """Flag the writer when any single field contains the delimiter.

        Each field is checked on its own: searching the concatenation of all fields would report a
        multi-character delimiter that only appears across the boundary of two adjacent fields.
        """
        if any(self.delim in field for field in fields):
            self.delim_in_simple_output = True


    def check_separator_in_fields_after_join(self, output_line, num_fields_expected):
        """Flag the writer when the joined line contains more delimiters than the record requires."""
        num_fields_calculated = output_line.count(self.delim) + 1
        if num_fields_calculated != num_fields_expected:
            self.delim_in_simple_output = True


    def join_by_delim(self, fields):
        return self.delim.join(fields)


    def write(self, fields):
        """Serialize one record and write it to the stream.

        Returns True on success and False when the consumer closed the pipe.
        Raises RbqlIOHandlingError when a header was set and the record length differs from it.
        """
        if self.header_len is not None and len(fields) != self.header_len:
            raise rbql_engine.RbqlIOHandlingError('Inconsistent number of columns in output header and the current record: {} != {}'.format(self.header_len, len(fields)))
        self.normalize_fields(fields)

        if self.polymorphic_preprocess is not None:
            self.polymorphic_preprocess(fields)

        if self.colors is not None:
            self.colorize_fields(fields)

        out_line = self.polymorphic_join(fields)

        if self.check_separators_after_join:
            self.check_separator_in_fields_after_join(out_line, len(fields))

        try:
            self.stream.write(out_line)
            if self.colors is not None:
                self.stream.write(ansi_reset_color_code)
            self.stream.write(self.line_separator)
            return True
        except broken_pipe_exception as exc:
            if broken_pipe_exception == IOError:
                # Python 2: IOError covers more than broken pipes, re-raise everything else.
                if exc.errno != EPIPE:
                    raise
            self.broken_pipe = True
            return False


    def colorize_fields(self, fields):
        """Prefix every field with a color code chosen by its column index (in place)."""
        for i in polymorphic_xrange(len(fields)):
            fields[i] = self.colors[i % len(self.colors)] + fields[i]


    def quote_fields(self, fields):
        for i in polymorphic_xrange(len(fields)):
            fields[i] = csv_utils.quote_field(fields[i], self.delim)


    def quote_fields_rfc(self, fields):
        for i in polymorphic_xrange(len(fields)):
            fields[i] = csv_utils.rfc_quote_field(fields[i], self.delim)


    def ensure_single_field(self, fields):
        if len(fields) > 1:
            raise rbql_engine.RbqlIOHandlingError('Unable to use "Monocolumn" output format: some records have more than one field')


    def normalize_fields(self, fields):
        """Convert every item of `fields` to a string, in place.

        None becomes '' (and is remembered for get_warnings()), lists are normalized recursively
        and joined with `sub_array_delim`, everything else goes through str().
        """
        for i in polymorphic_xrange(len(fields)):
            if PY3 and isinstance(fields[i], str):
                continue
            elif not PY3 and isinstance(fields[i], basestring):
                continue
            elif fields[i] is None:
                fields[i] = ''
                self.none_in_output = True
            elif isinstance(fields[i], list):
                self.normalize_fields(fields[i])
                fields[i] = self.sub_array_delim.join(fields[i])
            else:
                fields[i] = str(fields[i])


    def _write_all(self, table):
        """Write copies of all records of `table`, then finish the writer."""
        for record in table:
            self.write(record[:])
        self.finish()


    def finish(self):
        """Close or flush the stream; does nothing after a broken pipe was detected in write()."""
        if self.broken_pipe:
            return
        if self.close_stream_on_finish:
            self.stream.close()
        else:
            try:
                # This flush can still throw even if all previous flushes were successful! And the exception would be printed anyway, even if it is explicitly caught just a couple of lines later.
                # Basically this fails if output is small and this is the first flush after the pipe was broken e.g. second flush if piped to head -n 1
                # Here head -n 1 finished after the first flush, and the final explicit flush here is just killing it
                self.stream.flush()
            except broken_pipe_exception as exc:
                if broken_pipe_exception == IOError:
                    if exc.errno != EPIPE:
                        raise
                # In order to avoid BrokenPipeError from being printed as a warning to stderr, we need to perform this magic below. See:
                # Explanation 1: https://stackoverflow.com/a/35761190/2898283
                # Explanation 2: https://bugs.python.org/issue11380
                try:
                    sys.stdout.close()
                except Exception:
                    pass


    def get_warnings(self):
        """Return the list of warning messages collected while writing."""
        result = list()
        if self.none_in_output:
            result.append('None values in output were replaced by empty strings')
        if self.delim_in_simple_output:
            result.append('Some output fields contain separator')
        return result
|
||||
|
||||
|
||||
class CSVRecordIterator(rbql_engine.RBQLInputIterator):
    """Input iterator that reads delimited text from a stream and yields records (lists of fields).

    Unless `line_mode` is set, the first record is read eagerly in the constructor so that it can
    serve as a header; it is emitted again by get_record() when the table has no header.
    """

    def __init__(self, stream, encoding, delim, policy, has_header=False, comment_prefix=None, table_name='input', variable_prefix='a', chunk_size=1024, line_mode=False):
        assert encoding in ['utf-8', 'latin-1', None]
        self.encoding = encoding
        self.stream = encode_input_stream(stream, encoding)
        self.delim = delim
        self.policy = policy
        self.table_name = table_name
        self.variable_prefix = variable_prefix
        # An empty comment prefix is treated the same as no prefix at all.
        self.comment_prefix = comment_prefix if (comment_prefix is not None and len(comment_prefix)) else None

        self.buffer = ''
        self.detected_line_separator = '\n'
        self.exhausted = False
        self.NR = 0 # Record number
        self.NL = 0 # Line number (NL != NR when the CSV file has comments or multiline fields)
        self.chunk_size = chunk_size
        self.fields_info = dict()

        self.utf8_bom_removed = False
        self.first_defective_line = None
        self.polymorphic_get_row = self.get_row_rfc if policy == 'quoted_rfc' else self.get_row_simple
        self.has_header = has_header
        self.first_record_should_be_emitted = False

        # Always defined, so that get_header(), get_variables_map() and get_record() never hit a
        # missing attribute when the iterator was created with line_mode=True.
        self.first_record = None
        self.first_record_prefetched = not line_mode
        if self.first_record_prefetched:
            self.first_record = self.get_record()
            self.first_record_should_be_emitted = not has_header


    def handle_query_modifier(self, modifier):
        # For `... WITH (header) ...` syntax
        if modifier in ['header', 'headers']:
            self.has_header = True
            self.first_record_should_be_emitted = False
        if modifier in ['noheader', 'noheaders']:
            self.has_header = False
            # Only a record that was actually prefetched can be re-emitted.
            self.first_record_should_be_emitted = self.first_record_prefetched


    def get_variables_map(self, query_text):
        """Collect the column variables referenced in `query_text` for this table."""
        variable_map = dict()
        rbql_engine.parse_basic_variables(query_text, self.variable_prefix, variable_map)
        rbql_engine.parse_array_variables(query_text, self.variable_prefix, variable_map)
        if self.has_header and self.first_record is not None:
            rbql_engine.parse_attribute_variables(query_text, self.variable_prefix, self.first_record, 'CSV header line', variable_map)
            rbql_engine.parse_dictionary_variables(query_text, self.variable_prefix, self.first_record, variable_map)
        return variable_map


    def get_header(self):
        return self.first_record if self.has_header else None


    def _get_row_from_buffer(self):
        """Cut the next line out of the internal buffer; return None when it holds no line break."""
        str_before, separator, str_after = csv_utils.extract_line_from_data(self.buffer)
        if separator is None:
            return None
        if separator == '\r' and str_after == '':
            # The buffer ends with '\r': peek one character to find out whether it is part of '\r\n'.
            one_more = self.stream.read(1)
            if one_more == '\n':
                separator = '\r\n'
            else:
                str_after = one_more
        self.detected_line_separator = separator
        self.buffer = str_after
        return str_before


    def _read_until_found(self):
        """Append chunks from the stream to the buffer until a chunk contains a line break or the stream ends."""
        if self.exhausted:
            return
        chunks = []
        while True:
            chunk = self.stream.read(self.chunk_size)
            if not chunk:
                self.exhausted = True
                break
            chunks.append(chunk)
            if csv_utils.newline_rgx.search(chunk) is not None:
                break
        self.buffer += ''.join(chunks)


    def get_row_simple(self):
        """Return the next physical line without its line break, or None at the end of input.

        Raises RbqlIOHandlingError when the stream raises UnicodeDecodeError.
        """
        try:
            row = self._get_row_from_buffer()
            if row is None:
                self._read_until_found()
                row = self._get_row_from_buffer()
                if row is None:
                    assert self.exhausted
                    if not len(self.buffer):
                        return None
                    # Last line of the input has no trailing line break.
                    row = self.buffer
                    self.buffer = ''
            self.NL += 1
            if self.NL == 1:
                clean_line = remove_utf8_bom(row, self.encoding)
                if clean_line != row:
                    row = clean_line
                    self.utf8_bom_removed = True
            return row
        except UnicodeDecodeError:
            raise rbql_engine.RbqlIOHandlingError('Unable to decode input table as UTF-8. Use binary (latin-1) encoding instead')


    def get_row_rfc(self):
        """Return the next logical row, merging physical lines while a double quote is left open.

        Merged lines are joined with '\\n'. Comment lines are returned as is.
        """
        first_row = self.get_row_simple()
        if first_row is None:
            return None
        if self.comment_prefix is not None and first_row.startswith(self.comment_prefix):
            return first_row
        if first_row.count('"') % 2 == 0:
            return first_row
        rows_buffer = [first_row]
        while True:
            row = self.get_row_simple()
            if row is None:
                return '\n'.join(rows_buffer)
            rows_buffer.append(row)
            if row.count('"') % 2 == 1:
                # An odd number of quotes closes the field opened earlier.
                return '\n'.join(rows_buffer)


    def get_record(self):
        """Return the next record as a list of fields, or None at the end of input.

        Comment lines are skipped. With the 'quoted_rfc' policy the first quoting inconsistency
        raises RbqlIOHandlingError; with other policies it is only remembered for get_warnings().
        """
        if self.first_record_should_be_emitted:
            self.first_record_should_be_emitted = False
            return self.first_record
        while True:
            line = self.polymorphic_get_row()
            if line is None:
                return None
            if self.comment_prefix is None or not line.startswith(self.comment_prefix):
                break
        self.NR += 1
        record, warning = csv_utils.smart_split(line, self.delim, self.policy, preserve_quotes_and_whitespaces=False)
        if warning:
            if self.first_defective_line is None:
                self.first_defective_line = self.NL
                if self.policy == 'quoted_rfc':
                    raise rbql_engine.RbqlIOHandlingError('Inconsistent double quote escaping in {} table at record {}, line {}'.format(self.table_name, self.NR, self.NL))
        num_fields = len(record)
        if num_fields not in self.fields_info:
            # Remember the first record number seen for every distinct field count.
            self.fields_info[num_fields] = self.NR
        return record


    def _get_all_rows(self):
        result = []
        while True:
            row = self.polymorphic_get_row()
            if row is None:
                break
            result.append(row)
        return result


    def get_all_records(self, num_rows=None):
        """Return the remaining records as a list, at most `num_rows` of them when it is given."""
        result = []
        while True:
            record = self.get_record()
            if record is None:
                break
            result.append(record)
            if num_rows is not None and len(result) >= num_rows:
                break
        return result


    def get_warnings(self):
        """Return the list of warning messages collected while reading."""
        result = list()
        if self.utf8_bom_removed:
            result.append('UTF-8 Byte Order Mark (BOM) was found and skipped in {} table'.format(self.table_name))
        if self.first_defective_line is not None:
            result.append('Inconsistent double quote escaping in {} table. E.g. at line {}'.format(self.table_name, self.first_defective_line))
        if len(self.fields_info) > 1:
            result.append(make_inconsistent_num_fields_warning(self.table_name, self.fields_info))
        return result
|
||||
|
||||
|
||||
class FileSystemCSVRegistry(rbql_engine.RBQLTableRegistry):
    """Table registry that resolves JOIN table ids to CSV files on the local file system."""

    def __init__(self, input_file_dir, delim, policy, encoding, has_header, comment_prefix):
        # Settings applied to every join table opened through this registry.
        self.input_file_dir = input_file_dir
        self.delim = delim
        self.policy = policy
        self.encoding = encoding
        self.has_header = has_header
        self.comment_prefix = comment_prefix
        # State filled in by get_iterator_by_table_id().
        self.table_path = None
        self.input_stream = None
        self.record_iterator = None

    def get_iterator_by_table_id(self, table_id, single_char_alias):
        """Open the file behind `table_id` and return a CSVRecordIterator over it.

        Raises RbqlIOHandlingError when find_table_path() cannot resolve the id.
        """
        resolved_path = find_table_path(self.input_file_dir, table_id)
        self.table_path = resolved_path
        if resolved_path is None:
            raise rbql_engine.RbqlIOHandlingError('Unable to find join table "{}"'.format(table_id))
        self.input_stream = open(resolved_path, 'rb')
        self.record_iterator = CSVRecordIterator(self.input_stream, self.encoding, self.delim, self.policy, self.has_header, comment_prefix=self.comment_prefix, table_name=table_id, variable_prefix=single_char_alias)
        return self.record_iterator

    def finish(self):
        """Close the join table stream if one was opened."""
        if self.input_stream is None:
            return
        self.input_stream.close()

    def get_warnings(self):
        """Return registry-level warnings as a list."""
        if self.record_iterator is None or not self.has_header:
            return []
        return ['The first record in JOIN file {} was also treated as header (and skipped)'.format(os.path.basename(self.table_path))] # UT JSON CSV
|
||||
|
||||
|
||||
def query_csv(query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers, comment_prefix=None, user_init_code='', colorize_output=False):
    """Run an RBQL query against a CSV input and write the result as CSV.

    `input_path` / `output_path` may be None to use sys.stdin / sys.stdout.
    `output_warnings` is a list that is extended in place with warnings.
    Raises rbql_engine.RbqlIOHandlingError for invalid delimiter/policy/encoding combinations.
    Streams opened here are closed before returning, also when an exception is raised.
    """
    output_stream, close_output_on_finish = (None, False)
    input_stream, close_input_on_finish = (None, False)
    join_tables_registry = None
    try:
        # Validate the arguments before opening any file: the output is opened in 'wb' mode and
        # would be truncated even for a call that is rejected right away.
        if input_delim == '"' and input_policy == 'quoted':
            raise rbql_engine.RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy')
        if input_delim != ' ' and input_policy == 'whitespace':
            raise rbql_engine.RbqlIOHandlingError('Only whitespace " " delim is supported with "whitespace" policy')

        if not is_ascii(query_text) and csv_encoding == 'latin-1':
            raise rbql_engine.RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary')

        if (not is_ascii(input_delim) or not is_ascii(output_delim)) and csv_encoding == 'latin-1':
            raise rbql_engine.RbqlIOHandlingError('To use non-ascii separators enable UTF-8 encoding instead of latin-1/binary')

        # Fall back to the init code from the user's home directory when none was passed explicitly.
        default_init_source_path = os.path.join(os.path.expanduser('~'), '.rbql_init_source.py')
        if user_init_code == '' and os.path.exists(default_init_source_path):
            user_init_code = read_user_init_code(default_init_source_path)

        # Open the input first, so that a missing input file does not clobber an existing output file.
        input_stream, close_input_on_finish = (sys.stdin, False) if input_path is None else (open(input_path, 'rb'), True)
        output_stream, close_output_on_finish = (sys.stdout, False) if output_path is None else (open(output_path, 'wb'), True)

        input_file_dir = None if not input_path else os.path.dirname(input_path)
        join_tables_registry = FileSystemCSVRegistry(input_file_dir, input_delim, input_policy, csv_encoding, with_headers, comment_prefix)
        input_iterator = CSVRecordIterator(input_stream, csv_encoding, input_delim, input_policy, with_headers, comment_prefix=comment_prefix)
        output_writer = CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy, colorize_output=colorize_output)
        if debug_mode:
            rbql_engine.set_debug_mode()
        rbql_engine.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code)
    finally:
        if close_input_on_finish:
            input_stream.close()
        if close_output_on_finish:
            output_stream.close()
        if join_tables_registry:
            join_tables_registry.finish()
            output_warnings += join_tables_registry.get_warnings()
|
||||
|
||||
|
||||
def set_debug_mode():
    # Turn on the module-level `debug_mode` flag.
    global debug_mode
    debug_mode = True
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,117 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
|
||||
from . import rbql_engine
|
||||
from . import rbql_pandas
|
||||
|
||||
# TODO figure out how to implement at least basic autocomplete for the magic command.
|
||||
|
||||
import re
|
||||
from_autocomplete_matcher = re.compile(r'(?:^| )from +([_a-zA-Z0-9]+)(?:$| )', flags=re.IGNORECASE)
|
||||
join_autocomplete_matcher = re.compile(r'(?:^| )join +([_a-zA-Z0-9]+)(?:$| )', flags=re.IGNORECASE)
|
||||
|
||||
|
||||
class IPythonDataframeRegistry(rbql_engine.RBQLTableRegistry):
    """Table registry that resolves RBQL table ids to pandas dataframes stored in the given namespaces."""
    # TODO consider making this class nested under load_ipython_extension to avoid redundant `import pandas`.

    def __init__(self, all_ns_refs):
        # Iterable of namespace mappings; they are searched in iteration order.
        self.all_ns_refs = all_ns_refs

    def get_iterator_by_table_id(self, table_id, single_char_alias):
        """Return a DataframeIterator over the first namespace entry called `table_id` that is a DataFrame.

        `single_char_alias` is passed to the iterator as its variable prefix.
        Returns None when no namespace holds a DataFrame under that name.
        """
        import pandas
        # The first namespace appears to be the "user" namespace, at least according to this code:
        # https://github.com/google/picatrix/blob/a2f39766ad4b007b125dc8f84916e18fb3dc5478/picatrix/lib/utils.py
        for namespace in self.all_ns_refs:
            if table_id not in namespace:
                continue
            candidate = namespace[table_id]
            if not isinstance(candidate, pandas.DataFrame):
                continue
            return rbql_pandas.DataframeIterator(candidate, normalize_column_names=True, variable_prefix=single_char_alias)
        return None
|
||||
|
||||
|
||||
def eprint(*args, **kwargs):
    """Print to stderr; takes the same arguments as print() except `file`."""
    import sys
    print(*args, file=sys.stderr, **kwargs)
|
||||
|
||||
|
||||
class AttrDict(dict):
    """dict whose items can also be read and written as attributes.

    See the explanation here: https://stackoverflow.com/a/14620633/2898283
    """

    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        # Use the dict itself as the attribute storage, so keys and attributes are the same thing.
        self.__dict__ = self
|
||||
|
||||
|
||||
def load_ipython_extension(ipython):
    """Register the `%rbql` line magic and its tab-completion hook on the given IPython shell.

    When `ipython` is falsy the shell returned by get_ipython() is used instead.
    """
    from IPython.core.magic import register_line_magic
    from IPython.core.getipython import get_ipython
    import pandas

    # The pattern is taken from here: https://github.com/pydoit/doit/blob/9efe141a5dc96d4912143561695af7fc4a076490/doit/tools.py
    # ipython is interactiveshell. Docs: https://ipython.readthedocs.io/en/stable/api/generated/IPython.core.interactiveshell.html
    ipython = ipython or get_ipython()

    def get_table_column_names(table_id):
        # Column names (as strings) of the dataframe called `table_id` in the first namespace.
        # Returns an empty list when there is no such dataframe or it has no explicit column names.
        user_namespace = ipython.all_ns_refs[0] if len(ipython.all_ns_refs) else dict()
        if table_id not in user_namespace:
            return []
        input_df = user_namespace[table_id]
        if not isinstance(input_df, pandas.DataFrame):
            return []
        if isinstance(input_df.columns, pandas.RangeIndex) or not len(input_df.columns):
            return []
        return [str(column) for column in list(input_df.columns)]

    def rbql_completers(self, event):
        # This should return a list of strings with possible completions.
        # Note that all the included strings that don't start with event.symbol
        # are removed, in order to not confuse readline.
        #
        # E.g. typing `%%rbql foo` and hitting tab would yield an event like this:
        # namespace(command='%%rbql', line='%%rbql foo', symbol='foo', text_until_cursor='%%rbql foo')
        # https://stackoverflow.com/questions/36479197/ipython-custom-tab-completion-for-user-magic-function
        # https://github.com/ipython/ipython/issues/11878
        lower_case_keywords = ['update', 'select', 'where', 'limit', 'from', 'group by', 'order by']
        suggestions = lower_case_keywords + [keyword.upper() for keyword in lower_case_keywords]

        # "a." columns come from the table named after FROM, "b." columns from the table named after JOIN.
        prefixed_matchers = (('a.', from_autocomplete_matcher), ('b.', join_autocomplete_matcher))
        for prefix, matcher in prefixed_matchers:
            if not (event.symbol and event.symbol.startswith(prefix)):
                continue
            table_match = matcher.search(event.line)
            if table_match is None:
                continue
            column_names = get_table_column_names(table_match.group(1))
            suggestions += [prefix + column_name for column_name in column_names]

        return suggestions

    ipython.set_hook('complete_command', rbql_completers, str_key='%rbql')

    # The difference between line and cell magic is described here: https://jakevdp.github.io/PythonDataScienceHandbook/01.03-magic-commands.html.
    # In short: line magic only accepts one line of input whereas cell magic supports multiline input as magic command argument.
    # Both would make sense for RBQL queries; only the line magic is registered here.
    @register_line_magic("rbql")
    def run_rbql_query(query_text):
        # Unfortunately globals() and locals() called from here won't contain user variables defined in the notebook.
        tables_registry = IPythonDataframeRegistry(ipython.all_ns_refs)
        output_writer = rbql_pandas.DataframeWriter()
        # The collected warnings are never shown: pandas dataframes can't cause them.
        output_warnings = []
        # TODO make it possible to specify user_init_code in code cells.
        if len(ipython.all_ns_refs) > 0:
            user_namespace = AttrDict(ipython.all_ns_refs[0])
        else:
            user_namespace = None
        error_type, error_msg = None, None
        try:
            rbql_engine.query(query_text, input_iterator=None, output_writer=output_writer, output_warnings=output_warnings, join_tables_registry=tables_registry, user_init_code='', user_namespace=user_namespace)
        except Exception as e:
            error_type, error_msg = rbql_engine.exception_to_error_info(e)
        if error_type is not None:
            # TODO use IPython.display to print error in red color, see https://stackoverflow.com/questions/16816013/is-it-possible-to-print-using-different-colors-in-ipythons-notebook
            eprint('Error [{}]: {}'.format(error_type, error_msg))
            return None
        return output_writer.result
|
||||
@ -0,0 +1,519 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
|
||||
from . import csv_utils
|
||||
from . import rbql_csv
|
||||
from . import rbql_sqlite
|
||||
from . import rbql_engine
|
||||
from . import _version
|
||||
|
||||
# TODO support sqlite input join on both sqlite and csv tables - pass 2 join registries
|
||||
# TODO add demo gif to python package README.md for pypi website
|
||||
|
||||
# TODO add --output_header param
|
||||
# TODO add option to write to other sqlite dbs
|
||||
|
||||
# TODO add an option to align columns for content preview. This would be especially useful for Windows which doesn't support terminal colors
|
||||
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
|
||||
|
||||
history_path = os.path.join(os.path.expanduser("~"), ".rbql_py_query_history")
|
||||
|
||||
polymorphic_input = input if PY3 else raw_input
|
||||
|
||||
|
||||
def eprint(*args, **kwargs):
    """Print to stderr; takes the same arguments as print() except `file`."""
    print(*args, file=sys.stderr, **kwargs)
|
||||
|
||||
|
||||
policy_names = ['quoted', 'simple', 'whitespace', 'quoted_rfc', 'monocolumn']
|
||||
out_format_names = ['csv', 'tsv', 'input']
|
||||
|
||||
|
||||
polymorphic_xrange = range if PY3 else xrange
|
||||
|
||||
|
||||
def get_default_policy(delim):
    """Return the split policy used when none is given explicitly.

    ';' and ',' map to 'quoted', a single space maps to 'whitespace',
    and every other delimiter maps to 'simple'.
    """
    if delim == ' ':
        return 'whitespace'
    return 'quoted' if delim in (';', ',') else 'simple'
|
||||
|
||||
|
||||
def show_error(error_type, error_msg, is_interactive):
    """Report an error: on stderr in non-interactive mode, on stdout in interactive mode.

    In interactive mode the "Error [type]:" prefix is wrapped in terminal color
    codes, except when os.name is 'nt'.
    """
    if not is_interactive:
        eprint('Error [{}]: {}'.format(error_type, error_msg))
        return
    if os.name != 'nt':
        print('{}Error [{}]:{} {}'.format('\u001b[31;1m', error_type, '\u001b[0m', error_msg))
    else:
        # Windows does not support terminal colors
        print('Error [{}]: {}'.format(error_type, error_msg))
|
||||
|
||||
|
||||
def show_warning(msg, is_interactive):
    """Report a warning: on stderr in non-interactive mode, on stdout in interactive mode.

    In interactive mode the "Warning:" prefix is wrapped in terminal color
    codes, except when os.name is 'nt'.
    """
    if not is_interactive:
        eprint('Warning: ' + msg)
        return
    if os.name != 'nt':
        print('{}Warning:{} {}'.format('\u001b[33;1m', '\u001b[0m', msg))
    else:
        # Windows does not support terminal colors
        print('Warning: ' + msg)
|
||||
|
||||
|
||||
def run_with_python_csv(args, is_interactive):
    """Run `args.query` against the CSV input described by `args` and report the outcome.

    Stores the resolved output dialect in `args.output_delim` / `args.output_policy`.
    Warnings are shown on success and the error is shown on failure; in debug
    mode the exception is re-raised instead.
    Returns True when the query succeeded and False otherwise.
    """
    if args.debug_mode:
        rbql_csv.set_debug_mode()
    delim = rbql_csv.normalize_delim(args.delim)
    policy = get_default_policy(delim) if args.policy is None else args.policy

    # The 'input' output format means "reuse the input dialect".
    if args.out_format == 'input':
        out_delim, out_policy = delim, policy
    else:
        out_delim, out_policy = rbql_csv.interpret_named_csv_format(args.out_format)
    args.output_delim, args.output_policy = out_delim, out_policy

    user_init_code = ''
    if args.init_source_file is not None:
        user_init_code = rbql_csv.read_user_init_code(args.init_source_file)

    warnings = []
    error_type, error_msg = None, None
    try:
        rbql_csv.query_csv(args.query, args.input, delim, policy, args.output, out_delim, out_policy, args.encoding, warnings, args.with_headers, args.comment_prefix, user_init_code, args.color)
    except Exception as e:
        if args.debug_mode:
            raise
        error_type, error_msg = rbql_engine.exception_to_error_info(e)

    if error_type is not None:
        show_error(error_type, error_msg, is_interactive)
        return False
    for warning in warnings:
        show_warning(warning, is_interactive)
    return True
|
||||
|
||||
|
||||
def run_with_python_sqlite(args, is_interactive):
    """Run `args.query` against table `args.input` of the sqlite database `args.database`.

    The result is written as CSV according to `args.output`, `args.output_delim`,
    `args.output_policy` and `args.encoding`. Warnings are shown on success and
    the error is shown on failure; in debug mode the exception is re-raised instead.
    Returns True when the query succeeded and False otherwise.
    """
    import sqlite3
    user_init_code = rbql_csv.read_user_init_code(args.init_source_file) if args.init_source_file is not None else ''

    warnings = []
    error_type, error_msg = None, None
    # Bound before the `try` so that the `finally` clause stays valid when connect() itself fails.
    db_connection = None
    try:
        # TODO open in readonly mode
        db_connection = sqlite3.connect(args.database)
        if args.debug_mode:
            rbql_engine.set_debug_mode()
        rbql_sqlite.query_sqlite_to_csv(args.query, db_connection, args.input, args.output, args.output_delim, args.output_policy, args.encoding, warnings, user_init_code, args.color)
    except Exception as e:
        if args.debug_mode:
            raise
        error_type, error_msg = rbql_engine.exception_to_error_info(e)
    finally:
        if db_connection is not None:
            db_connection.close()

    if error_type is None:
        success = True
        for warning in warnings:
            show_warning(warning, is_interactive)
    else:
        success = False
        show_error(error_type, error_msg, is_interactive)

    return success
|
||||
|
||||
|
||||
def is_delimited_table(sampled_lines, delim, policy):
    """Tell whether the sampled lines look like a table in the given dialect.

    Needs at least two lines; every line must split without a warning into the
    same number of fields, and that number must be at least two.
    """
    if len(sampled_lines) < 2:
        return False
    expected_num_fields = None
    for line in sampled_lines:
        fields, warning = csv_utils.smart_split(line, delim, policy, True)
        if warning or len(fields) < 2:
            return False
        if expected_num_fields is None:
            # The first line defines the field count all other lines must match.
            expected_num_fields = len(fields)
        elif len(fields) != expected_num_fields:
            return False
    return True
|
||||
|
||||
|
||||
def sample_lines(src_path, encoding, delim, policy, comment_prefix=None):
    """Return up to the first 10 rows of `src_path` as produced by CSVRecordIterator in line mode."""
    # TODO this should be a dependency-free function, remove sample line functionality from CSVRecordIterator
    max_lines = 10
    sampled = []
    with open(src_path, 'rb') as source:
        line_iterator = rbql_csv.CSVRecordIterator(source, encoding, delim=delim, policy=policy, line_mode=True, comment_prefix=comment_prefix)
        while len(sampled) < max_lines:
            line = line_iterator.polymorphic_get_row()
            if line is None:
                # The iterator returns None once the input is exhausted.
                break
            sampled.append(line)
    return sampled
|
||||
|
||||
|
||||
def autodetect_delim_policy(input_path, encoding, comment_prefix=None):
    """Guess the (delim, policy) pair of the file at `input_path`.

    Candidate dialects are tried in a fixed order against sampled lines; if none
    fits, the '.csv' / '.tsv' file extension decides. Returns (None, None) when
    nothing matches.
    """
    sampled_lines = sample_lines(input_path, encoding, None, None, comment_prefix)
    candidate_dialects = [('\t', 'simple'), (',', 'quoted'), (';', 'quoted'), ('|', 'simple')]
    for dialect in candidate_dialects:
        delim, policy = dialect
        if is_delimited_table(sampled_lines, delim, policy):
            return dialect
    # Content-based detection failed: fall back to the file extension.
    extension_dialects = [('.csv', (',', 'quoted')), ('.tsv', ('\t', 'simple'))]
    for extension, dialect in extension_dialects:
        if input_path.endswith(extension):
            return dialect
    return (None, None)
|
||||
|
||||
|
||||
def sample_records(input_path, delim, policy, encoding, comment_prefix=None):
    """Read up to 10 records from `input_path`; return (records, warnings reported by the iterator)."""
    with open(input_path, 'rb') as source:
        record_iterator = rbql_csv.CSVRecordIterator(source, encoding, delim=delim, policy=policy, comment_prefix=comment_prefix)
        sampled_records = record_iterator.get_all_records(num_rows=10)
        iterator_warnings = record_iterator.get_warnings()
    return (sampled_records, iterator_warnings)
|
||||
|
||||
|
||||
def print_colorized(records, delim, encoding, show_column_names, with_headers):
    """Write records to stdout, one per line, with each column in its own terminal color.

    When `show_column_names` is true every field is prefixed with its variable
    name ("a1:", "a2:", ...), except in the first record when `with_headers` is set.
    Lines are encoded with `encoding`. No color codes are emitted when os.name is 'nt'.
    """
    # TODO consider colorizing a1,a2,... in different default color
    if os.name == 'nt':
        # Windows does not support terminal colors
        reset_color_code, color_codes = '', ['']
    else:
        reset_color_code = '\u001b[0m'
        color_codes = ['\u001b[0m', '\u001b[31m', '\u001b[32m', '\u001b[33m', '\u001b[34m', '\u001b[35m', '\u001b[36m', '\u001b[31;1m', '\u001b[32;1m', '\u001b[33;1m']
    num_colors = len(color_codes)
    for rnum, record in enumerate(records):
        # Header rows (and all rows when names are switched off) get no "aN:" prefixes.
        without_names = not show_column_names or (with_headers and rnum == 0)
        out_fields = []
        for i, field in enumerate(record):
            color_code = color_codes[i % num_colors]
            if without_names:
                out_fields.append('{}{}'.format(color_code, field))
            else:
                out_fields.append('{}a{}:{}'.format(color_code, i + 1, field))
        encoded_line = (delim.join(out_fields) + reset_color_code).encode(encoding)
        if PY3:
            # Encoded bytes have to go to the underlying binary stream in Python 3.
            sys.stdout.buffer.write(encoded_line)
        else:
            sys.stdout.write(encoded_line)
        sys.stdout.write('\n')
        sys.stdout.flush()
|
||||
|
||||
|
||||
def get_default_output_path(input_path, delim):
    """Build the default output path: `input_path` plus '.csv' for ',', '.tsv' for tab, '.txt' otherwise."""
    extension = {',': '.csv', '\t': '.tsv'}.get(delim, '.txt')
    return input_path + extension
|
||||
|
||||
|
||||
def run_interactive_loop(mode, args):
    """Prompt for RBQL queries until one succeeds, the input is empty, or EOF (Ctrl-D) is received.

    `mode` is 'csv' or 'sqlite' and selects the query runner. Each entered query
    is stored in `args.query`. After a successful query a preview of the output
    file is printed and the loop ends.
    """
    assert mode in ['csv', 'sqlite']
    readline = None
    try:
        import readline  # Module readline is not available on Windows
        if os.path.exists(history_path):
            readline.read_history_file(history_path)
        readline.set_history_length(100)
    except Exception:
        pass
    run_query = run_with_python_csv if mode == 'csv' else run_with_python_sqlite
    while True:
        try:
            query = polymorphic_input('Input SQL-like RBQL query and press Enter:\n> ').strip()
        except EOFError:
            print()
            break  # Ctrl-D
        if not len(query):
            break
        if readline is not None:
            try:
                readline.write_history_file(history_path)  # This can fail sometimes for no valid reason
            except Exception:
                pass
        args.query = query
        if not run_query(args, is_interactive=True):
            # The error has already been shown; ask for another query.
            continue
        print('\nOutput table preview:')
        print('====================================')
        records, _warnings = sample_records(args.output, args.output_delim, args.output_policy, args.encoding, comment_prefix=None)
        print_colorized(records, args.output_delim, args.encoding, show_column_names=False, with_headers=False)
        print('====================================')
        print('Success! Result table was saved to: ' + args.output)
        break
|
||||
|
||||
|
||||
def sample_records_sqlite(db_connection, table_name):
    """Return the column names of `table_name` followed by up to 10 of its records.

    The first element of the returned list is the list of column names, so the
    result can be printed as a table with a header row.
    """
    # The original body imported sqlite3 here without using it; the connection is supplied by the caller.
    record_iterator = rbql_sqlite.SqliteRecordIterator(db_connection, table_name)
    records = [record_iterator.get_column_names()]
    records += record_iterator.get_all_records(num_rows=10)
    return records
|
||||
|
||||
|
||||
def read_table_names(db_connection):
    """Return the names of all tables listed in the database's `sqlite_master` catalog."""
    cursor = db_connection.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    return [row[0] for row in cursor.fetchall()]
|
||||
|
||||
|
||||
def select_table_name_by_user_choice(db_connection):
    """List the database tables (at most 20) and keep prompting until the user enters an existing table name."""
    table_names = read_table_names(db_connection)
    max_to_show = 20
    if len(table_names) > max_to_show:
        print('Database has {} tables, showing top {}:'.format(len(table_names), max_to_show))
    else:
        print('Showing database tables:')
    print(', '.join(table_names[:max_to_show]))
    prompt = 'No input table was provided as a CLI argument, please type in the table name to use:\n> '
    while True:
        table_name = polymorphic_input(prompt).strip()
        if table_name in table_names:
            return table_name
        # Any later prompt names the rejected input.
        prompt = '"{}" is not a valid table name. Please enter a valid table name:\n> '.format(table_name)
|
||||
|
||||
|
||||
def start_preview_mode_sqlite(args):
    """Show a preview of the input sqlite table and start the interactive query loop.

    Asks the user for a table name when `args.input` is empty, and fills in a
    default `args.output` path when none was given. Exits with status 1 when the
    preview records cannot be sampled (in debug mode the exception is re-raised instead).
    """
    import sqlite3
    db_path = args.database
    db_connection = sqlite3.connect(db_path)
    try:
        if not args.input:
            args.input = select_table_name_by_user_choice(db_connection)
        try:
            records = sample_records_sqlite(db_connection, table_name=args.input)
        except Exception as e:
            if args.debug_mode:
                raise
            error_type, error_msg = rbql_engine.exception_to_error_info(e)
            show_error(error_type, 'Unable to sample preview records: {}'.format(error_msg), is_interactive=True)
            sys.exit(1)
    finally:
        # Close the connection on every path, including sys.exit() and re-raised errors.
        db_connection.close()

    print('Input table preview:')
    print('====================================')
    print_colorized(records, '|', args.encoding, show_column_names=True, with_headers=False)
    print('====================================\n')
    if args.output is None:
        args.output = get_default_output_path('rbql_sqlite_rs', args.output_delim)
        show_warning('Output path was not provided. Result set will be saved as: ' + args.output, is_interactive=True)
    try:
        run_interactive_loop('sqlite', args)
    except KeyboardInterrupt:
        # Ctrl-C ends the session without a traceback.
        print()
|
||||
|
||||
|
||||
def start_preview_mode_csv(args):
    """Show a preview of the input CSV file and start the interactive query loop.

    The dialect comes from `args.delim` / `args.policy` when a delimiter is
    given; otherwise it is autodetected and stored back into `args`. A default
    `args.output` path is filled in when none was given. Returns early, after
    showing an error, when the input is missing or the dialect cannot be detected.
    """
    input_path = args.input
    if not input_path:
        show_error('generic', 'Input file must be provided in interactive mode. You can use stdin input only in non-interactive mode', is_interactive=True)
        return
    if not os.path.exists(input_path):
        show_error('generic', 'Input file {} does not exist'.format(input_path), is_interactive=True)
        return

    if args.delim is None:
        delim, policy = autodetect_delim_policy(input_path, args.encoding, args.comment_prefix)
        if delim is None:
            show_error('generic', 'Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option', is_interactive=True)
            return
        # Remember the detected dialect so that the query runner uses the same one.
        args.delim = delim
        args.policy = policy
    else:
        delim = rbql_csv.normalize_delim(args.delim)
        policy = get_default_policy(delim) if args.policy is None else args.policy

    records, warnings = sample_records(input_path, delim, policy, args.encoding, args.comment_prefix)
    print('Input table preview:')
    print('====================================')
    print_colorized(records, delim, args.encoding, show_column_names=True, with_headers=args.with_headers)
    print('====================================\n')
    for warning in warnings:
        show_warning(warning, is_interactive=True)

    if args.output is None:
        args.output = get_default_output_path(input_path, delim)
        show_warning('Output path was not provided. Result set will be saved as: ' + args.output, is_interactive=True)
    try:
        run_interactive_loop('csv', args)
    except KeyboardInterrupt:
        # Ctrl-C ends the session without a traceback.
        print()
|
||||
|
||||
|
||||
csv_tool_description = '''
|
||||
Run RBQL queries against CSV files, sqlite databases
|
||||
|
||||
rbql supports two modes: non-interactive (with "--query" option) and interactive (without "--query" option)
|
||||
|
||||
Interactive mode shows source table preview which makes query editing much easier. Usage example:
|
||||
$ rbql --input input.csv
|
||||
|
||||
Non-interactive mode supports reading input tables from stdin and writing output to stdout. Usage example:
|
||||
$ rbql --query "select a1, a2 order by a1" --delim , < input.csv
|
||||
|
||||
By default rbql works with CSV input files.
|
||||
To learn how to use rbql to query an sqlite database, run this command:
|
||||
|
||||
$ rbql sqlite --help
|
||||
|
||||
'''
|
||||
|
||||
csv_epilog = '''
|
||||
Description of the available CSV split policies:
|
||||
* "simple" - RBQL uses simple split() function and doesn't perform special handling of double quote characters
|
||||
* "quoted" - Separator can be escaped inside double-quoted fields. Double quotes inside double-quoted fields must be doubled
|
||||
* "quoted_rfc" - Same as "quoted", but also allows newlines inside double-quoted fields, see RFC-4180: https://tools.ietf.org/html/rfc4180
|
||||
* "whitespace" - Works only with whitespace separator, multiple consecutive whitespaces are treated as a single whitespace
|
||||
* "monocolumn" - RBQL doesn't perform any split at all, each line is a single-element record, i.e. only "a1" and "NR" column variables are available
|
||||
'''
|
||||
|
||||
|
||||
def csv_main():
    """Entry point of the CSV mode: parse the command line, validate it and run interactively or not.

    Interactive mode is used when no "--query" is given. Invalid option
    combinations and failed non-interactive queries exit with status 1.
    """
    def exit_with_error(message):
        # Report a command line usage error on stderr and terminate.
        show_error('generic', message, is_interactive=False)
        sys.exit(1)

    parser = argparse.ArgumentParser(prog='rbql [csv]', formatter_class=argparse.RawDescriptionHelpFormatter, description=csv_tool_description, epilog=csv_epilog)
    add_argument = parser.add_argument
    add_argument('--input', metavar='FILE', help='read csv table from FILE instead of stdin. Required in interactive mode')
    add_argument('--delim', help='delimiter character or multicharacter string, e.g. "," or "###". Can be autodetected in interactive mode')
    add_argument('--policy', help='CSV split policy, see the explanation below. Can be autodetected in interactive mode', choices=policy_names)
    add_argument('--with-headers', action='store_true', help='indicates that input (and join) table has header')
    add_argument('--comment-prefix', metavar='PREFIX', help='ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"')
    add_argument('--query', help='query string in rbql. Run in interactive mode if empty')
    add_argument('--out-format', help='output format', default='input', choices=out_format_names)
    add_argument('--encoding', help='manually set csv encoding', default=rbql_csv.default_csv_encoding, choices=['latin-1', 'utf-8'])
    add_argument('--output', metavar='FILE', help='write output table to FILE instead of stdout')
    add_argument('--color', action='store_true', help='colorize columns in output in non-interactive mode')
    add_argument('--version', action='store_true', help='print RBQL version and exit')
    add_argument('--init-source-file', metavar='FILE', help=argparse.SUPPRESS)  # Path to init source file to use instead of ~/.rbql_init_source.py
    add_argument('--debug-mode', action='store_true', help=argparse.SUPPRESS)  # Run in debug mode
    args = parser.parse_args()

    if args.version:
        print(_version.__version__)
        return

    if args.color and os.name == 'nt':
        exit_with_error('--color option is not supported for Windows terminals')

    if args.output is not None and args.color:
        exit_with_error('"--output" is not compatible with "--color" option')

    if args.policy == 'monocolumn':
        # The monocolumn policy does not split lines, so the delimiter is set to an empty string.
        args.delim = ''

    if args.delim is None and args.policy is not None:
        exit_with_error('Using "--policy" without "--delim" is not allowed')

    if args.encoding != 'latin-1' and not PY3:
        # Python 2 only: decode the command line values with the selected encoding.
        if args.delim is not None:
            args.delim = args.delim.decode(args.encoding)
        if args.query is not None:
            args.query = args.query.decode(args.encoding)

    if args.query is None:
        # Interactive mode.
        if args.color:
            exit_with_error('"--color" option is not compatible with interactive mode. Output and Input files preview would be colorized anyway')
        start_preview_mode_csv(args)
        return

    if args.delim is None:
        exit_with_error('Separator must be provided with "--delim" option in non-interactive mode')
    if not run_with_python_csv(args, is_interactive=False):
        sys.exit(1)
|
||||
|
||||
|
||||
sqlite_tool_description = '''
|
||||
Run RBQL queries against sqlite databases
|
||||
Although sqlite database can serve as an input data source, the query engine which will be used is RBQL (not sqlite).
|
||||
Result set will be written to a csv file. This is also true for UPDATE queries because in RBQL UPDATE is just a special case of SELECT.
|
||||
|
||||
rbql sqlite supports two modes: non-interactive (with "--query" option) and interactive (without "--query" option)
|
||||
|
||||
Interactive mode shows source table preview which makes query editing much easier.
|
||||
$ rbql sqlite path/to/database.sqlite
|
||||
|
||||
Non-interactive mode supports reading input tables from stdin and writing output to stdout. Usage example:
|
||||
$ rbql sqlite path/to/database.sqlite --input Employee --query "select top 20 a1, random.random(), a.salary // 1000 order by a.emp_id"
|
||||
|
||||
'''
|
||||
|
||||
|
||||
def sqlite_main():
    """Entry point of the sqlite mode: parse the command line, validate it and run interactively or not.

    Interactive mode is used when no "--query" is given. When "--input" is
    missing and the database has exactly one table, that table is used. The
    encoding is always 'utf-8'. Invalid usage and failed non-interactive
    queries exit with status 1.
    """
    def exit_with_error(message):
        # Report a command line usage error on stderr and terminate.
        show_error('generic', message, is_interactive=False)
        sys.exit(1)

    parser = argparse.ArgumentParser(prog='rbql sqlite', formatter_class=argparse.RawDescriptionHelpFormatter, description=sqlite_tool_description)
    add_argument = parser.add_argument
    add_argument('database', metavar='PATH', help='PATH to sqlite db')
    add_argument('--input', metavar='NAME', help='NAME of the table in sqlite database')
    add_argument('--query', help='query string in rbql. Run in interactive mode if empty')
    add_argument('--out-format', help='output format', default='csv', choices=['csv', 'tsv'])
    add_argument('--output', metavar='FILE', help='write output table to FILE instead of stdout')
    add_argument('--color', action='store_true', help='colorize columns in output in non-interactive mode. Do NOT use if redirecting output to a file')
    add_argument('--version', action='store_true', help='print RBQL version and exit')
    add_argument('--init-source-file', metavar='FILE', help=argparse.SUPPRESS)  # Path to init source file to use instead of ~/.rbql_init_source.py
    add_argument('--debug-mode', action='store_true', help=argparse.SUPPRESS)  # Run in debug mode
    args = parser.parse_args()

    if args.version:
        print(_version.__version__)
        return

    if not os.path.isfile(args.database):
        exit_with_error('The database does not exist: {}'.format(args.database))

    is_interactive_mode = args.query is None

    import sqlite3
    if not args.input:
        db_connection = sqlite3.connect(args.database)
        table_names = read_table_names(db_connection)
        db_connection.close()
        if len(table_names) == 1:
            # TODO Consider showing a warning here
            args.input = table_names[0]
        elif not is_interactive_mode:
            exit_with_error('Please provide input table name with --input parameter: source database has more than one table')

    if args.output is not None and args.color:
        exit_with_error('"--output" is not compatible with "--color" option')

    args.encoding = 'utf-8'
    if args.out_format == 'csv':
        args.output_delim, args.output_policy = ',', 'quoted_rfc'
    else:
        args.output_delim, args.output_policy = rbql_csv.interpret_named_csv_format(args.out_format)

    if not is_interactive_mode:
        if not run_with_python_sqlite(args, is_interactive=False):
            sys.exit(1)
        return

    if args.color:
        exit_with_error('"--color" option is not compatible with interactive mode. Output and Input files preview would be colorized anyway')
    start_preview_mode_sqlite(args)
|
||||
|
||||
|
||||
def main():
    """Dispatch to the sqlite or csv entry point based on an optional leading mode argument.

    A first argument of 'sqlite' or 'csv' is removed from sys.argv before the
    selected entry point parses the rest; anything else runs the csv entry point
    with sys.argv unchanged.
    """
    mode = sys.argv[1] if len(sys.argv) > 1 else None
    if mode in ('sqlite', 'csv'):
        del sys.argv[1]
    # TODO Consider showing "unknown mode" error if the first argument doesn't start with '--'
    if mode == 'sqlite':
        sqlite_main()
    else:
        csv_main()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@ -0,0 +1,96 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
|
||||
from . import rbql_engine
|
||||
|
||||
|
||||
def get_dataframe_column_names_for_rbql(dataframe):
    """Return the dataframe's column names as strings.

    Returns None when the columns are a pandas RangeIndex or when there are no
    columns at all.
    """
    import pandas
    columns = dataframe.columns
    if not isinstance(columns, pandas.RangeIndex) and len(columns):
        return [str(name) for name in list(columns)]
    return None
|
||||
|
||||
|
||||
class DataframeIterator(rbql_engine.RBQLInputIterator):
    """RBQL input iterator that yields the rows of a pandas dataframe as lists."""

    def __init__(self, table, normalize_column_names=True, variable_prefix='a'):
        self.table = table
        self.normalize_column_names = normalize_column_names
        self.variable_prefix = variable_prefix
        # Number of records returned so far.
        self.NR = 0
        # TODO include `Index` into the list of addressable variable names.
        self.column_names = get_dataframe_column_names_for_rbql(table)
        self.table_itertuples = self.table.itertuples(index=False)

    def get_variables_map(self, query_text):
        """Build the variable map for `query_text` using the rbql_engine variable parsers.

        Basic and array variables are always parsed. When column names are
        available, they are parsed as dictionary and attribute variables if
        `normalize_column_names` is set, and mapped directly otherwise.
        """
        variable_map = dict()
        prefix = self.variable_prefix
        rbql_engine.parse_basic_variables(query_text, prefix, variable_map)
        rbql_engine.parse_array_variables(query_text, prefix, variable_map)
        if self.column_names is None:
            return variable_map
        if not self.normalize_column_names:
            rbql_engine.map_variables_directly(query_text, self.column_names, variable_map)
            return variable_map
        rbql_engine.parse_dictionary_variables(query_text, prefix, self.column_names, variable_map)
        rbql_engine.parse_attribute_variables(query_text, prefix, self.column_names, 'column names list', variable_map)
        return variable_map

    def get_record(self):
        """Return the next row as a list, or None when the dataframe is exhausted."""
        record = next(self.table_itertuples, None)
        if record is None:
            return None
        self.NR += 1
        # Convert to list because `record` has `Pandas` type.
        return list(record)

    def get_warnings(self):
        """Return the list of warnings, which is always empty for this iterator."""
        return []

    def get_header(self):
        """Return the column names computed at construction time (may be None)."""
        return self.column_names
|
||||
|
||||
|
||||
class DataframeWriter(rbql_engine.RBQLOutputWriter):
    """RBQL output writer that collects rows and turns them into a pandas dataframe on finish()."""

    def __init__(self):
        self.header = None
        self.output_rows = []
        # The resulting dataframe; stays None until finish() is called.
        self.result = None

    def write(self, fields):
        """Store one output row; always returns True."""
        self.output_rows.append(fields)
        return True

    def set_header(self, header):
        """Remember the column names to use for the resulting dataframe."""
        self.header = header

    def finish(self):
        """Build the resulting dataframe from the collected rows and header and store it in `self.result`."""
        import pandas as pd
        self.result = pd.DataFrame(self.output_rows, columns=self.header)
|
||||
|
||||
|
||||
class SingleDataframeRegistry(rbql_engine.RBQLTableRegistry):
    """Table registry that exposes exactly one dataframe under one table name."""

    def __init__(self, table, table_name, normalize_column_names=True):
        self.table = table
        self.normalize_column_names = normalize_column_names
        self.table_name = table_name

    def get_iterator_by_table_id(self, table_id, single_char_alias):
        """Return a DataframeIterator for the registered table, using `single_char_alias` as variable prefix.

        The table id is matched case-insensitively against the registered name.
        Raises rbql_engine.RbqlParsingError when `table_id` names another table.
        """
        # Lowercase both sides: comparing the lowercased id with the raw name could never
        # match a registry created with an upper- or mixed-case table name.
        if table_id.lower() != self.table_name.lower():
            raise rbql_engine.RbqlParsingError('Unable to find join table: "{}"'.format(table_id))
        return DataframeIterator(self.table, self.normalize_column_names, single_char_alias)
|
||||
|
||||
|
||||
def query_dataframe(query_text, input_dataframe, output_warnings=None, join_dataframe=None, normalize_column_names=True, user_init_code=''):
    """Run the RBQL query `query_text` against `input_dataframe` and return the resulting DataFrame.

    output_warnings: optional list that receives the warnings; they are discarded when it is None.
    join_dataframe: optional second dataframe, registered as join table 'b'.
    normalize_column_names: forwarded to the input iterator and to the join registry. When it is
        False and a join dataframe is given, the query is first checked for ambiguous variables.
    user_init_code: forwarded to `rbql_engine.query`.
    """
    warnings_sink = [] if output_warnings is None else output_warnings
    has_join_table = join_dataframe is not None
    if has_join_table and not normalize_column_names:
        input_columns = get_dataframe_column_names_for_rbql(input_dataframe)
        join_columns = get_dataframe_column_names_for_rbql(join_dataframe)
        if not (input_columns is None or join_columns is None):
            rbql_engine.ensure_no_ambiguous_variables(query_text, input_columns, join_columns)
    reader = DataframeIterator(input_dataframe, normalize_column_names)
    writer = DataframeWriter()
    if has_join_table:
        registry = SingleDataframeRegistry(join_dataframe, 'b', normalize_column_names)
    else:
        registry = None
    rbql_engine.query(query_text, reader, writer, warnings_sink, registry, user_init_code=user_init_code)
    return writer.result
|
||||
@ -0,0 +1,105 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This module allows querying sqlite databases using RBQL
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
# TODO consider to support table names in "FROM" section of the query, making table_name param of SqliteRecordIterator optional
|
||||
# TODO consider adding support for multiple variable_prefixes i.e. "a" and <table_name> or "b" and <join_table_name> to alias input and join tables
|
||||
|
||||
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
|
||||
from . import rbql_engine
|
||||
from . import rbql_csv
|
||||
|
||||
|
||||
class SqliteRecordIterator(rbql_engine.RBQLInputIterator):
    """Input iterator that reads the rows of one sqlite table through a database cursor."""

    def __init__(self, db_connection, table_name, variable_prefix='a'):
        """Open a cursor on `db_connection` and execute `SELECT *` over `table_name`.

        Raises rbql_engine.RbqlIOHandlingError when `table_name` contains characters outside
        [a-zA-Z0-9_] or when sqlite reports that the table does not exist.
        Any other sqlite3.OperationalError is re-raised unchanged.
        """
        self.db_connection = db_connection
        self.table_name = table_name
        self.variable_prefix = variable_prefix
        self.cursor = self.db_connection.cursor()
        import sqlite3
        # The name is formatted into the SQL text below, so it is restricted to a safe character set first.
        if not re.match('^[a-zA-Z0-9_]*$', table_name):
            raise rbql_engine.RbqlIOHandlingError('Unable to use "{}": input table name can contain only alphanumeric characters and underscore'.format(table_name))
        try:
            self.cursor.execute('SELECT * FROM {};'.format(table_name))
        except sqlite3.OperationalError as e:
            if 'no such table' in str(e):
                raise rbql_engine.RbqlIOHandlingError('no such table "{}"'.format(table_name))
            raise

    def get_header(self):
        """Return the column names taken from the first item of each cursor description entry."""
        return [column[0] for column in self.cursor.description]

    def get_variables_map(self, query_text):
        """Run the basic, array, dictionary and attribute variable parsers of rbql_engine over
        `query_text` and return the dict they filled."""
        found_variables = {}
        prefix = self.variable_prefix
        rbql_engine.parse_basic_variables(query_text, prefix, found_variables)
        rbql_engine.parse_array_variables(query_text, prefix, found_variables)
        rbql_engine.parse_dictionary_variables(query_text, prefix, self.get_header(), found_variables)
        rbql_engine.parse_attribute_variables(query_text, prefix, self.get_header(), 'table column names', found_variables)
        return found_variables

    def get_record(self):
        """Return the next row as a list, or None when the cursor has no more rows."""
        row = self.cursor.fetchone()
        # The tuple is converted to a list because otherwise lists could not be
        # concatenated in expressions with the star `*` operator.
        return None if row is None else list(row)

    def get_all_records(self, num_rows=None):
        """Return the remaining rows as fetched from the cursor, at most `num_rows` of them when given."""
        # TODO consider limiting the row count in the sqlite query itself (LIMIT) when num_rows is not None
        if num_rows is not None:
            rows = []
            for _ in range(num_rows):
                row = self.cursor.fetchone()
                if row is None:
                    break
                rows.append(row)
            return rows
        return self.cursor.fetchall()

    def get_warnings(self):
        """Return the list of warnings of this iterator, which is always empty."""
        return []
|
||||
|
||||
|
||||
class SqliteDbRegistry(rbql_engine.RBQLTableRegistry):
    """Join-table registry that resolves table ids to tables of one sqlite connection."""

    def __init__(self, db_connection):
        self.db_connection = db_connection

    def get_iterator_by_table_id(self, table_id, single_char_alias):
        """Create a SqliteRecordIterator for `table_id`, keep it in `self.record_iterator` and return it."""
        iterator = SqliteRecordIterator(self.db_connection, table_id, single_char_alias)
        self.record_iterator = iterator
        return iterator
|
||||
|
||||
|
||||
def query_sqlite_to_csv(query_text, db_connection, input_table_name, output_path, output_delim, output_policy, output_csv_encoding, output_warnings, user_init_code='', colorize_output=False):
    """Run an RBQL query over the sqlite table `input_table_name` and write the result as CSV.

    The result goes to `output_path` (opened in binary mode) or to stdout when `output_path` is None.
    Raises rbql_engine.RbqlIOHandlingError when the query or the output delimiter contains
    non-ascii characters while `output_csv_encoding` is 'latin-1'.
    When `user_init_code` is empty and `~/.rbql_init_source.py` exists, the init code is read from that file.
    """
    output_stream = None
    close_output_on_finish = False
    join_tables_registry = None
    try:
        if output_path is None:
            output_stream, close_output_on_finish = sys.stdout, False
        else:
            output_stream, close_output_on_finish = open(output_path, 'wb'), True

        latin1_output = output_csv_encoding == 'latin-1'
        if not rbql_csv.is_ascii(query_text) and latin1_output:
            raise rbql_engine.RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary')
        if not rbql_csv.is_ascii(output_delim) and latin1_output:
            raise rbql_engine.RbqlIOHandlingError('To use non-ascii separators enable UTF-8 encoding instead of latin-1/binary')

        default_init_source_path = os.path.join(os.path.expanduser('~'), '.rbql_init_source.py')
        if user_init_code == '' and os.path.exists(default_init_source_path):
            user_init_code = rbql_csv.read_user_init_code(default_init_source_path)

        join_tables_registry = SqliteDbRegistry(db_connection)
        reader = SqliteRecordIterator(db_connection, input_table_name)
        writer = rbql_csv.CSVWriter(output_stream, close_output_on_finish, output_csv_encoding, output_delim, output_policy, colorize_output=colorize_output)
        rbql_engine.query(query_text, reader, writer, output_warnings, join_tables_registry, user_init_code)
    finally:
        # Only a file opened by this function is closed here; stdout is left alone.
        if close_output_on_finish:
            output_stream.close()
|
||||
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 111 KiB |
@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import codecs
|
||||
import time
|
||||
import tempfile
|
||||
import subprocess
|
||||
import argparse
|
||||
import json
|
||||
import base64
|
||||
|
||||
import rbql
|
||||
|
||||
|
||||
def main():
    """Run one RBQL query over a CSV file and print a JSON report to stdout.

    The query argument is expected to be base64-encoded UTF-8 text. On success the report
    is `{"warnings": [...]}`; on any exception it is `{"error_type": ..., "error_msg": ...}`.
    """
    parser = argparse.ArgumentParser()
    argument_specs = (
        ('query', {'help': 'Query string'}),
        ('input_table_path', {'metavar': 'FILE', 'help': 'input path'}),
        ('delim', {'help': 'Delimiter'}),
        ('policy', {'help': 'csv split policy'}),
        ('output_table_path', {'metavar': 'FILE', 'help': 'output path'}),
        ('output_delim', {'help': 'Out Delimiter'}),
        ('output_policy', {'help': 'Out csv policy'}),
        ('encoding', {'help': 'encoding'}),
        ('--with_headers', {'action': 'store_true', 'help': 'use headers'}),
    )
    for argument_name, argument_options in argument_specs:
        parser.add_argument(argument_name, **argument_options)
    args = parser.parse_args()

    query = base64.standard_b64decode(args.query).decode("utf-8")

    try:
        warnings = []
        rbql.query_csv(query, args.input_table_path, args.delim, args.policy, args.output_table_path, args.output_delim, args.output_policy, args.encoding, warnings, args.with_headers)
        sys.stdout.write(json.dumps({'warnings': warnings}))
    except Exception as e:
        # Errors are reported through the same JSON channel instead of a traceback.
        error_type, error_msg = rbql.exception_to_error_info(e)
        sys.stdout.write(json.dumps({'error_type': error_type, 'error_msg': error_msg}))


if __name__ == '__main__':
    main()
|
||||
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 120 KiB |
@ -0,0 +1,286 @@
|
||||
let rbql_suggest = {};
|
||||
|
||||
( function() {
|
||||
|
||||
// Mutable autosuggest state shared by all functions of this closure.
Object.assign(rbql_suggest, {
    active_suggest_idx: null,
    suggest_list: [],
    apply_suggest_callback: null,
    autosuggest_header_vars: [],
    input_id: null,
    suggest_list_id: null,
    suggest_entry_class: null,
    current_join_table_id: null,
    fetch_join_header_callback: null
});
|
||||
|
||||
|
||||
|
||||
// Escape a column name so that it can be placed inside a JS string literal delimited by `quote_char`.
// Backslash, newline, carriage return and tab are always escaped; of the quote characters only the
// requested one is escaped. Any `quote_char` other than ' or " is treated as a backtick.
function js_string_escape_column_name(column_name, quote_char) {
    const escaped = column_name
        .replace(/\\/g, '\\\\')
        .replace(/\n/g, '\\n')
        .replace(/\r/g, '\\r')
        .replace(/\t/g, '\\t');
    switch (quote_char) {
        case "'":
            return escaped.replace(/'/g, "\\'");
        case '"':
            return escaped.replace(/"/g, '\\"');
        default:
            return escaped.replace(/`/g, "\\`");
    }
}
|
||||
|
||||
|
||||
// Convert column names into the variable spellings offered by autosuggest.
// Every entry has: orig_column_name, table_var_prefix, dot_var (e.g. a.name, or null when the
// column name is not a plain identifier), double_q_var (a["name"]) and single_q_var (a['name']).
// At most `max_suggest_len` entries are returned.
function convert_header_to_rbql_variables(header, table_var_prefix) {
    const max_suggest_len = 100; // Suggest UI could become unresponsive if there are too many suggest options to consider
    let result = [];
    for (let h of header) {
        // Checked before adding: the previous post-push `>` comparison let 101 entries through.
        if (result.length >= max_suggest_len)
            break;
        let column_var_options = {orig_column_name: h, table_var_prefix: table_var_prefix};
        if (h.match('^[_a-zA-Z][_a-zA-Z0-9]*$') !== null) {
            column_var_options.dot_var = `${table_var_prefix}.${h}`;
        } else {
            column_var_options.dot_var = null;
        }
        let escaped_column_name = js_string_escape_column_name(h, '"');
        column_var_options.double_q_var = `${table_var_prefix}["${escaped_column_name}"]`;
        escaped_column_name = js_string_escape_column_name(h, "'");
        column_var_options.single_q_var = `${table_var_prefix}['${escaped_column_name}']`;
        result.push(column_var_options);
    }
    return result;
}
|
||||
|
||||
|
||||
// (Re)initialize the autosuggest state.
//   input_id, suggest_list_id: ids of the query input element and of the suggest container element.
//   suggest_entry_class: CSS class given to each suggest entry button.
//   apply_suggest_callback: optional function called with the new query text after a suggest is applied.
//   header: column names of the input table; a falsy value is treated as an empty header.
//   fetch_join_header_callback: optional function (join_table_id, callback) used to load the JOIN table header.
function initialize_suggest(input_id, suggest_list_id, suggest_entry_class, apply_suggest_callback, header, fetch_join_header_callback=null) {
    if (!header)
        header = [];
    rbql_suggest.autosuggest_header_vars = convert_header_to_rbql_variables(header, 'a');
    rbql_suggest.active_suggest_idx = null;
    rbql_suggest.suggest_list = [];
    // The 'b' variables were just dropped above, so the remembered join table must be forgotten too.
    // Otherwise a query that still names the same join table would never trigger a new header fetch.
    rbql_suggest.current_join_table_id = null;
    rbql_suggest.input_id = input_id;
    rbql_suggest.suggest_list_id = suggest_list_id;
    rbql_suggest.suggest_entry_class = suggest_entry_class;
    rbql_suggest.apply_suggest_callback = apply_suggest_callback;
    rbql_suggest.fetch_join_header_callback = fetch_join_header_callback;
}
|
||||
|
||||
|
||||
// Hide the suggest container and clear the active suggest state; does nothing when no suggest is active.
function hide_suggest(suggest_div) {
    if (rbql_suggest.active_suggest_idx === null)
        return;
    suggest_div.style.display = 'none';
    rbql_suggest.active_suggest_idx = null;
    rbql_suggest.suggest_list = [];
}
|
||||
|
||||
|
||||
// Put the suggest entry number `suggest_index` into the query input, place the caret right after
// the inserted variable, notify the optional callback and hide the suggest list.
// Any error is logged to the console instead of being propagated.
function apply_suggest(suggest_index) {
    try {
        const input_element = document.getElementById(rbql_suggest.input_id);
        // Each entry is a pair: [full query text, caret position].
        const entry = rbql_suggest.suggest_list[suggest_index];
        const completed_query = entry[0];
        const caret_position = entry[1];
        input_element.value = completed_query;
        input_element.selectionStart = caret_position;
        input_element.selectionEnd = caret_position;
        input_element.focus();
        if (rbql_suggest.apply_suggest_callback) {
            rbql_suggest.apply_suggest_callback(completed_query);
        }
        hide_suggest(document.getElementById(rbql_suggest.suggest_list_id));
    } catch (e) {
        console.error(`Autocomplete error: ${e}`);
    }
}
|
||||
|
||||
|
||||
// Make a click on `button_element` apply the suggest entry number `suggest_index`.
function register_suggest_callback(button_element, suggest_index) {
    const on_click = () => {
        apply_suggest(suggest_index);
    };
    button_element.addEventListener("click", on_click);
}
|
||||
|
||||
|
||||
// Add (do_highlight is truthy) or remove the "<entry class>_active" CSS class on the button of the
// currently active suggest entry. A highlighted entry is also scrolled into view.
// Does nothing when the button element does not exist.
function highlight_active_suggest_entry(do_highlight) {
    const entry_button = document.getElementById(`rbql_suggest_var_${rbql_suggest.active_suggest_idx}`);
    if (!entry_button)
        return;
    if (!do_highlight) {
        entry_button.className = rbql_suggest.suggest_entry_class;
        return;
    }
    const class_names = [rbql_suggest.suggest_entry_class, rbql_suggest.suggest_entry_class + '_active'];
    entry_button.className = class_names.join(' ');
    entry_button.scrollIntoView();
}
|
||||
|
||||
|
||||
// Remove all child nodes of `root_node`, one first child at a time.
function remove_children(root_node) {
    for (let child = root_node.firstChild; child; child = root_node.firstChild) {
        root_node.removeChild(child);
    }
}
|
||||
|
||||
|
||||
// Rebuild the suggest list inside `suggest_div` and display it above the query input.
//   query_before_var: query text up to the variable being completed.
//   relevant_suggest_list: variable spellings to offer.
//   query_after_cursor: query text after the caret.
// For every offered variable a button is created and a [full query text, caret position] pair
// is stored in rbql_suggest.suggest_list. The first entry becomes the active one.
function show_suggest(suggest_div, query_before_var, relevant_suggest_list, query_after_cursor) {
    const input_element = document.getElementById(rbql_suggest.input_id);
    let caret_left_shift = 0;
    try {
        const caret_coordinates = getCaretCoordinates(input_element, input_element.selectionStart);
        caret_left_shift = caret_coordinates.left || 0;
    } catch (e) {
        // Without caret coordinates the list is aligned with the left edge of the input.
        caret_left_shift = 0;
    }
    remove_children(suggest_div);
    rbql_suggest.active_suggest_idx = 0;
    rbql_suggest.suggest_list = [];
    for (const [entry_index, suggest_text] of relevant_suggest_list.entries()) {
        const entry_button = document.createElement('button');
        entry_button.className = rbql_suggest.suggest_entry_class;
        entry_button.textContent = suggest_text;
        entry_button.setAttribute('id', `rbql_suggest_var_${entry_index}`);
        register_suggest_callback(entry_button, entry_index);
        suggest_div.appendChild(entry_button);
        const query_with_suggest = query_before_var + suggest_text;
        rbql_suggest.suggest_list.push([query_with_suggest + query_after_cursor, query_with_suggest.length]);
    }
    highlight_active_suggest_entry(true);
    // The container has to be displayed before its size can be measured.
    suggest_div.style.display = 'block';
    const list_height = suggest_div.offsetHeight;
    const list_width = suggest_div.offsetWidth;
    const input_box = input_element.getBoundingClientRect();
    const left_position = Math.max(0, Math.min(input_box.left + caret_left_shift, input_box.right - list_width));
    suggest_div.style.left = left_position + 'px';
    suggest_div.style.top = (input_box.top - list_height) + 'px';
}
|
||||
|
||||
|
||||
// Move the active suggest entry one step 'up' or (for any other `direction`) down, wrapping around.
// Returns false when no suggest list is active, true otherwise.
function switch_active_suggest(direction) {
    if (rbql_suggest.active_suggest_idx === null)
        return false;
    highlight_active_suggest_entry(false);
    const entries_count = rbql_suggest.suggest_list.length;
    // Stepping back by one is expressed as stepping forward by (count - 1) to stay non-negative.
    const step = direction == 'up' ? entries_count - 1 : 1;
    rbql_suggest.active_suggest_idx = (rbql_suggest.active_suggest_idx + step) % entries_count;
    highlight_active_suggest_entry(true);
    return true;
}
|
||||
|
||||
|
||||
// Tell whether `keycode` falls into one of the key code ranges treated as printable:
// 48-57, 32, 65-90, 186-192 and 219-222.
function is_printable_key_code(keycode) {
    // Taken from here: https://stackoverflow.com/a/12467610/2898283
    const strictly_between = (low, high) => keycode > low && keycode < high;
    return strictly_between(47, 58) || keycode == 32 || strictly_between(64, 91) || strictly_between(185, 193) || strictly_between(218, 223);
}
|
||||
|
||||
|
||||
// Keydown handler of the query input: key codes 38/40 move through the suggest list and
// key code 39 applies the active suggest entry.
// The default action is prevented only when the key was consumed by the suggest list; this keeps
// the default arrow-up handler from moving the caret to the start of the input field.
function handle_input_keydown(event) {
    try {
        const key_code = event.keyCode;
        if (key_code == 38 || key_code == 40) {
            if (switch_active_suggest(key_code == 38 ? 'up' : 'down'))
                event.preventDefault();
        } else if (key_code == 39 && rbql_suggest.active_suggest_idx !== null) {
            apply_suggest(rbql_suggest.active_suggest_idx);
            event.preventDefault();
        }
    } catch (e) {
        console.error(`Autocomplete error: ${e}`);
    }
}
|
||||
|
||||
|
||||
// Case-insensitive check that `full_variable` starts with `cursor_var_prefix` without being equal to it.
// A falsy `full_variable` (e.g. null) is returned as is.
function variable_has_prefix(full_variable, cursor_var_prefix) {
    if (!full_variable)
        return full_variable;
    const starts_with_prefix = full_variable.toLowerCase().startsWith(cursor_var_prefix.toLowerCase());
    return starts_with_prefix && full_variable != cursor_var_prefix;
}
|
||||
|
||||
|
||||
// Pick the spelling of one column variable that completes the text typed so far, or null.
// For a "<prefix>." style input the dot spelling is preferred; when the typed text only matches the
// raw column name, the single-quoted spelling is offered instead. Otherwise the single-quoted
// and then the double-quoted spellings are tried.
function get_best_matching_variable(cursor_var_prefix, column_var_options) {
    const dot_prefix = column_var_options.table_var_prefix + '.';
    if (cursor_var_prefix.startsWith(dot_prefix)) {
        if (variable_has_prefix(column_var_options.dot_var, cursor_var_prefix))
            return column_var_options.dot_var;
        if (variable_has_prefix(dot_prefix + column_var_options.orig_column_name, cursor_var_prefix))
            return column_var_options.single_q_var;
    }
    for (const candidate of [column_var_options.single_q_var, column_var_options.double_q_var]) {
        if (variable_has_prefix(candidate, cursor_var_prefix))
            return candidate;
    }
    return null;
}
|
||||
|
||||
|
||||
// Extract the table id that follows " join " in the query, or return null when there is none.
// The id is only accepted when it is followed by the end of the query, by a trailing " o"
// or by " on".
function get_join_table_id(query) {
    const join_match = query.match(/ join +([^ ]+)(?: *$| +o$| +on)/i);
    return join_match ? join_match[1] : null;
}
|
||||
|
||||
|
||||
// Replace the JOIN table ('b') variables offered by autosuggest with those built from `join_header`.
// An empty or missing header simply removes all 'b' variables.
function adjust_join_table_header_callback(join_header) {
    // Always drop the old 'b' variables first: appending without this step accumulated stale and
    // duplicate entries whenever the join table changed or its header was fetched more than once.
    let header_vars = rbql_suggest.autosuggest_header_vars.filter(v => v.table_var_prefix != 'b');
    if (join_header && join_header.length) {
        header_vars = header_vars.concat(convert_header_to_rbql_variables(join_header, 'b'));
    }
    rbql_suggest.autosuggest_header_vars = header_vars;
}
|
||||
|
||||
|
||||
// Keyup handler of the query input.
// Key code 13 applies the active suggest entry. A printable key or key code 8 re-evaluates the query:
// the JOIN table header is re-fetched when the join table id changed, and the suggest list is
// rebuilt for the variable prefix found right before the caret.
// Any error is logged to the console instead of being propagated.
function handle_input_keyup(event) {
    try {
        const key_code = event.keyCode;
        if (key_code == 13) {
            if (rbql_suggest.active_suggest_idx !== null)
                apply_suggest(rbql_suggest.active_suggest_idx);
            return;
        }
        if (!is_printable_key_code(key_code) && !(key_code == 8 /* Backspace */))
            return;

        // This can't be moved into the keydown handler because the characters appear in the input box only after the keyup event.
        // Alternatively the next char could be derived from event.keyCode, but that would be additional logic.
        const input_element = document.getElementById(rbql_suggest.input_id);
        const current_query = input_element.value;

        if (rbql_suggest.fetch_join_header_callback !== null) {
            const join_table_id = get_join_table_id(current_query);
            if (rbql_suggest.current_join_table_id != join_table_id) {
                rbql_suggest.current_join_table_id = join_table_id;
                if (join_table_id === null) {
                    adjust_join_table_header_callback([]);
                } else {
                    rbql_suggest.fetch_join_header_callback(join_table_id, adjust_join_table_header_callback);
                }
            }
        }

        const suggest_div = document.getElementById(rbql_suggest.suggest_list_id);
        hide_suggest(suggest_div);

        const cursor_pos = input_element.selectionStart;
        const query_before_cursor = current_query.substr(0, cursor_pos);
        const query_after_cursor = current_query.substr(cursor_pos);
        // TODO improve the match - just find last var-looking expression. The problem with this one - it won't match extended suggest like a.arbitrary-var -> a['arbitrary-var']
        const last_var_prefix_match = query_before_cursor.match(/(?:[^_a-zA-Z0-9])([ab](?:\.[_a-zA-Z0-9]*|\[[^\]]*))$/);
        if (!last_var_prefix_match)
            return;

        const cursor_var_prefix = last_var_prefix_match[1];
        // "+ 1" keeps the single non-identifier character that precedes the variable.
        const query_before_var = query_before_cursor.substr(0, last_var_prefix_match.index + 1);
        const relevant_suggest_list = [];
        for (const column_var_options of rbql_suggest.autosuggest_header_vars) {
            const suggested_var = get_best_matching_variable(cursor_var_prefix, column_var_options);
            if (suggested_var)
                relevant_suggest_list.push(suggested_var);
        }
        if (relevant_suggest_list.length) {
            show_suggest(suggest_div, query_before_var, relevant_suggest_list, query_after_cursor);
        }
    } catch (e) {
        console.error(`Autocomplete error: ${e}`);
    }
}
|
||||
|
||||
|
||||
// Functions exposed to the outside through the rbql_suggest object.
Object.assign(rbql_suggest, {
    initialize_suggest: initialize_suggest,
    handle_input_keydown: handle_input_keydown,
    handle_input_keyup: handle_input_keyup,
    convert_header_to_rbql_variables: convert_header_to_rbql_variables
});
|
||||
|
||||
} )();
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "clnsv syntax",
|
||||
"scopeName": "text.clnsv",
|
||||
"fileTypes": [],
|
||||
"patterns": [
|
||||
{ "match": "([^:]*:?)([^:]*:?)([^:]*:?)([^:]*:?)([^:]*:?)([^:]*:?)([^:]*:?)([^:]*:?)([^:]*:?)([^:]*:?)",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca03e352-04ef-4340-9a6b-9b99aae1c058"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "crtsv syntax",
|
||||
"scopeName": "text.crtsv",
|
||||
"fileTypes": [],
|
||||
"patterns": [
|
||||
{ "match": "([^^]*\\^?)([^^]*\\^?)([^^]*\\^?)([^^]*\\^?)([^^]*\\^?)([^^]*\\^?)([^^]*\\^?)([^^]*\\^?)([^^]*\\^?)([^^]*\\^?)",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca03e352-04ef-4340-9a6b-9b99aae1c094"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "csv syntax",
|
||||
"scopeName": "text.csv",
|
||||
"fileTypes": ["csv"],
|
||||
"patterns": [
|
||||
{ "match": "((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:,|$))|(?:[^,]*(?:,|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:,|$))|(?:[^,]*(?:,|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:,|$))|(?:[^,]*(?:,|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:,|$))|(?:[^,]*(?:,|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:,|$))|(?:[^,]*(?:,|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:,|$))|(?:[^,]*(?:,|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:,|$))|(?:[^,]*(?:,|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:,|$))|(?:[^,]*(?:,|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:,|$))|(?:[^,]*(?:,|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:,|$))|(?:[^,]*(?:,|$)))?",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca03e352-04ef-4340-9a6b-9b99aae1c418"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "dbqsv syntax",
|
||||
"scopeName": "text.dbqsv",
|
||||
"fileTypes": [],
|
||||
"patterns": [
|
||||
{ "match": "([^\"]*\"?)([^\"]*\"?)([^\"]*\"?)([^\"]*\"?)([^\"]*\"?)([^\"]*\"?)([^\"]*\"?)([^\"]*\"?)([^\"]*\"?)([^\"]*\"?)",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca03e352-04ef-4340-9a6b-9b99aae1c034"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "dotsv syntax",
|
||||
"scopeName": "text.dotsv",
|
||||
"fileTypes": [],
|
||||
"patterns": [
|
||||
{ "match": "([^.]*\\.?)([^.]*\\.?)([^.]*\\.?)([^.]*\\.?)([^.]*\\.?)([^.]*\\.?)([^.]*\\.?)([^.]*\\.?)([^.]*\\.?)([^.]*\\.?)",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca03e352-04ef-4340-9a6b-9b99aae1c046"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "eqlsv syntax",
|
||||
"scopeName": "text.eqlsv",
|
||||
"fileTypes": [],
|
||||
"patterns": [
|
||||
{ "match": "([^=]*=?)([^=]*=?)([^=]*=?)([^=]*=?)([^=]*=?)([^=]*=?)([^=]*=?)([^=]*=?)([^=]*=?)([^=]*=?)",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca03e352-04ef-4340-9a6b-9b99aae1c061"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "hypsv syntax",
|
||||
"scopeName": "text.hypsv",
|
||||
"fileTypes": [],
|
||||
"patterns": [
|
||||
{ "match": "([^\\-]*-?)([^\\-]*-?)([^\\-]*-?)([^\\-]*-?)([^\\-]*-?)([^\\-]*-?)([^\\-]*-?)([^\\-]*-?)([^\\-]*-?)([^\\-]*-?)",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca03e352-04ef-4340-9a6b-9b99aae1c045"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "pipe syntax",
|
||||
"scopeName": "text.psv",
|
||||
"fileTypes": [],
|
||||
"patterns": [
|
||||
{ "match": "([^|]*\\|?)([^|]*\\|?)([^|]*\\|?)([^|]*\\|?)([^|]*\\|?)([^|]*\\|?)([^|]*\\|?)([^|]*\\|?)([^|]*\\|?)([^|]*\\|?)",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca13e332-04ef-1340-9a6b-9b99aae1c418"
|
||||
}
|
||||
@ -0,0 +1,91 @@
|
||||
{ "name": "rbhover syntax",
|
||||
"scopeName": "text.rbhover",
|
||||
"fileTypes": ["rbhover"],
|
||||
"patterns": [
|
||||
{
|
||||
"match": "^Col #[0-9]*1(?:[^0-9].*?)?( WARN: Inconsistent num of fields, header: [0-9]*, this line: [0-9]*)?( ERR: Inconsistent double quotes in line)?$",
|
||||
"name": "rainbow1",
|
||||
"captures": {
|
||||
"1": {"name": "entity.name.function"},
|
||||
"2": {"name": "invalid"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": "^Col #[0-9]*2(?:[^0-9].*?)?( WARN: Inconsistent num of fields, header: [0-9]*, this line: [0-9]*)?( ERR: Inconsistent double quotes in line)?$",
|
||||
"name": "keyword.rainbow2",
|
||||
"captures": {
|
||||
"1": {"name": "entity.name.function"},
|
||||
"2": {"name": "invalid"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": "^Col #[0-9]*3(?:[^0-9].*?)?( WARN: Inconsistent num of fields, header: [0-9]*, this line: [0-9]*)?( ERR: Inconsistent double quotes in line)?$",
|
||||
"name": "entity.name.function.rainbow3",
|
||||
"captures": {
|
||||
"1": {"name": "entity.name.function"},
|
||||
"2": {"name": "invalid"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": "^Col #[0-9]*4(?:[^0-9].*?)?( WARN: Inconsistent num of fields, header: [0-9]*, this line: [0-9]*)?( ERR: Inconsistent double quotes in line)?$",
|
||||
"name": "comment.rainbow4",
|
||||
"captures": {
|
||||
"1": {"name": "entity.name.function"},
|
||||
"2": {"name": "invalid"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": "^Col #[0-9]*5(?:[^0-9].*?)?( WARN: Inconsistent num of fields, header: [0-9]*, this line: [0-9]*)?( ERR: Inconsistent double quotes in line)?$",
|
||||
"name": "string.rainbow5",
|
||||
"captures": {
|
||||
"1": {"name": "entity.name.function"},
|
||||
"2": {"name": "invalid"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": "^Col #[0-9]*6(?:[^0-9].*?)?( WARN: Inconsistent num of fields, header: [0-9]*, this line: [0-9]*)?( ERR: Inconsistent double quotes in line)?$",
|
||||
"name": "variable.parameter.rainbow6",
|
||||
"captures": {
|
||||
"1": {"name": "entity.name.function"},
|
||||
"2": {"name": "invalid"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": "^Col #[0-9]*7(?:[^0-9].*?)?( WARN: Inconsistent num of fields, header: [0-9]*, this line: [0-9]*)?( ERR: Inconsistent double quotes in line)?$",
|
||||
"name": "constant.numeric.rainbow7",
|
||||
"captures": {
|
||||
"1": {"name": "entity.name.function"},
|
||||
"2": {"name": "invalid"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": "^Col #[0-9]*8(?:[^0-9].*?)?( WARN: Inconsistent num of fields, header: [0-9]*, this line: [0-9]*)?( ERR: Inconsistent double quotes in line)?$",
|
||||
"name": "entity.name.type.rainbow8",
|
||||
"captures": {
|
||||
"1": {"name": "entity.name.function"},
|
||||
"2": {"name": "invalid"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": "^Col #[0-9]*9(?:[^0-9].*?)?( WARN: Inconsistent num of fields, header: [0-9]*, this line: [0-9]*)?( ERR: Inconsistent double quotes in line)?$",
|
||||
"name": "markup.bold.rainbow9",
|
||||
"captures": {
|
||||
"1": {"name": "entity.name.function"},
|
||||
"2": {"name": "invalid"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": "^Col #[0-9]*0(?:[^0-9].*?)?( WARN: Inconsistent num of fields, header: [0-9]*, this line: [0-9]*)?( ERR: Inconsistent double quotes in line)?$",
|
||||
"name": "invalid.rainbow10",
|
||||
"captures": {
|
||||
"1": {"name": "entity.name.function"},
|
||||
"2": {"name": "invalid"}
|
||||
}
|
||||
},
|
||||
{
|
||||
"match": "^Comment *$",
|
||||
"name": "comment"
|
||||
}
|
||||
],
|
||||
"uuid": "cb14e353-04bf-4319-9a2b-9b99aae1c419"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "scsv syntax",
|
||||
"scopeName": "text.scsv",
|
||||
"fileTypes": ["scsv"],
|
||||
"patterns": [
|
||||
{ "match": "((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:;|$))|(?:[^;]*(?:;|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:;|$))|(?:[^;]*(?:;|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:;|$))|(?:[^;]*(?:;|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:;|$))|(?:[^;]*(?:;|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:;|$))|(?:[^;]*(?:;|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:;|$))|(?:[^;]*(?:;|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:;|$))|(?:[^;]*(?:;|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:;|$))|(?:[^;]*(?:;|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:;|$))|(?:[^;]*(?:;|$)))?((?: *\"(?:[^\"]*\"\")*[^\"]*\" *(?:;|$))|(?:[^;]*(?:;|$)))?",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "cb13e352-03bf-4340-9a6b-9b99aae1c418"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "tldsv syntax",
|
||||
"scopeName": "text.tldsv",
|
||||
"fileTypes": [],
|
||||
"patterns": [
|
||||
{ "match": "([^~]*~?)([^~]*~?)([^~]*~?)([^~]*~?)([^~]*~?)([^~]*~?)([^~]*~?)([^~]*~?)([^~]*~?)([^~]*~?)",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca03e352-04ef-4340-9a6b-9b99aae1c126"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "tsv syntax",
|
||||
"scopeName": "text.tsv",
|
||||
"fileTypes": ["tsv", "tab"],
|
||||
"patterns": [
|
||||
{ "match": "([^\\t]*\\t?)([^\\t]*\\t?)([^\\t]*\\t?)([^\\t]*\\t?)([^\\t]*\\t?)([^\\t]*\\t?)([^\\t]*\\t?)([^\\t]*\\t?)([^\\t]*\\t?)([^\\t]*\\t?)",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca13e332-04ef-1340-9a6b-9b99aae1c418"
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
{ "name": "wspcsv syntax",
|
||||
"scopeName": "text.wspcsv",
|
||||
"fileTypes": [],
|
||||
"patterns": [
|
||||
{ "match": "( *[^ ]* *)([^ ]* *)([^ ]* *)([^ ]* *)([^ ]* *)([^ ]* *)([^ ]* *)([^ ]* *)([^ ]* *)([^ ]* *)",
|
||||
"name": "rainbowgroup",
|
||||
"captures": {
|
||||
"1": {"name": "rainbow1"},
|
||||
"2": {"name": "keyword.rainbow2"},
|
||||
"3": {"name": "entity.name.function.rainbow3"},
|
||||
"4": {"name": "comment.rainbow4"},
|
||||
"5": {"name": "string.rainbow5"},
|
||||
"6": {"name": "variable.parameter.rainbow6"},
|
||||
"7": {"name": "constant.numeric.rainbow7"},
|
||||
"8": {"name": "entity.name.type.rainbow8"},
|
||||
"9": {"name": "markup.bold.rainbow9"},
|
||||
"10": {"name": "invalid.rainbow10"}
|
||||
}
|
||||
}
|
||||
|
||||
],
|
||||
"uuid": "ca02e451-04ef-4040-9a6b-9b99aae1c058"
|
||||
}
|
||||
@ -1 +1 @@
|
||||
[{"analyzerName":"intellisense-members-lstm-pylance","languageName":"python","identity":{"modelId":"E61945A9A512ED5E1A3EE3F1A2365B88F8FE","outputId":"E4E9EADA96734F01970E616FAB2FAC19","modifiedTimeUtc":"2020-08-11T14:06:50.811Z"},"filePath":"/home/kristofers/.vscode/extensions/visualstudioexptteam.vscodeintellicode-1.2.20/cache/E61945A9A512ED5E1A3EE3F1A2365B88F8FE_E4E9EADA96734F01970E616FAB2FAC19","lastAccessTimeUtc":"2022-04-28T20:51:11.821Z"}]
|
||||
[{"analyzerName":"intellisense-members-lstm-pylance","languageName":"python","identity":{"modelId":"E61945A9A512ED5E1A3EE3F1A2365B88F8FE","outputId":"E4E9EADA96734F01970E616FAB2FAC19","modifiedTimeUtc":"2020-08-11T14:06:50.811Z"},"filePath":"/home/kristofers/.vscode/extensions/visualstudioexptteam.vscodeintellicode-1.2.20/cache/E61945A9A512ED5E1A3EE3F1A2365B88F8FE_E4E9EADA96734F01970E616FAB2FAC19","lastAccessTimeUtc":"2022-05-01T12:19:01.804Z"}]
|
||||
@ -18,7 +18,7 @@
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.python",
|
||||
"editor.insertSpaces": false,
|
||||
"editor.formatOnSave": true,
|
||||
"editor.formatOnSave": true
|
||||
},
|
||||
"editor.insertSpaces": false,
|
||||
"python.terminal.executeInFileDir": true,
|
||||
@ -57,10 +57,7 @@
|
||||
"suppressLineUncommittedWarning": true
|
||||
},
|
||||
"vsicons.dontShowNewVersionMessage": true,
|
||||
"indentRainbow.excludedLanguages": [
|
||||
"plaintext",
|
||||
"django-txt"
|
||||
],
|
||||
"indentRainbow.excludedLanguages": ["plaintext", "django-txt"],
|
||||
"[markdown]": {
|
||||
"editor.defaultFormatter": "yzhang.markdown-all-in-one"
|
||||
},
|
||||
@ -114,4 +111,4 @@
|
||||
]
|
||||
},
|
||||
"workbench.colorTheme": "Sweet Dracula"
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,7 +45,7 @@ WORDCHARS=${WORDCHARS//\/[&.;]} # Don't consider certain part of the word
|
||||
source /usr/share/zsh/plugins/zsh-autosuggestions/zsh-autosuggestions.zsh 2>/dev/null
|
||||
source /usr/share/zsh/plugins/zsh-history-substring-search/zsh-history-substring-search.zsh 2>/dev/null
|
||||
zmodload zsh/terminfo
|
||||
|
||||
eval "$(lua ~/.config/zsh/z.lua --init zsh)"
|
||||
|
||||
# Keybindings
|
||||
bindkey -e
|
||||
|
||||
Loading…
Reference in New Issue
Block a user