wip on lezer parser grammar for query language

This commit is contained in:
bkellam 2025-11-15 15:23:32 -08:00
parent 4f394519fd
commit cfdadf29e0
17 changed files with 1919 additions and 0 deletions

2
packages/queryLanguage/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/node_modules/
/dist

View file

@ -0,0 +1,19 @@
{
"name": "@sourcebot/query-language",
"private": true,
"scripts": {
"build": "lezer-generator src/query.grammar -o src/parser --typeScript --names && tsc",
"test": "vitest",
"asdf": "tsx test.ts"
},
"devDependencies": {
"@lezer/generator": "^1.8.0",
"tsx": "^4.19.1",
"typescript": "^5.7.3",
"vitest": "^2.1.9"
},
"dependencies": {
"@lezer/common": "^1.3.0",
"@lezer/lr": "^1.4.3"
}
}

View file

@ -0,0 +1,10 @@
// This file was generated by lezer-generator. You probably shouldn't edit it.
export const
negate = 24,
Program = 1,
OrExpr = 2,
AndExpr = 3,
NegateExpr = 4,
PrefixExpr = 5,
ParenExpr = 19,
Term = 20

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,89 @@
// Grammar for the search query language.
//
// The `negate` token is not defined in the @tokens block: it is produced by the
// `negateToken` external tokenizer exported from ./tokens.
@external tokens negateToken from "./tokens" { negate }
// Entry point: a Program wraps a single query.
@top Program { query }
// Named precedence levels.
// NOTE(review): only `negate` is referenced by the rules in this file (as
// `!negate` in NegateExpr); the `and` / `or` names look unused — confirm.
@precedence {
negate,
and,
or @left
}
// A query is an OR chain, an implicit-AND chain, or a single expression.
// Lowercase rule names (query, andExpr, expr, value) do not show up as nodes in
// the expected trees of the test fixtures; only capitalized rules do.
query {
OrExpr |
AndExpr |
expr
}
// Two or more operands separated by the `or` token. Each operand may itself be
// an implicit-AND chain, so `a b or c d` groups as (a b) or (c d).
OrExpr { andExpr (or andExpr)+ }
// Implicit AND: two or more expressions written next to each other.
AndExpr { expr expr+ }
// Operand of an OR: either an AND chain or a single expression.
andExpr { AndExpr | expr }
// A single (non-chained) expression.
expr {
NegateExpr |
ParenExpr |
PrefixExpr |
Term
}
// Negation applies only to a prefix expression or a parenthesized group;
// a bare Term cannot be negated by this rule.
NegateExpr { !negate negate (PrefixExpr | ParenExpr) }
// Parenthesized sub-query.
ParenExpr { "(" query ")" }
// `keyword:value` filters. Each alternative is an inline named rule, so the
// tree contains e.g. PrefixExpr(FileExpr).
PrefixExpr {
ArchivedExpr { archivedKw value } |
BranchExpr { branchKw value } |
ContentExpr { contentKw value } |
CaseExpr { caseKw value } |
FileExpr { fileKw value } |
ForkExpr { forkKw value } |
PublicExpr { publicKw value } |
RepoExpr { repoKw value } |
RegexExpr { regexKw value } |
LangExpr { langKw value } |
SymExpr { symKw value } |
TypeExpr { typeKw value } |
RepoSetExpr { reposetKw value }
}
// A free-standing search term: a quoted string or a bare word.
Term { quotedString | word }
// The value of a prefix expression; accepts the same tokens as Term.
value { quotedString | word }
// Whitespace between tokens is skipped.
@skip { space }
// Token definitions. The `negate` token is NOT defined here; it comes from the
// external tokenizer declared at the top of the grammar.
@tokens {
// Prefix keywords. The trailing colon is part of the token; several keywords
// also have a one-letter alias (b:, c:, f:, r:, t:).
archivedKw { "archived:" }
branchKw { "branch:" | "b:" }
contentKw { "content:" | "c:" }
caseKw { "case:" }
fileKw { "file:" | "f:" }
forkKw { "fork:" }
publicKw { "public:" }
repoKw { "repo:" | "r:" }
regexKw { "regex:" }
langKw { "lang:" }
symKw { "sym:" }
typeKw { "type:" | "t:" }
reposetKw { "reposet:" }
// `or` followed by ONE character that is not a letter, digit or underscore,
// so that words such as `order` are not split.
// NOTE(review): that following character is part of the token, and the token
// cannot match at end of input (the "OR at end" fixture expects a trailing `or`
// to parse as a Term). A delimiter such as `(` or `"` placed directly after
// `or` would presumably be swallowed by this token — confirm.
or { "or" ![a-zA-Z0-9_] }
// Double-quoted string. A backslash escapes whatever single character follows
// it; unescaped double quotes, backslashes and newlines are not allowed inside.
quotedString { '"' (!["\\\n] | "\\" _)* '"' }
// A word starts with any character except space, tab, newline, parentheses or
// a double quote, and continues with any characters from that same set.
// Colons and dashes are therefore allowed anywhere in a word, including at the
// start (the explicit ":" and "-" alternatives spell this out).
word { (![ \t\n()"]) (![ \t\n()":] | ":" | "-")* }
// Whitespace: spaces, tabs and newlines.
space { $[ \t\n]+ }
// Token precedence: quoted strings first, then the prefix keywords and `or`,
// all of which take priority over the generic `word` token they overlap with.
@precedence {
quotedString,
archivedKw, branchKw, contentKw, caseKw, fileKw,
forkKw, publicKw, repoKw, regexKw, langKw,
symKw, typeKw, reposetKw, or,
word
}
}

View file

@ -0,0 +1,61 @@
import { ExternalTokenizer } from "@lezer/lr";
import { negate } from "./parser.terms";
// External tokenizer for negation
// Only tokenizes `-` as negate when followed by a prefix keyword or `(`
// Character codes inspected by the negation tokenizer.
const DASH = 45; // '-'
const COLON = 58; // ':'
const OPEN_PAREN = 40; // '('
const CLOSE_PAREN = 41; // ')'
const DOUBLE_QUOTE = 34; // '"'
const SPACE = 32;
const TAB = 9;
const NEWLINE = 10;

/**
 * Prefix keywords (without the trailing colon) that a negating dash may precede.
 * Keep in sync with the `*Kw` tokens declared in query.grammar.
 */
const PREFIX_KEYWORDS: ReadonlySet<string> = new Set([
    "archived",
    "branch", "b",
    "content", "c",
    "case",
    "file", "f",
    "fork",
    "public",
    "repo", "r",
    "regex",
    "lang",
    "sym",
    "type", "t",
    "reposet",
]);

/** Longest keyword length; bounds how far the tokenizer looks ahead for a colon. */
const MAX_PREFIX_KEYWORD_LENGTH = Math.max(
    ...Array.from(PREFIX_KEYWORDS, (keyword) => keyword.length),
);

/** True for the characters the grammar's `space` token skips. */
const isSpace = (ch: number): boolean => ch === SPACE || ch === TAB || ch === NEWLINE;

/** True for characters that can never be part of a prefix keyword. */
const endsKeyword = (ch: number): boolean =>
    ch < 0 || // end of input
    isSpace(ch) ||
    ch === OPEN_PAREN ||
    ch === CLOSE_PAREN ||
    ch === DOUBLE_QUOTE;

/**
 * External tokenizer for negation.
 *
 * Emits `negate` for a `-` only when, after optional whitespace, it is followed
 * by `(` or by one of the known prefix keywords and its colon (e.g. `-file:`,
 * `-r:`). In every other case no token is emitted, so the dash is left for the
 * grammar's `word` token (`-test`, `-foo:bar`, `-http://x` stay literal terms).
 *
 * All lookahead is done with `input.peek()`, which does not move the stream, so
 * the accepted token always ends one code unit after the dash.
 */
export const negateToken = new ExternalTokenizer((input) => {
    if (input.next !== DASH) return;

    // Offset 0 is the dash itself; skip any whitespace that follows it.
    let offset = 1;
    while (isSpace(input.peek(offset))) offset++;

    if (input.peek(offset) === OPEN_PAREN) {
        input.acceptToken(negate, 1); // just the dash
        return;
    }

    // Collect characters up to the colon and check them against the keyword list.
    let keyword = "";
    for (let i = 0; i <= MAX_PREFIX_KEYWORD_LENGTH; i++) {
        const ch = input.peek(offset + i);
        if (ch === COLON) {
            if (PREFIX_KEYWORDS.has(keyword)) {
                input.acceptToken(negate, 1); // just the dash
            }
            return;
        }
        if (endsKeyword(ch)) return;
        keyword += String.fromCharCode(ch);
    }
    // No colon within keyword range: not a negation, leave the dash to `word`.
});

View file

@ -0,0 +1,46 @@
import { parser } from "./src/parser";
// Sample query for this scratch script: one bare term plus one `case:` prefix.
const input = "hello case:yes";
// Parse the sample with the parser imported from ./src/parser; the resulting
// tree is walked by prettyPrint.
const tree = parser.parse(input);
/**
 * Rebuilds the query text by walking the syntax tree.
 *
 * Every non-empty node without children contributes its own slice of `input`;
 * any text lying between the previous leaf and the current one (and after the
 * final leaf) is copied through unchanged.
 *
 * @param tree  Parse tree to walk.
 * @param input The text the tree was parsed from.
 * @returns The concatenated leaf and gap text.
 */
const prettyPrint = (tree: ReturnType<typeof parser.parse>, input: string) => {
    const pieces: string[] = [];
    // End offset of the last leaf that was emitted.
    let cursor = 0;

    tree.iterate({
        enter(ref) {
            const { from, to } = ref;
            // Zero-width nodes carry no text.
            if (from >= to) return;
            // Only leaves contribute text; inner nodes are covered by their children.
            if (ref.node.firstChild) return;

            if (from > cursor) {
                // Text between the previous leaf and this one.
                pieces.push(input.slice(cursor, from));
            }
            pieces.push(input.slice(from, to));
            cursor = to;
        },
    });

    // Whatever follows the final leaf.
    if (cursor < input.length) {
        pieces.push(input.slice(cursor));
    }
    return pieces.join("");
};
// Round-trip the sample query through prettyPrint and print the original text,
// the reconstructed text, and whether the two are identical.
const reconstructed = prettyPrint(tree, input);
const report: Array<[string, string | boolean]> = [
    ["Original:", input],
    ["Reconstructed:", reconstructed],
    ["Match:", input === reconstructed],
];
for (const [label, value] of report) {
    console.log(label, value);
}

View file

@ -0,0 +1,72 @@
# Single term
hello
==>
Program(Term)
# Multiple terms
hello world
==>
Program(AndExpr(Term,Term))
# Multiple terms with various characters
console.log error_handler
==>
Program(AndExpr(Term,Term))
# Term with underscores
my_variable_name
==>
Program(Term)
# Term with dots
com.example.package
==>
Program(Term)
# Term with numbers
func123 test_456
==>
Program(AndExpr(Term,Term))
# Regex pattern
[a-z]+
==>
Program(Term)
# Wildcard pattern
test.*
==>
Program(Term)
# Multiple regex patterns
\w+ [0-9]+ \s*
==>
Program(AndExpr(Term,Term,Term))

View file

@ -0,0 +1,21 @@
import { parser } from "../src/parser";
import { fileTests } from "@lezer/generator/dist/test";
import { describe, it } from "vitest";
import { fileURLToPath } from "url"
import * as fs from "fs";
import * as path from "path";
// Directory containing this test file; the `.txt` fixtures live next to it.
const caseDir = path.dirname(fileURLToPath(import.meta.url));

// Only `.txt` entries are fixture files.
const fixtureFiles = fs.readdirSync(caseDir).filter((entry) => /\.txt$/.test(entry));

// One `describe` suite per fixture file, one `it` per case that fileTests
// extracts from it. The suite is named after the file name up to its first dot.
fixtureFiles.forEach((fixture) => {
    const suiteName = /^[^\.]*/.exec(fixture)?.[0] ?? "unknown";

    describe(suiteName, () => {
        const source = fs.readFileSync(path.join(caseDir, fixture), "utf8");
        for (const { name: caseName, run } of fileTests(source, fixture)) {
            it(caseName, () => run(parser));
        }
    });
});

View file

@ -0,0 +1,120 @@
# Empty parentheses
()
==>
Program(ParenExpr(Term(⚠)))
# Simple grouping
(test)
==>
Program(ParenExpr(Term))
# Multiple terms in group
(hello world)
==>
Program(ParenExpr(AndExpr(Term,Term)))
# Nested parentheses
((test))
==>
Program(ParenExpr(ParenExpr(Term)))
# Multiple groups
(first) (second)
==>
Program(AndExpr(ParenExpr(Term),ParenExpr(Term)))
# Group with multiple terms
(one two three)
==>
Program(ParenExpr(AndExpr(Term,Term,Term)))
# Mixed grouped and ungrouped
test (grouped) another
==>
Program(AndExpr(Term,ParenExpr(Term),Term))
# Deeply nested
(((nested)))
==>
Program(ParenExpr(ParenExpr(ParenExpr(Term))))
# Multiple nested groups
((a b) (c d))
==>
Program(ParenExpr(AndExpr(ParenExpr(AndExpr(Term,Term)),ParenExpr(AndExpr(Term,Term)))))
# Group at start
(start) middle end
==>
Program(AndExpr(ParenExpr(Term),Term,Term))
# Group at end
start middle (end)
==>
Program(AndExpr(Term,Term,ParenExpr(Term)))
# Complex grouping pattern
(a (b c) d)
==>
Program(ParenExpr(AndExpr(Term,ParenExpr(AndExpr(Term,Term)),Term)))
# Sequential groups
(a)(b)(c)
==>
Program(AndExpr(ParenExpr(Term),ParenExpr(Term),ParenExpr(Term)))
# Group with regex
([a-z]+)
==>
Program(ParenExpr(Term))
# Group with dots
(com.example.test)
==>
Program(ParenExpr(Term))

View file

@ -0,0 +1,287 @@
# Literal dash term
-test
==>
Program(Term)
# Quoted dash term
"-excluded"
==>
Program(Term)
# Dash in middle
test-case
==>
Program(Term)
# Multiple dash terms
-one -two -three
==>
Program(AndExpr(Term,Term,Term))
# Negate file prefix
-file:test.js
==>
Program(NegateExpr(PrefixExpr(FileExpr)))
# Negate repo prefix
-repo:archived
==>
Program(NegateExpr(PrefixExpr(RepoExpr)))
# Negate lang prefix
-lang:python
==>
Program(NegateExpr(PrefixExpr(LangExpr)))
# Negate content prefix
-content:TODO
==>
Program(NegateExpr(PrefixExpr(ContentExpr)))
# Negate branch prefix
-branch:develop
==>
Program(NegateExpr(PrefixExpr(BranchExpr)))
# Negate case prefix
-case:yes
==>
Program(NegateExpr(PrefixExpr(CaseExpr)))
# Negate archived prefix
-archived:yes
==>
Program(NegateExpr(PrefixExpr(ArchivedExpr)))
# Negate fork prefix
-fork:yes
==>
Program(NegateExpr(PrefixExpr(ForkExpr)))
# Negate public prefix
-public:no
==>
Program(NegateExpr(PrefixExpr(PublicExpr)))
# Negate symbol prefix
-sym:OldClass
==>
Program(NegateExpr(PrefixExpr(SymExpr)))
# Negate type prefix
-type:repo
==>
Program(NegateExpr(PrefixExpr(TypeExpr)))
# Negate regex prefix
-regex:test.*
==>
Program(NegateExpr(PrefixExpr(RegexExpr)))
# Negate parentheses
-(test)
==>
Program(NegateExpr(ParenExpr(Term)))
# Negate group with multiple terms
-(test exclude)
==>
Program(NegateExpr(ParenExpr(AndExpr(Term,Term))))
# Negate group with prefix
-(file:test.js console.log)
==>
Program(NegateExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),Term))))
# Prefix followed by a literal dash term (a bare term is never negated)
file:test.js -console
==>
Program(AndExpr(PrefixExpr(FileExpr),Term))
# Multiple prefixes with negation
file:test.js -lang:python
==>
Program(AndExpr(PrefixExpr(FileExpr),NegateExpr(PrefixExpr(LangExpr))))
# Complex negation pattern
function -file:test.js -lang:java
==>
Program(AndExpr(Term,NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(LangExpr))))
# Negation inside parentheses
(-file:test.js)
==>
Program(ParenExpr(NegateExpr(PrefixExpr(FileExpr))))
# Multiple negations in group
(-file:a.js -lang:python)
==>
Program(ParenExpr(AndExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(LangExpr)))))
# Mixed in parentheses
(include -file:test.js)
==>
Program(ParenExpr(AndExpr(Term,NegateExpr(PrefixExpr(FileExpr)))))
# Negate nested group
-((file:test.js))
==>
Program(NegateExpr(ParenExpr(ParenExpr(PrefixExpr(FileExpr)))))
# Negate short form prefix
-f:test.js
==>
Program(NegateExpr(PrefixExpr(FileExpr)))
# Negate short form repo
-r:myrepo
==>
Program(NegateExpr(PrefixExpr(RepoExpr)))
# Negate short form branch
-b:main
==>
Program(NegateExpr(PrefixExpr(BranchExpr)))
# Negate short form content
-c:console
==>
Program(NegateExpr(PrefixExpr(ContentExpr)))
# Negate short form type
-t:file
==>
Program(NegateExpr(PrefixExpr(TypeExpr)))
# Negate with prefix in quotes
-file:"test file.js"
==>
Program(NegateExpr(PrefixExpr(FileExpr)))
# Complex with multiple negated prefixes
lang:typescript -file:*.test.ts -file:*.spec.ts
==>
Program(AndExpr(PrefixExpr(LangExpr),NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr))))
# Negated group with prefix
-(file:test.js lang:python)
==>
Program(NegateExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr)))))
# Negate empty group
-()
==>
Program(NegateExpr(ParenExpr(Term(⚠))))
# Negate with space after dash
- file:test.js
==>
Program(NegateExpr(PrefixExpr(FileExpr)))

View file

@ -0,0 +1,271 @@
# Simple OR
test or example
==>
Program(OrExpr(Term,Term))
# Multiple OR
one or two or three
==>
Program(OrExpr(Term,Term,Term))
# OR with prefixes
file:test.js or file:example.js
==>
Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(FileExpr)))
# OR with negation
test or -file:excluded.js
==>
Program(OrExpr(Term,NegateExpr(PrefixExpr(FileExpr))))
# OR with quoted strings
"first option" or "second option"
==>
Program(OrExpr(Term,Term))
# OR with different prefixes
lang:python or lang:javascript
==>
Program(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr)))
# Multiple terms with OR
function test or class example
==>
Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term)))
# OR in parentheses
(test or example)
==>
Program(ParenExpr(OrExpr(Term,Term)))
# OR with parentheses outside
(test) or (example)
==>
Program(OrExpr(ParenExpr(Term),ParenExpr(Term)))
# Complex OR with grouping
(file:*.js lang:javascript) or (file:*.ts lang:typescript)
==>
Program(OrExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))),ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr)))))
# OR with mixed content
test or file:example.js
==>
Program(OrExpr(Term,PrefixExpr(FileExpr)))
# Prefix OR term
file:test.js or example
==>
Program(OrExpr(PrefixExpr(FileExpr),Term))
# OR with short form prefixes
f:test.js or r:myrepo
==>
Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr)))
# OR with repo prefixes
repo:project1 or repo:project2
==>
Program(OrExpr(PrefixExpr(RepoExpr),PrefixExpr(RepoExpr)))
# OR with branch prefixes
branch:main or branch:develop
==>
Program(OrExpr(PrefixExpr(BranchExpr),PrefixExpr(BranchExpr)))
# OR with lang prefixes
lang:rust or lang:go
==>
Program(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr)))
# OR with content
content:TODO or content:FIXME
==>
Program(OrExpr(PrefixExpr(ContentExpr),PrefixExpr(ContentExpr)))
# OR with negated terms
-file:test.js or -file:spec.js
==>
Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr))))
# OR in nested parentheses
((a or b) or (c or d))
==>
Program(ParenExpr(OrExpr(ParenExpr(OrExpr(Term,Term)),ParenExpr(OrExpr(Term,Term)))))
# Literal "and" between OR groups is a plain term (AND is implicit)
(a or b) and (c or d)
==>
Program(AndExpr(ParenExpr(OrExpr(Term,Term)),Term,ParenExpr(OrExpr(Term,Term))))
# OR with wildcards
*.test.js or *.spec.js
==>
Program(OrExpr(Term,Term))
# OR with regex patterns
[a-z]+ or [0-9]+
==>
Program(OrExpr(Term,Term))
# OR with dots
com.example.test or org.example.test
==>
Program(OrExpr(Term,Term))
# OR with dashes
test-one or test-two
==>
Program(OrExpr(Term,Term))
# Word containing 'or'
order
==>
Program(Term)
# Word containing 'or' in middle
before
==>
Program(Term)
# OR at start
or test
==>
Program(⚠,Term)
# OR at end (or becomes term)
test or
==>
Program(AndExpr(Term,Term))
# Multiple consecutive OR
test or or example
==>
Program(OrExpr(Term,⚠,Term))
# OR chaining several different prefix types
file:*.js or repo:myrepo or lang:javascript
==>
Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr),PrefixExpr(LangExpr)))
# Complex query with OR and negation
(lang:python or lang:ruby) -file:test.py
==>
Program(AndExpr(ParenExpr(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))),NegateExpr(PrefixExpr(FileExpr))))
# OR with quoted prefix values
file:"test one.js" or file:"test two.js"
==>
Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(FileExpr)))
# OR with empty parentheses
() or ()
==>
Program(OrExpr(ParenExpr(Term(⚠)),ParenExpr(Term(⚠))))
# OR with negated groups
-(file:a.js) or -(file:b.js)
==>
Program(OrExpr(NegateExpr(ParenExpr(PrefixExpr(FileExpr))),NegateExpr(ParenExpr(PrefixExpr(FileExpr)))))

View file

@ -0,0 +1,336 @@
# File prefix
file:README.md
==>
Program(PrefixExpr(FileExpr))
# File prefix short form
f:index.ts
==>
Program(PrefixExpr(FileExpr))
# Repo prefix
repo:myproject
==>
Program(PrefixExpr(RepoExpr))
# Repo prefix short form
r:github.com/user/repo
==>
Program(PrefixExpr(RepoExpr))
# Content prefix
content:function
==>
Program(PrefixExpr(ContentExpr))
# Content prefix short form
c:console.log
==>
Program(PrefixExpr(ContentExpr))
# Branch prefix
branch:main
==>
Program(PrefixExpr(BranchExpr))
# Branch prefix short form
b:develop
==>
Program(PrefixExpr(BranchExpr))
# Lang prefix
lang:typescript
==>
Program(PrefixExpr(LangExpr))
# Case prefix
case:yes
==>
Program(PrefixExpr(CaseExpr))
# Archived prefix
archived:no
==>
Program(PrefixExpr(ArchivedExpr))
# Fork prefix
fork:yes
==>
Program(PrefixExpr(ForkExpr))
# Public prefix
public:yes
==>
Program(PrefixExpr(PublicExpr))
# Symbol prefix
sym:MyClass
==>
Program(PrefixExpr(SymExpr))
# Type prefix
type:file
==>
Program(PrefixExpr(TypeExpr))
# Type prefix short form
t:repo
==>
Program(PrefixExpr(TypeExpr))
# Regex prefix
regex:test.*
==>
Program(PrefixExpr(RegexExpr))
# RepoSet prefix
reposet:repo1,repo2
==>
Program(PrefixExpr(RepoSetExpr))
# File with wildcard
file:*.ts
==>
Program(PrefixExpr(FileExpr))
# File with path
file:src/components/Button.tsx
==>
Program(PrefixExpr(FileExpr))
# Repo with full URL
repo:github.com/org/project
==>
Program(PrefixExpr(RepoExpr))
# Multiple prefixes
file:test.js repo:myproject
==>
Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr)))
# Prefix with term
file:test.js console.log
==>
Program(AndExpr(PrefixExpr(FileExpr),Term))
# Term then prefix
console.log file:handler.ts
==>
Program(AndExpr(Term,PrefixExpr(FileExpr)))
# Multiple prefixes and terms
lang:typescript function file:handler.ts
==>
Program(AndExpr(PrefixExpr(LangExpr),Term,PrefixExpr(FileExpr)))
# Prefix with regex pattern
file:[a-z]+\.test\.js
==>
Program(PrefixExpr(FileExpr))
# Content with a single unquoted word value
content:hello
==>
Program(PrefixExpr(ContentExpr))
# Branch with slashes
branch:feature/new-feature
==>
Program(PrefixExpr(BranchExpr))
# Case values
case:auto
==>
Program(PrefixExpr(CaseExpr))
# RepoSet with multiple repos
reposet:repo1,repo2,repo3
==>
Program(PrefixExpr(RepoSetExpr))
# Symbol with dots
sym:package.Class.method
==>
Program(PrefixExpr(SymExpr))
# Type variations
type:filename
==>
Program(PrefixExpr(TypeExpr))
# Lang with various languages
lang:python
==>
Program(PrefixExpr(LangExpr))
# Archived values
archived:yes
==>
Program(PrefixExpr(ArchivedExpr))
# Fork values
fork:no
==>
Program(PrefixExpr(ForkExpr))
# Public values
public:no
==>
Program(PrefixExpr(PublicExpr))
# Regex with complex pattern
regex:\w+\s*=\s*\d+
==>
Program(PrefixExpr(RegexExpr))
# File with dashes
file:my-component.tsx
==>
Program(PrefixExpr(FileExpr))
# Repo with numbers
repo:project123
==>
Program(PrefixExpr(RepoExpr))
# Content with special chars
content:@Component
==>
Program(PrefixExpr(ContentExpr))
# Prefix in parentheses
(file:test.js)
==>
Program(ParenExpr(PrefixExpr(FileExpr)))
# Multiple prefixes in group
(file:*.ts lang:typescript)
==>
Program(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))))

View file

@ -0,0 +1,495 @@
# Simple quoted string
"hello"
==>
Program(Term)
# Quoted string with spaces
"hello world"
==>
Program(Term)
# Multiple words in quotes
"this is a search term"
==>
Program(Term)
# Quoted string with escaped quote
"hello \"world\""
==>
Program(Term)
# Quoted string with escaped backslash
"path\\to\\file"
==>
Program(Term)
# Double backslash
"test\\\\path"
==>
Program(Term)
# Multiple escaped quotes
"\"quoted\" \"words\""
==>
Program(Term)
# Mixed escaped characters
"test\\nvalue\"quoted"
==>
Program(Term)
# Empty quoted string
""
==>
Program(Term)
# Quoted string with only spaces
" "
==>
Program(Term)
# Quoted string in file prefix
file:"my file.txt"
==>
Program(PrefixExpr(FileExpr))
# Quoted string in repo prefix
repo:"github.com/user/repo name"
==>
Program(PrefixExpr(RepoExpr))
# Quoted string in content prefix
content:"console.log"
==>
Program(PrefixExpr(ContentExpr))
# Quoted string in branch prefix
branch:"feature/my feature"
==>
Program(PrefixExpr(BranchExpr))
# Multiple quoted strings
"first string" "second string"
==>
Program(AndExpr(Term,Term))
# Quoted and unquoted mixed
unquoted "quoted string" another
==>
Program(AndExpr(Term,Term,Term))
# Quoted string with parentheses inside
"(test)"
==>
Program(Term)
# Quoted string with brackets
"[a-z]+"
==>
Program(Term)
# Quoted string with special chars
"test@example.com"
==>
Program(Term)
# Quoted string with colons
"key:value"
==>
Program(Term)
# Quoted string with dashes
"test-case-example"
==>
Program(Term)
# Quoted string with dots
"com.example.package"
==>
Program(Term)
# Quoted string with regex pattern
"\\w+\\s*=\\s*\\d+"
==>
Program(Term)
# Quoted string with forward slashes
"path/to/file"
==>
Program(Term)
# Quoted string with underscores
"my_variable_name"
==>
Program(Term)
# Quoted string with numbers
"test123"
==>
Program(Term)
# Quoted string with mixed case
"CamelCaseTest"
==>
Program(Term)
# Quoted prefix value with spaces
file:"test file.js"
==>
Program(PrefixExpr(FileExpr))
# Multiple prefixes with quoted values
file:"my file.txt" repo:"my repo"
==>
Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr)))
# Quoted string in parentheses
("quoted term")
==>
Program(ParenExpr(Term))
# Multiple quoted in parentheses
("first" "second")
==>
Program(ParenExpr(AndExpr(Term,Term)))
# Quoted with escaped backslash followed by n
"line1\\nline2"
==>
Program(Term)
# Quoted with escaped backslash followed by t
"value\\ttab"
==>
Program(Term)
# Lang prefix with quoted value
lang:"objective-c"
==>
Program(PrefixExpr(LangExpr))
# Sym prefix with quoted value
sym:"My Class"
==>
Program(PrefixExpr(SymExpr))
# Content with quoted phrase
content:"TODO: fix this"
==>
Program(PrefixExpr(ContentExpr))
# Regex prefix with quoted pattern
regex:"func\\s+\\w+"
==>
Program(PrefixExpr(RegexExpr))
# Case prefix with quoted value
case:"yes"
==>
Program(PrefixExpr(CaseExpr))
# Quoted string with at symbol
"@decorator"
==>
Program(Term)
# Quoted string with hash
"#define"
==>
Program(Term)
# Quoted string with dollar sign
"$variable"
==>
Program(Term)
# Quoted string with percent
"100%"
==>
Program(Term)
# Quoted string with ampersand
"foo&bar"
==>
Program(Term)
# Quoted string with asterisk
"test*"
==>
Program(Term)
# Quoted string with plus
"a+b"
==>
Program(Term)
# Quoted string with equals
"a=b"
==>
Program(Term)
# Quoted string with angle brackets
"<template>"
==>
Program(Term)
# Quoted string with pipe
"a|b"
==>
Program(Term)
# Quoted string with tilde
"~/.config"
==>
Program(Term)
# Quoted string with backtick
"`code`"
==>
Program(Term)
# Quoted string with question mark
"what?"
==>
Program(Term)
# Quoted string with exclamation
"important!"
==>
Program(Term)
# Quoted string with semicolon
"stmt;"
==>
Program(Term)
# Quoted string with comma
"a,b,c"
==>
Program(Term)
# Multiple quotes in content
content:"function \"test\" {"
==>
Program(PrefixExpr(ContentExpr))
# Quoted prefix keyword becomes literal
"repo:hello"
==>
Program(Term)
# Quoted file prefix as literal
"file:test.js"
==>
Program(Term)
# Quoted lang prefix as literal
"lang:python"
==>
Program(Term)
# Quoted partial prefix
"repo:"
==>
Program(Term)
# Mix of quoted prefix and real prefix
"repo:test" file:actual.js
==>
Program(AndExpr(Term,PrefixExpr(FileExpr)))
# Quoted short form prefix
"f:test"
==>
Program(Term)
# Quoted branch prefix
"branch:main"
==>
Program(Term)

View file

@ -0,0 +1,23 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "Node16",
"moduleResolution": "Node16",
"lib": ["ES2023"],
"outDir": "dist",
"rootDir": "src",
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"strict": true,
"noImplicitAny": true,
"strictNullChecks": true,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"skipLibCheck": true,
"isolatedModules": true,
"resolveJsonModule": true
},
"include": ["src/parser.ts"],
"exclude": ["node_modules", "dist"]
}

View file

@ -0,0 +1,8 @@
import { defineConfig } from 'vitest/config';
// Vitest configuration for this package.
export default defineConfig({
test: {
// Run the tests in the Node environment.
environment: 'node',
// Watch mode is turned off.
watch: false,
}
});

View file

@ -2925,6 +2925,13 @@ __metadata:
languageName: node
linkType: hard
"@lezer/common@npm:^1.3.0":
version: 1.3.0
resolution: "@lezer/common@npm:1.3.0"
checksum: 10c0/e164094920761c2f56c8634d0ae9261ea7c5e6b8202aa08773febc59b8d8284dde5bc7a810c9438e27b978e5ad67d0db03af1ed72924df61b8fa2704acb55deb
languageName: node
linkType: hard
"@lezer/cpp@npm:^1.0.0":
version: 1.1.3
resolution: "@lezer/cpp@npm:1.1.3"
@ -2947,6 +2954,18 @@ __metadata:
languageName: node
linkType: hard
"@lezer/generator@npm:^1.8.0":
version: 1.8.0
resolution: "@lezer/generator@npm:1.8.0"
dependencies:
"@lezer/common": "npm:^1.1.0"
"@lezer/lr": "npm:^1.3.0"
bin:
lezer-generator: src/lezer-generator.cjs
checksum: 10c0/c9dab9a27b6b757544f51b1612842ded77db7322d23cfd175274f89d783e0987b106c0f51e1203af74b7e56ccc567e8efd633aaffa2086cb55bfc1e3ea591fa6
languageName: node
linkType: hard
"@lezer/go@npm:^1.0.0":
version: 1.0.0
resolution: "@lezer/go@npm:1.0.0"
@ -3029,6 +3048,15 @@ __metadata:
languageName: node
linkType: hard
"@lezer/lr@npm:^1.4.3":
version: 1.4.3
resolution: "@lezer/lr@npm:1.4.3"
dependencies:
"@lezer/common": "npm:^1.0.0"
checksum: 10c0/3c9fd7eefb0641addfdd0955b4c4014bb8702285c52890b58c937d766320ba2fec8c6b374b46f514079a093c9dd21b6632746a01fed16c250c90d649e5dd12c1
languageName: node
linkType: hard
"@lezer/markdown@npm:^1.0.0":
version: 1.4.2
resolution: "@lezer/markdown@npm:1.4.2"
@ -7988,6 +8016,19 @@ __metadata:
languageName: unknown
linkType: soft
"@sourcebot/query-language@workspace:packages/queryLanguage":
version: 0.0.0-use.local
resolution: "@sourcebot/query-language@workspace:packages/queryLanguage"
dependencies:
"@lezer/common": "npm:^1.3.0"
"@lezer/generator": "npm:^1.8.0"
"@lezer/lr": "npm:^1.4.3"
tsx: "npm:^4.19.1"
typescript: "npm:^5.7.3"
vitest: "npm:^2.1.9"
languageName: unknown
linkType: soft
"@sourcebot/schemas@workspace:*, @sourcebot/schemas@workspace:packages/schemas":
version: 0.0.0-use.local
resolution: "@sourcebot/schemas@workspace:packages/schemas"