wip on lezer parser grammar for query language

2025-12-12 04:15:30 +00:00 · 2025-11-15 15:23:32 -08:00 · 2025-11-15 15:23:32 -08:00 · cfdadf29e0
commit cfdadf29e0
parent 4f394519fd
17 changed files with 1919 additions and 0 deletions
--- a/packages/queryLanguage/.gitignore
+++ b/packages/queryLanguage/.gitignore
@ -0,0 +1,2 @@
+/node_modules/
+/dist
--- a/packages/queryLanguage/package.json
+++ b/packages/queryLanguage/package.json
@ -0,0 +1,19 @@
+{
+    "name": "@sourcebot/query-language",
+    "private": true,
+    "scripts": {
+        "build": "lezer-generator src/query.grammar -o src/parser --typeScript --names && tsc",
+        "test": "vitest",
+        "asdf": "tsx test.ts"
+    },
+    "devDependencies": {
+        "@lezer/generator": "^1.8.0",
+        "tsx": "^4.19.1",
+        "typescript": "^5.7.3",
+        "vitest": "^2.1.9"
+    },
+    "dependencies": {
+        "@lezer/common": "^1.3.0",
+        "@lezer/lr": "^1.4.3"
+    }
+}
--- a/packages/queryLanguage/src/parser.terms.ts
+++ b/packages/queryLanguage/src/parser.terms.ts
@ -0,0 +1,10 @@
+// This file was generated by lezer-generator. You probably shouldn't edit it.
+// Numeric term ids for the query grammar. `negate` is the id that the external
+// tokenizer in ./tokens passes to `acceptToken`; the capitalised names match the
+// node names declared in query.grammar.
+export const
+  negate = 24,
+  Program = 1,
+  OrExpr = 2,
+  AndExpr = 3,
+  NegateExpr = 4,
+  PrefixExpr = 5,
+  ParenExpr = 19,
+  Term = 20
--- a/packages/queryLanguage/src/parser.ts
+++ b/packages/queryLanguage/src/parser.ts
--- a/packages/queryLanguage/src/query.grammar
+++ b/packages/queryLanguage/src/query.grammar
@ -0,0 +1,89 @@
+// The `negate` token is not defined in the @tokens block of this file; it is produced by
+// the external tokenizer `negateToken` exported from ./tokens.
+@external tokens negateToken from "./tokens" { negate }
+
+// Root rule: a Program wraps a single query.
+@top Program { query }
+
+// Precedence names. `negate` is applied in NegateExpr via `!negate`.
+// NOTE(review): `and` and `or` are declared here but no rule in this file references them
+// with `!and` / `!or` - confirm whether they are still needed.
+@precedence {
+  negate,
+  and,
+  or @left
+}
+
+// A query is an OR expression, an implicit AND of several expressions, or a single expression.
+query {
+  OrExpr |
+  AndExpr |
+  expr
+}
+
+// Two or more AND-level operands separated by the `or` token.
+OrExpr { andExpr (or andExpr)+ }
+
+// Two or more adjacent expressions; there is no AND operator token, juxtaposition is the AND.
+AndExpr { expr expr+ }
+
+// Operand of OrExpr: either an implicit AND or a single expression.
+andExpr { AndExpr | expr }
+
+// A single (possibly negated or parenthesised) expression.
+expr {
+  NegateExpr |
+  ParenExpr |
+  PrefixExpr |
+  Term
+}
+
+// Negation applies only to a prefix expression or a parenthesised group; a bare Term
+// cannot be negated by this rule.
+NegateExpr { !negate negate (PrefixExpr | ParenExpr) }
+
+// Parentheses wrap a complete query, so grouping can nest.
+ParenExpr { "(" query ")" }
+
+// One alternative per prefix keyword. Each alternative is a keyword token (which includes
+// the trailing colon, see @tokens) followed by a value, wrapped in its own named node.
+PrefixExpr {
+  ArchivedExpr { archivedKw value } |
+  BranchExpr { branchKw value } |
+  ContentExpr { contentKw value } |
+  CaseExpr { caseKw value } |
+  FileExpr { fileKw value } |
+  ForkExpr { forkKw value } |
+  PublicExpr { publicKw value } |
+  RepoExpr { repoKw value } |
+  RegexExpr { regexKw value } |
+  LangExpr { langKw value } |
+  SymExpr { symKw value } |
+  TypeExpr { typeKw value } |
+  RepoSetExpr { reposetKw value }
+}
+
+
+// A free-standing search term: a quoted string or a bare word.
+Term { quotedString | word }
+
+// The argument of a prefix keyword. Accepts the same tokens as Term, but the rule name is
+// lower-case and the test expectations show no separate node for it under PrefixExpr.
+value { quotedString | word }
+
+// Whitespace between tokens is skipped everywhere.
+@skip { space }
+
+@tokens {
+  // Prefix keywords. The trailing colon is part of the token; several keywords also
+  // have a one-letter alias.
+  archivedKw { "archived:" }
+  branchKw { "branch:" | "b:" }
+  contentKw { "content:" | "c:" }
+  caseKw { "case:" }
+  fileKw { "file:" | "f:" }
+  forkKw { "fork:" }
+  publicKw { "public:" }
+  repoKw { "repo:" | "r:" }
+  regexKw { "regex:" }
+  langKw { "lang:" }
+  symKw { "sym:" }
+  typeKw { "type:" | "t:" }
+  reposetKw { "reposet:" }
+  
+  // The OR operator: "or" followed by one character that is not a letter, digit or
+  // underscore, so words such as "order" are not split. A following character is
+  // required, so a trailing "or" at the end of the input is lexed as a word instead.
+  // NOTE(review): the following character is part of this pattern, so the token appears
+  // to span it as well (e.g. the space, or a "(" in `or(`) - confirm this is intended.
+  or { "or" ![a-zA-Z0-9_] }
+  
+  // Double-quoted string; a backslash escapes the next character, raw newlines are not allowed.
+  quotedString { '"' (!["\\\n] | "\\" _)* '"' }
+  
+  // Allow almost anything in a word except spaces, parens, quotes
+  // Colons and dashes are allowed anywhere in words (including at the start)
+  // NOTE(review): "-" is already matched by the negated set, and ":" is excluded from it only
+  // to be re-added as an alternative, so both extra alternatives look redundant - confirm.
+  word { (![ \t\n()"]) (![ \t\n()":] | ":" | "-")* }
+  
+  space { $[ \t\n]+ }
+  
+  // Token precedence, highest first: quoted strings, then keywords and `or`, then plain words.
+  @precedence { 
+    quotedString,
+    archivedKw, branchKw, contentKw, caseKw, fileKw, 
+    forkKw, publicKw, repoKw, regexKw, langKw, 
+    symKw, typeKw, reposetKw, or,
+    word 
+  }
+}
--- a/packages/queryLanguage/src/tokens.ts
+++ b/packages/queryLanguage/src/tokens.ts
@ -0,0 +1,61 @@
+import { ExternalTokenizer } from "@lezer/lr";
+import { negate } from "./parser.terms";
+
+// Character codes inspected by the negation tokenizer.
+const DASH = 45; // '-'
+const OPEN_PAREN = 40; // '('
+const CLOSE_PAREN = 41; // ')'
+const QUOTE = 34; // '"'
+const COLON = 58; // ':'
+const SPACE = 32;
+const TAB = 9;
+const NEWLINE = 10;
+
+// Prefix keywords (without the trailing colon) that a negating dash may precede.
+// Must be kept in sync with the `*Kw` tokens declared in query.grammar.
+const PREFIX_KEYWORDS: ReadonlySet<string> = new Set([
+    "archived", "branch", "b", "content", "c", "case", "file", "f", "fork",
+    "public", "repo", "r", "regex", "lang", "sym", "type", "t", "reposet",
+]);
+
+// Longest keyword length; bounds how far the tokenizer looks ahead for a colon.
+const MAX_KEYWORD_LENGTH = Math.max(...[...PREFIX_KEYWORDS].map((keyword) => keyword.length));
+
+// True for the whitespace characters the grammar skips (space, tab, newline).
+const isWhitespace = (ch: number): boolean => ch === SPACE || ch === TAB || ch === NEWLINE;
+
+/**
+ * External tokenizer for negation.
+ *
+ * Emits a one-character `negate` token for `-` only when the dash is followed
+ * (optionally after whitespace) by `(` or by one of the known prefix keywords and
+ * its colon, e.g. `-file:` or `- (`. In every other case no token is produced, so
+ * the dash is left for the grammar's `word` token (`-test`, `-foo:bar`, ...).
+ *
+ * All lookahead is done with `input.peek`, so the stream position is never moved
+ * past the dash and never has to be rewound.
+ */
+export const negateToken = new ExternalTokenizer((input) => {
+    if (input.next !== DASH) return; // Not a dash
+
+    // Offset, relative to the dash, of the first non-whitespace character after it.
+    // `peek` returns -1 at the end of the input, which ends the loop.
+    let offset = 1;
+    while (isWhitespace(input.peek(offset))) {
+        offset++;
+    }
+
+    // A dash directly in front of a group negates the group.
+    if (input.peek(offset) === OPEN_PAREN) {
+        input.acceptToken(negate, 1); // Accept just the dash
+        return;
+    }
+
+    // Collect the characters up to the next colon and accept only if they spell a
+    // known prefix keyword.
+    let keyword = "";
+    for (let i = 0; i <= MAX_KEYWORD_LENGTH; i++) {
+        const ch = input.peek(offset + i);
+        if (ch === COLON) {
+            if (PREFIX_KEYWORDS.has(keyword)) {
+                input.acceptToken(negate, 1); // Accept just the dash
+            }
+            return;
+        }
+        if (ch < 0 || isWhitespace(ch) || ch === OPEN_PAREN || ch === CLOSE_PAREN || ch === QUOTE) {
+            // End of input, whitespace, paren, or quote before any colon - not a prefix
+            return;
+        }
+        keyword += String.fromCharCode(ch);
+    }
+
+    // No colon within keyword range: don't tokenize as negate (let word handle it)
+});
+
--- a/packages/queryLanguage/test.ts
+++ b/packages/queryLanguage/test.ts
@ -0,0 +1,46 @@
+import { parser } from "./src/parser";
+
+const input = "hello case:yes";
+const tree = parser.parse(input);
+
+/**
+ * Rebuilds the source text from a parse tree.
+ *
+ * Walks the tree and concatenates the text of every non-empty leaf node, together
+ * with any input that lies between consecutive leaves and after the last one.
+ *
+ * @param tree  Tree returned by `parser.parse`.
+ * @param input The text the tree was parsed from.
+ * @returns The concatenated text.
+ */
+const prettyPrint = (tree: ReturnType<typeof parser.parse>, input: string) => {
+    const pieces: string[] = [];
+    let cursor = 0;
+
+    tree.iterate({
+        enter: (ref) => {
+            // Zero-width nodes carry no text.
+            if (ref.to <= ref.from) {
+                return;
+            }
+
+            // Only leaves contribute text; a node with children is covered by them.
+            if (ref.node.firstChild) {
+                return;
+            }
+
+            // Input between the previous leaf and this one (e.g. whitespace).
+            if (cursor < ref.from) {
+                pieces.push(input.slice(cursor, ref.from));
+            }
+
+            pieces.push(input.slice(ref.from, ref.to));
+            cursor = ref.to;
+        }
+    });
+
+    // Anything left after the last leaf.
+    if (cursor < input.length) {
+        pieces.push(input.slice(cursor, input.length));
+    }
+
+    return pieces.join("");
+};
+
+const reconstructed = prettyPrint(tree, input);
+console.log("Original:", input);
+console.log("Reconstructed:", reconstructed);
+console.log("Match:", input === reconstructed);
--- a/packages/queryLanguage/test/basic.txt
+++ b/packages/queryLanguage/test/basic.txt
@ -0,0 +1,72 @@
+# Single term
+
+hello
+
+==>
+
+Program(Term)
+
+# Multiple terms
+
+hello world
+
+==>
+
+Program(AndExpr(Term,Term))
+
+# Multiple terms with various characters
+
+console.log error_handler
+
+==>
+
+Program(AndExpr(Term,Term))
+
+# Term with underscores
+
+my_variable_name
+
+==>
+
+Program(Term)
+
+# Term with dots
+
+com.example.package
+
+==>
+
+Program(Term)
+
+# Term with numbers
+
+func123 test_456
+
+==>
+
+Program(AndExpr(Term,Term))
+
+# Regex pattern
+
+[a-z]+
+
+==>
+
+Program(Term)
+
+# Wildcard pattern
+
+test.*
+
+==>
+
+Program(Term)
+
+# Multiple regex patterns
+
+\w+ [0-9]+ \s*
+
+==>
+
+Program(AndExpr(Term,Term,Term))
+
--- a/packages/queryLanguage/test/grammar.test.ts
+++ b/packages/queryLanguage/test/grammar.test.ts
@ -0,0 +1,21 @@
+import { parser } from "../src/parser";
+import { fileTests } from "@lezer/generator/dist/test";
+import { describe, it } from "vitest";
+import { fileURLToPath } from "url"
+import * as fs from "fs";
+import * as path from "path";
+
+// Directory holding this test file and the `.txt` case files next to it.
+const caseDir = path.dirname(fileURLToPath(import.meta.url));
+
+// Only the `.txt` files in that directory are grammar test cases.
+const caseFiles = fs.readdirSync(caseDir).filter((entry) => /\.txt$/.test(entry));
+
+// One suite per case file (named after the part of the file name before the first dot),
+// with one test per case produced by `fileTests`.
+for (const caseFile of caseFiles) {
+    const suiteName = /^[^\.]*/.exec(caseFile)?.[0] ?? "unknown";
+
+    describe(suiteName, () => {
+        const contents = fs.readFileSync(path.join(caseDir, caseFile), "utf8");
+        for (const testCase of fileTests(contents, caseFile)) {
+            const { name: caseName, run } = testCase;
+            it(caseName, () => run(parser));
+        }
+    });
+}
--- a/packages/queryLanguage/test/grouping.txt
+++ b/packages/queryLanguage/test/grouping.txt
@ -0,0 +1,120 @@
+# Empty parentheses
+
+()
+
+==>
+
+Program(ParenExpr(Term(⚠)))
+
+# Simple grouping
+
+(test)
+
+==>
+
+Program(ParenExpr(Term))
+
+# Multiple terms in group
+
+(hello world)
+
+==>
+
+Program(ParenExpr(AndExpr(Term,Term)))
+
+# Nested parentheses
+
+((test))
+
+==>
+
+Program(ParenExpr(ParenExpr(Term)))
+
+# Multiple groups
+
+(first) (second)
+
+==>
+
+Program(AndExpr(ParenExpr(Term),ParenExpr(Term)))
+
+# Group with multiple terms
+
+(one two three)
+
+==>
+
+Program(ParenExpr(AndExpr(Term,Term,Term)))
+
+# Mixed grouped and ungrouped
+
+test (grouped) another
+
+==>
+
+Program(AndExpr(Term,ParenExpr(Term),Term))
+
+# Deeply nested
+
+(((nested)))
+
+==>
+
+Program(ParenExpr(ParenExpr(ParenExpr(Term))))
+
+# Multiple nested groups
+
+((a b) (c d))
+
+==>
+
+Program(ParenExpr(AndExpr(ParenExpr(AndExpr(Term,Term)),ParenExpr(AndExpr(Term,Term)))))
+
+# Group at start
+
+(start) middle end
+
+==>
+
+Program(AndExpr(ParenExpr(Term),Term,Term))
+
+# Group at end
+
+start middle (end)
+
+==>
+
+Program(AndExpr(Term,Term,ParenExpr(Term)))
+
+# Complex grouping pattern
+
+(a (b c) d)
+
+==>
+
+Program(ParenExpr(AndExpr(Term,ParenExpr(AndExpr(Term,Term)),Term)))
+
+# Sequential groups
+
+(a)(b)(c)
+
+==>
+
+Program(AndExpr(ParenExpr(Term),ParenExpr(Term),ParenExpr(Term)))
+
+# Group with regex
+
+([a-z]+)
+
+==>
+
+Program(ParenExpr(Term))
+
+# Group with dots
+
+(com.example.test)
+
+==>
+
+Program(ParenExpr(Term))
+
--- a/packages/queryLanguage/test/negation.txt
+++ b/packages/queryLanguage/test/negation.txt
@ -0,0 +1,287 @@
+# Literal dash term
+
+-test
+
+==>
+
+Program(Term)
+
+# Quoted dash term
+
+"-excluded"
+
+==>
+
+Program(Term)
+
+# Dash in middle
+
+test-case
+
+==>
+
+Program(Term)
+
+# Multiple dash terms
+
+-one -two -three
+
+==>
+
+Program(AndExpr(Term,Term,Term))
+
+# Negate file prefix
+
+-file:test.js
+
+==>
+
+Program(NegateExpr(PrefixExpr(FileExpr)))
+
+# Negate repo prefix
+
+-repo:archived
+
+==>
+
+Program(NegateExpr(PrefixExpr(RepoExpr)))
+
+# Negate lang prefix
+
+-lang:python
+
+==>
+
+Program(NegateExpr(PrefixExpr(LangExpr)))
+
+# Negate content prefix
+
+-content:TODO
+
+==>
+
+Program(NegateExpr(PrefixExpr(ContentExpr)))
+
+# Negate branch prefix
+
+-branch:develop
+
+==>
+
+Program(NegateExpr(PrefixExpr(BranchExpr)))
+
+# Negate case prefix
+
+-case:yes
+
+==>
+
+Program(NegateExpr(PrefixExpr(CaseExpr)))
+
+# Negate archived prefix
+
+-archived:yes
+
+==>
+
+Program(NegateExpr(PrefixExpr(ArchivedExpr)))
+
+# Negate fork prefix
+
+-fork:yes
+
+==>
+
+Program(NegateExpr(PrefixExpr(ForkExpr)))
+
+# Negate public prefix
+
+-public:no
+
+==>
+
+Program(NegateExpr(PrefixExpr(PublicExpr)))
+
+# Negate symbol prefix
+
+-sym:OldClass
+
+==>
+
+Program(NegateExpr(PrefixExpr(SymExpr)))
+
+# Negate type prefix
+
+-type:repo
+
+==>
+
+Program(NegateExpr(PrefixExpr(TypeExpr)))
+
+# Negate regex prefix
+
+-regex:test.*
+
+==>
+
+Program(NegateExpr(PrefixExpr(RegexExpr)))
+
+# Negate parentheses
+
+-(test)
+
+==>
+
+Program(NegateExpr(ParenExpr(Term)))
+
+# Negate group with multiple terms
+
+-(test exclude)
+
+==>
+
+Program(NegateExpr(ParenExpr(AndExpr(Term,Term))))
+
+# Negate group with prefix
+
+-(file:test.js console.log)
+
+==>
+
+Program(NegateExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),Term))))
+
+# Prefix with literal dash term (bare terms are not negated)
+
+file:test.js -console
+
+==>
+
+Program(AndExpr(PrefixExpr(FileExpr),Term))
+
+# Multiple prefixes with negation
+
+file:test.js -lang:python
+
+==>
+
+Program(AndExpr(PrefixExpr(FileExpr),NegateExpr(PrefixExpr(LangExpr))))
+
+# Complex negation pattern
+
+function -file:test.js -lang:java
+
+==>
+
+Program(AndExpr(Term,NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(LangExpr))))
+
+# Negation inside parentheses
+
+(-file:test.js)
+
+==>
+
+Program(ParenExpr(NegateExpr(PrefixExpr(FileExpr))))
+
+# Multiple negations in group
+
+(-file:a.js -lang:python)
+
+==>
+
+Program(ParenExpr(AndExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(LangExpr)))))
+
+# Mixed in parentheses
+
+(include -file:test.js)
+
+==>
+
+Program(ParenExpr(AndExpr(Term,NegateExpr(PrefixExpr(FileExpr)))))
+
+# Negate nested group
+
+-((file:test.js))
+
+==>
+
+Program(NegateExpr(ParenExpr(ParenExpr(PrefixExpr(FileExpr)))))
+
+# Negate short form prefix
+
+-f:test.js
+
+==>
+
+Program(NegateExpr(PrefixExpr(FileExpr)))
+
+# Negate short form repo
+
+-r:myrepo
+
+==>
+
+Program(NegateExpr(PrefixExpr(RepoExpr)))
+
+# Negate short form branch
+
+-b:main
+
+==>
+
+Program(NegateExpr(PrefixExpr(BranchExpr)))
+
+# Negate short form content
+
+-c:console
+
+==>
+
+Program(NegateExpr(PrefixExpr(ContentExpr)))
+
+# Negate short form type
+
+-t:file
+
+==>
+
+Program(NegateExpr(PrefixExpr(TypeExpr)))
+
+# Negate with prefix in quotes
+
+-file:"test file.js"
+
+==>
+
+Program(NegateExpr(PrefixExpr(FileExpr)))
+
+# Complex with multiple negated prefixes
+
+lang:typescript -file:*.test.ts -file:*.spec.ts
+
+==>
+
+Program(AndExpr(PrefixExpr(LangExpr),NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr))))
+
+# Negated group with prefix
+
+-(file:test.js lang:python)
+
+==>
+
+Program(NegateExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr)))))
+
+# Negate empty group
+
+-()
+
+==>
+
+Program(NegateExpr(ParenExpr(Term(⚠))))
+
+# Negate with space after dash
+
+- file:test.js
+
+==>
+
+Program(NegateExpr(PrefixExpr(FileExpr)))
--- a/packages/queryLanguage/test/operators.txt
+++ b/packages/queryLanguage/test/operators.txt
@ -0,0 +1,271 @@
+# Simple OR
+
+test or example
+
+==>
+
+Program(OrExpr(Term,Term))
+
+# Multiple OR
+
+one or two or three
+
+==>
+
+Program(OrExpr(Term,Term,Term))
+
+# OR with prefixes
+
+file:test.js or file:example.js
+
+==>
+
+Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(FileExpr)))
+
+# OR with negation
+
+test or -file:excluded.js
+
+==>
+
+Program(OrExpr(Term,NegateExpr(PrefixExpr(FileExpr))))
+
+# OR with quoted strings
+
+"first option" or "second option"
+
+==>
+
+Program(OrExpr(Term,Term))
+
+# OR with different prefixes
+
+lang:python or lang:javascript
+
+==>
+
+Program(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr)))
+
+# Multiple terms with OR
+
+function test or class example
+
+==>
+
+Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term)))
+
+# OR in parentheses
+
+(test or example)
+
+==>
+
+Program(ParenExpr(OrExpr(Term,Term)))
+
+# OR with parentheses outside
+
+(test) or (example)
+
+==>
+
+Program(OrExpr(ParenExpr(Term),ParenExpr(Term)))
+
+# Complex OR with grouping
+
+(file:*.js lang:javascript) or (file:*.ts lang:typescript)
+
+==>
+
+Program(OrExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))),ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr)))))
+
+# OR with mixed content
+
+test or file:example.js
+
+==>
+
+Program(OrExpr(Term,PrefixExpr(FileExpr)))
+
+# Prefix OR term
+
+file:test.js or example
+
+==>
+
+Program(OrExpr(PrefixExpr(FileExpr),Term))
+
+# OR with short form prefixes
+
+f:test.js or r:myrepo
+
+==>
+
+Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr)))
+
+# OR with repo prefixes
+
+repo:project1 or repo:project2
+
+==>
+
+Program(OrExpr(PrefixExpr(RepoExpr),PrefixExpr(RepoExpr)))
+
+# OR with branch prefixes
+
+branch:main or branch:develop
+
+==>
+
+Program(OrExpr(PrefixExpr(BranchExpr),PrefixExpr(BranchExpr)))
+
+# OR with lang prefixes
+
+lang:rust or lang:go
+
+==>
+
+Program(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr)))
+
+# OR with content
+
+content:TODO or content:FIXME
+
+==>
+
+Program(OrExpr(PrefixExpr(ContentExpr),PrefixExpr(ContentExpr)))
+
+# OR with negated terms
+
+-file:test.js or -file:spec.js
+
+==>
+
+Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr))))
+
+# OR in nested parentheses
+
+((a or b) or (c or d))
+
+==>
+
+Program(ParenExpr(OrExpr(ParenExpr(OrExpr(Term,Term)),ParenExpr(OrExpr(Term,Term)))))
+
+# OR groups joined by a literal "and" term (AND itself is implicit)
+
+(a or b) and (c or d)
+
+==>
+
+Program(AndExpr(ParenExpr(OrExpr(Term,Term)),Term,ParenExpr(OrExpr(Term,Term))))
+
+# OR with wildcards
+
+*.test.js or *.spec.js
+
+==>
+
+Program(OrExpr(Term,Term))
+
+# OR with regex patterns
+
+[a-z]+ or [0-9]+
+
+==>
+
+Program(OrExpr(Term,Term))
+
+# OR with dots
+
+com.example.test or org.example.test
+
+==>
+
+Program(OrExpr(Term,Term))
+
+# OR with dashes
+
+test-one or test-two
+
+==>
+
+Program(OrExpr(Term,Term))
+
+# Word containing 'or'
+
+order
+
+==>
+
+Program(Term)
+
+# Word containing 'or' in middle
+
+before
+
+==>
+
+Program(Term)
+
+# OR at start
+
+or test
+
+==>
+
+Program(⚠,Term)
+
+# OR at end (or becomes term)
+
+test or
+
+==>
+
+Program(AndExpr(Term,Term))
+
+# Multiple consecutive OR
+
+test or or example
+
+==>
+
+Program(OrExpr(Term,⚠,Term))
+
+# OR with all prefix types
+
+file:*.js or repo:myrepo or lang:javascript
+
+==>
+
+Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr),PrefixExpr(LangExpr)))
+
+# Complex query with OR and negation
+
+(lang:python or lang:ruby) -file:test.py
+
+==>
+
+Program(AndExpr(ParenExpr(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))),NegateExpr(PrefixExpr(FileExpr))))
+
+# OR with quoted prefix values
+
+file:"test one.js" or file:"test two.js"
+
+==>
+
+Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(FileExpr)))
+
+# OR with empty parentheses
+
+() or ()
+
+==>
+
+Program(OrExpr(ParenExpr(Term(⚠)),ParenExpr(Term(⚠))))
+
+# OR with negated groups
+
+-(file:a.js) or -(file:b.js)
+
+==>
+
+Program(OrExpr(NegateExpr(ParenExpr(PrefixExpr(FileExpr))),NegateExpr(ParenExpr(PrefixExpr(FileExpr)))))
--- a/packages/queryLanguage/test/prefixes.txt
+++ b/packages/queryLanguage/test/prefixes.txt
@ -0,0 +1,336 @@
+# File prefix
+
+file:README.md
+
+==>
+
+Program(PrefixExpr(FileExpr))
+
+# File prefix short form
+
+f:index.ts
+
+==>
+
+Program(PrefixExpr(FileExpr))
+
+# Repo prefix
+
+repo:myproject
+
+==>
+
+Program(PrefixExpr(RepoExpr))
+
+# Repo prefix short form
+
+r:github.com/user/repo
+
+==>
+
+Program(PrefixExpr(RepoExpr))
+
+# Content prefix
+
+content:function
+
+==>
+
+Program(PrefixExpr(ContentExpr))
+
+# Content prefix short form
+
+c:console.log
+
+==>
+
+Program(PrefixExpr(ContentExpr))
+
+# Branch prefix
+
+branch:main
+
+==>
+
+Program(PrefixExpr(BranchExpr))
+
+# Branch prefix short form
+
+b:develop
+
+==>
+
+Program(PrefixExpr(BranchExpr))
+
+# Lang prefix
+
+lang:typescript
+
+==>
+
+Program(PrefixExpr(LangExpr))
+
+# Case prefix
+
+case:yes
+
+==>
+
+Program(PrefixExpr(CaseExpr))
+
+# Archived prefix
+
+archived:no
+
+==>
+
+Program(PrefixExpr(ArchivedExpr))
+
+# Fork prefix
+
+fork:yes
+
+==>
+
+Program(PrefixExpr(ForkExpr))
+
+# Public prefix
+
+public:yes
+
+==>
+
+Program(PrefixExpr(PublicExpr))
+
+# Symbol prefix
+
+sym:MyClass
+
+==>
+
+Program(PrefixExpr(SymExpr))
+
+# Type prefix
+
+type:file
+
+==>
+
+Program(PrefixExpr(TypeExpr))
+
+# Type prefix short form
+
+t:repo
+
+==>
+
+Program(PrefixExpr(TypeExpr))
+
+# Regex prefix
+
+regex:test.*
+
+==>
+
+Program(PrefixExpr(RegexExpr))
+
+# RepoSet prefix
+
+reposet:repo1,repo2
+
+==>
+
+Program(PrefixExpr(RepoSetExpr))
+
+# File with wildcard
+
+file:*.ts
+
+==>
+
+Program(PrefixExpr(FileExpr))
+
+# File with path
+
+file:src/components/Button.tsx
+
+==>
+
+Program(PrefixExpr(FileExpr))
+
+# Repo with full URL
+
+repo:github.com/org/project
+
+==>
+
+Program(PrefixExpr(RepoExpr))
+
+# Multiple prefixes
+
+file:test.js repo:myproject
+
+==>
+
+Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr)))
+
+# Prefix with term
+
+file:test.js console.log
+
+==>
+
+Program(AndExpr(PrefixExpr(FileExpr),Term))
+
+# Term then prefix
+
+console.log file:handler.ts
+
+==>
+
+Program(AndExpr(Term,PrefixExpr(FileExpr)))
+
+# Multiple prefixes and terms
+
+lang:typescript function file:handler.ts
+
+==>
+
+Program(AndExpr(PrefixExpr(LangExpr),Term,PrefixExpr(FileExpr)))
+
+# Prefix with regex pattern
+
+file:[a-z]+\.test\.js
+
+==>
+
+Program(PrefixExpr(FileExpr))
+
+# Content with a single unquoted word
+
+content:hello
+
+==>
+
+Program(PrefixExpr(ContentExpr))
+
+# Branch with slashes
+
+branch:feature/new-feature
+
+==>
+
+Program(PrefixExpr(BranchExpr))
+
+# Case values
+
+case:auto
+
+==>
+
+Program(PrefixExpr(CaseExpr))
+
+# RepoSet with multiple repos
+
+reposet:repo1,repo2,repo3
+
+==>
+
+Program(PrefixExpr(RepoSetExpr))
+
+# Symbol with dots
+
+sym:package.Class.method
+
+==>
+
+Program(PrefixExpr(SymExpr))
+
+# Type variations
+
+type:filename
+
+==>
+
+Program(PrefixExpr(TypeExpr))
+
+# Lang with various languages
+
+lang:python
+
+==>
+
+Program(PrefixExpr(LangExpr))
+
+# Archived values
+
+archived:yes
+
+==>
+
+Program(PrefixExpr(ArchivedExpr))
+
+# Fork values
+
+fork:no
+
+==>
+
+Program(PrefixExpr(ForkExpr))
+
+# Public values
+
+public:no
+
+==>
+
+Program(PrefixExpr(PublicExpr))
+
+# Regex with complex pattern
+
+regex:\w+\s*=\s*\d+
+
+==>
+
+Program(PrefixExpr(RegexExpr))
+
+# File with dashes
+
+file:my-component.tsx
+
+==>
+
+Program(PrefixExpr(FileExpr))
+
+# Repo with numbers
+
+repo:project123
+
+==>
+
+Program(PrefixExpr(RepoExpr))
+
+# Content with special chars
+
+content:@Component
+
+==>
+
+Program(PrefixExpr(ContentExpr))
+
+# Prefix in parentheses
+
+(file:test.js)
+
+==>
+
+Program(ParenExpr(PrefixExpr(FileExpr)))
+
+# Multiple prefixes in group
+
+(file:*.ts lang:typescript)
+
+==>
+
+Program(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))))
+
--- a/packages/queryLanguage/test/quoted.txt
+++ b/packages/queryLanguage/test/quoted.txt
@ -0,0 +1,495 @@
+# Simple quoted string
+
+"hello"
+
+==>
+
+Program(Term)
+
+# Quoted string with spaces
+
+"hello world"
+
+==>
+
+Program(Term)
+
+# Multiple words in quotes
+
+"this is a search term"
+
+==>
+
+Program(Term)
+
+# Quoted string with escaped quote
+
+"hello \"world\""
+
+==>
+
+Program(Term)
+
+# Quoted string with escaped backslash
+
+"path\\to\\file"
+
+==>
+
+Program(Term)
+
+# Double backslash
+
+"test\\\\path"
+
+==>
+
+Program(Term)
+
+# Multiple escaped quotes
+
+"\"quoted\" \"words\""
+
+==>
+
+Program(Term)
+
+# Mixed escaped characters
+
+"test\\nvalue\"quoted"
+
+==>
+
+Program(Term)
+
+# Empty quoted string
+
+""
+
+==>
+
+Program(Term)
+
+# Quoted string with only spaces
+
+"   "
+
+==>
+
+Program(Term)
+
+# Quoted string in file prefix
+
+file:"my file.txt"
+
+==>
+
+Program(PrefixExpr(FileExpr))
+
+# Quoted string in repo prefix
+
+repo:"github.com/user/repo name"
+
+==>
+
+Program(PrefixExpr(RepoExpr))
+
+# Quoted string in content prefix
+
+content:"console.log"
+
+==>
+
+Program(PrefixExpr(ContentExpr))
+
+# Quoted string in branch prefix
+
+branch:"feature/my feature"
+
+==>
+
+Program(PrefixExpr(BranchExpr))
+
+# Multiple quoted strings
+
+"first string" "second string"
+
+==>
+
+Program(AndExpr(Term,Term))
+
+# Quoted and unquoted mixed
+
+unquoted "quoted string" another
+
+==>
+
+Program(AndExpr(Term,Term,Term))
+
+# Quoted string with parentheses inside
+
+"(test)"
+
+==>
+
+Program(Term)
+
+# Quoted string with brackets
+
+"[a-z]+"
+
+==>
+
+Program(Term)
+
+# Quoted string with special chars
+
+"test@example.com"
+
+==>
+
+Program(Term)
+
+# Quoted string with colons
+
+"key:value"
+
+==>
+
+Program(Term)
+
+# Quoted string with dashes
+
+"test-case-example"
+
+==>
+
+Program(Term)
+
+# Quoted string with dots
+
+"com.example.package"
+
+==>
+
+Program(Term)
+
+# Quoted string with regex pattern
+
+"\\w+\\s*=\\s*\\d+"
+
+==>
+
+Program(Term)
+
+# Quoted string with forward slashes
+
+"path/to/file"
+
+==>
+
+Program(Term)
+
+# Quoted string with underscores
+
+"my_variable_name"
+
+==>
+
+Program(Term)
+
+# Quoted string with numbers
+
+"test123"
+
+==>
+
+Program(Term)
+
+# Quoted string with mixed case
+
+"CamelCaseTest"
+
+==>
+
+Program(Term)
+
+# Quoted prefix value with spaces
+
+file:"test file.js"
+
+==>
+
+Program(PrefixExpr(FileExpr))
+
+# Multiple prefixes with quoted values
+
+file:"my file.txt" repo:"my repo"
+
+==>
+
+Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr)))
+
+# Quoted string in parentheses
+
+("quoted term")
+
+==>
+
+Program(ParenExpr(Term))
+
+# Multiple quoted in parentheses
+
+("first" "second")
+
+==>
+
+Program(ParenExpr(AndExpr(Term,Term)))
+
+# Quoted with escaped newline
+
+"line1\\nline2"
+
+==>
+
+Program(Term)
+
+# Quoted with tab character
+
+"value\\ttab"
+
+==>
+
+Program(Term)
+
+# Lang prefix with quoted value
+
+lang:"objective-c"
+
+==>
+
+Program(PrefixExpr(LangExpr))
+
+# Sym prefix with quoted value
+
+sym:"My Class"
+
+==>
+
+Program(PrefixExpr(SymExpr))
+
+# Content with quoted phrase
+
+content:"TODO: fix this"
+
+==>
+
+Program(PrefixExpr(ContentExpr))
+
+# Regex prefix with quoted pattern
+
+regex:"func\\s+\\w+"
+
+==>
+
+Program(PrefixExpr(RegexExpr))
+
+# Case prefix with quoted value
+
+case:"yes"
+
+==>
+
+Program(PrefixExpr(CaseExpr))
+
+# Quoted string with at symbol
+
+"@decorator"
+
+==>
+
+Program(Term)
+
+# Quoted string with hash
+
+"#define"
+
+==>
+
+Program(Term)
+
+# Quoted string with dollar sign
+
+"$variable"
+
+==>
+
+Program(Term)
+
+# Quoted string with percent
+
+"100%"
+
+==>
+
+Program(Term)
+
+# Quoted string with ampersand
+
+"foo&bar"
+
+==>
+
+Program(Term)
+
+# Quoted string with asterisk
+
+"test*"
+
+==>
+
+Program(Term)
+
+# Quoted string with plus
+
+"a+b"
+
+==>
+
+Program(Term)
+
+# Quoted string with equals
+
+"a=b"
+
+==>
+
+Program(Term)
+
+# Quoted string with angle brackets
+
+"<template>"
+
+==>
+
+Program(Term)
+
+# Quoted string with pipe
+
+"a|b"
+
+==>
+
+Program(Term)
+
+# Quoted string with tilde
+
+"~/.config"
+
+==>
+
+Program(Term)
+
+# Quoted string with backtick
+
+"`code`"
+
+==>
+
+Program(Term)
+
+# Quoted string with question mark
+
+"what?"
+
+==>
+
+Program(Term)
+
+# Quoted string with exclamation
+
+"important!"
+
+==>
+
+Program(Term)
+
+# Quoted string with semicolon
+
+"stmt;"
+
+==>
+
+Program(Term)
+
+# Quoted string with comma
+
+"a,b,c"
+
+==>
+
+Program(Term)
+
+# Multiple quotes in content
+
+content:"function \"test\" {"
+
+==>
+
+Program(PrefixExpr(ContentExpr))
+
+# Quoted prefix keyword becomes literal
+
+"repo:hello"
+
+==>
+
+Program(Term)
+
+# Quoted file prefix as literal
+
+"file:test.js"
+
+==>
+
+Program(Term)
+
+# Quoted lang prefix as literal
+
+"lang:python"
+
+==>
+
+Program(Term)
+
+# Quoted partial prefix
+
+"repo:"
+
+==>
+
+Program(Term)
+
+# Mix of quoted prefix and real prefix
+
+"repo:test" file:actual.js
+
+==>
+
+Program(AndExpr(Term,PrefixExpr(FileExpr)))
+
+# Quoted short form prefix
+
+"f:test"
+
+==>
+
+Program(Term)
+
+# Quoted branch prefix
+
+"branch:main"
+
+==>
+
+Program(Term)
--- a/packages/queryLanguage/tsconfig.json
+++ b/packages/queryLanguage/tsconfig.json
@ -0,0 +1,23 @@
+{
+    "compilerOptions": {
+      "target": "ES2022",
+      "module": "Node16",
+      "moduleResolution": "Node16",
+      "lib": ["ES2023"],
+      "outDir": "dist",
+      "rootDir": "src",
+      "declaration": true,
+      "declarationMap": true,
+      "sourceMap": true,
+      "strict": true,
+      "noImplicitAny": true,
+      "strictNullChecks": true,
+      "esModuleInterop": true,
+      "forceConsistentCasingInFileNames": true,
+      "skipLibCheck": true,
+      "isolatedModules": true,
+      "resolveJsonModule": true
+    },
+    "include": ["src/parser.ts"],
+    "exclude": ["node_modules", "dist"]
+  } 
--- a/packages/queryLanguage/vitest.config.ts
+++ b/packages/queryLanguage/vitest.config.ts
@ -0,0 +1,8 @@
+import { defineConfig } from 'vitest/config';
+
+// Vitest configuration for the query-language package.
+export default defineConfig({
+    test: {
+        // Tests run in the Node environment.
+        environment: 'node',
+        // Watch mode is turned off.
+        watch: false,
+    }
+});
--- a/yarn.lock
+++ b/yarn.lock
@ -2925,6 +2925,13 @@ __metadata:
  languageName: node
  linkType: hard

+"@lezer/common@npm:^1.3.0":
+  version: 1.3.0
+  resolution: "@lezer/common@npm:1.3.0"
+  checksum: 10c0/e164094920761c2f56c8634d0ae9261ea7c5e6b8202aa08773febc59b8d8284dde5bc7a810c9438e27b978e5ad67d0db03af1ed72924df61b8fa2704acb55deb
+  languageName: node
+  linkType: hard
+
 "@lezer/cpp@npm:^1.0.0":
  version: 1.1.3
  resolution: "@lezer/cpp@npm:1.1.3"
@ -2947,6 +2954,18 @@ __metadata:
  languageName: node
  linkType: hard

+"@lezer/generator@npm:^1.8.0":
+  version: 1.8.0
+  resolution: "@lezer/generator@npm:1.8.0"
+  dependencies:
+    "@lezer/common": "npm:^1.1.0"
+    "@lezer/lr": "npm:^1.3.0"
+  bin:
+    lezer-generator: src/lezer-generator.cjs
+  checksum: 10c0/c9dab9a27b6b757544f51b1612842ded77db7322d23cfd175274f89d783e0987b106c0f51e1203af74b7e56ccc567e8efd633aaffa2086cb55bfc1e3ea591fa6
+  languageName: node
+  linkType: hard
+
 "@lezer/go@npm:^1.0.0":
  version: 1.0.0
  resolution: "@lezer/go@npm:1.0.0"
@ -3029,6 +3048,15 @@ __metadata:
  languageName: node
  linkType: hard

+"@lezer/lr@npm:^1.4.3":
+  version: 1.4.3
+  resolution: "@lezer/lr@npm:1.4.3"
+  dependencies:
+    "@lezer/common": "npm:^1.0.0"
+  checksum: 10c0/3c9fd7eefb0641addfdd0955b4c4014bb8702285c52890b58c937d766320ba2fec8c6b374b46f514079a093c9dd21b6632746a01fed16c250c90d649e5dd12c1
+  languageName: node
+  linkType: hard
+
 "@lezer/markdown@npm:^1.0.0":
  version: 1.4.2
  resolution: "@lezer/markdown@npm:1.4.2"
@ -7988,6 +8016,19 @@ __metadata:
  languageName: unknown
  linkType: soft

+"@sourcebot/query-language@workspace:packages/queryLanguage":
+  version: 0.0.0-use.local
+  resolution: "@sourcebot/query-language@workspace:packages/queryLanguage"
+  dependencies:
+    "@lezer/common": "npm:^1.3.0"
+    "@lezer/generator": "npm:^1.8.0"
+    "@lezer/lr": "npm:^1.4.3"
+    tsx: "npm:^4.19.1"
+    typescript: "npm:^5.7.3"
+    vitest: "npm:^2.1.9"
+  languageName: unknown
+  linkType: soft
+
 "@sourcebot/schemas@workspace:*, @sourcebot/schemas@workspace:packages/schemas":
  version: 0.0.0-use.local
  resolution: "@sourcebot/schemas@workspace:packages/schemas"