diff --git a/packages/queryLanguage/.gitignore b/packages/queryLanguage/.gitignore new file mode 100644 index 00000000..81d9910b --- /dev/null +++ b/packages/queryLanguage/.gitignore @@ -0,0 +1,2 @@ +/node_modules/ +/dist diff --git a/packages/queryLanguage/package.json b/packages/queryLanguage/package.json new file mode 100644 index 00000000..f1659da1 --- /dev/null +++ b/packages/queryLanguage/package.json @@ -0,0 +1,19 @@ +{ + "name": "@sourcebot/query-language", + "private": true, + "scripts": { + "build": "lezer-generator src/query.grammar -o src/parser --typeScript --names && tsc", + "test": "vitest", + "asdf": "tsx test.ts" + }, + "devDependencies": { + "@lezer/generator": "^1.8.0", + "tsx": "^4.19.1", + "typescript": "^5.7.3", + "vitest": "^2.1.9" + }, + "dependencies": { + "@lezer/common": "^1.3.0", + "@lezer/lr": "^1.4.3" + } +} diff --git a/packages/queryLanguage/src/parser.terms.ts b/packages/queryLanguage/src/parser.terms.ts new file mode 100644 index 00000000..a123cf2b --- /dev/null +++ b/packages/queryLanguage/src/parser.terms.ts @@ -0,0 +1,10 @@ +// This file was generated by lezer-generator. You probably shouldn't edit it. +export const + negate = 24, + Program = 1, + OrExpr = 2, + AndExpr = 3, + NegateExpr = 4, + PrefixExpr = 5, + ParenExpr = 19, + Term = 20 diff --git a/packages/queryLanguage/src/parser.ts b/packages/queryLanguage/src/parser.ts new file mode 100644 index 00000000..f1cb1153 --- /dev/null +++ b/packages/queryLanguage/src/parser.ts @@ -0,0 +1,18 @@ +// This file was generated by lezer-generator. You probably shouldn't edit it. +import {LRParser} from "@lezer/lr" +import {negateToken} from "./tokens" +export const parser = LRParser.deserialize({ + version: 14, + states: "'hOVQROOO!^QQO'#CbO!^QQO'#CcO!^QQO'#CdO!^QQO'#CeO!^QQO'#CfO!^QQO'#CgO!^QQO'#ChO!^QQO'#CiO!^QQO'#CjO!^QQO'#CkO!^QQO'#ClO!^QQO'#CmO!^QQO'#CnOOQP'#Ca'#CaOVQRO'#CoO!fQQO'#C`OOQP'#Cp'#CpOOQP'#Cy'#CyO#dQRO'#CxO#qQQO'#CxO#|QQO'#C^OOQO'#Cw'#CwQOQQOOOOQP'#C{'#C{OOQP,58|,58|OOQP,58},58}OOQP,59O,59OOOQP,59P,59POOQP,59Q,59QOOQP,59R,59ROOQP,59S,59SOOQP,59T,59TOOQP,59U,59UOOQP,59V,59VOOQP,59W,59WOOQP,59X,59XOOQP,59Y,59YO$RQQO,59ZOOQP,58z,58zOOQP'#Cq'#CqO$WQRO,58yOVQRO'#CrO$eQQO,58xOOQP1G.u1G.uOOQP-E6o-E6oO$pQRO'#CxOOQO'#Cx'#CxOOQO,59^,59^OOQO-E6p-E6p", + stateData: "%a~OjOS~Oh`OnPOpaOqaOrQOsROtSOuTOvUOwVOxWOyXOzYO{ZO|[O}]O!O_O~OphOqhO~OnPOrQOsROtSOuTOvUOwVOxWOyXOzYO{ZO|[O}]O!O_O~OgkX!QlX!PkX~PVOgkX!QlX!PkX~O!QzO~O!P|O~OgRa!QRa!PRa~PVO!QzOgQa!PQa~OglX!QlX!PlX~PVOpnrstuvwxyz{|}!Qqx~", + goto: "$ZpPPqu|!U!a!a!a!a!a!a!a!a!a!a!a!a!a!U|!j!qPPPP!w!}#UP#bTfO_SdO_R!Pz]bO_cyz!O[bO_cyz!ORw`_^O_`cyz!OSyc!OR}yQ{eR!R{QgORv_SeO_R!QzScO_Uxcy!OR!OzQiPQjQQkRQlSQmTQnUQoVQpWQqXQrYQsZQt[Ru]", + nodeNames: "⚠ Program OrExpr AndExpr NegateExpr PrefixExpr ArchivedExpr BranchExpr ContentExpr CaseExpr FileExpr ForkExpr PublicExpr RepoExpr RegexExpr LangExpr SymExpr TypeExpr RepoSetExpr ParenExpr Term", + maxTerm: 48, + skippedNodes: [0], + repeatNodeCount: 2, + tokenData: "!=_~RlOX!yXY#wYZ#wZp!ypq#wqr!yrs$Vsx!yxy%yyz&Oz#T!y#T#U&T#U#V.g#V#W4}#W#Y!y#Y#Z@V#Z#`!y#`#aHe#a#c!y#c#dMP#d#e!!m#e#f!y#f#g!)T#g#h!5V#h#i!8s#i;'S!y;'S;=`#q<%lO!y~#OZq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~#tP;=`<%l!y~#|Rj~XY#wYZ#wpq#w~$YWOY$VZr$Vrs$rs#O$V#O#P$w#P;'S$V;'S;=`%s<%lO$V~$wOp~~$zRO;'S$V;'S;=`%T;=`O$V~%WXOY$VZr$Vrs$rs#O$V#O#P$w#P;'S$V;'S;=`%s;=`<%l$V<%lO$V~%vP;=`<%l$V~&OO!O~~&TO!P~~&Y]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#f!y#f#g'R#g;'S!y;'S;=`#q<%lO!y~'W]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#V!y#V#W(P#W;'S!y;'S;=`#q<%lO!y~(U]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#[!y#[#](}#];'S!y;'S;=`#q<%lO!y~)S]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#]!y#]#^){#^;'S!y;'S;=`#q<%lO!y~*Q]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#j!y#j#k*y#k;'S!y;'S;=`#q<%lO!y~+O]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#X!y#X#Y+w#Y;'S!y;'S;=`#q<%lO!y~+|]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#W!y#W#X,u#X;'S!y;'S;=`#q<%lO!y~,zZq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]-m!];'S!y;'S;=`#q<%lO!y~-tZn~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~.l]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]/e!]#f!y#f#g0_#g;'S!y;'S;=`#q<%lO!y~/lZr~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~0d]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#T!y#T#U1]#U;'S!y;'S;=`#q<%lO!y~1b]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#b!y#b#c2Z#c;'S!y;'S;=`#q<%lO!y~2`]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#V!y#V#W3X#W;'S!y;'S;=`#q<%lO!y~3^]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#[!y#[#]4V#];'S!y;'S;=`#q<%lO!y~4[Zq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]/e!];'S!y;'S;=`#q<%lO!y~5S_q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]6R!]#T!y#T#U6{#U#c!y#c#d:i#d;'S!y;'S;=`#q<%lO!y~6YZs~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~7Q]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#g!y#g#h7y#h;'S!y;'S;=`#q<%lO!y~8O]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#X!y#X#Y8w#Y;'S!y;'S;=`#q<%lO!y~8|Zq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]9o!];'S!y;'S;=`#q<%lO!y~9vZt~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~:n]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#b!y#b#c;g#c;'S!y;'S;=`#q<%lO!y~;l]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#h!y#h#ia#c;'S!y;'S;=`#q<%lO!y~>f]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#h!y#h#i?_#i;'S!y;'S;=`#q<%lO!y~?dZq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]6R!];'S!y;'S;=`#q<%lO!y~@[_q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]AZ!]#]!y#]#^BT#^#c!y#c#dDw#d;'S!y;'S;=`#q<%lO!y~AbZu~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~BY]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#`!y#`#aCR#a;'S!y;'S;=`#q<%lO!y~CW]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#X!y#X#YDP#Y;'S!y;'S;=`#q<%lO!y~DUZq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]AZ!];'S!y;'S;=`#q<%lO!y~D|]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#f!y#f#gEu#g;'S!y;'S;=`#q<%lO!y~Ez]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#_!y#_#`Fs#`;'S!y;'S;=`#q<%lO!y~FxZq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]Gk!];'S!y;'S;=`#q<%lO!y~GrZv~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~Hj]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#T!y#T#UIc#U;'S!y;'S;=`#q<%lO!y~Ih]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#b!y#b#cJa#c;'S!y;'S;=`#q<%lO!y~Jf]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#Z!y#Z#[K_#[;'S!y;'S;=`#q<%lO!y~KdZq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]LV!];'S!y;'S;=`#q<%lO!y~L^Zz~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~MU]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#f!y#f#gM}#g;'S!y;'S;=`#q<%lO!y~NSfq~OX! hXZ!!bZp! hpq!!bqr! hrs!!bsx! hxz!!bz}! h}!O! h!O!Q! h!Q![!y![!]! h!]!c! h!c!}!y!}#R! h#R#S!y#S#T! h#T#o!y#o;'S! h;'S;=`!!g<%lO! h~! oZ!Q~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~!!gO!Q~~!!jP;=`<%l! h~!!r]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#i!y#i#j!#k#j;'S!y;'S;=`#q<%lO!y~!#p]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#U!y#U#V!$i#V;'S!y;'S;=`#q<%lO!y~!$n]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#`!y#`#a!%g#a;'S!y;'S;=`#q<%lO!y~!%l]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#]!y#]#^!&e#^;'S!y;'S;=`#q<%lO!y~!&j]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#V!y#V#W!'c#W;'S!y;'S;=`#q<%lO!y~!'hZq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!(Z!];'S!y;'S;=`#q<%lO!y~!(bZw~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~!)Y]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!*R!]#X!y#X#Y!*{#Y;'S!y;'S;=`#q<%lO!y~!*YZx~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~!+Q_q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#Z!y#Z#[!,P#[#d!y#d#e!/m#e;'S!y;'S;=`#q<%lO!y~!,U]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#X!y#X#Y!,}#Y;'S!y;'S;=`#q<%lO!y~!-S]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#l!y#l#m!-{#m;'S!y;'S;=`#q<%lO!y~!.QZq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!.s!];'S!y;'S;=`#q<%lO!y~!.zZy~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~!/r]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#c!y#c#d!0k#d;'S!y;'S;=`#q<%lO!y~!0p]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!*R!]#g!y#g#h!1i#h;'S!y;'S;=`#q<%lO!y~!1n]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#X!y#X#Y!2g#Y;'S!y;'S;=`#q<%lO!y~!2l]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#h!y#h#i!3e#i;'S!y;'S;=`#q<%lO!y~!3jZq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!4]!];'S!y;'S;=`#q<%lO!y~!4dZ}~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~!5[]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#m!y#m#n!6T#n;'S!y;'S;=`#q<%lO!y~!6Y]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#a!y#a#b!7R#b;'S!y;'S;=`#q<%lO!y~!7WZq~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!7y!];'S!y;'S;=`#q<%lO!y~!8QZ{~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~!8x]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!9q!]#m!y#m#n!:k#n;'S!y;'S;=`#q<%lO!y~!9xZ|~q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!];'S!y;'S;=`#q<%lO!y~!:p]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#d!y#d#e!;i#e;'S!y;'S;=`#q<%lO!y~!;n]q~OX!yZp!yqr!ysx!yz}!y}!O!y!O![!y![!]!y!]#X!y#X#Y! { + if (input.next !== 45 /* '-' */) return; // Not a dash + + const startPos = input.pos; + + // Look ahead to see what follows the dash + input.advance(); + + // Skip whitespace + let ch = input.next; + while (ch === 32 || ch === 9 || ch === 10) { + input.advance(); + ch = input.next; + } + + // Check if followed by opening paren + if (ch === 40 /* '(' */) { + input.acceptToken(negate, -input.pos + startPos + 1); // Accept just the dash + return; + } + + // Check if followed by a prefix keyword (by checking for keyword followed by colon) + // We need to look ahead to find the colon + const checkPos = input.pos; + let foundColon = false; + let charCount = 0; + + // Look ahead up to 10 characters to find a colon + while (charCount < 10 && ch >= 0) { + if (ch === 58 /* ':' */) { + foundColon = true; + break; + } + if (ch === 32 || ch === 9 || ch === 10 || ch === 40 || ch === 41 || ch === 34) { + // Hit whitespace, paren, or quote - not a prefix + break; + } + input.advance(); + ch = input.next; + charCount++; + } + + // Reset position + while (input.pos > checkPos) { + input.advance(-1); + } + + if (foundColon) { + // It's a prefix keyword, accept as negate + input.acceptToken(negate, -input.pos + startPos + 1); + return; + } + + // Otherwise, don't tokenize as negate (let word handle it) +}); + diff --git a/packages/queryLanguage/test.ts b/packages/queryLanguage/test.ts new file mode 100644 index 00000000..e7a6e378 --- /dev/null +++ b/packages/queryLanguage/test.ts @@ -0,0 +1,46 @@ +import { parser } from "./src/parser"; + +const input = "hello case:yes"; +const tree = parser.parse(input); + +const prettyPrint = (tree: ReturnType, input: string) => { + let result = ""; + let lastPos = 0; + + tree.iterate({ + enter: (node) => { + // If this is a leaf node (terminal), collect its text + if (node.from >= node.to) { + // Empty node, skip + return; + } + + // Check if this node has any children by checking the tree structure + const nodeTree = node.node; + const isLeaf = !nodeTree.firstChild; + + if (isLeaf) { + // Add any whitespace between the last position and this node + if (node.from > lastPos) { + result += input.slice(lastPos, node.from); + } + + // Add the node's text + result += input.slice(node.from, node.to); + lastPos = node.to; + } + } + }); + + // Add any trailing content + if (lastPos < input.length) { + result += input.slice(lastPos, input.length); + } + + return result; +} + +const reconstructed = prettyPrint(tree, input); +console.log("Original:", input); +console.log("Reconstructed:", reconstructed); +console.log("Match:", input === reconstructed); \ No newline at end of file diff --git a/packages/queryLanguage/test/basic.txt b/packages/queryLanguage/test/basic.txt new file mode 100644 index 00000000..de8bb93b --- /dev/null +++ b/packages/queryLanguage/test/basic.txt @@ -0,0 +1,72 @@ +# Single term + +hello + +==> + +Program(Term) + +# Multiple terms + +hello world + +==> + +Program(AndExpr(Term,Term)) + +# Multiple terms with various characters + +console.log error_handler + +==> + +Program(AndExpr(Term,Term)) + +# Term with underscores + +my_variable_name + +==> + +Program(Term) + +# Term with dots + +com.example.package + +==> + +Program(Term) + +# Term with numbers + +func123 test_456 + +==> + +Program(AndExpr(Term,Term)) + +# Regex pattern + +[a-z]+ + +==> + +Program(Term) + +# Wildcard pattern + +test.* + +==> + +Program(Term) + +# Multiple regex patterns + +\w+ [0-9]+ \s* + +==> + +Program(AndExpr(Term,Term,Term)) + diff --git a/packages/queryLanguage/test/grammar.test.ts b/packages/queryLanguage/test/grammar.test.ts new file mode 100644 index 00000000..a0286285 --- /dev/null +++ b/packages/queryLanguage/test/grammar.test.ts @@ -0,0 +1,21 @@ +import { parser } from "../src/parser"; +import { fileTests } from "@lezer/generator/dist/test"; +import { describe, it } from "vitest"; +import { fileURLToPath } from "url" +import * as fs from "fs"; +import * as path from "path"; + +const caseDir = path.dirname(fileURLToPath(import.meta.url)) + +for (const file of fs.readdirSync(caseDir)) { + if (!/\.txt$/.test(file)) { + continue; + } + + let name = /^[^\.]*/.exec(file)?.[0]; + describe(name ?? "unknown", () => { + for (const { name, run } of fileTests(fs.readFileSync(path.join(caseDir, file), "utf8"), file)) { + it(name, () => run(parser)); + } + }); +} \ No newline at end of file diff --git a/packages/queryLanguage/test/grouping.txt b/packages/queryLanguage/test/grouping.txt new file mode 100644 index 00000000..e8c7798e --- /dev/null +++ b/packages/queryLanguage/test/grouping.txt @@ -0,0 +1,120 @@ +# Empty parentheses + +() + +==> + +Program(ParenExpr(Term(⚠))) + +# Simple grouping + +(test) + +==> + +Program(ParenExpr(Term)) + +# Multiple terms in group + +(hello world) + +==> + +Program(ParenExpr(AndExpr(Term,Term))) + +# Nested parentheses + +((test)) + +==> + +Program(ParenExpr(ParenExpr(Term))) + +# Multiple groups + +(first) (second) + +==> + +Program(AndExpr(ParenExpr(Term),ParenExpr(Term))) + +# Group with multiple terms + +(one two three) + +==> + +Program(ParenExpr(AndExpr(Term,Term,Term))) + +# Mixed grouped and ungrouped + +test (grouped) another + +==> + +Program(AndExpr(Term,ParenExpr(Term),Term)) + +# Deeply nested + +(((nested))) + +==> + +Program(ParenExpr(ParenExpr(ParenExpr(Term)))) + +# Multiple nested groups + +((a b) (c d)) + +==> + +Program(ParenExpr(AndExpr(ParenExpr(AndExpr(Term,Term)),ParenExpr(AndExpr(Term,Term))))) + +# Group at start + +(start) middle end + +==> + +Program(AndExpr(ParenExpr(Term),Term,Term)) + +# Group at end + +start middle (end) + +==> + +Program(AndExpr(Term,Term,ParenExpr(Term))) + +# Complex grouping pattern + +(a (b c) d) + +==> + +Program(ParenExpr(AndExpr(Term,ParenExpr(AndExpr(Term,Term)),Term))) + +# Sequential groups + +(a)(b)(c) + +==> + +Program(AndExpr(ParenExpr(Term),ParenExpr(Term),ParenExpr(Term))) + +# Group with regex + +([a-z]+) + +==> + +Program(ParenExpr(Term)) + +# Group with dots + +(com.example.test) + +==> + +Program(ParenExpr(Term)) + diff --git a/packages/queryLanguage/test/negation.txt b/packages/queryLanguage/test/negation.txt new file mode 100644 index 00000000..bb61e7db --- /dev/null +++ b/packages/queryLanguage/test/negation.txt @@ -0,0 +1,287 @@ +# Literal dash term + +-test + +==> + +Program(Term) + +# Quoted dash term + +"-excluded" + +==> + +Program(Term) + +# Dash in middle + +test-case + +==> + +Program(Term) + +# Multiple dash terms + +-one -two -three + +==> + +Program(AndExpr(Term,Term,Term)) + +# Negate file prefix + +-file:test.js + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) + +# Negate repo prefix + +-repo:archived + +==> + +Program(NegateExpr(PrefixExpr(RepoExpr))) + +# Negate lang prefix + +-lang:python + +==> + +Program(NegateExpr(PrefixExpr(LangExpr))) + +# Negate content prefix + +-content:TODO + +==> + +Program(NegateExpr(PrefixExpr(ContentExpr))) + +# Negate branch prefix + +-branch:develop + +==> + +Program(NegateExpr(PrefixExpr(BranchExpr))) + +# Negate case prefix + +-case:yes + +==> + +Program(NegateExpr(PrefixExpr(CaseExpr))) + +# Negate archived prefix + +-archived:yes + +==> + +Program(NegateExpr(PrefixExpr(ArchivedExpr))) + +# Negate fork prefix + +-fork:yes + +==> + +Program(NegateExpr(PrefixExpr(ForkExpr))) + +# Negate public prefix + +-public:no + +==> + +Program(NegateExpr(PrefixExpr(PublicExpr))) + +# Negate symbol prefix + +-sym:OldClass + +==> + +Program(NegateExpr(PrefixExpr(SymExpr))) + +# Negate type prefix + +-type:repo + +==> + +Program(NegateExpr(PrefixExpr(TypeExpr))) + +# Negate regex prefix + +-regex:test.* + +==> + +Program(NegateExpr(PrefixExpr(RegexExpr))) + +# Negate parentheses + +-(test) + +==> + +Program(NegateExpr(ParenExpr(Term))) + +# Negate group with multiple terms + +-(test exclude) + +==> + +Program(NegateExpr(ParenExpr(AndExpr(Term,Term)))) + +# Negate group with prefix + +-(file:test.js console.log) + +==> + +Program(NegateExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),Term)))) + +# Prefix with negated term + +file:test.js -console + +==> + +Program(AndExpr(PrefixExpr(FileExpr),Term)) + +# Multiple prefixes with negation + +file:test.js -lang:python + +==> + +Program(AndExpr(PrefixExpr(FileExpr),NegateExpr(PrefixExpr(LangExpr)))) + +# Complex negation pattern + +function -file:test.js -lang:java + +==> + +Program(AndExpr(Term,NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(LangExpr)))) + +# Negation inside parentheses + +(-file:test.js) + +==> + +Program(ParenExpr(NegateExpr(PrefixExpr(FileExpr)))) + +# Multiple negations in group + +(-file:a.js -lang:python) + +==> + +Program(ParenExpr(AndExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(LangExpr))))) + +# Mixed in parentheses + +(include -file:test.js) + +==> + +Program(ParenExpr(AndExpr(Term,NegateExpr(PrefixExpr(FileExpr))))) + +# Negate nested group + +-((file:test.js)) + +==> + +Program(NegateExpr(ParenExpr(ParenExpr(PrefixExpr(FileExpr))))) + +# Negate short form prefix + +-f:test.js + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) + +# Negate short form repo + +-r:myrepo + +==> + +Program(NegateExpr(PrefixExpr(RepoExpr))) + +# Negate short form branch + +-b:main + +==> + +Program(NegateExpr(PrefixExpr(BranchExpr))) + +# Negate short form content + +-c:console + +==> + +Program(NegateExpr(PrefixExpr(ContentExpr))) + +# Negate short form type + +-t:file + +==> + +Program(NegateExpr(PrefixExpr(TypeExpr))) + +# Negate with prefix in quotes + +-file:"test file.js" + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) + +# Complex with multiple negated prefixes + +lang:typescript -file:*.test.ts -file:*.spec.ts + +==> + +Program(AndExpr(PrefixExpr(LangExpr),NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr)))) + +# Negated group with prefix + +-(file:test.js lang:python) + +==> + +Program(NegateExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))))) + +# Negate empty group + +-() + +==> + +Program(NegateExpr(ParenExpr(Term(⚠)))) + +# Negate with space after dash + +- file:test.js + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) diff --git a/packages/queryLanguage/test/operators.txt b/packages/queryLanguage/test/operators.txt new file mode 100644 index 00000000..a1aa9a44 --- /dev/null +++ b/packages/queryLanguage/test/operators.txt @@ -0,0 +1,271 @@ +# Simple OR + +test or example + +==> + +Program(OrExpr(Term,Term)) + +# Multiple OR + +one or two or three + +==> + +Program(OrExpr(Term,Term,Term)) + +# OR with prefixes + +file:test.js or file:example.js + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(FileExpr))) + +# OR with negation + +test or -file:excluded.js + +==> + +Program(OrExpr(Term,NegateExpr(PrefixExpr(FileExpr)))) + +# OR with quoted strings + +"first option" or "second option" + +==> + +Program(OrExpr(Term,Term)) + +# OR with different prefixes + +lang:python or lang:javascript + +==> + +Program(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))) + +# Multiple terms with OR + +function test or class example + +==> + +Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term))) + +# OR in parentheses + +(test or example) + +==> + +Program(ParenExpr(OrExpr(Term,Term))) + +# OR with parentheses outside + +(test) or (example) + +==> + +Program(OrExpr(ParenExpr(Term),ParenExpr(Term))) + +# Complex OR with grouping + +(file:*.js lang:javascript) or (file:*.ts lang:typescript) + +==> + +Program(OrExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))),ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))))) + +# OR with mixed content + +test or file:example.js + +==> + +Program(OrExpr(Term,PrefixExpr(FileExpr))) + +# Prefix OR term + +file:test.js or example + +==> + +Program(OrExpr(PrefixExpr(FileExpr),Term)) + +# OR with short form prefixes + +f:test.js or r:myrepo + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr))) + +# OR with repo prefixes + +repo:project1 or repo:project2 + +==> + +Program(OrExpr(PrefixExpr(RepoExpr),PrefixExpr(RepoExpr))) + +# OR with branch prefixes + +branch:main or branch:develop + +==> + +Program(OrExpr(PrefixExpr(BranchExpr),PrefixExpr(BranchExpr))) + +# OR with lang prefixes + +lang:rust or lang:go + +==> + +Program(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))) + +# OR with content + +content:TODO or content:FIXME + +==> + +Program(OrExpr(PrefixExpr(ContentExpr),PrefixExpr(ContentExpr))) + +# OR with negated terms + +-file:test.js or -file:spec.js + +==> + +Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr)))) + +# OR in nested parentheses + +((a or b) or (c or d)) + +==> + +Program(ParenExpr(OrExpr(ParenExpr(OrExpr(Term,Term)),ParenExpr(OrExpr(Term,Term))))) + +# Multiple OR with parentheses and implicit AND + +(a or b) and (c or d) + +==> + +Program(AndExpr(ParenExpr(OrExpr(Term,Term)),Term,ParenExpr(OrExpr(Term,Term)))) + +# OR with wildcards + +*.test.js or *.spec.js + +==> + +Program(OrExpr(Term,Term)) + +# OR with regex patterns + +[a-z]+ or [0-9]+ + +==> + +Program(OrExpr(Term,Term)) + +# OR with dots + +com.example.test or org.example.test + +==> + +Program(OrExpr(Term,Term)) + +# OR with dashes + +test-one or test-two + +==> + +Program(OrExpr(Term,Term)) + +# Word containing 'or' + +order + +==> + +Program(Term) + +# Word containing 'or' in middle + +before + +==> + +Program(Term) + +# OR at start + +or test + +==> + +Program(⚠,Term) + +# OR at end (or becomes term) + +test or + +==> + +Program(AndExpr(Term,Term)) + +# Multiple consecutive OR + +test or or example + +==> + +Program(OrExpr(Term,⚠,Term)) + +# OR with all prefix types + +file:*.js or repo:myrepo or lang:javascript + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr),PrefixExpr(LangExpr))) + +# Complex query with OR and negation + +(lang:python or lang:ruby) -file:test.py + +==> + +Program(AndExpr(ParenExpr(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))),NegateExpr(PrefixExpr(FileExpr)))) + +# OR with quoted prefix values + +file:"test one.js" or file:"test two.js" + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(FileExpr))) + +# OR with empty parentheses + +() or () + +==> + +Program(OrExpr(ParenExpr(Term(⚠)),ParenExpr(Term(⚠)))) + +# OR with negated groups + +-(file:a.js) or -(file:b.js) + +==> + +Program(OrExpr(NegateExpr(ParenExpr(PrefixExpr(FileExpr))),NegateExpr(ParenExpr(PrefixExpr(FileExpr))))) diff --git a/packages/queryLanguage/test/prefixes.txt b/packages/queryLanguage/test/prefixes.txt new file mode 100644 index 00000000..1213526d --- /dev/null +++ b/packages/queryLanguage/test/prefixes.txt @@ -0,0 +1,336 @@ +# File prefix + +file:README.md + +==> + +Program(PrefixExpr(FileExpr)) + +# File prefix short form + +f:index.ts + +==> + +Program(PrefixExpr(FileExpr)) + +# Repo prefix + +repo:myproject + +==> + +Program(PrefixExpr(RepoExpr)) + +# Repo prefix short form + +r:github.com/user/repo + +==> + +Program(PrefixExpr(RepoExpr)) + +# Content prefix + +content:function + +==> + +Program(PrefixExpr(ContentExpr)) + +# Content prefix short form + +c:console.log + +==> + +Program(PrefixExpr(ContentExpr)) + +# Branch prefix + +branch:main + +==> + +Program(PrefixExpr(BranchExpr)) + +# Branch prefix short form + +b:develop + +==> + +Program(PrefixExpr(BranchExpr)) + +# Lang prefix + +lang:typescript + +==> + +Program(PrefixExpr(LangExpr)) + +# Case prefix + +case:yes + +==> + +Program(PrefixExpr(CaseExpr)) + +# Archived prefix + +archived:no + +==> + +Program(PrefixExpr(ArchivedExpr)) + +# Fork prefix + +fork:yes + +==> + +Program(PrefixExpr(ForkExpr)) + +# Public prefix + +public:yes + +==> + +Program(PrefixExpr(PublicExpr)) + +# Symbol prefix + +sym:MyClass + +==> + +Program(PrefixExpr(SymExpr)) + +# Type prefix + +type:file + +==> + +Program(PrefixExpr(TypeExpr)) + +# Type prefix short form + +t:repo + +==> + +Program(PrefixExpr(TypeExpr)) + +# Regex prefix + +regex:test.* + +==> + +Program(PrefixExpr(RegexExpr)) + +# RepoSet prefix + +reposet:repo1,repo2 + +==> + +Program(PrefixExpr(RepoSetExpr)) + +# File with wildcard + +file:*.ts + +==> + +Program(PrefixExpr(FileExpr)) + +# File with path + +file:src/components/Button.tsx + +==> + +Program(PrefixExpr(FileExpr)) + +# Repo with full URL + +repo:github.com/org/project + +==> + +Program(PrefixExpr(RepoExpr)) + +# Multiple prefixes + +file:test.js repo:myproject + +==> + +Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr))) + +# Prefix with term + +file:test.js console.log + +==> + +Program(AndExpr(PrefixExpr(FileExpr),Term)) + +# Term then prefix + +console.log file:handler.ts + +==> + +Program(AndExpr(Term,PrefixExpr(FileExpr))) + +# Multiple prefixes and terms + +lang:typescript function file:handler.ts + +==> + +Program(AndExpr(PrefixExpr(LangExpr),Term,PrefixExpr(FileExpr))) + +# Prefix with regex pattern + +file:[a-z]+\.test\.js + +==> + +Program(PrefixExpr(FileExpr)) + +# Content with spaces in value (no quotes) + +content:hello + +==> + +Program(PrefixExpr(ContentExpr)) + +# Branch with slashes + +branch:feature/new-feature + +==> + +Program(PrefixExpr(BranchExpr)) + +# Case values + +case:auto + +==> + +Program(PrefixExpr(CaseExpr)) + +# RepoSet with multiple repos + +reposet:repo1,repo2,repo3 + +==> + +Program(PrefixExpr(RepoSetExpr)) + +# Symbol with dots + +sym:package.Class.method + +==> + +Program(PrefixExpr(SymExpr)) + +# Type variations + +type:filename + +==> + +Program(PrefixExpr(TypeExpr)) + +# Lang with various languages + +lang:python + +==> + +Program(PrefixExpr(LangExpr)) + +# Archived values + +archived:yes + +==> + +Program(PrefixExpr(ArchivedExpr)) + +# Fork values + +fork:no + +==> + +Program(PrefixExpr(ForkExpr)) + +# Public values + +public:no + +==> + +Program(PrefixExpr(PublicExpr)) + +# Regex with complex pattern + +regex:\w+\s*=\s*\d+ + +==> + +Program(PrefixExpr(RegexExpr)) + +# File with dashes + +file:my-component.tsx + +==> + +Program(PrefixExpr(FileExpr)) + +# Repo with numbers + +repo:project123 + +==> + +Program(PrefixExpr(RepoExpr)) + +# Content with special chars + +content:@Component + +==> + +Program(PrefixExpr(ContentExpr)) + +# Prefix in parentheses + +(file:test.js) + +==> + +Program(ParenExpr(PrefixExpr(FileExpr))) + +# Multiple prefixes in group + +(file:*.ts lang:typescript) + +==> + +Program(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr)))) + diff --git a/packages/queryLanguage/test/quoted.txt b/packages/queryLanguage/test/quoted.txt new file mode 100644 index 00000000..d55089d2 --- /dev/null +++ b/packages/queryLanguage/test/quoted.txt @@ -0,0 +1,495 @@ +# Simple quoted string + +"hello" + +==> + +Program(Term) + +# Quoted string with spaces + +"hello world" + +==> + +Program(Term) + +# Multiple words in quotes + +"this is a search term" + +==> + +Program(Term) + +# Quoted string with escaped quote + +"hello \"world\"" + +==> + +Program(Term) + +# Quoted string with escaped backslash + +"path\\to\\file" + +==> + +Program(Term) + +# Double backslash + +"test\\\\path" + +==> + +Program(Term) + +# Multiple escaped quotes + +"\"quoted\" \"words\"" + +==> + +Program(Term) + +# Mixed escaped characters + +"test\\nvalue\"quoted" + +==> + +Program(Term) + +# Empty quoted string + +"" + +==> + +Program(Term) + +# Quoted string with only spaces + +" " + +==> + +Program(Term) + +# Quoted string in file prefix + +file:"my file.txt" + +==> + +Program(PrefixExpr(FileExpr)) + +# Quoted string in repo prefix + +repo:"github.com/user/repo name" + +==> + +Program(PrefixExpr(RepoExpr)) + +# Quoted string in content prefix + +content:"console.log" + +==> + +Program(PrefixExpr(ContentExpr)) + +# Quoted string in branch prefix + +branch:"feature/my feature" + +==> + +Program(PrefixExpr(BranchExpr)) + +# Multiple quoted strings + +"first string" "second string" + +==> + +Program(AndExpr(Term,Term)) + +# Quoted and unquoted mixed + +unquoted "quoted string" another + +==> + +Program(AndExpr(Term,Term,Term)) + +# Quoted string with parentheses inside + +"(test)" + +==> + +Program(Term) + +# Quoted string with brackets + +"[a-z]+" + +==> + +Program(Term) + +# Quoted string with special chars + +"test@example.com" + +==> + +Program(Term) + +# Quoted string with colons + +"key:value" + +==> + +Program(Term) + +# Quoted string with dashes + +"test-case-example" + +==> + +Program(Term) + +# Quoted string with dots + +"com.example.package" + +==> + +Program(Term) + +# Quoted string with regex pattern + +"\\w+\\s*=\\s*\\d+" + +==> + +Program(Term) + +# Quoted string with forward slashes + +"path/to/file" + +==> + +Program(Term) + +# Quoted string with underscores + +"my_variable_name" + +==> + +Program(Term) + +# Quoted string with numbers + +"test123" + +==> + +Program(Term) + +# Quoted string with mixed case + +"CamelCaseTest" + +==> + +Program(Term) + +# Quoted prefix value with spaces + +file:"test file.js" + +==> + +Program(PrefixExpr(FileExpr)) + +# Multiple prefixes with quoted values + +file:"my file.txt" repo:"my repo" + +==> + +Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr))) + +# Quoted string in parentheses + +("quoted term") + +==> + +Program(ParenExpr(Term)) + +# Multiple quoted in parentheses + +("first" "second") + +==> + +Program(ParenExpr(AndExpr(Term,Term))) + +# Quoted with escaped newline + +"line1\\nline2" + +==> + +Program(Term) + +# Quoted with tab character + +"value\\ttab" + +==> + +Program(Term) + +# Lang prefix with quoted value + +lang:"objective-c" + +==> + +Program(PrefixExpr(LangExpr)) + +# Sym prefix with quoted value + +sym:"My Class" + +==> + +Program(PrefixExpr(SymExpr)) + +# Content with quoted phrase + +content:"TODO: fix this" + +==> + +Program(PrefixExpr(ContentExpr)) + +# Regex prefix with quoted pattern + +regex:"func\\s+\\w+" + +==> + +Program(PrefixExpr(RegexExpr)) + +# Case prefix with quoted value + +case:"yes" + +==> + +Program(PrefixExpr(CaseExpr)) + +# Quoted string with at symbol + +"@decorator" + +==> + +Program(Term) + +# Quoted string with hash + +"#define" + +==> + +Program(Term) + +# Quoted string with dollar sign + +"$variable" + +==> + +Program(Term) + +# Quoted string with percent + +"100%" + +==> + +Program(Term) + +# Quoted string with ampersand + +"foo&bar" + +==> + +Program(Term) + +# Quoted string with asterisk + +"test*" + +==> + +Program(Term) + +# Quoted string with plus + +"a+b" + +==> + +Program(Term) + +# Quoted string with equals + +"a=b" + +==> + +Program(Term) + +# Quoted string with angle brackets + +"