mirror of
https://github.com/sourcebot-dev/sourcebot.git
synced 2025-12-12 04:15:30 +00:00
add lezer tree -> grpc transformer
This commit is contained in:
parent
cfdadf29e0
commit
8dce2aaac3
9 changed files with 465 additions and 19 deletions
|
|
@ -18,7 +18,7 @@
|
|||
"dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio",
|
||||
"dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset",
|
||||
"dev:prisma:db:push": "yarn with-env yarn workspace @sourcebot/db prisma:db:push",
|
||||
"build:deps": "yarn workspaces foreach --recursive --topological --from '{@sourcebot/schemas,@sourcebot/db,@sourcebot/shared}' run build"
|
||||
"build:deps": "yarn workspaces foreach --recursive --topological --from '{@sourcebot/schemas,@sourcebot/db,@sourcebot/shared,@sourcebot/query-language}' run build"
|
||||
},
|
||||
"devDependencies": {
|
||||
"concurrently": "^9.2.1",
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
{
|
||||
"name": "@sourcebot/query-language",
|
||||
"private": true,
|
||||
"main": "dist/index.js",
|
||||
"scripts": {
|
||||
"build": "lezer-generator src/query.grammar -o src/parser --typeScript --names && tsc",
|
||||
"test": "vitest",
|
||||
"asdf": "tsx test.ts"
|
||||
"test": "vitest"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@lezer/generator": "^1.8.0",
|
||||
|
|
|
|||
6
packages/queryLanguage/src/index.ts
Normal file
6
packages/queryLanguage/src/index.ts
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
import { parser } from "./parser";

/** The value returned by the generated parser's `parse` method. */
export type Tree = ReturnType<typeof parser.parse>;

/** The type of a tree's `topNode` property. */
export type SyntaxNode = Tree["topNode"];

// Re-export everything the generated parser module exposes.
export * from "./parser";
|
||||
200
packages/queryLanguage/test/precedence.txt
Normal file
200
packages/queryLanguage/test/precedence.txt
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
# OR has lowest precedence - implicit AND groups first
|
||||
|
||||
a b or c d
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term)))
|
||||
|
||||
# Multiple OR operators flatten into a single n-ary OrExpr
|
||||
|
||||
a or b or c
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(Term,Term,Term))
|
||||
|
||||
# AND before OR
|
||||
|
||||
file:test.js error or file:test.go panic
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(AndExpr(PrefixExpr(FileExpr),Term),AndExpr(PrefixExpr(FileExpr),Term)))
|
||||
|
||||
# Negation binds tighter than AND
|
||||
|
||||
-file:test.js error
|
||||
|
||||
==>
|
||||
|
||||
Program(AndExpr(NegateExpr(PrefixExpr(FileExpr)),Term))
|
||||
|
||||
# Negation binds tighter than OR
|
||||
|
||||
-file:a.js or file:b.js
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),PrefixExpr(FileExpr)))
|
||||
|
||||
# Parentheses override precedence
|
||||
|
||||
(a or b) c
|
||||
|
||||
==>
|
||||
|
||||
Program(AndExpr(ParenExpr(OrExpr(Term,Term)),Term))
|
||||
|
||||
# Parentheses override - OR inside parens groups first
|
||||
|
||||
a (b or c)
|
||||
|
||||
==>
|
||||
|
||||
Program(AndExpr(Term,ParenExpr(OrExpr(Term,Term))))
|
||||
|
||||
# Complex: AND and OR with a dash-prefixed term (-b parses as a plain Term, not a NegateExpr)
|
||||
|
||||
a -b or c d
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term)))
|
||||
|
||||
# Negated group in OR expression
|
||||
|
||||
-(a b) or c
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(NegateExpr(ParenExpr(AndExpr(Term,Term))),Term))
|
||||
|
||||
# Multiple negations in OR
|
||||
|
||||
-file:a.js or -file:b.js or file:c.js
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr)),PrefixExpr(FileExpr)))
|
||||
|
||||
# Prefix binds to its value only
|
||||
|
||||
file:a.js b.js
|
||||
|
||||
==>
|
||||
|
||||
Program(AndExpr(PrefixExpr(FileExpr),Term))
|
||||
|
||||
# OR with prefixes and terms mixed
|
||||
|
||||
repo:backend error or repo:frontend warning
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(AndExpr(PrefixExpr(RepoExpr),Term),AndExpr(PrefixExpr(RepoExpr),Term)))
|
||||
|
||||
# Nested parentheses with OR
|
||||
|
||||
((a or b) c) or d
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(ParenExpr(AndExpr(ParenExpr(OrExpr(Term,Term)),Term)),Term))
|
||||
|
||||
# OR at different nesting levels
|
||||
|
||||
(a or (b or c))
|
||||
|
||||
==>
|
||||
|
||||
Program(ParenExpr(OrExpr(Term,ParenExpr(OrExpr(Term,Term)))))
|
||||
|
||||
# Implicit AND groups all adjacent terms before OR
|
||||
|
||||
a b c or d e f
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(AndExpr(Term,Term,Term),AndExpr(Term,Term,Term)))
|
||||
|
||||
# Mixed prefix and regular terms with OR
|
||||
|
||||
lang:go func or lang:rust fn
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(AndExpr(PrefixExpr(LangExpr),Term),AndExpr(PrefixExpr(LangExpr),Term)))
|
||||
|
||||
# Negation doesn't affect OR grouping
|
||||
|
||||
a or -b or c
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(Term,Term,Term))
|
||||
|
||||
# Parentheses can isolate OR from surrounding AND
|
||||
|
||||
a (b or c) d
|
||||
|
||||
==>
|
||||
|
||||
Program(AndExpr(Term,ParenExpr(OrExpr(Term,Term)),Term))
|
||||
|
||||
# Multiple parenthesized groups with AND
|
||||
|
||||
(a or b) (c or d)
|
||||
|
||||
==>
|
||||
|
||||
Program(AndExpr(ParenExpr(OrExpr(Term,Term)),ParenExpr(OrExpr(Term,Term))))
|
||||
|
||||
# Quoted strings are atomic - no precedence inside
|
||||
|
||||
"a or b"
|
||||
|
||||
==>
|
||||
|
||||
Program(Term)
|
||||
|
||||
# Prefix with OR value doesn't split
|
||||
|
||||
file:"a.js or b.js"
|
||||
|
||||
==>
|
||||
|
||||
Program(PrefixExpr(FileExpr))
|
||||
|
||||
# Negated prefix in complex expression
|
||||
|
||||
-file:test.js lang:go error or warning
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(AndExpr(NegateExpr(PrefixExpr(FileExpr)),PrefixExpr(LangExpr),Term),Term))
|
||||
|
||||
# OR followed by parenthesized AND
|
||||
|
||||
a or (b c)
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(Term,ParenExpr(AndExpr(Term,Term))))
|
||||
|
||||
# Empty parens don't affect precedence
|
||||
|
||||
() or a b
|
||||
|
||||
==>
|
||||
|
||||
Program(OrExpr(ParenExpr(Term(⚠)),AndExpr(Term,Term)))
|
||||
|
||||
# Negation of empty group
|
||||
|
||||
-() a
|
||||
|
||||
==>
|
||||
|
||||
Program(AndExpr(NegateExpr(ParenExpr(Term(⚠))),Term))
|
||||
|
||||
|
|
@ -18,6 +18,6 @@
|
|||
"isolatedModules": true,
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["src/parser.ts"],
|
||||
"include": ["src/index.ts"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
|
|
@ -94,6 +94,7 @@
|
|||
"@shopify/lang-jsonc": "^1.0.0",
|
||||
"@sourcebot/codemirror-lang-tcl": "^1.0.12",
|
||||
"@sourcebot/db": "workspace:*",
|
||||
"@sourcebot/query-language": "workspace:*",
|
||||
"@sourcebot/schemas": "workspace:*",
|
||||
"@sourcebot/shared": "workspace:*",
|
||||
"@ssddanbrown/codemirror-lang-twig": "^1.0.0",
|
||||
|
|
|
|||
|
|
@ -16,6 +16,8 @@ import { PrismaClient, Repo } from '@sourcebot/db';
|
|||
import { createLogger, env } from '@sourcebot/shared';
|
||||
import { NextRequest } from 'next/server';
|
||||
import * as path from 'path';
|
||||
import { parser } from '@sourcebot/query-language';
|
||||
import { transformToZoektQuery } from './transformer';
|
||||
|
||||
const logger = createLogger('streamSearchApi');
|
||||
|
||||
|
|
@ -67,21 +69,27 @@ export const POST = async (request: NextRequest) => {
|
|||
|
||||
const { query, matches, contextLines, whole } = parsed.data;
|
||||
|
||||
const tree = parser.parse(query);
|
||||
const zoektQuery = transformToZoektQuery(tree, query);
|
||||
|
||||
console.log(JSON.stringify(zoektQuery, null, 2));
|
||||
|
||||
const searchRequest: SearchRequest = {
|
||||
query: {
|
||||
and: {
|
||||
// @todo: we should use repo_ids to filter out repositories that the user
|
||||
// has access to (if permission syncing is enabled!).
|
||||
children: [
|
||||
{
|
||||
regexp: {
|
||||
regexp: query,
|
||||
case_sensitive: true,
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
query: zoektQuery,
|
||||
// query: {
|
||||
// and: {
|
||||
// // @todo: we should use repo_ids to filter out repositories that the user
|
||||
// // has access to (if permission syncing is enabled!).
|
||||
// children: [
|
||||
// {
|
||||
// regexp: {
|
||||
// regexp: query,
|
||||
// case_sensitive: true,
|
||||
// }
|
||||
// }
|
||||
// ]
|
||||
// }
|
||||
// },
|
||||
opts: {
|
||||
chunk_matches: true,
|
||||
max_match_display_count: matches,
|
||||
|
|
|
|||
230
packages/web/src/app/api/(server)/stream_search/transformer.ts
Normal file
230
packages/web/src/app/api/(server)/stream_search/transformer.ts
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
import { Tree, SyntaxNode } from "@sourcebot/query-language";
|
||||
import { Q } from '@/proto/zoekt/webserver/v1/Q';
|
||||
|
||||
/**
 * Transform a Lezer parse tree into a Zoekt gRPC query.
 *
 * @param tree - The parse tree to convert; conversion starts at its `topNode`.
 * @param input - The query text, forwarded unchanged to `transformNode`.
 * @returns Whatever `transformNode` produces for the tree's top node.
 */
export function transformToZoektQuery(tree: Tree, input: string): Q {
    const { topNode } = tree;
    return transformNode(topNode, input);
}
|
||||
|
||||
/**
 * Recursively converts one syntax node (and its subtree) into a Zoekt query.
 *
 * Dispatches on the node's type name:
 * - `Program`: the transform of its first child, or a `const: false` query
 *   when it has no child.
 * - `AndExpr` / `OrExpr`: an `and` / `or` query over the transformed children.
 * - `NegateExpr`: a `not` query around its `PrefixExpr` or `ParenExpr` child.
 * - `ParenExpr`: the transform of the wrapped node (`const: false` when empty).
 * - `PrefixExpr`: delegated to `transformPrefixExpr`.
 * - `Term`: a case-insensitive substring match on file content.
 * - anything else: logs a warning and returns a `const: true` query.
 *
 * @param node - The syntax node to convert.
 * @param input - The query text; `node.from` / `node.to` are used as offsets into it.
 * @returns The Zoekt query for `node`.
 * @throws Error if a `NegateExpr` has neither a `PrefixExpr` nor a `ParenExpr` child.
 */
function transformNode(node: SyntaxNode, input: string): Q {
    const nodeName = node.type.name;

    switch (nodeName) {
        case "Program": {
            // Program wraps the actual query - transform its child
            const child = node.firstChild;
            if (!child) {
                // Empty query - match nothing
                return { const: false, query: "const" };
            }
            return transformNode(child, input);
        }

        case "AndExpr":
            return {
                and: {
                    children: getChildren(node).map(c => transformNode(c, input))
                },
                query: "and"
            };

        case "OrExpr":
            return {
                or: {
                    children: getChildren(node).map(c => transformNode(c, input))
                },
                query: "or"
            };

        case "NegateExpr": {
            // The negated operand is either a prefix expression or a parenthesized group
            const negateChild = node.getChild("PrefixExpr") || node.getChild("ParenExpr");
            if (!negateChild) {
                throw new Error("NegateExpr missing child");
            }
            return {
                not: {
                    child: transformNode(negateChild, input)
                },
                query: "not"
            };
        }

        case "ParenExpr": {
            // Parentheses just group - transform the inner query
            const innerQuery = node.getChild("query") || node.firstChild;
            if (!innerQuery) {
                return { const: false, query: "const" };
            }
            return transformNode(innerQuery, input);
        }

        case "PrefixExpr":
            // PrefixExpr contains specific prefix types
            return transformPrefixExpr(node, input);

        case "Term": {
            // Plain search term - becomes substring search in content
            const termText = input.substring(node.from, node.to);
            // Only strip quotes when they wrap the whole term. Stripping a lone
            // leading or trailing quote would corrupt unquoted terms such as
            // `class="`, which would otherwise be searched as `class=`.
            const isQuoted =
                termText.length >= 2 && termText.startsWith('"') && termText.endsWith('"');
            const pattern = isQuoted ? termText.slice(1, -1) : termText;
            return {
                substring: {
                    pattern,
                    case_sensitive: false,
                    file_name: false,
                    content: true
                },
                query: "substring"
            };
        }

        default:
            console.warn(`Unhandled node type: ${nodeName}`);
            return { const: true, query: "const" };
    }
}
|
||||
|
||||
/**
 * Converts a `PrefixExpr` node (e.g. `file:test.js`) into a Zoekt query.
 *
 * The node's first child names the concrete prefix type. Its source text is
 * split at the first colon and the part after the colon is the value. One pair
 * of double quotes is removed from the value, but only when the quotes wrap
 * the entire value.
 *
 * @param node - The `PrefixExpr` node.
 * @param input - The query text; the child's `from` / `to` are offsets into it.
 * @returns The Zoekt query for the prefix. Prefix types that are not supported
 *          yet log a warning and return a `const: true` query.
 * @throws Error if the node has no child, the child's text contains no colon,
 *         or the prefix type is unknown.
 */
function transformPrefixExpr(node: SyntaxNode, input: string): Q {
    // Find which specific prefix type this is
    const prefixNode = node.firstChild;
    if (!prefixNode) {
        throw new Error("PrefixExpr has no child");
    }

    const prefixType = prefixNode.type.name;

    // Extract the full text (e.g., "file:test.js") and split on the colon
    const fullText = input.substring(prefixNode.from, prefixNode.to);
    const colonIndex = fullText.indexOf(':');
    if (colonIndex === -1) {
        throw new Error(`${prefixType} missing colon`);
    }

    // Get the value part after the colon. Quotes are removed only when they
    // enclose the whole value, so an unquoted value that merely starts or ends
    // with a quote (e.g. `content:class="`) is kept intact.
    const rawValue = fullText.substring(colonIndex + 1);
    const isQuoted =
        rawValue.length >= 2 && rawValue.startsWith('"') && rawValue.endsWith('"');
    const value = isQuoted ? rawValue.slice(1, -1) : rawValue;

    switch (prefixType) {
        case "FileExpr":
            return {
                substring: {
                    pattern: value,
                    case_sensitive: false,
                    file_name: true,
                    content: false
                },
                query: "substring"
            };

        case "RepoExpr":
            return {
                repo: {
                    regexp: value
                },
                query: "repo"
            };

        case "BranchExpr":
            return {
                branch: {
                    pattern: value,
                    exact: false
                },
                query: "branch"
            };

        case "ContentExpr":
            return {
                substring: {
                    pattern: value,
                    case_sensitive: false,
                    file_name: false,
                    content: true
                },
                query: "substring"
            };

        case "LangExpr":
            return {
                language: {
                    language: value
                },
                query: "language"
            };

        case "SymExpr":
            // Symbol search wraps a pattern
            return {
                symbol: {
                    expr: {
                        substring: {
                            pattern: value,
                            case_sensitive: false,
                            file_name: false,
                            content: true
                        },
                        query: "substring"
                    }
                },
                query: "symbol"
            };

        case "RegexExpr":
            return {
                regexp: {
                    regexp: value,
                    case_sensitive: false,
                    file_name: false,
                    content: true
                },
                query: "regexp"
            };

        // @todo: handle these
        case "CaseExpr":
            // `case:yes/no` is a modifier for other terms, not a pattern. It
            // cannot be applied from here because this function only sees the
            // prefix node itself. Emitting a substring search for the literal
            // value ("yes"/"no") would wrongly restrict results, so fall through
            // to the neutral "not yet implemented" query.
        case "ArchivedExpr":
        case "ForkExpr":
        case "PublicExpr":
            // Archived/fork/public are repo metadata filters that need to be
            // handled via repo filters in Zoekt. For now, return a const query.
            console.warn(`${prefixType} not yet implemented`);
            return { const: true, query: "const" };

        case "RepoSetExpr": {
            // Comma-separated repo names; blank entries (from `a,,b` or a
            // trailing comma) are skipped so no empty name is registered.
            const set: Record<string, boolean> = {};
            for (const entry of value.split(',')) {
                const repoName = entry.trim();
                if (repoName.length > 0) {
                    set[repoName] = true;
                }
            }
            return {
                repo_set: {
                    set
                },
                query: "repo_set"
            };
        }

        default:
            throw new Error(`Unknown prefix type: ${prefixType}`);
    }
}
|
||||
|
||||
/**
 * Collects the direct children of `node` in sibling order, leaving out any
 * child whose type name is "(", ")" or "or".
 *
 * @param node - The parent node whose children are walked via
 *               `firstChild` / `nextSibling`.
 * @returns The retained children; empty when `node` has none.
 */
function getChildren(node: SyntaxNode): SyntaxNode[] {
    // Purely structural node names that carry no query meaning
    const skippedNames = ["(", ")", "or"];
    const kept: SyntaxNode[] = [];

    for (let cursor = node.firstChild; cursor; cursor = cursor.nextSibling) {
        if (skippedNames.includes(cursor.type.name)) {
            continue;
        }
        kept.push(cursor);
    }

    return kept;
}
|
||||
|
||||
|
|
@ -8016,7 +8016,7 @@ __metadata:
|
|||
languageName: unknown
|
||||
linkType: soft
|
||||
|
||||
"@sourcebot/query-language@workspace:packages/queryLanguage":
|
||||
"@sourcebot/query-language@workspace:*, @sourcebot/query-language@workspace:packages/queryLanguage":
|
||||
version: 0.0.0-use.local
|
||||
resolution: "@sourcebot/query-language@workspace:packages/queryLanguage"
|
||||
dependencies:
|
||||
|
|
@ -8152,6 +8152,7 @@ __metadata:
|
|||
"@shopify/lang-jsonc": "npm:^1.0.0"
|
||||
"@sourcebot/codemirror-lang-tcl": "npm:^1.0.12"
|
||||
"@sourcebot/db": "workspace:*"
|
||||
"@sourcebot/query-language": "workspace:*"
|
||||
"@sourcebot/schemas": "workspace:*"
|
||||
"@sourcebot/shared": "workspace:*"
|
||||
"@ssddanbrown/codemirror-lang-twig": "npm:^1.0.0"
|
||||
|
|
|
|||
Loading…
Reference in a new issue