sourcebot/packages/web/src/features/search/parser.ts
2025-11-20 18:42:45 -08:00

406 lines
13 KiB
TypeScript

import { QueryIR } from './ir';
import {
AndExpr,
ArchivedExpr,
ContentExpr,
ContextExpr,
FileExpr,
ForkExpr,
LangExpr,
NegateExpr,
OrExpr,
ParenExpr,
PrefixExpr,
Program,
RepoExpr,
RepoSetExpr,
RevisionExpr,
SymExpr,
SyntaxNode,
Term,
Tree,
VisibilityExpr,
} from '@sourcebot/query-language';
import { parser as _parser } from '@sourcebot/query-language';
import { PrismaClient } from '@sourcebot/db';
import { SINGLE_TENANT_ORG_ID } from '@/lib/constants';
import { ServiceErrorException } from '@/lib/serviceError';
import { StatusCodes } from 'http-status-codes';
import { ErrorCode } from '@/lib/errorCodes';
// Configure the parser to throw errors when encountering invalid syntax.
const parser = _parser.configure({
strict: true,
});
type ArchivedValue = 'yes' | 'no' | 'only';
type VisibilityValue = 'public' | 'private' | 'any';
type ForkValue = 'yes' | 'no' | 'only';
const isArchivedValue = (value: string): value is ArchivedValue => {
return value === 'yes' || value === 'no' || value === 'only';
}
const isVisibilityValue = (value: string): value is VisibilityValue => {
return value === 'public' || value === 'private' || value === 'any';
}
const isForkValue = (value: string): value is ForkValue => {
return value === 'yes' || value === 'no' || value === 'only';
}
/**
* Given a query string, parses it into the query intermediate representation.
*/
export const parseQuerySyntaxIntoIR = async ({
query,
options,
prisma,
}: {
query: string,
options: {
isCaseSensitivityEnabled?: boolean;
isRegexEnabled?: boolean;
},
prisma: PrismaClient,
}): Promise<QueryIR> => {
try {
// First parse the query into a Lezer tree.
const tree = parser.parse(query);
// Then transform the tree into the intermediate representation.
return transformTreeToIR({
tree,
input: query,
isCaseSensitivityEnabled: options.isCaseSensitivityEnabled ?? false,
isRegexEnabled: options.isRegexEnabled ?? false,
onExpandSearchContext: async (contextName: string) => {
const context = await prisma.searchContext.findUnique({
where: {
name_orgId: {
name: contextName,
orgId: SINGLE_TENANT_ORG_ID,
}
},
include: {
repos: true,
}
});
if (!context) {
throw new Error(`Search context "${contextName}" not found`);
}
return context.repos.map((repo) => repo.name);
},
});
} catch (error) {
if (error instanceof SyntaxError) {
throw new ServiceErrorException({
statusCode: StatusCodes.BAD_REQUEST,
errorCode: ErrorCode.FAILED_TO_PARSE_QUERY,
message: `Failed to parse query "${query}" with message: ${error.message}`,
});
}
throw error;
}
}
/**
* Given a Lezer tree, transforms it into the query intermediate representation.
*/
const transformTreeToIR = async ({
tree,
input,
isCaseSensitivityEnabled,
isRegexEnabled,
onExpandSearchContext,
}: {
tree: Tree;
input: string;
isCaseSensitivityEnabled: boolean;
isRegexEnabled: boolean;
onExpandSearchContext: (contextName: string) => Promise<string[]>;
}): Promise<QueryIR> => {
const transformNode = async (node: SyntaxNode): Promise<QueryIR> => {
switch (node.type.id) {
case Program: {
// Program wraps the actual query - transform its child
const child = node.firstChild;
if (!child) {
// Empty query - match nothing
return { const: false, query: "const" };
}
return transformNode(child);
}
case AndExpr:
return {
and: {
children: await Promise.all(getChildren(node).map(c => transformNode(c)))
},
query: "and"
}
case OrExpr:
return {
or: {
children: await Promise.all(getChildren(node).map(c => transformNode(c)))
},
query: "or"
};
case NegateExpr: {
// Find the child after the negate token
const negateChild = node.getChild("PrefixExpr") || node.getChild("ParenExpr");
if (!negateChild) {
throw new Error("NegateExpr missing child");
}
return {
not: {
child: await transformNode(negateChild)
},
query: "not"
};
}
case ParenExpr: {
// Parentheses just group - transform the inner query
const innerQuery = node.getChild("query") || node.firstChild;
if (!innerQuery) {
return { const: false, query: "const" };
}
return transformNode(innerQuery);
}
case PrefixExpr:
// PrefixExpr contains specific prefix types
return transformPrefixExpr(node);
case Term: {
const termText = input.substring(node.from, node.to).replace(/^"|"$/g, '');
return isRegexEnabled ? {
regexp: {
regexp: termText,
case_sensitive: isCaseSensitivityEnabled,
file_name: false,
content: true
},
query: "regexp"
} : {
substring: {
pattern: termText,
case_sensitive: isCaseSensitivityEnabled,
file_name: false,
content: true
},
query: "substring"
};
}
default:
console.warn(`Unhandled node type: ${node.type.name} (id: ${node.type.id})`);
return { const: true, query: "const" };
}
}
const transformPrefixExpr = async (node: SyntaxNode): Promise<QueryIR> => {
// Find which specific prefix type this is
const prefixNode = node.firstChild;
if (!prefixNode) {
throw new Error("PrefixExpr has no child");
}
const prefixTypeId = prefixNode.type.id;
// Extract the full text (e.g., "file:test.js") and split on the colon
const fullText = input.substring(prefixNode.from, prefixNode.to);
const colonIndex = fullText.indexOf(':');
if (colonIndex === -1) {
throw new Error(`${prefixNode.type.name} missing colon`);
}
// Get the value part after the colon and remove quotes if present
const value = fullText.substring(colonIndex + 1).replace(/^"|"$/g, '');
switch (prefixTypeId) {
case FileExpr:
return {
regexp: {
regexp: value,
case_sensitive: isCaseSensitivityEnabled,
file_name: true,
content: false
},
query: "regexp"
};
case RepoExpr:
return {
repo: {
regexp: value
},
query: "repo"
};
case RevisionExpr:
return {
branch: {
// Special case - "*" means search all branches. Passing in a
// blank string will match all branches.
pattern: value === '*' ? "" : value,
exact: false
},
query: "branch"
};
case ContentExpr:
return {
substring: {
pattern: value,
case_sensitive: isCaseSensitivityEnabled,
file_name: false,
content: true
},
query: "substring"
};
case LangExpr:
return {
language: {
language: value
},
query: "language"
};
case SymExpr:
// Symbol search wraps a pattern
return {
symbol: {
expr: {
regexp: {
regexp: value,
case_sensitive: isCaseSensitivityEnabled,
file_name: false,
content: true
},
query: "regexp"
}
},
query: "symbol"
};
case VisibilityExpr: {
const rawValue = value.toLowerCase();
if (!isVisibilityValue(rawValue)) {
throw new Error(`Invalid visibility value: ${rawValue}. Expected 'public', 'private', or 'any'`);
}
const flags: ('FLAG_ONLY_PUBLIC' | 'FLAG_ONLY_PRIVATE')[] = [];
if (rawValue === 'any') {
// 'any' means no filter
} else if (rawValue === 'public') {
flags.push('FLAG_ONLY_PUBLIC');
} else if (rawValue === 'private') {
flags.push('FLAG_ONLY_PRIVATE');
}
return {
raw_config: {
flags
},
query: "raw_config"
};
}
case ArchivedExpr: {
const rawValue = value.toLowerCase();
if (!isArchivedValue(rawValue)) {
throw new Error(`Invalid archived value: ${rawValue}. Expected 'yes', 'no', or 'only'`);
}
const flags: ('FLAG_ONLY_ARCHIVED' | 'FLAG_NO_ARCHIVED')[] = [];
if (rawValue === 'yes') {
// 'yes' means include archived repositories (default)
} else if (rawValue === 'no') {
flags.push('FLAG_NO_ARCHIVED');
} else if (rawValue === 'only') {
flags.push('FLAG_ONLY_ARCHIVED');
}
return {
raw_config: {
flags
},
query: "raw_config"
};
}
case ForkExpr: {
const rawValue = value.toLowerCase();
if (!isForkValue(rawValue)) {
throw new Error(`Invalid fork value: ${rawValue}. Expected 'yes', 'no', or 'only'`);
}
const flags: ('FLAG_ONLY_FORKS' | 'FLAG_NO_FORKS')[] = [];
if (rawValue === 'yes') {
// 'yes' means include forks (default)
} else if (rawValue === 'no') {
flags.push('FLAG_NO_FORKS');
} else if (rawValue === 'only') {
flags.push('FLAG_ONLY_FORKS');
}
return {
raw_config: {
flags
},
query: "raw_config"
};
}
case ContextExpr: {
const repoNames = await onExpandSearchContext(value);
return {
repo_set: {
set: repoNames.reduce((acc, s) => {
acc[s.trim()] = true;
return acc;
}, {} as Record<string, boolean>)
},
query: "repo_set"
};
}
case RepoSetExpr: {
return {
repo_set: {
set: value.split(',').reduce((acc, s) => {
acc[s.trim()] = true;
return acc;
}, {} as Record<string, boolean>)
},
query: "repo_set"
};
}
default:
throw new Error(`Unknown prefix type: ${prefixNode.type.name} (id: ${prefixTypeId})`);
}
}
return transformNode(tree.topNode);
}
const getChildren = (node: SyntaxNode): SyntaxNode[] => {
const children: SyntaxNode[] = [];
let child = node.firstChild;
while (child) {
children.push(child);
child = child.nextSibling;
}
return children;
}