mirror of
https://github.com/sourcebot-dev/sourcebot.git
synced 2025-12-12 12:25:22 +00:00
406 lines
13 KiB
TypeScript
406 lines
13 KiB
TypeScript
import { QueryIR } from './ir';
|
|
import {
|
|
AndExpr,
|
|
ArchivedExpr,
|
|
ContentExpr,
|
|
ContextExpr,
|
|
FileExpr,
|
|
ForkExpr,
|
|
LangExpr,
|
|
NegateExpr,
|
|
OrExpr,
|
|
ParenExpr,
|
|
PrefixExpr,
|
|
Program,
|
|
RepoExpr,
|
|
RepoSetExpr,
|
|
RevisionExpr,
|
|
SymExpr,
|
|
SyntaxNode,
|
|
Term,
|
|
Tree,
|
|
VisibilityExpr,
|
|
} from '@sourcebot/query-language';
|
|
import { parser as _parser } from '@sourcebot/query-language';
|
|
import { PrismaClient } from '@sourcebot/db';
|
|
import { SINGLE_TENANT_ORG_ID } from '@/lib/constants';
|
|
import { ServiceErrorException } from '@/lib/serviceError';
|
|
import { StatusCodes } from 'http-status-codes';
|
|
import { ErrorCode } from '@/lib/errorCodes';
|
|
|
|
// Configure the parser to throw errors when encountering invalid syntax.
|
|
const parser = _parser.configure({
|
|
strict: true,
|
|
});
|
|
|
|
type ArchivedValue = 'yes' | 'no' | 'only';
|
|
type VisibilityValue = 'public' | 'private' | 'any';
|
|
type ForkValue = 'yes' | 'no' | 'only';
|
|
|
|
const isArchivedValue = (value: string): value is ArchivedValue => {
|
|
return value === 'yes' || value === 'no' || value === 'only';
|
|
}
|
|
|
|
const isVisibilityValue = (value: string): value is VisibilityValue => {
|
|
return value === 'public' || value === 'private' || value === 'any';
|
|
}
|
|
|
|
const isForkValue = (value: string): value is ForkValue => {
|
|
return value === 'yes' || value === 'no' || value === 'only';
|
|
}
|
|
|
|
/**
|
|
* Given a query string, parses it into the query intermediate representation.
|
|
*/
|
|
export const parseQuerySyntaxIntoIR = async ({
|
|
query,
|
|
options,
|
|
prisma,
|
|
}: {
|
|
query: string,
|
|
options: {
|
|
isCaseSensitivityEnabled?: boolean;
|
|
isRegexEnabled?: boolean;
|
|
},
|
|
prisma: PrismaClient,
|
|
}): Promise<QueryIR> => {
|
|
|
|
try {
|
|
// First parse the query into a Lezer tree.
|
|
const tree = parser.parse(query);
|
|
|
|
// Then transform the tree into the intermediate representation.
|
|
return transformTreeToIR({
|
|
tree,
|
|
input: query,
|
|
isCaseSensitivityEnabled: options.isCaseSensitivityEnabled ?? false,
|
|
isRegexEnabled: options.isRegexEnabled ?? false,
|
|
onExpandSearchContext: async (contextName: string) => {
|
|
const context = await prisma.searchContext.findUnique({
|
|
where: {
|
|
name_orgId: {
|
|
name: contextName,
|
|
orgId: SINGLE_TENANT_ORG_ID,
|
|
}
|
|
},
|
|
include: {
|
|
repos: true,
|
|
}
|
|
});
|
|
|
|
if (!context) {
|
|
throw new Error(`Search context "${contextName}" not found`);
|
|
}
|
|
|
|
return context.repos.map((repo) => repo.name);
|
|
},
|
|
});
|
|
} catch (error) {
|
|
if (error instanceof SyntaxError) {
|
|
throw new ServiceErrorException({
|
|
statusCode: StatusCodes.BAD_REQUEST,
|
|
errorCode: ErrorCode.FAILED_TO_PARSE_QUERY,
|
|
message: `Failed to parse query "${query}" with message: ${error.message}`,
|
|
});
|
|
}
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Given a Lezer tree, transforms it into the query intermediate representation.
|
|
*/
|
|
const transformTreeToIR = async ({
|
|
tree,
|
|
input,
|
|
isCaseSensitivityEnabled,
|
|
isRegexEnabled,
|
|
onExpandSearchContext,
|
|
}: {
|
|
tree: Tree;
|
|
input: string;
|
|
isCaseSensitivityEnabled: boolean;
|
|
isRegexEnabled: boolean;
|
|
onExpandSearchContext: (contextName: string) => Promise<string[]>;
|
|
}): Promise<QueryIR> => {
|
|
const transformNode = async (node: SyntaxNode): Promise<QueryIR> => {
|
|
switch (node.type.id) {
|
|
case Program: {
|
|
// Program wraps the actual query - transform its child
|
|
const child = node.firstChild;
|
|
if (!child) {
|
|
// Empty query - match nothing
|
|
return { const: false, query: "const" };
|
|
}
|
|
return transformNode(child);
|
|
}
|
|
case AndExpr:
|
|
return {
|
|
and: {
|
|
children: await Promise.all(getChildren(node).map(c => transformNode(c)))
|
|
},
|
|
query: "and"
|
|
}
|
|
|
|
case OrExpr:
|
|
return {
|
|
or: {
|
|
children: await Promise.all(getChildren(node).map(c => transformNode(c)))
|
|
},
|
|
query: "or"
|
|
};
|
|
|
|
case NegateExpr: {
|
|
// Find the child after the negate token
|
|
const negateChild = node.getChild("PrefixExpr") || node.getChild("ParenExpr");
|
|
if (!negateChild) {
|
|
throw new Error("NegateExpr missing child");
|
|
}
|
|
return {
|
|
not: {
|
|
child: await transformNode(negateChild)
|
|
},
|
|
query: "not"
|
|
};
|
|
}
|
|
case ParenExpr: {
|
|
// Parentheses just group - transform the inner query
|
|
const innerQuery = node.getChild("query") || node.firstChild;
|
|
if (!innerQuery) {
|
|
return { const: false, query: "const" };
|
|
}
|
|
return transformNode(innerQuery);
|
|
}
|
|
case PrefixExpr:
|
|
// PrefixExpr contains specific prefix types
|
|
return transformPrefixExpr(node);
|
|
|
|
case Term: {
|
|
const termText = input.substring(node.from, node.to).replace(/^"|"$/g, '');
|
|
|
|
return isRegexEnabled ? {
|
|
regexp: {
|
|
regexp: termText,
|
|
case_sensitive: isCaseSensitivityEnabled,
|
|
file_name: false,
|
|
content: true
|
|
},
|
|
query: "regexp"
|
|
} : {
|
|
substring: {
|
|
pattern: termText,
|
|
case_sensitive: isCaseSensitivityEnabled,
|
|
file_name: false,
|
|
content: true
|
|
},
|
|
query: "substring"
|
|
};
|
|
}
|
|
default:
|
|
console.warn(`Unhandled node type: ${node.type.name} (id: ${node.type.id})`);
|
|
return { const: true, query: "const" };
|
|
}
|
|
}
|
|
|
|
const transformPrefixExpr = async (node: SyntaxNode): Promise<QueryIR> => {
|
|
// Find which specific prefix type this is
|
|
const prefixNode = node.firstChild;
|
|
if (!prefixNode) {
|
|
throw new Error("PrefixExpr has no child");
|
|
}
|
|
|
|
const prefixTypeId = prefixNode.type.id;
|
|
|
|
// Extract the full text (e.g., "file:test.js") and split on the colon
|
|
const fullText = input.substring(prefixNode.from, prefixNode.to);
|
|
const colonIndex = fullText.indexOf(':');
|
|
if (colonIndex === -1) {
|
|
throw new Error(`${prefixNode.type.name} missing colon`);
|
|
}
|
|
|
|
// Get the value part after the colon and remove quotes if present
|
|
const value = fullText.substring(colonIndex + 1).replace(/^"|"$/g, '');
|
|
|
|
switch (prefixTypeId) {
|
|
case FileExpr:
|
|
return {
|
|
regexp: {
|
|
regexp: value,
|
|
case_sensitive: isCaseSensitivityEnabled,
|
|
file_name: true,
|
|
content: false
|
|
},
|
|
query: "regexp"
|
|
};
|
|
|
|
case RepoExpr:
|
|
return {
|
|
repo: {
|
|
regexp: value
|
|
},
|
|
query: "repo"
|
|
};
|
|
|
|
case RevisionExpr:
|
|
return {
|
|
branch: {
|
|
// Special case - "*" means search all branches. Passing in a
|
|
// blank string will match all branches.
|
|
pattern: value === '*' ? "" : value,
|
|
exact: false
|
|
},
|
|
query: "branch"
|
|
};
|
|
|
|
case ContentExpr:
|
|
return {
|
|
substring: {
|
|
pattern: value,
|
|
case_sensitive: isCaseSensitivityEnabled,
|
|
file_name: false,
|
|
content: true
|
|
},
|
|
query: "substring"
|
|
};
|
|
|
|
|
|
case LangExpr:
|
|
return {
|
|
language: {
|
|
language: value
|
|
},
|
|
query: "language"
|
|
};
|
|
|
|
case SymExpr:
|
|
// Symbol search wraps a pattern
|
|
return {
|
|
symbol: {
|
|
expr: {
|
|
regexp: {
|
|
regexp: value,
|
|
case_sensitive: isCaseSensitivityEnabled,
|
|
file_name: false,
|
|
content: true
|
|
},
|
|
query: "regexp"
|
|
}
|
|
},
|
|
query: "symbol"
|
|
};
|
|
|
|
case VisibilityExpr: {
|
|
const rawValue = value.toLowerCase();
|
|
|
|
if (!isVisibilityValue(rawValue)) {
|
|
throw new Error(`Invalid visibility value: ${rawValue}. Expected 'public', 'private', or 'any'`);
|
|
}
|
|
|
|
const flags: ('FLAG_ONLY_PUBLIC' | 'FLAG_ONLY_PRIVATE')[] = [];
|
|
|
|
if (rawValue === 'any') {
|
|
// 'any' means no filter
|
|
} else if (rawValue === 'public') {
|
|
flags.push('FLAG_ONLY_PUBLIC');
|
|
} else if (rawValue === 'private') {
|
|
flags.push('FLAG_ONLY_PRIVATE');
|
|
}
|
|
|
|
return {
|
|
raw_config: {
|
|
flags
|
|
},
|
|
query: "raw_config"
|
|
};
|
|
}
|
|
|
|
case ArchivedExpr: {
|
|
const rawValue = value.toLowerCase();
|
|
|
|
if (!isArchivedValue(rawValue)) {
|
|
throw new Error(`Invalid archived value: ${rawValue}. Expected 'yes', 'no', or 'only'`);
|
|
}
|
|
|
|
const flags: ('FLAG_ONLY_ARCHIVED' | 'FLAG_NO_ARCHIVED')[] = [];
|
|
|
|
if (rawValue === 'yes') {
|
|
// 'yes' means include archived repositories (default)
|
|
} else if (rawValue === 'no') {
|
|
flags.push('FLAG_NO_ARCHIVED');
|
|
} else if (rawValue === 'only') {
|
|
flags.push('FLAG_ONLY_ARCHIVED');
|
|
}
|
|
|
|
return {
|
|
raw_config: {
|
|
flags
|
|
},
|
|
query: "raw_config"
|
|
};
|
|
}
|
|
case ForkExpr: {
|
|
const rawValue = value.toLowerCase();
|
|
|
|
if (!isForkValue(rawValue)) {
|
|
throw new Error(`Invalid fork value: ${rawValue}. Expected 'yes', 'no', or 'only'`);
|
|
}
|
|
|
|
const flags: ('FLAG_ONLY_FORKS' | 'FLAG_NO_FORKS')[] = [];
|
|
|
|
if (rawValue === 'yes') {
|
|
// 'yes' means include forks (default)
|
|
} else if (rawValue === 'no') {
|
|
flags.push('FLAG_NO_FORKS');
|
|
} else if (rawValue === 'only') {
|
|
flags.push('FLAG_ONLY_FORKS');
|
|
}
|
|
|
|
return {
|
|
raw_config: {
|
|
flags
|
|
},
|
|
query: "raw_config"
|
|
};
|
|
}
|
|
|
|
case ContextExpr: {
|
|
const repoNames = await onExpandSearchContext(value);
|
|
return {
|
|
repo_set: {
|
|
set: repoNames.reduce((acc, s) => {
|
|
acc[s.trim()] = true;
|
|
return acc;
|
|
}, {} as Record<string, boolean>)
|
|
},
|
|
query: "repo_set"
|
|
};
|
|
}
|
|
|
|
case RepoSetExpr: {
|
|
return {
|
|
repo_set: {
|
|
set: value.split(',').reduce((acc, s) => {
|
|
acc[s.trim()] = true;
|
|
return acc;
|
|
}, {} as Record<string, boolean>)
|
|
},
|
|
query: "repo_set"
|
|
};
|
|
}
|
|
default:
|
|
throw new Error(`Unknown prefix type: ${prefixNode.type.name} (id: ${prefixTypeId})`);
|
|
}
|
|
}
|
|
|
|
return transformNode(tree.topNode);
|
|
}
|
|
|
|
const getChildren = (node: SyntaxNode): SyntaxNode[] => {
|
|
const children: SyntaxNode[] = [];
|
|
let child = node.firstChild;
|
|
while (child) {
|
|
children.push(child);
|
|
child = child.nextSibling;
|
|
}
|
|
return children;
|
|
}
|