sourcebot/packages/queryLanguage/src/tokens.ts
Brendan Kellam f3a8fa3dab
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
feat(web): Streamed code search (#623)
* generate protobuf types

* stream poc over SSE

* wip: make stream search api follow existing schema. Modify UI to support streaming

* fix scrolling issue

* Dockerfile

* wip on lezer parser grammar for query language

* add lezer tree -> grpc transformer

* remove spammy log message

* fix syntax highlighting by adding a module resolution for @lezer/common

* further wip on query language

* Add case sensitivity and regexp toggles

* Improved type safety / cleanup for query lang

* support search contexts

* update Dockerfile with query language package

* fix filter

* Add skeletons to filter panel when search is streaming

* add client side caching

* improved cancellation handling

* add isSearchExausted flag for flagging when a search captured all results

* Add back posthog search_finished event

* remove zoekt tenant enforcement

* migrate blocking search over to grpc. Centralize everything in searchApi

* branch handling

* plumb file weburl

* add repo_sets filter for repositories a user has access to

* refactor a bunch of stuff + add support for passing in Query IR to search api

* refactor

* dev README

* wip on better error handling

* error handling for stream path

* update mcp

* changelog wip

* type fix

* style

* Support rev:* wildcard

* changelog

* changelog nit

* feedback

* fix build

* update docs and remove unneeded test file
2025-11-22 15:33:31 -08:00

59 lines
1.7 KiB
TypeScript

import { ExternalTokenizer } from "@lezer/lr";
import { negate } from "./parser.terms";
// External tokenizer for negation.
// Tokenizes `-` as a `negate` token only when it is followed (after optional
// whitespace) by an opening paren or by a prefix keyword — i.e. a run of
// non-delimiter characters terminated by a `:`. Any other `-` is left for the
// generic word tokenizer to consume.
export const negateToken = new ExternalTokenizer((input) => {
    if (input.next !== 45 /* '-' */) return; // Not a dash

    // Look ahead with `peek` so the stream position never moves. The previous
    // implementation advanced the stream and rewound it via `input.advance(-1)`,
    // but `InputStream.advance` is only documented to move the stream forward;
    // `peek(offset)` is the supported way to inspect upcoming input without
    // consuming it.
    let offset = 1;
    let ch = input.peek(offset);

    // Skip whitespace between the dash and whatever follows it.
    while (ch === 32 /* ' ' */ || ch === 9 /* '\t' */ || ch === 10 /* '\n' */) {
        ch = input.peek(++offset);
    }

    // `-(` negates a group: accept just the dash (token ends at pos + 1).
    if (ch === 40 /* '(' */) {
        input.acceptToken(negate, 1);
        return;
    }

    // Otherwise check for a prefix keyword: scan until we hit a colon (it's a
    // keyword) or a delimiter (whitespace, paren, or quote — not a keyword).
    while (ch >= 0 /* not EOF */) {
        if (ch === 58 /* ':' */) {
            // `-keyword:` — negated prefix expression; accept just the dash.
            input.acceptToken(negate, 1);
            return;
        }
        if (
            ch === 32 /* ' ' */ || ch === 9 /* '\t' */ || ch === 10 /* '\n' */ ||
            ch === 40 /* '(' */ || ch === 41 /* ')' */ || ch === 34 /* '"' */
        ) {
            break;
        }
        ch = input.peek(++offset);
    }
    // No colon before a delimiter/EOF: don't tokenize as negate (let word handle it).
});