signoz/frontend/src/utils/antlrQueryUtils.ts

/* eslint-disable sonarjs/no-collapsible-if */
/* eslint-disable no-continue */
/* eslint-disable sonarjs/cognitive-complexity */
import { CharStreams, CommonTokenStream } from 'antlr4';
import FilterQueryLexer from 'parser/FilterQueryLexer';
import FilterQueryParser from 'parser/FilterQueryParser';
import {
	IDetailedError,
	IQueryContext,
	IToken,
	IValidationResult,
} from 'types/antlrQueryTypes';

// Custom error listener to capture ANTLR errors
class QueryErrorListener {
	private errors: IDetailedError[] = [];

	syntaxError(
		_recognizer: any,
		offendingSymbol: any,
		line: number,
		column: number,
		msg: string,
	): void {
		// For unterminated quotes, we only want to show one error
		if (this.hasUnterminatedQuoteError() && msg.includes('expecting')) {
			return;
		}

		const error: IDetailedError = {
			message: msg,
			line,
			column,
			offendingSymbol: offendingSymbol?.text || String(offendingSymbol),
		};

		// Extract expected tokens if available
		if (msg.includes('expecting')) {
			const expectedTokens = msg
				.split('expecting')[1]
				.trim()
				.split(',')
				.map((token) => token.trim());
			error.expectedTokens = expectedTokens;
		}

		// Check if this is a duplicate error (same location and similar message)
		const isDuplicate = this.errors.some(
			(e) =>
				e.line === line &&
				e.column === column &&
				this.isSimilarError(e.message, msg),
		);

		if (!isDuplicate) {
			this.errors.push(error);
		}
	}

	private hasUnterminatedQuoteError(): boolean {
		return this.errors.some(
			(error) =>
				error.message.includes('unterminated') ||
				(error.message.includes('missing') && error.message.includes("'")),
		);
	}

	private isSimilarError = (msg1: string, msg2: string): boolean => {
		// Consider errors similar if they're for the same core issue
		const normalize = (msg: string): string =>
			msg.toLowerCase().replace(/['"`]/g, 'quote').replace(/\s+/g, ' ').trim();

		return normalize(msg1) === normalize(msg2);
	};

	// eslint-disable-next-line @typescript-eslint/no-empty-function
	reportAmbiguity = (): void => {};

	// eslint-disable-next-line @typescript-eslint/no-empty-function
	reportAttemptingFullContext = (): void => {};

	// eslint-disable-next-line @typescript-eslint/no-empty-function
	reportContextSensitivity = (): void => {};

	getErrors(): IDetailedError[] {
		return this.errors;
	}

	hasErrors(): boolean {
		return this.errors.length > 0;
	}

	getFormattedErrors(): string[] {
		return this.errors.map((error) => {
			let message = `Line ${error.line}:${error.column} - ${error.message}`;

			if (error.offendingSymbol && error.offendingSymbol !== 'undefined') {
				message += `\nOffending symbol: '${error.offendingSymbol}'`;
			}

			if (error.expectedTokens && error.expectedTokens.length > 0) {
				message += `\nExpected: ${error.expectedTokens.join(', ')}`;
			}

			return message;
		});
	}
}

export const validateQuery = (query: string): IValidationResult => {
	// Empty query is considered invalid
	if (!query.trim()) {
		return {
			isValid: false,
			message: 'Query cannot be empty',
			errors: ['Query cannot be empty'],
		};
	}

	try {
		const errorListener = new QueryErrorListener();
		const inputStream = CharStreams.fromString(query);

		// Setup lexer
		const lexer = new FilterQueryLexer(inputStream);
		lexer.removeErrorListeners(); // Remove default error listeners
		lexer.addErrorListener(errorListener);

		// Setup parser
		const tokenStream = new CommonTokenStream(lexer);
		const parser = new FilterQueryParser(tokenStream);
		parser.removeErrorListeners(); // Remove default error listeners
		parser.addErrorListener(errorListener);

		// Try parsing
		const parsedTree = parser.query();

		console.log('parsedTree', parsedTree);

		// Check if any errors were captured
		if (errorListener.hasErrors()) {
			return {
				isValid: false,
				message: 'Query syntax error',
				errors: errorListener.getFormattedErrors(),
			};
		}

		return {
			isValid: true,
			message: 'Query is valid!',
			errors: [],
		};
	} catch (error) {
		const errorMessage =
			error instanceof Error ? error.message : 'Invalid query syntax';
		return {
			isValid: false,
			message: 'Invalid query syntax',
			errors: [errorMessage],
		};
	}
};

// Helper function to find key-operator-value triplets in token stream
function findKeyOperatorValueTriplet(
	allTokens: IToken[],
	currentToken: IToken,
	isInKey: boolean,
	isInOperator: boolean,
	isInValue: boolean,
): { keyToken?: string; operatorToken?: string; valueToken?: string } {
	// Find current token index in allTokens
	let currentTokenIndex = -1;
	for (let i = 0; i < allTokens.length; i++) {
		if (
			allTokens[i].start === currentToken.start &&
			allTokens[i].stop === currentToken.stop &&
			allTokens[i].type === currentToken.type
		) {
			currentTokenIndex = i;
			break;
		}
	}

	if (currentTokenIndex === -1) return {};

	// Initialize result with empty object
	const result: {
		keyToken?: string;
		operatorToken?: string;
		valueToken?: string;
	} = {};

	if (isInKey) {
		// When in key context, we only know the key
		result.keyToken = currentToken.text;
	} else if (isInOperator) {
		// When in operator context, we know the operator and can find the preceding key
		result.operatorToken = currentToken.text;

		// Look backward for key
		for (let i = currentTokenIndex - 1; i >= 0; i--) {
			const token = allTokens[i];
			// Skip whitespace and other hidden channel tokens
			if (token.channel !== 0) continue;

			if (token.type === FilterQueryLexer.KEY) {
				result.keyToken = token.text;
				break;
			}
		}
	} else if (isInValue) {
		// When in value context, we know the value and can find the preceding operator and key
		result.valueToken = currentToken.text;

		let foundOperator = false;

		// Look backward for operator and key
		for (let i = currentTokenIndex - 1; i >= 0; i--) {
			const token = allTokens[i];
			// Skip whitespace and other hidden channel tokens
			if (token.channel !== 0) continue;

			// If we haven't found an operator yet, check for operator
			if (
				!foundOperator &&
				[
					FilterQueryLexer.EQUALS,
					FilterQueryLexer.NOT_EQUALS,
					FilterQueryLexer.NEQ,
					FilterQueryLexer.LT,
					FilterQueryLexer.LE,
					FilterQueryLexer.GT,
					FilterQueryLexer.GE,
					FilterQueryLexer.LIKE,
					FilterQueryLexer.NOT_LIKE,
					FilterQueryLexer.ILIKE,
					FilterQueryLexer.NOT_ILIKE,
					FilterQueryLexer.BETWEEN,
					FilterQueryLexer.EXISTS,
					FilterQueryLexer.REGEXP,
					FilterQueryLexer.CONTAINS,
					FilterQueryLexer.IN,
					FilterQueryLexer.NOT,
				].includes(token.type)
			) {
				result.operatorToken = token.text;
				foundOperator = true;
			}
			// If we already found an operator and this is a key, record it
			else if (foundOperator && token.type === FilterQueryLexer.KEY) {
				result.keyToken = token.text;
				break; // We found our triplet
			}
		}
	}

	return result;
}

export function getQueryContextAtCursor(
	query: string,
	cursorIndex: number,
): IQueryContext {
	try {
		// Create input stream and lexer
		const input = query || '';
		const chars = CharStreams.fromString(input);
		const lexer = new FilterQueryLexer(chars);

		// Create token stream and force token generation
		const tokenStream = new CommonTokenStream(lexer);
		tokenStream.fill();

		// Get all tokens including whitespace
		const allTokens = tokenStream.tokens as IToken[];

		// Find exact token at cursor, including whitespace
		let exactToken: IToken | null = null;
		let previousToken: IToken | null = null;
		let nextToken: IToken | null = null;

		// Handle cursor at the very end of input
		if (cursorIndex === input.length && allTokens.length > 0) {
			const lastRealToken = allTokens
				.filter((t) => t.type !== FilterQueryLexer.EOF)
				.pop();
			if (lastRealToken) {
				exactToken = lastRealToken;
				previousToken =
					allTokens.filter((t) => t.stop < lastRealToken.start).pop() || null;
			}
		} else {
			// Normal token search
			for (let i = 0; i < allTokens.length; i++) {
				const token = allTokens[i];
				// Skip EOF token in normal search
				if (token.type === FilterQueryLexer.EOF) {
					continue;
				}

				// Check if cursor is within token bounds (inclusive)
				if (token.start <= cursorIndex && cursorIndex <= token.stop + 1) {
					exactToken = token;
					previousToken = i > 0 ? allTokens[i - 1] : null;
					nextToken = i < allTokens.length - 1 ? allTokens[i + 1] : null;
					break;
				}
			}

			// If cursor is between tokens, find surrounding tokens
			if (!exactToken) {
				for (let i = 0; i < allTokens.length - 1; i++) {
					const current = allTokens[i];
					const next = allTokens[i + 1];
					if (current.type === FilterQueryLexer.EOF) {
						continue;
					}
					if (next.type === FilterQueryLexer.EOF) {
						continue;
					}

					if (current.stop + 1 < cursorIndex && cursorIndex < next.start) {
						previousToken = current;
						nextToken = next;
						break;
					}
				}
			}
		}

		// Determine the context based on cursor position and surrounding tokens
		let currentToken: IToken | null = null;

		if (exactToken) {
			// If cursor is in a non-whitespace token, use that
			if (exactToken.channel === 0) {
				currentToken = exactToken;
			} else {
				// If in whitespace, use the previous non-whitespace token
				currentToken = previousToken?.channel === 0 ? previousToken : nextToken;
			}
		} else if (previousToken?.channel === 0) {
			// If between tokens, prefer the previous non-whitespace token
			currentToken = previousToken;
		} else if (nextToken?.channel === 0) {
			// Otherwise use the next non-whitespace token
			currentToken = nextToken;
		}

		// If still no token (empty query or all whitespace), return default context
		if (!currentToken) {
			// Handle transitions based on spaces and current state
			if (query.trim() === '') {
				return {
					tokenType: -1,
					text: '',
					start: cursorIndex,
					stop: cursorIndex,
					currentToken: '',
					isInValue: false,
					isInKey: true, // Default to key context when input is empty
					isInOperator: false,
					isInFunction: false,
					isInConjunction: false,
					isInParenthesis: false,
				};
			}
			return {
				tokenType: -1,
				text: '',
				start: cursorIndex,
				stop: cursorIndex,
				currentToken: '',
				isInValue: false,
				isInKey: false,
				isInOperator: false,
				isInFunction: false,
				isInConjunction: false,
				isInParenthesis: false,
			};
		}

		// Determine if the current token is a conjunction (AND or OR)
		const isInConjunction = [FilterQueryLexer.AND, FilterQueryLexer.OR].includes(
			currentToken.type,
		);

		// // Determine if the current token is a parenthesis
		// const isInParenthesis = [
		// 	FilterQueryLexer.LPAREN,
		// 	FilterQueryLexer.RPAREN,
		// ].includes(currentToken.type);

		// Determine the context based on the token type
		const isInValue = [
			FilterQueryLexer.QUOTED_TEXT,
			FilterQueryLexer.NUMBER,
			FilterQueryLexer.BOOL,
		].includes(currentToken.type);

		const isInKey = currentToken.type === FilterQueryLexer.KEY;

		const isInOperator = [
			FilterQueryLexer.EQUALS,
			FilterQueryLexer.NOT_EQUALS,
			FilterQueryLexer.NEQ,
			FilterQueryLexer.LT,
			FilterQueryLexer.LE,
			FilterQueryLexer.GT,
			FilterQueryLexer.GE,
			FilterQueryLexer.LIKE,
			FilterQueryLexer.NOT_LIKE,
			FilterQueryLexer.ILIKE,
			FilterQueryLexer.NOT_ILIKE,
			FilterQueryLexer.BETWEEN,
			FilterQueryLexer.EXISTS,
			FilterQueryLexer.REGEXP,
			FilterQueryLexer.CONTAINS,
			FilterQueryLexer.IN,
			FilterQueryLexer.NOT,
		].includes(currentToken.type);

		const isInFunction = [
			FilterQueryLexer.HAS,
			FilterQueryLexer.HASANY,
			FilterQueryLexer.HASALL,
			FilterQueryLexer.HASNONE,
		].includes(currentToken.type);

		// Get the context-related tokens (key, operator, value)
		const relationTokens = findKeyOperatorValueTriplet(
			allTokens,
			currentToken,
			isInKey,
			isInOperator,
			isInValue,
		);

		// Handle transitions based on spaces
		// When a user adds a space after a token, change the context accordingly
		if (
			currentToken &&
			cursorIndex === currentToken.stop + 2 &&
			query[currentToken.stop + 1] === ' '
		) {
			// User added a space right after this token

			if (isInKey) {
				// After a key + space, we should be in operator context
				return {
					tokenType: currentToken.type,
					text: currentToken.text,
					start: currentToken.start,
					stop: currentToken.stop,
					currentToken: currentToken.text,
					isInValue: false,
					isInKey: false,
					isInOperator: true,
					isInFunction: false,
					isInConjunction: false,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}

			if (isInOperator) {
				// After an operator + space, we should be in value context
				return {
					tokenType: currentToken.type,
					text: currentToken.text,
					start: currentToken.start,
					stop: currentToken.stop,
					currentToken: currentToken.text,
					isInValue: true,
					isInKey: false,
					isInOperator: false,
					isInFunction: false,
					isInConjunction: false,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}

			if (isInValue) {
				// After a value + space, we should be in conjunction context
				return {
					tokenType: currentToken.type,
					text: currentToken.text,
					start: currentToken.start,
					stop: currentToken.stop,
					currentToken: currentToken.text,
					isInValue: false,
					isInKey: false,
					isInOperator: false,
					isInFunction: false,
					isInConjunction: true,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}

			if (isInConjunction) {
				// After a conjunction + space, we should be in key context again
				return {
					tokenType: currentToken.type,
					text: currentToken.text,
					start: currentToken.start,
					stop: currentToken.stop,
					currentToken: currentToken.text,
					isInValue: false,
					isInKey: true,
					isInOperator: false,
					isInFunction: false,
					isInConjunction: false,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}
		}

		// Add logic for context detection that works for both forward and backward navigation
		// This handles both cases: when user is typing forward and when they're moving backward
		if (previousToken && nextToken) {
			// Determine context based on token sequence pattern

			// Key -> Operator -> Value -> Conjunction pattern detection
			if (isInKey && nextToken.type === FilterQueryLexer.EQUALS) {
				// When cursor is on a key and next token is an operator
				return {
					tokenType: currentToken.type,
					text: currentToken.text,
					start: currentToken.start,
					stop: currentToken.stop,
					currentToken: currentToken.text,
					isInValue: false,
					isInKey: true,
					isInOperator: false,
					isInFunction: false,
					isInConjunction: false,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}

			if (
				isInOperator &&
				previousToken.type === FilterQueryLexer.KEY &&
				(nextToken.type === FilterQueryLexer.QUOTED_TEXT ||
					nextToken.type === FilterQueryLexer.NUMBER ||
					nextToken.type === FilterQueryLexer.BOOL)
			) {
				// When cursor is on an operator between a key and value
				return {
					tokenType: currentToken.type,
					text: currentToken.text,
					start: currentToken.start,
					stop: currentToken.stop,
					currentToken: currentToken.text,
					isInValue: false,
					isInKey: false,
					isInOperator: true,
					isInFunction: false,
					isInConjunction: false,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}

			if (
				isInValue &&
				previousToken.type !== FilterQueryLexer.AND &&
				previousToken.type !== FilterQueryLexer.OR &&
				(nextToken.type === FilterQueryLexer.AND ||
					nextToken.type === FilterQueryLexer.OR)
			) {
				// When cursor is on a value and next token is a conjunction
				return {
					tokenType: currentToken.type,
					text: currentToken.text,
					start: currentToken.start,
					stop: currentToken.stop,
					currentToken: currentToken.text,
					isInValue: true,
					isInKey: false,
					isInOperator: false,
					isInFunction: false,
					isInConjunction: false,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}

			if (
				isInConjunction &&
				(previousToken.type === FilterQueryLexer.QUOTED_TEXT ||
					previousToken.type === FilterQueryLexer.NUMBER ||
					previousToken.type === FilterQueryLexer.BOOL) &&
				nextToken.type === FilterQueryLexer.KEY
			) {
				// When cursor is on a conjunction between a value and a key
				return {
					tokenType: currentToken.type,
					text: currentToken.text,
					start: currentToken.start,
					stop: currentToken.stop,
					currentToken: currentToken.text,
					isInValue: false,
					isInKey: false,
					isInOperator: false,
					isInFunction: false,
					isInConjunction: true,
					isInParenthesis: false,
				};
			}
		}

		// If we're in between tokens (no exact token match), use next token type to determine context
		if (!exactToken && nextToken) {
			if (nextToken.type === FilterQueryLexer.KEY) {
				return {
					tokenType: -1,
					text: '',
					start: cursorIndex,
					stop: cursorIndex,
					currentToken: '',
					isInValue: false,
					isInKey: true,
					isInOperator: false,
					isInFunction: false,
					isInConjunction: false,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}

			if (
				[
					FilterQueryLexer.EQUALS,
					FilterQueryLexer.NOT_EQUALS,
					FilterQueryLexer.GT,
					FilterQueryLexer.LT,
					FilterQueryLexer.GE,
					FilterQueryLexer.LE,
				].includes(nextToken.type)
			) {
				return {
					tokenType: -1,
					text: '',
					start: cursorIndex,
					stop: cursorIndex,
					currentToken: '',
					isInValue: false,
					isInKey: false,
					isInOperator: true,
					isInFunction: false,
					isInConjunction: false,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}

			if (
				[
					FilterQueryLexer.QUOTED_TEXT,
					FilterQueryLexer.NUMBER,
					FilterQueryLexer.BOOL,
				].includes(nextToken.type)
			) {
				return {
					tokenType: -1,
					text: '',
					start: cursorIndex,
					stop: cursorIndex,
					currentToken: '',
					isInValue: true,
					isInKey: false,
					isInOperator: false,
					isInFunction: false,
					isInConjunction: false,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}

			if ([FilterQueryLexer.AND, FilterQueryLexer.OR].includes(nextToken.type)) {
				return {
					tokenType: -1,
					text: '',
					start: cursorIndex,
					stop: cursorIndex,
					currentToken: '',
					isInValue: false,
					isInKey: false,
					isInOperator: false,
					isInFunction: false,
					isInConjunction: true,
					isInParenthesis: false,
					...relationTokens, // Include related tokens
				};
			}
		}

		// Fall back to default context detection based on current token
		return {
			tokenType: currentToken.type,
			text: currentToken.text,
			start: currentToken.start,
			stop: currentToken.stop,
			currentToken: currentToken.text,
			isInValue,
			isInKey,
			isInOperator,
			isInFunction,
			isInConjunction,
			// isInParenthesis,
			...relationTokens, // Include related tokens
		};
	} catch (error) {
		console.error('Error in getQueryContextAtCursor:', error);
		return {
			tokenType: -1,
			text: '',
			start: cursorIndex,
			stop: cursorIndex,
			currentToken: '',
			isInValue: false,
			isInKey: false,
			isInOperator: false,
			isInFunction: false,
			isInConjunction: false,
			isInParenthesis: false,
		};
	}
}

export const queryOperatorSuggestions = [
	{ label: '=', type: 'operator', info: 'Equal to' },
	{ label: '!=', type: 'operator', info: 'Not equal to' },
	{ label: '>', type: 'operator', info: 'Greater than' },
	{ label: '<', type: 'operator', info: 'Less than' },
	{ label: '>=', type: 'operator', info: 'Greater than or equal to' },
	{ label: '<=', type: 'operator', info: 'Less than or equal to' },
	{ label: 'LIKE', type: 'operator', info: 'Like' },
	{ label: 'ILIKE', type: 'operator', info: 'Case insensitive like' },
	{ label: 'BETWEEN', type: 'operator', info: 'Between' },
	{ label: 'EXISTS', type: 'operator', info: 'Exists' },
	{ label: 'REGEXP', type: 'operator', info: 'Regular expression' },
	{ label: 'CONTAINS', type: 'operator', info: 'Contains' },
	{ label: 'IN', type: 'operator', info: 'In' },
	{ label: 'NOT', type: 'operator', info: 'Not' },
	{ label: 'NOT_LIKE', type: 'operator', info: 'Not like' },
	{ label: 'IS_NULL', type: 'operator', info: 'Is null' },
	{ label: 'IS_NOT_NULL', type: 'operator', info: 'Is not null' },
];