From 8cf14fb69b24680ebb9dc2207e59cdb09564f511 Mon Sep 17 00:00:00 2001
From: Taylor Eernisse
Date: Fri, 6 Feb 2026 22:42:17 -0500
Subject: [PATCH] feat(search): sanitize raw FTS5 queries with safe fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add input validation for Raw FTS query mode to prevent expensive or
malformed queries from reaching SQLite FTS5:

- Reject unbalanced double quotes (would cause FTS5 syntax error)
- Reject leading wildcard-only queries ("*", "* OR ...") that trigger
  expensive full-table scans
- Reject empty/whitespace-only queries
- Invalid raw input falls back to Safe mode automatically instead of
  erroring, so callers do not see FTS5 parse failures for these cases

The Safe mode already escapes all tokens with double-quote wrapping and
handles embedded quotes via doubling. Raw mode now has a validation
layer on top.

All queries remain parameterized (?1, ?2) — user input never enters SQL
strings directly.

Co-Authored-By: Claude Opus 4.6
---
 src/search/fts.rs | 65 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/src/search/fts.rs b/src/search/fts.rs
index 1d05031..ab46850 100644
--- a/src/search/fts.rs
+++ b/src/search/fts.rs
@@ -14,9 +14,38 @@ pub struct FtsResult {
     pub snippet: String,
 }
 
+/// Validate a raw FTS5 query string before it is passed through unchanged.
+/// Rejects empty/whitespace-only input, unbalanced double quotes, and a leading
+/// bare `*`. Returns the trimmed query, or `None` so the caller can fall back.
+fn sanitize_raw_fts(raw: &str) -> Option<String> {
+    let trimmed = raw.trim();
+    if trimmed.is_empty() {
+        return None;
+    }
+
+    // Reject unbalanced double quotes (FTS5 syntax error)
+    let quote_count = trimmed.chars().filter(|&c| c == '"').count();
+    if quote_count % 2 != 0 {
+        return None;
+    }
+
+    // Reject leading wildcard-only queries (expensive full-table scan)
+    if trimmed == "*" || trimmed.starts_with("* ") {
+        return None;
+    }
+
+    Some(trimmed.to_string())
+}
+
 pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
     match mode {
-        FtsQueryMode::Raw => raw.to_string(),
+        FtsQueryMode::Raw => {
+            // Validate raw FTS5 input; fall back to Safe mode if invalid
+            match sanitize_raw_fts(raw) {
+                Some(sanitized) => sanitized,
+                None => to_fts_query(raw, FtsQueryMode::Safe),
+            }
+        }
         FtsQueryMode::Safe => {
             let trimmed = raw.trim();
             if trimmed.is_empty() {
@@ -202,4 +231,38 @@ mod tests {
         let result = get_result_snippet(Some(""), "full content text");
         assert_eq!(result, "full content text");
     }
+
+    #[test]
+    fn test_raw_mode_valid_fts5_passes_through() {
+        let result = to_fts_query("auth OR error", FtsQueryMode::Raw);
+        assert_eq!(result, "auth OR error");
+
+        let result = to_fts_query("\"exact phrase\"", FtsQueryMode::Raw);
+        assert_eq!(result, "\"exact phrase\"");
+    }
+
+    #[test]
+    fn test_raw_mode_unbalanced_quotes_falls_back_to_safe() {
+        let result = to_fts_query("auth \"error", FtsQueryMode::Raw);
+        // Falls back to Safe mode: each token quoted
+        assert_eq!(result, "\"auth\" \"\"\"error\"");
+    }
+
+    #[test]
+    fn test_raw_mode_leading_wildcard_falls_back_to_safe() {
+        let result = to_fts_query("* OR auth", FtsQueryMode::Raw);
+        assert_eq!(result, "\"*\" \"OR\" \"auth\"");
+
+        let result = to_fts_query("*", FtsQueryMode::Raw);
+        assert_eq!(result, "\"*\"");
+    }
+
+    #[test]
+    fn test_raw_mode_empty_falls_back_to_safe() {
+        let result = to_fts_query("", FtsQueryMode::Raw);
+        assert_eq!(result, "");
+
+        let result = to_fts_query(" ", FtsQueryMode::Raw);
+        assert_eq!(result, "");
+    }
 }