feat(search): sanitize raw FTS5 queries with safe fallback

Add input validation for Raw FTS query mode to prevent expensive or
malformed queries from reaching SQLite FTS5:

- Reject unbalanced double quotes (would cause FTS5 syntax error)
- Reject leading wildcard-only queries ("*", "* OR ...") that trigger
  expensive full-table scans
- Reject empty/whitespace-only queries
- Raw input that fails these checks falls back to Safe mode automatically
  instead of erroring, so callers do not see FTS5 parse failures for the
  rejected patterns (other FTS5 syntax errors are not detected here)

The Safe mode already escapes all tokens with double-quote wrapping
and handles embedded quotes via doubling. Raw mode now has a
validation layer on top.

All queries remain parameterized (?1, ?2) — user input never enters
SQL strings directly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-06 22:42:17 -05:00
parent c2036c64e9
commit 8cf14fb69b

View File

@@ -14,9 +14,38 @@ pub struct FtsResult {
pub snippet: String,
}
/// Validate a raw FTS5 query string before it is used as a MATCH expression.
///
/// Returns `Some` holding the whitespace-trimmed query when every check
/// passes, or `None` when the input is rejected. This function only
/// validates; choosing a fallback for rejected input is the caller's job.
///
/// A query is rejected when:
/// - it is empty or consists only of whitespace,
/// - it contains an odd number of double quotes (unbalanced quotes are an
///   FTS5 syntax error), or
/// - its first whitespace-delimited token is a bare `*` (leading
///   wildcard-only query).
fn sanitize_raw_fts(raw: &str) -> Option<String> {
    let query = raw.trim();

    // `split_whitespace` yields no tokens for an empty string, so `?` also
    // covers the empty / whitespace-only rejection.
    let first_token = query.split_whitespace().next()?;

    // Compare the whole first token rather than the prefix "* ": a tab or
    // newline after the wildcard must be rejected just like a space.
    if first_token == "*" {
        return None;
    }

    // An odd number of double quotes means at least one quote is unclosed.
    let quote_count = query.chars().filter(|&c| c == '"').count();
    if quote_count % 2 != 0 {
        return None;
    }

    Some(query.to_string())
}
pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
match mode {
FtsQueryMode::Raw => raw.to_string(),
FtsQueryMode::Raw => {
// Validate raw FTS5 input; fall back to Safe mode if invalid
match sanitize_raw_fts(raw) {
Some(sanitized) => sanitized,
None => to_fts_query(raw, FtsQueryMode::Safe),
}
}
FtsQueryMode::Safe => {
let trimmed = raw.trim();
if trimmed.is_empty() {
@@ -202,4 +231,38 @@ mod tests {
let result = get_result_snippet(Some(""), "full content text");
assert_eq!(result, "full content text");
}
#[test]
fn test_raw_mode_valid_fts5_passes_through() {
    // Well-formed raw queries (boolean operator, quoted phrase) must come
    // back from Raw mode exactly as they were given.
    for query in ["auth OR error", "\"exact phrase\""] {
        assert_eq!(to_fts_query(query, FtsQueryMode::Raw), query);
    }
}
#[test]
fn test_raw_mode_unbalanced_quotes_falls_back_to_safe() {
    // A single stray quote makes the raw query invalid, so the output is
    // expected in Safe-mode form: every token wrapped in double quotes, with
    // the embedded quote doubled.
    let expected = "\"auth\" \"\"\"error\"";
    assert_eq!(to_fts_query("auth \"error", FtsQueryMode::Raw), expected);
}
#[test]
fn test_raw_mode_leading_wildcard_falls_back_to_safe() {
    // A leading bare `*` followed by more terms: each token comes back quoted.
    assert_eq!(
        to_fts_query("* OR auth", FtsQueryMode::Raw),
        "\"*\" \"OR\" \"auth\""
    );

    // A lone `*` is quoted as well.
    assert_eq!(to_fts_query("*", FtsQueryMode::Raw), "\"*\"");
}
#[test]
fn test_raw_mode_empty_falls_back_to_safe() {
    // Both the empty string and a whitespace-only string must produce an
    // empty query when passed through Raw mode.
    let from_empty = to_fts_query("", FtsQueryMode::Raw);
    let from_blank = to_fts_query(" ", FtsQueryMode::Raw);

    assert_eq!(from_empty, "");
    assert_eq!(from_blank, "");
}
}