From 6aff96d32f6c6b74973a3b793dca824a62a968d0 Mon Sep 17 00:00:00 2001 From: teernisse Date: Tue, 10 Mar 2026 17:10:17 -0400 Subject: [PATCH] fix(sql): add ORDER BY to all LIMIT queries for deterministic results SQLite does not guarantee row order without ORDER BY, even with LIMIT. This was a systemic issue found during a multi-pass bug hunt: Production queries (explain.rs): - Outgoing reference query: ORDER BY target_entity_type, target_entity_iid - Incoming reference query: ORDER BY source_entity_type, COALESCE(iid) Without these, robot mode output was non-deterministic across calls, breaking clients expecting stable ordering. Test helper queries (5 locations across 3 files): - discussions_tests.rs: get_discussion_id() - mr_discussions.rs: get_mr_discussion_id() - queue.rs: setup_db_with_job(), release_all_locked_jobs_clears_locks() Currently safe (single-row inserts) but would break silently if tests expanded to multi-row fixtures. --- src/cli/commands/explain.rs | 17 ++++++++++++----- src/ingestion/discussions_tests.rs | 8 ++++++-- src/ingestion/mr_discussions.rs | 8 ++++++-- src/ingestion/storage/queue.rs | 10 +++++++--- 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/cli/commands/explain.rs b/src/cli/commands/explain.rs index 7444268..db92fe7 100644 --- a/src/cli/commands/explain.rs +++ b/src/cli/commands/explain.rs @@ -821,7 +821,8 @@ fn fetch_related_entities( LEFT JOIN merge_requests mr ON er.target_entity_type = 'merge_request' AND mr.id = er.target_entity_id \ WHERE er.source_entity_type = ?1 AND er.source_entity_id = ?2 \ AND er.reference_type != 'closes' \ - AND er.target_entity_iid IS NOT NULL", + AND er.target_entity_iid IS NOT NULL \ + ORDER BY er.target_entity_type, er.target_entity_iid", )?; let outgoing: Vec = out_stmt @@ -845,7 +846,8 @@ fn fetch_related_entities( LEFT JOIN merge_requests mr ON er.source_entity_type = 'merge_request' AND mr.id = er.source_entity_id \ WHERE er.target_entity_type = ?1 AND er.target_entity_id = ?2 \ AND er.reference_type != 'closes' \ - AND COALESCE(i.iid, mr.iid) IS NOT NULL", + AND COALESCE(i.iid, mr.iid) IS NOT NULL \ + ORDER BY er.source_entity_type, COALESCE(i.iid, mr.iid)", )?; let incoming: Vec = in_stmt @@ -1020,7 +1022,7 @@ pub fn handle_explain( let elapsed_ms = start.elapsed().as_millis() as u64; if robot_mode { - print_explain_json(&result, elapsed_ms); + print_explain_json(&result, elapsed_ms)?; } else { print_explain(&result); } @@ -1032,13 +1034,18 @@ pub fn handle_explain( // Output rendering (Task 5 fills these in fully) // --------------------------------------------------------------------------- -pub fn print_explain_json(result: &ExplainResult, elapsed_ms: u64) { +pub fn print_explain_json(result: &ExplainResult, elapsed_ms: u64) -> Result<()> { let response = serde_json::json!({ "ok": true, "data": result, "meta": { "elapsed_ms": elapsed_ms } }); - println!("{}", serde_json::to_string(&response).unwrap_or_default()); + println!( + "{}", + serde_json::to_string(&response) + .map_err(|e| LoreError::Other(format!("JSON serialization failed: {e}")))? + ); + Ok(()) } pub fn print_explain(result: &ExplainResult) { diff --git a/src/ingestion/discussions_tests.rs b/src/ingestion/discussions_tests.rs index 35cbac8..ad1aea8 100644 --- a/src/ingestion/discussions_tests.rs +++ b/src/ingestion/discussions_tests.rs @@ -40,8 +40,12 @@ fn setup() -> Connection { } fn get_discussion_id(conn: &Connection) -> i64 { - conn.query_row("SELECT id FROM discussions LIMIT 1", [], |row| row.get(0)) - .unwrap() + conn.query_row( + "SELECT id FROM discussions ORDER BY id LIMIT 1", + [], + |row| row.get(0), + ) + .unwrap() } #[allow(clippy::too_many_arguments)] diff --git a/src/ingestion/mr_discussions.rs b/src/ingestion/mr_discussions.rs index 6c98047..ea09938 100644 --- a/src/ingestion/mr_discussions.rs +++ b/src/ingestion/mr_discussions.rs @@ -786,8 +786,12 @@ mod tests { } fn get_mr_discussion_id(conn: &Connection) -> i64 { - conn.query_row("SELECT id FROM discussions LIMIT 1", [], |row| row.get(0)) - .unwrap() + conn.query_row( + "SELECT id FROM discussions ORDER BY id LIMIT 1", + [], + |row| row.get(0), + ) + .unwrap() } #[allow(clippy::too_many_arguments)] diff --git a/src/ingestion/storage/queue.rs b/src/ingestion/storage/queue.rs index 3746774..6ea7e79 100644 --- a/src/ingestion/storage/queue.rs +++ b/src/ingestion/storage/queue.rs @@ -242,14 +242,16 @@ mod tests { .unwrap(); let project_id: i64 = conn - .query_row("SELECT id FROM projects LIMIT 1", [], |row| row.get(0)) + .query_row("SELECT id FROM projects ORDER BY id LIMIT 1", [], |row| { + row.get(0) + }) .unwrap(); enqueue_job(&conn, project_id, "issue", 42, 100, "resource_events", None).unwrap(); let job_id: i64 = conn .query_row( - "SELECT id FROM pending_dependent_fetches LIMIT 1", + "SELECT id FROM pending_dependent_fetches ORDER BY id LIMIT 1", [], |row| row.get(0), ) @@ -301,7 +303,9 @@ mod tests { let (conn, _job_id) = setup_db_with_job(); let project_id: i64 = conn - .query_row("SELECT id FROM projects LIMIT 1", [], |row| row.get(0)) + .query_row("SELECT id FROM projects ORDER BY id LIMIT 1", [], |row| { + row.get(0) + }) .unwrap(); let jobs = claim_jobs(&conn, "resource_events", project_id, 10).unwrap(); assert_eq!(jobs.len(), 1);