use std::path::Path; use super::*; use crate::core::db::{create_connection, run_migrations}; use crate::embedding::pipeline::record_embedding_error; const MODEL: &str = "nomic-embed-text"; fn setup_db() -> Connection { let conn = create_connection(Path::new(":memory:")).unwrap(); run_migrations(&conn).unwrap(); conn } fn insert_test_project(conn: &Connection) -> i64 { conn.execute( "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/test', 'https://gitlab.example.com/group/test')", [], ) .unwrap(); conn.last_insert_rowid() } fn insert_test_document(conn: &Connection, project_id: i64, content: &str) -> i64 { conn.execute( "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) VALUES ('issue', 1, ?1, ?2, 'hash123')", rusqlite::params![project_id, content], ) .unwrap(); conn.last_insert_rowid() } #[test] fn retry_failed_delete_makes_doc_pending_again() { let conn = setup_db(); let proj_id = insert_test_project(&conn); let doc_id = insert_test_document(&conn, proj_id, "some text content"); // Doc starts as pending let pending = find_pending_documents(&conn, 100, 0, MODEL).unwrap(); assert_eq!(pending.len(), 1, "Doc should be pending initially"); // Record an error — doc should no longer be pending record_embedding_error( &conn, doc_id, 0, "hash123", "chunkhash", MODEL, "test error", ) .unwrap(); let pending = find_pending_documents(&conn, 100, 0, MODEL).unwrap(); assert!( pending.is_empty(), "Doc with error metadata should not be pending" ); // DELETE error rows (mimicking --retry-failed) — doc should become pending again conn.execute_batch( "DELETE FROM embeddings WHERE rowid / 1000 IN ( SELECT DISTINCT document_id FROM embedding_metadata WHERE last_error IS NOT NULL ); DELETE FROM embedding_metadata WHERE last_error IS NOT NULL;", ) .unwrap(); let pending = find_pending_documents(&conn, 100, 0, MODEL).unwrap(); assert_eq!(pending.len(), 1, "Doc should be pending again after DELETE"); assert_eq!(pending[0].document_id, doc_id); } #[test] fn empty_doc_with_error_not_pending() { let conn = setup_db(); let proj_id = insert_test_project(&conn); let doc_id = insert_test_document(&conn, proj_id, ""); // Empty doc starts as pending let pending = find_pending_documents(&conn, 100, 0, MODEL).unwrap(); assert_eq!(pending.len(), 1, "Empty doc should be pending initially"); // Record an error for the empty doc record_embedding_error( &conn, doc_id, 0, "hash123", "empty", MODEL, "Document has empty content", ) .unwrap(); // Should no longer be pending let pending = find_pending_documents(&conn, 100, 0, MODEL).unwrap(); assert!( pending.is_empty(), "Empty doc with error metadata should not be pending" ); } #[test] fn old_update_approach_leaves_doc_invisible() { // This test demonstrates WHY we use DELETE instead of UPDATE. // UPDATE clears last_error but the row still matches config params, // so the doc stays "not pending" — permanently invisible. let conn = setup_db(); let proj_id = insert_test_project(&conn); let doc_id = insert_test_document(&conn, proj_id, "some text content"); // Record an error record_embedding_error( &conn, doc_id, 0, "hash123", "chunkhash", MODEL, "test error", ) .unwrap(); // Old approach: UPDATE to clear error conn.execute( "UPDATE embedding_metadata SET last_error = NULL, attempt_count = 0 WHERE last_error IS NOT NULL", [], ) .unwrap(); // Doc is NOT pending — it's permanently invisible! This is the bug. let pending = find_pending_documents(&conn, 100, 0, MODEL).unwrap(); assert!( pending.is_empty(), "UPDATE approach leaves doc invisible (this proves the bug)" ); }