Files
gitlore/src/ingestion/discussions_tests.rs
Taylor Eernisse 7e0e6a91f2 refactor: extract unit tests into separate _tests.rs files
Move inline #[cfg(test)] mod tests { ... } blocks from 22 source files
into dedicated _tests.rs companion files, wired via:

    #[cfg(test)]
    #[path = "module_tests.rs"]
    mod tests;

This keeps implementation-focused source files leaner and more scannable
while preserving full access to private items through `use super::*;`.

Modules extracted:
  core:      db, note_parser, payloads, project, references, sync_run,
             timeline_collect, timeline_expand, timeline_seed
  cli:       list (55 tests), who (75 tests)
  documents: extractor (43 tests), regenerator
  embedding: change_detector, chunking
  gitlab:    graphql (wiremock async tests), transformers/issue
  ingestion: dirty_tracker, discussions, issues, mr_diffs

Also adds conflicts_with("explain_score") to the --detail flag in the
who command to prevent mutually exclusive flags from being combined.

All 629 unit tests pass. No behavior changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 10:54:02 -05:00

471 lines
16 KiB
Rust

use super::*;
use crate::core::db::{create_connection, run_migrations};
use crate::gitlab::transformers::NormalizedNote;
use std::path::Path;
#[test]
fn result_default_has_zero_counts() {
let result = IngestDiscussionsResult::default();
assert_eq!(result.discussions_fetched, 0);
assert_eq!(result.discussions_upserted, 0);
assert_eq!(result.notes_upserted, 0);
}
fn setup() -> Connection {
let conn = create_connection(Path::new(":memory:")).unwrap();
run_migrations(&conn).unwrap();
conn.execute(
"INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) \
VALUES (1, 'group/repo', 'https://gitlab.com/group/repo')",
[],
)
.unwrap();
conn.execute(
"INSERT INTO issues (gitlab_id, iid, project_id, title, state, author_username, created_at, updated_at, last_seen_at) \
VALUES (100, 1, 1, 'Test Issue', 'opened', 'testuser', 1000, 2000, 3000)",
[],
)
.unwrap();
conn.execute(
"INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, noteable_type, individual_note, last_seen_at, resolvable, resolved) \
VALUES ('disc-1', 1, 1, 'Issue', 0, 3000, 0, 0)",
[],
)
.unwrap();
conn
}
fn get_discussion_id(conn: &Connection) -> i64 {
conn.query_row("SELECT id FROM discussions LIMIT 1", [], |row| row.get(0))
.unwrap()
}
#[allow(clippy::too_many_arguments)]
fn make_note(
gitlab_id: i64,
project_id: i64,
body: &str,
note_type: Option<&str>,
created_at: i64,
updated_at: i64,
resolved: bool,
resolved_by: Option<&str>,
) -> NormalizedNote {
NormalizedNote {
gitlab_id,
project_id,
note_type: note_type.map(String::from),
is_system: false,
author_id: None,
author_username: "testuser".to_string(),
body: body.to_string(),
created_at,
updated_at,
last_seen_at: updated_at,
position: 0,
resolvable: false,
resolved,
resolved_by: resolved_by.map(String::from),
resolved_at: None,
position_old_path: None,
position_new_path: None,
position_old_line: None,
position_new_line: None,
position_type: None,
position_line_range_start: None,
position_line_range_end: None,
position_base_sha: None,
position_start_sha: None,
position_head_sha: None,
}
}
#[test]
fn test_issue_note_upsert_stable_id() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
let last_seen_at = 5000;
let note1 = make_note(1001, 1, "First note", None, 1000, 2000, false, None);
let note2 = make_note(1002, 1, "Second note", None, 1000, 2000, false, None);
let out1 = upsert_note_for_issue(&conn, disc_id, &note1, last_seen_at, None).unwrap();
let out2 = upsert_note_for_issue(&conn, disc_id, &note2, last_seen_at, None).unwrap();
let id1 = out1.local_note_id;
let id2 = out2.local_note_id;
// Re-sync same gitlab_ids
let out1b = upsert_note_for_issue(&conn, disc_id, &note1, last_seen_at + 1, None).unwrap();
let out2b = upsert_note_for_issue(&conn, disc_id, &note2, last_seen_at + 1, None).unwrap();
assert_eq!(id1, out1b.local_note_id);
assert_eq!(id2, out2b.local_note_id);
}
#[test]
fn test_issue_note_upsert_detects_body_change() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
let note = make_note(2001, 1, "Original body", None, 1000, 2000, false, None);
upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
let mut changed = make_note(2001, 1, "Updated body", None, 1000, 3000, false, None);
changed.updated_at = 3000;
let outcome = upsert_note_for_issue(&conn, disc_id, &changed, 5001, None).unwrap();
assert!(outcome.changed_semantics);
}
#[test]
fn test_issue_note_upsert_unchanged_returns_false() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
let note = make_note(3001, 1, "Same body", None, 1000, 2000, false, None);
upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
// Re-sync identical note
let outcome = upsert_note_for_issue(&conn, disc_id, &note, 5001, None).unwrap();
assert!(!outcome.changed_semantics);
}
#[test]
fn test_issue_note_upsert_updated_at_only_does_not_mark_semantic_change() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
let note = make_note(4001, 1, "Body stays", None, 1000, 2000, false, None);
upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
// Only change updated_at (non-semantic field)
let mut same = make_note(4001, 1, "Body stays", None, 1000, 9999, false, None);
same.updated_at = 9999;
let outcome = upsert_note_for_issue(&conn, disc_id, &same, 5001, None).unwrap();
assert!(!outcome.changed_semantics);
}
#[test]
fn test_issue_note_sweep_removes_stale() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
let note1 = make_note(5001, 1, "Keep me", None, 1000, 2000, false, None);
let note2 = make_note(5002, 1, "Stale me", None, 1000, 2000, false, None);
upsert_note_for_issue(&conn, disc_id, &note1, 5000, None).unwrap();
upsert_note_for_issue(&conn, disc_id, &note2, 5000, None).unwrap();
// Re-sync only note1 with newer timestamp
upsert_note_for_issue(&conn, disc_id, &note1, 6000, None).unwrap();
// Sweep should remove note2 (last_seen_at=5000 < 6000)
let swept = sweep_stale_issue_notes(&conn, disc_id, 6000).unwrap();
assert_eq!(swept, 1);
let count: i64 = conn
.query_row(
"SELECT COUNT(*) FROM notes WHERE discussion_id = ?",
[disc_id],
|row| row.get(0),
)
.unwrap();
assert_eq!(count, 1);
}
#[test]
fn test_issue_note_upsert_returns_local_id() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
let note = make_note(6001, 1, "Check my ID", None, 1000, 2000, false, None);
let outcome = upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
// Verify the local_note_id matches what's in the DB
let db_id: i64 = conn
.query_row(
"SELECT id FROM notes WHERE gitlab_id = ?",
[6001_i64],
|row| row.get(0),
)
.unwrap();
assert_eq!(outcome.local_note_id, db_id);
}
#[test]
fn test_issue_note_upsert_captures_author_id() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
let mut note = make_note(7001, 1, "With author", None, 1000, 2000, false, None);
note.author_id = Some(12345);
upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
let stored: Option<i64> = conn
.query_row(
"SELECT author_id FROM notes WHERE gitlab_id = ?",
[7001_i64],
|row| row.get(0),
)
.unwrap();
assert_eq!(stored, Some(12345));
}
#[test]
fn test_note_upsert_author_id_nullable() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
let note = make_note(7002, 1, "No author id", None, 1000, 2000, false, None);
// author_id defaults to None in make_note
upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
let stored: Option<i64> = conn
.query_row(
"SELECT author_id FROM notes WHERE gitlab_id = ?",
[7002_i64],
|row| row.get(0),
)
.unwrap();
assert_eq!(stored, None);
}
#[test]
fn test_note_author_id_survives_username_change() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
let mut note = make_note(7003, 1, "Original body", None, 1000, 2000, false, None);
note.author_id = Some(99999);
note.author_username = "oldname".to_string();
upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
// Re-sync with changed username, changed body, same author_id
let mut updated = make_note(7003, 1, "Updated body", None, 1000, 3000, false, None);
updated.author_id = Some(99999);
updated.author_username = "newname".to_string();
upsert_note_for_issue(&conn, disc_id, &updated, 5001, None).unwrap();
// author_id must survive the re-sync intact
let stored_id: Option<i64> = conn
.query_row(
"SELECT author_id FROM notes WHERE gitlab_id = ?",
[7003_i64],
|row| row.get(0),
)
.unwrap();
assert_eq!(stored_id, Some(99999));
}
fn insert_note_document(conn: &Connection, note_local_id: i64) {
conn.execute(
"INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) \
VALUES ('note', ?1, 1, 'note content', 'hash123')",
[note_local_id],
)
.unwrap();
}
fn insert_note_dirty_source(conn: &Connection, note_local_id: i64) {
conn.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at) \
VALUES ('note', ?1, 1000)",
[note_local_id],
)
.unwrap();
}
fn count_note_documents(conn: &Connection, note_local_id: i64) -> i64 {
conn.query_row(
"SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?",
[note_local_id],
|row| row.get(0),
)
.unwrap()
}
fn count_note_dirty_sources(conn: &Connection, note_local_id: i64) -> i64 {
conn.query_row(
"SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note' AND source_id = ?",
[note_local_id],
|row| row.get(0),
)
.unwrap()
}
#[test]
fn test_issue_note_sweep_deletes_note_documents_immediately() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
// Insert 3 notes
let note1 = make_note(9001, 1, "Keep me", None, 1000, 2000, false, None);
let note2 = make_note(9002, 1, "Keep me too", None, 1000, 2000, false, None);
let note3 = make_note(9003, 1, "Stale me", None, 1000, 2000, false, None);
let out1 = upsert_note_for_issue(&conn, disc_id, &note1, 5000, None).unwrap();
let out2 = upsert_note_for_issue(&conn, disc_id, &note2, 5000, None).unwrap();
let out3 = upsert_note_for_issue(&conn, disc_id, &note3, 5000, None).unwrap();
// Add documents for all 3
insert_note_document(&conn, out1.local_note_id);
insert_note_document(&conn, out2.local_note_id);
insert_note_document(&conn, out3.local_note_id);
// Add dirty_sources for note3
insert_note_dirty_source(&conn, out3.local_note_id);
// Re-sync only notes 1 and 2 with newer timestamp
upsert_note_for_issue(&conn, disc_id, &note1, 6000, None).unwrap();
upsert_note_for_issue(&conn, disc_id, &note2, 6000, None).unwrap();
// Sweep should remove note3 and its document + dirty_source
sweep_stale_issue_notes(&conn, disc_id, 6000).unwrap();
// Stale note's document should be gone
assert_eq!(count_note_documents(&conn, out3.local_note_id), 0);
assert_eq!(count_note_dirty_sources(&conn, out3.local_note_id), 0);
// Kept notes' documents should survive
assert_eq!(count_note_documents(&conn, out1.local_note_id), 1);
assert_eq!(count_note_documents(&conn, out2.local_note_id), 1);
}
#[test]
fn test_sweep_deletion_handles_note_without_document() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
let note = make_note(9004, 1, "No doc", None, 1000, 2000, false, None);
upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
// Don't insert any document -- sweep should still work without error
let swept = sweep_stale_issue_notes(&conn, disc_id, 6000).unwrap();
assert_eq!(swept, 1);
}
#[test]
fn test_set_based_deletion_atomicity() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
// Insert a stale note with both document and dirty_source
let note = make_note(9005, 1, "Stale with deps", None, 1000, 2000, false, None);
let out = upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
insert_note_document(&conn, out.local_note_id);
insert_note_dirty_source(&conn, out.local_note_id);
// Verify they exist before sweep
assert_eq!(count_note_documents(&conn, out.local_note_id), 1);
assert_eq!(count_note_dirty_sources(&conn, out.local_note_id), 1);
// The sweep function already runs inside a transaction (called from
// ingest_discussions_for_issue's tx). Simulate by wrapping in a transaction.
let tx = conn.unchecked_transaction().unwrap();
sweep_stale_issue_notes(&tx, disc_id, 6000).unwrap();
tx.commit().unwrap();
// All three DELETEs must have happened
assert_eq!(count_note_documents(&conn, out.local_note_id), 0);
assert_eq!(count_note_dirty_sources(&conn, out.local_note_id), 0);
let note_count: i64 = conn
.query_row(
"SELECT COUNT(*) FROM notes WHERE gitlab_id = ?",
[9005_i64],
|row| row.get(0),
)
.unwrap();
assert_eq!(note_count, 0);
}
fn count_dirty_notes(conn: &Connection) -> i64 {
conn.query_row(
"SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
[],
|row| row.get(0),
)
.unwrap()
}
#[test]
fn test_parent_title_change_marks_notes_dirty() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
// Insert two user notes and one system note
let note1 = make_note(10001, 1, "User note 1", None, 1000, 2000, false, None);
let note2 = make_note(10002, 1, "User note 2", None, 1000, 2000, false, None);
let mut sys_note = make_note(10003, 1, "System note", None, 1000, 2000, false, None);
sys_note.is_system = true;
let out1 = upsert_note_for_issue(&conn, disc_id, &note1, 5000, None).unwrap();
let out2 = upsert_note_for_issue(&conn, disc_id, &note2, 5000, None).unwrap();
upsert_note_for_issue(&conn, disc_id, &sys_note, 5000, None).unwrap();
// Clear any dirty_sources from individual note upserts
conn.execute("DELETE FROM dirty_sources WHERE source_type = 'note'", [])
.unwrap();
assert_eq!(count_dirty_notes(&conn), 0);
// Simulate parent title change triggering discussion re-ingest:
// update the issue title, then run the propagation SQL
conn.execute("UPDATE issues SET title = 'Changed Title' WHERE id = 1", [])
.unwrap();
// Run the propagation query (same as in ingestion code)
conn.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
params![now_ms(), disc_id],
)
.unwrap();
// Both user notes should be dirty, system note should not
assert_eq!(count_dirty_notes(&conn), 2);
assert_eq!(count_note_dirty_sources(&conn, out1.local_note_id), 1);
assert_eq!(count_note_dirty_sources(&conn, out2.local_note_id), 1);
}
#[test]
fn test_parent_label_change_marks_notes_dirty() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
// Insert one user note
let note = make_note(11001, 1, "User note", None, 1000, 2000, false, None);
let out = upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
// Clear dirty_sources
conn.execute("DELETE FROM dirty_sources WHERE source_type = 'note'", [])
.unwrap();
// Simulate label change on parent issue (labels are part of issue metadata)
conn.execute("UPDATE issues SET updated_at = 9999 WHERE id = 1", [])
.unwrap();
// Run propagation query
conn.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
params![now_ms(), disc_id],
)
.unwrap();
assert_eq!(count_dirty_notes(&conn), 1);
assert_eq!(count_note_dirty_sources(&conn, out.local_note_id), 1);
}