refactor: extract unit tests into separate _tests.rs files

Move inline #[cfg(test)] mod tests { ... } blocks from 22 source files
into dedicated _tests.rs companion files, wired via:

    #[cfg(test)]
    #[path = "module_tests.rs"]
    mod tests;

This keeps implementation-focused source files leaner and more scannable
while preserving full access to private items through `use super::*;`.

Modules extracted:
  core:      db, note_parser, payloads, project, references, sync_run,
             timeline_collect, timeline_expand, timeline_seed
  cli:       list (55 tests), who (75 tests)
  documents: extractor (43 tests), regenerator
  embedding: change_detector, chunking
  gitlab:    graphql (wiremock async tests), transformers/issue
  ingestion: dirty_tracker, discussions, issues, mr_diffs

Also adds conflicts_with("explain_score") to the --detail flag in the
who command to prevent mutually exclusive flags from being combined.

All 629 unit tests pass. No behavior changes other than the new flag conflict noted above.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-13 10:54:02 -05:00
parent 5c2df3df3b
commit 7e0e6a91f2
43 changed files with 11672 additions and 11942 deletions

View File

@@ -269,525 +269,5 @@ fn get_document_id(conn: &Connection, source_type: SourceType, source_id: i64) -
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ingestion::dirty_tracker::mark_dirty;
/// Builds an in-memory SQLite database with the base schema used by these tests.
///
/// The batch creates `projects` (seeded with one row: id 1, GitLab project id
/// 100, path `group/project`), `issues`, `labels`, `issue_labels`, `documents`,
/// `document_labels`, `document_paths`, and `dirty_sources` together with its
/// `next_attempt_at` index.
///
/// Panics if the connection cannot be opened or the schema batch fails.
fn setup_db() -> Connection {
    // The statement text is kept verbatim; only its location moved into a const.
    const BASE_SCHEMA_SQL: &str = "
CREATE TABLE projects (
id INTEGER PRIMARY KEY,
gitlab_project_id INTEGER UNIQUE NOT NULL,
path_with_namespace TEXT NOT NULL,
default_branch TEXT,
web_url TEXT,
created_at INTEGER,
updated_at INTEGER,
raw_payload_id INTEGER
);
INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (1, 100, 'group/project');
CREATE TABLE issues (
id INTEGER PRIMARY KEY,
gitlab_id INTEGER UNIQUE NOT NULL,
project_id INTEGER NOT NULL REFERENCES projects(id),
iid INTEGER NOT NULL,
title TEXT,
description TEXT,
state TEXT NOT NULL,
author_username TEXT,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
last_seen_at INTEGER NOT NULL,
discussions_synced_for_updated_at INTEGER,
resource_events_synced_for_updated_at INTEGER,
web_url TEXT,
raw_payload_id INTEGER
);
CREATE TABLE labels (
id INTEGER PRIMARY KEY,
gitlab_id INTEGER,
project_id INTEGER NOT NULL REFERENCES projects(id),
name TEXT NOT NULL,
color TEXT,
description TEXT
);
CREATE TABLE issue_labels (
issue_id INTEGER NOT NULL REFERENCES issues(id),
label_id INTEGER NOT NULL REFERENCES labels(id),
PRIMARY KEY(issue_id, label_id)
);
CREATE TABLE documents (
id INTEGER PRIMARY KEY,
source_type TEXT NOT NULL,
source_id INTEGER NOT NULL,
project_id INTEGER NOT NULL,
author_username TEXT,
label_names TEXT,
created_at INTEGER,
updated_at INTEGER,
url TEXT,
title TEXT,
content_text TEXT NOT NULL,
content_hash TEXT NOT NULL,
labels_hash TEXT NOT NULL DEFAULT '',
paths_hash TEXT NOT NULL DEFAULT '',
is_truncated INTEGER NOT NULL DEFAULT 0,
truncated_reason TEXT,
UNIQUE(source_type, source_id)
);
CREATE TABLE document_labels (
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
label_name TEXT NOT NULL,
PRIMARY KEY(document_id, label_name)
);
CREATE TABLE document_paths (
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
path TEXT NOT NULL,
PRIMARY KEY(document_id, path)
);
CREATE TABLE dirty_sources (
source_type TEXT NOT NULL,
source_id INTEGER NOT NULL,
queued_at INTEGER NOT NULL,
attempt_count INTEGER NOT NULL DEFAULT 0,
last_attempt_at INTEGER,
last_error TEXT,
next_attempt_at INTEGER,
PRIMARY KEY(source_type, source_id)
);
CREATE INDEX idx_dirty_sources_next_attempt ON dirty_sources(next_attempt_at);
";

    let db = Connection::open_in_memory().unwrap();
    db.execute_batch(BASE_SCHEMA_SQL).unwrap();
    db
}
/// A dirty issue with no prior document produces exactly one document whose
/// text contains the issue header line.
#[test]
fn test_regenerate_creates_document() {
    let db = setup_db();

    let insert_issue = "INSERT INTO issues (id, gitlab_id, project_id, iid, title, description, state, author_username, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'Test Issue', 'Description here', 'opened', 'alice', 1000, 2000, 3000)";
    db.execute(insert_issue, []).unwrap();
    mark_dirty(&db, SourceType::Issue, 1).unwrap();

    let outcome = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(outcome.regenerated, 1);
    assert_eq!(outcome.unchanged, 0);
    assert_eq!(outcome.errored, 0);

    // Exactly one row must have landed in `documents`.
    let doc_count: i64 = db
        .query_row("SELECT COUNT(*) FROM documents", [], |row| row.get(0))
        .unwrap();
    assert_eq!(doc_count, 1);

    let doc_text: String = db
        .query_row("SELECT content_text FROM documents", [], |row| row.get(0))
        .unwrap();
    assert!(doc_text.contains("[[Issue]] #42: Test Issue"));
}
/// Re-queuing an issue that has not been modified is reported as `unchanged`
/// on the second regeneration pass, not as `regenerated`.
#[test]
fn test_regenerate_unchanged() {
    let db = setup_db();

    let insert_issue = "INSERT INTO issues (id, gitlab_id, project_id, iid, title, description, state, author_username, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'Test', 'Desc', 'opened', 'alice', 1000, 2000, 3000)";
    db.execute(insert_issue, []).unwrap();

    // First pass: the document does not exist yet, so it is generated.
    mark_dirty(&db, SourceType::Issue, 1).unwrap();
    let first_pass = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(first_pass.regenerated, 1);

    // Second pass: same source data, so nothing should be rewritten.
    mark_dirty(&db, SourceType::Issue, 1).unwrap();
    let second_pass = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(second_pass.unchanged, 1);
    assert_eq!(second_pass.regenerated, 0);
}
/// When the source issue row disappears, regenerating its dirty entry counts
/// as `regenerated` and leaves no rows in `documents`.
#[test]
fn test_regenerate_deleted_source() {
    let db = setup_db();

    let insert_issue = "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'Test', 'opened', 1000, 2000, 3000)";
    db.execute(insert_issue, []).unwrap();
    mark_dirty(&db, SourceType::Issue, 1).unwrap();
    regenerate_dirty_documents(&db, None).unwrap();

    // Delete the issue with foreign-key enforcement switched off, then
    // switch it back on; the three statements run in this exact order.
    let delete_sequence = [
        "PRAGMA foreign_keys = OFF",
        "DELETE FROM issues WHERE id = 1",
        "PRAGMA foreign_keys = ON",
    ];
    for statement in delete_sequence {
        db.execute(statement, []).unwrap();
    }

    mark_dirty(&db, SourceType::Issue, 1).unwrap();
    let outcome = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(outcome.regenerated, 1);

    let remaining_docs: i64 = db
        .query_row("SELECT COUNT(*) FROM documents", [], |row| row.get(0))
        .unwrap();
    assert_eq!(remaining_docs, 0);
}
/// Ten dirty issues are all regenerated in one call, after which
/// `get_dirty_sources` returns nothing.
#[test]
fn test_regenerate_drains_queue() {
    let db = setup_db();
    let insert_issue = "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (?1, ?2, 1, ?1, 'Test', 'opened', 1000, 2000, 3000)";

    for issue_id in 1..=10 {
        // ?1 doubles as both the row id and the iid; ?2 is the GitLab id.
        let gitlab_id = issue_id * 10;
        db.execute(insert_issue, rusqlite::params![issue_id, gitlab_id])
            .unwrap();
        mark_dirty(&db, SourceType::Issue, issue_id).unwrap();
    }

    let outcome = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(outcome.regenerated, 10);

    let still_dirty = get_dirty_sources(&db).unwrap();
    assert!(still_dirty.is_empty());
}
/// A labelled issue regenerated twice without changes is reported as
/// `unchanged` on the second pass, and `document_labels` still holds one row.
///
/// NOTE(review): the name suggests this targets the comparison of the three
/// hash columns on `documents`; the hashes themselves are not inspected here.
#[test]
fn test_triple_hash_fast_path() {
    let db = setup_db();

    // Seed one issue, one label, and the link between them, in that order.
    let seed_statements = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'Test', 'opened', 1000, 2000, 3000)",
        "INSERT INTO labels (id, project_id, name) VALUES (1, 1, 'bug')",
        "INSERT INTO issue_labels (issue_id, label_id) VALUES (1, 1)",
    ];
    for statement in seed_statements {
        db.execute(statement, []).unwrap();
    }

    // Initial generation.
    mark_dirty(&db, SourceType::Issue, 1).unwrap();
    regenerate_dirty_documents(&db, None).unwrap();

    // Second pass over identical source data.
    mark_dirty(&db, SourceType::Issue, 1).unwrap();
    let second_pass = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(second_pass.unchanged, 1);

    let label_rows: i64 = db
        .query_row("SELECT COUNT(*) FROM document_labels", [], |row| row.get(0))
        .unwrap();
    assert_eq!(label_rows, 1);
}
/// Extends the database from `setup_db` with the tables the note tests need:
/// `merge_requests`, `mr_labels`, `discussions`, and `notes`.
///
/// Panics if the additional schema batch fails.
fn setup_note_db() -> Connection {
    // The statement text is kept verbatim; only its location moved into a const.
    const NOTE_SCHEMA_SQL: &str = "
CREATE TABLE merge_requests (
id INTEGER PRIMARY KEY,
gitlab_id INTEGER UNIQUE NOT NULL,
project_id INTEGER NOT NULL REFERENCES projects(id),
iid INTEGER NOT NULL,
title TEXT,
description TEXT,
state TEXT,
draft INTEGER NOT NULL DEFAULT 0,
author_username TEXT,
source_branch TEXT,
target_branch TEXT,
head_sha TEXT,
references_short TEXT,
references_full TEXT,
detailed_merge_status TEXT,
merge_user_username TEXT,
created_at INTEGER,
updated_at INTEGER,
merged_at INTEGER,
closed_at INTEGER,
last_seen_at INTEGER NOT NULL,
discussions_synced_for_updated_at INTEGER,
discussions_sync_last_attempt_at INTEGER,
discussions_sync_attempts INTEGER DEFAULT 0,
discussions_sync_last_error TEXT,
resource_events_synced_for_updated_at INTEGER,
web_url TEXT,
raw_payload_id INTEGER
);
CREATE TABLE mr_labels (
merge_request_id INTEGER REFERENCES merge_requests(id),
label_id INTEGER REFERENCES labels(id),
PRIMARY KEY(merge_request_id, label_id)
);
CREATE TABLE discussions (
id INTEGER PRIMARY KEY,
gitlab_discussion_id TEXT NOT NULL,
project_id INTEGER NOT NULL REFERENCES projects(id),
issue_id INTEGER REFERENCES issues(id),
merge_request_id INTEGER,
noteable_type TEXT NOT NULL,
individual_note INTEGER NOT NULL DEFAULT 0,
first_note_at INTEGER,
last_note_at INTEGER,
last_seen_at INTEGER NOT NULL,
resolvable INTEGER NOT NULL DEFAULT 0,
resolved INTEGER NOT NULL DEFAULT 0
);
CREATE TABLE notes (
id INTEGER PRIMARY KEY,
gitlab_id INTEGER UNIQUE NOT NULL,
discussion_id INTEGER NOT NULL REFERENCES discussions(id),
project_id INTEGER NOT NULL REFERENCES projects(id),
note_type TEXT,
is_system INTEGER NOT NULL DEFAULT 0,
author_username TEXT,
body TEXT,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
last_seen_at INTEGER NOT NULL,
position INTEGER,
resolvable INTEGER NOT NULL DEFAULT 0,
resolved INTEGER NOT NULL DEFAULT 0,
resolved_by TEXT,
resolved_at INTEGER,
position_old_path TEXT,
position_new_path TEXT,
position_old_line INTEGER,
position_new_line INTEGER,
raw_payload_id INTEGER
);
";

    let db = setup_db();
    db.execute_batch(NOTE_SCHEMA_SQL).unwrap();
    db
}
/// A dirty, non-system note on an issue discussion yields a document with
/// source type `note` whose text carries the note marker and author line.
#[test]
fn test_regenerate_note_document() {
    let db = setup_note_db();

    // Parent issue -> discussion -> note, inserted in dependency order.
    let seed_statements = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Test Issue', 'opened', 'alice', 1000, 2000, 3000, 'https://example.com/issues/42')",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'This is a note', 1000, 2000, 3000, 0)",
    ];
    for statement in seed_statements {
        db.execute(statement, []).unwrap();
    }

    mark_dirty(&db, SourceType::Note, 1).unwrap();
    let outcome = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(outcome.regenerated, 1);
    assert_eq!(outcome.unchanged, 0);
    assert_eq!(outcome.errored, 0);

    let select_doc = "SELECT source_type, content_text FROM documents WHERE source_id = 1";
    let (stored_type, doc_text): (String, String) = db
        .query_row(select_doc, [], |row| Ok((row.get(0)?, row.get(1)?)))
        .unwrap();
    assert_eq!(stored_type, "note");
    assert!(doc_text.contains("[[Note]]"));
    assert!(doc_text.contains("author: @bob"));
}
/// Regenerating a system note (`is_system = 1`) that already has a document
/// removes that document and is counted as `regenerated`.
#[test]
fn test_regenerate_note_system_note_deletes() {
    let db = setup_note_db();

    // Parent issue -> discussion -> system note, inserted in dependency order.
    let seed_statements = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'Test', 'opened', 1000, 2000, 3000)",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bot', 'assigned to @alice', 1000, 2000, 3000, 1)",
    ];
    for statement in seed_statements {
        db.execute(statement, []).unwrap();
    }

    // Pre-insert a document for this note (simulating a previously-generated doc)
    let insert_stale_doc = "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) VALUES ('note', 1, 1, 'old content', 'oldhash')";
    db.execute(insert_stale_doc, []).unwrap();

    mark_dirty(&db, SourceType::Note, 1).unwrap();
    let outcome = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(outcome.regenerated, 1);

    let count_note_docs = "SELECT COUNT(*) FROM documents WHERE source_type = 'note'";
    let note_docs: i64 = db
        .query_row(count_note_docs, [], |row| row.get(0))
        .unwrap();
    assert_eq!(note_docs, 0);
}
/// Re-queuing a note whose source data has not changed is reported as
/// `unchanged` on the second regeneration pass.
#[test]
fn test_regenerate_note_unchanged() {
    let db = setup_note_db();

    let insert_issue = "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Test', 'opened', 1000, 2000, 3000, 'https://example.com/issues/42')";
    let insert_discussion = "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)";
    let insert_note = "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'Some note', 1000, 2000, 3000, 0)";
    db.execute(insert_issue, []).unwrap();
    db.execute(insert_discussion, []).unwrap();
    db.execute(insert_note, []).unwrap();

    // First pass creates the note document.
    mark_dirty(&db, SourceType::Note, 1).unwrap();
    let first_pass = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(first_pass.regenerated, 1);

    // Second pass sees identical input.
    mark_dirty(&db, SourceType::Note, 1).unwrap();
    let second_pass = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(second_pass.unchanged, 1);
    assert_eq!(second_pass.regenerated, 0);
}
/// Ten dirty notes that share one discussion and one parent issue regenerate
/// in a single call without errors, leaving ten `note` documents.
///
/// The name refers to a parent-metadata cache; the assertions below only
/// check the resulting counts, not cache hits.
#[test]
fn test_note_regeneration_batch_uses_cache() {
    let db = setup_note_db();

    let insert_issue = "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Shared Issue', 'opened', 'alice', 1000, 2000, 3000, 'https://example.com/issues/42')";
    db.execute(insert_issue, []).unwrap();
    let insert_discussion = "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)";
    db.execute(insert_discussion, []).unwrap();

    let insert_note = "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (?1, ?2, 1, 1, 'bob', ?3, 1000, 2000, 3000, 0)";
    for note_id in 1..=10 {
        db.execute(
            insert_note,
            rusqlite::params![note_id, note_id * 100, format!("Note body {}", note_id)],
        )
        .unwrap();
        mark_dirty(&db, SourceType::Note, note_id).unwrap();
    }

    let outcome = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(outcome.regenerated, 10);
    assert_eq!(outcome.errored, 0);

    let count_note_docs = "SELECT COUNT(*) FROM documents WHERE source_type = 'note'";
    let note_docs: i64 = db
        .query_row(count_note_docs, [], |row| row.get(0))
        .unwrap();
    assert_eq!(note_docs, 10);
}
/// `extract_note_document` and `extract_note_document_cached` (given a fresh
/// `ParentMetadataCache`) must produce the same document fields for one note.
#[test]
fn test_note_regeneration_cache_consistent_with_direct_extraction() {
    use crate::documents::extract_note_document;

    let db = setup_note_db();

    // Labelled parent issue, then its discussion and a single note.
    let seed_statements = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Consistency Check', 'opened', 'alice', 1000, 2000, 3000, 'https://example.com/issues/42')",
        "INSERT INTO labels (id, project_id, name) VALUES (1, 1, 'backend')",
        "INSERT INTO issue_labels (issue_id, label_id) VALUES (1, 1)",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'Some content', 1000, 2000, 3000, 0)",
    ];
    for statement in seed_statements {
        db.execute(statement, []).unwrap();
    }

    let uncached_doc = extract_note_document(&db, 1).unwrap().unwrap();

    let mut parent_cache = ParentMetadataCache::new();
    let cached_doc = extract_note_document_cached(&db, 1, &mut parent_cache)
        .unwrap()
        .unwrap();

    // Field-by-field comparison, in the same order as before.
    assert_eq!(uncached_doc.content_text, cached_doc.content_text);
    assert_eq!(uncached_doc.content_hash, cached_doc.content_hash);
    assert_eq!(uncached_doc.labels, cached_doc.labels);
    assert_eq!(uncached_doc.labels_hash, cached_doc.labels_hash);
    assert_eq!(uncached_doc.paths_hash, cached_doc.paths_hash);
    assert_eq!(uncached_doc.title, cached_doc.title);
    assert_eq!(uncached_doc.url, cached_doc.url);
    assert_eq!(uncached_doc.author_username, cached_doc.author_username);
}
/// Two notes under different parent issues, regenerated in the same call,
/// each end up with their own parent's iid and title in the document text.
#[test]
fn test_note_regeneration_cache_invalidates_across_parents() {
    let db = setup_note_db();

    // Two issues, one discussion per issue, one note per discussion.
    let seed_statements = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Issue Alpha', 'opened', 1000, 2000, 3000, 'https://example.com/issues/42')",
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at, web_url) VALUES (2, 20, 1, 99, 'Issue Beta', 'opened', 1000, 2000, 3000, 'https://example.com/issues/99')",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (2, 'disc_2', 1, 2, 'Issue', 3000)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'Alpha note', 1000, 2000, 3000, 0)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (2, 200, 2, 1, 'alice', 'Beta note', 1000, 2000, 3000, 0)",
    ];
    for statement in seed_statements {
        db.execute(statement, []).unwrap();
    }

    mark_dirty(&db, SourceType::Note, 1).unwrap();
    mark_dirty(&db, SourceType::Note, 2).unwrap();

    let outcome = regenerate_dirty_documents(&db, None).unwrap();
    assert_eq!(outcome.regenerated, 2);
    assert_eq!(outcome.errored, 0);

    // Runs a single-column text query and returns the first row's value.
    let fetch_text = |sql: &str| -> String { db.query_row(sql, [], |row| row.get(0)).unwrap() };

    let alpha_text = fetch_text(
        "SELECT content_text FROM documents WHERE source_type = 'note' AND source_id = 1",
    );
    let beta_text = fetch_text(
        "SELECT content_text FROM documents WHERE source_type = 'note' AND source_id = 2",
    );

    assert!(alpha_text.contains("parent_iid: 42"));
    assert!(alpha_text.contains("parent_title: Issue Alpha"));
    assert!(beta_text.contains("parent_iid: 99"));
    assert!(beta_text.contains("parent_title: Issue Beta"));
}
}
#[path = "regenerator_tests.rs"]
mod tests;