refactor(structure): reorganize codebase into domain-focused modules

This commit is contained in:
teernisse
2026-03-06 15:22:42 -05:00
parent 4d41d74ea7
commit bf977eca1a
78 changed files with 8704 additions and 6973 deletions

View File

@@ -1,330 +0,0 @@
use std::collections::HashMap;
use rusqlite::Connection;
use super::error::Result;
use super::time::now_ms;
/// A row from the `pending_dependent_fetches` queue table, as returned by
/// `claim_jobs`.
#[derive(Debug)]
pub struct PendingJob {
/// Primary key of the queue row; pass it to `complete_job` / `fail_job`.
pub id: i64,
/// Project the job belongs to (`claim_jobs` filters on this column).
pub project_id: i64,
/// Kind of entity the job refers to (the tests in this file use `"issue"`).
pub entity_type: String,
/// Value of the `entity_iid` column.
pub entity_iid: i64,
/// Value of the `entity_local_id` column.
pub entity_local_id: i64,
/// Job category; `claim_jobs` selects rows by this value.
pub job_type: String,
/// Optional payload stored alongside the job when it was enqueued.
pub payload_json: Option<String>,
/// Attempt counter; `fail_job` increments it by one.
pub attempts: i32,
}
/// Adds a job to the `pending_dependent_fetches` queue.
///
/// The row is written with `INSERT OR IGNORE` and its `enqueued_at` column is
/// set to the current value of `now_ms()`.
///
/// Returns `Ok(true)` when a row was inserted and `Ok(false)` when the insert
/// changed nothing (i.e. it was ignored).
///
/// # Errors
/// Propagates any error reported by the database.
pub fn enqueue_job(
    conn: &Connection,
    project_id: i64,
    entity_type: &str,
    entity_iid: i64,
    entity_local_id: i64,
    job_type: &str,
    payload_json: Option<&str>,
) -> Result<bool> {
    let enqueued_at = now_ms();
    let inserted_rows = conn.execute(
        "INSERT OR IGNORE INTO pending_dependent_fetches\n\
         (project_id, entity_type, entity_iid, entity_local_id, job_type, payload_json, enqueued_at)\n\
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
        rusqlite::params![
            project_id,
            entity_type,
            entity_iid,
            entity_local_id,
            job_type,
            payload_json,
            enqueued_at
        ],
    )?;
    Ok(inserted_rows != 0)
}
/// Claims up to `batch_size` unlocked jobs of `job_type` for `project_id`.
///
/// A single `UPDATE … RETURNING` statement stamps `locked_at` with the current
/// `now_ms()` on the selected rows and returns them. Only rows whose
/// `locked_at` is NULL and whose `next_retry_at` is NULL or not in the future
/// are eligible; candidates are picked oldest `enqueued_at` first.
///
/// A `batch_size` of zero returns an empty vector without touching the
/// database.
///
/// NOTE(review): the order of the *returned* rows is presumably whatever
/// SQLite's `RETURNING` emits, not necessarily `enqueued_at` order — confirm
/// before relying on it.
///
/// # Errors
/// Propagates any error reported by the database or by row decoding.
pub fn claim_jobs(
    conn: &Connection,
    job_type: &str,
    project_id: i64,
    batch_size: usize,
) -> Result<Vec<PendingJob>> {
    /// Decodes one `RETURNING` row into a `PendingJob`.
    fn job_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<PendingJob> {
        Ok(PendingJob {
            id: row.get(0)?,
            project_id: row.get(1)?,
            entity_type: row.get(2)?,
            entity_iid: row.get(3)?,
            entity_local_id: row.get(4)?,
            job_type: row.get(5)?,
            payload_json: row.get(6)?,
            attempts: row.get(7)?,
        })
    }

    if batch_size == 0 {
        return Ok(Vec::new());
    }

    let claimed_at = now_ms();
    let mut claim_stmt = conn.prepare_cached(
        "UPDATE pending_dependent_fetches\n\
         SET locked_at = ?1\n\
         WHERE id IN (\n\
         SELECT id FROM pending_dependent_fetches\n\
         WHERE job_type = ?2\n\
         AND project_id = ?4\n\
         AND locked_at IS NULL\n\
         AND (next_retry_at IS NULL OR next_retry_at <= ?1)\n\
         ORDER BY enqueued_at ASC\n\
         LIMIT ?3\n\
         )\n\
         RETURNING id, project_id, entity_type, entity_iid, entity_local_id,\n\
         job_type, payload_json, attempts",
    )?;

    let rows = claim_stmt.query_map(
        rusqlite::params![claimed_at, job_type, batch_size as i64, project_id],
        job_from_row,
    )?;

    let mut claimed = Vec::new();
    for row in rows {
        claimed.push(row?);
    }
    Ok(claimed)
}
/// Marks a job as done by deleting its row from `pending_dependent_fetches`.
///
/// Deleting an id that does not exist is not treated as an error.
///
/// # Errors
/// Propagates any error reported by the database.
pub fn complete_job(conn: &Connection, job_id: i64) -> Result<()> {
    const DELETE_JOB_SQL: &str = "DELETE FROM pending_dependent_fetches WHERE id = ?1";
    let _rows_deleted = conn.execute(DELETE_JOB_SQL, [job_id])?;
    Ok(())
}
/// Deletes a job row inside a caller-supplied transaction.
///
/// Runs the same `DELETE` as `complete_job`, but against `tx`, so the caller
/// can commit it atomically together with other writes (for example a
/// watermark update). Nothing is committed here.
///
/// # Errors
/// Propagates any error reported by the database.
pub fn complete_job_tx(tx: &rusqlite::Transaction<'_>, job_id: i64) -> Result<()> {
    const DELETE_JOB_SQL: &str = "DELETE FROM pending_dependent_fetches WHERE id = ?1";
    let _rows_deleted = tx.execute(DELETE_JOB_SQL, [job_id])?;
    Ok(())
}
/// Clears `locked_at` on every job that currently holds a lock.
///
/// Intended for graceful shutdown, so the next sync does not have to wait for
/// stale locks to expire.
///
/// Returns the number of rows that were unlocked.
///
/// # Errors
/// Propagates any error reported by the database.
pub fn release_all_locked_jobs(conn: &Connection) -> Result<usize> {
    const RELEASE_SQL: &str =
        "UPDATE pending_dependent_fetches SET locked_at = NULL WHERE locked_at IS NOT NULL";
    let released = conn.execute(RELEASE_SQL, [])?;
    Ok(released)
}
pub fn fail_job(conn: &Connection, job_id: i64, error: &str) -> Result<()> {
let now = now_ms();
let changes = conn.execute(
"UPDATE pending_dependent_fetches
SET attempts = attempts + 1,
next_retry_at = ?1 + MIN(30000 * (1 << MIN(attempts, 4)), 480000),
locked_at = NULL,
last_error = ?2
WHERE id = ?3",
rusqlite::params![now, error, job_id],
)?;
if changes == 0 {
return Err(crate::core::error::LoreError::Other(
"fail_job: job not found (may have been reclaimed or completed)".into(),
));
}
Ok(())
}
/// Unlocks jobs whose lock is older than `stale_threshold_minutes`.
///
/// Every row whose `locked_at` is strictly less than
/// `now_ms() - stale_threshold_minutes * 60_000` gets `locked_at` reset to
/// NULL. Returns the number of rows changed.
///
/// # Errors
/// Propagates any error reported by the database.
pub fn reclaim_stale_locks(conn: &Connection, stale_threshold_minutes: u32) -> Result<usize> {
    let threshold_span_ms = i64::from(stale_threshold_minutes) * 60 * 1000;
    let cutoff_ms = now_ms() - threshold_span_ms;
    let reclaimed = conn.execute(
        "UPDATE pending_dependent_fetches SET locked_at = NULL WHERE locked_at < ?1",
        [cutoff_ms],
    )?;
    Ok(reclaimed)
}
/// Counts queued jobs per `job_type`.
///
/// With `Some(project_id)` only that project's rows are counted; with `None`
/// every row in `pending_dependent_fetches` is counted. Locked and
/// retry-delayed rows are included either way.
///
/// # Errors
/// Propagates any error reported by the database or by row decoding.
pub fn count_pending_jobs(
    conn: &Connection,
    project_id: Option<i64>,
) -> Result<HashMap<String, usize>> {
    /// Decodes a `(job_type, COUNT(*))` row.
    fn job_type_count(row: &rusqlite::Row<'_>) -> rusqlite::Result<(String, usize)> {
        let job_type: String = row.get(0)?;
        let count: i64 = row.get(1)?;
        Ok((job_type, count as usize))
    }

    let counts = if let Some(pid) = project_id {
        let mut stmt = conn.prepare_cached(
            "SELECT job_type, COUNT(*) FROM pending_dependent_fetches \
             WHERE project_id = ?1 GROUP BY job_type",
        )?;
        // Bound to a local so the row iterator is dropped before `stmt`.
        let per_type = stmt
            .query_map([pid], job_type_count)?
            .collect::<rusqlite::Result<HashMap<_, _>>>()?;
        per_type
    } else {
        let mut stmt = conn.prepare_cached(
            "SELECT job_type, COUNT(*) FROM pending_dependent_fetches GROUP BY job_type",
        )?;
        let per_type = stmt
            .query_map([], job_type_count)?
            .collect::<rusqlite::Result<HashMap<_, _>>>()?;
        per_type
    };
    Ok(counts)
}
/// Counts, per `job_type`, the jobs of `project_id` that could be claimed now.
///
/// A job is counted when it is unlocked (`locked_at IS NULL`) and its
/// `next_retry_at` is NULL or not later than the current `now_ms()`.
///
/// # Errors
/// Propagates any error reported by the database or by row decoding.
pub fn count_claimable_jobs(conn: &Connection, project_id: i64) -> Result<HashMap<String, usize>> {
    /// Decodes a `(job_type, COUNT(*))` row.
    fn job_type_count(row: &rusqlite::Row<'_>) -> rusqlite::Result<(String, usize)> {
        let job_type: String = row.get(0)?;
        let count: i64 = row.get(1)?;
        Ok((job_type, count as usize))
    }

    let as_of = now_ms();
    let mut stmt = conn.prepare_cached(
        "SELECT job_type, COUNT(*) FROM pending_dependent_fetches \
         WHERE project_id = ?1 \
         AND locked_at IS NULL \
         AND (next_retry_at IS NULL OR next_retry_at <= ?2) \
         GROUP BY job_type",
    )?;
    let claimable = stmt
        .query_map(rusqlite::params![project_id, as_of], job_type_count)?
        .collect::<rusqlite::Result<HashMap<_, _>>>()?;
    Ok(claimable)
}
#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::*;
    use crate::core::db::{create_connection, run_migrations};

    /// Returns the id of the first row in `projects`.
    fn first_project_id(conn: &Connection) -> i64 {
        conn.query_row("SELECT id FROM projects LIMIT 1", [], |row| row.get(0))
            .unwrap()
    }

    /// Number of queue rows carrying `job_id`.
    fn queue_rows_with_id(conn: &Connection, job_id: i64) -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM pending_dependent_fetches WHERE id = ?1",
            [job_id],
            |row| row.get(0),
        )
        .unwrap()
    }

    /// Whether the queue row `job_id` currently has a non-NULL `locked_at`.
    fn is_locked(conn: &Connection, job_id: i64) -> bool {
        conn.query_row(
            "SELECT locked_at IS NOT NULL FROM pending_dependent_fetches WHERE id = ?1",
            [job_id],
            |row| row.get(0),
        )
        .unwrap()
    }

    /// Builds a migrated in-memory database holding one project and one
    /// enqueued `resource_events` job; returns the connection and the job id.
    fn setup_db_with_job() -> (Connection, i64) {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();
        conn.execute(
            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) \
             VALUES (1, 'group/repo', 'https://gitlab.com/group/repo')",
            [],
        )
        .unwrap();

        let project_id = first_project_id(&conn);
        enqueue_job(&conn, project_id, "issue", 42, 100, "resource_events", None).unwrap();

        let job_id: i64 = conn
            .query_row(
                "SELECT id FROM pending_dependent_fetches LIMIT 1",
                [],
                |row| row.get(0),
            )
            .unwrap();
        (conn, job_id)
    }

    /// Committing the transaction makes the delete permanent.
    #[test]
    fn complete_job_tx_commits() {
        let (conn, job_id) = setup_db_with_job();

        let tx = conn.unchecked_transaction().unwrap();
        complete_job_tx(&tx, job_id).unwrap();
        tx.commit().unwrap();

        assert_eq!(
            queue_rows_with_id(&conn, job_id),
            0,
            "job should be deleted after commit"
        );
    }

    /// Dropping the transaction without committing rolls the delete back.
    #[test]
    fn complete_job_tx_rollback() {
        let (conn, job_id) = setup_db_with_job();

        let tx = conn.unchecked_transaction().unwrap();
        complete_job_tx(&tx, job_id).unwrap();
        // drop tx without commit = rollback
        drop(tx);

        assert_eq!(
            queue_rows_with_id(&conn, job_id),
            1,
            "job should survive dropped (rolled-back) tx"
        );
    }

    /// A claimed job is locked, and `release_all_locked_jobs` unlocks it.
    #[test]
    fn release_all_locked_jobs_clears_locks() {
        let (conn, _job_id) = setup_db_with_job();
        let project_id = first_project_id(&conn);

        let claimed = claim_jobs(&conn, "resource_events", project_id, 10).unwrap();
        assert_eq!(claimed.len(), 1);
        let claimed_id = claimed[0].id;
        assert!(
            is_locked(&conn, claimed_id),
            "job should be locked after claim"
        );

        let released = release_all_locked_jobs(&conn).unwrap();
        assert_eq!(released, 1);
        assert!(
            !is_locked(&conn, claimed_id),
            "job should be unlocked after release_all"
        );
    }
}

View File

@@ -1,199 +0,0 @@
use rusqlite::Connection;
use super::error::{LoreError, Result};
use super::time::iso_to_ms_strict;
use crate::gitlab::types::{GitLabLabelEvent, GitLabMilestoneEvent, GitLabStateEvent};
/// Writes GitLab state events for one issue or merge request.
///
/// Each event is stored with `INSERT OR REPLACE` into `resource_state_events`.
/// `entity_type` must be `"issue"` or `"merge_request"`; it decides whether
/// `entity_local_id` goes into the `issue_id` or the `merge_request_id` column.
/// This function does not open a transaction of its own.
///
/// Returns the number of events written.
///
/// # Errors
/// Fails on an unknown `entity_type`, on a `created_at` value that
/// `iso_to_ms_strict` rejects, or on any database error. Events written before
/// the failing one are not undone here.
pub fn upsert_state_events(
    conn: &Connection,
    project_id: i64,
    entity_type: &str,
    entity_local_id: i64,
    events: &[GitLabStateEvent],
) -> Result<usize> {
    let (issue_id, merge_request_id) = resolve_entity_ids(entity_type, entity_local_id)?;
    let mut upsert = conn.prepare_cached(
        "INSERT OR REPLACE INTO resource_state_events\n\
         (gitlab_id, project_id, issue_id, merge_request_id, state,\n\
         actor_gitlab_id, actor_username, created_at,\n\
         source_commit, source_merge_request_iid)\n\
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
    )?;

    for event in events {
        let created_at_ms = iso_to_ms_strict(&event.created_at).map_err(LoreError::Other)?;
        let actor = event.user.as_ref();
        let actor_id = actor.map(|u| u.id);
        let actor_username = actor.map(|u| u.username.as_str());
        let source_mr_iid = event.source_merge_request.as_ref().map(|mr| mr.iid);
        upsert.execute(rusqlite::params![
            event.id,
            project_id,
            issue_id,
            merge_request_id,
            event.state,
            actor_id,
            actor_username,
            created_at_ms,
            event.source_commit,
            source_mr_iid,
        ])?;
    }

    // Reaching this point means every event was executed exactly once.
    Ok(events.len())
}
/// Writes GitLab label events for one issue or merge request.
///
/// Each event is stored with `INSERT OR REPLACE` into `resource_label_events`.
/// `entity_type` must be `"issue"` or `"merge_request"`; it decides whether
/// `entity_local_id` goes into the `issue_id` or the `merge_request_id` column.
/// An event without a label is stored with a NULL `label_name`.
///
/// Returns the number of events written.
///
/// # Errors
/// Fails on an unknown `entity_type`, on a `created_at` value that
/// `iso_to_ms_strict` rejects, or on any database error.
pub fn upsert_label_events(
    conn: &Connection,
    project_id: i64,
    entity_type: &str,
    entity_local_id: i64,
    events: &[GitLabLabelEvent],
) -> Result<usize> {
    let (issue_id, merge_request_id) = resolve_entity_ids(entity_type, entity_local_id)?;
    let mut upsert = conn.prepare_cached(
        "INSERT OR REPLACE INTO resource_label_events\n\
         (gitlab_id, project_id, issue_id, merge_request_id, action,\n\
         label_name, actor_gitlab_id, actor_username, created_at)\n\
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
    )?;

    for event in events {
        let created_at_ms = iso_to_ms_strict(&event.created_at).map_err(LoreError::Other)?;
        let actor = event.user.as_ref();
        let actor_id = actor.map(|u| u.id);
        let actor_username = actor.map(|u| u.username.as_str());
        let label_name = event.label.as_ref().map(|l| l.name.as_str());
        upsert.execute(rusqlite::params![
            event.id,
            project_id,
            issue_id,
            merge_request_id,
            event.action,
            label_name,
            actor_id,
            actor_username,
            created_at_ms,
        ])?;
    }

    // Reaching this point means every event was executed exactly once.
    Ok(events.len())
}
/// Writes GitLab milestone events for one issue or merge request.
///
/// Each event is stored with `INSERT OR REPLACE` into
/// `resource_milestone_events`. `entity_type` must be `"issue"` or
/// `"merge_request"`; it decides whether `entity_local_id` goes into the
/// `issue_id` or the `merge_request_id` column. An event without a milestone
/// is stored with NULL `milestone_title` and `milestone_id`.
///
/// Returns the number of events written.
///
/// # Errors
/// Fails on an unknown `entity_type`, on a `created_at` value that
/// `iso_to_ms_strict` rejects, or on any database error.
pub fn upsert_milestone_events(
    conn: &Connection,
    project_id: i64,
    entity_type: &str,
    entity_local_id: i64,
    events: &[GitLabMilestoneEvent],
) -> Result<usize> {
    let (issue_id, merge_request_id) = resolve_entity_ids(entity_type, entity_local_id)?;
    let mut upsert = conn.prepare_cached(
        "INSERT OR REPLACE INTO resource_milestone_events\n\
         (gitlab_id, project_id, issue_id, merge_request_id, action,\n\
         milestone_title, milestone_id, actor_gitlab_id, actor_username, created_at)\n\
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
    )?;

    for event in events {
        let created_at_ms = iso_to_ms_strict(&event.created_at).map_err(LoreError::Other)?;
        let actor = event.user.as_ref();
        let actor_id = actor.map(|u| u.id);
        let actor_username = actor.map(|u| u.username.as_str());
        let milestone = event.milestone.as_ref();
        let milestone_title = milestone.map(|m| m.title.as_str());
        let milestone_id = milestone.map(|m| m.id);
        upsert.execute(rusqlite::params![
            event.id,
            project_id,
            issue_id,
            merge_request_id,
            event.action,
            milestone_title,
            milestone_id,
            actor_id,
            actor_username,
            created_at_ms,
        ])?;
    }

    // Reaching this point means every event was executed exactly once.
    Ok(events.len())
}
/// Splits a typed local id into the `(issue_id, merge_request_id)` column pair.
///
/// `"issue"` yields `(Some(id), None)` and `"merge_request"` yields
/// `(None, Some(id))`.
///
/// # Errors
/// Returns `LoreError::Other` for any other `entity_type`.
fn resolve_entity_ids(
    entity_type: &str,
    entity_local_id: i64,
) -> Result<(Option<i64>, Option<i64>)> {
    let local_id = Some(entity_local_id);
    if entity_type == "issue" {
        return Ok((local_id, None));
    }
    if entity_type == "merge_request" {
        return Ok((None, local_id));
    }
    Err(LoreError::Other(format!(
        "Invalid entity type for resource events: {entity_type}"
    )))
}
pub fn count_events(conn: &Connection) -> Result<EventCounts> {
let mut counts = EventCounts::default();
let row: (i64, i64) = conn.query_row(
"SELECT
COUNT(CASE WHEN issue_id IS NOT NULL THEN 1 END),
COUNT(CASE WHEN merge_request_id IS NOT NULL THEN 1 END)
FROM resource_state_events",
[],
|row| Ok((row.get(0)?, row.get(1)?)),
)?;
counts.state_issue = row.0 as usize;
counts.state_mr = row.1 as usize;
let row: (i64, i64) = conn.query_row(
"SELECT
COUNT(CASE WHEN issue_id IS NOT NULL THEN 1 END),
COUNT(CASE WHEN merge_request_id IS NOT NULL THEN 1 END)
FROM resource_label_events",
[],
|row| Ok((row.get(0)?, row.get(1)?)),
)?;
counts.label_issue = row.0 as usize;
counts.label_mr = row.1 as usize;
let row: (i64, i64) = conn.query_row(
"SELECT
COUNT(CASE WHEN issue_id IS NOT NULL THEN 1 END),
COUNT(CASE WHEN merge_request_id IS NOT NULL THEN 1 END)
FROM resource_milestone_events",
[],
|row| Ok((row.get(0)?, row.get(1)?)),
)?;
counts.milestone_issue = row.0 as usize;
counts.milestone_mr = row.1 as usize;
Ok(counts)
}
/// Row counts of the three resource-event tables, split by owning entity kind.
#[derive(Debug, Default)]
pub struct EventCounts {
    /// State events attached to an issue.
    pub state_issue: usize,
    /// State events attached to a merge request.
    pub state_mr: usize,
    /// Label events attached to an issue.
    pub label_issue: usize,
    /// Label events attached to a merge request.
    pub label_mr: usize,
    /// Milestone events attached to an issue.
    pub milestone_issue: usize,
    /// Milestone events attached to a merge request.
    pub milestone_mr: usize,
}

impl EventCounts {
    /// Sum of all six counters.
    pub fn total(&self) -> usize {
        [
            self.state_issue,
            self.state_mr,
            self.label_issue,
            self.label_mr,
            self.milestone_issue,
            self.milestone_mr,
        ]
        .iter()
        .sum()
    }
}

View File

@@ -1,476 +0,0 @@
use std::sync::LazyLock;
use regex::Regex;
use rusqlite::Connection;
use tracing::debug;
use super::error::Result;
use super::time::now_ms;
/// One cross-reference found in a note body or description.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedCrossRef {
/// Relationship kind; the parsers in this file produce `"mentioned"` or `"closes"`.
pub reference_type: String,
/// Kind of the referenced entity: `"issue"` or `"merge_request"`.
pub target_entity_type: String,
/// IID (the number after `#`/`!`, or the trailing number of a URL) of the referenced entity.
pub target_iid: i64,
/// Project path of the referenced entity when the text spelled one out;
/// `None` for a bare `#123` / `!123` reference.
pub target_project_path: Option<String>,
}
/// Counters reported by the `extract_refs_from_*` functions.
#[derive(Debug, Default)]
pub struct ExtractResult {
/// Newly inserted references whose target resolved to a local entity id.
pub inserted: usize,
/// Newly inserted references whose target could not be resolved locally
/// (the row is still written, carrying the target IID instead of an id).
pub skipped_unresolvable: usize,
/// System notes whose body matched no cross-reference pattern.
pub parse_failures: usize,
}
/// Matches the "mentioned in …" phrase of GitLab system notes.
///
/// GitLab system notes include the entity type word: "mentioned in issue #5"
/// or "mentioned in merge request !730". The word is mandatory in real data,
/// but the old bare-sigil form ("mentioned in #5") is kept as a fallback (no
/// data uses it today, but other GitLab instances might differ).
///
/// Named groups: `project` (optional `group/…/project` path directly before
/// the sigil), `sigil` (`#` or `!`) and `iid` (decimal digits).
static MENTIONED_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"mentioned in (?:issue |merge request )?(?:(?P<project>[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P<sigil>[#!])(?P<iid>\d+)",
)
.expect("mentioned regex is valid")
});
/// Matches the "closed by …" phrase of GitLab system notes, e.g.
/// "closed by merge request !567" or the bare form "closed by !567".
///
/// Uses the same named groups as the "mentioned in" pattern: `project`
/// (optional path), `sigil` (`#` or `!`) and `iid` (decimal digits).
static CLOSED_BY_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"closed by (?:issue |merge request )?(?:(?P<project>[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P<sigil>[#!])(?P<iid>\d+)",
)
.expect("closed_by regex is valid")
});
/// Matches full GitLab URLs like:
/// `https://gitlab.example.com/group/project/-/issues/123`
/// `https://gitlab.example.com/group/sub/project/-/merge_requests/456`
///
/// Named groups: `project` (everything between the host and `/-/`),
/// `entity_type` (`issues` or `merge_requests`) and `iid` (decimal digits).
/// The host itself is not checked, so any host with this path shape matches.
static GITLAB_URL_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"https?://[^\s/]+/(?P<project>[^\s]+?)/-/(?P<entity_type>issues|merge_requests)/(?P<iid>\d+)",
)
.expect("gitlab url regex is valid")
});
/// Extracts cross-references from the text of a GitLab system note.
///
/// All "mentioned in …" matches come first (as `"mentioned"` references),
/// followed by all "closed by …" matches (as `"closes"` references), each
/// group in order of appearance. Returns an empty vector when nothing matches.
pub fn parse_cross_refs(body: &str) -> Vec<ParsedCrossRef> {
    let mentioned = MENTIONED_RE
        .captures_iter(body)
        .filter_map(|caps| capture_to_cross_ref(&caps, "mentioned"));
    let closed_by = CLOSED_BY_RE
        .captures_iter(body)
        .filter_map(|caps| capture_to_cross_ref(&caps, "closes"));
    mentioned.chain(closed_by).collect()
}
/// Extracts cross-references from GitLab URLs in free text (descriptions and
/// user notes).
///
/// Every issue or merge-request URL becomes a `"mentioned"` reference that
/// carries the project path taken from the URL. A URL whose IID does not fit
/// an `i64` is skipped. Repeated references to the same
/// `(entity type, project, iid)` within `body` are reported once, at their
/// first occurrence.
pub fn parse_url_refs(body: &str) -> Vec<ParsedCrossRef> {
    let mut already_seen = std::collections::HashSet::new();
    GITLAB_URL_RE
        .captures_iter(body)
        .filter_map(|caps| {
            let url_kind = caps.name("entity_type")?.as_str();
            let iid_text = caps.name("iid")?.as_str();
            let project = caps.name("project")?.as_str();
            let iid = iid_text.parse::<i64>().ok()?;
            let target_entity_type = match url_kind {
                "issues" => "issue",
                "merge_requests" => "merge_request",
                _ => return None,
            };
            // `insert` returns false for a repeat, which drops the duplicate.
            already_seen
                .insert((target_entity_type, project.to_owned(), iid))
                .then(|| ParsedCrossRef {
                    reference_type: "mentioned".to_owned(),
                    target_entity_type: target_entity_type.to_owned(),
                    target_iid: iid,
                    target_project_path: Some(project.to_owned()),
                })
        })
        .collect()
}
/// Converts one regex match with `sigil`, `iid` and optional `project` groups
/// into a `ParsedCrossRef` tagged with `reference_type`.
///
/// `#` maps to `"issue"` and `!` to `"merge_request"`. Returns `None` when a
/// required group is missing, the sigil is unknown, or the IID does not parse
/// as an `i64`.
fn capture_to_cross_ref(
    caps: &regex::Captures<'_>,
    reference_type: &str,
) -> Option<ParsedCrossRef> {
    let target_entity_type = match caps.name("sigil")?.as_str() {
        "#" => "issue",
        "!" => "merge_request",
        _ => return None,
    };
    let target_iid = caps.name("iid")?.as_str().parse::<i64>().ok()?;
    let target_project_path = caps.name("project").map(|m| m.as_str().to_owned());

    Some(ParsedCrossRef {
        reference_type: reference_type.to_owned(),
        target_entity_type: target_entity_type.to_owned(),
        target_iid,
        target_project_path,
    })
}
/// A system note joined with its discussion, as loaded by
/// `extract_refs_from_system_notes`.
struct SystemNote {
/// `notes.id` of the note (used only for logging).
note_id: i64,
/// Note text that is scanned for cross-references.
body: String,
/// `discussions.noteable_type` of the owning discussion.
noteable_type: String,
/// The discussion's `issue_id`, or its `merge_request_id` when `issue_id` is NULL.
entity_id: i64,
}
/// Scans the system notes of `project_id` for "mentioned in" / "closed by"
/// phrases and records them in `entity_references`.
///
/// Each parsed reference is resolved to a local entity id: within
/// `project_id` when the note names no project path, otherwise via the named
/// project. Rows are written with `INSERT OR IGNORE` and `source_method`
/// `'note_parse'`; when the target cannot be resolved the row stores the
/// target IID instead of an id.
///
/// In the returned counters, `inserted` and `skipped_unresolvable` only count
/// rows that were actually inserted, and `parse_failures` counts notes whose
/// body matched no pattern.
///
/// # Errors
/// Propagates any error reported by the database or by row decoding.
pub fn extract_refs_from_system_notes(conn: &Connection, project_id: i64) -> Result<ExtractResult> {
    let mut totals = ExtractResult::default();

    let mut select_notes = conn.prepare_cached(
        "SELECT n.id, n.body, d.noteable_type,\n\
         COALESCE(d.issue_id, d.merge_request_id) AS entity_id\n\
         FROM notes n\n\
         JOIN discussions d ON n.discussion_id = d.id\n\
         WHERE n.is_system = 1\n\
         AND n.project_id = ?1\n\
         AND n.body IS NOT NULL",
    )?;
    let system_notes = select_notes
        .query_map([project_id], |row| {
            Ok(SystemNote {
                note_id: row.get(0)?,
                body: row.get(1)?,
                noteable_type: row.get(2)?,
                entity_id: row.get(3)?,
            })
        })?
        .collect::<std::result::Result<Vec<SystemNote>, _>>()?;
    if system_notes.is_empty() {
        return Ok(totals);
    }

    let mut insert_ref = conn.prepare_cached(
        "INSERT OR IGNORE INTO entity_references\n\
         (project_id, source_entity_type, source_entity_id,\n\
         target_entity_type, target_entity_id,\n\
         target_project_path, target_entity_iid,\n\
         reference_type, source_method, created_at)\n\
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'note_parse', ?9)",
    )?;
    let created_at = now_ms();

    for note in &system_notes {
        let parsed = parse_cross_refs(&note.body);
        if parsed.is_empty() {
            debug!(
                note_id = note.note_id,
                body = %note.body,
                "System note did not match any cross-reference pattern"
            );
            totals.parse_failures += 1;
            continue;
        }

        let source_entity_type = noteable_type_to_entity_type(&note.noteable_type);
        for xref in &parsed {
            let target_entity_id = match xref.target_project_path.as_deref() {
                None => {
                    resolve_entity_id(conn, project_id, &xref.target_entity_type, xref.target_iid)
                }
                Some(path) => resolve_cross_project_entity(
                    conn,
                    path,
                    &xref.target_entity_type,
                    xref.target_iid,
                ),
            };
            // The IID is stored only while the target is still unresolved.
            let unresolved_iid = match target_entity_id {
                Some(_) => None,
                None => Some(xref.target_iid),
            };

            let rows_changed = insert_ref.execute(rusqlite::params![
                project_id,
                source_entity_type,
                note.entity_id,
                xref.target_entity_type,
                target_entity_id,
                xref.target_project_path,
                unresolved_iid,
                xref.reference_type,
                created_at,
            ])?;
            if rows_changed == 0 {
                // Ignored by `INSERT OR IGNORE`; not counted.
                continue;
            }
            match target_entity_id {
                Some(_) => totals.inserted += 1,
                None => totals.skipped_unresolvable += 1,
            }
        }
    }

    if totals.inserted > 0 || totals.skipped_unresolvable > 0 {
        debug!(
            inserted = totals.inserted,
            unresolvable = totals.skipped_unresolvable,
            parse_failures = totals.parse_failures,
            "System note cross-reference extraction complete"
        );
    }
    Ok(totals)
}
/// Maps a discussion `noteable_type` to the entity-type string used in
/// `entity_references`.
///
/// `"MergeRequest"` becomes `"merge_request"` and `"Issue"` becomes `"issue"`.
/// Any other value is logged at debug level and also treated as `"issue"`.
fn noteable_type_to_entity_type(noteable_type: &str) -> &str {
    if noteable_type == "MergeRequest" {
        return "merge_request";
    }
    if noteable_type != "Issue" {
        debug!(noteable_type = %noteable_type, "Unknown noteable_type, defaulting to issue");
    }
    "issue"
}
/// Looks up the local row id of an issue or merge request by project and IID.
///
/// Returns `None` when `entity_type` is neither `"issue"` nor
/// `"merge_request"`, when no row matches, or when the query fails for any
/// other reason (query errors are deliberately collapsed into `None`).
fn resolve_entity_id(
    conn: &Connection,
    project_id: i64,
    entity_type: &str,
    iid: i64,
) -> Option<i64> {
    let lookup_sql = match entity_type {
        "issue" => "SELECT id FROM issues WHERE project_id = ?1 AND iid = ?2",
        "merge_request" => "SELECT id FROM merge_requests WHERE project_id = ?1 AND iid = ?2",
        _ => return None,
    };
    conn.query_row(lookup_sql, rusqlite::params![project_id, iid], |row| {
        row.get(0)
    })
    .ok()
}
fn resolve_cross_project_entity(
conn: &Connection,
project_path: &str,
entity_type: &str,
iid: i64,
) -> Option<i64> {
let project_id: i64 = conn
.query_row(
"SELECT id FROM projects WHERE path_with_namespace = ?1",
[project_path],
|row| row.get(0),
)
.ok()?;
resolve_entity_id(conn, project_id, entity_type, iid)
}
/// Extracts cross-references from issue and merge-request descriptions
/// (GitLab URLs only).
///
/// All non-empty issue descriptions of `project_id` are processed first, then
/// all non-empty merge-request descriptions. References are written to
/// `entity_references` with `source_method` `'description_parse'`.
///
/// # Errors
/// Propagates any error reported by the database or by row decoding.
pub fn extract_refs_from_descriptions(conn: &Connection, project_id: i64) -> Result<ExtractResult> {
    // (source entity type, query for its described rows), in processing order.
    const SOURCES: [(&str, &str); 2] = [
        (
            "issue",
            "SELECT id, iid, description FROM issues\n\
             WHERE project_id = ?1 AND description IS NOT NULL AND description != ''",
        ),
        (
            "merge_request",
            "SELECT id, iid, description FROM merge_requests\n\
             WHERE project_id = ?1 AND description IS NOT NULL AND description != ''",
        ),
    ];

    let mut totals = ExtractResult::default();
    let mut insert_ref = conn.prepare_cached(
        "INSERT OR IGNORE INTO entity_references\n\
         (project_id, source_entity_type, source_entity_id,\n\
         target_entity_type, target_entity_id,\n\
         target_project_path, target_entity_iid,\n\
         reference_type, source_method, created_at)\n\
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'description_parse', ?9)",
    )?;
    let created_at = now_ms();

    for &(source_entity_type, select_sql) in &SOURCES {
        let mut select = conn.prepare_cached(select_sql)?;
        let described = select
            .query_map([project_id], |row| {
                let entity_id: i64 = row.get(0)?;
                let iid: i64 = row.get(1)?;
                let description: String = row.get(2)?;
                Ok((entity_id, iid, description))
            })?
            .collect::<std::result::Result<Vec<_>, _>>()?;

        for (entity_id, _iid, description) in &described {
            insert_url_refs(
                conn,
                &mut insert_ref,
                &mut totals,
                project_id,
                source_entity_type,
                *entity_id,
                description,
                created_at,
            )?;
        }
    }

    if totals.inserted > 0 || totals.skipped_unresolvable > 0 {
        debug!(
            inserted = totals.inserted,
            unresolvable = totals.skipped_unresolvable,
            "Description cross-reference extraction complete"
        );
    }
    Ok(totals)
}
/// Extracts cross-references from user (non-system) notes (GitLab URLs only).
///
/// Every non-system note of `project_id` with a body is scanned for GitLab
/// URLs. The owning issue or merge request of the note's discussion is used
/// as the source entity, and rows are written to `entity_references` with
/// `source_method` `'note_parse'`.
///
/// # Errors
/// Propagates any error reported by the database or by row decoding.
pub fn extract_refs_from_user_notes(conn: &Connection, project_id: i64) -> Result<ExtractResult> {
    let mut totals = ExtractResult::default();

    let mut select_notes = conn.prepare_cached(
        "SELECT n.id, n.body, d.noteable_type,\n\
         COALESCE(d.issue_id, d.merge_request_id) AS entity_id\n\
         FROM notes n\n\
         JOIN discussions d ON n.discussion_id = d.id\n\
         WHERE n.is_system = 0\n\
         AND n.project_id = ?1\n\
         AND n.body IS NOT NULL",
    )?;
    let user_notes = select_notes
        .query_map([project_id], |row| {
            let note_id: i64 = row.get(0)?;
            let body: String = row.get(1)?;
            let noteable_type: String = row.get(2)?;
            let entity_id: i64 = row.get(3)?;
            Ok((note_id, body, noteable_type, entity_id))
        })?
        .collect::<std::result::Result<Vec<_>, _>>()?;
    if user_notes.is_empty() {
        return Ok(totals);
    }

    let mut insert_ref = conn.prepare_cached(
        "INSERT OR IGNORE INTO entity_references\n\
         (project_id, source_entity_type, source_entity_id,\n\
         target_entity_type, target_entity_id,\n\
         target_project_path, target_entity_iid,\n\
         reference_type, source_method, created_at)\n\
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'note_parse', ?9)",
    )?;
    let created_at = now_ms();

    for (_note_id, body, noteable_type, entity_id) in &user_notes {
        insert_url_refs(
            conn,
            &mut insert_ref,
            &mut totals,
            project_id,
            noteable_type_to_entity_type(noteable_type),
            *entity_id,
            body,
            created_at,
        )?;
    }

    if totals.inserted > 0 || totals.skipped_unresolvable > 0 {
        debug!(
            inserted = totals.inserted,
            unresolvable = totals.skipped_unresolvable,
            "User note cross-reference extraction complete"
        );
    }
    Ok(totals)
}
/// Shared helper: parses URL references out of `body` and inserts them into
/// `entity_references` through the caller's prepared `insert_stmt`.
///
/// Each reference is resolved to a local entity id (via its project path when
/// it has one, otherwise within `project_id`). An unresolved reference is
/// still inserted, carrying the target IID instead of an id. `result` is
/// updated only for rows that were actually inserted.
///
/// # Errors
/// Propagates any error reported by the insert statement.
// Many arguments on purpose: the statement and counters are owned by the caller.
#[allow(clippy::too_many_arguments)]
fn insert_url_refs(
    conn: &Connection,
    insert_stmt: &mut rusqlite::CachedStatement<'_>,
    result: &mut ExtractResult,
    project_id: i64,
    source_entity_type: &str,
    source_entity_id: i64,
    body: &str,
    now: i64,
) -> Result<()> {
    for xref in parse_url_refs(body) {
        let target_entity_id = match xref.target_project_path.as_deref() {
            Some(path) => {
                resolve_cross_project_entity(conn, path, &xref.target_entity_type, xref.target_iid)
            }
            None => resolve_entity_id(conn, project_id, &xref.target_entity_type, xref.target_iid),
        };
        // The IID is stored only while the target is still unresolved.
        let unresolved_iid = match target_entity_id {
            Some(_) => None,
            None => Some(xref.target_iid),
        };

        let rows_changed = insert_stmt.execute(rusqlite::params![
            project_id,
            source_entity_type,
            source_entity_id,
            xref.target_entity_type,
            target_entity_id,
            xref.target_project_path,
            unresolved_iid,
            xref.reference_type,
            now,
        ])?;
        if rows_changed == 0 {
            // Ignored by `INSERT OR IGNORE`; not counted.
            continue;
        }
        match target_entity_id {
            Some(_) => result.inserted += 1,
            None => result.skipped_unresolvable += 1,
        }
    }
    Ok(())
}
// Unit and integration tests live in the sibling file `note_parser_tests.rs`
// and are compiled only for test builds.
#[cfg(test)]
#[path = "note_parser_tests.rs"]
mod tests;

View File

@@ -1,770 +0,0 @@
use super::*;
// --- parse_cross_refs: real GitLab system note format ---
/// A same-project "mentioned in merge request" note yields one MR reference.
#[test]
fn test_parse_mentioned_in_mr() {
    let parsed = parse_cross_refs("mentioned in merge request !567");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.reference_type, "mentioned");
    assert_eq!(xref.target_entity_type, "merge_request");
    assert_eq!(xref.target_iid, 567);
    assert_eq!(xref.target_project_path, None);
}
/// A same-project "mentioned in issue" note yields one issue reference.
#[test]
fn test_parse_mentioned_in_issue() {
    let parsed = parse_cross_refs("mentioned in issue #234");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.reference_type, "mentioned");
    assert_eq!(xref.target_entity_type, "issue");
    assert_eq!(xref.target_iid, 234);
    assert_eq!(xref.target_project_path, None);
}
/// A project path in front of `!` is captured for a mentioned merge request.
#[test]
fn test_parse_mentioned_cross_project() {
    let parsed = parse_cross_refs("mentioned in merge request group/repo!789");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.reference_type, "mentioned");
    assert_eq!(xref.target_entity_type, "merge_request");
    assert_eq!(xref.target_iid, 789);
    assert_eq!(xref.target_project_path.as_deref(), Some("group/repo"));
}
/// A project path in front of `#` is captured for a mentioned issue.
#[test]
fn test_parse_mentioned_cross_project_issue() {
    let parsed = parse_cross_refs("mentioned in issue group/repo#123");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.reference_type, "mentioned");
    assert_eq!(xref.target_entity_type, "issue");
    assert_eq!(xref.target_iid, 123);
    assert_eq!(xref.target_project_path.as_deref(), Some("group/repo"));
}
/// "closed by merge request" produces a `closes` reference to the MR.
#[test]
fn test_parse_closed_by_mr() {
    let parsed = parse_cross_refs("closed by merge request !567");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.reference_type, "closes");
    assert_eq!(xref.target_entity_type, "merge_request");
    assert_eq!(xref.target_iid, 567);
    assert_eq!(xref.target_project_path, None);
}
/// "closed by" with a project path keeps the path on the `closes` reference.
#[test]
fn test_parse_closed_by_cross_project() {
    let parsed = parse_cross_refs("closed by merge request group/repo!789");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.reference_type, "closes");
    assert_eq!(xref.target_entity_type, "merge_request");
    assert_eq!(xref.target_iid, 789);
    assert_eq!(xref.target_project_path.as_deref(), Some("group/repo"));
}
/// Two "mentioned in" phrases in one body are both returned, in order.
#[test]
fn test_parse_multiple_refs() {
    let parsed = parse_cross_refs("mentioned in merge request !123 and mentioned in issue #456");
    let targets: Vec<(&str, i64)> = parsed
        .iter()
        .map(|xref| (xref.target_entity_type.as_str(), xref.target_iid))
        .collect();
    assert_eq!(targets, [("merge_request", 123), ("issue", 456)]);
}
/// A system note without any reference phrase yields nothing.
#[test]
fn test_parse_no_refs() {
    assert!(parse_cross_refs("Updated the description").is_empty());
}
/// A non-English system note does not match the English patterns.
#[test]
fn test_parse_non_english_note() {
    let body = "a ajout\u{00e9} l'\u{00e9}tiquette ~bug";
    assert!(parse_cross_refs(body).is_empty());
}
/// A three-segment project path is captured in full.
#[test]
fn test_parse_multi_level_group_path() {
    let parsed = parse_cross_refs("mentioned in issue top/sub/project#123");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.target_project_path.as_deref(), Some("top/sub/project"));
    assert_eq!(xref.target_iid, 123);
}
/// A five-segment project path is captured in full.
#[test]
fn test_parse_deeply_nested_group_path() {
    let parsed = parse_cross_refs("mentioned in merge request a/b/c/d/e!42");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.target_project_path.as_deref(), Some("a/b/c/d/e"));
    assert_eq!(xref.target_iid, 42);
}
/// Hyphens are allowed inside project path segments.
#[test]
fn test_parse_hyphenated_project_path() {
    let parsed = parse_cross_refs("mentioned in issue my-group/my-project#99");
    assert_eq!(parsed.len(), 1);
    let path = parsed[0].target_project_path.as_deref();
    assert_eq!(path, Some("my-group/my-project"));
}
/// Dots are allowed inside project path segments.
#[test]
fn test_parse_dotted_project_path() {
    let parsed = parse_cross_refs("mentioned in issue visiostack.io/backend#123");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(
        xref.target_project_path.as_deref(),
        Some("visiostack.io/backend")
    );
    assert_eq!(xref.target_iid, 123);
}
/// Dots are allowed in every segment of a nested project path.
#[test]
fn test_parse_dotted_nested_project_path() {
    let parsed = parse_cross_refs("closed by merge request my.org/sub.group/my.project!42");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(
        xref.target_project_path.as_deref(),
        Some("my.org/sub.group/my.project")
    );
    assert_eq!(xref.target_entity_type, "merge_request");
    assert_eq!(xref.target_iid, 42);
}
/// The bare-sigil fallback (no "issue"/"merge request" word) still works.
#[test]
fn test_parse_bare_sigil_fallback() {
    let parsed = parse_cross_refs("mentioned in #123");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.target_iid, 123);
    assert_eq!(xref.target_entity_type, "issue");
}
/// The bare-sigil form also works for "closed by".
#[test]
fn test_parse_bare_sigil_closed_by() {
    let parsed = parse_cross_refs("closed by !567");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.reference_type, "closes");
    assert_eq!(xref.target_entity_type, "merge_request");
    assert_eq!(xref.target_iid, 567);
}
/// A body with both phrases yields the `mentioned` reference before the
/// `closes` reference.
#[test]
fn test_parse_mixed_mentioned_and_closed() {
    let parsed =
        parse_cross_refs("mentioned in merge request !10 and closed by merge request !20");
    let kinds: Vec<(&str, i64)> = parsed
        .iter()
        .map(|xref| (xref.reference_type.as_str(), xref.target_iid))
        .collect();
    assert_eq!(kinds, [("mentioned", 10), ("closes", 20)]);
}
// --- parse_url_refs ---
/// An issue URL embedded in prose is parsed into a `mentioned` issue reference.
#[test]
fn test_url_ref_same_project_issue() {
    let body = "See https://gitlab.visiostack.com/vs/typescript-code/-/issues/3537 for details";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.target_entity_type, "issue");
    assert_eq!(xref.target_iid, 3537);
    assert_eq!(
        xref.target_project_path.as_deref(),
        Some("vs/typescript-code")
    );
    assert_eq!(xref.reference_type, "mentioned");
}
/// A bare merge-request URL is parsed into a merge-request reference.
#[test]
fn test_url_ref_merge_request() {
    let body = "https://gitlab.visiostack.com/vs/typescript-code/-/merge_requests/3548";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.target_entity_type, "merge_request");
    assert_eq!(xref.target_iid, 3548);
    assert_eq!(
        xref.target_project_path.as_deref(),
        Some("vs/typescript-code")
    );
}
/// The project path is taken from the URL itself.
#[test]
fn test_url_ref_cross_project() {
    let body = "Related: https://gitlab.visiostack.com/vs/python-code/-/merge_requests/5203";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.target_entity_type, "merge_request");
    assert_eq!(xref.target_iid, 5203);
    assert_eq!(xref.target_project_path.as_deref(), Some("vs/python-code"));
}
/// A `#note_…` anchor after the IID does not disturb parsing.
#[test]
fn test_url_ref_with_anchor() {
    let body = "https://gitlab.visiostack.com/vs/typescript-code/-/issues/123#note_456";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.target_entity_type, "issue");
    assert_eq!(xref.target_iid, 123);
}
/// A URL inside a Markdown link target is still recognised.
#[test]
fn test_url_ref_markdown_link() {
    let body = "Check [this MR](https://gitlab.visiostack.com/vs/typescript-code/-/merge_requests/100) for context";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(xref.target_entity_type, "merge_request");
    assert_eq!(xref.target_iid, 100);
}
/// Two different URLs in one body are both returned, in order.
#[test]
fn test_url_ref_multiple_urls() {
    let body =
        "See https://gitlab.com/a/b/-/issues/1 and https://gitlab.com/a/b/-/merge_requests/2";
    let parsed = parse_url_refs(body);
    let targets: Vec<(&str, i64)> = parsed
        .iter()
        .map(|xref| (xref.target_entity_type.as_str(), xref.target_iid))
        .collect();
    assert_eq!(targets, [("issue", 1), ("merge_request", 2)]);
}
/// The same URL appearing twice in one body is reported once.
#[test]
fn test_url_ref_deduplicates() {
    let body = "See https://gitlab.com/a/b/-/issues/1 and again https://gitlab.com/a/b/-/issues/1";
    let unique = parse_url_refs(body);
    assert_eq!(
        unique.len(),
        1,
        "Duplicate URLs in same body should be deduplicated"
    );
}
/// URLs without the GitLab `/-/issues|merge_requests/N` shape are ignored.
#[test]
fn test_url_ref_non_gitlab_urls_ignored() {
    let body = "Check https://google.com/search?q=test and https://github.com/org/repo/issues/1";
    assert!(parse_url_refs(body).is_empty());
}
/// A four-segment project path in a URL is captured in full.
#[test]
fn test_url_ref_deeply_nested_project() {
    let parsed = parse_url_refs("https://gitlab.com/org/sub/deep/project/-/issues/42");
    assert_eq!(parsed.len(), 1);
    let xref = &parsed[0];
    assert_eq!(
        xref.target_project_path.as_deref(),
        Some("org/sub/deep/project")
    );
    assert_eq!(xref.target_iid, 42);
}
// --- Integration tests: system notes (updated for real format) ---
/// Opens a fresh in-memory database and applies all migrations to it.
fn setup_test_db() -> Connection {
    use crate::core::db::{create_connection, run_migrations};

    let db = create_connection(std::path::Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Seeds the fixture shared by the system-note integration tests and returns
/// the local project id (always `1`).
///
/// Rows inserted, all stamped with a single `now_ms()` value:
/// - project 1 (`group/test-project`)
/// - issues 10 (iid 123) and 11 (iid 456)
/// - merge request 20 (iid 789)
/// - discussion 30 on issue 10 and discussion 31 on merge request 20
/// - notes 40-44, described by the inline comments below
fn seed_test_data(conn: &Connection) -> i64 {
let now = now_ms();
// Project that owns every other row in this fixture.
conn.execute(
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'group/test-project', 'https://gitlab.com/group/test-project', ?1, ?1)",
[now],
)
.unwrap();
// Issue iid 123: owner of discussion 30.
conn.execute(
"INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
VALUES (10, 1000, 1, 123, 'Test Issue', 'opened', ?1, ?1, ?1)",
[now],
)
.unwrap();
// Issue iid 456: the target named by note 41.
conn.execute(
"INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
VALUES (11, 1001, 1, 456, 'Another Issue', 'opened', ?1, ?1, ?1)",
[now],
)
.unwrap();
// Merge request iid 789: owner of discussion 31 and the target named by note 40.
conn.execute(
"INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
VALUES (20, 2000, 1, 789, 'Test MR', 'opened', 'feat', 'main', 'dev', ?1, ?1, ?1)",
[now],
)
.unwrap();
// Discussion attached to issue 10.
conn.execute(
"INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at)
VALUES (30, 'disc-aaa', 1, 10, 'Issue', ?1)",
[now],
)
.unwrap();
// Discussion attached to merge request 20.
conn.execute(
"INSERT INTO discussions (id, gitlab_discussion_id, project_id, merge_request_id, noteable_type, last_seen_at)
VALUES (31, 'disc-bbb', 1, 20, 'MergeRequest', ?1)",
[now],
)
.unwrap();
// System note: real GitLab format "mentioned in merge request !789"
conn.execute(
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
VALUES (40, 4000, 30, 1, 1, 'mentioned in merge request !789', ?1, ?1, ?1)",
[now],
)
.unwrap();
// System note: real GitLab format "mentioned in issue #456"
conn.execute(
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
VALUES (41, 4001, 31, 1, 1, 'mentioned in issue #456', ?1, ?1, ?1)",
[now],
)
.unwrap();
// User note (is_system=0) — should NOT be processed by system note extractor
conn.execute(
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
VALUES (42, 4002, 30, 1, 0, 'mentioned in merge request !999', ?1, ?1, ?1)",
[now],
)
.unwrap();
// System note with no cross-ref pattern
conn.execute(
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
VALUES (43, 4003, 30, 1, 1, 'added label ~bug', ?1, ?1, ?1)",
[now],
)
.unwrap();
// System note: cross-project ref
conn.execute(
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
VALUES (44, 4004, 30, 1, 1, 'mentioned in issue other/project#999', ?1, ?1, ?1)",
[now],
)
.unwrap();
// Local id of the project inserted above.
1
}
/// Runs system-note extraction over the seeded fixture and checks the counters
/// as well as the rows actually written to `entity_references`.
#[test]
fn test_extract_refs_from_system_notes_integration() {
    let conn = setup_test_db();
    let project_id = seed_test_data(&conn);

    let outcome = extract_refs_from_system_notes(&conn, project_id).unwrap();
    assert_eq!(outcome.inserted, 2, "Two same-project refs should resolve");
    assert_eq!(
        outcome.skipped_unresolvable, 1,
        "One cross-project ref should be unresolvable"
    );
    assert_eq!(
        outcome.parse_failures, 1,
        "One system note has no cross-ref pattern"
    );

    // Resolved and unresolved rows together.
    let stored_rows: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM entity_references WHERE project_id = ?1 AND source_method = 'note_parse'",
            [project_id],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(stored_rows, 3, "Should have 3 entity_references rows total");

    // Only the cross-project reference lacks a local target id.
    let unresolved_rows: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM entity_references WHERE target_entity_id IS NULL AND source_method = 'note_parse'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(
        unresolved_rows, 1,
        "Should have 1 unresolved cross-project ref"
    );

    let unresolved_target: (String, i64) = conn
        .query_row(
            "SELECT target_project_path, target_entity_iid FROM entity_references WHERE target_entity_id IS NULL",
            [],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
        .unwrap();
    let (unresolved_path, unresolved_iid) = unresolved_target;
    assert_eq!(unresolved_path, "other/project");
    assert_eq!(unresolved_iid, 999);
}
/// A second extraction pass over the same notes must not add any rows.
#[test]
fn test_extract_refs_idempotent() {
    let conn = setup_test_db();
    let project_id = seed_test_data(&conn);

    let first_pass = extract_refs_from_system_notes(&conn, project_id).unwrap();
    let second_pass = extract_refs_from_system_notes(&conn, project_id).unwrap();
    assert_eq!(second_pass.inserted, 0);
    assert_eq!(second_pass.skipped_unresolvable, 0);

    // The table must hold exactly what the first pass reported.
    let stored_rows: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM entity_references WHERE source_method = 'note_parse'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    let expected_rows = (first_pass.inserted + first_pass.skipped_unresolvable) as i64;
    assert_eq!(stored_rows, expected_rows);
}
/// Extraction for a project id with no rows reports all-zero counters.
#[test]
fn test_extract_refs_empty_project() {
    let conn = setup_test_db();
    let outcome = extract_refs_from_system_notes(&conn, 999).unwrap();

    assert_eq!(outcome.inserted, 0);
    assert_eq!(outcome.skipped_unresolvable, 0);
    assert_eq!(outcome.parse_failures, 0);
}
// --- Integration tests: description extraction ---
/// An issue description linking to a same-project MR yields one resolved
/// reference whose `source_method` is `description_parse`.
#[test]
fn test_extract_refs_from_descriptions_issue() {
    let conn = setup_test_db();
    let ts = now_ms();

    // Fixture statements, executed in order with the same timestamp parameter.
    let fixture = [
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'vs/typescript-code', 'https://gitlab.com/vs/typescript-code', ?1, ?1)",
        // Issue with MR reference in description
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, description, created_at, updated_at, last_seen_at)
VALUES (10, 1000, 1, 3537, 'Test Issue', 'opened',
'Related to https://gitlab.com/vs/typescript-code/-/merge_requests/3548',
?1, ?1, ?1)",
        // The target MR so it resolves
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
VALUES (20, 2000, 1, 3548, 'Fix MR', 'merged', 'fix', 'main', 'dev', ?1, ?1, ?1)",
    ];
    for sql in fixture {
        conn.execute(sql, [ts]).unwrap();
    }

    let outcome = extract_refs_from_descriptions(&conn, 1).unwrap();
    assert_eq!(outcome.inserted, 1, "Should insert 1 description ref");
    assert_eq!(outcome.skipped_unresolvable, 0);

    let source_method: String = conn
        .query_row(
            "SELECT source_method FROM entity_references WHERE project_id = 1",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(source_method, "description_parse");
}
/// An MR description linking to a same-project issue yields a reference from
/// `merge_request` to `issue`.
#[test]
fn test_extract_refs_from_descriptions_mr() {
    let conn = setup_test_db();
    let ts = now_ms();

    // Fixture statements, executed in order with the same timestamp parameter.
    let fixture = [
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'vs/typescript-code', 'https://gitlab.com/vs/typescript-code', ?1, ?1)",
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
VALUES (10, 1000, 1, 100, 'Target Issue', 'opened', ?1, ?1, ?1)",
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, description, created_at, updated_at, last_seen_at)
VALUES (20, 2000, 1, 200, 'Fixing MR', 'merged', 'fix', 'main', 'dev',
'Fixes https://gitlab.com/vs/typescript-code/-/issues/100',
?1, ?1, ?1)",
    ];
    for sql in fixture {
        conn.execute(sql, [ts]).unwrap();
    }

    let outcome = extract_refs_from_descriptions(&conn, 1).unwrap();
    assert_eq!(outcome.inserted, 1);

    let endpoint_types: (String, String) = conn
        .query_row(
            "SELECT source_entity_type, target_entity_type FROM entity_references WHERE project_id = 1",
            [],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
        .unwrap();
    let (src_type, tgt_type) = endpoint_types;
    assert_eq!(src_type, "merge_request");
    assert_eq!(tgt_type, "issue");
}
/// Running description extraction twice inserts the reference only once.
#[test]
fn test_extract_refs_from_descriptions_idempotent() {
    let conn = setup_test_db();
    let ts = now_ms();

    // Fixture statements, executed in order with the same timestamp parameter.
    let fixture = [
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)",
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, description, created_at, updated_at, last_seen_at)
VALUES (10, 1000, 1, 1, 'Issue', 'opened',
'See https://gitlab.com/vs/code/-/merge_requests/2', ?1, ?1, ?1)",
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
VALUES (20, 2000, 1, 2, 'MR', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)",
    ];
    for sql in fixture {
        conn.execute(sql, [ts]).unwrap();
    }

    let first_pass = extract_refs_from_descriptions(&conn, 1).unwrap();
    assert_eq!(first_pass.inserted, 1);

    let second_pass = extract_refs_from_descriptions(&conn, 1).unwrap();
    assert_eq!(
        second_pass.inserted, 0,
        "Second run should insert 0 (idempotent)"
    );
}
/// A description URL pointing at a project that is not in the database is
/// stored as an unresolved reference carrying the target path and iid.
#[test]
fn test_extract_refs_from_descriptions_cross_project_unresolved() {
    let conn = setup_test_db();
    let ts = now_ms();

    // Fixture statements, executed in order with the same timestamp parameter.
    let fixture = [
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'vs/typescript-code', 'https://gitlab.com/vs/typescript-code', ?1, ?1)",
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, description, created_at, updated_at, last_seen_at)
VALUES (10, 1000, 1, 1, 'Issue', 'opened',
'See https://gitlab.com/vs/other-project/-/merge_requests/99', ?1, ?1, ?1)",
    ];
    for sql in fixture {
        conn.execute(sql, [ts]).unwrap();
    }

    let outcome = extract_refs_from_descriptions(&conn, 1).unwrap();
    assert_eq!(outcome.inserted, 0);
    assert_eq!(
        outcome.skipped_unresolvable, 1,
        "Cross-project ref with no matching project should be unresolvable"
    );

    let unresolved_target: (String, i64) = conn
        .query_row(
            "SELECT target_project_path, target_entity_iid FROM entity_references WHERE target_entity_id IS NULL",
            [],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
        .unwrap();
    let (unresolved_path, unresolved_iid) = unresolved_target;
    assert_eq!(unresolved_path, "vs/other-project");
    assert_eq!(unresolved_iid, 99);
}
// --- Integration tests: user note extraction ---
/// A user note containing an MR URL yields one reference recorded with
/// `source_method = 'note_parse'`.
#[test]
fn test_extract_refs_from_user_notes_with_url() {
    let conn = setup_test_db();
    let ts = now_ms();

    // Fixture statements, executed in order with the same timestamp parameter.
    let fixture = [
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)",
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
VALUES (10, 1000, 1, 50, 'Source Issue', 'opened', ?1, ?1, ?1)",
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
VALUES (20, 2000, 1, 60, 'Target MR', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at)
VALUES (30, 'disc-user', 1, 10, 'Issue', ?1)",
        // User note with a URL
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
VALUES (40, 4000, 30, 1, 0,
'This is related to https://gitlab.com/vs/code/-/merge_requests/60',
?1, ?1, ?1)",
    ];
    for sql in fixture {
        conn.execute(sql, [ts]).unwrap();
    }

    let outcome = extract_refs_from_user_notes(&conn, 1).unwrap();
    assert_eq!(outcome.inserted, 1);

    let source_method: String = conn
        .query_row(
            "SELECT source_method FROM entity_references WHERE project_id = 1",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(source_method, "note_parse");
}
/// A user note whose text looks like a system note but contains no URL must
/// not produce any reference.
#[test]
fn test_extract_refs_from_user_notes_no_system_note_patterns() {
    let conn = setup_test_db();
    let ts = now_ms();

    // Fixture statements, executed in order with the same timestamp parameter.
    let fixture = [
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)",
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
VALUES (10, 1000, 1, 50, 'Source', 'opened', ?1, ?1, ?1)",
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
VALUES (20, 2000, 1, 999, 'Target', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at)
VALUES (30, 'disc-x', 1, 10, 'Issue', ?1)",
        // User note with system-note-like text but no URL — should NOT extract
        // (user notes only use URL parsing, not system note pattern matching)
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
VALUES (40, 4000, 30, 1, 0, 'mentioned in merge request !999', ?1, ?1, ?1)",
    ];
    for sql in fixture {
        conn.execute(sql, [ts]).unwrap();
    }

    let outcome = extract_refs_from_user_notes(&conn, 1).unwrap();
    assert_eq!(
        outcome.inserted, 0,
        "User notes should only parse URLs, not system note patterns"
    );
}
/// Running user-note extraction twice inserts the reference only once.
#[test]
fn test_extract_refs_from_user_notes_idempotent() {
    let conn = setup_test_db();
    let ts = now_ms();

    // Fixture statements, executed in order with the same timestamp parameter.
    let fixture = [
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)",
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
VALUES (10, 1000, 1, 1, 'Src', 'opened', ?1, ?1, ?1)",
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
VALUES (20, 2000, 1, 2, 'Tgt', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at)
VALUES (30, 'disc-y', 1, 10, 'Issue', ?1)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
VALUES (40, 4000, 30, 1, 0,
'See https://gitlab.com/vs/code/-/merge_requests/2', ?1, ?1, ?1)",
    ];
    for sql in fixture {
        conn.execute(sql, [ts]).unwrap();
    }

    let first_pass = extract_refs_from_user_notes(&conn, 1).unwrap();
    assert_eq!(first_pass.inserted, 1);

    let second_pass = extract_refs_from_user_notes(&conn, 1).unwrap();
    assert_eq!(
        second_pass.inserted, 0,
        "Second extraction should be idempotent"
    );
}

View File

@@ -1,99 +0,0 @@
use flate2::Compression;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use rusqlite::Connection;
use rusqlite::OptionalExtension;
use sha2::{Digest, Sha256};
use std::io::{Read, Write};
use super::error::Result;
use super::time::now_ms;
/// Borrowed inputs describing one raw payload to persist in `raw_payloads`.
pub struct StorePayloadOptions<'a> {
/// Value for the nullable `project_id` column.
pub project_id: Option<i64>,
/// Value for the `resource_type` column.
pub resource_type: &'a str,
/// Value for the `gitlab_id` column (stored as text).
pub gitlab_id: &'a str,
/// Payload bytes; these are what gets hashed and stored.
pub json_bytes: &'a [u8],
/// When `true`, the bytes are gzip-compressed before being stored.
pub compress: bool,
}
pub fn store_payload(conn: &Connection, options: StorePayloadOptions) -> Result<i64> {
let json_bytes = options.json_bytes;
let mut hasher = Sha256::new();
hasher.update(json_bytes);
let payload_hash = format!("{:x}", hasher.finalize());
let existing: Option<i64> = conn
.query_row(
"SELECT id FROM raw_payloads
WHERE project_id IS ? AND resource_type = ? AND gitlab_id = ? AND payload_hash = ?",
(
options.project_id,
options.resource_type,
options.gitlab_id,
&payload_hash,
),
|row| row.get(0),
)
.optional()?;
if let Some(id) = existing {
return Ok(id);
}
let (encoding, payload_bytes): (&str, std::borrow::Cow<'_, [u8]>) = if options.compress {
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
encoder.write_all(json_bytes)?;
("gzip", std::borrow::Cow::Owned(encoder.finish()?))
} else {
("identity", std::borrow::Cow::Borrowed(json_bytes))
};
conn.execute(
"INSERT INTO raw_payloads
(source, project_id, resource_type, gitlab_id, fetched_at, content_encoding, payload_hash, payload)
VALUES ('gitlab', ?, ?, ?, ?, ?, ?, ?)",
(
options.project_id,
options.resource_type,
options.gitlab_id,
now_ms(),
encoding,
&payload_hash,
payload_bytes.as_ref(),
),
)?;
Ok(conn.last_insert_rowid())
}
/// Loads the payload stored under `id` and parses it as JSON.
///
/// Returns `Ok(None)` when no row has that id. Rows whose `content_encoding`
/// is `"gzip"` are decompressed first; any other encoding is parsed as stored.
///
/// # Errors
/// Propagates database errors, gzip decoding errors and JSON parse errors.
pub fn read_payload(conn: &Connection, id: i64) -> Result<Option<serde_json::Value>> {
    let fetched: Option<(String, Vec<u8>)> = conn
        .query_row(
            "SELECT content_encoding, payload FROM raw_payloads WHERE id = ?",
            [id],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
        .optional()?;

    let (encoding, stored) = match fetched {
        Some(columns) => columns,
        None => return Ok(None),
    };

    let json_bytes = match encoding.as_str() {
        "gzip" => {
            let mut inflated = Vec::new();
            let mut gz = GzDecoder::new(stored.as_slice());
            gz.read_to_end(&mut inflated)?;
            inflated
        }
        _ => stored,
    };

    let parsed: serde_json::Value = serde_json::from_slice(&json_bytes)?;
    Ok(Some(parsed))
}
#[cfg(test)]
#[path = "payloads_tests.rs"]
mod tests;

View File

@@ -1,105 +0,0 @@
use super::*;
use crate::core::db::create_connection;
use tempfile::tempdir;
/// Opens a file-backed database at `test.db` inside a fresh temporary
/// directory and creates the `raw_payloads` table and its dedupe unique index
/// by hand (no migrations are run here).
// NOTE(review): `dir` goes out of scope when this function returns while the
// returned connection is still in use. `tempfile::TempDir` presumably removes
// the directory on drop — confirm this is intended. The sibling test modules
// open `:memory:` databases instead.
fn setup_test_db() -> Connection {
let dir = tempdir().unwrap();
let db_path = dir.path().join("test.db");
let conn = create_connection(&db_path).unwrap();
conn.execute_batch(
"CREATE TABLE raw_payloads (
id INTEGER PRIMARY KEY,
source TEXT NOT NULL,
project_id INTEGER,
resource_type TEXT NOT NULL,
gitlab_id TEXT NOT NULL,
fetched_at INTEGER NOT NULL,
content_encoding TEXT NOT NULL DEFAULT 'identity',
payload_hash TEXT NOT NULL,
payload BLOB NOT NULL
);
CREATE UNIQUE INDEX uq_raw_payloads_dedupe
ON raw_payloads(project_id, resource_type, gitlab_id, payload_hash);",
)
.unwrap();
conn
}
/// An uncompressed payload can be stored and read back as the same JSON.
#[test]
fn test_store_and_read_payload() {
    let conn = setup_test_db();
    let document = serde_json::json!({"title": "Test Issue", "id": 123});
    let json_bytes = serde_json::to_vec(&document).unwrap();

    let options = StorePayloadOptions {
        project_id: Some(1),
        resource_type: "issue",
        gitlab_id: "123",
        json_bytes: &json_bytes,
        compress: false,
    };
    let row_id = store_payload(&conn, options).unwrap();

    let stored = read_payload(&conn, row_id).unwrap().unwrap();
    assert_eq!(stored["title"], "Test Issue");
}
/// A gzip-compressed payload decompresses back to the original JSON on read.
#[test]
fn test_compression_roundtrip() {
    let conn = setup_test_db();
    let filler = "x".repeat(1000);
    let document = serde_json::json!({"data": filler.clone()});
    let json_bytes = serde_json::to_vec(&document).unwrap();

    let options = StorePayloadOptions {
        project_id: Some(1),
        resource_type: "issue",
        gitlab_id: "456",
        json_bytes: &json_bytes,
        compress: true,
    };
    let row_id = store_payload(&conn, options).unwrap();

    let stored = read_payload(&conn, row_id).unwrap().unwrap();
    assert_eq!(stored["data"], filler);
}
/// Storing byte-identical content for the same resource twice returns the same row id.
#[test]
fn test_deduplication() {
    let conn = setup_test_db();
    let json_bytes = serde_json::to_vec(&serde_json::json!({"id": 789})).unwrap();

    // Identical options on every call.
    let store_once = || {
        store_payload(
            &conn,
            StorePayloadOptions {
                project_id: Some(1),
                resource_type: "issue",
                gitlab_id: "789",
                json_bytes: &json_bytes,
                compress: false,
            },
        )
        .unwrap()
    };

    let first_id = store_once();
    let second_id = store_once();
    assert_eq!(first_id, second_id);
}

View File

@@ -1,126 +0,0 @@
use rusqlite::{Connection, OptionalExtension};
use tracing::info;
use super::error::Result;
use super::time::now_ms;
/// Derives `closes` references (merge request -> issue) from the state events
/// of one project and returns how many rows were inserted.
///
/// A state event contributes a row when it has both an `issue_id` and a
/// `source_merge_request_iid`, and a merge request with that iid exists in the
/// same project. Rows are written with `INSERT OR IGNORE` and
/// `source_method = 'api'`. An info-level log line is emitted only when at
/// least one row was inserted.
///
/// # Errors
/// Propagates any database error from the insert.
pub fn extract_refs_from_state_events(conn: &Connection, project_id: i64) -> Result<usize> {
    const INSERT_CLOSES_REFS_SQL: &str = "INSERT OR IGNORE INTO entity_references (
project_id,
source_entity_type, source_entity_id,
target_entity_type, target_entity_id,
reference_type, source_method, created_at
)
SELECT
rse.project_id,
'merge_request',
mr.id,
'issue',
rse.issue_id,
'closes',
'api',
rse.created_at
FROM resource_state_events rse
JOIN merge_requests mr
ON mr.project_id = rse.project_id
AND mr.iid = rse.source_merge_request_iid
WHERE rse.source_merge_request_iid IS NOT NULL
AND rse.issue_id IS NOT NULL
AND rse.project_id = ?1";

    let inserted = conn.execute(INSERT_CLOSES_REFS_SQL, rusqlite::params![project_id])?;
    if inserted == 0 {
        return Ok(inserted);
    }

    info!(
        project_id,
        references_inserted = inserted,
        "Extracted cross-references from state events"
    );
    Ok(inserted)
}
/// Borrowed description of one row to insert into `entity_references`.
///
/// Each field supplies the value for the column of the same name.
#[derive(Debug, Clone)]
pub struct EntityReference<'a> {
/// Local id of the project the reference belongs to.
pub project_id: i64,
/// Kind of the referencing entity, e.g. `"merge_request"`.
pub source_entity_type: &'a str,
/// Local id of the referencing entity.
pub source_entity_id: i64,
/// Kind of the referenced entity, e.g. `"issue"`.
pub target_entity_type: &'a str,
/// Local id of the referenced entity; `None` when the target is unresolved.
pub target_entity_id: Option<i64>,
/// Project path of the target, supplied for unresolved cross-project targets.
pub target_project_path: Option<&'a str>,
/// Iid of the target, supplied for unresolved cross-project targets.
pub target_entity_iid: Option<i64>,
/// Relationship kind, e.g. `"closes"`.
pub reference_type: &'a str,
/// How the reference was discovered, e.g. `"api"`.
pub source_method: &'a str,
}
/// Inserts one reference row, stamping `created_at` with the current time.
///
/// Uses `INSERT OR IGNORE`, so a conflicting row is left untouched. Returns
/// `true` when a row was actually inserted and `false` when it was ignored.
///
/// # Errors
/// Propagates any database error from the insert.
pub fn insert_entity_reference(conn: &Connection, ref_: &EntityReference<'_>) -> Result<bool> {
    let created_at = now_ms();
    let rows_inserted = conn.execute(
        "INSERT OR IGNORE INTO entity_references \
         (project_id, source_entity_type, source_entity_id, \
         target_entity_type, target_entity_id, target_project_path, target_entity_iid, \
         reference_type, source_method, created_at) \
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
        rusqlite::params![
            ref_.project_id,
            ref_.source_entity_type,
            ref_.source_entity_id,
            ref_.target_entity_type,
            ref_.target_entity_id,
            ref_.target_project_path,
            ref_.target_entity_iid,
            ref_.reference_type,
            ref_.source_method,
            created_at,
        ],
    )?;
    Ok(rows_inserted != 0)
}
/// Looks up the local row id of the issue with `issue_iid` in `project_id`.
///
/// Returns `Ok(None)` when no such issue exists.
///
/// # Errors
/// Propagates any database error from preparing or running the query.
pub fn resolve_issue_local_id(
    conn: &Connection,
    project_id: i64,
    issue_iid: i64,
) -> Result<Option<i64>> {
    let local_id = conn
        .prepare_cached("SELECT id FROM issues WHERE project_id = ?1 AND iid = ?2")?
        .query_row(rusqlite::params![project_id, issue_iid], |row| row.get(0))
        .optional()?;
    Ok(local_id)
}
/// Looks up `path_with_namespace` for the project with the given GitLab project id.
///
/// Returns `Ok(None)` when no such project exists.
///
/// # Errors
/// Propagates any database error from preparing or running the query.
pub fn resolve_project_path(conn: &Connection, gitlab_project_id: i64) -> Result<Option<String>> {
    let path = conn
        .prepare_cached("SELECT path_with_namespace FROM projects WHERE gitlab_project_id = ?1")?
        .query_row(rusqlite::params![gitlab_project_id], |row| row.get(0))
        .optional()?;
    Ok(path)
}
/// Counts the `entity_references` rows whose source is the given entity.
///
/// # Errors
/// Propagates any database error from the query.
pub fn count_references_for_source(
    conn: &Connection,
    source_entity_type: &str,
    source_entity_id: i64,
) -> Result<usize> {
    let sql = "SELECT COUNT(*) FROM entity_references \
               WHERE source_entity_type = ?1 AND source_entity_id = ?2";
    let params = rusqlite::params![source_entity_type, source_entity_id];
    let total: i64 = conn.query_row(sql, params, |row| row.get(0))?;
    Ok(total as usize)
}
#[cfg(test)]
#[path = "references_tests.rs"]
mod tests;

View File

@@ -1,425 +0,0 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Opens an in-memory database and applies all migrations to it.
fn setup_test_db() -> Connection {
    let in_memory = Path::new(":memory:");
    let db = create_connection(in_memory).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Seeds one project, one issue and one merge request, and returns their local
/// ids as `(project_id, issue_id, mr_id)` — always `(1, 1, 1)`.
///
/// The issue has iid 10 and the merge request has iid 5; tests rely on those
/// iids when building state events and lookups.
fn seed_project_issue_mr(conn: &Connection) -> (i64, i64, i64) {
// Project with gitlab_project_id 100 and path 'group/repo'.
conn.execute(
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'group/repo', 'https://gitlab.example.com/group/repo', 1000, 2000)",
[],
)
.unwrap();
// Issue: local id 1, iid 10.
conn.execute(
"INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at)
VALUES (1, 200, 10, 1, 'Test issue', 'closed', 1000, 2000, 2000)",
[],
)
.unwrap();
// Merge request: local id 1, iid 5.
conn.execute(
"INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at, source_branch, target_branch)
VALUES (1, 300, 5, 1, 'Test MR', 'merged', 1000, 2000, 2000, 'feature', 'main')",
[],
)
.unwrap();
(1, 1, 1)
}
/// A state event naming a synced source MR produces one `closes` reference
/// from that MR to the issue, recorded with `source_method = 'api'`.
#[test]
fn test_extract_refs_from_state_events_basic() {
    let conn = setup_test_db();
    let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);

    // State event on the issue naming MR iid 5 as its source.
    conn.execute(
        "INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
        rusqlite::params![project_id, issue_id],
    )
    .unwrap();

    let inserted = extract_refs_from_state_events(&conn, project_id).unwrap();
    assert_eq!(inserted, 1, "Should insert exactly one reference");

    let stored: (String, i64, String, i64, String, String) = conn
        .query_row(
            "SELECT source_entity_type, source_entity_id,
target_entity_type, target_entity_id,
reference_type, source_method
FROM entity_references WHERE project_id = ?1",
            [project_id],
            |r| {
                Ok((
                    r.get(0)?,
                    r.get(1)?,
                    r.get(2)?,
                    r.get(3)?,
                    r.get(4)?,
                    r.get(5)?,
                ))
            },
        )
        .unwrap();
    let (src_type, src_id, tgt_type, tgt_id, ref_type, method) = stored;

    assert_eq!(src_type, "merge_request");
    assert_eq!(src_id, mr_id, "Source should be the MR's local DB id");
    assert_eq!(tgt_type, "issue");
    assert_eq!(tgt_id, issue_id, "Target should be the issue's local DB id");
    assert_eq!(ref_type, "closes");
    assert_eq!(method, "api");
}
/// When an identical `closes` reference already exists, extraction from state
/// events must not add a second row.
#[test]
fn test_extract_refs_dedup_with_closes_issues() {
    let conn = setup_test_db();
    let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);

    // Pre-existing reference identical to the one extraction would derive.
    conn.execute(
        "INSERT INTO entity_references
(project_id, source_entity_type, source_entity_id,
target_entity_type, target_entity_id,
reference_type, source_method, created_at)
VALUES (?1, 'merge_request', ?2, 'issue', ?3, 'closes', 'api', 3000)",
        rusqlite::params![project_id, mr_id, issue_id],
    )
    .unwrap();

    // State event that maps to the same MR -> issue pair.
    conn.execute(
        "INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
        rusqlite::params![project_id, issue_id],
    )
    .unwrap();

    let inserted = extract_refs_from_state_events(&conn, project_id).unwrap();
    assert_eq!(inserted, 0, "Should not insert duplicate reference");

    let stored_rows: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM entity_references WHERE project_id = ?1",
            [project_id],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(stored_rows, 1, "Should still have exactly one reference");
}
/// A state event without a source merge request iid yields no reference.
#[test]
fn test_extract_refs_no_source_mr() {
    let conn = setup_test_db();
    let (project_id, issue_id, _) = seed_project_issue_mr(&conn);

    // `source_merge_request_iid` is NULL here.
    conn.execute(
        "INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, ?1, ?2, NULL, 'closed', 3000, NULL)",
        rusqlite::params![project_id, issue_id],
    )
    .unwrap();

    let inserted = extract_refs_from_state_events(&conn, project_id).unwrap();
    assert_eq!(inserted, 0, "Should not create refs when no source MR");
}
/// A state event naming an MR iid that is not in the local database yields no reference.
#[test]
fn test_extract_refs_mr_not_synced() {
    let conn = setup_test_db();
    let (project_id, issue_id, _) = seed_project_issue_mr(&conn);

    // iid 999 does not match the seeded merge request (iid 5).
    conn.execute(
        "INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (2, ?1, ?2, NULL, 'closed', 3000, 999)",
        rusqlite::params![project_id, issue_id],
    )
    .unwrap();

    let inserted = extract_refs_from_state_events(&conn, project_id).unwrap();
    assert_eq!(
        inserted, 0,
        "Should not create ref when MR is not synced locally"
    );
}
/// Running state-event extraction twice inserts the reference only once.
#[test]
fn test_extract_refs_idempotent() {
    let conn = setup_test_db();
    let (project_id, issue_id, _) = seed_project_issue_mr(&conn);

    conn.execute(
        "INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
        rusqlite::params![project_id, issue_id],
    )
    .unwrap();

    let first_pass = extract_refs_from_state_events(&conn, project_id).unwrap();
    assert_eq!(first_pass, 1);

    let second_pass = extract_refs_from_state_events(&conn, project_id).unwrap();
    assert_eq!(
        second_pass, 0,
        "Second run should insert nothing (idempotent)"
    );
}
/// Two state events for the same MR -> issue pair must collapse into a single
/// reference row, and exactly one insert must be reported.
#[test]
fn test_extract_refs_multiple_events_same_mr_issue() {
    let conn = setup_test_db();
    let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);

    // First event naming MR iid 5 as the source.
    conn.execute(
        "INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
        rusqlite::params![project_id, issue_id],
    )
    .unwrap();

    // Second event for the same issue and source MR, at a later timestamp.
    conn.execute(
        "INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (2, ?1, ?2, NULL, 'closed', 4000, 5)",
        rusqlite::params![project_id, issue_id],
    )
    .unwrap();

    let count = extract_refs_from_state_events(&conn, project_id).unwrap();
    // The table starts empty and ends with one row (asserted below), and rows
    // skipped by `INSERT OR IGNORE` are not counted as changes, so the
    // reported count must be exactly 1 — not merely "at most 2".
    assert_eq!(count, 1, "Exactly one insert should be reported");

    let total: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM entity_references WHERE project_id = ?1",
            [project_id],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(
        total, 1,
        "Only one unique reference should exist for same MR->issue pair"
    );
}
/// Extraction for project 1 must not create references for state events that
/// belong to project 2, even when iids coincide.
#[test]
fn test_extract_refs_scoped_to_project() {
    let conn = setup_test_db();
    seed_project_issue_mr(&conn);

    // Second project with the same issue / MR iids, plus one state event per
    // project. Executed in order, without parameters.
    let fixture = [
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (2, 101, 'group/other', 'https://gitlab.example.com/group/other', 1000, 2000)",
        "INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at)
VALUES (2, 201, 10, 2, 'Other issue', 'closed', 1000, 2000, 2000)",
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at, source_branch, target_branch)
VALUES (2, 301, 5, 2, 'Other MR', 'merged', 1000, 2000, 2000, 'feature', 'main')",
        "INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, 1, 1, NULL, 'closed', 3000, 5)",
        "INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (2, 2, 2, NULL, 'closed', 3000, 5)",
    ];
    for sql in fixture {
        conn.execute(sql, []).unwrap();
    }

    let inserted = extract_refs_from_state_events(&conn, 1).unwrap();
    assert_eq!(inserted, 1);

    let stored_rows: i64 = conn
        .query_row("SELECT COUNT(*) FROM entity_references", [], |r| r.get(0))
        .unwrap();
    assert_eq!(stored_rows, 1, "Only project 1 refs should be created");
}
/// Inserting a resolved reference reports success and makes it countable by source.
#[test]
fn test_insert_entity_reference_creates_row() {
    let conn = setup_test_db();
    let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);

    let was_inserted = insert_entity_reference(
        &conn,
        &EntityReference {
            project_id,
            source_entity_type: "merge_request",
            source_entity_id: mr_id,
            target_entity_type: "issue",
            target_entity_id: Some(issue_id),
            target_project_path: None,
            target_entity_iid: None,
            reference_type: "closes",
            source_method: "api",
        },
    )
    .unwrap();
    assert!(was_inserted);

    let stored_rows = count_references_for_source(&conn, "merge_request", mr_id).unwrap();
    assert_eq!(stored_rows, 1);
}
/// Inserting the same reference twice reports `true` then `false` and keeps one row.
#[test]
fn test_insert_entity_reference_idempotent() {
    let conn = setup_test_db();
    let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);

    let closes_ref = EntityReference {
        project_id,
        source_entity_type: "merge_request",
        source_entity_id: mr_id,
        target_entity_type: "issue",
        target_entity_id: Some(issue_id),
        target_project_path: None,
        target_entity_iid: None,
        reference_type: "closes",
        source_method: "api",
    };

    let first_attempt = insert_entity_reference(&conn, &closes_ref).unwrap();
    assert!(first_attempt);

    let second_attempt = insert_entity_reference(&conn, &closes_ref).unwrap();
    assert!(!second_attempt, "Duplicate insert should be ignored");

    let stored_rows = count_references_for_source(&conn, "merge_request", mr_id).unwrap();
    assert_eq!(stored_rows, 1, "Still just one reference");
}
/// An unresolved cross-project reference is stored with a NULL target id and
/// the supplied target path and iid.
#[test]
fn test_insert_entity_reference_cross_project_unresolved() {
    let conn = setup_test_db();
    let (project_id, _, mr_id) = seed_project_issue_mr(&conn);

    let unresolved_ref = EntityReference {
        project_id,
        source_entity_type: "merge_request",
        source_entity_id: mr_id,
        target_entity_type: "issue",
        target_entity_id: None,
        target_project_path: Some("other-group/other-project"),
        target_entity_iid: Some(99),
        reference_type: "closes",
        source_method: "api",
    };
    let was_inserted = insert_entity_reference(&conn, &unresolved_ref).unwrap();
    assert!(was_inserted);

    let stored: (Option<i64>, Option<String>, Option<i64>) = conn
        .query_row(
            "SELECT target_entity_id, target_project_path, target_entity_iid \
             FROM entity_references WHERE source_entity_id = ?1",
            [mr_id],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
        )
        .unwrap();
    let (target_id, target_path, target_iid) = stored;

    assert!(target_id.is_none());
    assert_eq!(target_path, Some("other-group/other-project".to_string()));
    assert_eq!(target_iid, Some(99));
}
/// One merge request may hold `closes` references to several distinct issues.
#[test]
fn test_insert_multiple_closes_references() {
    let conn = setup_test_db();
    let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);

    // Second issue (local id 10) in the same project.
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at)
VALUES (10, 210, 11, ?1, 'Second issue', 'opened', 1000, 2000, 2000)",
        rusqlite::params![project_id],
    )
    .unwrap();
    let second_issue_id = 10i64;

    for closed_issue_id in [issue_id, second_issue_id] {
        let closes_ref = EntityReference {
            project_id,
            source_entity_type: "merge_request",
            source_entity_id: mr_id,
            target_entity_type: "issue",
            target_entity_id: Some(closed_issue_id),
            target_project_path: None,
            target_entity_iid: None,
            reference_type: "closes",
            source_method: "api",
        };
        insert_entity_reference(&conn, &closes_ref).unwrap();
    }

    let stored_rows = count_references_for_source(&conn, "merge_request", mr_id).unwrap();
    assert_eq!(stored_rows, 2);
}
/// Looking up the seeded issue returns its local id.
/// NOTE(review): the key `10` is presumably the IID used by the seed helper
/// (not visible in this chunk) — confirm.
#[test]
fn test_resolve_issue_local_id_found() {
    let db = setup_test_db();
    let (project_id, expected_local_id, _) = seed_project_issue_mr(&db);

    let lookup = resolve_issue_local_id(&db, project_id, 10).unwrap();

    assert_eq!(lookup, Some(expected_local_id));
}
/// Looking up a key that matches no issue yields `None` rather than an error.
#[test]
fn test_resolve_issue_local_id_not_found() {
    let db = setup_test_db();
    let (project_id, _, _) = seed_project_issue_mr(&db);

    let lookup = resolve_issue_local_id(&db, project_id, 999).unwrap();

    assert!(lookup.is_none());
}
/// The seeded project resolves to its `group/repo` path.
/// NOTE(review): `100` is presumably the identifier the seed helper assigns to
/// the project (helper not visible here) — confirm.
#[test]
fn test_resolve_project_path_found() {
    let db = setup_test_db();
    seed_project_issue_mr(&db);

    let resolved = resolve_project_path(&db, 100).unwrap();

    assert_eq!(resolved, Some("group/repo".to_string()));
}
/// With no projects seeded, resolving a project path yields `None`, not an error.
#[test]
fn test_resolve_project_path_not_found() {
    let db = setup_test_db();

    let resolved = resolve_project_path(&db, 999).unwrap();

    assert!(resolved.is_none());
}

View File

@@ -1,139 +0,0 @@
use rusqlite::Connection;
use super::error::Result;
use super::metrics::StageTiming;
use super::time::now_ms;
/// Handle bound to a single row of the `sync_runs` table.
///
/// Created by `start`, which inserts the row; every other method issues an
/// `UPDATE` against that same row.
pub struct SyncRunRecorder {
/// `sync_runs.id` of the row this recorder updates.
row_id: i64,
}
impl SyncRunRecorder {
pub fn start(conn: &Connection, command: &str, run_id: &str) -> Result<Self> {
let now = now_ms();
conn.execute(
"INSERT INTO sync_runs (started_at, heartbeat_at, status, command, run_id)
VALUES (?1, ?2, 'running', ?3, ?4)",
rusqlite::params![now, now, command, run_id],
)?;
let row_id = conn.last_insert_rowid();
Ok(Self { row_id })
}
/// Returns the database row ID of this sync run.
pub fn row_id(&self) -> i64 {
self.row_id
}
/// Sets surgical-mode metadata on the run (mode, phase, IID manifest).
pub fn set_surgical_metadata(
&self,
conn: &Connection,
mode: &str,
phase: &str,
surgical_iids_json: &str,
) -> Result<()> {
conn.execute(
"UPDATE sync_runs
SET mode = ?1, phase = ?2, surgical_iids_json = ?3
WHERE id = ?4",
rusqlite::params![mode, phase, surgical_iids_json, self.row_id],
)?;
Ok(())
}
/// Updates the current phase and refreshes the heartbeat timestamp.
pub fn update_phase(&self, conn: &Connection, phase: &str) -> Result<()> {
let now = now_ms();
conn.execute(
"UPDATE sync_runs SET phase = ?1, heartbeat_at = ?2 WHERE id = ?3",
rusqlite::params![phase, now, self.row_id],
)?;
Ok(())
}
/// Increments a counter column by 1 based on entity type and stage.
/// Unknown (entity_type, stage) combinations are silently ignored.
pub fn record_entity_result(
&self,
conn: &Connection,
entity_type: &str,
stage: &str,
) -> Result<()> {
let column = match (entity_type, stage) {
("issue", "fetched") => "issues_fetched",
("issue", "ingested") => "issues_ingested",
("mr", "fetched") => "mrs_fetched",
("mr", "ingested") => "mrs_ingested",
("issue" | "mr", "skipped_stale") => "skipped_stale",
("doc", "regenerated") => "docs_regenerated",
("doc", "embedded") => "docs_embedded",
(_, "warning") => "warnings_count",
_ => return Ok(()),
};
// Column name is from a hardcoded match, not user input — safe to interpolate.
let sql = format!("UPDATE sync_runs SET {column} = {column} + 1 WHERE id = ?1");
conn.execute(&sql, rusqlite::params![self.row_id])?;
Ok(())
}
/// Marks the run as cancelled with a reason. Consumes self (terminal state).
pub fn cancel(self, conn: &Connection, reason: &str) -> Result<()> {
let now = now_ms();
conn.execute(
"UPDATE sync_runs
SET status = 'cancelled', error = ?1, cancelled_at = ?2, finished_at = ?3
WHERE id = ?4",
rusqlite::params![reason, now, now, self.row_id],
)?;
Ok(())
}
pub fn succeed(
self,
conn: &Connection,
metrics: &[StageTiming],
total_items: usize,
total_errors: usize,
) -> Result<()> {
let now = now_ms();
let metrics_json = serde_json::to_string(metrics).unwrap_or_else(|_| "[]".to_string());
conn.execute(
"UPDATE sync_runs
SET finished_at = ?1, status = 'succeeded',
metrics_json = ?2, total_items_processed = ?3, total_errors = ?4
WHERE id = ?5",
rusqlite::params![
now,
metrics_json,
total_items as i64,
total_errors as i64,
self.row_id
],
)?;
Ok(())
}
pub fn fail(
self,
conn: &Connection,
error: &str,
metrics: Option<&[StageTiming]>,
) -> Result<()> {
let now = now_ms();
let metrics_json =
metrics.map(|m| serde_json::to_string(m).unwrap_or_else(|_| "[]".to_string()));
conn.execute(
"UPDATE sync_runs
SET finished_at = ?1, status = 'failed', error = ?2,
metrics_json = ?3
WHERE id = ?4",
rusqlite::params![now, error, metrics_json, self.row_id],
)?;
Ok(())
}
}
#[cfg(test)]
#[path = "sync_run_tests.rs"]
mod tests;

View File

@@ -1,384 +0,0 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Opens an in-memory database and applies all migrations to it.
fn setup_test_db() -> Connection {
    let db = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// `start` inserts a `running` row carrying the given command and run id.
#[test]
fn test_sync_run_recorder_start() {
    let conn = setup_test_db();
    let recorder = SyncRunRecorder::start(&conn, "sync", "abc12345").unwrap();
    assert!(recorder.row_id > 0);

    let (status, command, run_id) = conn
        .query_row(
            "SELECT status, command, run_id FROM sync_runs WHERE id = ?1",
            [recorder.row_id],
            |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, String>(2)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(status, "running");
    assert_eq!(command, "sync");
    assert_eq!(run_id, "abc12345");
}
/// `succeed` stores the terminal status, finish time, totals, and a JSON copy
/// of the metrics that deserializes back to the same stages.
#[test]
fn test_sync_run_recorder_succeed() {
    let conn = setup_test_db();
    let recorder = SyncRunRecorder::start(&conn, "sync", "def67890").unwrap();
    let run_row = recorder.row_id;

    let stage_timings = vec![StageTiming {
        name: "ingest".to_string(),
        project: None,
        elapsed_ms: 1200,
        items_processed: 50,
        items_skipped: 0,
        errors: 2,
        rate_limit_hits: 0,
        retries: 0,
        sub_stages: vec![],
    }];
    recorder.succeed(&conn, &stage_timings, 50, 2).unwrap();

    let (status, finished_at, metrics_json, total_items, total_errors) = conn
        .query_row(
            "SELECT status, finished_at, metrics_json, total_items_processed, total_errors\n\
             FROM sync_runs WHERE id = ?1",
            [run_row],
            |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, Option<i64>>(1)?,
                    row.get::<_, Option<String>>(2)?,
                    row.get::<_, i64>(3)?,
                    row.get::<_, i64>(4)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(status, "succeeded");
    assert!(finished_at.is_some());
    assert!(metrics_json.is_some());
    assert_eq!(total_items, 50);
    assert_eq!(total_errors, 2);

    // The stored JSON must round-trip into the original stage list.
    let round_tripped: Vec<StageTiming> = serde_json::from_str(&metrics_json.unwrap()).unwrap();
    assert_eq!(round_tripped.len(), 1);
    assert_eq!(round_tripped[0].name, "ingest");
}
/// `fail` without metrics stores the error text and leaves `metrics_json` NULL.
#[test]
fn test_sync_run_recorder_fail() {
    let conn = setup_test_db();
    let recorder = SyncRunRecorder::start(&conn, "ingest issues", "fail0001").unwrap();
    let run_row = recorder.row_id;

    recorder.fail(&conn, "GitLab auth failed", None).unwrap();

    let (status, finished_at, error, metrics_json) = conn
        .query_row(
            "SELECT status, finished_at, error, metrics_json\n\
             FROM sync_runs WHERE id = ?1",
            [run_row],
            |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, Option<i64>>(1)?,
                    row.get::<_, Option<String>>(2)?,
                    row.get::<_, Option<String>>(3)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(status, "failed");
    assert!(finished_at.is_some());
    assert_eq!(error.as_deref(), Some("GitLab auth failed"));
    assert!(metrics_json.is_none());
}
/// `fail` with partial metrics still persists those metrics as JSON.
#[test]
fn test_sync_run_recorder_fail_with_partial_metrics() {
    let conn = setup_test_db();
    let recorder = SyncRunRecorder::start(&conn, "sync", "part0001").unwrap();
    let run_row = recorder.row_id;

    let completed_stages = vec![StageTiming {
        name: "ingest_issues".to_string(),
        project: Some("group/repo".to_string()),
        elapsed_ms: 800,
        items_processed: 30,
        items_skipped: 0,
        errors: 0,
        rate_limit_hits: 1,
        retries: 0,
        sub_stages: vec![],
    }];
    recorder
        .fail(&conn, "Embedding failed", Some(&completed_stages))
        .unwrap();

    let (status, metrics_json) = conn
        .query_row(
            "SELECT status, metrics_json FROM sync_runs WHERE id = ?1",
            [run_row],
            |row| Ok((row.get::<_, String>(0)?, row.get::<_, Option<String>>(1)?)),
        )
        .unwrap();

    assert_eq!(status, "failed");
    assert!(metrics_json.is_some());
    let round_tripped: Vec<StageTiming> = serde_json::from_str(&metrics_json.unwrap()).unwrap();
    assert_eq!(round_tripped.len(), 1);
    assert_eq!(round_tripped[0].name, "ingest_issues");
}
/// The schema accepts and returns the surgical-mode columns
/// (`mode`, `phase`, `surgical_iids_json`).
#[test]
fn sync_run_surgical_columns_exist() {
    let conn = setup_test_db();
    conn.execute(
        "INSERT INTO sync_runs (started_at, heartbeat_at, status, command, mode, phase, surgical_iids_json)\n\
         VALUES (1000, 1000, 'running', 'sync', 'surgical', 'preflight', '{\"issues\":[7],\"mrs\":[]}')",
        [],
    )
    .unwrap();

    let (mode, phase, iids_json) = conn
        .query_row(
            "SELECT mode, phase, surgical_iids_json FROM sync_runs WHERE mode = 'surgical'",
            [],
            |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, String>(2)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(mode, "surgical");
    assert_eq!(phase, "preflight");
    assert!(iids_json.contains("7"));
}
/// Counter columns not named in the `INSERT` read back as zero.
#[test]
fn sync_run_counter_defaults_are_zero() {
    let conn = setup_test_db();
    conn.execute(
        "INSERT INTO sync_runs (started_at, heartbeat_at, status, command)\n\
         VALUES (2000, 2000, 'running', 'sync')",
        [],
    )
    .unwrap();
    let run_row = conn.last_insert_rowid();

    let (issues_fetched, mrs_fetched, docs_regenerated, warnings_count) = conn
        .query_row(
            "SELECT issues_fetched, mrs_fetched, docs_regenerated, warnings_count FROM sync_runs WHERE id = ?1",
            [run_row],
            |row| {
                Ok((
                    row.get::<_, i64>(0)?,
                    row.get::<_, i64>(1)?,
                    row.get::<_, i64>(2)?,
                    row.get::<_, i64>(3)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(issues_fetched, 0);
    assert_eq!(mrs_fetched, 0);
    assert_eq!(docs_regenerated, 0);
    assert_eq!(warnings_count, 0);
}
/// `mode`, `phase`, and `cancelled_at` read back as NULL when not supplied.
#[test]
fn sync_run_nullable_columns_default_to_null() {
    let conn = setup_test_db();
    conn.execute(
        "INSERT INTO sync_runs (started_at, heartbeat_at, status, command)\n\
         VALUES (3000, 3000, 'running', 'sync')",
        [],
    )
    .unwrap();
    let run_row = conn.last_insert_rowid();

    let (mode, phase, cancelled_at) = conn
        .query_row(
            "SELECT mode, phase, cancelled_at FROM sync_runs WHERE id = ?1",
            [run_row],
            |row| {
                Ok((
                    row.get::<_, Option<String>>(0)?,
                    row.get::<_, Option<String>>(1)?,
                    row.get::<_, Option<i64>>(2)?,
                ))
            },
        )
        .unwrap();

    assert!(mode.is_none());
    assert!(phase.is_none());
    assert!(cancelled_at.is_none());
}
/// Explicitly inserted counter values are read back unchanged.
#[test]
fn sync_run_counter_round_trip() {
    let conn = setup_test_db();
    conn.execute(
        "INSERT INTO sync_runs (started_at, heartbeat_at, status, command, mode, issues_fetched, mrs_ingested, docs_embedded)\n\
         VALUES (4000, 4000, 'succeeded', 'sync', 'surgical', 3, 2, 5)",
        [],
    )
    .unwrap();
    let run_row = conn.last_insert_rowid();

    let (issues_fetched, mrs_ingested, docs_embedded) = conn
        .query_row(
            "SELECT issues_fetched, mrs_ingested, docs_embedded FROM sync_runs WHERE id = ?1",
            [run_row],
            |row| {
                Ok((
                    row.get::<_, i64>(0)?,
                    row.get::<_, i64>(1)?,
                    row.get::<_, i64>(2)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(issues_fetched, 3);
    assert_eq!(mrs_ingested, 2);
    assert_eq!(docs_embedded, 5);
}
/// Full surgical lifecycle: start, set metadata, advance phase, bump counters,
/// succeed, then verify everything landed on the same row.
#[test]
fn surgical_lifecycle_start_metadata_succeed() {
    let conn = setup_test_db();
    let recorder = SyncRunRecorder::start(&conn, "sync", "surg001").unwrap();
    let run_row = recorder.row_id();

    recorder
        .set_surgical_metadata(
            &conn,
            "surgical",
            "preflight",
            r#"{"issues":[7,8],"mrs":[101]}"#,
        )
        .unwrap();
    recorder.update_phase(&conn, "ingest").unwrap();

    // Two issue fetches, one issue ingest, one MR fetch, one MR ingest.
    for (entity_type, stage) in [
        ("issue", "fetched"),
        ("issue", "fetched"),
        ("issue", "ingested"),
        ("mr", "fetched"),
        ("mr", "ingested"),
    ] {
        recorder
            .record_entity_result(&conn, entity_type, stage)
            .unwrap();
    }
    recorder.succeed(&conn, &[], 3, 0).unwrap();

    let (mode, phase, iids, issues_fetched, mrs_fetched, issues_ingested, mrs_ingested, status) =
        conn.query_row(
            "SELECT mode, phase, surgical_iids_json, issues_fetched, mrs_fetched, \
             issues_ingested, mrs_ingested, status \
             FROM sync_runs WHERE id = ?1",
            [run_row],
            |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, String>(2)?,
                    row.get::<_, i64>(3)?,
                    row.get::<_, i64>(4)?,
                    row.get::<_, i64>(5)?,
                    row.get::<_, i64>(6)?,
                    row.get::<_, String>(7)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(mode, "surgical");
    assert_eq!(phase, "ingest");
    assert!(iids.contains("101"));
    assert_eq!(issues_fetched, 2);
    assert_eq!(mrs_fetched, 1);
    assert_eq!(issues_ingested, 1);
    assert_eq!(mrs_ingested, 1);
    assert_eq!(status, "succeeded");
}
/// Cancelling a run stores the reason in `error` and stamps both
/// `cancelled_at` and `finished_at`.
#[test]
fn surgical_lifecycle_cancel() {
    let conn = setup_test_db();
    let recorder = SyncRunRecorder::start(&conn, "sync", "cancel01").unwrap();
    let run_row = recorder.row_id();

    recorder
        .set_surgical_metadata(&conn, "surgical", "preflight", "{}")
        .unwrap();
    recorder
        .cancel(&conn, "User requested cancellation")
        .unwrap();

    let (status, error, cancelled_at, finished_at) = conn
        .query_row(
            "SELECT status, error, cancelled_at, finished_at FROM sync_runs WHERE id = ?1",
            [run_row],
            |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, Option<String>>(1)?,
                    row.get::<_, Option<i64>>(2)?,
                    row.get::<_, Option<i64>>(3)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(status, "cancelled");
    assert_eq!(error.as_deref(), Some("User requested cancellation"));
    assert!(cancelled_at.is_some());
    assert!(finished_at.is_some());
}
/// An `(entity_type, stage)` pair with no counter column is accepted without error.
#[test]
fn record_entity_result_ignores_unknown() {
    let conn = setup_test_db();
    let recorder = SyncRunRecorder::start(&conn, "sync", "unk001").unwrap();

    let outcome = recorder.record_entity_result(&conn, "widget", "exploded");

    outcome.unwrap();
}
/// Document and stale-skip results increment their dedicated counters once
/// per recorded call.
#[test]
fn record_entity_result_doc_counters() {
    let conn = setup_test_db();
    let recorder = SyncRunRecorder::start(&conn, "sync", "cnt001").unwrap();
    let run_row = recorder.row_id();

    for (entity_type, stage) in [
        ("doc", "regenerated"),
        ("doc", "regenerated"),
        ("doc", "embedded"),
        ("issue", "skipped_stale"),
    ] {
        recorder
            .record_entity_result(&conn, entity_type, stage)
            .unwrap();
    }

    let (docs_regen, docs_embed, skipped) = conn
        .query_row(
            "SELECT docs_regenerated, docs_embedded, skipped_stale FROM sync_runs WHERE id = ?1",
            [run_row],
            |row| {
                Ok((
                    row.get::<_, i64>(0)?,
                    row.get::<_, i64>(1)?,
                    row.get::<_, i64>(2)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(docs_regen, 2);
    assert_eq!(docs_embed, 1);
    assert_eq!(skipped, 1);
}

View File

@@ -1,586 +0,0 @@
use std::cmp::Ordering;
use rusqlite::Connection;
use serde::Serialize;
use super::error::Result;
/// The core timeline event. All pipeline stages produce or consume these.
/// Spec ref: Section 3.3 "Event Model"
///
/// Equality and ordering are implemented by hand and consider only
/// `timestamp`, `entity_type`, `entity_id`, and `event_type`; the remaining
/// fields are descriptive payload.
#[derive(Debug, Clone, Serialize)]
pub struct TimelineEvent {
/// When the event happened. NOTE(review): presumably epoch milliseconds,
/// since the collector compares it against `since_ms` — confirm.
pub timestamp: i64,
/// Entity kind tag, e.g. `"issue"` or `"merge_request"`.
pub entity_type: String,
/// Internal database id of the entity; omitted from serialized output.
#[serde(skip)]
pub entity_id: i64,
/// User-facing GitLab IID of the entity.
pub entity_iid: i64,
/// Project path of the entity (e.g. `group/project`).
pub project_path: String,
/// What happened, including any variant-specific payload.
pub event_type: TimelineEventType,
/// Human-readable one-line description of the event.
pub summary: String,
/// Username associated with the event, when known.
pub actor: Option<String>,
/// Link for the event's entity, when available.
pub url: Option<String>,
/// `true` when collected for a seed entity, `false` for an expanded one.
pub is_seed: bool,
}
/// Two events are equal when their identity key matches: timestamp, entity
/// type, entity id, and event type. Descriptive fields (summary, actor, url,
/// and so on) do not take part, which keeps equality consistent with `Ord`.
impl PartialEq for TimelineEvent {
    fn eq(&self, other: &Self) -> bool {
        let lhs = (
            self.timestamp,
            &self.entity_type,
            self.entity_id,
            &self.event_type,
        );
        let rhs = (
            other.timestamp,
            &other.entity_type,
            other.entity_id,
            &other.event_type,
        );
        lhs == rhs
    }
}

impl Eq for TimelineEvent {}
impl PartialOrd for TimelineEvent {
    /// Delegates to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

/// Chronological order with a deterministic tiebreak: timestamp first, then
/// entity type, entity id, and finally event type.
impl Ord for TimelineEvent {
    fn cmp(&self, other: &Self) -> Ordering {
        // Tuples compare lexicographically, i.e. field by field in this order.
        let lhs = (
            self.timestamp,
            &self.entity_type,
            self.entity_id,
            &self.event_type,
        );
        let rhs = (
            other.timestamp,
            &other.entity_type,
            other.entity_id,
            &other.event_type,
        );
        lhs.cmp(&rhs)
    }
}
/// Maximum characters per note body in a discussion thread.
/// Counted in characters (not bytes).
pub const THREAD_NOTE_MAX_CHARS: usize = 2000;
/// Maximum notes per discussion thread before truncation.
pub const THREAD_MAX_NOTES: usize = 50;
/// A single note within a discussion thread.
///
/// The derived `Ord` compares fields in declaration order, so notes order by
/// `note_id` first. Reordering the fields changes that ordering.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)]
pub struct ThreadNote {
/// Identifier of the note.
pub note_id: i64,
/// Author of the note, when known.
pub author: Option<String>,
/// Text of the note.
pub body: String,
/// Creation time of the note. NOTE(review): presumably the same unit as
/// `TimelineEvent::timestamp` — confirm.
pub created_at: i64,
}
/// Per spec Section 3.3. Serde tagged enum for JSON output.
///
/// Variant declaration order defines the sort order within a timestamp+entity
/// tiebreak (Created < StateChanged < LabelAdded < ... < CrossReferenced).
/// The derived `Ord` compares by declaration position first and by the
/// variant's fields second, so reordering variants changes timeline order.
///
/// Serialized with an internal `kind` tag in snake_case, for example
/// `{"kind": "state_changed", "state": "closed"}`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum TimelineEventType {
/// The entity was created.
Created,
/// The entity moved to `state`.
StateChanged {
state: String,
},
/// `label` was added to the entity.
LabelAdded {
label: String,
},
/// `label` was removed from the entity.
LabelRemoved {
label: String,
},
/// `milestone` was set on the entity.
MilestoneSet {
milestone: String,
},
/// `milestone` was removed from the entity.
MilestoneRemoved {
milestone: String,
},
/// The entity was merged.
Merged,
/// A single note surfaced as evidence, with a text `snippet` and the
/// discussion it belongs to when known.
NoteEvidence {
note_id: i64,
snippet: String,
discussion_id: Option<i64>,
},
/// A discussion thread with its notes.
DiscussionThread {
discussion_id: i64,
notes: Vec<ThreadNote>,
},
/// The entity was cross-referenced; `target` is a display reference such as `!567`.
CrossReferenced {
target: String,
},
}
/// Truncate a string to at most `max_chars` characters on a safe UTF-8 boundary.
///
/// Counts Unicode scalar values (`char`s), not bytes, so multi-byte characters
/// are never split. Returns the whole string when it already fits.
///
/// Runs in a single pass that stops at the cut point, so a very long input is
/// not scanned in full just to be truncated.
pub(crate) fn truncate_to_chars(s: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the byte offset of the first character that
    // must be dropped; `None` means the string has at most `max_chars` chars.
    match s.char_indices().nth(max_chars) {
        Some((byte_end, _)) => s[..byte_end].to_owned(),
        None => s.to_owned(),
    }
}
/// A discussion matched during the seed phase, to be collected as a full thread.
#[derive(Debug, Clone)]
pub struct MatchedDiscussion {
/// Identifier of the matched discussion.
pub discussion_id: i64,
/// Kind tag of the entity the discussion belongs to (e.g. `"issue"`).
pub entity_type: String,
/// Identifier of the owning entity. NOTE(review): presumably the internal
/// DB id, as in `EntityRef::entity_id` — confirm.
pub entity_id: i64,
/// Identifier of the project. NOTE(review): presumably the local
/// `projects.id` — confirm.
pub project_id: i64,
}
/// Internal entity reference used across pipeline stages.
#[derive(Debug, Clone, Serialize)]
pub struct EntityRef {
/// Entity kind tag: `"issue"` or `"merge_request"`.
pub entity_type: String,
/// Internal database id (the `id` column of the entity's table).
pub entity_id: i64,
/// User-facing GitLab IID (the `iid` column).
pub entity_iid: i64,
/// `path_with_namespace` of the project that owns the entity.
pub project_path: String,
}
/// An entity discovered via BFS expansion.
/// Spec ref: Section 3.5 "expanded_entities" JSON structure.
#[derive(Debug, Clone, Serialize)]
pub struct ExpandedEntityRef {
/// The entity that was discovered.
pub entity_ref: EntityRef,
/// Expansion depth at which the entity was found.
/// NOTE(review): whether seeds count as depth 0 is not visible here — confirm.
pub depth: u32,
/// The entity from which this one was reached.
pub via_from: EntityRef,
/// Reference type of the link that was followed (e.g. `"closes"`).
pub via_reference_type: String,
/// Source method of the link that was followed (e.g. `"api"`).
pub via_source_method: String,
}
/// Reference to an unsynced external entity.
/// Spec ref: Section 3.5 "unresolved_references" JSON structure.
#[derive(Debug, Clone, Serialize)]
pub struct UnresolvedRef {
/// The local entity that holds the reference.
pub source: EntityRef,
/// Project path of the target, when known.
pub target_project: Option<String>,
/// Entity kind tag of the target.
pub target_type: String,
/// IID of the target, when known.
pub target_iid: Option<i64>,
/// Kind of reference (e.g. `"closes"`).
pub reference_type: String,
}
/// Complete result from the timeline pipeline.
#[derive(Debug, Clone, Serialize)]
pub struct TimelineResult {
/// The query the timeline was built for.
pub query: String,
/// The search mode actually used for seeding (e.g. "hybrid", "lexical", "lexical (hybrid fallback)").
pub search_mode: String,
/// The events to show, after filtering and limiting.
pub events: Vec<TimelineEvent>,
/// Total events after filters (e.g., --since) but before --limit was applied.
/// Use this to show "showing X of Y filtered events".
/// Not serialized.
#[serde(skip)]
pub total_filtered_events: usize,
/// Entities the timeline was seeded from.
pub seed_entities: Vec<EntityRef>,
/// Entities reached by expansion from the seeds.
pub expanded_entities: Vec<ExpandedEntityRef>,
/// References whose targets are not synced.
pub unresolved_references: Vec<UnresolvedRef>,
}
/// Look up an entity by its internal DB id and return a full [`EntityRef`]
/// (IID plus project path).
///
/// Passing `Some(project_id)` restricts the lookup to that project; `None`
/// searches all projects.
///
/// Returns `Ok(None)` when `entity_type` is neither `"issue"` nor
/// `"merge_request"`, or when no row matches.
///
/// # Errors
/// Any database error other than "no rows" is propagated.
pub fn resolve_entity_ref(
    conn: &Connection,
    entity_type: &str,
    entity_id: i64,
    project_id: Option<i64>,
) -> Result<Option<EntityRef>> {
    let table = if entity_type == "issue" {
        "issues"
    } else if entity_type == "merge_request" {
        "merge_requests"
    } else {
        return Ok(None);
    };

    // `table` is one of two fixed names, so interpolating it is safe.
    let sql = format!(
        "SELECT e.iid, p.path_with_namespace\n\
         FROM {table} e\n\
         JOIN projects p ON p.id = e.project_id\n\
         WHERE e.id = ?1 AND (?2 IS NULL OR e.project_id = ?2)"
    );

    let lookup = conn.query_row(&sql, rusqlite::params![entity_id, project_id], |row| {
        let entity_iid: i64 = row.get(0)?;
        let project_path: String = row.get(1)?;
        Ok(EntityRef {
            entity_type: entity_type.to_owned(),
            entity_id,
            entity_iid,
            project_path,
        })
    });

    match lookup {
        Ok(entity) => Ok(Some(entity)),
        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
        Err(other) => Err(other.into()),
    }
}
/// Resolve an entity by its user-facing IID (e.g. issue #42) to a full [`EntityRef`].
///
/// Unlike [`resolve_entity_ref`] which takes an internal DB id, this takes the
/// GitLab IID that users see. Used by entity-direct timeline seeding (`issue:42`).
///
/// When `project_id` is `Some`, the query is scoped to that project (disambiguates
/// duplicate IIDs across projects).
///
/// Returns `LoreError::NotFound` when no match exists, `LoreError::Ambiguous` when
/// the same IID exists in multiple projects (suggest `--project`).
pub fn resolve_entity_by_iid(
conn: &Connection,
entity_type: &str,
iid: i64,
project_id: Option<i64>,
) -> Result<EntityRef> {
let table = match entity_type {
"issue" => "issues",
"merge_request" => "merge_requests",
_ => {
return Err(super::error::LoreError::NotFound(format!(
"Unknown entity type: {entity_type}"
)));
}
};
let sql = format!(
"SELECT e.id, e.iid, p.path_with_namespace
FROM {table} e
JOIN projects p ON p.id = e.project_id
WHERE e.iid = ?1 AND (?2 IS NULL OR e.project_id = ?2)"
);
let mut stmt = conn.prepare(&sql)?;
let rows: Vec<(i64, i64, String)> = stmt
.query_map(rusqlite::params![iid, project_id], |row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, String>(2)?,
))
})?
.collect::<std::result::Result<Vec<_>, _>>()?;
match rows.len() {
0 => {
let sigil = if entity_type == "issue" { "#" } else { "!" };
Err(super::error::LoreError::NotFound(format!(
"{entity_type} {sigil}{iid} not found"
)))
}
1 => {
let (entity_id, entity_iid, project_path) = rows.into_iter().next().unwrap();
Ok(EntityRef {
entity_type: entity_type.to_owned(),
entity_id,
entity_iid,
project_path,
})
}
_ => {
let projects: Vec<&str> = rows.iter().map(|(_, _, p)| p.as_str()).collect();
let sigil = if entity_type == "issue" { "#" } else { "!" };
Err(super::error::LoreError::Ambiguous(format!(
"{entity_type} {sigil}{iid} exists in multiple projects: {}. Use --project to specify.",
projects.join(", ")
)))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Builds an `"issue"` seed event with fixed descriptive fields; only the
/// timestamp, entity id, and event type vary between test cases.
fn make_event(timestamp: i64, entity_id: i64, event_type: TimelineEventType) -> TimelineEvent {
    let entity_type = "issue".to_owned();
    let project_path = "group/project".to_owned();
    let summary = "test".to_owned();
    TimelineEvent {
        timestamp,
        entity_type,
        entity_id,
        entity_iid: 1,
        project_path,
        event_type,
        summary,
        actor: None,
        url: None,
        is_seed: true,
    }
}
/// Sorting orders events by ascending timestamp.
#[test]
fn test_timeline_event_sort_by_timestamp() {
    let mut events = [
        make_event(3000, 1, TimelineEventType::Created),
        make_event(1000, 2, TimelineEventType::Created),
        make_event(2000, 3, TimelineEventType::Merged),
    ];

    events.sort();

    let [first, second, third] = &events;
    assert_eq!(first.timestamp, 1000);
    assert_eq!(second.timestamp, 2000);
    assert_eq!(third.timestamp, 3000);
}
/// With equal timestamps, ordering falls through to the tiebreak fields.
#[test]
fn test_timeline_event_sort_tiebreak() {
    let mut events = [
        make_event(1000, 5, TimelineEventType::Created),
        make_event(1000, 2, TimelineEventType::Merged),
        make_event(1000, 2, TimelineEventType::Created),
    ];

    events.sort();

    // Timestamp and entity_type are identical for all three events, so the
    // order is decided by entity_id and then by event_type variant order
    // (Created sorts before Merged).
    let [first, second, third] = &events;
    assert_eq!(first.entity_id, 2);
    assert!(matches!(first.event_type, TimelineEventType::Created));
    assert_eq!(second.entity_id, 2);
    assert!(matches!(second.event_type, TimelineEventType::Merged));
    assert_eq!(third.entity_id, 5);
}
/// A struct variant serializes with a snake_case `kind` tag next to its fields.
#[test]
fn test_timeline_event_type_serializes_tagged() {
    let state = "closed".to_owned();
    let serialized = serde_json::to_value(&TimelineEventType::StateChanged { state }).unwrap();

    assert_eq!(serialized["kind"], "state_changed");
    assert_eq!(serialized["state"], "closed");
}
/// `NoteEvidence` exposes its note id, snippet, and discussion id in JSON.
#[test]
fn test_note_evidence_has_note_id() {
    let evidence = TimelineEventType::NoteEvidence {
        note_id: 42,
        snippet: "some text".to_owned(),
        discussion_id: Some(7),
    };

    let serialized = serde_json::to_value(&evidence).unwrap();

    assert_eq!(serialized["kind"], "note_evidence");
    assert_eq!(serialized["note_id"], 42);
    assert_eq!(serialized["snippet"], "some text");
    assert_eq!(serialized["discussion_id"], 7);
}
/// The internal `entity_id` is omitted from JSON while `entity_iid` is kept.
#[test]
fn test_entity_id_skipped_in_serialization() {
    let serialized =
        serde_json::to_value(&make_event(1000, 99, TimelineEventType::Created)).unwrap();

    assert!(serialized.get("entity_id").is_none());
    assert_eq!(serialized["entity_iid"], 1);
}
/// Every variant of `TimelineEventType` serializes without panicking.
#[test]
fn test_timeline_event_type_variant_count() {
    let sample_note = ThreadNote {
        note_id: 1,
        author: Some("alice".to_owned()),
        body: "hello".to_owned(),
        created_at: 1000,
    };
    // One instance of each of the 10 variants, in declaration order.
    let variants: Vec<TimelineEventType> = vec![
        TimelineEventType::Created,
        TimelineEventType::StateChanged {
            state: "closed".to_owned(),
        },
        TimelineEventType::LabelAdded {
            label: "bug".to_owned(),
        },
        TimelineEventType::LabelRemoved {
            label: "bug".to_owned(),
        },
        TimelineEventType::MilestoneSet {
            milestone: "v1".to_owned(),
        },
        TimelineEventType::MilestoneRemoved {
            milestone: "v1".to_owned(),
        },
        TimelineEventType::Merged,
        TimelineEventType::NoteEvidence {
            note_id: 1,
            snippet: "text".to_owned(),
            discussion_id: None,
        },
        TimelineEventType::DiscussionThread {
            discussion_id: 1,
            notes: vec![sample_note],
        },
        TimelineEventType::CrossReferenced {
            target: "!567".to_owned(),
        },
    ];
    assert_eq!(variants.len(), 10);

    for variant in variants.iter() {
        serde_json::to_value(variant).unwrap();
    }
}
/// `DiscussionThread` serializes its tag, discussion id, and nested notes in order.
#[test]
fn test_discussion_thread_serializes_tagged() {
    let first_note = ThreadNote {
        note_id: 1,
        author: Some("alice".to_owned()),
        body: "first note".to_owned(),
        created_at: 1000,
    };
    let second_note = ThreadNote {
        note_id: 2,
        author: Some("bob".to_owned()),
        body: "second note".to_owned(),
        created_at: 2000,
    };
    let thread = TimelineEventType::DiscussionThread {
        discussion_id: 42,
        notes: vec![first_note, second_note],
    };

    let serialized = serde_json::to_value(&thread).unwrap();

    assert_eq!(serialized["kind"], "discussion_thread");
    assert_eq!(serialized["discussion_id"], 42);
    let notes = &serialized["notes"];
    assert_eq!(notes.as_array().unwrap().len(), 2);
    assert_eq!(notes[0]["note_id"], 1);
    assert_eq!(notes[0]["author"], "alice");
    assert_eq!(notes[0]["body"], "first note");
    assert_eq!(notes[1]["note_id"], 2);
}
/// `DiscussionThread` sorts after `NoteEvidence` and before `CrossReferenced`,
/// following the variant declaration order.
#[test]
fn test_discussion_thread_sort_order() {
    let evidence = TimelineEventType::NoteEvidence {
        note_id: 1,
        snippet: "a".to_owned(),
        discussion_id: None,
    };
    let discussion = TimelineEventType::DiscussionThread {
        discussion_id: 1,
        notes: vec![],
    };
    let cross_reference = TimelineEventType::CrossReferenced {
        target: "!1".to_owned(),
    };

    assert!(evidence < discussion);
    assert!(discussion < cross_reference);
}
/// `ThreadNote` derives `Ord`; `note_id` is its first field, so the note with
/// the smaller `note_id` sorts first.
#[test]
fn test_thread_note_ord() {
    let earlier = ThreadNote {
        note_id: 1,
        author: Some("alice".to_owned()),
        body: "first".to_owned(),
        created_at: 1000,
    };
    let later = ThreadNote {
        note_id: 2,
        author: Some("bob".to_owned()),
        body: "second".to_owned(),
        created_at: 2000,
    };

    assert!(earlier < later);
}
/// Short input passes through unchanged; long input is cut to the limit.
#[test]
fn test_truncate_to_chars() {
    assert_eq!(truncate_to_chars("hello", 200), "hello");

    let oversized = "a".repeat(300);
    let truncated = truncate_to_chars(&oversized, 200);
    assert_eq!(truncated.chars().count(), 200);
}
// ─── resolve_entity_by_iid tests ────────────────────────────────────────
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Opens an in-memory database and applies all migrations to it.
fn setup_db() -> Connection {
    let db = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Inserts a project with the given GitLab id and path, deriving `web_url`
/// from the path, and returns the new row id.
fn insert_project(conn: &Connection, gitlab_id: i64, path: &str) -> i64 {
    let web_url = format!("https://gitlab.com/{path}");
    conn.execute(
        "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (?1, ?2, ?3)",
        rusqlite::params![gitlab_id, path, web_url],
    )
    .unwrap();
    conn.last_insert_rowid()
}
/// Inserts an open test issue with the given IID into the project and returns
/// the new row id.
fn insert_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    // Derived from project and IID — presumably to keep `gitlab_id` distinct
    // across the rows a test inserts.
    let gitlab_id = project_id * 10000 + iid;
    conn.execute(
        "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test issue', 'opened', 'alice', 1000, 2000, 3000)",
        rusqlite::params![gitlab_id, project_id, iid],
    )
    .unwrap();
    conn.last_insert_rowid()
}
/// Inserts an open test merge request with the given IID into the project and
/// returns the new row id.
fn insert_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    // Derived from project and IID — presumably to keep `gitlab_id` distinct
    // across the rows a test inserts.
    let gitlab_id = project_id * 10000 + iid;
    conn.execute(
        "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)",
        rusqlite::params![gitlab_id, project_id, iid],
    )
    .unwrap();
    conn.last_insert_rowid()
}
/// An issue that exists in exactly one project resolves by IID without a
/// project scope.
#[test]
fn test_resolve_entity_by_iid_issue() {
    let conn = setup_db();
    let project_id = insert_project(&conn, 1, "group/project");
    let expected_id = insert_issue(&conn, project_id, 42);

    let resolved = resolve_entity_by_iid(&conn, "issue", 42, None).unwrap();

    assert_eq!(resolved.entity_type, "issue");
    assert_eq!(resolved.entity_id, expected_id);
    assert_eq!(resolved.entity_iid, 42);
    assert_eq!(resolved.project_path, "group/project");
}
/// A merge request that exists in exactly one project resolves by IID without
/// a project scope.
#[test]
fn test_resolve_entity_by_iid_mr() {
    let conn = setup_db();
    let project_id = insert_project(&conn, 1, "group/project");
    let expected_id = insert_mr(&conn, project_id, 99);

    let resolved = resolve_entity_by_iid(&conn, "merge_request", 99, None).unwrap();

    assert_eq!(resolved.entity_type, "merge_request");
    assert_eq!(resolved.entity_id, expected_id);
    assert_eq!(resolved.entity_iid, 99);
    assert_eq!(resolved.project_path, "group/project");
}
/// An IID with no matching row yields `LoreError::NotFound`.
#[test]
fn test_resolve_entity_by_iid_not_found() {
    let conn = setup_db();
    insert_project(&conn, 1, "group/project");

    let outcome = resolve_entity_by_iid(&conn, "issue", 999, None);

    assert!(outcome.is_err());
    assert!(matches!(
        outcome.unwrap_err(),
        crate::core::error::LoreError::NotFound(_)
    ));
}
/// The same IID in two projects, with no project scope, yields
/// `LoreError::Ambiguous`.
#[test]
fn test_resolve_entity_by_iid_ambiguous() {
    let conn = setup_db();
    let project_a = insert_project(&conn, 1, "group/project-a");
    let project_b = insert_project(&conn, 2, "group/project-b");
    insert_issue(&conn, project_a, 42);
    insert_issue(&conn, project_b, 42);

    let outcome = resolve_entity_by_iid(&conn, "issue", 42, None);

    assert!(outcome.is_err());
    assert!(matches!(
        outcome.unwrap_err(),
        crate::core::error::LoreError::Ambiguous(_)
    ));
}
/// Passing a project id disambiguates an IID that exists in several projects.
#[test]
fn test_resolve_entity_by_iid_project_scoped() {
    let conn = setup_db();
    let project_a = insert_project(&conn, 1, "group/project-a");
    let project_b = insert_project(&conn, 2, "group/project-b");
    insert_issue(&conn, project_a, 42);
    let issue_in_b = insert_issue(&conn, project_b, 42);

    let resolved = resolve_entity_by_iid(&conn, "issue", 42, Some(project_b)).unwrap();

    assert_eq!(resolved.entity_id, issue_in_b);
    assert_eq!(resolved.project_path, "group/project-b");
}
}

View File

@@ -1,496 +0,0 @@
use rusqlite::Connection;
use std::collections::HashSet;
use crate::core::error::{LoreError, Result};
use crate::core::timeline::{
EntityRef, ExpandedEntityRef, MatchedDiscussion, THREAD_MAX_NOTES, THREAD_NOTE_MAX_CHARS,
ThreadNote, TimelineEvent, TimelineEventType, truncate_to_chars,
};
/// Build the final, chronologically ordered event list for a timeline.
///
/// Gathers per-entity events for every seed entity (flagged `is_seed = true`)
/// and every expanded entity (flagged `false`), adds discussion threads for
/// the matched discussions and the evidence notes from the seed phase, sorts
/// everything using `TimelineEvent`'s `Ord`, drops events older than
/// `since_ms` when it is set, and finally keeps at most `limit` events.
///
/// Returns the limited event list together with the number of events that
/// remained after the `since_ms` filter but before `limit` was applied.
///
/// # Errors
/// Propagates the first error returned by any of the collection steps.
pub fn collect_events(
    conn: &Connection,
    seed_entities: &[EntityRef],
    expanded_entities: &[ExpandedEntityRef],
    evidence_notes: &[TimelineEvent],
    matched_discussions: &[MatchedDiscussion],
    since_ms: Option<i64>,
    limit: usize,
) -> Result<(Vec<TimelineEvent>, usize)> {
    let mut timeline: Vec<TimelineEvent> = Vec::new();

    // Per-entity events: seeds first, then entities reached by expansion.
    seed_entities
        .iter()
        .try_for_each(|seed| collect_entity_events(conn, seed, true, &mut timeline))?;
    expanded_entities.iter().try_for_each(|expanded| {
        collect_entity_events(conn, &expanded.entity_ref, false, &mut timeline)
    })?;

    // Full discussion threads for the discussions matched while seeding.
    let entity_lookup = build_entity_lookup(seed_entities, expanded_entities);
    collect_discussion_threads(conn, matched_discussions, &entity_lookup, &mut timeline)?;

    // Evidence notes found during the seed phase.
    timeline.extend_from_slice(evidence_notes);

    timeline.sort();

    if let Some(cutoff) = since_ms {
        timeline.retain(|event| event.timestamp >= cutoff);
    }

    // Remember the filtered count before cutting down to `limit`.
    let filtered_total = timeline.len();
    timeline.truncate(limit);

    Ok((timeline, filtered_total))
}
/// Collect all events for a single entity.
///
/// Runs the creation, state, label, milestone, and merged collectors in that
/// order, appending whatever each produces to `events`. `is_seed` is passed
/// through unchanged to every collector.
///
/// # Errors
/// Stops at, and returns, the first error reported by a collector.
fn collect_entity_events(
conn: &Connection,
entity: &EntityRef,
is_seed: bool,
events: &mut Vec<TimelineEvent>,
) -> Result<()> {
collect_creation_event(conn, entity, is_seed, events)?;
collect_state_events(conn, entity, is_seed, events)?;
collect_label_events(conn, entity, is_seed, events)?;
collect_milestone_events(conn, entity, is_seed, events)?;
collect_merged_event(conn, entity, is_seed, events)?;
Ok(())
}
/// Collect the Created event from the entity's own table.
fn collect_creation_event(
conn: &Connection,
entity: &EntityRef,
is_seed: bool,
events: &mut Vec<TimelineEvent>,
) -> Result<()> {
let table = match entity.entity_type.as_str() {
"issue" => "issues",
"merge_request" => "merge_requests",
_ => return Ok(()),
};
let sql =
format!("SELECT created_at, author_username, title, web_url FROM {table} WHERE id = ?1");
let result = conn.query_row(&sql, rusqlite::params![entity.entity_id], |row| {
Ok((
row.get::<_, Option<i64>>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, Option<String>>(3)?,
))
});
if let Ok((Some(created_at), author, title, url)) = result {
let type_label = if entity.entity_type == "issue" {
"Issue"
} else {
"MR"
};
let title_str = title.as_deref().unwrap_or("(untitled)");
events.push(TimelineEvent {
timestamp: created_at,
entity_type: entity.entity_type.clone(),
entity_id: entity.entity_id,
entity_iid: entity.entity_iid,
project_path: entity.project_path.clone(),
event_type: TimelineEventType::Created,
summary: format!("{type_label} #{} created: {title_str}", entity.entity_iid),
actor: author,
url,
is_seed,
});
}
Ok(())
}
/// Emit one `StateChanged` event per row in `resource_state_events` for this
/// entity. Rows with state `merged` are skipped because `collect_merged_event`
/// reports those as `Merged` instead.
fn collect_state_events(
    conn: &Connection,
    entity: &EntityRef,
    is_seed: bool,
    events: &mut Vec<TimelineEvent>,
) -> Result<()> {
    let (id_col, id_val) = entity_id_column(entity)?;
    let mut stmt = conn.prepare(&format!(
        "SELECT state, actor_username, created_at FROM resource_state_events
WHERE {id_col} = ?1
ORDER BY created_at ASC"
    ))?;
    let mut rows = stmt.query(rusqlite::params![id_val])?;
    while let Some(row) = rows.next()? {
        // Decode every column before deciding whether to keep the row.
        let state: String = row.get(0)?;
        let actor: Option<String> = row.get(1)?;
        let created_at: i64 = row.get(2)?;
        if state == "merged" {
            continue;
        }
        events.push(TimelineEvent {
            timestamp: created_at,
            entity_type: entity.entity_type.clone(),
            entity_id: entity.entity_id,
            entity_iid: entity.entity_iid,
            project_path: entity.project_path.clone(),
            summary: format!("State changed to {state}"),
            event_type: TimelineEventType::StateChanged { state },
            actor,
            url: None,
            is_seed,
        });
    }
    Ok(())
}
/// Emit `LabelAdded` / `LabelRemoved` events from `resource_label_events`.
/// A NULL label name is shown as `[deleted label]`; actions other than
/// `add` and `remove` are ignored.
fn collect_label_events(
    conn: &Connection,
    entity: &EntityRef,
    is_seed: bool,
    events: &mut Vec<TimelineEvent>,
) -> Result<()> {
    let (id_col, id_val) = entity_id_column(entity)?;
    let sql = format!(
        "SELECT action, label_name, actor_username, created_at FROM resource_label_events
WHERE {id_col} = ?1
ORDER BY created_at ASC"
    );
    let mut stmt = conn.prepare(&sql)?;
    let records = stmt.query_map(rusqlite::params![id_val], |row| {
        let action: String = row.get(0)?;
        let label_name: Option<String> = row.get(1)?;
        let actor: Option<String> = row.get(2)?;
        let created_at: i64 = row.get(3)?;
        Ok((action, label_name, actor, created_at))
    })?;
    for record in records {
        let (action, label_name, actor, created_at) = record?;
        let added = match action.as_str() {
            "add" => true,
            "remove" => false,
            _ => continue,
        };
        let label = label_name.unwrap_or_else(|| "[deleted label]".to_owned());
        let (summary, event_type) = if added {
            (
                format!("Label added: {label}"),
                TimelineEventType::LabelAdded { label },
            )
        } else {
            (
                format!("Label removed: {label}"),
                TimelineEventType::LabelRemoved { label },
            )
        };
        events.push(TimelineEvent {
            timestamp: created_at,
            entity_type: entity.entity_type.clone(),
            entity_id: entity.entity_id,
            entity_iid: entity.entity_iid,
            project_path: entity.project_path.clone(),
            event_type,
            summary,
            actor,
            url: None,
            is_seed,
        });
    }
    Ok(())
}
/// Emit `MilestoneSet` / `MilestoneRemoved` events from
/// `resource_milestone_events`. A NULL title is shown as
/// `[deleted milestone]`; actions other than `add` and `remove` are ignored.
fn collect_milestone_events(
    conn: &Connection,
    entity: &EntityRef,
    is_seed: bool,
    events: &mut Vec<TimelineEvent>,
) -> Result<()> {
    let (id_col, id_val) = entity_id_column(entity)?;
    let mut stmt = conn.prepare(&format!(
        "SELECT action, milestone_title, actor_username, created_at FROM resource_milestone_events
WHERE {id_col} = ?1
ORDER BY created_at ASC"
    ))?;
    let fetched = stmt.query_map(rusqlite::params![id_val], |row| {
        Ok((
            row.get::<_, String>(0)?,
            row.get::<_, Option<String>>(1)?,
            row.get::<_, Option<String>>(2)?,
            row.get::<_, i64>(3)?,
        ))
    })?;
    for entry in fetched {
        let (action, title, actor, created_at) = entry?;
        let milestone = title.unwrap_or_else(|| "[deleted milestone]".to_owned());
        let (summary, event_type) = match action.as_str() {
            "add" => (
                format!("Milestone set: {milestone}"),
                TimelineEventType::MilestoneSet { milestone },
            ),
            "remove" => (
                format!("Milestone removed: {milestone}"),
                TimelineEventType::MilestoneRemoved { milestone },
            ),
            _ => continue,
        };
        events.push(TimelineEvent {
            timestamp: created_at,
            entity_type: entity.entity_type.clone(),
            entity_id: entity.entity_id,
            entity_iid: entity.entity_iid,
            project_path: entity.project_path.clone(),
            event_type,
            summary,
            actor,
            url: None,
            is_seed,
        });
    }
    Ok(())
}
/// Collect Merged event for MRs. Prefers merged_at from the MR table.
/// Falls back to resource_state_events WHERE state='merged' if merged_at is NULL.
fn collect_merged_event(
conn: &Connection,
entity: &EntityRef,
is_seed: bool,
events: &mut Vec<TimelineEvent>,
) -> Result<()> {
if entity.entity_type != "merge_request" {
return Ok(());
}
// Try merged_at from merge_requests table first
let mr_result = conn.query_row(
"SELECT merged_at, merge_user_username, web_url FROM merge_requests WHERE id = ?1",
rusqlite::params![entity.entity_id],
|row| {
Ok((
row.get::<_, Option<i64>>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
match mr_result {
Ok((Some(merged_at), merge_user, url)) => {
events.push(TimelineEvent {
timestamp: merged_at,
entity_type: entity.entity_type.clone(),
entity_id: entity.entity_id,
entity_iid: entity.entity_iid,
project_path: entity.project_path.clone(),
event_type: TimelineEventType::Merged,
summary: format!("MR !{} merged", entity.entity_iid),
actor: merge_user,
url,
is_seed,
});
return Ok(());
}
Ok((None, _, _)) => {} // merged_at is NULL, try fallback
Err(rusqlite::Error::QueryReturnedNoRows) => {} // entity not found, try fallback
Err(e) => return Err(e.into()),
}
// Fallback: check resource_state_events for state='merged'
let fallback_result = conn.query_row(
"SELECT actor_username, created_at FROM resource_state_events
WHERE merge_request_id = ?1 AND state = 'merged'
ORDER BY created_at DESC LIMIT 1",
rusqlite::params![entity.entity_id],
|row| Ok((row.get::<_, Option<String>>(0)?, row.get::<_, i64>(1)?)),
);
match fallback_result {
Ok((actor, created_at)) => {
events.push(TimelineEvent {
timestamp: created_at,
entity_type: entity.entity_type.clone(),
entity_id: entity.entity_id,
entity_iid: entity.entity_iid,
project_path: entity.project_path.clone(),
event_type: TimelineEventType::Merged,
summary: format!("MR !{} merged", entity.entity_iid),
actor,
url: None,
is_seed,
});
}
Err(rusqlite::Error::QueryReturnedNoRows) => {} // no merged state event, MR wasn't merged
Err(e) => return Err(e.into()),
}
Ok(())
}
/// Map an entity to the foreign-key column used by the resource event tables,
/// paired with the entity's local id. Unknown entity types are an error.
fn entity_id_column(entity: &EntityRef) -> Result<(&'static str, i64)> {
    let column = match entity.entity_type.as_str() {
        "issue" => "issue_id",
        "merge_request" => "merge_request_id",
        unknown => {
            return Err(LoreError::Other(format!(
                "Unknown entity type for event collection: {}",
                unknown
            )));
        }
    };
    Ok((column, entity.entity_id))
}
/// Lookup key: (entity_type, entity_id) -> (iid, project_path)
type EntityLookup = std::collections::HashMap<(String, i64), (i64, String)>;

/// Index seed and expanded entities by (type, local id). Expanded entries are
/// inserted after seeds, so on a key clash the expanded entry wins.
fn build_entity_lookup(seeds: &[EntityRef], expanded: &[ExpandedEntityRef]) -> EntityLookup {
    seeds
        .iter()
        .chain(expanded.iter().map(|exp| &exp.entity_ref))
        .map(|entity| {
            (
                (entity.entity_type.clone(), entity.entity_id),
                (entity.entity_iid, entity.project_path.clone()),
            )
        })
        .collect()
}
/// Build one `DiscussionThread` event per distinct matched discussion.
///
/// Only non-system notes are loaded, oldest first. Each body is shortened to
/// `THREAD_NOTE_MAX_CHARS`, and threads longer than `THREAD_MAX_NOTES` are cut
/// and closed with a synthetic "[N more notes not shown]" note. Discussions
/// whose entity is not in `entity_lookup`, or that have no user notes, are
/// skipped. The event is stamped with the first note's time and author.
fn collect_discussion_threads(
    conn: &Connection,
    matched_discussions: &[MatchedDiscussion],
    entity_lookup: &EntityLookup,
    events: &mut Vec<TimelineEvent>,
) -> Result<()> {
    let mut handled = HashSet::new();
    let mut stmt = conn.prepare(
        "SELECT id, author_username, body, created_at FROM notes
WHERE discussion_id = ?1 AND is_system = 0
ORDER BY created_at ASC",
    )?;

    for disc in matched_discussions {
        // The same discussion may be matched more than once; emit it only once.
        if !handled.insert(disc.discussion_id) {
            continue;
        }

        let lookup_key = (disc.entity_type.clone(), disc.entity_id);
        let (iid, project_path) = match entity_lookup.get(&lookup_key) {
            Some((iid, path)) => (*iid, path.clone()),
            None => continue, // entity is neither a seed nor expanded
        };

        let mut notes = stmt
            .query_map(rusqlite::params![disc.discussion_id], |row| {
                let note_id: i64 = row.get(0)?;
                let author: Option<String> = row.get(1)?;
                let body: Option<String> = row.get(2)?;
                let created_at: i64 = row.get(3)?;
                Ok(ThreadNote {
                    note_id,
                    author,
                    body: truncate_to_chars(body.as_deref().unwrap_or(""), THREAD_NOTE_MAX_CHARS),
                    created_at,
                })
            })?
            .collect::<rusqlite::Result<Vec<_>>>()?;

        // Nothing left once system notes are filtered out.
        if notes.is_empty() {
            continue;
        }
        let first_created_at = notes[0].created_at;

        // Enforce the per-thread cap, leaving a marker note for the omitted remainder.
        let total_notes = notes.len();
        if total_notes > THREAD_MAX_NOTES {
            notes.truncate(THREAD_MAX_NOTES);
            let marker_created_at = notes.last().map_or(first_created_at, |n| n.created_at);
            notes.push(ThreadNote {
                note_id: -1,
                author: None,
                body: format!("[{} more notes not shown]", total_notes - THREAD_MAX_NOTES),
                created_at: marker_created_at,
            });
        }

        let note_count = notes.len();
        let actor = notes.first().and_then(|n| n.author.clone());
        events.push(TimelineEvent {
            timestamp: first_created_at,
            entity_type: disc.entity_type.clone(),
            entity_id: disc.entity_id,
            entity_iid: iid,
            project_path,
            event_type: TimelineEventType::DiscussionThread {
                discussion_id: disc.discussion_id,
                notes,
            },
            summary: format!("Discussion ({note_count} notes)"),
            actor,
            url: None,
            is_seed: true,
        });
    }
    Ok(())
}
#[cfg(test)]
#[path = "timeline_collect_tests.rs"]
mod tests;

View File

@@ -1,704 +0,0 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Open an in-memory database and apply all migrations.
fn setup_test_db() -> Connection {
    let db = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Insert the fixed `group/project` fixture row and return its local id.
fn insert_project(conn: &Connection) -> i64 {
    let sql = "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')";
    conn.execute(sql, []).unwrap();
    conn.last_insert_rowid()
}
/// Insert an opened issue authored by `alice` (created_at = 1000) and return
/// its local id. The GitLab id is derived as `iid * 100`.
fn insert_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    let sql = "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, ?3, 'Auth bug', 'opened', 'alice', 1000, 2000, 3000, 'https://gitlab.com/group/project/-/issues/1')";
    let gitlab_id = iid * 100;
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid])
        .unwrap();
    conn.last_insert_rowid()
}
/// Insert a merged MR authored by `bob` (created_at = 1000, merge user
/// `charlie`) with the given `merged_at`, returning its local id.
fn insert_mr(conn: &Connection, project_id: i64, iid: i64, merged_at: Option<i64>) -> i64 {
    let sql = "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, merged_at, merge_user_username, web_url) VALUES (?1, ?2, ?3, 'Fix auth', 'merged', 'bob', 1000, 5000, 6000, ?4, 'charlie', 'https://gitlab.com/group/project/-/merge_requests/10')";
    let gitlab_id = iid * 100;
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid, merged_at])
        .unwrap();
    conn.last_insert_rowid()
}
/// Build an `EntityRef` that lives in the `group/project` fixture project.
fn make_entity_ref(entity_type: &str, entity_id: i64, iid: i64) -> EntityRef {
    let entity_type = String::from(entity_type);
    let project_path = String::from("group/project");
    EntityRef {
        entity_type,
        entity_id,
        entity_iid: iid,
        project_path,
    }
}
/// Insert a state event by `alice` for either an issue or an MR, using a
/// random GitLab id.
fn insert_state_event(
    conn: &Connection,
    project_id: i64,
    issue_id: Option<i64>,
    mr_id: Option<i64>,
    state: &str,
    created_at: i64,
) {
    let sql = "INSERT INTO resource_state_events (gitlab_id, project_id, issue_id, merge_request_id, state, actor_username, created_at) VALUES (?1, ?2, ?3, ?4, ?5, 'alice', ?6)";
    let gitlab_id = i64::from(rand::random::<u32>());
    conn.execute(
        sql,
        rusqlite::params![gitlab_id, project_id, issue_id, mr_id, state, created_at],
    )
    .unwrap();
}
/// Insert a label event by `alice` for either an issue or an MR, using a
/// random GitLab id. `label_name` may be `None` to simulate a deleted label.
fn insert_label_event(
    conn: &Connection,
    project_id: i64,
    issue_id: Option<i64>,
    mr_id: Option<i64>,
    action: &str,
    label_name: Option<&str>,
    created_at: i64,
) {
    let sql = "INSERT INTO resource_label_events (gitlab_id, project_id, issue_id, merge_request_id, action, label_name, actor_username, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, 'alice', ?7)";
    let gitlab_id = i64::from(rand::random::<u32>());
    conn.execute(
        sql,
        rusqlite::params![gitlab_id, project_id, issue_id, mr_id, action, label_name, created_at],
    )
    .unwrap();
}
/// Insert a milestone event by `alice` for either an issue or an MR, using a
/// random GitLab id. `milestone_title` may be `None` to simulate a deleted
/// milestone.
fn insert_milestone_event(
    conn: &Connection,
    project_id: i64,
    issue_id: Option<i64>,
    mr_id: Option<i64>,
    action: &str,
    milestone_title: Option<&str>,
    created_at: i64,
) {
    let sql = "INSERT INTO resource_milestone_events (gitlab_id, project_id, issue_id, merge_request_id, action, milestone_title, actor_username, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, 'alice', ?7)";
    let gitlab_id = i64::from(rand::random::<u32>());
    conn.execute(
        sql,
        rusqlite::params![gitlab_id, project_id, issue_id, mr_id, action, milestone_title, created_at],
    )
    .unwrap();
}
#[test]
fn test_collect_creation_event() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let (timeline, _total) = collect_events(&conn, &seeds, &[], &[], &[], None, 100).unwrap();

    // A bare issue yields exactly its Created event.
    assert_eq!(timeline.len(), 1);
    let created = &timeline[0];
    assert!(matches!(created.event_type, TimelineEventType::Created));
    assert_eq!(created.timestamp, 1000);
    assert_eq!(created.actor, Some("alice".to_owned()));
    assert!(created.is_seed);
}
#[test]
fn test_collect_state_events() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    for (state, created_at) in [("closed", 3000), ("reopened", 4000)] {
        insert_state_event(&conn, project_id, Some(issue_id), None, state, created_at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let (timeline, _total) = collect_events(&conn, &seeds, &[], &[], &[], None, 100).unwrap();

    let is_state_change = |event: &TimelineEvent, expected: &str| {
        matches!(
            &event.event_type,
            TimelineEventType::StateChanged { state } if state.as_str() == expected
        )
    };

    // Created + 2 state changes = 3
    assert_eq!(timeline.len(), 3);
    assert!(matches!(timeline[0].event_type, TimelineEventType::Created));
    assert!(is_state_change(&timeline[1], "closed"));
    assert!(is_state_change(&timeline[2], "reopened"));
}
#[test]
fn test_collect_merged_dedup() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let mr_id = insert_mr(&conn, project_id, 10, Some(5000));
    // A 'merged' state event exists too; it must not surface as StateChanged.
    insert_state_event(&conn, project_id, None, Some(mr_id), "merged", 5000);

    let seeds = [make_entity_ref("merge_request", mr_id, 10)];
    let (timeline, _total) = collect_events(&conn, &seeds, &[], &[], &[], None, 100).unwrap();

    // Expect Created + Merged, never Created + StateChanged{merged} + Merged.
    let (mut merged_count, mut state_merged_count) = (0, 0);
    for event in &timeline {
        match &event.event_type {
            TimelineEventType::Merged => merged_count += 1,
            TimelineEventType::StateChanged { state } if state == "merged" => {
                state_merged_count += 1;
            }
            _ => {}
        }
    }
    assert_eq!(merged_count, 1);
    assert_eq!(state_merged_count, 0);
}
#[test]
fn test_collect_null_label_fallback() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    // NULL label name simulates a label that has since been deleted.
    insert_label_event(&conn, project_id, Some(issue_id), None, "add", None, 2000);

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let (timeline, _total) = collect_events(&conn, &seeds, &[], &[], &[], None, 100).unwrap();

    let has_placeholder = timeline.iter().any(|event| {
        matches!(&event.event_type, TimelineEventType::LabelAdded { label } if label == "[deleted label]")
    });
    assert!(has_placeholder);
}
#[test]
fn test_collect_null_milestone_fallback() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    // NULL title simulates a milestone that has since been deleted.
    insert_milestone_event(&conn, project_id, Some(issue_id), None, "add", None, 2000);

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let (timeline, _total) = collect_events(&conn, &seeds, &[], &[], &[], None, 100).unwrap();

    let has_placeholder = timeline.iter().any(|event| {
        matches!(&event.event_type, TimelineEventType::MilestoneSet { milestone } if milestone == "[deleted milestone]")
    });
    assert!(has_placeholder);
}
#[test]
fn test_collect_since_filter() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    for (state, created_at) in [("closed", 3000), ("reopened", 5000)] {
        insert_state_event(&conn, project_id, Some(issue_id), None, state, created_at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    // Cutoff 4000 drops Created (1000) and closed (3000).
    let cutoff = Some(4000);
    let (timeline, _total) = collect_events(&conn, &seeds, &[], &[], &[], cutoff, 100).unwrap();

    assert_eq!(timeline.len(), 1);
    assert_eq!(timeline[0].timestamp, 5000);
}
#[test]
fn test_collect_chronological_sort() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let mr_id = insert_mr(&conn, project_id, 10, Some(4000));
    insert_state_event(&conn, project_id, Some(issue_id), None, "closed", 3000);
    let bug = Some("bug");
    insert_label_event(&conn, project_id, None, Some(mr_id), "add", bug, 2000);

    let seeds = [
        make_entity_ref("issue", issue_id, 1),
        make_entity_ref("merge_request", mr_id, 10),
    ];
    let (timeline, _total) = collect_events(&conn, &seeds, &[], &[], &[], None, 100).unwrap();

    // Every adjacent pair must be in non-decreasing timestamp order.
    let ordered = timeline
        .windows(2)
        .all(|pair| pair[0].timestamp <= pair[1].timestamp);
    assert!(ordered);
}
#[test]
fn test_collect_respects_limit() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    // 20 state changes, 100 ms apart, starting at t=3000.
    for step in 0..20 {
        let created_at = 3000 + step * 100;
        insert_state_event(&conn, project_id, Some(issue_id), None, "closed", created_at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let (timeline, total) = collect_events(&conn, &seeds, &[], &[], &[], None, 5).unwrap();

    assert_eq!(timeline.len(), 5);
    // 20 state changes + 1 created = 21 total before limit
    assert_eq!(total, 21);
}
#[test]
fn test_collect_evidence_notes_included() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);

    // One evidence note handed over from the seed phase.
    let evidence_note = TimelineEvent {
        timestamp: 2500,
        entity_type: "issue".to_owned(),
        entity_id: issue_id,
        entity_iid: 1,
        project_path: "group/project".to_owned(),
        event_type: TimelineEventType::NoteEvidence {
            note_id: 42,
            snippet: "relevant note".to_owned(),
            discussion_id: Some(1),
        },
        summary: "Note by alice".to_owned(),
        actor: Some("alice".to_owned()),
        url: None,
        is_seed: true,
    };
    let evidence = [evidence_note];

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let (timeline, _total) =
        collect_events(&conn, &seeds, &[], &evidence, &[], None, 100).unwrap();

    let carried_over = timeline.iter().any(|event| {
        matches!(
            &event.event_type,
            TimelineEventType::NoteEvidence { note_id, .. } if *note_id == 42
        )
    });
    assert!(carried_over);
}
#[test]
fn test_collect_merged_fallback_to_state_event() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    // merged_at is NULL on the MR row...
    let mr_id = insert_mr(&conn, project_id, 10, None);
    // ...but a 'merged' state event exists.
    insert_state_event(&conn, project_id, None, Some(mr_id), "merged", 5000);

    let seeds = [make_entity_ref("merge_request", mr_id, 10)];
    let (timeline, _total) = collect_events(&conn, &seeds, &[], &[], &[], None, 100).unwrap();

    let merged_timestamp = timeline
        .iter()
        .find(|event| matches!(event.event_type, TimelineEventType::Merged))
        .map(|event| event.timestamp);
    assert!(merged_timestamp.is_some());
    assert_eq!(merged_timestamp.unwrap(), 5000);
}
// ─── Discussion thread tests ────────────────────────────────────────────────
fn insert_discussion(
conn: &Connection,
project_id: i64,
issue_id: Option<i64>,
mr_id: Option<i64>,
) -> i64 {
let noteable_type = if issue_id.is_some() {
"Issue"
} else {
"MergeRequest"
};
conn.execute(
"INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, ?3, ?4, ?5, 0)",
rusqlite::params![format!("disc_{}", rand::random::<u32>()), project_id, issue_id, mr_id, noteable_type],
)
.unwrap();
conn.last_insert_rowid()
}
/// Insert a note into a discussion with a random GitLab id; `created_at` is
/// also used for `updated_at` and `last_seen_at`. Returns the local row id.
#[allow(clippy::too_many_arguments)]
fn insert_note(
    conn: &Connection,
    discussion_id: i64,
    project_id: i64,
    author: &str,
    body: &str,
    is_system: bool,
    created_at: i64,
) -> i64 {
    let sql = "INSERT INTO notes (gitlab_id, discussion_id, project_id, is_system, author_username, body, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?7, ?7)";
    let gitlab_id = i64::from(rand::random::<u32>());
    let system_flag = is_system as i32;
    conn.execute(
        sql,
        rusqlite::params![gitlab_id, discussion_id, project_id, system_flag, author, body, created_at],
    )
    .unwrap();
    conn.last_insert_rowid()
}
/// Build a `MatchedDiscussion` pointing at the given discussion and entity.
fn make_matched_discussion(
    discussion_id: i64,
    entity_type: &str,
    entity_id: i64,
    project_id: i64,
) -> MatchedDiscussion {
    let entity_type = String::from(entity_type);
    MatchedDiscussion {
        discussion_id,
        entity_type,
        entity_id,
        project_id,
    }
}
#[test]
fn test_collect_discussion_thread_basic() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
    let thread_fixture = [
        ("alice", "First note", 2000),
        ("bob", "Reply here", 3000),
        ("alice", "Follow up", 4000),
    ];
    for (author, body, created_at) in thread_fixture {
        insert_note(&conn, disc_id, project_id, author, body, false, created_at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let matched = [make_matched_discussion(
        disc_id, "issue", issue_id, project_id,
    )];
    let (timeline, _total) =
        collect_events(&conn, &seeds, &[], &[], &matched, None, 100).unwrap();

    let thread = timeline
        .iter()
        .find(|event| matches!(&event.event_type, TimelineEventType::DiscussionThread { .. }));
    assert!(thread.is_some(), "Should have a DiscussionThread event");

    match &thread.unwrap().event_type {
        TimelineEventType::DiscussionThread {
            discussion_id,
            notes,
        } => {
            assert_eq!(*discussion_id, disc_id);
            assert_eq!(notes.len(), 3);
            assert_eq!(notes[0].author.as_deref(), Some("alice"));
            assert_eq!(notes[0].body, "First note");
            assert_eq!(notes[1].author.as_deref(), Some("bob"));
            assert_eq!(notes[2].body, "Follow up");
        }
        _ => panic!("Expected DiscussionThread variant"),
    }
}
#[test]
fn test_collect_discussion_thread_skips_system_notes() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
    // Two user notes with one system note sandwiched between them.
    let thread_fixture = [
        ("alice", "User note", false, 2000),
        ("system", "added label ~bug", true, 3000),
        ("bob", "Another user note", false, 4000),
    ];
    for (author, body, is_system, created_at) in thread_fixture {
        insert_note(&conn, disc_id, project_id, author, body, is_system, created_at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let matched = [make_matched_discussion(
        disc_id, "issue", issue_id, project_id,
    )];
    let (timeline, _total) =
        collect_events(&conn, &seeds, &[], &[], &matched, None, 100).unwrap();

    let thread = timeline
        .iter()
        .find(|event| matches!(&event.event_type, TimelineEventType::DiscussionThread { .. }));
    assert!(thread.is_some());

    match &thread.unwrap().event_type {
        TimelineEventType::DiscussionThread { notes, .. } => {
            assert_eq!(notes.len(), 2, "System notes should be filtered out");
            assert_eq!(notes[0].body, "User note");
            assert_eq!(notes[1].body, "Another user note");
        }
        _ => panic!("Expected DiscussionThread"),
    }
}
#[test]
fn test_collect_discussion_thread_empty_after_system_filter() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
    // The discussion consists solely of system notes.
    for (body, created_at) in [("added label", 2000), ("removed label", 3000)] {
        insert_note(&conn, disc_id, project_id, "system", body, true, created_at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let matched = [make_matched_discussion(
        disc_id, "issue", issue_id, project_id,
    )];
    let (timeline, _total) =
        collect_events(&conn, &seeds, &[], &[], &matched, None, 100).unwrap();

    let mut thread_count = 0;
    for event in &timeline {
        if matches!(&event.event_type, TimelineEventType::DiscussionThread { .. }) {
            thread_count += 1;
        }
    }
    assert_eq!(
        thread_count, 0,
        "All-system-note discussion should produce no thread"
    );
}
#[test]
fn test_collect_discussion_thread_body_truncation() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
    let oversized_body = "x".repeat(10_000);
    insert_note(&conn, disc_id, project_id, "alice", &oversized_body, false, 2000);

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let matched = [make_matched_discussion(
        disc_id, "issue", issue_id, project_id,
    )];
    let (timeline, _total) =
        collect_events(&conn, &seeds, &[], &[], &matched, None, 100).unwrap();

    let thread = timeline
        .iter()
        .find(|event| matches!(&event.event_type, TimelineEventType::DiscussionThread { .. }))
        .unwrap();
    match &thread.event_type {
        TimelineEventType::DiscussionThread { notes, .. } => {
            let body_chars = notes[0].body.chars().count();
            assert!(
                body_chars <= crate::core::timeline::THREAD_NOTE_MAX_CHARS,
                "Body should be truncated to THREAD_NOTE_MAX_CHARS"
            );
        }
        _ => panic!("Expected DiscussionThread"),
    }
}
#[test]
fn test_collect_discussion_thread_note_cap() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
    // 60 notes, 100 ms apart, which exceeds THREAD_MAX_NOTES (50).
    for (index, created_at) in (2000..).step_by(100).take(60).enumerate() {
        let body = format!("Note {index}");
        insert_note(&conn, disc_id, project_id, "alice", &body, false, created_at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let matched = [make_matched_discussion(
        disc_id, "issue", issue_id, project_id,
    )];
    let (timeline, _total) =
        collect_events(&conn, &seeds, &[], &[], &matched, None, 100).unwrap();

    let thread = timeline
        .iter()
        .find(|event| matches!(&event.event_type, TimelineEventType::DiscussionThread { .. }))
        .unwrap();
    match &thread.event_type {
        TimelineEventType::DiscussionThread { notes, .. } => {
            // 50 notes + 1 synthetic summary = 51
            assert_eq!(
                notes.len(),
                crate::core::timeline::THREAD_MAX_NOTES + 1,
                "Should cap at THREAD_MAX_NOTES + synthetic summary"
            );
            let marker = notes.last().unwrap();
            assert!(marker.body.contains("more notes not shown"));
        }
        _ => panic!("Expected DiscussionThread"),
    }
}
#[test]
fn test_collect_discussion_thread_timestamp_is_first_note() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
    for (author, body, created_at) in [("alice", "First", 5000), ("bob", "Second", 8000)] {
        insert_note(&conn, disc_id, project_id, author, body, false, created_at);
    }

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let matched = [make_matched_discussion(
        disc_id, "issue", issue_id, project_id,
    )];
    let (timeline, _total) =
        collect_events(&conn, &seeds, &[], &[], &matched, None, 100).unwrap();

    let thread_timestamp = timeline
        .iter()
        .find(|event| matches!(&event.event_type, TimelineEventType::DiscussionThread { .. }))
        .map(|event| event.timestamp)
        .unwrap();
    assert_eq!(
        thread_timestamp, 5000,
        "Thread timestamp should be first note's created_at"
    );
}
#[test]
fn test_collect_discussion_thread_sort_position() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
    // The note at t=2000 sits between Created (t=1000) and the state change (t=3000).
    let note_body = "discussion";
    insert_note(&conn, disc_id, project_id, "alice", note_body, false, 2000);
    insert_state_event(&conn, project_id, Some(issue_id), None, "closed", 3000);

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    let matched = [make_matched_discussion(
        disc_id, "issue", issue_id, project_id,
    )];
    let (timeline, _total) =
        collect_events(&conn, &seeds, &[], &[], &matched, None, 100).unwrap();

    // Expected order: Created(1000), DiscussionThread(2000), StateChanged(3000)
    assert!(timeline.len() >= 3);
    let (first, second, third) = (&timeline[0], &timeline[1], &timeline[2]);
    assert!(matches!(first.event_type, TimelineEventType::Created));
    assert!(matches!(
        second.event_type,
        TimelineEventType::DiscussionThread { .. }
    ));
    assert!(matches!(
        third.event_type,
        TimelineEventType::StateChanged { .. }
    ));
}
#[test]
fn test_collect_discussion_thread_dedup() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
    insert_note(&conn, disc_id, project_id, "alice", "hello", false, 2000);

    let seeds = [make_entity_ref("issue", issue_id, 1)];
    // The same discussion is matched twice.
    let first_match = make_matched_discussion(disc_id, "issue", issue_id, project_id);
    let second_match = make_matched_discussion(disc_id, "issue", issue_id, project_id);
    let matched = [first_match, second_match];
    let (timeline, _total) =
        collect_events(&conn, &seeds, &[], &[], &matched, None, 100).unwrap();

    let mut thread_count = 0;
    for event in &timeline {
        if matches!(&event.event_type, TimelineEventType::DiscussionThread { .. }) {
            thread_count += 1;
        }
    }
    assert_eq!(
        thread_count, 1,
        "Duplicate discussion_id should produce one thread"
    );
}

View File

@@ -1,252 +0,0 @@
use std::collections::{HashSet, VecDeque};
use rusqlite::Connection;
use crate::core::error::Result;
use crate::core::timeline::{EntityRef, ExpandedEntityRef, UnresolvedRef, resolve_entity_ref};
/// Result of the expand phase.
///
/// Returned by `expand_timeline`; both lists are empty when expansion is
/// skipped (depth 0 or no seeds).
pub struct ExpandResult {
/// Entities discovered by following references from the seeds, each with
/// the depth and the edge through which it was first reached.
pub expanded_entities: Vec<ExpandedEntityRef>,
/// References encountered during traversal that did not resolve to an entity.
pub unresolved_references: Vec<UnresolvedRef>,
}
/// Run the EXPAND phase of the timeline pipeline: a breadth-first walk over
/// `entity_references` starting from the seed entities.
///
/// Neighbors are gathered via `find_neighbors` (outgoing and incoming
/// references). Each newly seen entity is recorded with its depth and the edge
/// it was reached through. `closes` and `related` edges are always followed;
/// `mentioned` edges only when `include_mentions` is set. Traversal stops at
/// `depth` hops or once `max_entities` entities have been collected.
/// Unresolved references are gathered along the way.
pub fn expand_timeline(
    conn: &Connection,
    seeds: &[EntityRef],
    depth: u32,
    include_mentions: bool,
    max_entities: usize,
) -> Result<ExpandResult> {
    let mut expanded = Vec::new();
    let mut unresolved = Vec::new();

    // With no hops allowed or nothing to start from, the result is empty.
    if depth > 0 && !seeds.is_empty() {
        let edge_types: &[&str] = if include_mentions {
            &["closes", "related", "mentioned"]
        } else {
            &["closes", "related"]
        };

        // Seeds are pre-marked as visited so they never reappear as expansions.
        let mut visited: HashSet<(String, i64)> = HashSet::new();
        let mut queue: VecDeque<(EntityRef, u32)> = VecDeque::new();
        for seed in seeds {
            visited.insert((seed.entity_type.clone(), seed.entity_id));
            queue.push_back((seed.clone(), 0));
        }

        while let Some((current, current_depth)) = queue.pop_front() {
            if expanded.len() >= max_entities {
                break;
            }
            let next_depth = current_depth + 1;

            for neighbor in find_neighbors(conn, &current, edge_types)? {
                let (entity_ref, reference_type, source_method) = match neighbor {
                    Neighbor::Unresolved(unresolved_ref) => {
                        unresolved.push(unresolved_ref);
                        continue;
                    }
                    Neighbor::Resolved {
                        entity_ref,
                        reference_type,
                        source_method,
                    } => (entity_ref, reference_type, source_method),
                };

                // Skip anything already seen (seed or earlier expansion).
                if !visited.insert((entity_ref.entity_type.clone(), entity_ref.entity_id)) {
                    continue;
                }

                expanded.push(ExpandedEntityRef {
                    entity_ref: entity_ref.clone(),
                    depth: next_depth,
                    via_from: current.clone(),
                    via_reference_type: reference_type,
                    via_source_method: source_method,
                });
                if expanded.len() >= max_entities {
                    break;
                }
                // Only enqueue entities that still have hops left to explore.
                if next_depth < depth {
                    queue.push_back((entity_ref, next_depth));
                }
            }
        }
    }

    Ok(ExpandResult {
        expanded_entities: expanded,
        unresolved_references: unresolved,
    })
}
/// One reference edge found for an entity, as produced by `find_neighbors`.
enum Neighbor {
/// The referenced entity was resolved to a local `EntityRef`.
Resolved {
entity_ref: EntityRef,
// `reference_type` / `source_method` values read from the entity_references row.
reference_type: String,
source_method: String,
},
/// The reference could not be resolved to a local entity.
Unresolved(UnresolvedRef),
}
/// Collect every neighbor of `entity` in `entity_references`.
///
/// Outgoing references (entity is the source) are gathered first, followed by
/// incoming references (entity is the target). Only rows whose
/// `reference_type` is listed in `edge_types` are considered.
fn find_neighbors(
    conn: &Connection,
    entity: &EntityRef,
    edge_types: &[&str],
) -> Result<Vec<Neighbor>> {
    let mut found = Vec::new();
    find_outgoing(conn, entity, edge_types, &mut found)?;
    find_incoming(conn, entity, edge_types, &mut found)?;
    Ok(found)
}
/// Append the neighbors reached through references where `entity` is the source.
///
/// Rows with a non-NULL `target_entity_id` are resolved via `resolve_entity_ref`
/// and pushed as `Neighbor::Resolved` (rows whose target cannot be resolved are
/// silently skipped). Rows with a NULL target id are pushed as
/// `Neighbor::Unresolved`, carrying the target project path and iid columns.
fn find_outgoing(
    conn: &Connection,
    entity: &EntityRef,
    edge_types: &[&str],
    neighbors: &mut Vec<Neighbor>,
) -> Result<()> {
    // ?1 and ?2 are taken by the source type/id, so edge types start at ?3.
    let placeholders = (3..edge_types.len() + 3)
        .map(|n| format!("?{n}"))
        .collect::<Vec<_>>()
        .join(", ");
    let sql = format!(
        "SELECT target_entity_type, target_entity_id, target_project_path, target_entity_iid,
reference_type, source_method
FROM entity_references
WHERE source_entity_type = ?1
AND source_entity_id = ?2
AND reference_type IN ({placeholders})"
    );

    // Bind values in placeholder order: source type, source id, then each edge type.
    let mut bind: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(edge_types.len() + 2);
    bind.push(&entity.entity_type);
    bind.push(&entity.entity_id);
    for edge_type in edge_types {
        bind.push(edge_type);
    }

    let mut stmt = conn.prepare(&sql)?;
    let rows = stmt.query_map(bind.as_slice(), |row| {
        let target_type: String = row.get(0)?;
        let target_id: Option<i64> = row.get(1)?;
        let target_project_path: Option<String> = row.get(2)?;
        let target_iid: Option<i64> = row.get(3)?;
        let reference_type: String = row.get(4)?;
        let source_method: String = row.get(5)?;
        Ok((
            target_type,
            target_id,
            target_project_path,
            target_iid,
            reference_type,
            source_method,
        ))
    })?;

    for row in rows {
        let (target_type, target_id, target_project_path, target_iid, reference_type, source_method) =
            row?;

        // No local id: report the reference as unresolved.
        let Some(local_id) = target_id else {
            neighbors.push(Neighbor::Unresolved(UnresolvedRef {
                source: entity.clone(),
                target_project: target_project_path,
                target_type,
                target_iid,
                reference_type,
            }));
            continue;
        };

        if let Some(resolved) = resolve_entity_ref(conn, &target_type, local_id, None)? {
            neighbors.push(Neighbor::Resolved {
                entity_ref: resolved,
                reference_type,
                source_method,
            });
        }
    }
    Ok(())
}
/// Append the neighbors reached through references where `entity` is the target.
///
/// Each matching row's source is resolved via `resolve_entity_ref`; sources that
/// cannot be resolved are silently skipped. Incoming references never produce
/// `Neighbor::Unresolved`.
fn find_incoming(
    conn: &Connection,
    entity: &EntityRef,
    edge_types: &[&str],
    neighbors: &mut Vec<Neighbor>,
) -> Result<()> {
    // ?1 and ?2 are taken by the target type/id, so edge types start at ?3.
    let placeholders = (3..edge_types.len() + 3)
        .map(|n| format!("?{n}"))
        .collect::<Vec<_>>()
        .join(", ");
    let sql = format!(
        "SELECT source_entity_type, source_entity_id, reference_type, source_method
FROM entity_references
WHERE target_entity_type = ?1
AND target_entity_id = ?2
AND reference_type IN ({placeholders})"
    );

    // Bind values in placeholder order: target type, target id, then each edge type.
    let mut bind: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(edge_types.len() + 2);
    bind.push(&entity.entity_type);
    bind.push(&entity.entity_id);
    for edge_type in edge_types {
        bind.push(edge_type);
    }

    let mut stmt = conn.prepare(&sql)?;
    let rows = stmt.query_map(bind.as_slice(), |row| {
        let source_type: String = row.get(0)?;
        let source_id: i64 = row.get(1)?;
        let reference_type: String = row.get(2)?;
        let source_method: String = row.get(3)?;
        Ok((source_type, source_id, reference_type, source_method))
    })?;

    for row in rows {
        let (source_type, source_id, reference_type, source_method) = row?;
        if let Some(resolved) = resolve_entity_ref(conn, &source_type, source_id, None)? {
            neighbors.push(Neighbor::Resolved {
                entity_ref: resolved,
                reference_type,
                source_method,
            });
        }
    }
    Ok(())
}
// Unit tests are kept in a separate file and only compiled for test builds.
#[cfg(test)]
#[path = "timeline_expand_tests.rs"]
mod tests;

View File

@@ -1,305 +0,0 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Open a fresh in-memory database and apply all migrations to it.
fn setup_test_db() -> Connection {
    let db = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Insert the fixture project `group/project` and return its local row id.
fn insert_project(conn: &Connection) -> i64 {
    let sql = "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')";
    conn.execute(sql, []).unwrap();
    conn.last_insert_rowid()
}
/// Insert an opened issue with the given `iid` and return its local row id.
///
/// The GitLab id is derived as `iid * 100`.
fn insert_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    let sql = "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test', 'opened', 'alice', 1000, 2000, 3000)";
    let gitlab_id = iid * 100;
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid])
        .unwrap();
    conn.last_insert_rowid()
}
/// Insert an opened merge request with the given `iid` and return its local row id.
///
/// The GitLab id is derived as `iid * 100`.
fn insert_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    let sql = "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)";
    let gitlab_id = iid * 100;
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid])
        .unwrap();
    conn.last_insert_rowid()
}
/// Insert one `entity_references` row linking a source entity to a target entity.
///
/// Passing `None` for `target_id` stores a NULL `target_entity_id`.
// The argument list mirrors the table columns one-to-one, hence the lint allowance.
#[allow(clippy::too_many_arguments)]
fn insert_ref(
    conn: &Connection,
    project_id: i64,
    source_type: &str,
    source_id: i64,
    target_type: &str,
    target_id: Option<i64>,
    ref_type: &str,
    source_method: &str,
) {
    let sql = "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id, target_entity_type, target_entity_id, reference_type, source_method, created_at) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1000)";
    let bound = rusqlite::params![
        project_id,
        source_type,
        source_id,
        target_type,
        target_id,
        ref_type,
        source_method
    ];
    conn.execute(sql, bound).unwrap();
}
/// Build an `EntityRef` that lives in the fixture project `group/project`.
fn make_entity_ref(entity_type: &str, entity_id: i64, iid: i64) -> EntityRef {
    let project_path = String::from("group/project");
    let entity_type = String::from(entity_type);
    EntityRef {
        entity_type,
        entity_id,
        entity_iid: iid,
        project_path,
    }
}
/// A depth of zero disables expansion entirely, even when seeds are supplied.
#[test]
fn test_expand_depth_zero() {
    let db = setup_test_db();
    let project = insert_project(&db);
    let issue = insert_issue(&db, project, 1);

    let seeds = [make_entity_ref("issue", issue, 1)];
    let outcome = expand_timeline(&db, &seeds, 0, false, 100).unwrap();

    assert!(outcome.expanded_entities.is_empty());
    assert!(outcome.unresolved_references.is_empty());
}
/// Expanding from an issue discovers the merge request that closes it.
#[test]
fn test_expand_finds_linked_entity() {
    let db = setup_test_db();
    let project = insert_project(&db);
    let issue = insert_issue(&db, project, 1);
    let mr = insert_mr(&db, project, 10);

    // The MR is the source of a "closes" reference that targets the issue.
    insert_ref(
        &db,
        project,
        "merge_request",
        mr,
        "issue",
        Some(issue),
        "closes",
        "api",
    );

    let seeds = [make_entity_ref("issue", issue, 1)];
    let outcome = expand_timeline(&db, &seeds, 1, false, 100).unwrap();

    assert_eq!(outcome.expanded_entities.len(), 1);
    let found = &outcome.expanded_entities[0];
    assert_eq!(found.entity_ref.entity_type, "merge_request");
    assert_eq!(found.entity_ref.entity_iid, 10);
    assert_eq!(found.depth, 1);
}
/// A single reference row must be traversable from either endpoint.
///
/// The fixture stores one "closes" reference with the MR as source and the
/// issue as target. Expansion is checked from both sides: from the MR
/// (outgoing edge) and from the issue (incoming edge).
#[test]
fn test_expand_bidirectional() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);
    let mr_id = insert_mr(&conn, project_id, 10);

    // MR closes issue (MR is source, issue is target)
    insert_ref(
        &conn,
        project_id,
        "merge_request",
        mr_id,
        "issue",
        Some(issue_id),
        "closes",
        "api",
    );

    // Outgoing direction: starting from the MR should find the issue.
    let mr_seeds = vec![make_entity_ref("merge_request", mr_id, 10)];
    let from_mr = expand_timeline(&conn, &mr_seeds, 1, false, 100).unwrap();
    assert_eq!(from_mr.expanded_entities.len(), 1);
    assert_eq!(from_mr.expanded_entities[0].entity_ref.entity_type, "issue");
    assert_eq!(from_mr.expanded_entities[0].entity_ref.entity_id, issue_id);

    // Incoming direction: starting from the issue should find the MR.
    let issue_seeds = vec![make_entity_ref("issue", issue_id, 1)];
    let from_issue = expand_timeline(&conn, &issue_seeds, 1, false, 100).unwrap();
    assert_eq!(from_issue.expanded_entities.len(), 1);
    assert_eq!(
        from_issue.expanded_entities[0].entity_ref.entity_type,
        "merge_request"
    );
    assert_eq!(from_issue.expanded_entities[0].entity_ref.entity_id, mr_id);
}
/// Expansion stops as soon as `max_entities` entities have been collected.
///
/// Ten merge requests close the seed issue, so with a cap of three the result
/// must contain exactly three entities. Asserting equality (rather than
/// `<= 3`) also guards against expansion returning nothing at all.
#[test]
fn test_expand_respects_max_entities() {
    let conn = setup_test_db();
    let project_id = insert_project(&conn);
    let issue_id = insert_issue(&conn, project_id, 1);

    // Create 10 MRs that all close this issue
    for i in 2..=11 {
        let mr_id = insert_mr(&conn, project_id, i);
        insert_ref(
            &conn,
            project_id,
            "merge_request",
            mr_id,
            "issue",
            Some(issue_id),
            "closes",
            "api",
        );
    }

    let seeds = vec![make_entity_ref("issue", issue_id, 1)];
    let result = expand_timeline(&conn, &seeds, 1, false, 3).unwrap();
    assert_eq!(result.expanded_entities.len(), 3);
}
/// With `include_mentions == false`, "mentioned" references are not followed.
#[test]
fn test_expand_skips_mentions_by_default() {
    let db = setup_test_db();
    let project = insert_project(&db);
    let issue = insert_issue(&db, project, 1);
    let mr = insert_mr(&db, project, 10);

    // The only link between the two entities is a mention.
    insert_ref(
        &db,
        project,
        "merge_request",
        mr,
        "issue",
        Some(issue),
        "mentioned",
        "note_parse",
    );

    let seeds = [make_entity_ref("issue", issue, 1)];
    let outcome = expand_timeline(&db, &seeds, 1, false, 100).unwrap();

    assert!(outcome.expanded_entities.is_empty());
}
/// With `include_mentions == true`, "mentioned" references are followed.
#[test]
fn test_expand_includes_mentions_when_flagged() {
    let db = setup_test_db();
    let project = insert_project(&db);
    let issue = insert_issue(&db, project, 1);
    let mr = insert_mr(&db, project, 10);

    // The only link between the two entities is a mention.
    insert_ref(
        &db,
        project,
        "merge_request",
        mr,
        "issue",
        Some(issue),
        "mentioned",
        "note_parse",
    );

    let seeds = [make_entity_ref("issue", issue, 1)];
    let outcome = expand_timeline(&db, &seeds, 1, true, 100).unwrap();

    assert_eq!(outcome.expanded_entities.len(), 1);
}
/// A reference with a NULL target id is reported as unresolved, not expanded.
#[test]
fn test_expand_collects_unresolved() {
    let db = setup_test_db();
    let project = insert_project(&db);
    let issue = insert_issue(&db, project, 1);

    // Cross-project reference: target id is NULL, only project path and iid are known.
    let sql = "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id, target_entity_type, target_entity_id, target_project_path, target_entity_iid, reference_type, source_method, created_at) VALUES (?1, 'issue', ?2, 'issue', NULL, 'other/repo', 42, 'closes', 'description_parse', 1000)";
    db.execute(sql, rusqlite::params![project, issue]).unwrap();

    let seeds = [make_entity_ref("issue", issue, 1)];
    let outcome = expand_timeline(&db, &seeds, 1, false, 100).unwrap();

    assert!(outcome.expanded_entities.is_empty());
    assert_eq!(outcome.unresolved_references.len(), 1);

    let dangling = &outcome.unresolved_references[0];
    assert_eq!(dangling.target_project.as_deref(), Some("other/repo"));
    assert_eq!(dangling.target_iid, Some(42));
}
/// Each expanded entity records how it was reached: reference type, source
/// method, and the entity the traversal came from.
#[test]
fn test_expand_tracks_provenance() {
    let db = setup_test_db();
    let project = insert_project(&db);
    let issue = insert_issue(&db, project, 1);
    let mr = insert_mr(&db, project, 10);

    insert_ref(
        &db,
        project,
        "merge_request",
        mr,
        "issue",
        Some(issue),
        "closes",
        "api",
    );

    let seeds = [make_entity_ref("issue", issue, 1)];
    let outcome = expand_timeline(&db, &seeds, 1, false, 100).unwrap();
    assert_eq!(outcome.expanded_entities.len(), 1);

    let hop = &outcome.expanded_entities[0];
    assert_eq!(hop.via_reference_type, "closes");
    assert_eq!(hop.via_source_method, "api");
    assert_eq!(hop.via_from.entity_type, "issue");
    assert_eq!(hop.via_from.entity_id, issue);
}
/// An entity reachable through several references is reported only once.
#[test]
fn test_expand_no_duplicates() {
    let db = setup_test_db();
    let project = insert_project(&db);
    let issue = insert_issue(&db, project, 1);
    let mr = insert_mr(&db, project, 10);

    // Same MR -> issue pair, linked twice with different type and method.
    for (ref_type, method) in [("closes", "api"), ("related", "note_parse")] {
        insert_ref(
            &db,
            project,
            "merge_request",
            mr,
            "issue",
            Some(issue),
            ref_type,
            method,
        );
    }

    let seeds = [make_entity_ref("merge_request", mr, 10)];
    let outcome = expand_timeline(&db, &seeds, 1, false, 100).unwrap();

    // The issue must show up a single time; the first reference seen wins.
    assert_eq!(outcome.expanded_entities.len(), 1);
}
/// Expanding from an empty seed list yields no expanded entities.
#[test]
fn test_expand_empty_seeds() {
    let db = setup_test_db();
    let no_seeds: &[EntityRef] = &[];
    let outcome = expand_timeline(&db, no_seeds, 1, false, 100).unwrap();
    assert!(outcome.expanded_entities.is_empty());
}

View File

@@ -1,423 +0,0 @@
use std::collections::HashSet;
use rusqlite::Connection;
use tracing::debug;
use crate::core::error::Result;
use crate::core::timeline::{
EntityRef, MatchedDiscussion, TimelineEvent, TimelineEventType, resolve_entity_by_iid,
resolve_entity_ref, truncate_to_chars,
};
use crate::embedding::ollama::OllamaClient;
use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_fts_query};
/// Result of the seed + hydrate phases.
///
/// Produced by both `seed_timeline` (search-based) and `seed_timeline_direct`
/// (IID-based), so later pipeline stages can treat the two uniformly.
pub struct SeedResult {
/// Deduplicated entities that the timeline starts from.
pub seed_entities: Vec<EntityRef>,
/// Note events matched by full-text search; empty for direct seeding.
pub evidence_notes: Vec<TimelineEvent>,
/// Discussions matched during seeding, to be collected as full threads.
pub matched_discussions: Vec<MatchedDiscussion>,
/// The search mode actually used (hybrid with fallback info).
/// Values set in this file: "lexical", "hybrid", "lexical (hybrid fallback)", "direct".
pub search_mode: String,
}
/// SEED + HYDRATE phases of the timeline pipeline.
///
/// 1. SEED: run `search_hybrid` in `SearchMode::Hybrid` over documents to get
///    matching document IDs (requesting up to `3 * max_seeds` results).
/// 2. HYDRATE: map those documents to their source entities (discussion and
///    note documents resolve to the parent issue or MR) and collect FTS-matched
///    notes as evidence.
///
/// Returns an empty result with mode `"lexical"` when the sanitized FTS query
/// is empty. Otherwise `search_mode` is `"lexical (hybrid fallback)"` when a
/// search warning mentions a fallback, `"hybrid"` when a client was supplied
/// and results were found, and `"lexical"` in all other cases.
///
/// Seed entities are deduplicated and capped at `max_seeds`; evidence notes
/// are capped at `max_evidence`.
pub async fn seed_timeline(
    conn: &Connection,
    client: Option<&OllamaClient>,
    query: &str,
    project_id: Option<i64>,
    since_ms: Option<i64>,
    max_seeds: usize,
    max_evidence: usize,
) -> Result<SeedResult> {
    let fts_query = to_fts_query(query, FtsQueryMode::Safe);
    if fts_query.is_empty() {
        return Ok(SeedResult {
            seed_entities: Vec::new(),
            evidence_notes: Vec::new(),
            matched_discussions: Vec::new(),
            search_mode: "lexical".to_owned(),
        });
    }

    // Over-fetch documents: several documents may collapse into one entity.
    let filters = SearchFilters {
        project_id,
        updated_since: since_ms,
        limit: max_seeds.saturating_mul(3),
        ..SearchFilters::default()
    };
    let (hybrid_results, warnings) = search_hybrid(
        conn,
        client,
        query,
        SearchMode::Hybrid,
        &filters,
        FtsQueryMode::Safe,
    )
    .await?;

    // Log every warning and remember whether any of them signals a fallback.
    let mut fell_back = false;
    for msg in &warnings {
        if msg.contains("falling back") || msg.contains("FTS only") {
            fell_back = true;
        }
        debug!(warning = %msg, "hybrid search warning during timeline seeding");
    }

    let used_embeddings = client.is_some() && !hybrid_results.is_empty();
    let search_mode = match (fell_back, used_embeddings) {
        (true, _) => "lexical (hybrid fallback)",
        (false, true) => "hybrid",
        (false, false) => "lexical",
    }
    .to_owned();

    let document_ids: Vec<_> = hybrid_results.iter().map(|hit| hit.document_id).collect();
    let (seed_entities, matched_discussions) =
        resolve_documents_to_entities(conn, &document_ids, max_seeds)?;

    // Evidence is supplementary context, so it is gathered with FTS only.
    let evidence_notes = find_evidence_notes(conn, &fts_query, project_id, since_ms, max_evidence)?;

    Ok(SeedResult {
        seed_entities,
        evidence_notes,
        matched_discussions,
        search_mode,
    })
}
/// Seed the timeline from a single entity identified by IID, without searching.
///
/// Used for `issue:42` / `mr:99` syntax. The entity is resolved through
/// `resolve_entity_by_iid`, and every discussion attached to it (not just
/// search matches) is returned in `matched_discussions`. For an entity type
/// other than `"issue"` or `"merge_request"` no discussions are looked up.
/// The result always has an empty `evidence_notes` list and mode `"direct"`.
pub fn seed_timeline_direct(
    conn: &Connection,
    entity_type: &str,
    iid: i64,
    project_id: Option<i64>,
) -> Result<SeedResult> {
    let entity_ref = resolve_entity_by_iid(conn, entity_type, iid, project_id)?;

    // Column of `discussions` that points at the parent entity, if there is one.
    let parent_column = match entity_type {
        "issue" => Some("issue_id"),
        "merge_request" => Some("merge_request_id"),
        _ => None,
    };

    let matched_discussions: Vec<MatchedDiscussion> = match parent_column {
        None => Vec::new(),
        Some(entity_id_col) => {
            let sql = format!("SELECT id, project_id FROM discussions WHERE {entity_id_col} = ?1");
            let mut stmt = conn.prepare(&sql)?;
            let parent_id = entity_ref.entity_id;
            let threads = stmt
                .query_map(rusqlite::params![parent_id], |row| {
                    Ok(MatchedDiscussion {
                        discussion_id: row.get(0)?,
                        entity_type: entity_type.to_owned(),
                        entity_id: parent_id,
                        project_id: row.get(1)?,
                    })
                })?
                .collect::<std::result::Result<Vec<_>, _>>()?;
            threads
        }
    };

    Ok(SeedResult {
        seed_entities: vec![entity_ref],
        evidence_notes: Vec::new(),
        matched_discussions,
        search_mode: "direct".to_owned(),
    })
}
/// Resolve a list of document IDs to deduplicated entity refs and matched discussions.
///
/// Discussion and note documents are resolved to their parent entity (issue or MR);
/// documents whose discussion has neither parent, and documents of any other
/// source type, are skipped. Rows are processed in the order of `document_ids`
/// (enforced by the `ORDER BY CASE` clause), so earlier documents win when the
/// `max_entities` cap is reached.
///
/// Returns `(entities, matched_discussions)`. Processing stops as soon as
/// `max_entities` entities have been collected, so discussions of later
/// documents are not reported. A cap of `0` yields two empty vectors.
fn resolve_documents_to_entities(
    conn: &Connection,
    document_ids: &[i64],
    max_entities: usize,
) -> Result<(Vec<EntityRef>, Vec<MatchedDiscussion>)> {
    // Without the explicit zero check the loop below would still push one
    // entity before noticing that the cap had been reached.
    if document_ids.is_empty() || max_entities == 0 {
        return Ok((Vec::new(), Vec::new()));
    }

    let placeholders: String = document_ids
        .iter()
        .map(|_| "?")
        .collect::<Vec<_>>()
        .join(",");
    // The ids interpolated into the CASE expression are `i64` values, so the
    // generated SQL cannot contain anything but integer literals.
    let sql = format!(
        r"
SELECT d.source_type, d.source_id, d.project_id,
COALESCE(disc.issue_id, note_disc.issue_id) AS issue_id,
COALESCE(disc.merge_request_id, note_disc.merge_request_id) AS mr_id,
COALESCE(disc.id, note_disc.id) AS discussion_id
FROM documents d
LEFT JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
LEFT JOIN notes n ON n.id = d.source_id AND d.source_type = 'note'
LEFT JOIN discussions note_disc ON note_disc.id = n.discussion_id AND d.source_type = 'note'
WHERE d.id IN ({placeholders})
ORDER BY CASE d.id {order_clause} END
",
        order_clause = document_ids
            .iter()
            .enumerate()
            .map(|(i, id)| format!("WHEN {id} THEN {i}"))
            .collect::<Vec<_>>()
            .join(" "),
    );
    let mut stmt = conn.prepare(&sql)?;
    let params: Vec<&dyn rusqlite::types::ToSql> = document_ids
        .iter()
        .map(|id| id as &dyn rusqlite::types::ToSql)
        .collect();
    let rows = stmt.query_map(params.as_slice(), |row| {
        Ok((
            row.get::<_, String>(0)?,      // source_type
            row.get::<_, i64>(1)?,         // source_id
            row.get::<_, i64>(2)?,         // project_id
            row.get::<_, Option<i64>>(3)?, // issue_id (coalesced)
            row.get::<_, Option<i64>>(4)?, // mr_id (coalesced)
            row.get::<_, Option<i64>>(5)?, // discussion_id (coalesced)
        ))
    })?;

    let mut seen_entities = HashSet::new();
    let mut seen_discussions = HashSet::new();
    let mut entities = Vec::new();
    let mut matched_discussions = Vec::new();

    for row_result in rows {
        let (source_type, source_id, proj_id, disc_issue_id, disc_mr_id, discussion_id) =
            row_result?;

        let (entity_type, entity_id) = match source_type.as_str() {
            "issue" => ("issue".to_owned(), source_id),
            "merge_request" => ("merge_request".to_owned(), source_id),
            "discussion" | "note" => {
                if let Some(issue_id) = disc_issue_id {
                    ("issue".to_owned(), issue_id)
                } else if let Some(mr_id) = disc_mr_id {
                    ("merge_request".to_owned(), mr_id)
                } else {
                    continue; // orphaned discussion/note
                }
            }
            _ => continue,
        };

        // Capture matched discussion (deduplicated). This happens before the
        // entity dedup so a second discussion on an already-seen entity is kept.
        if let Some(disc_id) = discussion_id
            && (source_type == "discussion" || source_type == "note")
            && seen_discussions.insert(disc_id)
        {
            matched_discussions.push(MatchedDiscussion {
                discussion_id: disc_id,
                entity_type: entity_type.clone(),
                entity_id,
                project_id: proj_id,
            });
        }

        // Entity dedup
        let key = (entity_type.clone(), entity_id);
        if !seen_entities.insert(key) {
            continue;
        }
        if let Some(entity_ref) = resolve_entity_ref(conn, &entity_type, entity_id, Some(proj_id))?
        {
            entities.push(entity_ref);
        }
        if entities.len() >= max_entities {
            break;
        }
    }
    Ok((entities, matched_discussions))
}
/// Find evidence notes: FTS5-matched discussion notes that provide context.
///
/// Discussion documents matching `fts_query` are joined to all of their
/// non-system notes, optionally restricted to `project_id` and to documents
/// updated at or after `since_ms`. Each note becomes a `NoteEvidence` event
/// whose snippet is the note body truncated to 200 characters. Notes whose
/// parent entity cannot be resolved are skipped.
///
/// Uses round-robin selection across discussions to ensure diverse evidence
/// rather than all notes coming from a single high-traffic discussion. At most
/// `max_evidence` events are returned; a cap of `0` returns an empty list
/// without querying the database.
fn find_evidence_notes(
    conn: &Connection,
    fts_query: &str,
    project_id: Option<i64>,
    since_ms: Option<i64>,
    max_evidence: usize,
) -> Result<Vec<TimelineEvent>> {
    // Nothing can be selected, so skip the query and per-row resolution.
    if max_evidence == 0 {
        return Ok(Vec::new());
    }

    // Fetch extra rows to enable round-robin across discussions.
    // Saturating arithmetic keeps a very large cap from overflowing, and the
    // checked conversion keeps the bound SQL LIMIT from wrapping negative.
    let fetch_limit = max_evidence.saturating_mul(5).max(50);
    let fetch_limit = i64::try_from(fetch_limit).unwrap_or(i64::MAX);

    let sql = r"
SELECT n.id AS note_id, n.body, n.created_at, n.author_username,
disc.id AS discussion_id,
CASE WHEN disc.issue_id IS NOT NULL THEN 'issue' ELSE 'merge_request' END AS parent_type,
COALESCE(disc.issue_id, disc.merge_request_id) AS parent_entity_id,
d.project_id
FROM documents_fts
JOIN documents d ON d.id = documents_fts.rowid
JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
JOIN notes n ON n.discussion_id = disc.id AND n.is_system = 0
WHERE documents_fts MATCH ?1
AND (?2 IS NULL OR d.project_id = ?2)
AND (?3 IS NULL OR d.updated_at >= ?3)
ORDER BY rank
LIMIT ?4
";
    let mut stmt = conn.prepare(sql)?;
    let rows = stmt.query_map(
        rusqlite::params![fts_query, project_id, since_ms, fetch_limit],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,            // note_id
                row.get::<_, Option<String>>(1)?, // body
                row.get::<_, i64>(2)?,            // created_at
                row.get::<_, Option<String>>(3)?, // author
                row.get::<_, i64>(4)?,            // discussion_id
                row.get::<_, String>(5)?,         // parent_type
                row.get::<_, i64>(6)?,            // parent_entity_id
                row.get::<_, i64>(7)?,            // project_id
            ))
        },
    )?;

    let mut events = Vec::new();
    for row_result in rows {
        let (
            note_id,
            body,
            created_at,
            author,
            discussion_id,
            parent_type,
            parent_entity_id,
            proj_id,
        ) = row_result?;

        let snippet = truncate_to_chars(body.as_deref().unwrap_or(""), 200);

        let Some(parent) = resolve_entity_ref(conn, &parent_type, parent_entity_id, Some(proj_id))?
        else {
            debug!(
                parent_type,
                parent_entity_id,
                proj_id,
                "Skipping evidence note: parent entity not found (orphaned discussion)"
            );
            continue;
        };

        // Each event is paired with its discussion id for the round-robin step.
        events.push((
            discussion_id,
            TimelineEvent {
                timestamp: created_at,
                entity_type: parent_type,
                entity_id: parent_entity_id,
                entity_iid: parent.entity_iid,
                project_path: parent.project_path,
                event_type: TimelineEventType::NoteEvidence {
                    note_id,
                    snippet,
                    discussion_id: Some(discussion_id),
                },
                summary: format!("Note by {}", author.as_deref().unwrap_or("unknown")),
                actor: author,
                url: None,
                is_seed: true,
            },
        ));
    }

    // Round-robin selection across discussions for diverse evidence
    Ok(round_robin_select_by_discussion(events, max_evidence))
}
/// Round-robin select events across discussions to ensure diverse evidence.
///
/// Events are grouped by discussion id, keeping the input order both between
/// discussions (first appearance) and within each discussion. The groups are
/// then visited in rotation, taking one event from each per pass, until
/// `max_evidence` events have been taken or every group is exhausted.
///
/// Returns an empty vector when `events` is empty or `max_evidence` is `0`.
fn round_robin_select_by_discussion(
    events: Vec<(i64, TimelineEvent)>,
    max_evidence: usize,
) -> Vec<TimelineEvent> {
    use std::collections::HashMap;

    if events.is_empty() || max_evidence == 0 {
        return Vec::new();
    }
    let total = events.len();

    // Group events by discussion. `slot_of` maps a discussion id to its index
    // in `groups`, which is ordered by first appearance in the input.
    let mut slot_of: HashMap<i64, usize> = HashMap::new();
    let mut groups: Vec<Vec<TimelineEvent>> = Vec::new();
    for (discussion_id, event) in events {
        let slot = *slot_of.entry(discussion_id).or_insert_with(|| {
            groups.push(Vec::new());
            groups.len() - 1
        });
        groups[slot].push(event);
    }

    // One owning cursor per discussion lets events be moved out, not cloned.
    let mut cursors: Vec<_> = groups.into_iter().map(|group| group.into_iter()).collect();

    // Never reserve more than can actually be returned; a huge cap would
    // otherwise make the allocation itself fail.
    let mut result = Vec::with_capacity(max_evidence.min(total));
    loop {
        let mut made_progress = false;
        for cursor in &mut cursors {
            if let Some(event) = cursor.next() {
                result.push(event);
                made_progress = true;
                if result.len() >= max_evidence {
                    return result;
                }
            }
        }
        // A full pass without taking anything means every group is drained.
        if !made_progress {
            return result;
        }
    }
}
// Unit tests are kept in a separate file and only compiled for test builds.
#[cfg(test)]
#[path = "timeline_seed_tests.rs"]
mod tests;

View File

@@ -1,512 +0,0 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Open a fresh in-memory database and apply all migrations to it.
fn setup_test_db() -> Connection {
    let db = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Insert the fixture project `group/project` and return its local row id.
fn insert_test_project(conn: &Connection) -> i64 {
    let sql = "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')";
    conn.execute(sql, []).unwrap();
    conn.last_insert_rowid()
}
/// Insert an opened issue with the given `iid` and return its local row id.
///
/// The GitLab id is derived as `iid * 100`.
fn insert_test_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    let sql = "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test issue', 'opened', 'alice', 1000, 2000, 3000)";
    let gitlab_id = iid * 100;
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid])
        .unwrap();
    conn.last_insert_rowid()
}
/// Insert an opened merge request with the given `iid` and return its local row id.
///
/// The GitLab id is derived as `iid * 100`.
fn insert_test_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    let sql = "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)";
    let gitlab_id = iid * 100;
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid])
        .unwrap();
    conn.last_insert_rowid()
}
/// Insert a searchable document for the given source and return its row id.
///
/// The content hash is synthesized from `source_id` as `hash_<source_id>`.
fn insert_document(
    conn: &Connection,
    source_type: &str,
    source_id: i64,
    project_id: i64,
    content: &str,
) -> i64 {
    let sql = "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) VALUES (?1, ?2, ?3, ?4, ?5)";
    let content_hash = format!("hash_{source_id}");
    conn.execute(
        sql,
        rusqlite::params![source_type, source_id, project_id, content, content_hash],
    )
    .unwrap();
    conn.last_insert_rowid()
}
fn insert_discussion(
conn: &Connection,
project_id: i64,
issue_id: Option<i64>,
mr_id: Option<i64>,
) -> i64 {
let noteable_type = if issue_id.is_some() {
"Issue"
} else {
"MergeRequest"
};
conn.execute(
"INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, ?3, ?4, ?5, 0)",
rusqlite::params![format!("disc_{}", rand::random::<u32>()), project_id, issue_id, mr_id, noteable_type],
)
.unwrap();
conn.last_insert_rowid()
}
/// Insert a note authored by `alice` into a discussion and return its row id.
///
/// The GitLab id is a random `u32` widened to `i64`; `is_system` is stored as 0 or 1.
fn insert_note(
    conn: &Connection,
    discussion_id: i64,
    project_id: i64,
    body: &str,
    is_system: bool,
) -> i64 {
    let gitlab_id = i64::from(rand::random::<u32>());
    let system_flag = i32::from(is_system);
    let sql = "INSERT INTO notes (gitlab_id, discussion_id, project_id, is_system, author_username, body, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, ?4, 'alice', ?5, 5000, 5000, 5000)";
    conn.execute(
        sql,
        rusqlite::params![gitlab_id, discussion_id, project_id, system_flag, body],
    )
    .unwrap();
    conn.last_insert_rowid()
}
/// An empty query produces neither seed entities nor evidence notes.
#[tokio::test]
async fn test_seed_empty_query_returns_empty() {
    let db = setup_test_db();

    let seeded = seed_timeline(&db, None, "", None, None, 50, 10)
        .await
        .unwrap();

    assert!(seeded.seed_entities.is_empty());
    assert!(seeded.evidence_notes.is_empty());
}
/// A query that matches no document yields no seed entities.
#[tokio::test]
async fn test_seed_no_matches_returns_empty() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 1);
    insert_document(&db, "issue", issue, project, "unrelated content here");

    let seeded = seed_timeline(&db, None, "nonexistent_xyzzy_query", None, None, 50, 10)
        .await
        .unwrap();

    assert!(seeded.seed_entities.is_empty());
}
/// A matching issue document resolves to that issue as the only seed.
#[tokio::test]
async fn test_seed_finds_issue() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 42);
    insert_document(
        &db,
        "issue",
        issue,
        project,
        "authentication error in login flow",
    );

    let seeded = seed_timeline(&db, None, "authentication", None, None, 50, 10)
        .await
        .unwrap();

    assert_eq!(seeded.seed_entities.len(), 1);
    let seed = &seeded.seed_entities[0];
    assert_eq!(seed.entity_type, "issue");
    assert_eq!(seed.entity_iid, 42);
    assert_eq!(seed.project_path, "group/project");
}
/// A matching merge-request document resolves to that MR as the only seed.
#[tokio::test]
async fn test_seed_finds_mr() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let mr = insert_test_mr(&db, project, 99);
    insert_document(&db, "merge_request", mr, project, "fix authentication bug");

    let seeded = seed_timeline(&db, None, "authentication", None, None, 50, 10)
        .await
        .unwrap();

    assert_eq!(seeded.seed_entities.len(), 1);
    let seed = &seeded.seed_entities[0];
    assert_eq!(seed.entity_type, "merge_request");
    assert_eq!(seed.entity_iid, 99);
}
/// Two matching documents that belong to the same issue yield a single seed.
#[tokio::test]
async fn test_seed_deduplicates_entities() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 10);

    // First document: the issue itself.
    insert_document(
        &db,
        "issue",
        issue,
        project,
        "authentication error first doc",
    );
    // Second document: a discussion attached to that same issue.
    let discussion = insert_discussion(&db, project, Some(issue), None);
    insert_document(
        &db,
        "discussion",
        discussion,
        project,
        "authentication error second doc",
    );

    let seeded = seed_timeline(&db, None, "authentication", None, None, 50, 10)
        .await
        .unwrap();

    // Both documents map to one issue, so only one seed is expected.
    assert_eq!(seeded.seed_entities.len(), 1);
    assert_eq!(seeded.seed_entities[0].entity_iid, 10);
}
/// A matching discussion document is resolved to its parent issue.
#[tokio::test]
async fn test_seed_resolves_discussion_to_parent() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 7);
    let discussion = insert_discussion(&db, project, Some(issue), None);
    insert_document(
        &db,
        "discussion",
        discussion,
        project,
        "deployment pipeline failed",
    );

    let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 10)
        .await
        .unwrap();

    assert_eq!(seeded.seed_entities.len(), 1);
    let seed = &seeded.seed_entities[0];
    assert_eq!(seed.entity_type, "issue");
    assert_eq!(seed.entity_iid, 7);
}
/// Evidence notes are capped at `max_evidence`.
///
/// Fifteen discussions, each with one matching document and one non-system
/// note, are available, so a cap of five must return exactly five notes.
/// Asserting equality (rather than `<= 5`) also guards against the evidence
/// query returning nothing at all.
#[tokio::test]
async fn test_seed_evidence_capped() {
    let conn = setup_test_db();
    let project_id = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, project_id, 1);

    // Create 15 discussion documents with notes about "deployment"
    for i in 0..15 {
        let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
        insert_document(
            &conn,
            "discussion",
            disc_id,
            project_id,
            &format!("deployment issue number {i}"),
        );
        insert_note(
            &conn,
            disc_id,
            project_id,
            &format!("deployment note {i}"),
            false,
        );
    }

    let result = seed_timeline(&conn, None, "deployment", None, None, 50, 5)
        .await
        .unwrap();
    assert_eq!(result.evidence_notes.len(), 5);
}
/// Evidence snippets are truncated to at most 200 characters.
#[tokio::test]
async fn test_seed_evidence_snippet_truncated() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 1);
    let discussion = insert_discussion(&db, project, Some(issue), None);
    insert_document(
        &db,
        "discussion",
        discussion,
        project,
        "deployment configuration",
    );

    // A 500-character note body, well above the snippet limit.
    let oversized_body = "x".repeat(500);
    insert_note(&db, discussion, project, &oversized_body, false);

    let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 10)
        .await
        .unwrap();
    assert!(!seeded.evidence_notes.is_empty());

    let TimelineEventType::NoteEvidence { snippet, .. } = &seeded.evidence_notes[0].event_type
    else {
        panic!("Expected NoteEvidence");
    };
    assert!(snippet.chars().count() <= 200);
}
/// With a project filter, only entities from that project are seeded.
#[tokio::test]
async fn test_seed_respects_project_filter() {
    let db = setup_test_db();
    let first_project = insert_test_project(&db);

    // A second project holding an identical matching document.
    let second_project_sql = "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (2, 'other/repo', 'https://gitlab.com/other/repo')";
    db.execute(second_project_sql, []).unwrap();
    let second_project = db.last_insert_rowid();

    let first_issue = insert_test_issue(&db, first_project, 1);
    insert_document(
        &db,
        "issue",
        first_issue,
        first_project,
        "authentication error",
    );
    let second_issue = insert_test_issue(&db, second_project, 2);
    insert_document(
        &db,
        "issue",
        second_issue,
        second_project,
        "authentication error",
    );

    // Restrict seeding to the first project.
    let only_first = Some(first_project);
    let seeded = seed_timeline(&db, None, "authentication", only_first, None, 50, 10)
        .await
        .unwrap();

    assert_eq!(seeded.seed_entities.len(), 1);
    assert_eq!(seeded.seed_entities[0].project_path, "group/project");
}
// ─── Matched discussion tests ───────────────────────────────────────────────
/// A matching discussion document is reported as a matched discussion that
/// points at its parent issue.
#[tokio::test]
async fn test_seed_captures_matched_discussions_from_discussion_doc() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 1);
    let discussion = insert_discussion(&db, project, Some(issue), None);
    insert_document(
        &db,
        "discussion",
        discussion,
        project,
        "deployment pipeline authentication",
    );

    let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 10)
        .await
        .unwrap();

    assert_eq!(seeded.matched_discussions.len(), 1);
    let matched = &seeded.matched_discussions[0];
    assert_eq!(matched.discussion_id, discussion);
    assert_eq!(matched.entity_type, "issue");
    assert_eq!(matched.entity_id, issue);
}
/// A matching note document is reported through the discussion that owns the note.
#[tokio::test]
async fn test_seed_captures_matched_discussions_from_note_doc() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 1);
    let discussion = insert_discussion(&db, project, Some(issue), None);
    let note = insert_note(&db, discussion, project, "note about deployment", false);
    insert_document(
        &db,
        "note",
        note,
        project,
        "deployment configuration details",
    );

    let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 10)
        .await
        .unwrap();

    assert_eq!(
        seeded.matched_discussions.len(),
        1,
        "Note doc should resolve to parent discussion"
    );
    let matched = &seeded.matched_discussions[0];
    assert_eq!(matched.discussion_id, discussion);
    assert_eq!(matched.entity_type, "issue");
}
/// When a discussion document and a note document both match and refer to the
/// same discussion, that discussion must be reported only once.
#[tokio::test]
async fn test_seed_deduplicates_matched_discussions() {
    let conn = setup_test_db();
    let project_id = insert_test_project(&conn);
    let parent_issue = insert_test_issue(&conn, project_id, 1);
    let discussion = insert_discussion(&conn, project_id, Some(parent_issue), None);

    // First hit: the discussion's own document.
    insert_document(
        &conn,
        "discussion",
        discussion,
        project_id,
        "deployment pipeline first doc",
    );

    // Second hit: a note document belonging to the same discussion.
    let note = insert_note(&conn, discussion, project_id, "deployment note", false);
    insert_document(
        &conn,
        "note",
        note,
        project_id,
        "deployment pipeline second doc",
    );

    let seeded = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
        .await
        .unwrap();

    let matched_count = seeded.matched_discussions.len();
    assert_eq!(
        matched_count, 1,
        "Same discussion_id from two docs should deduplicate"
    );
}
/// A matched discussion attached to a merge request must report
/// "merge_request" as its entity type and the MR's local id as its entity id.
#[tokio::test]
async fn test_seed_matched_discussions_have_correct_parent_entity() {
    let conn = setup_test_db();
    let project_id = insert_test_project(&conn);
    let parent_mr = insert_test_mr(&conn, project_id, 99);
    // No issue parent: the discussion hangs off the merge request only.
    let discussion = insert_discussion(&conn, project_id, None, Some(parent_mr));
    insert_document(
        &conn,
        "discussion",
        discussion,
        project_id,
        "deployment pipeline for merge request",
    );

    let seeded = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
        .await
        .unwrap();

    let matched = &seeded.matched_discussions;
    assert_eq!(matched.len(), 1);
    let only = &matched[0];
    assert_eq!(only.entity_type, "merge_request");
    assert_eq!(only.entity_id, parent_mr);
}
// ─── seed_timeline_direct tests ─────────────────────────────────────────────
/// Direct seeding by entity type and iid must yield exactly one seed entity
/// describing the requested issue.
#[test]
fn test_direct_seed_resolves_entity() {
    let conn = setup_test_db();
    let project_id = insert_test_project(&conn);
    insert_test_issue(&conn, project_id, 42);

    let seeded = seed_timeline_direct(&conn, "issue", 42, None).unwrap();

    let seeds = &seeded.seed_entities;
    assert_eq!(seeds.len(), 1);
    let seed = &seeds[0];
    assert_eq!(seed.entity_type, "issue");
    assert_eq!(seed.entity_iid, 42);
    assert_eq!(seed.project_path, "group/project");
}
/// Direct seeding must report every discussion attached to the seeded issue.
#[test]
fn test_direct_seed_gathers_all_discussions() {
    let conn = setup_test_db();
    let project_id = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, project_id, 42);

    // Three separate discussions on the same issue.
    let first = insert_discussion(&conn, project_id, Some(issue_id), None);
    let second = insert_discussion(&conn, project_id, Some(issue_id), None);
    let third = insert_discussion(&conn, project_id, Some(issue_id), None);

    let seeded = seed_timeline_direct(&conn, "issue", 42, None).unwrap();
    assert_eq!(seeded.matched_discussions.len(), 3);

    let found: Vec<i64> = seeded
        .matched_discussions
        .iter()
        .map(|matched| matched.discussion_id)
        .collect();
    // Each inserted discussion must be present, in any order.
    for expected in [first, second, third] {
        assert!(found.contains(&expected));
    }
}
/// Direct seeding must leave `evidence_notes` empty even when the seeded
/// issue has a discussion containing a note.
#[test]
fn test_direct_seed_no_evidence_notes() {
    let conn = setup_test_db();
    let project_id = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, project_id, 42);
    let discussion = insert_discussion(&conn, project_id, Some(issue_id), None);
    // A note exists, but it must not be surfaced as evidence.
    insert_note(&conn, discussion, project_id, "some note body", false);

    let seeded = seed_timeline_direct(&conn, "issue", 42, None).unwrap();

    let evidence = &seeded.evidence_notes;
    assert!(
        evidence.is_empty(),
        "Direct seeding should not produce evidence notes"
    );
}
/// The result of direct seeding must report its search mode as "direct".
#[test]
fn test_direct_seed_search_mode_is_direct() {
    let conn = setup_test_db();
    let project_id = insert_test_project(&conn);
    insert_test_issue(&conn, project_id, 42);

    let seeded = seed_timeline_direct(&conn, "issue", 42, None).unwrap();

    assert_eq!(seeded.search_mode, "direct");
}
/// Direct seeding for an iid with no matching issue must return an error.
#[test]
fn test_direct_seed_not_found() {
    let conn = setup_test_db();
    // A project exists, but no issue is inserted into it.
    insert_test_project(&conn);

    let outcome = seed_timeline_direct(&conn, "issue", 999, None);

    assert!(outcome.is_err());
}
/// Direct seeding of a merge request must return the MR as the sole seed
/// entity together with the discussion attached to it.
#[test]
fn test_direct_seed_mr() {
    let conn = setup_test_db();
    let project_id = insert_test_project(&conn);
    let parent_mr = insert_test_mr(&conn, project_id, 99);
    let discussion = insert_discussion(&conn, project_id, None, Some(parent_mr));

    let seeded = seed_timeline_direct(&conn, "merge_request", 99, None).unwrap();

    // Seed entity: the merge request itself.
    let seeds = &seeded.seed_entities;
    assert_eq!(seeds.len(), 1);
    assert_eq!(seeds[0].entity_type, "merge_request");
    assert_eq!(seeds[0].entity_iid, 99);

    // Matched discussion: the one attached to the merge request.
    let matched = &seeded.matched_discussions;
    assert_eq!(matched.len(), 1);
    assert_eq!(matched[0].discussion_id, discussion);
}