feat(core): Add cross-reference extraction infrastructure

Introduces two new modules for extracting and storing entity cross-references
from GitLab data:

note_parser.rs:
- Parses system notes for "mentioned in" and "closed by" patterns
- Extracts cross-project references (group/project#42, group/project!123)
- Uses lazy-compiled regexes for performance
- Handles both issue (#) and MR (!) sigils
- Provides extract_refs_from_system_notes() for batch processing

references.rs:
- Extracts refs from resource_state_events table (API-sourced closes links)
- Provides insert_entity_reference() for storing discovered references
- Includes resolution helpers: resolve_issue_local_id, resolve_mr_local_id,
  resolve_project_path for converting iids to internal IDs
- Enables cross-project reference resolution

These modules power the entity_references table, enabling features like
"find all MRs that close this issue" and "find all issues mentioned in this MR".

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 00:03:13 -05:00
parent 0b6b168043
commit f748570d4d
3 changed files with 1114 additions and 2 deletions

551
src/core/references.rs Normal file
View File

@@ -0,0 +1,551 @@
use rusqlite::{Connection, OptionalExtension};
use tracing::info;
use super::error::Result;
use super::time::now_ms;
pub fn extract_refs_from_state_events(conn: &Connection, project_id: i64) -> Result<usize> {
let changes = conn.execute(
"INSERT OR IGNORE INTO entity_references (
project_id,
source_entity_type, source_entity_id,
target_entity_type, target_entity_id,
reference_type, source_method, created_at
)
SELECT
rse.project_id,
'merge_request',
mr.id,
'issue',
rse.issue_id,
'closes',
'api',
rse.created_at
FROM resource_state_events rse
JOIN merge_requests mr
ON mr.project_id = rse.project_id
AND mr.iid = rse.source_merge_request_iid
WHERE rse.source_merge_request_iid IS NOT NULL
AND rse.issue_id IS NOT NULL
AND rse.project_id = ?1",
rusqlite::params![project_id],
)?;
if changes > 0 {
info!(
project_id,
references_inserted = changes,
"Extracted cross-references from state events"
);
}
Ok(changes)
}
#[derive(Debug, Clone)]
pub struct EntityReference<'a> {
pub project_id: i64,
pub source_entity_type: &'a str,
pub source_entity_id: i64,
pub target_entity_type: &'a str,
pub target_entity_id: Option<i64>,
pub target_project_path: Option<&'a str>,
pub target_entity_iid: Option<i64>,
pub reference_type: &'a str,
pub source_method: &'a str,
}
pub fn insert_entity_reference(conn: &Connection, ref_: &EntityReference<'_>) -> Result<bool> {
let now = now_ms();
let changes = conn.execute(
"INSERT OR IGNORE INTO entity_references \
(project_id, source_entity_type, source_entity_id, \
target_entity_type, target_entity_id, target_project_path, target_entity_iid, \
reference_type, source_method, created_at) \
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
rusqlite::params![
ref_.project_id,
ref_.source_entity_type,
ref_.source_entity_id,
ref_.target_entity_type,
ref_.target_entity_id,
ref_.target_project_path,
ref_.target_entity_iid,
ref_.reference_type,
ref_.source_method,
now,
],
)?;
Ok(changes > 0)
}
pub fn resolve_issue_local_id(
conn: &Connection,
project_id: i64,
issue_iid: i64,
) -> Result<Option<i64>> {
let mut stmt =
conn.prepare_cached("SELECT id FROM issues WHERE project_id = ?1 AND iid = ?2")?;
let result = stmt
.query_row(rusqlite::params![project_id, issue_iid], |row| row.get(0))
.optional()?;
Ok(result)
}
pub fn resolve_project_path(conn: &Connection, gitlab_project_id: i64) -> Result<Option<String>> {
let mut stmt = conn
.prepare_cached("SELECT path_with_namespace FROM projects WHERE gitlab_project_id = ?1")?;
let result = stmt
.query_row(rusqlite::params![gitlab_project_id], |row| row.get(0))
.optional()?;
Ok(result)
}
pub fn count_references_for_source(
conn: &Connection,
source_entity_type: &str,
source_entity_id: i64,
) -> Result<usize> {
let count: i64 = conn.query_row(
"SELECT COUNT(*) FROM entity_references \
WHERE source_entity_type = ?1 AND source_entity_id = ?2",
rusqlite::params![source_entity_type, source_entity_id],
|row| row.get(0),
)?;
Ok(count as usize)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
fn setup_test_db() -> Connection {
let conn = create_connection(Path::new(":memory:")).unwrap();
run_migrations(&conn).unwrap();
conn
}
fn seed_project_issue_mr(conn: &Connection) -> (i64, i64, i64) {
conn.execute(
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'group/repo', 'https://gitlab.example.com/group/repo', 1000, 2000)",
[],
)
.unwrap();
conn.execute(
"INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at)
VALUES (1, 200, 10, 1, 'Test issue', 'closed', 1000, 2000, 2000)",
[],
)
.unwrap();
conn.execute(
"INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at, source_branch, target_branch)
VALUES (1, 300, 5, 1, 'Test MR', 'merged', 1000, 2000, 2000, 'feature', 'main')",
[],
)
.unwrap();
(1, 1, 1)
}
#[test]
fn test_extract_refs_from_state_events_basic() {
let conn = setup_test_db();
let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);
conn.execute(
"INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
rusqlite::params![project_id, issue_id],
)
.unwrap();
let count = extract_refs_from_state_events(&conn, project_id).unwrap();
assert_eq!(count, 1, "Should insert exactly one reference");
let (src_type, src_id, tgt_type, tgt_id, ref_type, method): (
String,
i64,
String,
i64,
String,
String,
) = conn
.query_row(
"SELECT source_entity_type, source_entity_id,
target_entity_type, target_entity_id,
reference_type, source_method
FROM entity_references WHERE project_id = ?1",
[project_id],
|row| {
Ok((
row.get(0)?,
row.get(1)?,
row.get(2)?,
row.get(3)?,
row.get(4)?,
row.get(5)?,
))
},
)
.unwrap();
assert_eq!(src_type, "merge_request");
assert_eq!(src_id, mr_id, "Source should be the MR's local DB id");
assert_eq!(tgt_type, "issue");
assert_eq!(tgt_id, issue_id, "Target should be the issue's local DB id");
assert_eq!(ref_type, "closes");
assert_eq!(method, "api");
}
#[test]
fn test_extract_refs_dedup_with_closes_issues() {
let conn = setup_test_db();
let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);
conn.execute(
"INSERT INTO entity_references
(project_id, source_entity_type, source_entity_id,
target_entity_type, target_entity_id,
reference_type, source_method, created_at)
VALUES (?1, 'merge_request', ?2, 'issue', ?3, 'closes', 'api', 3000)",
rusqlite::params![project_id, mr_id, issue_id],
)
.unwrap();
conn.execute(
"INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
rusqlite::params![project_id, issue_id],
)
.unwrap();
let count = extract_refs_from_state_events(&conn, project_id).unwrap();
assert_eq!(count, 0, "Should not insert duplicate reference");
let total: i64 = conn
.query_row(
"SELECT COUNT(*) FROM entity_references WHERE project_id = ?1",
[project_id],
|row| row.get(0),
)
.unwrap();
assert_eq!(total, 1, "Should still have exactly one reference");
}
#[test]
fn test_extract_refs_no_source_mr() {
let conn = setup_test_db();
let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);
conn.execute(
"INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, ?1, ?2, NULL, 'closed', 3000, NULL)",
rusqlite::params![project_id, issue_id],
)
.unwrap();
let count = extract_refs_from_state_events(&conn, project_id).unwrap();
assert_eq!(count, 0, "Should not create refs when no source MR");
}
#[test]
fn test_extract_refs_mr_not_synced() {
let conn = setup_test_db();
let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);
conn.execute(
"INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (2, ?1, ?2, NULL, 'closed', 3000, 999)",
rusqlite::params![project_id, issue_id],
)
.unwrap();
let count = extract_refs_from_state_events(&conn, project_id).unwrap();
assert_eq!(
count, 0,
"Should not create ref when MR is not synced locally"
);
}
#[test]
fn test_extract_refs_idempotent() {
let conn = setup_test_db();
let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);
conn.execute(
"INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
rusqlite::params![project_id, issue_id],
)
.unwrap();
let count1 = extract_refs_from_state_events(&conn, project_id).unwrap();
assert_eq!(count1, 1);
let count2 = extract_refs_from_state_events(&conn, project_id).unwrap();
assert_eq!(count2, 0, "Second run should insert nothing (idempotent)");
}
#[test]
fn test_extract_refs_multiple_events_same_mr_issue() {
let conn = setup_test_db();
let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);
conn.execute(
"INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
rusqlite::params![project_id, issue_id],
)
.unwrap();
conn.execute(
"INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (2, ?1, ?2, NULL, 'closed', 4000, 5)",
rusqlite::params![project_id, issue_id],
)
.unwrap();
let count = extract_refs_from_state_events(&conn, project_id).unwrap();
assert!(count <= 2, "At most 2 inserts attempted");
let total: i64 = conn
.query_row(
"SELECT COUNT(*) FROM entity_references WHERE project_id = ?1",
[project_id],
|row| row.get(0),
)
.unwrap();
assert_eq!(
total, 1,
"Only one unique reference should exist for same MR->issue pair"
);
}
#[test]
fn test_extract_refs_scoped_to_project() {
let conn = setup_test_db();
seed_project_issue_mr(&conn);
conn.execute(
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (2, 101, 'group/other', 'https://gitlab.example.com/group/other', 1000, 2000)",
[],
)
.unwrap();
conn.execute(
"INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at)
VALUES (2, 201, 10, 2, 'Other issue', 'closed', 1000, 2000, 2000)",
[],
)
.unwrap();
conn.execute(
"INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at, source_branch, target_branch)
VALUES (2, 301, 5, 2, 'Other MR', 'merged', 1000, 2000, 2000, 'feature', 'main')",
[],
)
.unwrap();
conn.execute(
"INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (1, 1, 1, NULL, 'closed', 3000, 5)",
[],
)
.unwrap();
conn.execute(
"INSERT INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
created_at, source_merge_request_iid)
VALUES (2, 2, 2, NULL, 'closed', 3000, 5)",
[],
)
.unwrap();
let count = extract_refs_from_state_events(&conn, 1).unwrap();
assert_eq!(count, 1);
let total: i64 = conn
.query_row("SELECT COUNT(*) FROM entity_references", [], |row| {
row.get(0)
})
.unwrap();
assert_eq!(total, 1, "Only project 1 refs should be created");
}
#[test]
fn test_insert_entity_reference_creates_row() {
let conn = setup_test_db();
let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);
let ref_ = EntityReference {
project_id,
source_entity_type: "merge_request",
source_entity_id: mr_id,
target_entity_type: "issue",
target_entity_id: Some(issue_id),
target_project_path: None,
target_entity_iid: None,
reference_type: "closes",
source_method: "api",
};
let inserted = insert_entity_reference(&conn, &ref_).unwrap();
assert!(inserted);
let count = count_references_for_source(&conn, "merge_request", mr_id).unwrap();
assert_eq!(count, 1);
}
#[test]
fn test_insert_entity_reference_idempotent() {
let conn = setup_test_db();
let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);
let ref_ = EntityReference {
project_id,
source_entity_type: "merge_request",
source_entity_id: mr_id,
target_entity_type: "issue",
target_entity_id: Some(issue_id),
target_project_path: None,
target_entity_iid: None,
reference_type: "closes",
source_method: "api",
};
let first = insert_entity_reference(&conn, &ref_).unwrap();
assert!(first);
let second = insert_entity_reference(&conn, &ref_).unwrap();
assert!(!second, "Duplicate insert should be ignored");
let count = count_references_for_source(&conn, "merge_request", mr_id).unwrap();
assert_eq!(count, 1, "Still just one reference");
}
#[test]
fn test_insert_entity_reference_cross_project_unresolved() {
let conn = setup_test_db();
let (project_id, _issue_id, mr_id) = seed_project_issue_mr(&conn);
let ref_ = EntityReference {
project_id,
source_entity_type: "merge_request",
source_entity_id: mr_id,
target_entity_type: "issue",
target_entity_id: None,
target_project_path: Some("other-group/other-project"),
target_entity_iid: Some(99),
reference_type: "closes",
source_method: "api",
};
let inserted = insert_entity_reference(&conn, &ref_).unwrap();
assert!(inserted);
let (target_id, target_path, target_iid): (Option<i64>, Option<String>, Option<i64>) = conn
.query_row(
"SELECT target_entity_id, target_project_path, target_entity_iid \
FROM entity_references WHERE source_entity_id = ?1",
[mr_id],
|row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
)
.unwrap();
assert!(target_id.is_none());
assert_eq!(target_path, Some("other-group/other-project".to_string()));
assert_eq!(target_iid, Some(99));
}
#[test]
fn test_insert_multiple_closes_references() {
let conn = setup_test_db();
let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);
conn.execute(
"INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at)
VALUES (10, 210, 11, ?1, 'Second issue', 'opened', 1000, 2000, 2000)",
rusqlite::params![project_id],
)
.unwrap();
let issue_id_2 = 10i64;
for target_id in [issue_id, issue_id_2] {
let ref_ = EntityReference {
project_id,
source_entity_type: "merge_request",
source_entity_id: mr_id,
target_entity_type: "issue",
target_entity_id: Some(target_id),
target_project_path: None,
target_entity_iid: None,
reference_type: "closes",
source_method: "api",
};
insert_entity_reference(&conn, &ref_).unwrap();
}
let count = count_references_for_source(&conn, "merge_request", mr_id).unwrap();
assert_eq!(count, 2);
}
#[test]
fn test_resolve_issue_local_id_found() {
let conn = setup_test_db();
let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);
let resolved = resolve_issue_local_id(&conn, project_id, 10).unwrap();
assert_eq!(resolved, Some(issue_id));
}
#[test]
fn test_resolve_issue_local_id_not_found() {
let conn = setup_test_db();
let (project_id, _issue_id, _mr_id) = seed_project_issue_mr(&conn);
let resolved = resolve_issue_local_id(&conn, project_id, 999).unwrap();
assert!(resolved.is_none());
}
#[test]
fn test_resolve_project_path_found() {
let conn = setup_test_db();
seed_project_issue_mr(&conn);
let path = resolve_project_path(&conn, 100).unwrap();
assert_eq!(path, Some("group/repo".to_string()));
}
#[test]
fn test_resolve_project_path_not_found() {
let conn = setup_test_db();
let path = resolve_project_path(&conn, 999).unwrap();
assert!(path.is_none());
}
}