feat(core): Add cross-reference extraction infrastructure
Introduces two new modules for extracting and storing entity cross-references from GitLab data: note_parser.rs: - Parses system notes for "mentioned in" and "closed by" patterns - Extracts cross-project references (group/project#42, group/project!123) - Uses lazy-compiled regexes for performance - Handles both issue (#) and MR (!) sigils - Provides extract_refs_from_system_notes() for batch processing references.rs: - Extracts refs from resource_state_events table (API-sourced closes links) - Provides insert_entity_reference() for storing discovered references - Includes resolution helpers: resolve_issue_local_id, resolve_mr_local_id, resolve_project_path for converting iids to internal IDs - Enables cross-project reference resolution These modules power the entity_references table, enabling features like "find all MRs that close this issue" and "find all issues mentioned in this MR". Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
551
src/core/references.rs
Normal file
551
src/core/references.rs
Normal file
@@ -0,0 +1,551 @@
|
||||
use rusqlite::{Connection, OptionalExtension};
|
||||
use tracing::info;
|
||||
|
||||
use super::error::Result;
|
||||
use super::time::now_ms;
|
||||
|
||||
pub fn extract_refs_from_state_events(conn: &Connection, project_id: i64) -> Result<usize> {
|
||||
let changes = conn.execute(
|
||||
"INSERT OR IGNORE INTO entity_references (
|
||||
project_id,
|
||||
source_entity_type, source_entity_id,
|
||||
target_entity_type, target_entity_id,
|
||||
reference_type, source_method, created_at
|
||||
)
|
||||
SELECT
|
||||
rse.project_id,
|
||||
'merge_request',
|
||||
mr.id,
|
||||
'issue',
|
||||
rse.issue_id,
|
||||
'closes',
|
||||
'api',
|
||||
rse.created_at
|
||||
FROM resource_state_events rse
|
||||
JOIN merge_requests mr
|
||||
ON mr.project_id = rse.project_id
|
||||
AND mr.iid = rse.source_merge_request_iid
|
||||
WHERE rse.source_merge_request_iid IS NOT NULL
|
||||
AND rse.issue_id IS NOT NULL
|
||||
AND rse.project_id = ?1",
|
||||
rusqlite::params![project_id],
|
||||
)?;
|
||||
|
||||
if changes > 0 {
|
||||
info!(
|
||||
project_id,
|
||||
references_inserted = changes,
|
||||
"Extracted cross-references from state events"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(changes)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct EntityReference<'a> {
|
||||
pub project_id: i64,
|
||||
pub source_entity_type: &'a str,
|
||||
pub source_entity_id: i64,
|
||||
pub target_entity_type: &'a str,
|
||||
pub target_entity_id: Option<i64>,
|
||||
pub target_project_path: Option<&'a str>,
|
||||
pub target_entity_iid: Option<i64>,
|
||||
pub reference_type: &'a str,
|
||||
pub source_method: &'a str,
|
||||
}
|
||||
|
||||
pub fn insert_entity_reference(conn: &Connection, ref_: &EntityReference<'_>) -> Result<bool> {
|
||||
let now = now_ms();
|
||||
let changes = conn.execute(
|
||||
"INSERT OR IGNORE INTO entity_references \
|
||||
(project_id, source_entity_type, source_entity_id, \
|
||||
target_entity_type, target_entity_id, target_project_path, target_entity_iid, \
|
||||
reference_type, source_method, created_at) \
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
|
||||
rusqlite::params![
|
||||
ref_.project_id,
|
||||
ref_.source_entity_type,
|
||||
ref_.source_entity_id,
|
||||
ref_.target_entity_type,
|
||||
ref_.target_entity_id,
|
||||
ref_.target_project_path,
|
||||
ref_.target_entity_iid,
|
||||
ref_.reference_type,
|
||||
ref_.source_method,
|
||||
now,
|
||||
],
|
||||
)?;
|
||||
|
||||
Ok(changes > 0)
|
||||
}
|
||||
|
||||
pub fn resolve_issue_local_id(
|
||||
conn: &Connection,
|
||||
project_id: i64,
|
||||
issue_iid: i64,
|
||||
) -> Result<Option<i64>> {
|
||||
let mut stmt =
|
||||
conn.prepare_cached("SELECT id FROM issues WHERE project_id = ?1 AND iid = ?2")?;
|
||||
|
||||
let result = stmt
|
||||
.query_row(rusqlite::params![project_id, issue_iid], |row| row.get(0))
|
||||
.optional()?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn resolve_project_path(conn: &Connection, gitlab_project_id: i64) -> Result<Option<String>> {
|
||||
let mut stmt = conn
|
||||
.prepare_cached("SELECT path_with_namespace FROM projects WHERE gitlab_project_id = ?1")?;
|
||||
|
||||
let result = stmt
|
||||
.query_row(rusqlite::params![gitlab_project_id], |row| row.get(0))
|
||||
.optional()?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn count_references_for_source(
|
||||
conn: &Connection,
|
||||
source_entity_type: &str,
|
||||
source_entity_id: i64,
|
||||
) -> Result<usize> {
|
||||
let count: i64 = conn.query_row(
|
||||
"SELECT COUNT(*) FROM entity_references \
|
||||
WHERE source_entity_type = ?1 AND source_entity_id = ?2",
|
||||
rusqlite::params![source_entity_type, source_entity_id],
|
||||
|row| row.get(0),
|
||||
)?;
|
||||
|
||||
Ok(count as usize)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::core::db::{create_connection, run_migrations};
|
||||
use std::path::Path;
|
||||
|
||||
fn setup_test_db() -> Connection {
|
||||
let conn = create_connection(Path::new(":memory:")).unwrap();
|
||||
run_migrations(&conn).unwrap();
|
||||
conn
|
||||
}
|
||||
|
||||
fn seed_project_issue_mr(conn: &Connection) -> (i64, i64, i64) {
|
||||
conn.execute(
|
||||
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
|
||||
VALUES (1, 100, 'group/repo', 'https://gitlab.example.com/group/repo', 1000, 2000)",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at)
|
||||
VALUES (1, 200, 10, 1, 'Test issue', 'closed', 1000, 2000, 2000)",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at, source_branch, target_branch)
|
||||
VALUES (1, 300, 5, 1, 'Test MR', 'merged', 1000, 2000, 2000, 'feature', 'main')",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
(1, 1, 1)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_refs_from_state_events_basic() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO resource_state_events
|
||||
(gitlab_id, project_id, issue_id, merge_request_id, state,
|
||||
created_at, source_merge_request_iid)
|
||||
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
|
||||
rusqlite::params![project_id, issue_id],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let count = extract_refs_from_state_events(&conn, project_id).unwrap();
|
||||
assert_eq!(count, 1, "Should insert exactly one reference");
|
||||
|
||||
let (src_type, src_id, tgt_type, tgt_id, ref_type, method): (
|
||||
String,
|
||||
i64,
|
||||
String,
|
||||
i64,
|
||||
String,
|
||||
String,
|
||||
) = conn
|
||||
.query_row(
|
||||
"SELECT source_entity_type, source_entity_id,
|
||||
target_entity_type, target_entity_id,
|
||||
reference_type, source_method
|
||||
FROM entity_references WHERE project_id = ?1",
|
||||
[project_id],
|
||||
|row| {
|
||||
Ok((
|
||||
row.get(0)?,
|
||||
row.get(1)?,
|
||||
row.get(2)?,
|
||||
row.get(3)?,
|
||||
row.get(4)?,
|
||||
row.get(5)?,
|
||||
))
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(src_type, "merge_request");
|
||||
assert_eq!(src_id, mr_id, "Source should be the MR's local DB id");
|
||||
assert_eq!(tgt_type, "issue");
|
||||
assert_eq!(tgt_id, issue_id, "Target should be the issue's local DB id");
|
||||
assert_eq!(ref_type, "closes");
|
||||
assert_eq!(method, "api");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_refs_dedup_with_closes_issues() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO entity_references
|
||||
(project_id, source_entity_type, source_entity_id,
|
||||
target_entity_type, target_entity_id,
|
||||
reference_type, source_method, created_at)
|
||||
VALUES (?1, 'merge_request', ?2, 'issue', ?3, 'closes', 'api', 3000)",
|
||||
rusqlite::params![project_id, mr_id, issue_id],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO resource_state_events
|
||||
(gitlab_id, project_id, issue_id, merge_request_id, state,
|
||||
created_at, source_merge_request_iid)
|
||||
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
|
||||
rusqlite::params![project_id, issue_id],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let count = extract_refs_from_state_events(&conn, project_id).unwrap();
|
||||
assert_eq!(count, 0, "Should not insert duplicate reference");
|
||||
|
||||
let total: i64 = conn
|
||||
.query_row(
|
||||
"SELECT COUNT(*) FROM entity_references WHERE project_id = ?1",
|
||||
[project_id],
|
||||
|row| row.get(0),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(total, 1, "Should still have exactly one reference");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_refs_no_source_mr() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO resource_state_events
|
||||
(gitlab_id, project_id, issue_id, merge_request_id, state,
|
||||
created_at, source_merge_request_iid)
|
||||
VALUES (1, ?1, ?2, NULL, 'closed', 3000, NULL)",
|
||||
rusqlite::params![project_id, issue_id],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let count = extract_refs_from_state_events(&conn, project_id).unwrap();
|
||||
assert_eq!(count, 0, "Should not create refs when no source MR");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_refs_mr_not_synced() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO resource_state_events
|
||||
(gitlab_id, project_id, issue_id, merge_request_id, state,
|
||||
created_at, source_merge_request_iid)
|
||||
VALUES (2, ?1, ?2, NULL, 'closed', 3000, 999)",
|
||||
rusqlite::params![project_id, issue_id],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let count = extract_refs_from_state_events(&conn, project_id).unwrap();
|
||||
assert_eq!(
|
||||
count, 0,
|
||||
"Should not create ref when MR is not synced locally"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_refs_idempotent() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO resource_state_events
|
||||
(gitlab_id, project_id, issue_id, merge_request_id, state,
|
||||
created_at, source_merge_request_iid)
|
||||
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
|
||||
rusqlite::params![project_id, issue_id],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let count1 = extract_refs_from_state_events(&conn, project_id).unwrap();
|
||||
assert_eq!(count1, 1);
|
||||
|
||||
let count2 = extract_refs_from_state_events(&conn, project_id).unwrap();
|
||||
assert_eq!(count2, 0, "Second run should insert nothing (idempotent)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_refs_multiple_events_same_mr_issue() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO resource_state_events
|
||||
(gitlab_id, project_id, issue_id, merge_request_id, state,
|
||||
created_at, source_merge_request_iid)
|
||||
VALUES (1, ?1, ?2, NULL, 'closed', 3000, 5)",
|
||||
rusqlite::params![project_id, issue_id],
|
||||
)
|
||||
.unwrap();
|
||||
conn.execute(
|
||||
"INSERT INTO resource_state_events
|
||||
(gitlab_id, project_id, issue_id, merge_request_id, state,
|
||||
created_at, source_merge_request_iid)
|
||||
VALUES (2, ?1, ?2, NULL, 'closed', 4000, 5)",
|
||||
rusqlite::params![project_id, issue_id],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let count = extract_refs_from_state_events(&conn, project_id).unwrap();
|
||||
assert!(count <= 2, "At most 2 inserts attempted");
|
||||
|
||||
let total: i64 = conn
|
||||
.query_row(
|
||||
"SELECT COUNT(*) FROM entity_references WHERE project_id = ?1",
|
||||
[project_id],
|
||||
|row| row.get(0),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
total, 1,
|
||||
"Only one unique reference should exist for same MR->issue pair"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_refs_scoped_to_project() {
|
||||
let conn = setup_test_db();
|
||||
seed_project_issue_mr(&conn);
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
|
||||
VALUES (2, 101, 'group/other', 'https://gitlab.example.com/group/other', 1000, 2000)",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
conn.execute(
|
||||
"INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at)
|
||||
VALUES (2, 201, 10, 2, 'Other issue', 'closed', 1000, 2000, 2000)",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
conn.execute(
|
||||
"INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at, source_branch, target_branch)
|
||||
VALUES (2, 301, 5, 2, 'Other MR', 'merged', 1000, 2000, 2000, 'feature', 'main')",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO resource_state_events
|
||||
(gitlab_id, project_id, issue_id, merge_request_id, state,
|
||||
created_at, source_merge_request_iid)
|
||||
VALUES (1, 1, 1, NULL, 'closed', 3000, 5)",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
conn.execute(
|
||||
"INSERT INTO resource_state_events
|
||||
(gitlab_id, project_id, issue_id, merge_request_id, state,
|
||||
created_at, source_merge_request_iid)
|
||||
VALUES (2, 2, 2, NULL, 'closed', 3000, 5)",
|
||||
[],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let count = extract_refs_from_state_events(&conn, 1).unwrap();
|
||||
assert_eq!(count, 1);
|
||||
|
||||
let total: i64 = conn
|
||||
.query_row("SELECT COUNT(*) FROM entity_references", [], |row| {
|
||||
row.get(0)
|
||||
})
|
||||
.unwrap();
|
||||
assert_eq!(total, 1, "Only project 1 refs should be created");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_entity_reference_creates_row() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
let ref_ = EntityReference {
|
||||
project_id,
|
||||
source_entity_type: "merge_request",
|
||||
source_entity_id: mr_id,
|
||||
target_entity_type: "issue",
|
||||
target_entity_id: Some(issue_id),
|
||||
target_project_path: None,
|
||||
target_entity_iid: None,
|
||||
reference_type: "closes",
|
||||
source_method: "api",
|
||||
};
|
||||
|
||||
let inserted = insert_entity_reference(&conn, &ref_).unwrap();
|
||||
assert!(inserted);
|
||||
|
||||
let count = count_references_for_source(&conn, "merge_request", mr_id).unwrap();
|
||||
assert_eq!(count, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_entity_reference_idempotent() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
let ref_ = EntityReference {
|
||||
project_id,
|
||||
source_entity_type: "merge_request",
|
||||
source_entity_id: mr_id,
|
||||
target_entity_type: "issue",
|
||||
target_entity_id: Some(issue_id),
|
||||
target_project_path: None,
|
||||
target_entity_iid: None,
|
||||
reference_type: "closes",
|
||||
source_method: "api",
|
||||
};
|
||||
|
||||
let first = insert_entity_reference(&conn, &ref_).unwrap();
|
||||
assert!(first);
|
||||
|
||||
let second = insert_entity_reference(&conn, &ref_).unwrap();
|
||||
assert!(!second, "Duplicate insert should be ignored");
|
||||
|
||||
let count = count_references_for_source(&conn, "merge_request", mr_id).unwrap();
|
||||
assert_eq!(count, 1, "Still just one reference");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_entity_reference_cross_project_unresolved() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, _issue_id, mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
let ref_ = EntityReference {
|
||||
project_id,
|
||||
source_entity_type: "merge_request",
|
||||
source_entity_id: mr_id,
|
||||
target_entity_type: "issue",
|
||||
target_entity_id: None,
|
||||
target_project_path: Some("other-group/other-project"),
|
||||
target_entity_iid: Some(99),
|
||||
reference_type: "closes",
|
||||
source_method: "api",
|
||||
};
|
||||
|
||||
let inserted = insert_entity_reference(&conn, &ref_).unwrap();
|
||||
assert!(inserted);
|
||||
|
||||
let (target_id, target_path, target_iid): (Option<i64>, Option<String>, Option<i64>) = conn
|
||||
.query_row(
|
||||
"SELECT target_entity_id, target_project_path, target_entity_iid \
|
||||
FROM entity_references WHERE source_entity_id = ?1",
|
||||
[mr_id],
|
||||
|row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(target_id.is_none());
|
||||
assert_eq!(target_path, Some("other-group/other-project".to_string()));
|
||||
assert_eq!(target_iid, Some(99));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_multiple_closes_references() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, issue_id, mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, created_at, updated_at, last_seen_at)
|
||||
VALUES (10, 210, 11, ?1, 'Second issue', 'opened', 1000, 2000, 2000)",
|
||||
rusqlite::params![project_id],
|
||||
)
|
||||
.unwrap();
|
||||
let issue_id_2 = 10i64;
|
||||
|
||||
for target_id in [issue_id, issue_id_2] {
|
||||
let ref_ = EntityReference {
|
||||
project_id,
|
||||
source_entity_type: "merge_request",
|
||||
source_entity_id: mr_id,
|
||||
target_entity_type: "issue",
|
||||
target_entity_id: Some(target_id),
|
||||
target_project_path: None,
|
||||
target_entity_iid: None,
|
||||
reference_type: "closes",
|
||||
source_method: "api",
|
||||
};
|
||||
insert_entity_reference(&conn, &ref_).unwrap();
|
||||
}
|
||||
|
||||
let count = count_references_for_source(&conn, "merge_request", mr_id).unwrap();
|
||||
assert_eq!(count, 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_issue_local_id_found() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, issue_id, _mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
let resolved = resolve_issue_local_id(&conn, project_id, 10).unwrap();
|
||||
assert_eq!(resolved, Some(issue_id));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_issue_local_id_not_found() {
|
||||
let conn = setup_test_db();
|
||||
let (project_id, _issue_id, _mr_id) = seed_project_issue_mr(&conn);
|
||||
|
||||
let resolved = resolve_issue_local_id(&conn, project_id, 999).unwrap();
|
||||
assert!(resolved.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_project_path_found() {
|
||||
let conn = setup_test_db();
|
||||
seed_project_issue_mr(&conn);
|
||||
|
||||
let path = resolve_project_path(&conn, 100).unwrap();
|
||||
assert_eq!(path, Some("group/repo".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_project_path_not_found() {
|
||||
let conn = setup_test_db();
|
||||
|
||||
let path = resolve_project_path(&conn, 999).unwrap();
|
||||
assert!(path.is_none());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user