feat(xref): extract cross-references from descriptions, user notes, and fix system note regex
- Fix MENTIONED_RE/CLOSED_BY_RE to match real GitLab format
('mentioned in issue #N' / 'mentioned in merge request !N')
- Add GITLAB_URL_RE + parse_url_refs() for full URL extraction
- Add extract_refs_from_descriptions() -> source_method='description_parse'
- Add extract_refs_from_user_notes() -> source_method='note_parse'
- Wire both into orchestrator after system note extraction
- 36 tests: regex fix, URL parsing, integration, idempotency
This commit is contained in:
@@ -22,20 +22,34 @@ pub struct ExtractResult {
|
||||
pub parse_failures: usize,
|
||||
}
|
||||
|
||||
// GitLab system notes include the entity type word: "mentioned in issue #5"
|
||||
// or "mentioned in merge request !730". The word is mandatory in real data,
|
||||
// but we also keep the old bare-sigil form as a fallback (no data uses it today,
|
||||
// but other GitLab instances might differ).
|
||||
static MENTIONED_RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(
|
||||
r"mentioned in (?:(?P<project>[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P<sigil>[#!])(?P<iid>\d+)",
|
||||
r"mentioned in (?:issue |merge request )?(?:(?P<project>[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P<sigil>[#!])(?P<iid>\d+)",
|
||||
)
|
||||
.expect("mentioned regex is valid")
|
||||
});
|
||||
|
||||
static CLOSED_BY_RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(
|
||||
r"closed by (?:(?P<project>[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P<sigil>[#!])(?P<iid>\d+)",
|
||||
r"closed by (?:issue |merge request )?(?:(?P<project>[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P<sigil>[#!])(?P<iid>\d+)",
|
||||
)
|
||||
.expect("closed_by regex is valid")
|
||||
});
|
||||
|
||||
/// Matches full GitLab URLs like:
|
||||
/// `https://gitlab.example.com/group/project/-/issues/123`
|
||||
/// `https://gitlab.example.com/group/sub/project/-/merge_requests/456`
|
||||
static GITLAB_URL_RE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(
|
||||
r"https?://[^\s/]+/(?P<project>[^\s]+?)/-/(?P<entity_type>issues|merge_requests)/(?P<iid>\d+)",
|
||||
)
|
||||
.expect("gitlab url regex is valid")
|
||||
});
|
||||
|
||||
pub fn parse_cross_refs(body: &str) -> Vec<ParsedCrossRef> {
|
||||
let mut refs = Vec::new();
|
||||
|
||||
@@ -54,6 +68,47 @@ pub fn parse_cross_refs(body: &str) -> Vec<ParsedCrossRef> {
|
||||
refs
|
||||
}
|
||||
|
||||
/// Extract cross-references from GitLab URLs in free-text bodies (descriptions, user notes).
|
||||
pub fn parse_url_refs(body: &str) -> Vec<ParsedCrossRef> {
|
||||
let mut refs = Vec::new();
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
|
||||
for caps in GITLAB_URL_RE.captures_iter(body) {
|
||||
let Some(entity_type_raw) = caps.name("entity_type").map(|m| m.as_str()) else {
|
||||
continue;
|
||||
};
|
||||
let Some(iid_str) = caps.name("iid").map(|m| m.as_str()) else {
|
||||
continue;
|
||||
};
|
||||
let Some(project) = caps.name("project").map(|m| m.as_str()) else {
|
||||
continue;
|
||||
};
|
||||
let Ok(iid) = iid_str.parse::<i64>() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let target_entity_type = match entity_type_raw {
|
||||
"issues" => "issue",
|
||||
"merge_requests" => "merge_request",
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let key = (target_entity_type, project.to_owned(), iid);
|
||||
if !seen.insert(key) {
|
||||
continue; // deduplicate within same body
|
||||
}
|
||||
|
||||
refs.push(ParsedCrossRef {
|
||||
reference_type: "mentioned".to_owned(),
|
||||
target_entity_type: target_entity_type.to_owned(),
|
||||
target_iid: iid,
|
||||
target_project_path: Some(project.to_owned()),
|
||||
});
|
||||
}
|
||||
|
||||
refs
|
||||
}
|
||||
|
||||
fn capture_to_cross_ref(
|
||||
caps: ®ex::Captures<'_>,
|
||||
reference_type: &str,
|
||||
@@ -233,6 +288,189 @@ fn resolve_cross_project_entity(
|
||||
resolve_entity_id(conn, project_id, entity_type, iid)
|
||||
}
|
||||
|
||||
/// Extract cross-references from issue and MR descriptions (GitLab URLs only).
|
||||
pub fn extract_refs_from_descriptions(conn: &Connection, project_id: i64) -> Result<ExtractResult> {
|
||||
let mut result = ExtractResult::default();
|
||||
|
||||
let mut insert_stmt = conn.prepare_cached(
|
||||
"INSERT OR IGNORE INTO entity_references
|
||||
(project_id, source_entity_type, source_entity_id,
|
||||
target_entity_type, target_entity_id,
|
||||
target_project_path, target_entity_iid,
|
||||
reference_type, source_method, created_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'description_parse', ?9)",
|
||||
)?;
|
||||
|
||||
let now = now_ms();
|
||||
|
||||
// Issues with descriptions
|
||||
let mut issue_stmt = conn.prepare_cached(
|
||||
"SELECT id, iid, description FROM issues
|
||||
WHERE project_id = ?1 AND description IS NOT NULL AND description != ''",
|
||||
)?;
|
||||
let issues: Vec<(i64, i64, String)> = issue_stmt
|
||||
.query_map([project_id], |row| {
|
||||
Ok((row.get(0)?, row.get(1)?, row.get(2)?))
|
||||
})?
|
||||
.collect::<std::result::Result<Vec<_>, _>>()?;
|
||||
|
||||
for (entity_id, _iid, description) in &issues {
|
||||
insert_url_refs(
|
||||
conn,
|
||||
&mut insert_stmt,
|
||||
&mut result,
|
||||
project_id,
|
||||
"issue",
|
||||
*entity_id,
|
||||
description,
|
||||
now,
|
||||
)?;
|
||||
}
|
||||
|
||||
// Merge requests with descriptions
|
||||
let mut mr_stmt = conn.prepare_cached(
|
||||
"SELECT id, iid, description FROM merge_requests
|
||||
WHERE project_id = ?1 AND description IS NOT NULL AND description != ''",
|
||||
)?;
|
||||
let mrs: Vec<(i64, i64, String)> = mr_stmt
|
||||
.query_map([project_id], |row| {
|
||||
Ok((row.get(0)?, row.get(1)?, row.get(2)?))
|
||||
})?
|
||||
.collect::<std::result::Result<Vec<_>, _>>()?;
|
||||
|
||||
for (entity_id, _iid, description) in &mrs {
|
||||
insert_url_refs(
|
||||
conn,
|
||||
&mut insert_stmt,
|
||||
&mut result,
|
||||
project_id,
|
||||
"merge_request",
|
||||
*entity_id,
|
||||
description,
|
||||
now,
|
||||
)?;
|
||||
}
|
||||
|
||||
if result.inserted > 0 || result.skipped_unresolvable > 0 {
|
||||
debug!(
|
||||
inserted = result.inserted,
|
||||
unresolvable = result.skipped_unresolvable,
|
||||
"Description cross-reference extraction complete"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Extract cross-references from user (non-system) notes (GitLab URLs only).
|
||||
pub fn extract_refs_from_user_notes(conn: &Connection, project_id: i64) -> Result<ExtractResult> {
|
||||
let mut result = ExtractResult::default();
|
||||
|
||||
let mut note_stmt = conn.prepare_cached(
|
||||
"SELECT n.id, n.body, d.noteable_type,
|
||||
COALESCE(d.issue_id, d.merge_request_id) AS entity_id
|
||||
FROM notes n
|
||||
JOIN discussions d ON n.discussion_id = d.id
|
||||
WHERE n.is_system = 0
|
||||
AND n.project_id = ?1
|
||||
AND n.body IS NOT NULL",
|
||||
)?;
|
||||
|
||||
let notes: Vec<(i64, String, String, i64)> = note_stmt
|
||||
.query_map([project_id], |row| {
|
||||
Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
|
||||
})?
|
||||
.collect::<std::result::Result<Vec<_>, _>>()?;
|
||||
|
||||
if notes.is_empty() {
|
||||
return Ok(result);
|
||||
}
|
||||
|
||||
let mut insert_stmt = conn.prepare_cached(
|
||||
"INSERT OR IGNORE INTO entity_references
|
||||
(project_id, source_entity_type, source_entity_id,
|
||||
target_entity_type, target_entity_id,
|
||||
target_project_path, target_entity_iid,
|
||||
reference_type, source_method, created_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'note_parse', ?9)",
|
||||
)?;
|
||||
|
||||
let now = now_ms();
|
||||
|
||||
for (_, body, noteable_type, entity_id) in ¬es {
|
||||
let source_entity_type = noteable_type_to_entity_type(noteable_type);
|
||||
insert_url_refs(
|
||||
conn,
|
||||
&mut insert_stmt,
|
||||
&mut result,
|
||||
project_id,
|
||||
source_entity_type,
|
||||
*entity_id,
|
||||
body,
|
||||
now,
|
||||
)?;
|
||||
}
|
||||
|
||||
if result.inserted > 0 || result.skipped_unresolvable > 0 {
|
||||
debug!(
|
||||
inserted = result.inserted,
|
||||
unresolvable = result.skipped_unresolvable,
|
||||
"User note cross-reference extraction complete"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Shared helper: parse URL refs from a body and insert into entity_references.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn insert_url_refs(
|
||||
conn: &Connection,
|
||||
insert_stmt: &mut rusqlite::CachedStatement<'_>,
|
||||
result: &mut ExtractResult,
|
||||
project_id: i64,
|
||||
source_entity_type: &str,
|
||||
source_entity_id: i64,
|
||||
body: &str,
|
||||
now: i64,
|
||||
) -> Result<()> {
|
||||
let url_refs = parse_url_refs(body);
|
||||
|
||||
for xref in &url_refs {
|
||||
let target_entity_id = if let Some(ref path) = xref.target_project_path {
|
||||
resolve_cross_project_entity(conn, path, &xref.target_entity_type, xref.target_iid)
|
||||
} else {
|
||||
resolve_entity_id(conn, project_id, &xref.target_entity_type, xref.target_iid)
|
||||
};
|
||||
|
||||
let rows_changed = insert_stmt.execute(rusqlite::params![
|
||||
project_id,
|
||||
source_entity_type,
|
||||
source_entity_id,
|
||||
xref.target_entity_type,
|
||||
target_entity_id,
|
||||
xref.target_project_path,
|
||||
if target_entity_id.is_none() {
|
||||
Some(xref.target_iid)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
xref.reference_type,
|
||||
now,
|
||||
])?;
|
||||
|
||||
if rows_changed > 0 {
|
||||
if target_entity_id.is_none() {
|
||||
result.skipped_unresolvable += 1;
|
||||
} else {
|
||||
result.inserted += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "note_parser_tests.rs"]
|
||||
mod tests;
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
use super::*;
|
||||
|
||||
// --- parse_cross_refs: real GitLab system note format ---
|
||||
|
||||
#[test]
|
||||
fn test_parse_mentioned_in_mr() {
|
||||
let refs = parse_cross_refs("mentioned in !567");
|
||||
let refs = parse_cross_refs("mentioned in merge request !567");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(refs[0].reference_type, "mentioned");
|
||||
assert_eq!(refs[0].target_entity_type, "merge_request");
|
||||
@@ -12,7 +14,7 @@ fn test_parse_mentioned_in_mr() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_mentioned_in_issue() {
|
||||
let refs = parse_cross_refs("mentioned in #234");
|
||||
let refs = parse_cross_refs("mentioned in issue #234");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(refs[0].reference_type, "mentioned");
|
||||
assert_eq!(refs[0].target_entity_type, "issue");
|
||||
@@ -22,7 +24,7 @@ fn test_parse_mentioned_in_issue() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_mentioned_cross_project() {
|
||||
let refs = parse_cross_refs("mentioned in group/repo!789");
|
||||
let refs = parse_cross_refs("mentioned in merge request group/repo!789");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(refs[0].reference_type, "mentioned");
|
||||
assert_eq!(refs[0].target_entity_type, "merge_request");
|
||||
@@ -32,7 +34,7 @@ fn test_parse_mentioned_cross_project() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_mentioned_cross_project_issue() {
|
||||
let refs = parse_cross_refs("mentioned in group/repo#123");
|
||||
let refs = parse_cross_refs("mentioned in issue group/repo#123");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(refs[0].reference_type, "mentioned");
|
||||
assert_eq!(refs[0].target_entity_type, "issue");
|
||||
@@ -42,7 +44,7 @@ fn test_parse_mentioned_cross_project_issue() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_closed_by_mr() {
|
||||
let refs = parse_cross_refs("closed by !567");
|
||||
let refs = parse_cross_refs("closed by merge request !567");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(refs[0].reference_type, "closes");
|
||||
assert_eq!(refs[0].target_entity_type, "merge_request");
|
||||
@@ -52,7 +54,7 @@ fn test_parse_closed_by_mr() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_closed_by_cross_project() {
|
||||
let refs = parse_cross_refs("closed by group/repo!789");
|
||||
let refs = parse_cross_refs("closed by merge request group/repo!789");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(refs[0].reference_type, "closes");
|
||||
assert_eq!(refs[0].target_entity_type, "merge_request");
|
||||
@@ -62,7 +64,7 @@ fn test_parse_closed_by_cross_project() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_multiple_refs() {
|
||||
let refs = parse_cross_refs("mentioned in !123 and mentioned in #456");
|
||||
let refs = parse_cross_refs("mentioned in merge request !123 and mentioned in issue #456");
|
||||
assert_eq!(refs.len(), 2);
|
||||
assert_eq!(refs[0].target_entity_type, "merge_request");
|
||||
assert_eq!(refs[0].target_iid, 123);
|
||||
@@ -84,7 +86,7 @@ fn test_parse_non_english_note() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_multi_level_group_path() {
|
||||
let refs = parse_cross_refs("mentioned in top/sub/project#123");
|
||||
let refs = parse_cross_refs("mentioned in issue top/sub/project#123");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(
|
||||
refs[0].target_project_path.as_deref(),
|
||||
@@ -95,7 +97,7 @@ fn test_parse_multi_level_group_path() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_deeply_nested_group_path() {
|
||||
let refs = parse_cross_refs("mentioned in a/b/c/d/e!42");
|
||||
let refs = parse_cross_refs("mentioned in merge request a/b/c/d/e!42");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(refs[0].target_project_path.as_deref(), Some("a/b/c/d/e"));
|
||||
assert_eq!(refs[0].target_iid, 42);
|
||||
@@ -103,7 +105,7 @@ fn test_parse_deeply_nested_group_path() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_hyphenated_project_path() {
|
||||
let refs = parse_cross_refs("mentioned in my-group/my-project#99");
|
||||
let refs = parse_cross_refs("mentioned in issue my-group/my-project#99");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(
|
||||
refs[0].target_project_path.as_deref(),
|
||||
@@ -113,7 +115,7 @@ fn test_parse_hyphenated_project_path() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_dotted_project_path() {
|
||||
let refs = parse_cross_refs("mentioned in visiostack.io/backend#123");
|
||||
let refs = parse_cross_refs("mentioned in issue visiostack.io/backend#123");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(
|
||||
refs[0].target_project_path.as_deref(),
|
||||
@@ -124,7 +126,7 @@ fn test_parse_dotted_project_path() {
|
||||
|
||||
#[test]
|
||||
fn test_parse_dotted_nested_project_path() {
|
||||
let refs = parse_cross_refs("closed by my.org/sub.group/my.project!42");
|
||||
let refs = parse_cross_refs("closed by merge request my.org/sub.group/my.project!42");
|
||||
assert_eq!(refs.len(), 1);
|
||||
assert_eq!(
|
||||
refs[0].target_project_path.as_deref(),
|
||||
@@ -134,16 +136,27 @@ fn test_parse_dotted_nested_project_path() {
|
||||
assert_eq!(refs[0].target_iid, 42);
|
||||
}
|
||||
|
||||
// Bare-sigil fallback (no "issue"/"merge request" word) still works
#[test]
fn test_parse_bare_sigil_fallback() {
    let refs = parse_cross_refs("mentioned in #123");
    assert_eq!(refs.len(), 1);
    assert_eq!(refs[0].target_iid, 123);
    assert_eq!(refs[0].target_entity_type, "issue");
}
|
||||
|
||||
/// "closed by" without the entity type word is still parsed as a `closes` reference.
#[test]
fn test_parse_bare_sigil_closed_by() {
    let parsed = parse_cross_refs("closed by !567");
    assert_eq!(parsed.len(), 1);

    let closing = &parsed[0];
    assert_eq!(closing.reference_type, "closes");
    assert_eq!(closing.target_entity_type, "merge_request");
    assert_eq!(closing.target_iid, 567);
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_mixed_mentioned_and_closed() {
|
||||
let refs = parse_cross_refs("mentioned in !10 and closed by !20");
|
||||
let refs = parse_cross_refs("mentioned in merge request !10 and closed by merge request !20");
|
||||
assert_eq!(refs.len(), 2);
|
||||
assert_eq!(refs[0].reference_type, "mentioned");
|
||||
assert_eq!(refs[0].target_iid, 10);
|
||||
@@ -151,6 +164,113 @@ fn test_parse_mixed_mentioned_and_closed() {
|
||||
assert_eq!(refs[1].target_iid, 20);
|
||||
}
|
||||
|
||||
// --- parse_url_refs ---
|
||||
|
||||
/// An issue URL embedded in a sentence yields one `mentioned` issue reference.
#[test]
fn test_url_ref_same_project_issue() {
    let body = "See https://gitlab.visiostack.com/vs/typescript-code/-/issues/3537 for details";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);

    let found = &parsed[0];
    assert_eq!(found.target_entity_type, "issue");
    assert_eq!(found.target_iid, 3537);
    assert_eq!(
        found.target_project_path.as_deref(),
        Some("vs/typescript-code")
    );
    assert_eq!(found.reference_type, "mentioned");
}
|
||||
|
||||
/// A bare merge request URL yields one merge request reference.
#[test]
fn test_url_ref_merge_request() {
    let body = "https://gitlab.visiostack.com/vs/typescript-code/-/merge_requests/3548";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);

    let found = &parsed[0];
    assert_eq!(found.target_entity_type, "merge_request");
    assert_eq!(found.target_iid, 3548);
    assert_eq!(
        found.target_project_path.as_deref(),
        Some("vs/typescript-code")
    );
}
|
||||
|
||||
/// The project path is taken from the URL itself.
#[test]
fn test_url_ref_cross_project() {
    let body = "Related: https://gitlab.visiostack.com/vs/python-code/-/merge_requests/5203";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);

    let found = &parsed[0];
    assert_eq!(found.target_entity_type, "merge_request");
    assert_eq!(found.target_iid, 5203);
    assert_eq!(
        found.target_project_path.as_deref(),
        Some("vs/python-code")
    );
}
|
||||
|
||||
/// A trailing `#note_…` anchor does not affect the parsed iid.
#[test]
fn test_url_ref_with_anchor() {
    let body = "https://gitlab.visiostack.com/vs/typescript-code/-/issues/123#note_456";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);

    let found = &parsed[0];
    assert_eq!(found.target_entity_type, "issue");
    assert_eq!(found.target_iid, 123);
}
|
||||
|
||||
/// A URL used as a markdown link target is still found.
#[test]
fn test_url_ref_markdown_link() {
    let body = "Check [this MR](https://gitlab.visiostack.com/vs/typescript-code/-/merge_requests/100) for context";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);

    let found = &parsed[0];
    assert_eq!(found.target_entity_type, "merge_request");
    assert_eq!(found.target_iid, 100);
}
|
||||
|
||||
/// Several URLs in one body are returned in order of appearance.
#[test]
fn test_url_ref_multiple_urls() {
    let parsed = parse_url_refs(
        "See https://gitlab.com/a/b/-/issues/1 and https://gitlab.com/a/b/-/merge_requests/2",
    );
    assert_eq!(parsed.len(), 2);

    let (first, second) = (&parsed[0], &parsed[1]);
    assert_eq!(first.target_entity_type, "issue");
    assert_eq!(first.target_iid, 1);
    assert_eq!(second.target_entity_type, "merge_request");
    assert_eq!(second.target_iid, 2);
}
|
||||
|
||||
/// The same URL appearing twice in one body is reported once.
#[test]
fn test_url_ref_deduplicates() {
    let parsed = parse_url_refs(
        "See https://gitlab.com/a/b/-/issues/1 and again https://gitlab.com/a/b/-/issues/1",
    );
    assert_eq!(
        parsed.len(),
        1,
        "Duplicate URLs in same body should be deduplicated"
    );
}
|
||||
|
||||
/// URLs without the GitLab `/-/issues/` or `/-/merge_requests/` segment produce nothing.
#[test]
fn test_url_ref_non_gitlab_urls_ignored() {
    let body = "Check https://google.com/search?q=test and https://github.com/org/repo/issues/1";
    let parsed = parse_url_refs(body);
    assert!(parsed.is_empty());
}
|
||||
|
||||
/// Nested group paths are captured in full.
#[test]
fn test_url_ref_deeply_nested_project() {
    let body = "https://gitlab.com/org/sub/deep/project/-/issues/42";
    let parsed = parse_url_refs(body);
    assert_eq!(parsed.len(), 1);

    let found = &parsed[0];
    assert_eq!(
        found.target_project_path.as_deref(),
        Some("org/sub/deep/project")
    );
    assert_eq!(found.target_iid, 42);
}
|
||||
|
||||
// --- Integration tests: system notes (updated for real format) ---
|
||||
|
||||
fn setup_test_db() -> Connection {
|
||||
use crate::core::db::{create_connection, run_migrations};
|
||||
|
||||
@@ -204,27 +324,31 @@ fn seed_test_data(conn: &Connection) -> i64 {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// System note: real GitLab format "mentioned in merge request !789"
|
||||
conn.execute(
|
||||
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
|
||||
VALUES (40, 4000, 30, 1, 1, 'mentioned in !789', ?1, ?1, ?1)",
|
||||
VALUES (40, 4000, 30, 1, 1, 'mentioned in merge request !789', ?1, ?1, ?1)",
|
||||
[now],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// System note: real GitLab format "mentioned in issue #456"
|
||||
conn.execute(
|
||||
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
|
||||
VALUES (41, 4001, 31, 1, 1, 'mentioned in #456', ?1, ?1, ?1)",
|
||||
VALUES (41, 4001, 31, 1, 1, 'mentioned in issue #456', ?1, ?1, ?1)",
|
||||
[now],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// User note (is_system=0) — should NOT be processed by system note extractor
|
||||
conn.execute(
|
||||
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
|
||||
VALUES (42, 4002, 30, 1, 0, 'mentioned in !999', ?1, ?1, ?1)",
|
||||
VALUES (42, 4002, 30, 1, 0, 'mentioned in merge request !999', ?1, ?1, ?1)",
|
||||
[now],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// System note with no cross-ref pattern
|
||||
conn.execute(
|
||||
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
|
||||
VALUES (43, 4003, 30, 1, 1, 'added label ~bug', ?1, ?1, ?1)",
|
||||
@@ -232,9 +356,10 @@ fn seed_test_data(conn: &Connection) -> i64 {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// System note: cross-project ref
|
||||
conn.execute(
|
||||
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
|
||||
VALUES (44, 4004, 30, 1, 1, 'mentioned in other/project#999', ?1, ?1, ?1)",
|
||||
VALUES (44, 4004, 30, 1, 1, 'mentioned in issue other/project#999', ?1, ?1, ?1)",
|
||||
[now],
|
||||
)
|
||||
.unwrap();
|
||||
@@ -323,3 +448,323 @@ fn test_extract_refs_empty_project() {
|
||||
assert_eq!(result.skipped_unresolvable, 0);
|
||||
assert_eq!(result.parse_failures, 0);
|
||||
}
|
||||
|
||||
// --- Integration tests: description extraction ---
|
||||
|
||||
/// An issue description linking to an MR of the same project produces one resolved
/// reference tagged `description_parse`.
#[test]
fn test_extract_refs_from_descriptions_issue() {
    let conn = setup_test_db();
    let now = now_ms();
    // Every seed statement binds the same timestamp as ?1.
    let seed = |sql: &str| {
        conn.execute(sql, [now]).unwrap();
    };

    seed(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
         VALUES (1, 100, 'vs/typescript-code', 'https://gitlab.com/vs/typescript-code', ?1, ?1)",
    );

    // Issue with MR reference in description
    seed(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, description, created_at, updated_at, last_seen_at)
         VALUES (10, 1000, 1, 3537, 'Test Issue', 'opened',
                 'Related to https://gitlab.com/vs/typescript-code/-/merge_requests/3548',
                 ?1, ?1, ?1)",
    );

    // The target MR so it resolves
    seed(
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
         VALUES (20, 2000, 1, 3548, 'Fix MR', 'merged', 'fix', 'main', 'dev', ?1, ?1, ?1)",
    );

    let outcome = extract_refs_from_descriptions(&conn, 1).unwrap();

    assert_eq!(outcome.inserted, 1, "Should insert 1 description ref");
    assert_eq!(outcome.skipped_unresolvable, 0);

    let source_method: String = conn
        .query_row(
            "SELECT source_method FROM entity_references WHERE project_id = 1",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(source_method, "description_parse");
}
|
||||
|
||||
/// An MR description linking to an issue records merge_request -> issue.
#[test]
fn test_extract_refs_from_descriptions_mr() {
    let conn = setup_test_db();
    let now = now_ms();
    // Every seed statement binds the same timestamp as ?1.
    let seed = |sql: &str| {
        conn.execute(sql, [now]).unwrap();
    };

    seed(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
         VALUES (1, 100, 'vs/typescript-code', 'https://gitlab.com/vs/typescript-code', ?1, ?1)",
    );

    seed(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
         VALUES (10, 1000, 1, 100, 'Target Issue', 'opened', ?1, ?1, ?1)",
    );

    seed(
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, description, created_at, updated_at, last_seen_at)
         VALUES (20, 2000, 1, 200, 'Fixing MR', 'merged', 'fix', 'main', 'dev',
                 'Fixes https://gitlab.com/vs/typescript-code/-/issues/100',
                 ?1, ?1, ?1)",
    );

    let outcome = extract_refs_from_descriptions(&conn, 1).unwrap();

    assert_eq!(outcome.inserted, 1);

    let (source_type, target_type): (String, String) = conn
        .query_row(
            "SELECT source_entity_type, target_entity_type FROM entity_references WHERE project_id = 1",
            [],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .unwrap();
    assert_eq!(source_type, "merge_request");
    assert_eq!(target_type, "issue");
}
|
||||
|
||||
/// Running description extraction twice inserts the reference only once.
#[test]
fn test_extract_refs_from_descriptions_idempotent() {
    let conn = setup_test_db();
    let now = now_ms();
    // Every seed statement binds the same timestamp as ?1.
    let seed = |sql: &str| {
        conn.execute(sql, [now]).unwrap();
    };

    seed(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
         VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)",
    );

    seed(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, description, created_at, updated_at, last_seen_at)
         VALUES (10, 1000, 1, 1, 'Issue', 'opened',
                 'See https://gitlab.com/vs/code/-/merge_requests/2', ?1, ?1, ?1)",
    );

    seed(
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
         VALUES (20, 2000, 1, 2, 'MR', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)",
    );

    let first_run = extract_refs_from_descriptions(&conn, 1).unwrap();
    assert_eq!(first_run.inserted, 1);

    let second_run = extract_refs_from_descriptions(&conn, 1).unwrap();
    assert_eq!(second_run.inserted, 0, "Second run should insert 0 (idempotent)");
}
|
||||
|
||||
/// A URL pointing at a project that is not in the database is stored as an unresolved
/// reference carrying the project path and iid.
#[test]
fn test_extract_refs_from_descriptions_cross_project_unresolved() {
    let conn = setup_test_db();
    let now = now_ms();
    // Every seed statement binds the same timestamp as ?1.
    let seed = |sql: &str| {
        conn.execute(sql, [now]).unwrap();
    };

    seed(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
         VALUES (1, 100, 'vs/typescript-code', 'https://gitlab.com/vs/typescript-code', ?1, ?1)",
    );

    seed(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, description, created_at, updated_at, last_seen_at)
         VALUES (10, 1000, 1, 1, 'Issue', 'opened',
                 'See https://gitlab.com/vs/other-project/-/merge_requests/99', ?1, ?1, ?1)",
    );

    let outcome = extract_refs_from_descriptions(&conn, 1).unwrap();

    assert_eq!(outcome.inserted, 0);
    assert_eq!(
        outcome.skipped_unresolvable, 1,
        "Cross-project ref with no matching project should be unresolvable"
    );

    let (stored_path, stored_iid): (String, i64) = conn
        .query_row(
            "SELECT target_project_path, target_entity_iid FROM entity_references WHERE target_entity_id IS NULL",
            [],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .unwrap();
    assert_eq!(stored_path, "vs/other-project");
    assert_eq!(stored_iid, 99);
}
|
||||
|
||||
// --- Integration tests: user note extraction ---
|
||||
|
||||
/// A user note containing an MR URL produces one resolved reference tagged `note_parse`.
#[test]
fn test_extract_refs_from_user_notes_with_url() {
    let conn = setup_test_db();
    let now = now_ms();
    // Every seed statement binds the same timestamp as ?1.
    let seed = |sql: &str| {
        conn.execute(sql, [now]).unwrap();
    };

    seed(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
         VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)",
    );

    seed(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
         VALUES (10, 1000, 1, 50, 'Source Issue', 'opened', ?1, ?1, ?1)",
    );

    seed(
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
         VALUES (20, 2000, 1, 60, 'Target MR', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)",
    );

    seed(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at)
         VALUES (30, 'disc-user', 1, 10, 'Issue', ?1)",
    );

    // User note with a URL
    seed(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
         VALUES (40, 4000, 30, 1, 0,
                 'This is related to https://gitlab.com/vs/code/-/merge_requests/60',
                 ?1, ?1, ?1)",
    );

    let outcome = extract_refs_from_user_notes(&conn, 1).unwrap();

    assert_eq!(outcome.inserted, 1);

    let source_method: String = conn
        .query_row(
            "SELECT source_method FROM entity_references WHERE project_id = 1",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(source_method, "note_parse");
}
|
||||
|
||||
/// System-note phrasing inside a user note is ignored: user notes are only scanned for URLs.
#[test]
fn test_extract_refs_from_user_notes_no_system_note_patterns() {
    let conn = setup_test_db();
    let now = now_ms();
    // Every seed statement binds the same timestamp as ?1.
    let seed = |sql: &str| {
        conn.execute(sql, [now]).unwrap();
    };

    seed(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
         VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)",
    );

    seed(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
         VALUES (10, 1000, 1, 50, 'Source', 'opened', ?1, ?1, ?1)",
    );

    seed(
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
         VALUES (20, 2000, 1, 999, 'Target', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)",
    );

    seed(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at)
         VALUES (30, 'disc-x', 1, 10, 'Issue', ?1)",
    );

    // User note with system-note-like text but no URL — should NOT extract
    // (user notes only use URL parsing, not system note pattern matching)
    seed(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
         VALUES (40, 4000, 30, 1, 0, 'mentioned in merge request !999', ?1, ?1, ?1)",
    );

    let outcome = extract_refs_from_user_notes(&conn, 1).unwrap();

    assert_eq!(
        outcome.inserted, 0,
        "User notes should only parse URLs, not system note patterns"
    );
}
|
||||
|
||||
/// Running user-note extraction twice inserts the reference only once.
#[test]
fn test_extract_refs_from_user_notes_idempotent() {
    let conn = setup_test_db();
    let now = now_ms();
    // Every seed statement binds the same timestamp as ?1.
    let seed = |sql: &str| {
        conn.execute(sql, [now]).unwrap();
    };

    seed(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
         VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)",
    );

    seed(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
         VALUES (10, 1000, 1, 1, 'Src', 'opened', ?1, ?1, ?1)",
    );

    seed(
        "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at)
         VALUES (20, 2000, 1, 2, 'Tgt', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)",
    );

    seed(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at)
         VALUES (30, 'disc-y', 1, 10, 'Issue', ?1)",
    );

    seed(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at)
         VALUES (40, 4000, 30, 1, 0,
                 'See https://gitlab.com/vs/code/-/merge_requests/2', ?1, ?1, ?1)",
    );

    let first_run = extract_refs_from_user_notes(&conn, 1).unwrap();
    assert_eq!(first_run.inserted, 1);

    let second_run = extract_refs_from_user_notes(&conn, 1).unwrap();
    assert_eq!(second_run.inserted, 0, "Second extraction should be idempotent");
}
|
||||
|
||||
@@ -640,6 +640,24 @@ pub async fn ingest_project_merge_requests_with_progress(
|
||||
);
|
||||
}
|
||||
|
||||
let desc_refs = crate::core::note_parser::extract_refs_from_descriptions(conn, project_id)?;
|
||||
if desc_refs.inserted > 0 || desc_refs.skipped_unresolvable > 0 {
|
||||
debug!(
|
||||
inserted = desc_refs.inserted,
|
||||
unresolvable = desc_refs.skipped_unresolvable,
|
||||
"Extracted cross-references from descriptions"
|
||||
);
|
||||
}
|
||||
|
||||
let user_note_refs = crate::core::note_parser::extract_refs_from_user_notes(conn, project_id)?;
|
||||
if user_note_refs.inserted > 0 || user_note_refs.skipped_unresolvable > 0 {
|
||||
debug!(
|
||||
inserted = user_note_refs.inserted,
|
||||
unresolvable = user_note_refs.skipped_unresolvable,
|
||||
"Extracted cross-references from user notes"
|
||||
);
|
||||
}
|
||||
|
||||
{
|
||||
let enqueued = enqueue_mr_closes_issues_jobs(conn, project_id)?;
|
||||
if enqueued > 0 {
|
||||
|
||||
Reference in New Issue
Block a user