Files
gitlore/src/gitlab/transformers/discussion.rs
teernisse 83cd16c918 feat: implement per-note search and document pipeline
- Add SourceType::Note with extract_note_document() and ParentMetadataCache
- Migration 022: composite indexes for notes queries + author_id column
- Migration 024: table rebuild adding 'note' to CHECK constraints, defense triggers
- Migration 025: backfill existing non-system notes into dirty queue
- Add lore notes CLI command with 17 filter options (author, path, resolution, etc.)
- Support table/json/jsonl/csv output formats with field selection
- Wire note dirty tracking through discussion and MR discussion ingestion
- Fix test_migration_024_preserves_existing_data off-by-one (tested wrong migration)
- Fix upsert_document_inner returning false for label/path-only changes
2026-02-12 13:31:24 -05:00

550 lines
16 KiB
Rust

use tracing::warn;
use crate::core::time::{iso_to_ms, iso_to_ms_strict, now_ms};
use crate::gitlab::types::{GitLabDiscussion, GitLabNote};
/// Identifies which kind of parent a discussion is attached to.
///
/// The wrapped `i64` is stored verbatim by [`transform_discussion`] in either
/// `NormalizedDiscussion::issue_id` or `NormalizedDiscussion::merge_request_id`.
#[derive(Debug, Clone, Copy)]
pub enum NoteableRef {
    /// The discussion belongs to an issue with the given id.
    Issue(i64),
    /// The discussion belongs to a merge request with the given id.
    MergeRequest(i64),
}
/// Flattened, storage-oriented view of a GitLab discussion as produced by
/// [`transform_discussion`].
#[derive(Debug, Clone)]
pub struct NormalizedDiscussion {
    /// Discussion id exactly as it appears in the GitLab payload.
    pub gitlab_discussion_id: String,
    /// Project id supplied by the caller of the transform.
    pub project_id: i64,
    /// Set when the parent is an issue; `None` otherwise.
    pub issue_id: Option<i64>,
    /// Set when the parent is a merge request; `None` otherwise.
    pub merge_request_id: Option<i64>,
    /// `"Issue"` or `"MergeRequest"`, matching whichever id above is set.
    pub noteable_type: String,
    /// Copied from the payload's `individual_note` flag.
    pub individual_note: bool,
    /// Smallest parseable note `created_at`; `None` if no note timestamp parsed.
    pub first_note_at: Option<i64>,
    /// Largest parseable note `created_at`; `None` if no note timestamp parsed.
    pub last_note_at: Option<i64>,
    /// Value of `now_ms()` at the time the discussion was transformed.
    pub last_seen_at: i64,
    /// True when at least one note in the discussion is resolvable.
    pub resolvable: bool,
    /// True only when the discussion is resolvable and every resolvable note is resolved.
    pub resolved: bool,
}
/// Flattened, storage-oriented view of a single GitLab note.
///
/// Built by `transform_notes` (lenient timestamps) or
/// `transform_notes_with_diff_position` (strict timestamps).
#[derive(Debug, Clone)]
pub struct NormalizedNote {
    /// Note id exactly as it appears in the GitLab payload.
    pub gitlab_id: i64,
    /// Project id supplied by the caller of the transform.
    pub project_id: i64,
    /// Copied from the payload's `note_type`, if present.
    pub note_type: Option<String>,
    /// Copied from the payload's `system` flag.
    pub is_system: bool,
    /// Author id from the payload; the transforms in this file always set it to `Some`.
    pub author_id: Option<i64>,
    /// Author username from the payload.
    pub author_username: String,
    /// Note body, copied verbatim.
    pub body: String,
    /// Parsed `created_at` timestamp.
    pub created_at: i64,
    /// Parsed `updated_at` timestamp.
    pub updated_at: i64,
    /// Value of `now_ms()` at the time the enclosing discussion's notes were transformed.
    pub last_seen_at: i64,
    /// Zero-based index of the note within its discussion's `notes` list.
    pub position: i32,
    /// Copied from the payload's `resolvable` flag.
    pub resolvable: bool,
    /// Copied from the payload's `resolved` flag.
    pub resolved: bool,
    /// Username of the resolver, when the payload carries one.
    pub resolved_by: Option<String>,
    /// Parsed `resolved_at` timestamp, when present.
    pub resolved_at: Option<i64>,
    // The `position_*` fields below mirror the note's diff position and are all
    // `None` when the note has no position attached.
    /// `old_path` of the diff position.
    pub position_old_path: Option<String>,
    /// `new_path` of the diff position.
    pub position_new_path: Option<String>,
    /// `old_line` of the diff position.
    pub position_old_line: Option<i32>,
    /// `new_line` of the diff position.
    pub position_new_line: Option<i32>,
    /// `position_type` of the diff position.
    pub position_type: Option<String>,
    /// Start line of the position's line range, if a range is present.
    pub position_line_range_start: Option<i32>,
    /// End line of the position's line range, if a range is present.
    pub position_line_range_end: Option<i32>,
    /// `base_sha` of the diff position.
    pub position_base_sha: Option<String>,
    /// `start_sha` of the diff position.
    pub position_start_sha: Option<String>,
    /// `head_sha` of the diff position.
    pub position_head_sha: Option<String>,
}
/// Leniently converts a timestamp string via `iso_to_ms`.
///
/// When `iso_to_ms` returns `None`, a warning carrying the offending input is
/// logged and `0` is returned, so callers never have to handle a failure.
fn parse_timestamp(ts: &str) -> i64 {
    iso_to_ms(ts).unwrap_or_else(|| {
        warn!(timestamp = ts, "Invalid timestamp, defaulting to epoch 0");
        0
    })
}
/// Builds a [`NormalizedDiscussion`] from a GitLab discussion payload.
///
/// * `gitlab_discussion` - the payload to normalize.
/// * `local_project_id` - stored as `project_id` on the result.
/// * `noteable` - selects which of `issue_id` / `merge_request_id` is populated
///   and the matching `noteable_type` string.
///
/// `first_note_at` / `last_note_at` are the minimum / maximum of the notes'
/// `created_at` values that `iso_to_ms` can parse; unparseable ones are skipped.
/// The discussion is `resolvable` if any note is, and `resolved` only if it is
/// resolvable and every resolvable note is resolved.
pub fn transform_discussion(
    gitlab_discussion: &GitLabDiscussion,
    local_project_id: i64,
    noteable: NoteableRef,
) -> NormalizedDiscussion {
    let last_seen_at = now_ms();
    let notes = &gitlab_discussion.notes;

    let (issue_id, merge_request_id, type_label) = match noteable {
        NoteableRef::Issue(id) => (Some(id), None, "Issue"),
        NoteableRef::MergeRequest(id) => (None, Some(id), "MergeRequest"),
    };

    // Track the earliest and latest parseable creation time in a single pass.
    let (first_note_at, last_note_at) = notes
        .iter()
        .filter_map(|note| iso_to_ms(&note.created_at))
        .fold((None::<i64>, None::<i64>), |(earliest, latest), ts| {
            (
                Some(earliest.map_or(ts, |e| e.min(ts))),
                Some(latest.map_or(ts, |l| l.max(ts))),
            )
        });

    // Peeking tells us whether any resolvable note exists without consuming it,
    // so the subsequent `all` still inspects every resolvable note.
    let mut resolvable_notes = notes.iter().filter(|note| note.resolvable).peekable();
    let resolvable = resolvable_notes.peek().is_some();
    let resolved = resolvable && resolvable_notes.all(|note| note.resolved);

    NormalizedDiscussion {
        gitlab_discussion_id: gitlab_discussion.id.clone(),
        project_id: local_project_id,
        issue_id,
        merge_request_id,
        noteable_type: type_label.to_string(),
        individual_note: gitlab_discussion.individual_note,
        first_note_at,
        last_note_at,
        last_seen_at,
        resolvable,
        resolved,
    }
}
/// Convenience wrapper around [`transform_discussion`] for merge-request discussions.
///
/// Equivalent to calling `transform_discussion` with
/// `NoteableRef::MergeRequest(local_mr_id)`.
pub fn transform_mr_discussion(
    gitlab_discussion: &GitLabDiscussion,
    local_project_id: i64,
    local_mr_id: i64,
) -> NormalizedDiscussion {
    let noteable = NoteableRef::MergeRequest(local_mr_id);
    transform_discussion(gitlab_discussion, local_project_id, noteable)
}
/// Normalizes every note of a discussion, preserving payload order.
///
/// Each note's `position` is its zero-based index in `gitlab_discussion.notes`,
/// and all notes share one `last_seen_at` taken from a single `now_ms()` call.
/// Timestamp handling is lenient (see `transform_single_note`).
pub fn transform_notes(
    gitlab_discussion: &GitLabDiscussion,
    local_project_id: i64,
) -> Vec<NormalizedNote> {
    let seen_at = now_ms();
    let source_notes = &gitlab_discussion.notes;

    let mut normalized = Vec::with_capacity(source_notes.len());
    for (index, note) in source_notes.iter().enumerate() {
        normalized.push(transform_single_note(
            note,
            local_project_id,
            index as i32,
            seen_at,
        ));
    }
    normalized
}
/// Normalizes one note with lenient timestamp handling.
///
/// `created_at` / `updated_at` go through `parse_timestamp` (falling back to `0`
/// with a warning), and an unparseable `resolved_at` becomes `None`.
///
/// * `position` - index of the note within its discussion.
/// * `now` - stored as `last_seen_at`.
fn transform_single_note(
    note: &GitLabNote,
    local_project_id: i64,
    position: i32,
    now: i64,
) -> NormalizedNote {
    let (
        old_path,
        new_path,
        old_line,
        new_line,
        kind,
        range_start,
        range_end,
        base_sha,
        start_sha,
        head_sha,
    ) = extract_position_fields(&note.position);

    // Parse in created -> updated -> resolved order so any warnings are emitted
    // in the same sequence as the fields appear on the note.
    let created_at = parse_timestamp(&note.created_at);
    let updated_at = parse_timestamp(&note.updated_at);
    let resolved_at = note.resolved_at.as_ref().and_then(|ts| iso_to_ms(ts));

    let author = &note.author;
    let resolved_by = note
        .resolved_by
        .as_ref()
        .map(|resolver| resolver.username.clone());

    NormalizedNote {
        gitlab_id: note.id,
        project_id: local_project_id,
        note_type: note.note_type.clone(),
        is_system: note.system,
        author_id: Some(author.id),
        author_username: author.username.clone(),
        body: note.body.clone(),
        created_at,
        updated_at,
        last_seen_at: now,
        position,
        resolvable: note.resolvable,
        resolved: note.resolved,
        resolved_by,
        resolved_at,
        position_old_path: old_path,
        position_new_path: new_path,
        position_old_line: old_line,
        position_new_line: new_line,
        position_type: kind,
        position_line_range_start: range_start,
        position_line_range_end: range_end,
        position_base_sha: base_sha,
        position_start_sha: start_sha,
        position_head_sha: head_sha,
    }
}
/// Flattens an optional note position into the ten `position_*` values of
/// [`NormalizedNote`].
///
/// Tuple order: old path, new path, old line, new line, position type,
/// line-range start, line-range end, base SHA, start SHA, head SHA.
/// Every element is `None` when `position` is `None`.
// The flat 10-tuple return type is what trips clippy's type-complexity lint.
#[allow(clippy::type_complexity)]
fn extract_position_fields(
    position: &Option<crate::gitlab::types::GitLabNotePosition>,
) -> (
    Option<String>,
    Option<String>,
    Option<i32>,
    Option<i32>,
    Option<String>,
    Option<i32>,
    Option<i32>,
    Option<String>,
    Option<String>,
    Option<String>,
) {
    let pos = match position {
        Some(pos) => pos,
        // No position attached: the default of a tuple of `Option`s is all `None`.
        None => return Default::default(),
    };

    let range = pos.line_range.as_ref();
    let range_start = range.and_then(|lr| lr.start_line());
    let range_end = range.and_then(|lr| lr.end_line());

    (
        pos.old_path.clone(),
        pos.new_path.clone(),
        pos.old_line,
        pos.new_line,
        pos.position_type.clone(),
        range_start,
        range_end,
        pos.base_sha.clone(),
        pos.start_sha.clone(),
        pos.head_sha.clone(),
    )
}
/// Strict counterpart of `transform_notes`.
///
/// Notes are normalized in payload order with `position` set to their
/// zero-based index. Processing stops at the first note whose timestamps are
/// rejected by `transform_single_note_strict`, and that error is returned.
///
/// # Errors
///
/// Returns the error string produced for the first failing note.
pub fn transform_notes_with_diff_position(
    gitlab_discussion: &GitLabDiscussion,
    local_project_id: i64,
) -> Result<Vec<NormalizedNote>, String> {
    let seen_at = now_ms();
    let source_notes = &gitlab_discussion.notes;

    let mut normalized = Vec::with_capacity(source_notes.len());
    for (index, note) in source_notes.iter().enumerate() {
        let converted =
            transform_single_note_strict(note, local_project_id, index as i32, seen_at)?;
        normalized.push(converted);
    }
    Ok(normalized)
}
/// Normalizes one note, failing instead of defaulting on bad timestamps.
///
/// * `position` - index of the note within its discussion.
/// * `now` - stored as `last_seen_at`.
///
/// # Errors
///
/// Propagates the error from `iso_to_ms_strict` if `created_at`, `updated_at`,
/// or a present `resolved_at` cannot be converted (checked in that order).
fn transform_single_note_strict(
    note: &GitLabNote,
    local_project_id: i64,
    position: i32,
    now: i64,
) -> Result<NormalizedNote, String> {
    let created_at = iso_to_ms_strict(&note.created_at)?;
    let updated_at = iso_to_ms_strict(&note.updated_at)?;
    // Option<Result<..>> -> Result<Option<..>> so a bad value aborts while an
    // absent one stays `None`.
    let resolved_at = note
        .resolved_at
        .as_ref()
        .map(|ts| iso_to_ms_strict(ts))
        .transpose()?;

    let (
        old_path,
        new_path,
        old_line,
        new_line,
        kind,
        range_start,
        range_end,
        base_sha,
        start_sha,
        head_sha,
    ) = extract_position_fields(&note.position);

    let author = &note.author;
    let resolved_by = note
        .resolved_by
        .as_ref()
        .map(|resolver| resolver.username.clone());

    Ok(NormalizedNote {
        gitlab_id: note.id,
        project_id: local_project_id,
        note_type: note.note_type.clone(),
        is_system: note.system,
        author_id: Some(author.id),
        author_username: author.username.clone(),
        body: note.body.clone(),
        created_at,
        updated_at,
        last_seen_at: now,
        position,
        resolvable: note.resolvable,
        resolved: note.resolved,
        resolved_by,
        resolved_at,
        position_old_path: old_path,
        position_new_path: new_path,
        position_old_line: old_line,
        position_new_line: new_line,
        position_type: kind,
        position_line_range_start: range_start,
        position_line_range_end: range_end,
        position_base_sha: base_sha,
        position_start_sha: start_sha,
        position_head_sha: head_sha,
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::gitlab::types::GitLabAuthor;

    // Shared fixtures: three timestamps one hour apart and the epoch-millisecond
    // values the assertions expect for the first and last of them.
    const DISCUSSION_ID: &str = "6a9c1750b37d513a43987b574953fceb50b03ce7";
    const NINE_AM: &str = "2024-01-16T09:00:00.000Z";
    const TEN_AM: &str = "2024-01-16T10:00:00.000Z";
    const ELEVEN_AM: &str = "2024-01-16T11:00:00.000Z";
    const NINE_AM_MS: i64 = 1705395600000;
    const ELEVEN_AM_MS: i64 = 1705402800000;

    /// Builds a note with the given flags; `updated_at` mirrors `created_at`
    /// and no resolution metadata or position is attached.
    fn make_test_note(
        id: i64,
        created_at: &str,
        system: bool,
        resolvable: bool,
        resolved: bool,
    ) -> GitLabNote {
        let author = GitLabAuthor {
            id: 1,
            username: "testuser".to_string(),
            name: "Test User".to_string(),
        };
        GitLabNote {
            id,
            note_type: Some("DiscussionNote".to_string()),
            body: format!("Note {}", id),
            author,
            created_at: created_at.to_string(),
            updated_at: created_at.to_string(),
            system,
            resolvable,
            resolved,
            resolved_by: None,
            resolved_at: None,
            position: None,
        }
    }

    /// A user-authored note that is neither resolvable nor resolved.
    fn plain_note(id: i64, created_at: &str) -> GitLabNote {
        make_test_note(id, created_at, false, false, false)
    }

    /// A non-system note with explicit resolution flags.
    fn review_note(id: i64, created_at: &str, resolvable: bool, resolved: bool) -> GitLabNote {
        make_test_note(id, created_at, false, resolvable, resolved)
    }

    fn make_test_discussion(individual_note: bool, notes: Vec<GitLabNote>) -> GitLabDiscussion {
        GitLabDiscussion {
            id: DISCUSSION_ID.to_string(),
            individual_note,
            notes,
        }
    }

    /// A threaded (non-individual) discussion holding `notes`.
    fn thread_of(notes: Vec<GitLabNote>) -> GitLabDiscussion {
        make_test_discussion(false, notes)
    }

    /// Transforms `discussion` as belonging to issue 42 in project 100.
    fn as_issue_discussion(discussion: &GitLabDiscussion) -> NormalizedDiscussion {
        transform_discussion(discussion, 100, NoteableRef::Issue(42))
    }

    #[test]
    fn transforms_discussion_payload_to_normalized_schema() {
        let discussion = thread_of(vec![plain_note(1, NINE_AM)]);

        let result = as_issue_discussion(&discussion);

        assert_eq!(result.gitlab_discussion_id, DISCUSSION_ID);
        assert_eq!(result.project_id, 100);
        assert_eq!(result.issue_id, Some(42));
        assert_eq!(result.merge_request_id, None);
        assert_eq!(result.noteable_type, "Issue");
        assert!(!result.individual_note);
    }

    #[test]
    fn transforms_merge_request_discussion() {
        let discussion = thread_of(vec![plain_note(1, NINE_AM)]);

        let result = transform_discussion(&discussion, 100, NoteableRef::MergeRequest(99));

        assert_eq!(result.issue_id, None);
        assert_eq!(result.merge_request_id, Some(99));
        assert_eq!(result.noteable_type, "MergeRequest");
    }

    #[test]
    fn extracts_notes_array_from_discussion() {
        let discussion = thread_of(vec![plain_note(1, NINE_AM), plain_note(2, TEN_AM)]);

        let notes = transform_notes(&discussion, 100);

        assert_eq!(notes.len(), 2);
        assert_eq!(notes[0].gitlab_id, 1);
        assert_eq!(notes[1].gitlab_id, 2);
    }

    #[test]
    fn sets_individual_note_flag_correctly() {
        let threaded = make_test_discussion(false, vec![plain_note(1, NINE_AM)]);
        let standalone = make_test_discussion(true, vec![plain_note(1, NINE_AM)]);

        assert!(!as_issue_discussion(&threaded).individual_note);
        assert!(as_issue_discussion(&standalone).individual_note);
    }

    #[test]
    fn flags_system_notes_with_is_system_true() {
        let discussion = thread_of(vec![
            make_test_note(1, NINE_AM, false, false, false),
            make_test_note(2, NINE_AM, true, false, false),
        ]);

        let notes = transform_notes(&discussion, 100);

        assert!(!notes[0].is_system);
        assert!(notes[1].is_system);
    }

    #[test]
    fn preserves_note_order_via_position_field() {
        let discussion = thread_of(vec![
            plain_note(1, NINE_AM),
            plain_note(2, TEN_AM),
            plain_note(3, ELEVEN_AM),
        ]);

        let notes = transform_notes(&discussion, 100);

        assert_eq!(notes[0].position, 0);
        assert_eq!(notes[1].position, 1);
        assert_eq!(notes[2].position, 2);
    }

    #[test]
    fn computes_first_note_at_and_last_note_at_correctly() {
        // Deliberately out of chronological order: the latest note sits in the middle.
        let discussion = thread_of(vec![
            plain_note(1, NINE_AM),
            plain_note(2, ELEVEN_AM),
            plain_note(3, TEN_AM),
        ]);

        let result = as_issue_discussion(&discussion);

        assert_eq!(result.first_note_at, Some(NINE_AM_MS));
        assert_eq!(result.last_note_at, Some(ELEVEN_AM_MS));
    }

    #[test]
    fn single_note_has_equal_first_and_last() {
        let discussion = thread_of(vec![plain_note(1, NINE_AM)]);

        let result = as_issue_discussion(&discussion);

        assert_eq!(result.first_note_at, result.last_note_at);
        assert_eq!(result.first_note_at, Some(NINE_AM_MS));
    }

    #[test]
    fn computes_resolvable_when_any_note_is_resolvable() {
        let not_resolvable = thread_of(vec![
            review_note(1, NINE_AM, false, false),
            review_note(2, TEN_AM, false, false),
        ]);
        let resolvable = thread_of(vec![
            review_note(1, NINE_AM, true, false),
            review_note(2, TEN_AM, false, false),
        ]);

        assert!(!as_issue_discussion(&not_resolvable).resolvable);
        assert!(as_issue_discussion(&resolvable).resolvable);
    }

    #[test]
    fn computes_resolved_only_when_all_resolvable_notes_resolved() {
        let partial = thread_of(vec![
            review_note(1, NINE_AM, true, true),
            review_note(2, TEN_AM, true, false),
        ]);
        let fully_resolved = thread_of(vec![
            review_note(1, NINE_AM, true, true),
            review_note(2, TEN_AM, true, true),
        ]);
        let no_resolvable = thread_of(vec![review_note(1, NINE_AM, false, false)]);

        assert!(!as_issue_discussion(&partial).resolved);
        assert!(as_issue_discussion(&fully_resolved).resolved);
        assert!(!as_issue_discussion(&no_resolvable).resolved);
    }
}