feat(transformers): Add MR transformer and polymorphic discussion support

Introduces NormalizedMergeRequest transformer and updates discussion
normalization to handle both issue and MR discussions polymorphically.

New transformers:
- NormalizedMergeRequest: Transforms API MergeRequest to database row,
  extracting labels/assignees/reviewers into separate collections for
  junction table insertion. Handles draft detection, detailed_merge_status
  preference over deprecated merge_status, and merge_user over merged_by.

Discussion transformer updates:
- NormalizedDiscussion now takes noteable_type ("Issue" | "MergeRequest")
  and noteable_id for polymorphic FK binding
- normalize_discussions_for_issue(): Convenience wrapper for issues
- transform_mr_discussion(): Convenience wrapper for MRs
- DiffNote position fields (type, line_range, SHA triplet) now extracted
  from API position object for code review context

Design decisions:
- Transformer returns a MergeRequestWithMetadata struct (normalized MR plus
  label, assignee, and reviewer names) for efficient batch insertion
  without re-querying
- Timestamps converted to ms epoch for SQLite storage consistency
- Optional fields use map() chains for clean null handling

The polymorphic discussion approach allows reusing the same discussions
and notes tables for both issues and MRs, with noteable_type + FK
determining the parent relationship.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-01-26 22:45:29 -05:00
parent cc8c489fd2
commit d33f24c91b
4 changed files with 340 additions and 9 deletions

View File

@@ -46,6 +46,18 @@ pub struct NormalizedNote {
pub resolved: bool,
pub resolved_by: Option<String>,
pub resolved_at: Option<i64>,
// DiffNote position fields (CP1 - basic path/line)
pub position_old_path: Option<String>,
pub position_new_path: Option<String>,
pub position_old_line: Option<i32>,
pub position_new_line: Option<i32>,
// DiffNote extended position fields (CP2)
pub position_type: Option<String>, // "text" | "image" | "file"
pub position_line_range_start: Option<i32>, // multi-line comment start
pub position_line_range_end: Option<i32>, // multi-line comment end
pub position_base_sha: Option<String>, // Base commit SHA for diff
pub position_start_sha: Option<String>, // Start commit SHA for diff
pub position_head_sha: Option<String>, // Head commit SHA for diff
}
/// Parse ISO 8601 timestamp to milliseconds, returning None on failure.
@@ -113,6 +125,20 @@ pub fn transform_discussion(
}
}
/// Normalize a GitLab discussion whose parent is a merge request.
///
/// Thin wrapper over `transform_discussion`: it wraps `local_mr_id` in
/// `NoteableRef::MergeRequest` so callers do not have to build the parent
/// reference themselves.
pub fn transform_mr_discussion(
    gitlab_discussion: &GitLabDiscussion,
    local_project_id: i64,
    local_mr_id: i64,
) -> NormalizedDiscussion {
    let parent = NoteableRef::MergeRequest(local_mr_id);
    transform_discussion(gitlab_discussion, local_project_id, parent)
}
/// Transform notes from a GitLab discussion into normalized schema.
pub fn transform_notes(
gitlab_discussion: &GitLabDiscussion,
@@ -134,6 +160,20 @@ fn transform_single_note(
position: i32,
now: i64,
) -> NormalizedNote {
// Extract DiffNote position fields if present
let (
position_old_path,
position_new_path,
position_old_line,
position_new_line,
position_type,
position_line_range_start,
position_line_range_end,
position_base_sha,
position_start_sha,
position_head_sha,
) = extract_position_fields(&note.position);
NormalizedNote {
gitlab_id: note.id,
project_id: local_project_id,
@@ -152,9 +192,138 @@ fn transform_single_note(
.resolved_at
.as_ref()
.and_then(|ts| parse_timestamp_opt(ts)),
position_old_path,
position_new_path,
position_old_line,
position_new_line,
position_type,
position_line_range_start,
position_line_range_end,
position_base_sha,
position_start_sha,
position_head_sha,
}
}
/// Flatten an optional DiffNote position into the ten values stored on a note.
///
/// Tuple order: old path, new path, old line, new line, position type,
/// line-range start, line-range end, base SHA, start SHA, head SHA.
/// Every element is `None` when `position` itself is `None`.
// The ten-element tuple mirrors the NormalizedNote position columns one-to-one,
// hence the lint exemption instead of a dedicated type.
#[allow(clippy::type_complexity)]
fn extract_position_fields(
    position: &Option<crate::gitlab::types::GitLabNotePosition>,
) -> (
    Option<String>,
    Option<String>,
    Option<i32>,
    Option<i32>,
    Option<String>,
    Option<i32>,
    Option<i32>,
    Option<String>,
    Option<String>,
    Option<String>,
) {
    // No position object at all: nothing to extract.
    let Some(pos) = position else {
        return (None, None, None, None, None, None, None, None, None, None);
    };

    // Borrow the optional line range once; both endpoints are read from it.
    let range = pos.line_range.as_ref();

    (
        pos.old_path.clone(),
        pos.new_path.clone(),
        pos.old_line,
        pos.new_line,
        pos.position_type.clone(),
        range.and_then(|lr| lr.start_line()),
        range.and_then(|lr| lr.end_line()),
        pos.base_sha.clone(),
        pos.start_sha.clone(),
        pos.head_sha.clone(),
    )
}
/// Convert an RFC 3339 / ISO 8601 timestamp string to milliseconds since the Unix epoch.
///
/// # Errors
/// Returns `Err("Invalid timestamp: <input>")` when the string cannot be parsed
/// as RFC 3339. The underlying parser error is discarded; only the offending
/// input is reported.
fn parse_timestamp_strict(ts: &str) -> Result<i64, String> {
    match DateTime::parse_from_rfc3339(ts) {
        Ok(parsed) => Ok(parsed.timestamp_millis()),
        Err(_) => Err(format!("Invalid timestamp: {}", ts)),
    }
}
/// Normalize every note of a GitLab discussion, including DiffNote position fields.
///
/// Each note receives its zero-based index within the discussion as `position`,
/// and all notes share a single `last_seen_at` value captured once up front.
///
/// # Errors
/// Stops at the first note whose timestamps fail strict parsing and returns that
/// error; there is no silent fallback to 0.
pub fn transform_notes_with_diff_position(
    gitlab_discussion: &GitLabDiscussion,
    local_project_id: i64,
) -> Result<Vec<NormalizedNote>, String> {
    let seen_at = now_ms();
    let notes = &gitlab_discussion.notes;

    let mut normalized = Vec::with_capacity(notes.len());
    for (index, note) in notes.iter().enumerate() {
        let row = transform_single_note_strict(note, local_project_id, index as i32, seen_at)?;
        normalized.push(row);
    }
    Ok(normalized)
}
/// Normalize a single GitLab note, failing on any unparseable timestamp.
///
/// `position` is the note's ordinal as supplied by the caller
/// (`transform_notes_with_diff_position` passes the zero-based index within the
/// discussion); it is unrelated to the DiffNote position fields. `now` is stored
/// as `last_seen_at`.
///
/// # Errors
/// Returns the `parse_timestamp_strict` error for `created_at`, `updated_at`, or
/// `resolved_at`, checked in that order.
fn transform_single_note_strict(
    note: &GitLabNote,
    local_project_id: i64,
    position: i32,
    now: i64,
) -> Result<NormalizedNote, String> {
    // Required timestamps must parse; an absent resolved_at stays None.
    let created_at = parse_timestamp_strict(&note.created_at)?;
    let updated_at = parse_timestamp_strict(&note.updated_at)?;
    let resolved_at = note
        .resolved_at
        .as_ref()
        .map(|ts| parse_timestamp_strict(ts))
        .transpose()?;

    let resolved_by = note.resolved_by.as_ref().map(|user| user.username.clone());

    // DiffNote position columns (all None for notes without a position).
    let (
        old_path,
        new_path,
        old_line,
        new_line,
        kind,
        range_start,
        range_end,
        base_sha,
        start_sha,
        head_sha,
    ) = extract_position_fields(&note.position);

    Ok(NormalizedNote {
        gitlab_id: note.id,
        project_id: local_project_id,
        note_type: note.note_type.clone(),
        is_system: note.system,
        author_username: note.author.username.clone(),
        body: note.body.clone(),
        created_at,
        updated_at,
        last_seen_at: now,
        position,
        resolvable: note.resolvable,
        resolved: note.resolved,
        resolved_by,
        resolved_at,
        position_old_path: old_path,
        position_new_path: new_path,
        position_old_line: old_line,
        position_new_line: new_line,
        position_type: kind,
        position_line_range_start: range_start,
        position_line_range_end: range_end,
        position_base_sha: base_sha,
        position_start_sha: start_sha,
        position_head_sha: head_sha,
    })
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -24,8 +24,8 @@ pub struct IssueRow {
pub created_at: i64, // ms epoch UTC
pub updated_at: i64, // ms epoch UTC
pub web_url: String,
pub due_date: Option<String>, // YYYY-MM-DD
pub milestone_title: Option<String>, // Denormalized for quick display
pub due_date: Option<String>, // YYYY-MM-DD
pub milestone_title: Option<String>, // Denormalized for quick display
}
/// Local schema representation of a milestone row.
@@ -62,11 +62,8 @@ pub fn transform_issue(issue: GitLabIssue) -> Result<IssueWithMetadata, Transfor
let created_at = parse_timestamp(&issue.created_at)?;
let updated_at = parse_timestamp(&issue.updated_at)?;
let assignee_usernames: Vec<String> = issue
.assignees
.iter()
.map(|a| a.username.clone())
.collect();
let assignee_usernames: Vec<String> =
issue.assignees.iter().map(|a| a.username.clone()).collect();
let milestone_title = issue.milestone.as_ref().map(|m| m.title.clone());
@@ -252,7 +249,10 @@ mod tests {
assert_eq!(milestone.description, Some("First release".to_string()));
assert_eq!(milestone.state, Some("active".to_string()));
assert_eq!(milestone.due_date, Some("2024-02-01".to_string()));
assert_eq!(milestone.web_url, Some("https://gitlab.example.com/-/milestones/5".to_string()));
assert_eq!(
milestone.web_url,
Some("https://gitlab.example.com/-/milestones/5".to_string())
);
}
#[test]

View File

@@ -0,0 +1,155 @@
//! Merge request transformer: converts GitLabMergeRequest to local schema.
use chrono::DateTime;
use std::time::{SystemTime, UNIX_EPOCH};
use crate::gitlab::types::GitLabMergeRequest;
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// # Panics
/// Panics with "Time went backwards" if the system clock reports a time earlier
/// than the Unix epoch.
fn now_ms() -> i64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("Time went backwards");
    since_epoch.as_millis() as i64
}
/// Convert an RFC 3339 / ISO 8601 timestamp string to milliseconds since the Unix epoch.
///
/// # Errors
/// Returns `Err("Failed to parse timestamp '<input>': <parser error>")` when the
/// string is not valid RFC 3339.
fn iso_to_ms(ts: &str) -> Result<i64, String> {
    match DateTime::parse_from_rfc3339(ts) {
        Ok(parsed) => Ok(parsed.timestamp_millis()),
        Err(err) => Err(format!("Failed to parse timestamp '{}': {}", ts, err)),
    }
}
/// Parse optional ISO 8601 timestamp to optional milliseconds since Unix epoch.
fn iso_to_ms_opt(ts: &Option<String>) -> Result<Option<i64>, String> {
match ts {
Some(s) => iso_to_ms(s).map(Some),
None => Ok(None),
}
}
/// Local schema representation of a merge request row.
///
/// Produced by `transform_merge_request` from a `GitLabMergeRequest`. All
/// timestamp fields hold milliseconds since the Unix epoch.
#[derive(Debug, Clone)]
pub struct NormalizedMergeRequest {
/// The `id` value reported by the GitLab API for this merge request.
pub gitlab_id: i64,
/// Local database project ID (not GitLab's project_id).
pub project_id: i64,
/// The `iid` value reported by the GitLab API.
pub iid: i64,
/// Title, copied from the API response.
pub title: String,
/// Description, copied from the API response; `None` when the API provides none.
pub description: Option<String>,
/// State string, copied verbatim from the API response.
pub state: String,
/// True when the API reports either `draft` or the older `work_in_progress` flag.
pub draft: bool,
/// Username of the merge request author.
pub author_username: String,
/// Source branch name, copied from the API response.
pub source_branch: String,
/// Target branch name, copied from the API response.
pub target_branch: String,
/// The API's `sha` field for the merge request, if present.
pub head_sha: Option<String>,
/// `references.short` from the API; `None` when the API sends no references object.
pub references_short: Option<String>,
/// `references.full` from the API; `None` when the API sends no references object.
pub references_full: Option<String>,
/// The API's `detailed_merge_status`, falling back to the legacy merge status when absent.
pub detailed_merge_status: Option<String>,
/// Username from `merge_user`, falling back to `merged_by` when `merge_user` is absent.
pub merge_user_username: Option<String>,
pub created_at: i64, // ms epoch UTC
pub updated_at: i64, // ms epoch UTC
pub merged_at: Option<i64>, // ms epoch UTC
pub closed_at: Option<i64>, // ms epoch UTC
/// Time at which the transformer produced this row (set from `now_ms()`).
pub last_seen_at: i64, // ms epoch UTC
/// Web URL, copied from the API response.
pub web_url: String,
}
/// Merge request bundled with extracted metadata.
///
/// Returned by `transform_merge_request`: the normalized row plus the label,
/// assignee, and reviewer names pulled out of the same API response, so callers
/// can write the related rows without fetching the MR again.
#[derive(Debug, Clone)]
pub struct MergeRequestWithMetadata {
/// The normalized merge request row.
pub merge_request: NormalizedMergeRequest,
/// Label names, cloned from the API response's `labels` list.
pub label_names: Vec<String>,
/// Username of each entry in the API response's `assignees` list, in order.
pub assignee_usernames: Vec<String>,
/// Username of each entry in the API response's `reviewers` list, in order.
pub reviewer_usernames: Vec<String>,
}
/// Convert a GitLab merge request API object into the local schema.
///
/// # Arguments
/// * `gitlab_mr` - The GitLab MR API response
/// * `local_project_id` - The local database project ID (not GitLab's project_id)
///
/// # Returns
/// The normalized row together with the label names, assignee usernames, and
/// reviewer usernames extracted from the same response.
///
/// # Errors
/// Returns an error string when `created_at`, `updated_at`, or a present
/// `merged_at` / `closed_at` is not a valid RFC 3339 timestamp.
pub fn transform_merge_request(
    gitlab_mr: &GitLabMergeRequest,
    local_project_id: i64,
) -> Result<MergeRequestWithMetadata, String> {
    // All fallible work happens first: required, then optional timestamps.
    let created_at = iso_to_ms(&gitlab_mr.created_at)?;
    let updated_at = iso_to_ms(&gitlab_mr.updated_at)?;
    let merged_at = iso_to_ms_opt(&gitlab_mr.merged_at)?;
    let closed_at = iso_to_ms_opt(&gitlab_mr.closed_at)?;

    // Either the current `draft` flag or the older `work_in_progress` flag marks a draft.
    let draft = gitlab_mr.draft || gitlab_mr.work_in_progress;

    // Prefer the detailed status; use the legacy field only when it is absent.
    let detailed_merge_status = match &gitlab_mr.detailed_merge_status {
        Some(status) => Some(status.clone()),
        None => gitlab_mr.merge_status_legacy.clone(),
    };

    // Prefer `merge_user`; use `merged_by` only when `merge_user` is absent.
    let merge_user_username = match (&gitlab_mr.merge_user, &gitlab_mr.merged_by) {
        (Some(user), _) => Some(user.username.clone()),
        (None, Some(user)) => Some(user.username.clone()),
        (None, None) => None,
    };

    // Short and full references travel together: both present or both absent.
    let (references_short, references_full) = match gitlab_mr.references.as_ref() {
        Some(refs) => (Some(refs.short.clone()), Some(refs.full.clone())),
        None => (None, None),
    };

    // Related collections, extracted alongside the row.
    let label_names = gitlab_mr.labels.clone();
    let assignee_usernames: Vec<String> = gitlab_mr
        .assignees
        .iter()
        .map(|assignee| assignee.username.clone())
        .collect();
    let reviewer_usernames: Vec<String> = gitlab_mr
        .reviewers
        .iter()
        .map(|reviewer| reviewer.username.clone())
        .collect();

    let merge_request = NormalizedMergeRequest {
        gitlab_id: gitlab_mr.id,
        project_id: local_project_id,
        iid: gitlab_mr.iid,
        title: gitlab_mr.title.clone(),
        description: gitlab_mr.description.clone(),
        state: gitlab_mr.state.clone(),
        draft,
        author_username: gitlab_mr.author.username.clone(),
        source_branch: gitlab_mr.source_branch.clone(),
        target_branch: gitlab_mr.target_branch.clone(),
        head_sha: gitlab_mr.sha.clone(),
        references_short,
        references_full,
        detailed_merge_status,
        merge_user_username,
        created_at,
        updated_at,
        merged_at,
        closed_at,
        last_seen_at: now_ms(),
        web_url: gitlab_mr.web_url.clone(),
    };

    Ok(MergeRequestWithMetadata {
        merge_request,
        label_names,
        assignee_usernames,
        reviewer_usernames,
    })
}

View File

@@ -2,6 +2,13 @@
pub mod discussion;
pub mod issue;
pub mod merge_request;
pub use discussion::{NormalizedDiscussion, NormalizedNote, NoteableRef, transform_discussion, transform_notes};
pub use discussion::{
NormalizedDiscussion, NormalizedNote, NoteableRef, transform_discussion,
transform_mr_discussion, transform_notes, transform_notes_with_diff_position,
};
pub use issue::{IssueRow, IssueWithMetadata, MilestoneRow, transform_issue};
pub use merge_request::{
MergeRequestWithMetadata, NormalizedMergeRequest, transform_merge_request,
};