refactor: Remove redundant doc comments throughout codebase

Removes module-level doc comments (//! lines) and excessive inline doc
comments that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the "what" is clear from the types)
- Implementation context (the "how" is clear from the code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain
"why" (business decisions, non-obvious constraints) not "what" (which
the code itself shows). This change reduces noise and maintenance burden
while keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-05 00:04:32 -05:00
parent 976ad92ef0
commit 65583ed5d6
57 changed files with 143 additions and 1693 deletions

View File

@@ -1,66 +1,57 @@
//! Discussion and note transformers: convert GitLab discussions to local schema.
use tracing::warn;
use crate::core::time::{iso_to_ms, iso_to_ms_strict, now_ms};
use crate::gitlab::types::{GitLabDiscussion, GitLabNote};
/// Reference to the parent noteable (Issue or MergeRequest).
/// Uses an enum to prevent accidentally mixing up issue vs MR IDs at compile time.
///
/// `transform_discussion` matches on this value to fill exactly one of
/// `issue_id` / `merge_request_id` and to pick the `noteable_type` string
/// (`"Issue"` or `"MergeRequest"`).
// NOTE(review): the wrapped ids look like local database row ids rather than
// GitLab ids — confirm against the callers.
#[derive(Debug, Clone, Copy)]
pub enum NoteableRef {
/// The parent is an issue; the payload becomes the discussion's `issue_id`.
Issue(i64),
/// The parent is a merge request; the payload becomes the discussion's `merge_request_id`.
MergeRequest(i64),
}
/// Normalized discussion for local storage.
#[derive(Debug, Clone)]
pub struct NormalizedDiscussion {
pub gitlab_discussion_id: String,
pub project_id: i64,
pub issue_id: Option<i64>,
pub merge_request_id: Option<i64>,
pub noteable_type: String, // "Issue" or "MergeRequest"
pub noteable_type: String,
pub individual_note: bool,
pub first_note_at: Option<i64>, // min(note.created_at) in ms epoch
pub last_note_at: Option<i64>, // max(note.created_at) in ms epoch
pub first_note_at: Option<i64>,
pub last_note_at: Option<i64>,
pub last_seen_at: i64,
pub resolvable: bool, // any note is resolvable
pub resolved: bool, // all resolvable notes are resolved
pub resolvable: bool,
pub resolved: bool,
}
/// Normalized note for local storage.
#[derive(Debug, Clone)]
pub struct NormalizedNote {
pub gitlab_id: i64,
pub project_id: i64,
pub note_type: Option<String>, // "DiscussionNote" | "DiffNote" | null
pub note_type: Option<String>,
pub is_system: bool,
pub author_username: String,
pub body: String,
pub created_at: i64, // ms epoch
pub updated_at: i64, // ms epoch
pub created_at: i64,
pub updated_at: i64,
pub last_seen_at: i64,
pub position: i32, // 0-indexed array position
pub position: i32,
pub resolvable: bool,
pub resolved: bool,
pub resolved_by: Option<String>,
pub resolved_at: Option<i64>,
// DiffNote position fields (CP1 - basic path/line)
pub position_old_path: Option<String>,
pub position_new_path: Option<String>,
pub position_old_line: Option<i32>,
pub position_new_line: Option<i32>,
// DiffNote extended position fields (CP2)
pub position_type: Option<String>, // "text" | "image" | "file"
pub position_line_range_start: Option<i32>, // multi-line comment start
pub position_line_range_end: Option<i32>, // multi-line comment end
pub position_base_sha: Option<String>, // Base commit SHA for diff
pub position_start_sha: Option<String>, // Start commit SHA for diff
pub position_head_sha: Option<String>, // Head commit SHA for diff
pub position_type: Option<String>,
pub position_line_range_start: Option<i32>,
pub position_line_range_end: Option<i32>,
pub position_base_sha: Option<String>,
pub position_start_sha: Option<String>,
pub position_head_sha: Option<String>,
}
/// Parse ISO 8601 timestamp to milliseconds, defaulting to 0 on failure.
fn parse_timestamp(ts: &str) -> i64 {
match iso_to_ms(ts) {
Some(ms) => ms,
@@ -71,7 +62,6 @@ fn parse_timestamp(ts: &str) -> i64 {
}
}
/// Transform a GitLab discussion into normalized schema.
pub fn transform_discussion(
gitlab_discussion: &GitLabDiscussion,
local_project_id: i64,
@@ -79,13 +69,11 @@ pub fn transform_discussion(
) -> NormalizedDiscussion {
let now = now_ms();
// Derive issue_id, merge_request_id, and noteable_type from the enum
let (issue_id, merge_request_id, noteable_type) = match noteable {
NoteableRef::Issue(id) => (Some(id), None, "Issue"),
NoteableRef::MergeRequest(id) => (None, Some(id), "MergeRequest"),
};
// Compute first_note_at and last_note_at from notes
let note_timestamps: Vec<i64> = gitlab_discussion
.notes
.iter()
@@ -95,10 +83,8 @@ pub fn transform_discussion(
let first_note_at = note_timestamps.iter().min().copied();
let last_note_at = note_timestamps.iter().max().copied();
// Compute resolvable: any note is resolvable
let resolvable = gitlab_discussion.notes.iter().any(|n| n.resolvable);
// Compute resolved: all resolvable notes are resolved
let resolved = if resolvable {
gitlab_discussion
.notes
@@ -124,8 +110,6 @@ pub fn transform_discussion(
}
}
/// Transform a GitLab discussion for MR context.
/// Convenience wrapper that uses NoteableRef::MergeRequest internally.
pub fn transform_mr_discussion(
gitlab_discussion: &GitLabDiscussion,
local_project_id: i64,
@@ -138,7 +122,6 @@ pub fn transform_mr_discussion(
)
}
/// Transform notes from a GitLab discussion into normalized schema.
pub fn transform_notes(
gitlab_discussion: &GitLabDiscussion,
local_project_id: i64,
@@ -159,7 +142,6 @@ fn transform_single_note(
position: i32,
now: i64,
) -> NormalizedNote {
// Extract DiffNote position fields if present
let (
position_old_path,
position_new_path,
@@ -201,8 +183,6 @@ fn transform_single_note(
}
}
/// Extract DiffNote position fields from GitLabNotePosition.
/// Returns tuple of all position fields (all None if position is None).
#[allow(clippy::type_complexity)]
fn extract_position_fields(
position: &Option<crate::gitlab::types::GitLabNotePosition>,
@@ -240,8 +220,6 @@ fn extract_position_fields(
}
}
/// Transform notes from a GitLab discussion with strict timestamp parsing.
/// Returns Err if any timestamp is invalid - no silent fallback to 0.
pub fn transform_notes_with_diff_position(
gitlab_discussion: &GitLabDiscussion,
local_project_id: i64,
@@ -262,7 +240,6 @@ fn transform_single_note_strict(
position: i32,
now: i64,
) -> Result<NormalizedNote, String> {
// Parse timestamps with strict error handling
let created_at = iso_to_ms_strict(&note.created_at)?;
let updated_at = iso_to_ms_strict(&note.updated_at)?;
let resolved_at = match &note.resolved_at {
@@ -270,7 +247,6 @@ fn transform_single_note_strict(
None => None,
};
// Extract DiffNote position fields if present
let (
position_old_path,
position_new_path,
@@ -448,7 +424,7 @@ mod tests {
false,
vec![
make_test_note(1, "2024-01-16T09:00:00.000Z", false, false, false),
make_test_note(2, "2024-01-16T09:00:00.000Z", true, false, false), // system note
make_test_note(2, "2024-01-16T09:00:00.000Z", true, false, false),
],
);
@@ -482,16 +458,14 @@ mod tests {
false,
vec![
make_test_note(1, "2024-01-16T09:00:00.000Z", false, false, false),
make_test_note(2, "2024-01-16T11:00:00.000Z", false, false, false), // latest
make_test_note(2, "2024-01-16T11:00:00.000Z", false, false, false),
make_test_note(3, "2024-01-16T10:00:00.000Z", false, false, false),
],
);
let result = transform_discussion(&discussion, 100, NoteableRef::Issue(42));
// first_note_at should be 09:00 (note 1)
assert_eq!(result.first_note_at, Some(1705395600000));
// last_note_at should be 11:00 (note 2)
assert_eq!(result.last_note_at, Some(1705402800000));
}
@@ -527,7 +501,7 @@ mod tests {
let resolvable = make_test_discussion(
false,
vec![
make_test_note(1, "2024-01-16T09:00:00.000Z", false, true, false), // resolvable
make_test_note(1, "2024-01-16T09:00:00.000Z", false, true, false),
make_test_note(2, "2024-01-16T10:00:00.000Z", false, false, false),
],
);
@@ -538,16 +512,14 @@ mod tests {
#[test]
fn computes_resolved_only_when_all_resolvable_notes_resolved() {
// Mix of resolved/unresolved - not resolved
let partial = make_test_discussion(
false,
vec![
make_test_note(1, "2024-01-16T09:00:00.000Z", false, true, true), // resolved
make_test_note(2, "2024-01-16T10:00:00.000Z", false, true, false), // not resolved
make_test_note(1, "2024-01-16T09:00:00.000Z", false, true, true),
make_test_note(2, "2024-01-16T10:00:00.000Z", false, true, false),
],
);
// All resolvable notes resolved
let fully_resolved = make_test_discussion(
false,
vec![
@@ -556,7 +528,6 @@ mod tests {
],
);
// No resolvable notes - resolved should be false
let no_resolvable = make_test_discussion(
false,
vec![make_test_note(

View File

@@ -1,5 +1,3 @@
//! Issue transformer: converts GitLabIssue to local schema.
use chrono::DateTime;
use thiserror::Error;
@@ -11,7 +9,6 @@ pub enum TransformError {
TimestampParse(String, String),
}
/// Local schema representation of an issue row.
#[derive(Debug, Clone)]
pub struct IssueRow {
pub gitlab_id: i64,
@@ -21,14 +18,13 @@ pub struct IssueRow {
pub description: Option<String>,
pub state: String,
pub author_username: String,
pub created_at: i64, // ms epoch UTC
pub updated_at: i64, // ms epoch UTC
pub created_at: i64,
pub updated_at: i64,
pub web_url: String,
pub due_date: Option<String>, // YYYY-MM-DD
pub milestone_title: Option<String>, // Denormalized for quick display
pub due_date: Option<String>,
pub milestone_title: Option<String>,
}
/// Local schema representation of a milestone row.
#[derive(Debug, Clone)]
pub struct MilestoneRow {
pub gitlab_id: i64,
@@ -41,7 +37,6 @@ pub struct MilestoneRow {
pub web_url: Option<String>,
}
/// Issue bundled with extracted metadata.
#[derive(Debug, Clone)]
pub struct IssueWithMetadata {
pub issue: IssueRow,
@@ -50,14 +45,12 @@ pub struct IssueWithMetadata {
pub milestone: Option<MilestoneRow>,
}
/// Convert an RFC 3339 timestamp string into milliseconds since the Unix epoch.
///
/// # Errors
/// Returns `TransformError::TimestampParse` holding the offending input and the
/// parser's error message when `ts` is not valid RFC 3339.
fn parse_timestamp(ts: &str) -> Result<i64, TransformError> {
    match DateTime::parse_from_rfc3339(ts) {
        Ok(parsed) => Ok(parsed.timestamp_millis()),
        Err(err) => Err(TransformError::TimestampParse(
            ts.to_string(),
            err.to_string(),
        )),
    }
}
/// Transform a GitLab issue into local schema format.
pub fn transform_issue(issue: &GitLabIssue) -> Result<IssueWithMetadata, TransformError> {
let created_at = parse_timestamp(&issue.created_at)?;
let updated_at = parse_timestamp(&issue.updated_at)?;
@@ -182,20 +175,16 @@ mod tests {
let issue = make_test_issue();
let result = transform_issue(&issue).unwrap();
// 2024-01-15T10:00:00.000Z = 1705312800000 ms
assert_eq!(result.issue.created_at, 1705312800000);
// 2024-01-20T15:30:00.000Z = 1705764600000 ms
assert_eq!(result.issue.updated_at, 1705764600000);
}
#[test]
fn handles_timezone_offset_timestamps() {
    // 2024-01-15T10:00:00Z in milliseconds since the Unix epoch.
    let expected_ms: i64 = 1705312800000;

    // GitLab can return timestamps carrying a UTC offset instead of `Z`;
    // 05:00 at -05:00 is the same instant as 10:00 UTC.
    let mut issue = make_test_issue();
    issue.created_at = String::from("2024-01-15T05:00:00-05:00");

    let transformed = transform_issue(&issue).unwrap();
    assert_eq!(transformed.issue.created_at, expected_ms);
}
@@ -237,10 +226,8 @@ mod tests {
let result = transform_issue(&issue).unwrap();
// Denormalized title on issue for quick display
assert_eq!(result.issue.milestone_title, Some("v1.0".to_string()));
// Full milestone row for normalized storage
let milestone = result.milestone.expect("should have milestone");
assert_eq!(milestone.gitlab_id, 500);
assert_eq!(milestone.iid, 5);

View File

@@ -1,9 +1,6 @@
//! Merge request transformer: converts GitLabMergeRequest to local schema.
use crate::core::time::{iso_to_ms_opt_strict, iso_to_ms_strict, now_ms};
use crate::gitlab::types::GitLabMergeRequest;
/// Local schema representation of a merge request row.
#[derive(Debug, Clone)]
pub struct NormalizedMergeRequest {
pub gitlab_id: i64,
@@ -21,15 +18,14 @@ pub struct NormalizedMergeRequest {
pub references_full: Option<String>,
pub detailed_merge_status: Option<String>,
pub merge_user_username: Option<String>,
pub created_at: i64, // ms epoch UTC
pub updated_at: i64, // ms epoch UTC
pub merged_at: Option<i64>, // ms epoch UTC
pub closed_at: Option<i64>, // ms epoch UTC
pub last_seen_at: i64, // ms epoch UTC
pub created_at: i64,
pub updated_at: i64,
pub merged_at: Option<i64>,
pub closed_at: Option<i64>,
pub last_seen_at: i64,
pub web_url: String,
}
/// Merge request bundled with extracted metadata.
#[derive(Debug, Clone)]
pub struct MergeRequestWithMetadata {
pub merge_request: NormalizedMergeRequest,
@@ -38,61 +34,43 @@ pub struct MergeRequestWithMetadata {
pub reviewer_usernames: Vec<String>,
}
/// Transform a GitLab merge request into local schema format.
///
/// # Arguments
/// * `gitlab_mr` - The GitLab MR API response
/// * `local_project_id` - The local database project ID (not GitLab's project_id)
///
/// # Returns
/// * `Ok(MergeRequestWithMetadata)` - Transformed MR with extracted metadata
/// * `Err(String)` - Error message if transformation fails (e.g., invalid timestamps)
pub fn transform_merge_request(
gitlab_mr: &GitLabMergeRequest,
local_project_id: i64,
) -> Result<MergeRequestWithMetadata, String> {
// Parse required timestamps
let created_at = iso_to_ms_strict(&gitlab_mr.created_at)?;
let updated_at = iso_to_ms_strict(&gitlab_mr.updated_at)?;
// Parse optional timestamps
let merged_at = iso_to_ms_opt_strict(&gitlab_mr.merged_at)?;
let closed_at = iso_to_ms_opt_strict(&gitlab_mr.closed_at)?;
// Draft: prefer draft, fallback to work_in_progress
let is_draft = gitlab_mr.draft || gitlab_mr.work_in_progress;
// Merge status: prefer detailed_merge_status over legacy
let detailed_merge_status = gitlab_mr
.detailed_merge_status
.clone()
.or_else(|| gitlab_mr.merge_status_legacy.clone());
// Merge user: prefer merge_user over merged_by
let merge_user_username = gitlab_mr
.merge_user
.as_ref()
.map(|u| u.username.clone())
.or_else(|| gitlab_mr.merged_by.as_ref().map(|u| u.username.clone()));
// References extraction
let (references_short, references_full) = gitlab_mr
.references
.as_ref()
.map(|r| (Some(r.short.clone()), Some(r.full.clone())))
.unwrap_or((None, None));
// Head SHA
let head_sha = gitlab_mr.sha.clone();
// Extract assignee usernames
let assignee_usernames: Vec<String> = gitlab_mr
.assignees
.iter()
.map(|a| a.username.clone())
.collect();
// Extract reviewer usernames
let reviewer_usernames: Vec<String> = gitlab_mr
.reviewers
.iter()

View File

@@ -1,5 +1,3 @@
//! Transformers for converting GitLab API responses to local schema.
pub mod discussion;
pub mod issue;
pub mod merge_request;