refactor: Remove redundant doc comments throughout codebase
Removes module-level doc comments (//! lines) and excessive inline doc comments that were duplicating information already evident from: - Function/struct names (self-documenting code) - Type signatures (the what is clear from types) - Implementation context (the how is clear from code) Affected modules: - cli/* - Removed command descriptions duplicating clap help text - core/* - Removed module headers and obvious function docs - documents/* - Removed extractor/regenerator/truncation docs - embedding/* - Removed pipeline and chunking docs - gitlab/* - Removed client and transformer docs (kept type definitions) - ingestion/* - Removed orchestrator and ingestion docs - search/* - Removed FTS and vector search docs Philosophy: Code should be self-documenting. Comments should explain "why" (business decisions, non-obvious constraints) not "what" (which the code itself shows). This change reduces noise and maintenance burden while keeping the codebase just as understandable. Retains comments for: - Non-obvious business logic - Important safety invariants - Complex algorithm explanations - Public API boundaries where generated docs matter Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,3 @@
|
||||
//! Performance metrics types and tracing layer for sync pipeline observability.
|
||||
//!
|
||||
//! Provides:
|
||||
//! - [`StageTiming`]: Serializable timing/counter data for pipeline stages
|
||||
//! - [`MetricsLayer`]: Custom tracing subscriber layer that captures span timing
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Instant;
|
||||
@@ -14,16 +8,10 @@ use tracing::span::{Attributes, Id, Record};
|
||||
use tracing_subscriber::layer::{Context, Layer};
|
||||
use tracing_subscriber::registry::LookupSpan;
|
||||
|
||||
/// Returns true when value is zero (for serde `skip_serializing_if`).
|
||||
fn is_zero(v: &usize) -> bool {
|
||||
*v == 0
|
||||
}
|
||||
|
||||
/// Timing and counter data for a single pipeline stage.
|
||||
///
|
||||
/// Supports nested sub-stages for hierarchical timing breakdowns.
|
||||
/// Fields with zero/empty values are omitted from JSON output to
|
||||
/// keep robot-mode payloads compact.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct StageTiming {
|
||||
pub name: String,
|
||||
@@ -43,11 +31,6 @@ pub struct StageTiming {
|
||||
pub sub_stages: Vec<StageTiming>,
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// MetricsLayer: custom tracing subscriber layer
|
||||
// ============================================================================
|
||||
|
||||
/// Internal data tracked per open span.
|
||||
struct SpanData {
|
||||
name: String,
|
||||
parent_id: Option<u64>,
|
||||
@@ -57,19 +40,12 @@ struct SpanData {
|
||||
retries: usize,
|
||||
}
|
||||
|
||||
/// Completed span data with its original ID and parent ID.
|
||||
struct CompletedSpan {
|
||||
id: u64,
|
||||
parent_id: Option<u64>,
|
||||
timing: StageTiming,
|
||||
}
|
||||
|
||||
/// Custom tracing layer that captures span timing and structured fields.
|
||||
///
|
||||
/// Collects data from `#[instrument]` spans and materializes it into
|
||||
/// a `Vec<StageTiming>` tree via [`extract_timings`].
|
||||
///
|
||||
/// Thread-safe via `Arc<Mutex<>>` — suitable for concurrent span operations.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MetricsLayer {
|
||||
spans: Arc<Mutex<HashMap<u64, SpanData>>>,
|
||||
@@ -90,45 +66,34 @@ impl MetricsLayer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract timing tree for a completed run.
|
||||
///
|
||||
/// Returns the top-level stages with sub-stages nested.
|
||||
/// Call after the root span closes.
|
||||
pub fn extract_timings(&self) -> Vec<StageTiming> {
|
||||
let completed = self.completed.lock().unwrap_or_else(|e| e.into_inner());
|
||||
if completed.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Build children map: parent_id -> Vec<StageTiming>
|
||||
let mut children_map: HashMap<u64, Vec<StageTiming>> = HashMap::new();
|
||||
let mut roots = Vec::new();
|
||||
let mut id_to_timing: HashMap<u64, StageTiming> = HashMap::new();
|
||||
|
||||
// First pass: collect all timings by ID
|
||||
for entry in completed.iter() {
|
||||
id_to_timing.insert(entry.id, entry.timing.clone());
|
||||
}
|
||||
|
||||
// Second pass: process in reverse order (children close before parents)
|
||||
// to build the tree bottom-up
|
||||
for entry in completed.iter() {
|
||||
// Attach any children that were collected for this span
|
||||
if let Some(timing) = id_to_timing.get_mut(&entry.id)
|
||||
&& let Some(children) = children_map.remove(&entry.id)
|
||||
{
|
||||
timing.sub_stages = children;
|
||||
}
|
||||
|
||||
if let Some(parent_id) = entry.parent_id {
|
||||
// This is a child span — attach to parent's children
|
||||
if let Some(timing) = id_to_timing.remove(&entry.id) {
|
||||
children_map.entry(parent_id).or_default().push(timing);
|
||||
}
|
||||
if let Some(parent_id) = entry.parent_id
|
||||
&& let Some(timing) = id_to_timing.remove(&entry.id)
|
||||
{
|
||||
children_map.entry(parent_id).or_default().push(timing);
|
||||
}
|
||||
}
|
||||
|
||||
// Remaining entries in id_to_timing are roots
|
||||
for entry in completed.iter() {
|
||||
if entry.parent_id.is_none()
|
||||
&& let Some(mut timing) = id_to_timing.remove(&entry.id)
|
||||
@@ -144,7 +109,6 @@ impl MetricsLayer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Visitor that extracts field values from span attributes.
|
||||
struct FieldVisitor<'a>(&'a mut HashMap<String, serde_json::Value>);
|
||||
|
||||
impl tracing::field::Visit for FieldVisitor<'_> {
|
||||
@@ -182,7 +146,6 @@ impl tracing::field::Visit for FieldVisitor<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Visitor that extracts event fields for rate-limit/retry detection.
|
||||
#[derive(Default)]
|
||||
struct EventVisitor {
|
||||
status_code: Option<u64>,
|
||||
@@ -248,7 +211,6 @@ where
|
||||
}
|
||||
|
||||
fn on_event(&self, event: &tracing::Event<'_>, ctx: Context<'_, S>) {
|
||||
// Count rate-limit and retry events on the current span
|
||||
if let Some(span_ref) = ctx.event_span(event) {
|
||||
let id = span_ref.id();
|
||||
if let Some(data) = self
|
||||
@@ -317,7 +279,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
// Manual Debug impl since SpanData and CompletedSpan don't derive Debug
|
||||
impl std::fmt::Debug for SpanData {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("SpanData")
|
||||
@@ -376,7 +337,6 @@ mod tests {
|
||||
assert_eq!(json["rate_limit_hits"], 2);
|
||||
assert_eq!(json["retries"], 5);
|
||||
|
||||
// Sub-stage present
|
||||
let sub = &json["sub_stages"][0];
|
||||
assert_eq!(sub["name"], "ingest_issues");
|
||||
assert_eq!(sub["project"], "group/repo");
|
||||
@@ -400,7 +360,6 @@ mod tests {
|
||||
let json = serde_json::to_value(&timing).unwrap();
|
||||
let obj = json.as_object().unwrap();
|
||||
|
||||
// Zero fields must be absent
|
||||
assert!(!obj.contains_key("items_skipped"));
|
||||
assert!(!obj.contains_key("errors"));
|
||||
assert!(!obj.contains_key("rate_limit_hits"));
|
||||
@@ -408,7 +367,6 @@ mod tests {
|
||||
assert!(!obj.contains_key("sub_stages"));
|
||||
assert!(!obj.contains_key("project"));
|
||||
|
||||
// Required fields always present
|
||||
assert!(obj.contains_key("name"));
|
||||
assert!(obj.contains_key("elapsed_ms"));
|
||||
assert!(obj.contains_key("items_processed"));
|
||||
@@ -539,13 +497,12 @@ mod tests {
|
||||
tracing::subscriber::with_default(subscriber, || {
|
||||
let span = tracing::info_span!("test_stage");
|
||||
let _guard = span.enter();
|
||||
// Simulate work
|
||||
});
|
||||
|
||||
let timings = metrics.extract_timings();
|
||||
assert_eq!(timings.len(), 1);
|
||||
assert_eq!(timings[0].name, "test_stage");
|
||||
assert!(timings[0].elapsed_ms < 100); // Should be near-instant
|
||||
assert!(timings[0].elapsed_ms < 100);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user