refactor: Remove redundant doc comments throughout codebase
Removes module-level doc comments (//! lines) and excessive inline doc comments that were duplicating information already evident from: - Function/struct names (self-documenting code) - Type signatures (the what is clear from types) - Implementation context (the how is clear from code) Affected modules: - cli/* - Removed command descriptions duplicating clap help text - core/* - Removed module headers and obvious function docs - documents/* - Removed extractor/regenerator/truncation docs - embedding/* - Removed pipeline and chunking docs - gitlab/* - Removed client and transformer docs (kept type definitions) - ingestion/* - Removed orchestrator and ingestion docs - search/* - Removed FTS and vector search docs Philosophy: Code should be self-documenting. Comments should explain "why" (business decisions, non-obvious constraints) not "what" (which the code itself shows). This change reduces noise and maintenance burden while keeping the codebase just as understandable. Retains comments for: - Non-obvious business logic - Important safety invariants - Complex algorithm explanations - Public API boundaries where generated docs matter Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
//! Raw payload storage with optional compression and deduplication.
|
||||
|
||||
use flate2::Compression;
|
||||
use flate2::read::GzDecoder;
|
||||
use flate2::write::GzEncoder;
|
||||
@@ -10,26 +8,21 @@ use std::io::{Read, Write};
|
||||
use super::error::Result;
|
||||
use super::time::now_ms;
|
||||
|
||||
/// Options for storing a payload.
|
||||
pub struct StorePayloadOptions<'a> {
|
||||
pub project_id: Option<i64>,
|
||||
pub resource_type: &'a str, // 'project' | 'issue' | 'mr' | 'note' | 'discussion'
|
||||
pub gitlab_id: &'a str, // TEXT because discussion IDs are strings
|
||||
pub resource_type: &'a str,
|
||||
pub gitlab_id: &'a str,
|
||||
pub json_bytes: &'a [u8],
|
||||
pub compress: bool,
|
||||
}
|
||||
|
||||
/// Store a raw API payload with optional compression and deduplication.
|
||||
/// Returns the row ID (either new or existing if duplicate).
|
||||
pub fn store_payload(conn: &Connection, options: StorePayloadOptions) -> Result<i64> {
|
||||
let json_bytes = options.json_bytes;
|
||||
|
||||
// 2. SHA-256 hash the JSON bytes (pre-compression)
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(json_bytes);
|
||||
let payload_hash = format!("{:x}", hasher.finalize());
|
||||
|
||||
// 3. Check for duplicate by (project_id, resource_type, gitlab_id, payload_hash)
|
||||
let existing: Option<i64> = conn
|
||||
.query_row(
|
||||
"SELECT id FROM raw_payloads
|
||||
@@ -44,12 +37,10 @@ pub fn store_payload(conn: &Connection, options: StorePayloadOptions) -> Result<
|
||||
)
|
||||
.ok();
|
||||
|
||||
// 4. If duplicate, return existing ID
|
||||
if let Some(id) = existing {
|
||||
return Ok(id);
|
||||
}
|
||||
|
||||
// 5. Compress if requested
|
||||
let (encoding, payload_bytes): (&str, std::borrow::Cow<'_, [u8]>) = if options.compress {
|
||||
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
|
||||
encoder.write_all(json_bytes)?;
|
||||
@@ -58,7 +49,6 @@ pub fn store_payload(conn: &Connection, options: StorePayloadOptions) -> Result<
|
||||
("identity", std::borrow::Cow::Borrowed(json_bytes))
|
||||
};
|
||||
|
||||
// 6. INSERT with content_encoding
|
||||
conn.execute(
|
||||
"INSERT INTO raw_payloads
|
||||
(source, project_id, resource_type, gitlab_id, fetched_at, content_encoding, payload_hash, payload)
|
||||
@@ -77,8 +67,6 @@ pub fn store_payload(conn: &Connection, options: StorePayloadOptions) -> Result<
|
||||
Ok(conn.last_insert_rowid())
|
||||
}
|
||||
|
||||
/// Read a raw payload by ID, decompressing if necessary.
|
||||
/// Returns None if not found.
|
||||
pub fn read_payload(conn: &Connection, id: i64) -> Result<Option<serde_json::Value>> {
|
||||
let row: Option<(String, Vec<u8>)> = conn
|
||||
.query_row(
|
||||
@@ -92,7 +80,6 @@ pub fn read_payload(conn: &Connection, id: i64) -> Result<Option<serde_json::Val
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
// Decompress if needed
|
||||
let json_bytes = if encoding == "gzip" {
|
||||
let mut decoder = GzDecoder::new(&payload_bytes[..]);
|
||||
let mut decompressed = Vec::new();
|
||||
@@ -117,7 +104,6 @@ mod tests {
|
||||
let db_path = dir.path().join("test.db");
|
||||
let conn = create_connection(&db_path).unwrap();
|
||||
|
||||
// Create minimal schema for testing
|
||||
conn.execute_batch(
|
||||
"CREATE TABLE raw_payloads (
|
||||
id INTEGER PRIMARY KEY,
|
||||
@@ -212,6 +198,6 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(id1, id2); // Same payload returns same ID
|
||||
assert_eq!(id1, id2);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user