fix: Content hash now computed after truncation, atomic job claiming
Two bug fixes:

1. extractor.rs: The content hash was computed on the pre-truncation content, so the hash stored in the document did not correspond to the actual stored (truncated) content. Change detection could therefore give wrong results when content changed only within the portion removed by truncation: the hash changed even though the persisted content did not. The hash is now computed after truncate_hard_cap(), so it always matches the persisted content.

2. dependent_queue.rs: claim_jobs() had a TOCTOU (time-of-check to time-of-use) race between the SELECT that found available jobs and the UPDATE that locked them. Under concurrent callers, two drain runs could claim the same job. Replaced with a single UPDATE ... RETURNING statement that atomically selects and locks jobs in one operation.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -166,12 +166,12 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option
|
||||
content.push_str(desc);
|
||||
}
|
||||
|
||||
let content_hash = compute_content_hash(&content);
|
||||
let labels_hash = compute_list_hash(&labels);
|
||||
let paths_hash = compute_list_hash(&[]); // Issues have no paths
|
||||
|
||||
// Apply hard cap truncation for safety
|
||||
// Apply hard cap truncation for safety, then hash the final stored content
|
||||
let hard_cap = truncate_hard_cap(&content);
|
||||
let content_hash = compute_content_hash(&hard_cap.content);
|
||||
|
||||
Ok(Some(DocumentData {
|
||||
source_type: SourceType::Issue,
|
||||
@@ -281,12 +281,12 @@ pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<Docum
|
||||
content.push_str(desc);
|
||||
}
|
||||
|
||||
let content_hash = compute_content_hash(&content);
|
||||
let labels_hash = compute_list_hash(&labels);
|
||||
let paths_hash = compute_list_hash(&[]);
|
||||
|
||||
// Apply hard cap truncation for safety
|
||||
// Apply hard cap truncation for safety, then hash the final stored content
|
||||
let hard_cap = truncate_hard_cap(&content);
|
||||
let content_hash = compute_content_hash(&hard_cap.content);
|
||||
|
||||
Ok(Some(DocumentData {
|
||||
source_type: SourceType::MergeRequest,
|
||||
|
||||
Reference in New Issue
Block a user