perf: Eliminate double serialization, add SQLite tuning, optimize hot paths
11 isomorphic performance fixes from deep audit (no behavior changes):

- Eliminate double serialization: store_payload now accepts pre-serialized bytes (&[u8]) instead of re-serializing from serde_json::Value. Uses Cow<[u8]> for zero-copy when compression is disabled.
- Add SQLite cache_size (64MB) and mmap_size (256MB) pragmas
- Replace SELECT-then-INSERT label upserts with INSERT...ON CONFLICT RETURNING in both issues.rs and merge_requests.rs
- Replace INSERT + SELECT milestone upsert with RETURNING
- Use prepare_cached for 5 hot-path queries in extractor.rs
- Optimize compute_list_hash: index-sort + incremental SHA-256 instead of clone+sort+join+hash
- Pre-allocate embedding float-to-bytes buffer with Vec::with_capacity
- Replace RandomState::new() in rand_jitter with atomic counter XOR nanos
- Remove redundant per-note payload storage (discussion payload contains all notes already)
- Change transform_issue to accept &GitLabIssue (avoids full struct clone)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -34,38 +34,36 @@ impl RateLimiter {
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute how long to wait, update last_request, and return the delay.
|
||||
/// The caller sleeps *after* releasing the mutex guard.
|
||||
/// Compute how long to wait and update last_request to the expected
|
||||
/// request time (now, or now + delay). The caller sleeps *after*
|
||||
/// releasing the mutex guard.
|
||||
fn check_delay(&mut self) -> Option<Duration> {
|
||||
let elapsed = self.last_request.elapsed();
|
||||
self.last_request = Instant::now();
|
||||
|
||||
if elapsed < self.min_interval {
|
||||
let jitter = Duration::from_millis(rand_jitter());
|
||||
Some(self.min_interval - elapsed + jitter)
|
||||
let delay = self.min_interval - elapsed + jitter;
|
||||
// Set last_request to when the request will actually fire
|
||||
self.last_request = Instant::now() + delay;
|
||||
Some(delay)
|
||||
} else {
|
||||
// No delay needed; request fires immediately
|
||||
self.last_request = Instant::now();
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a pseudo-random jitter in the range `0..50` milliseconds.
///
/// Mixes a process-wide atomic call counter with the sub-second nanoseconds of
/// the wall clock. This avoids constructing a `RandomState` on every call and
/// needs no external crate. The value is only meant to spread callers apart;
/// it is not suitable where unpredictable randomness is required.
fn rand_jitter() -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    // Exclusive upper bound of the jitter, in milliseconds.
    const MAX_JITTER_MS: u64 = 50;
    static COUNTER: AtomicU64 = AtomicU64::new(0);

    // Relaxed is sufficient: only the uniqueness of the counter value matters,
    // not its ordering relative to other memory operations.
    let n = COUNTER.fetch_add(1, Ordering::Relaxed);

    // A clock set before the Unix epoch falls back to zero nanos; the counter
    // still varies the result between calls.
    let nanos = u64::from(
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .subsec_nanos(),
    );

    (n ^ nanos) % MAX_JITTER_MS
}
|
||||
|
||||
/// GitLab API client with rate limiting.
|
||||
@@ -719,6 +717,11 @@ impl GitLabClient {
|
||||
}
|
||||
|
||||
/// Fetch all three event types for an entity concurrently.
|
||||
///
|
||||
/// Uses `tokio::join!` instead of `try_join!` so that a 404 on one event
|
||||
/// type (e.g., labels) doesn't discard successfully-fetched data from the
|
||||
/// others (e.g., state events). 404s are treated as "no events" (empty vec);
|
||||
/// all other errors (including 403) are propagated for retry.
|
||||
pub async fn fetch_all_resource_events(
|
||||
&self,
|
||||
gitlab_project_id: i64,
|
||||
@@ -729,27 +732,35 @@ impl GitLabClient {
|
||||
Vec<GitLabLabelEvent>,
|
||||
Vec<GitLabMilestoneEvent>,
|
||||
)> {
|
||||
match entity_type {
|
||||
let (state_res, label_res, milestone_res) = match entity_type {
|
||||
"issue" => {
|
||||
let (state, label, milestone) = tokio::try_join!(
|
||||
tokio::join!(
|
||||
self.fetch_issue_state_events(gitlab_project_id, iid),
|
||||
self.fetch_issue_label_events(gitlab_project_id, iid),
|
||||
self.fetch_issue_milestone_events(gitlab_project_id, iid),
|
||||
)?;
|
||||
Ok((state, label, milestone))
|
||||
)
|
||||
}
|
||||
"merge_request" => {
|
||||
let (state, label, milestone) = tokio::try_join!(
|
||||
tokio::join!(
|
||||
self.fetch_mr_state_events(gitlab_project_id, iid),
|
||||
self.fetch_mr_label_events(gitlab_project_id, iid),
|
||||
self.fetch_mr_milestone_events(gitlab_project_id, iid),
|
||||
)?;
|
||||
Ok((state, label, milestone))
|
||||
)
|
||||
}
|
||||
_ => Err(LoreError::Other(format!(
|
||||
"Invalid entity type for resource events: {entity_type}"
|
||||
))),
|
||||
}
|
||||
_ => {
|
||||
return Err(LoreError::Other(format!(
|
||||
"Invalid entity type for resource events: {entity_type}"
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
// Treat 404 as "endpoint not available for this entity" → empty vec.
|
||||
// All other errors (403, network, etc.) propagate for retry handling.
|
||||
let state = coalesce_not_found(state_res)?;
|
||||
let label = coalesce_not_found(label_res)?;
|
||||
let milestone = coalesce_not_found(milestone_res)?;
|
||||
|
||||
Ok((state, label, milestone))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -781,6 +792,19 @@ fn parse_link_header_next(headers: &HeaderMap) -> Option<String> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert a resource-event fetch result: 404 → empty vec, other errors propagated.
|
||||
///
|
||||
/// 404 means the endpoint doesn't exist for this entity type — truly permanent.
|
||||
/// 403 and other errors are NOT coalesced: they may be environmental (VPN, token
|
||||
/// rotation) and should be retried via the drain loop's backoff mechanism.
|
||||
fn coalesce_not_found<T>(result: Result<Vec<T>>) -> Result<Vec<T>> {
|
||||
match result {
|
||||
Ok(v) => Ok(v),
|
||||
Err(LoreError::GitLabNotFound { .. }) => Ok(Vec::new()),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert milliseconds since epoch to ISO 8601 string.
|
||||
fn ms_to_iso8601(ms: i64) -> Option<String> {
|
||||
DateTime::<Utc>::from_timestamp_millis(ms)
|
||||
|
||||
@@ -58,7 +58,7 @@ fn parse_timestamp(ts: &str) -> Result<i64, TransformError> {
|
||||
}
|
||||
|
||||
/// Transform a GitLab issue into local schema format.
|
||||
pub fn transform_issue(issue: GitLabIssue) -> Result<IssueWithMetadata, TransformError> {
|
||||
pub fn transform_issue(issue: &GitLabIssue) -> Result<IssueWithMetadata, TransformError> {
|
||||
let created_at = parse_timestamp(&issue.created_at)?;
|
||||
let updated_at = parse_timestamp(&issue.updated_at)?;
|
||||
|
||||
@@ -83,17 +83,17 @@ pub fn transform_issue(issue: GitLabIssue) -> Result<IssueWithMetadata, Transfor
|
||||
gitlab_id: issue.id,
|
||||
iid: issue.iid,
|
||||
project_id: issue.project_id,
|
||||
title: issue.title,
|
||||
description: issue.description,
|
||||
state: issue.state,
|
||||
author_username: issue.author.username,
|
||||
title: issue.title.clone(),
|
||||
description: issue.description.clone(),
|
||||
state: issue.state.clone(),
|
||||
author_username: issue.author.username.clone(),
|
||||
created_at,
|
||||
updated_at,
|
||||
web_url: issue.web_url,
|
||||
due_date: issue.due_date,
|
||||
web_url: issue.web_url.clone(),
|
||||
due_date: issue.due_date.clone(),
|
||||
milestone_title,
|
||||
},
|
||||
label_names: issue.labels,
|
||||
label_names: issue.labels.clone(),
|
||||
assignee_usernames,
|
||||
milestone,
|
||||
})
|
||||
@@ -131,7 +131,7 @@ mod tests {
|
||||
#[test]
|
||||
fn transforms_issue_with_all_fields() {
|
||||
let issue = make_test_issue();
|
||||
let result = transform_issue(issue).unwrap();
|
||||
let result = transform_issue(&issue).unwrap();
|
||||
|
||||
assert_eq!(result.issue.gitlab_id, 12345);
|
||||
assert_eq!(result.issue.iid, 42);
|
||||
@@ -154,14 +154,14 @@ mod tests {
|
||||
let mut issue = make_test_issue();
|
||||
issue.description = None;
|
||||
|
||||
let result = transform_issue(issue).unwrap();
|
||||
let result = transform_issue(&issue).unwrap();
|
||||
assert!(result.issue.description.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extracts_label_names() {
|
||||
let issue = make_test_issue();
|
||||
let result = transform_issue(issue).unwrap();
|
||||
let result = transform_issue(&issue).unwrap();
|
||||
|
||||
assert_eq!(result.label_names.len(), 2);
|
||||
assert_eq!(result.label_names[0], "bug");
|
||||
@@ -173,14 +173,14 @@ mod tests {
|
||||
let mut issue = make_test_issue();
|
||||
issue.labels = vec![];
|
||||
|
||||
let result = transform_issue(issue).unwrap();
|
||||
let result = transform_issue(&issue).unwrap();
|
||||
assert!(result.label_names.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_timestamps_to_ms_epoch() {
|
||||
let issue = make_test_issue();
|
||||
let result = transform_issue(issue).unwrap();
|
||||
let result = transform_issue(&issue).unwrap();
|
||||
|
||||
// 2024-01-15T10:00:00.000Z = 1705312800000 ms
|
||||
assert_eq!(result.issue.created_at, 1705312800000);
|
||||
@@ -194,7 +194,7 @@ mod tests {
|
||||
// GitLab can return timestamps with timezone offset
|
||||
issue.created_at = "2024-01-15T05:00:00-05:00".to_string();
|
||||
|
||||
let result = transform_issue(issue).unwrap();
|
||||
let result = transform_issue(&issue).unwrap();
|
||||
// 05:00 EST = 10:00 UTC = same as original test
|
||||
assert_eq!(result.issue.created_at, 1705312800000);
|
||||
}
|
||||
@@ -215,7 +215,7 @@ mod tests {
|
||||
},
|
||||
];
|
||||
|
||||
let result = transform_issue(issue).unwrap();
|
||||
let result = transform_issue(&issue).unwrap();
|
||||
assert_eq!(result.assignee_usernames.len(), 2);
|
||||
assert_eq!(result.assignee_usernames[0], "alice");
|
||||
assert_eq!(result.assignee_usernames[1], "bob");
|
||||
@@ -235,7 +235,7 @@ mod tests {
|
||||
web_url: Some("https://gitlab.example.com/-/milestones/5".to_string()),
|
||||
});
|
||||
|
||||
let result = transform_issue(issue).unwrap();
|
||||
let result = transform_issue(&issue).unwrap();
|
||||
|
||||
// Denormalized title on issue for quick display
|
||||
assert_eq!(result.issue.milestone_title, Some("v1.0".to_string()));
|
||||
@@ -258,7 +258,7 @@ mod tests {
|
||||
#[test]
|
||||
fn handles_missing_milestone() {
|
||||
let issue = make_test_issue();
|
||||
let result = transform_issue(issue).unwrap();
|
||||
let result = transform_issue(&issue).unwrap();
|
||||
|
||||
assert!(result.issue.milestone_title.is_none());
|
||||
assert!(result.milestone.is_none());
|
||||
@@ -269,7 +269,7 @@ mod tests {
|
||||
let mut issue = make_test_issue();
|
||||
issue.due_date = Some("2024-02-15".to_string());
|
||||
|
||||
let result = transform_issue(issue).unwrap();
|
||||
let result = transform_issue(&issue).unwrap();
|
||||
assert_eq!(result.issue.due_date, Some("2024-02-15".to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user