perf: Eliminate double serialization, add SQLite tuning, optimize hot paths
11 isomorphic performance fixes from deep audit (no behavior changes):

- Eliminate double serialization: store_payload now accepts pre-serialized bytes (&[u8]) instead of re-serializing from serde_json::Value. Uses Cow<[u8]> for zero-copy when compression is disabled.
- Add SQLite cache_size (64MB) and mmap_size (256MB) pragmas
- Replace SELECT-then-INSERT label upserts with INSERT...ON CONFLICT RETURNING in both issues.rs and merge_requests.rs
- Replace INSERT + SELECT milestone upsert with RETURNING
- Use prepare_cached for 5 hot-path queries in extractor.rs
- Optimize compute_list_hash: index-sort + incremental SHA-256 instead of clone+sort+join+hash
- Pre-allocate embedding float-to-bytes buffer with Vec::with_capacity
- Replace RandomState::new() in rand_jitter with atomic counter XOR nanos
- Remove redundant per-note payload storage (discussion payload contains all notes already)
- Change transform_issue to accept &GitLabIssue (avoids full struct clone)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -15,19 +15,18 @@ pub struct StorePayloadOptions<'a> {
|
||||
pub project_id: Option<i64>,
|
||||
pub resource_type: &'a str, // 'project' | 'issue' | 'mr' | 'note' | 'discussion'
|
||||
pub gitlab_id: &'a str, // TEXT because discussion IDs are strings
|
||||
pub payload: &'a serde_json::Value,
|
||||
pub json_bytes: &'a [u8],
|
||||
pub compress: bool,
|
||||
}
|
||||
|
||||
/// Store a raw API payload with optional compression and deduplication.
|
||||
/// Returns the row ID (either new or existing if duplicate).
|
||||
pub fn store_payload(conn: &Connection, options: StorePayloadOptions) -> Result<i64> {
|
||||
// 1. Obtain the JSON bytes (now supplied pre-serialized by the caller rather than stringified here)
|
||||
let json_bytes = serde_json::to_vec(options.payload)?;
|
||||
let json_bytes = options.json_bytes;
|
||||
|
||||
// 2. SHA-256 hash the JSON bytes (pre-compression)
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(&json_bytes);
|
||||
hasher.update(json_bytes);
|
||||
let payload_hash = format!("{:x}", hasher.finalize());
|
||||
|
||||
// 3. Check for duplicate by (project_id, resource_type, gitlab_id, payload_hash)
|
||||
@@ -51,12 +50,12 @@ pub fn store_payload(conn: &Connection, options: StorePayloadOptions) -> Result<
|
||||
}
|
||||
|
||||
// 5. Compress if requested
|
||||
let (encoding, payload_bytes) = if options.compress {
|
||||
let (encoding, payload_bytes): (&str, std::borrow::Cow<'_, [u8]>) = if options.compress {
|
||||
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
|
||||
encoder.write_all(&json_bytes)?;
|
||||
("gzip", encoder.finish()?)
|
||||
encoder.write_all(json_bytes)?;
|
||||
("gzip", std::borrow::Cow::Owned(encoder.finish()?))
|
||||
} else {
|
||||
("identity", json_bytes)
|
||||
("identity", std::borrow::Cow::Borrowed(json_bytes))
|
||||
};
|
||||
|
||||
// 6. INSERT with content_encoding
|
||||
@@ -71,7 +70,7 @@ pub fn store_payload(conn: &Connection, options: StorePayloadOptions) -> Result<
|
||||
now_ms(),
|
||||
encoding,
|
||||
&payload_hash,
|
||||
&payload_bytes,
|
||||
payload_bytes.as_ref(),
|
||||
),
|
||||
)?;
|
||||
|
||||
@@ -143,6 +142,7 @@ mod tests {
|
||||
fn test_store_and_read_payload() {
|
||||
let conn = setup_test_db();
|
||||
let payload = serde_json::json!({"title": "Test Issue", "id": 123});
|
||||
let json_bytes = serde_json::to_vec(&payload).unwrap();
|
||||
|
||||
let id = store_payload(
|
||||
&conn,
|
||||
@@ -150,7 +150,7 @@ mod tests {
|
||||
project_id: Some(1),
|
||||
resource_type: "issue",
|
||||
gitlab_id: "123",
|
||||
payload: &payload,
|
||||
json_bytes: &json_bytes,
|
||||
compress: false,
|
||||
},
|
||||
)
|
||||
@@ -164,6 +164,7 @@ mod tests {
|
||||
fn test_compression_roundtrip() {
|
||||
let conn = setup_test_db();
|
||||
let payload = serde_json::json!({"data": "x".repeat(1000)});
|
||||
let json_bytes = serde_json::to_vec(&payload).unwrap();
|
||||
|
||||
let id = store_payload(
|
||||
&conn,
|
||||
@@ -171,7 +172,7 @@ mod tests {
|
||||
project_id: Some(1),
|
||||
resource_type: "issue",
|
||||
gitlab_id: "456",
|
||||
payload: &payload,
|
||||
json_bytes: &json_bytes,
|
||||
compress: true,
|
||||
},
|
||||
)
|
||||
@@ -185,6 +186,7 @@ mod tests {
|
||||
fn test_deduplication() {
|
||||
let conn = setup_test_db();
|
||||
let payload = serde_json::json!({"id": 789});
|
||||
let json_bytes = serde_json::to_vec(&payload).unwrap();
|
||||
|
||||
let id1 = store_payload(
|
||||
&conn,
|
||||
@@ -192,7 +194,7 @@ mod tests {
|
||||
project_id: Some(1),
|
||||
resource_type: "issue",
|
||||
gitlab_id: "789",
|
||||
payload: &payload,
|
||||
json_bytes: &json_bytes,
|
||||
compress: false,
|
||||
},
|
||||
)
|
||||
@@ -204,7 +206,7 @@ mod tests {
|
||||
project_id: Some(1),
|
||||
resource_type: "issue",
|
||||
gitlab_id: "789",
|
||||
payload: &payload,
|
||||
json_bytes: &json_bytes,
|
||||
compress: false,
|
||||
},
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user