fix(gitlab): Improve pagination heuristics and fix rate limiter lock contention

Two targeted fixes to the GitLab API client:

1. Pagination: When the x-next-page header is missing but the current
   page returned a full page of results, heuristically advance to the
   next page instead of stopping. This fixes silent data truncation
   observed with certain GitLab instances that omit pagination headers
   on intermediate pages. The existing early-exit on empty or partial
   pages remains as the termination condition.

2. Rate limiter: Refactor the async acquire() method into a synchronous
   check_delay() that computes the required sleep duration and updates
   last_request time while holding the mutex, then releases the lock
   before sleeping. This eliminates holding the Mutex<RateLimiter>
   across an await point, which previously could block other request
   tasks unnecessarily during the sleep interval.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-01-30 15:46:05 -05:00
parent 6e22f120d0
commit d31d5292f2

View File

@@ -15,7 +15,7 @@ use tracing::debug;
use super::types::{
GitLabDiscussion, GitLabIssue, GitLabMergeRequest, GitLabProject, GitLabUser, GitLabVersion,
};
use crate::core::error::{GiError, Result};
use crate::core::error::{LoreError, Result};
/// Simple rate limiter with jitter to prevent thundering herd.
struct RateLimiter {
@@ -31,17 +31,18 @@ impl RateLimiter {
}
}
async fn acquire(&mut self) {
/// Compute how long to wait, update last_request, and return the delay.
/// The caller sleeps *after* releasing the mutex guard.
fn check_delay(&mut self) -> Option<Duration> {
let elapsed = self.last_request.elapsed();
self.last_request = Instant::now();
if elapsed < self.min_interval {
// Add 0-50ms jitter to prevent synchronized requests
let jitter = Duration::from_millis(rand_jitter());
let wait_time = self.min_interval - elapsed + jitter;
sleep(wait_time).await;
Some(self.min_interval - elapsed + jitter)
} else {
None
}
self.last_request = Instant::now();
}
}
@@ -112,7 +113,10 @@ impl GitLabClient {
/// Make an authenticated API request.
async fn request<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<T> {
self.rate_limiter.lock().await.acquire().await;
let delay = self.rate_limiter.lock().await.check_delay();
if let Some(d) = delay {
sleep(d).await;
}
let url = format!("{}{}", self.base_url, path);
debug!(url = %url, "GitLab request");
@@ -123,7 +127,7 @@ impl GitLabClient {
.header("PRIVATE-TOKEN", &self.token)
.send()
.await
.map_err(|e| GiError::GitLabNetworkError {
.map_err(|e| LoreError::GitLabNetworkError {
base_url: self.base_url.clone(),
source: Some(e),
})?;
@@ -138,9 +142,9 @@ impl GitLabClient {
path: &str,
) -> Result<T> {
match response.status() {
StatusCode::UNAUTHORIZED => Err(GiError::GitLabAuthFailed),
StatusCode::UNAUTHORIZED => Err(LoreError::GitLabAuthFailed),
StatusCode::NOT_FOUND => Err(GiError::GitLabNotFound {
StatusCode::NOT_FOUND => Err(LoreError::GitLabNotFound {
resource: path.to_string(),
}),
@@ -152,7 +156,7 @@ impl GitLabClient {
.and_then(|s| s.parse().ok())
.unwrap_or(60);
Err(GiError::GitLabRateLimited { retry_after })
Err(LoreError::GitLabRateLimited { retry_after })
}
status if status.is_success() => {
@@ -160,7 +164,7 @@ impl GitLabClient {
Ok(body)
}
status => Err(GiError::Other(format!(
status => Err(LoreError::Other(format!(
"GitLab API error: {} {}",
status.as_u16(),
status.canonical_reason().unwrap_or("Unknown")
@@ -216,6 +220,7 @@ impl GitLabClient {
match result {
Ok((issues, headers)) => {
let is_empty = issues.is_empty();
let full_page = issues.len() as u32 == per_page;
// Yield each issue
for issue in issues {
@@ -233,12 +238,11 @@ impl GitLabClient {
page = next;
}
_ => {
// No next page or empty response - we're done
if is_empty {
if is_empty || !full_page {
break;
}
// Check if current page returned less than per_page (last page)
break;
// Full page but no x-next-page header: try next page heuristically
page += 1;
}
}
}
@@ -278,6 +282,7 @@ impl GitLabClient {
match result {
Ok((discussions, headers)) => {
let is_empty = discussions.is_empty();
let full_page = discussions.len() as u32 == per_page;
for discussion in discussions {
yield Ok(discussion);
@@ -293,10 +298,11 @@ impl GitLabClient {
page = next;
}
_ => {
if is_empty {
if is_empty || !full_page {
break;
}
break;
// Full page but no x-next-page header: try next page heuristically
page += 1;
}
}
}
@@ -462,19 +468,24 @@ impl GitLabClient {
.and_then(|s| s.parse::<u32>().ok());
let should_continue = match (link_next.is_some(), x_next_page, full_page) {
(true, _, _) => true, // Link header present: continue
(false, Some(np), _) if np > page => {
page = np;
(true, _, _) => {
page += 1; // Link header present: continue to next
true
}
(false, Some(np), _) if np > page => {
page = np; // x-next-page tells us exactly which page
true
}
(false, None, true) => {
page += 1; // Full page, no headers: try next
true
}
(false, None, true) => true, // Full page, no headers: try next
_ => false, // Otherwise we're done
};
if !should_continue || is_empty {
break;
}
page += 1;
}
Err(e) => {
yield Err(e);
@@ -491,7 +502,10 @@ impl GitLabClient {
path: &str,
params: &[(&str, String)],
) -> Result<(T, HeaderMap)> {
self.rate_limiter.lock().await.acquire().await;
let delay = self.rate_limiter.lock().await.check_delay();
if let Some(d) = delay {
sleep(d).await;
}
let url = format!("{}{}", self.base_url, path);
debug!(url = %url, ?params, "GitLab paginated request");
@@ -503,7 +517,7 @@ impl GitLabClient {
.header("PRIVATE-TOKEN", &self.token)
.send()
.await
.map_err(|e| GiError::GitLabNetworkError {
.map_err(|e| LoreError::GitLabNetworkError {
base_url: self.base_url.clone(),
source: Some(e),
})?;