Wave 7: Phase 2 features - sync --all, external refs, cross-alias discovery, CI/CD, reliability tests (bd-1ky, bd-1bp, bd-1rk, bd-1lj, bd-gvr, bd-1x5)

- Sync --all with async concurrency, per-host throttling, failure budgets, resumable execution
- External ref bundling at fetch time with origin tracking
- Cross-alias discovery (--all-aliases) for list and search commands
- CI/CD pipeline (.gitlab-ci.yml), cargo-deny config, Dockerfile, install script
- Reliability test suite: crash consistency (8 tests), lock contention (3 tests), property-based (4 tests)
- Criterion performance benchmarks (5 benchmarks)
- Bug fix: doctor --fix now repairs missing index.json when raw.json exists
- Bug fix: shared $ref references no longer incorrectly flagged as circular (refs.rs)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
teernisse
2026-02-12 15:29:31 -05:00
parent 398311ca4c
commit 4ac8659ebd
20 changed files with 3430 additions and 68 deletions

641
src/core/external_refs.rs Normal file
View File

@@ -0,0 +1,641 @@
use std::collections::HashSet;
use std::future::Future;
use std::pin::Pin;
use reqwest::Url;
use serde_json::Value;
use crate::core::http::AsyncHttpClient;
use crate::core::indexer::{detect_format, normalize_to_json};
use crate::errors::SwaggerCliError;
/// Configuration for external `$ref` resolution.
///
/// Derives `Debug` and `Clone` so the policy can be logged and handed to
/// several resolution runs without rebuilding it.
#[derive(Debug, Clone)]
pub struct ExternalRefConfig {
    /// Allowed hostnames for external ref fetching. A ref whose host is not
    /// listed here is rejected by the resolver.
    pub allow_hosts: Vec<String>,
    /// Maximum chain depth for transitive external refs.
    pub max_depth: u32,
    /// Maximum total bytes fetched across all external refs.
    pub max_bytes: u64,
}
/// Statistics returned after resolving external refs.
///
/// `Default` yields all-zero counters. `Clone`, `PartialEq` and `Eq` let
/// callers snapshot and compare whole results in one expression.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ResolveStats {
    /// Number of external refs that were fetched and inlined.
    pub refs_resolved: usize,
    /// Number of external refs left unresolved (cycle, depth limit, or an
    /// already exhausted byte budget).
    pub refs_skipped: usize,
    /// Sum of the body sizes, in bytes, of every accepted fetch.
    pub total_bytes_fetched: u64,
}
/// Fetch and inline every external `$ref` reachable from `value`.
///
/// `base_url` is the location of the document that `value` came from; it is
/// the base against which relative refs are resolved.
///
/// Behaviour of the underlying recursive walk:
/// - Internal refs (for example `#/components/schemas/Pet`) are left untouched.
/// - A ref whose host is not in `config.allow_hosts` aborts the run with
///   `SwaggerCliError::PolicyBlocked`.
/// - A ref that re-enters a URL currently being resolved is replaced with a
///   `$circular_external_ref` marker and counted as skipped.
/// - Refs nested at or beyond `config.max_depth` are counted as skipped.
/// - No new fetch is started once `config.max_bytes` has been reached; a fetch
///   that would push the total past the limit fails with `PolicyBlocked`.
///
/// # Errors
///
/// Returns the first error raised while resolving, fetching or parsing a ref.
///
/// On success, returns the accumulated [`ResolveStats`].
pub async fn resolve_external_refs(
    value: &mut Value,
    base_url: Option<&str>,
    config: &ExternalRefConfig,
    client: &AsyncHttpClient,
) -> Result<ResolveStats, SwaggerCliError> {
    let mut stats = ResolveStats::default();
    let mut in_progress: HashSet<String> = HashSet::new();

    // The top-level document is depth 0.
    let walk = resolve_recursive(
        value,
        base_url,
        config,
        client,
        0,
        &mut in_progress,
        &mut stats,
    );
    walk.await?;

    Ok(stats)
}
/// Recursive worker that resolves external `$ref` entries in place.
///
/// If `value` is a `$ref` object, the ref is resolved (subject to the cycle,
/// depth, host and byte-budget checks below) and `value` is overwritten with
/// the fetched content. Otherwise the function descends into every object
/// member and array element.
///
/// Async recursion needs indirection, so the future is returned boxed and
/// pinned rather than via `async fn`.
///
/// * `base_url` - URL of the document containing `value`; base for relative refs.
/// * `depth` - number of external documents already followed on this path.
/// * `visited` - URLs currently being resolved on this path (cycle detection).
/// * `stats` - running counters, updated in place.
///
/// # Errors
///
/// * `SwaggerCliError::InvalidSpec` for an unresolvable or unparsable ref URL,
///   a URL without a host, unparsable fetched content, or a missing fragment.
/// * `SwaggerCliError::PolicyBlocked` for a host outside the allowlist or a
///   fetch that would exceed `config.max_bytes`.
/// * Any error returned by `client.fetch_spec` or by JSON deserialization.
fn resolve_recursive<'a>(
    value: &'a mut Value,
    base_url: Option<&'a str>,
    config: &'a ExternalRefConfig,
    client: &'a AsyncHttpClient,
    depth: u32,
    visited: &'a mut HashSet<String>,
    stats: &'a mut ResolveStats,
) -> Pin<Box<dyn Future<Output = Result<(), SwaggerCliError>> + Send + 'a>> {
    Box::pin(async move {
        if let Some(ref_str) = extract_external_ref(value) {
            // A ref consisting only of a fragment ("#", "#/a/b", "#anchor") points
            // into the current document and must never be fetched. Testing for '#'
            // rather than "#/" also covers the bare root ref "#".
            if ref_str.starts_with('#') {
                return Ok(());
            }

            // Resolve the URL (may be relative to `base_url`).
            let resolved_url = resolve_ref_url(&ref_str, base_url)?;

            // Cycle check: the URL is already being resolved further up this path.
            if visited.contains(&resolved_url) {
                *value = serde_json::json!({ "$circular_external_ref": resolved_url });
                stats.refs_skipped += 1;
                return Ok(());
            }

            // Depth limit: leave the ref in place and count it as skipped.
            if depth >= config.max_depth {
                stats.refs_skipped += 1;
                return Ok(());
            }

            // Host allowlist.
            let parsed = Url::parse(&resolved_url).map_err(|e| {
                SwaggerCliError::InvalidSpec(format!(
                    "invalid external ref URL '{resolved_url}': {e}"
                ))
            })?;
            let host = parsed.host_str().ok_or_else(|| {
                SwaggerCliError::InvalidSpec(format!(
                    "external ref URL '{resolved_url}' has no host"
                ))
            })?;
            // Host names are case-insensitive and `Url` lower-cases them, so an
            // allowlist entry written with capitals must still match.
            if !config
                .allow_hosts
                .iter()
                .any(|allowed| allowed.eq_ignore_ascii_case(host))
            {
                return Err(SwaggerCliError::PolicyBlocked(format!(
                    "external ref host '{host}' is not in --ref-allow-host allowlist. \
                     Add --ref-allow-host {host} to allow fetching from this host."
                )));
            }

            // Byte budget already used up: skip without fetching.
            if stats.total_bytes_fetched >= config.max_bytes {
                stats.refs_skipped += 1;
                return Ok(());
            }

            // Fetch the external ref.
            let result = client.fetch_spec(&resolved_url).await?;
            let fetched_bytes = result.bytes.len() as u64;
            // Saturating add: a huge `max_bytes` must not turn the budget check
            // into an integer overflow.
            if stats.total_bytes_fetched.saturating_add(fetched_bytes) > config.max_bytes {
                return Err(SwaggerCliError::PolicyBlocked(format!(
                    "external ref total bytes would exceed --ref-max-bytes limit of {}. \
                     Resolved {} refs ({} bytes) before hitting the limit.",
                    config.max_bytes, stats.refs_resolved, stats.total_bytes_fetched
                )));
            }
            stats.total_bytes_fetched += fetched_bytes;

            // Parse the fetched content as JSON or YAML.
            let format = detect_format(
                &result.bytes,
                Some(&resolved_url),
                result.content_type.as_deref(),
            );
            let json_bytes = normalize_to_json(&result.bytes, format).map_err(|_| {
                SwaggerCliError::InvalidSpec(format!(
                    "external ref '{resolved_url}' returned invalid JSON/YAML"
                ))
            })?;
            let mut fetched_value: Value = serde_json::from_slice(&json_bytes)?;

            // Narrow the fetched document to the part named by the URL fragment.
            // NOTE(review): the fragment is used as-is; `Url` keeps it
            // percent-encoded, so pointers with escaped characters may not
            // match - confirm whether decoding is needed.
            if let Some(frag) = parsed.fragment()
                && !frag.is_empty()
            {
                let pointer = if frag.starts_with('/') {
                    frag.to_string()
                } else {
                    format!("/{frag}")
                };
                fetched_value = crate::core::refs::resolve_json_pointer(&fetched_value, &pointer)
                    .cloned()
                    .ok_or_else(|| {
                        SwaggerCliError::InvalidSpec(format!(
                            "fragment '{pointer}' not found in external ref '{resolved_url}'"
                        ))
                    })?;
            }

            // Mark the URL as in progress while its own nested refs are resolved,
            // then release it so sibling subtrees may reference the same URL.
            visited.insert(resolved_url.clone());
            // Nested refs are relative to the fetched document (URL without fragment).
            let nested_base = strip_fragment(&resolved_url);
            resolve_recursive(
                &mut fetched_value,
                Some(&nested_base),
                config,
                client,
                depth + 1,
                visited,
                stats,
            )
            .await?;
            visited.remove(&resolved_url);

            // NOTE(review): internal refs inside the fetched content are inlined
            // unchanged and will then be read relative to the parent document.
            *value = fetched_value;
            stats.refs_resolved += 1;
            return Ok(());
        }

        // Not a `$ref` object: walk into children. Depth is unchanged because no
        // external document boundary is crossed.
        match value {
            Value::Object(map) => {
                for child in map.values_mut() {
                    resolve_recursive(child, base_url, config, client, depth, visited, stats)
                        .await?;
                }
            }
            Value::Array(arr) => {
                for item in arr.iter_mut() {
                    resolve_recursive(item, base_url, config, client, depth, visited, stats)
                        .await?;
                }
            }
            _ => {}
        }
        Ok(())
    })
}
/// Extract the `$ref` string from a JSON object if present.
fn extract_external_ref(value: &Value) -> Option<String> {
let map = value.as_object()?;
let ref_val = map.get("$ref")?;
Some(ref_val.as_str()?.to_string())
}
/// Resolve a possibly-relative ref URL against a base URL.
///
/// A ref that parses as a URL on its own is absolute and is returned in the
/// parser's normalized form, so the same location always yields the same
/// string. Anything else is joined onto `base_url`.
///
/// # Errors
///
/// Returns `SwaggerCliError::InvalidSpec` when a relative ref is given without
/// a base URL, when the base URL does not parse, or when the join fails.
fn resolve_ref_url(ref_str: &str, base_url: Option<&str>) -> Result<String, SwaggerCliError> {
    // Parse instead of searching for "://": a relative ref such as
    // "./pet.json?src=http://x" contains "://" but is not absolute.
    if let Ok(absolute) = Url::parse(ref_str) {
        return Ok(absolute.to_string());
    }

    // Relative ref requires a base URL.
    let base = base_url.ok_or_else(|| {
        SwaggerCliError::InvalidSpec(format!(
            "relative external ref '{ref_str}' cannot be resolved without a base URL"
        ))
    })?;
    let base_parsed = Url::parse(base)
        .map_err(|e| SwaggerCliError::InvalidSpec(format!("invalid base URL '{base}': {e}")))?;

    base_parsed
        .join(ref_str)
        .map(|joined| joined.to_string())
        .map_err(|e| {
            SwaggerCliError::InvalidSpec(format!(
                "failed to resolve relative ref '{ref_str}' against base '{base}': {e}"
            ))
        })
}
/// Return `url` without its fragment.
///
/// Everything from the first `#` onward is dropped; a URL without `#` is
/// returned unchanged.
fn strip_fragment(url: &str) -> String {
    let end = url.find('#').unwrap_or(url.len());
    url[..end].to_owned()
}
// Unit tests for the pure helpers plus mockito-backed integration tests for
// `resolve_external_refs`.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// -- URL resolution -------------------------------------------------------
/// An absolute ref needs no base URL and comes back unchanged.
#[test]
fn test_resolve_absolute_ref() {
let result = resolve_ref_url("https://example.com/schemas/Pet.json", None).unwrap();
assert_eq!(result, "https://example.com/schemas/Pet.json");
}
/// A `./` ref is resolved relative to the directory of the base document.
#[test]
fn test_resolve_relative_ref() {
let result = resolve_ref_url(
"./schemas/Pet.json",
Some("https://example.com/api/spec.json"),
)
.unwrap();
assert_eq!(result, "https://example.com/api/schemas/Pet.json");
}
/// A `../` ref climbs one directory above the base document.
#[test]
fn test_resolve_relative_parent() {
let result = resolve_ref_url(
"../schemas/Pet.json",
Some("https://example.com/api/v1/spec.json"),
)
.unwrap();
assert_eq!(result, "https://example.com/api/schemas/Pet.json");
}
/// A relative ref without any base URL cannot be resolved and must error.
#[test]
fn test_resolve_relative_without_base_fails() {
let result = resolve_ref_url("./schemas/Pet.json", None);
assert!(result.is_err());
}
// -- Fragment stripping ---------------------------------------------------
/// Covers both a URL with a fragment and one without.
#[test]
fn test_strip_fragment() {
assert_eq!(
strip_fragment("https://example.com/spec.json#/components/schemas/Pet"),
"https://example.com/spec.json"
);
assert_eq!(
strip_fragment("https://example.com/spec.json"),
"https://example.com/spec.json"
);
}
// -- External ref extraction ----------------------------------------------
/// An object with a string `$ref` yields that string.
#[test]
fn test_extract_external_ref_present() {
let v = json!({"$ref": "https://example.com/Pet.json"});
assert_eq!(
extract_external_ref(&v),
Some("https://example.com/Pet.json".to_string())
);
}
/// Despite its name, the extractor also returns internal refs; filtering
/// them out is the resolver's job.
#[test]
fn test_extract_external_ref_internal() {
let v = json!({"$ref": "#/components/schemas/Pet"});
assert_eq!(
extract_external_ref(&v),
Some("#/components/schemas/Pet".to_string())
);
}
/// An object without a `$ref` key yields `None`.
#[test]
fn test_extract_external_ref_absent() {
let v = json!({"type": "string"});
assert_eq!(extract_external_ref(&v), None);
}
// -- Integration tests with mockito ---------------------------------------
/// Happy path: a ref to an allow-listed host is fetched and inlined.
#[tokio::test]
async fn test_resolve_external_ref_allowed_host() {
let mut server = mockito::Server::new_async().await;
let host = server.host_with_port();
let pet_schema = json!({
"type": "object",
"properties": {
"name": { "type": "string" }
}
});
let _mock = server
.mock("GET", "/schemas/Pet.json")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(serde_json::to_string(&pet_schema).unwrap())
.create_async()
.await;
let spec_url = format!("http://{host}/api/spec.json");
let ref_url = format!("http://{host}/schemas/Pet.json");
let mut value = json!({
"openapi": "3.0.3",
"components": {
"schemas": {
"Pet": { "$ref": ref_url }
}
}
});
// The allowlist is compared against the URL's host without the port, so
// the port is cut off the mock server address.
let hostname = host.split(':').next().unwrap().to_string();
let config = ExternalRefConfig {
allow_hosts: vec![hostname.clone()],
max_depth: 5,
max_bytes: 1_048_576,
};
let client = AsyncHttpClient::builder()
.allow_insecure_http(true)
.allowed_private_hosts(vec![hostname.clone()])
.build();
let stats = resolve_external_refs(&mut value, Some(&spec_url), &config, &client)
.await
.unwrap();
assert_eq!(stats.refs_resolved, 1);
assert_eq!(value["components"]["schemas"]["Pet"]["type"], "object");
assert_eq!(
value["components"]["schemas"]["Pet"]["properties"]["name"]["type"],
"string"
);
}
/// A ref to a host outside the allowlist fails with `PolicyBlocked`.
/// No mock server is needed because the host check runs before any fetch.
#[tokio::test]
async fn test_resolve_external_ref_disallowed_host() {
let mut value = json!({
"schema": { "$ref": "https://evil.example.com/schemas/Pet.json" }
});
let config = ExternalRefConfig {
allow_hosts: vec!["safe.example.com".to_string()],
max_depth: 5,
max_bytes: 1_048_576,
};
let client = AsyncHttpClient::builder().build();
let result = resolve_external_refs(&mut value, None, &config, &client).await;
assert!(result.is_err());
match result.unwrap_err() {
SwaggerCliError::PolicyBlocked(msg) => {
assert!(msg.contains("evil.example.com"));
assert!(msg.contains("--ref-allow-host"));
}
other => panic!("expected PolicyBlocked, got: {other:?}"),
}
}
/// Internal `#/` refs are returned from early, so even an empty allowlist
/// and a missing base URL cause no error and the ref stays in place.
#[tokio::test]
async fn test_resolve_internal_refs_untouched() {
let mut value = json!({
"schema": { "$ref": "#/components/schemas/Pet" }
});
let config = ExternalRefConfig {
allow_hosts: vec![],
max_depth: 5,
max_bytes: 1_048_576,
};
let client = AsyncHttpClient::builder().build();
let stats = resolve_external_refs(&mut value, None, &config, &client)
.await
.unwrap();
assert_eq!(stats.refs_resolved, 0);
assert_eq!(value["schema"]["$ref"], "#/components/schemas/Pet");
}
/// `max_depth` bounds how many external documents are followed in a chain.
#[tokio::test]
async fn test_resolve_max_depth_limits_chains() {
let mut server = mockito::Server::new_async().await;
let host = server.host_with_port();
// Chain: spec -> A.json -> B.json
// With max_depth=1, only A.json should be resolved
let b_schema = json!({
"type": "string",
"from": "B"
});
let _mock_b = server
.mock("GET", "/B.json")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(serde_json::to_string(&b_schema).unwrap())
.create_async()
.await;
let a_schema = json!({
"type": "object",
"nested": { "$ref": format!("http://{host}/B.json") }
});
let _mock_a = server
.mock("GET", "/A.json")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(serde_json::to_string(&a_schema).unwrap())
.create_async()
.await;
let ref_url = format!("http://{host}/A.json");
let mut value = json!({
"schema": { "$ref": ref_url }
});
let hostname = host.split(':').next().unwrap().to_string();
let config = ExternalRefConfig {
allow_hosts: vec![hostname.clone()],
max_depth: 1, // Only one level deep
max_bytes: 1_048_576,
};
let client = AsyncHttpClient::builder()
.allow_insecure_http(true)
.allowed_private_hosts(vec![hostname.clone()])
.build();
let stats = resolve_external_refs(&mut value, None, &config, &client)
.await
.unwrap();
// A was resolved (depth 0), B was skipped (depth 1 >= max_depth 1)
assert_eq!(stats.refs_resolved, 1);
assert_eq!(stats.refs_skipped, 1);
assert_eq!(value["schema"]["type"], "object");
}
/// Two documents that reference each other terminate with a
/// `$circular_external_ref` marker instead of recursing forever.
#[tokio::test]
async fn test_resolve_circular_external_refs() {
let mut server = mockito::Server::new_async().await;
let host = server.host_with_port();
// A.json refs B.json, B.json refs A.json
let b_schema = json!({
"type": "object",
"back": { "$ref": format!("http://{host}/A.json") }
});
let _mock_b = server
.mock("GET", "/B.json")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(serde_json::to_string(&b_schema).unwrap())
.create_async()
.await;
let a_schema = json!({
"type": "object",
"next": { "$ref": format!("http://{host}/B.json") }
});
let _mock_a = server
.mock("GET", "/A.json")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(serde_json::to_string(&a_schema).unwrap())
.create_async()
.await;
let ref_url = format!("http://{host}/A.json");
let mut value = json!({
"schema": { "$ref": ref_url }
});
let hostname = host.split(':').next().unwrap().to_string();
let config = ExternalRefConfig {
allow_hosts: vec![hostname.clone()],
max_depth: 10,
max_bytes: 1_048_576,
};
let client = AsyncHttpClient::builder()
.allow_insecure_http(true)
.allowed_private_hosts(vec![hostname.clone()])
.build();
let stats = resolve_external_refs(&mut value, None, &config, &client)
.await
.unwrap();
// A and B resolved, but the circular back-ref to A was detected
assert_eq!(stats.refs_resolved, 2);
assert_eq!(stats.refs_skipped, 1);
assert!(
value["schema"]["next"]["back"]
.get("$circular_external_ref")
.is_some()
);
}
/// A single response larger than `max_bytes` fails with `PolicyBlocked`.
#[tokio::test]
async fn test_resolve_max_bytes_exceeded() {
let mut server = mockito::Server::new_async().await;
let host = server.host_with_port();
// The body is just over 500 bytes, well above the 100-byte limit below.
let large_body = "x".repeat(500);
let _mock = server
.mock("GET", "/big.json")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(format!("{{\"data\": \"{large_body}\"}}"))
.create_async()
.await;
let ref_url = format!("http://{host}/big.json");
let mut value = json!({
"schema": { "$ref": ref_url }
});
let hostname = host.split(':').next().unwrap().to_string();
let config = ExternalRefConfig {
allow_hosts: vec![hostname.clone()],
max_depth: 5,
max_bytes: 100, // Very small limit
};
let client = AsyncHttpClient::builder()
.allow_insecure_http(true)
.allowed_private_hosts(vec![hostname.clone()])
.build();
let result = resolve_external_refs(&mut value, None, &config, &client).await;
assert!(result.is_err());
match result.unwrap_err() {
SwaggerCliError::PolicyBlocked(msg) => {
assert!(msg.contains("--ref-max-bytes"));
}
other => panic!("expected PolicyBlocked, got: {other:?}"),
}
}
/// End-to-end check that a relative ref is resolved against `base_url`
/// (`/api/spec.json` + `./schemas/Pet.json` -> `/api/schemas/Pet.json`).
#[tokio::test]
async fn test_resolve_relative_ref_integration() {
let mut server = mockito::Server::new_async().await;
let host = server.host_with_port();
let pet_schema = json!({
"type": "object",
"properties": {
"name": { "type": "string" }
}
});
let _mock = server
.mock("GET", "/api/schemas/Pet.json")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(serde_json::to_string(&pet_schema).unwrap())
.create_async()
.await;
let base_url = format!("http://{host}/api/spec.json");
let mut value = json!({
"schema": { "$ref": "./schemas/Pet.json" }
});
let hostname = host.split(':').next().unwrap().to_string();
let config = ExternalRefConfig {
allow_hosts: vec![hostname.clone()],
max_depth: 5,
max_bytes: 1_048_576,
};
let client = AsyncHttpClient::builder()
.allow_insecure_http(true)
.allowed_private_hosts(vec![hostname.clone()])
.build();
let stats = resolve_external_refs(&mut value, Some(&base_url), &config, &client)
.await
.unwrap();
assert_eq!(stats.refs_resolved, 1);
assert_eq!(value["schema"]["type"], "object");
}
}

View File

@@ -1,6 +1,7 @@
pub mod cache;
pub mod config;
pub mod diff;
pub mod external_refs;
pub mod http;
pub mod indexer;
pub mod network;

View File

@@ -63,10 +63,12 @@ fn expand_recursive(
let pointer = &ref_str[1..]; // strip leading '#'
if let Some(resolved) = resolve_json_pointer(root, pointer) {
let mut expanded = resolved.clone();
visited.insert(ref_str);
visited.insert(ref_str.clone());
expand_recursive(&mut expanded, root, max_depth, depth + 1, visited);
// Do not remove from visited: keep it for sibling detection within the same
// subtree path. The caller manages the visited set across siblings.
// Remove after expansion so sibling subtrees can also expand this ref.
// The ancestor path (tracked via depth) still prevents true circular refs
// because the ref is in `visited` during its own subtree's expansion.
visited.remove(&ref_str);
*value = expanded;
}
// If pointer doesn't resolve, leave the $ref as-is (broken ref)
@@ -292,4 +294,58 @@ mod tests {
// Broken internal ref left untouched
assert_eq!(value, original);
}
#[test]
fn test_expand_shared_refs_both_expand() {
    // The same schema referenced from two sibling subtrees is a shared ref,
    // not a cycle, so every occurrence must be expanded in full.
    let spec = json!({
        "components": {
            "schemas": {
                "Pet": {
                    "type": "object",
                    "properties": {
                        "name": { "type": "string" }
                    }
                }
            }
        }
    });
    let mut doc = json!({
        "requestBody": {
            "schema": { "$ref": "#/components/schemas/Pet" }
        },
        "response": {
            "schema": { "$ref": "#/components/schemas/Pet" }
        }
    });

    expand_refs(&mut doc, &spec, 5);

    let request_schema = &doc["requestBody"]["schema"];
    let response_schema = &doc["response"]["schema"];

    // Each occurrence carries the complete Pet definition.
    assert_eq!(request_schema["type"], "object");
    assert_eq!(request_schema["properties"]["name"]["type"], "string");
    assert_eq!(response_schema["type"], "object");
    assert_eq!(response_schema["properties"]["name"]["type"], "string");

    // Neither occurrence was replaced by a cycle marker.
    assert!(
        request_schema.get("$circular_ref").is_none(),
        "requestBody ref should not be marked circular"
    );
    assert!(
        response_schema.get("$circular_ref").is_none(),
        "response ref should not be marked circular"
    );
}
}

View File

@@ -26,7 +26,7 @@ pub enum SearchResultType {
}
impl SearchResultType {
fn ordinal(self) -> u8 {
pub(crate) fn ordinal(self) -> u8 {
match self {
Self::Endpoint => 0,
Self::Schema => 1,