fix: normalize Tangled state NSIDs and backfill repo owner DIDs

- Normalize state values like "sh.tangled.repo.pull.status.merged" to just "merged"
- Include repo owner DIDs in PDS backfill query (pull.status records are on the repo owner, not the PR author)
Author: Aaron Steven White
Commit: a99c1a6dea5cf87843c7077a3b0fff52f16aff0b
Parent: f2e98c33a2
Structural diff unavailable
These commits were pushed via plain git push, so no pre-parsed
schemas are available. Install git-remote-cospan and re-push via panproto:// to
see scope-level changes, breaking change detection, and semantic diffs.
brew install panproto/tap/git-remote-cospan
2 files changed +25 -7
@@ -9,6 +9,18 @@ use crate::xrpc::sse::IndexEvent;
99 1010 use super::dispatch; 1111 12+/// Normalize state values from Tangled's NSID format to simple strings. 13+/// e.g., "sh.tangled.repo.pull.status.merged" → "merged" 14+/// "sh.tangled.repo.issue.state.closed" → "closed" 15+fn normalize_state(state: &str) -> String { 16+ if let Some(last) = state.rsplit('.').next() { 17+ if state.contains("tangled") || state.contains('.') { 18+ return last.to_string(); 19+ } 20+ } 21+ state.to_string() 22+} 23+ 1224 /// Transform a record through the pre-compiled panproto morphism. 1325 /// Handles both Cospan (DB projection) and Tangled (interop + DB projection). 1426 pub(super) fn transform_record(
@@ -184,11 +196,9 @@ async fn dispatch_special_upsert(
184196 .and_then(|v| v.as_str()) 185197 .unwrap_or("") 186198 .to_string(); 187- let new_state = rec 188- .get("state") 189- .and_then(|v| v.as_str()) 190- .unwrap_or("open") 191- .to_string(); 199+ let new_state = normalize_state( 200+ rec.get("state").and_then(|v| v.as_str()).unwrap_or("open") 201+ ); 192202 193203 let mut row: db::issue_state::IssueStateRow = 194204 serde_json::from_value(transform_record(state, collection, rec))?;
@@ -309,6 +319,8 @@ async fn dispatch_special_upsert(
309319 if row.state.is_empty() { 310320 row.state = rec.get("status").and_then(|v| v.as_str()).unwrap_or("").to_string(); 311321 } 322+ // Normalize NSID-style states: "sh.tangled.repo.pull.status.merged" → "merged" 323+ row.state = normalize_state(&row.state); 312324 if row.pull_uri.is_empty() { 313325 row.pull_uri = pull_uri.clone(); 314326 }
@@ -20,11 +20,17 @@ const BACKFILL_COLLECTIONS: &[&str] = &[
2020 pub async fn run(state: Arc<AppState>) { 2121 tracing::info!("starting PDS backfill for state records"); 2222 23- // Get all unique DIDs that have pulls or issues 23+ // Get all unique DIDs: PR authors, issue authors, AND repo owners 2424 let dids = match sqlx::query_scalar::<_, String>( 2525 "SELECT DISTINCT did FROM pulls \ 2626 UNION \ 27- SELECT DISTINCT did FROM issues", 27+ SELECT DISTINCT did FROM issues \ 28+ UNION \ 29+ SELECT DISTINCT repo_did FROM pulls WHERE repo_did <> '' \ 30+ UNION \ 31+ SELECT DISTINCT repo_did FROM issues WHERE repo_did <> '' \ 32+ UNION \ 33+ SELECT DISTINCT did FROM repos WHERE source = 'tangled'", 2834 ) 2935 .fetch_all(&state.db) 3036 .await