fix: normalize Tangled state NSIDs and backfill repo owner DIDs

- Normalize state values like "sh.tangled.repo.pull.status.merged" to just "merged"
- Include repo owner DIDs in the PDS backfill query (pull.status records are on the repo owner, not the PR author)

Author: Aaron Steven White
Commit a99c1a6dea5cf87843c7077a3b0fff52f16aff0b
Parent: f2e98c33a2
Structural diff unavailable

These commits were pushed via plain git push, so no pre-parsed schemas are available. Install git-remote-cospan and re-push via panproto:// to see scope-level changes, breaking change detection, and semantic diffs.

brew install panproto/tap/git-remote-cospan
2 files changed +25 -7
@@ -9,6 +9,18 @@ use crate::xrpc::sse::IndexEvent;
99 
1010 use super::dispatch;
1111 
12+/// Normalize state values from Tangled's NSID format to simple strings.
13+/// e.g., "sh.tangled.repo.pull.status.merged" → "merged"
14+///       "sh.tangled.repo.issue.state.closed" → "closed"
15+fn normalize_state(state: &str) -> String {
16+    if let Some(last) = state.rsplit('.').next() {
17+        if state.contains("tangled") || state.contains('.') {
18+            return last.to_string();
19+        }
20+    }
21+    state.to_string()
22+}
23+
1224 /// Transform a record through the pre-compiled panproto morphism.
1325 /// Handles both Cospan (DB projection) and Tangled (interop + DB projection).
1426 pub(super) fn transform_record(
@@ -184,11 +196,9 @@ async fn dispatch_special_upsert(
184196                 .and_then(|v| v.as_str())
185197                 .unwrap_or("")
186198                 .to_string();
187-            let new_state = rec
188-                .get("state")
189-                .and_then(|v| v.as_str())
190-                .unwrap_or("open")
191-                .to_string();
199+            let new_state = normalize_state(
200+                rec.get("state").and_then(|v| v.as_str()).unwrap_or("open")
201+            );
192202 
193203             let mut row: db::issue_state::IssueStateRow =
194204                 serde_json::from_value(transform_record(state, collection, rec))?;
@@ -309,6 +319,8 @@ async fn dispatch_special_upsert(
309319             if row.state.is_empty() {
310320                 row.state = rec.get("status").and_then(|v| v.as_str()).unwrap_or("").to_string();
311321             }
322+            // Normalize NSID-style states: "sh.tangled.repo.pull.status.merged" → "merged"
323+            row.state = normalize_state(&row.state);
312324             if row.pull_uri.is_empty() {
313325                 row.pull_uri = pull_uri.clone();
314326             }
@@ -20,11 +20,17 @@ const BACKFILL_COLLECTIONS: &[&str] = &[
2020 pub async fn run(state: Arc<AppState>) {
2121     tracing::info!("starting PDS backfill for state records");
2222 
23-    // Get all unique DIDs that have pulls or issues
23+    // Get all unique DIDs: PR authors, issue authors, AND repo owners
2424     let dids = match sqlx::query_scalar::<_, String>(
2525         "SELECT DISTINCT did FROM pulls \
2626          UNION \
27-         SELECT DISTINCT did FROM issues",
27+         SELECT DISTINCT did FROM issues \
28+         UNION \
29+         SELECT DISTINCT repo_did FROM pulls WHERE repo_did <> '' \
30+         UNION \
31+         SELECT DISTINCT repo_did FROM issues WHERE repo_did <> '' \
32+         UNION \
33+         SELECT DISTINCT did FROM repos WHERE source = 'tangled'",
2834     )
2935     .fetch_all(&state.db)
3036     .await
cospan · schematic version control on atproto · built on AT Protocol