fix: filter anonymous AST nodes from structural diff output

Tree-sitter grammars produce thousands of anonymous internal nodes ($0, $1289, etc.) that represent syntax tokens, punctuation, and anonymous grammar rules. These have zero semantic value for developers.

All breaking/non-breaking changes, vertex lists, edge lists, and kind changes are now filtered through has_named_segment(), which requires at least one non-$N, non-file-path segment in the vertex ID. The compatibility verdict is recomputed after filtering, so a commit that only changes anonymous nodes shows as COMPATIBLE.

Author: Aaron Steven White
Commit 79eecc117022b881e3b709c3d7d751db4ea0124d
Parent: 9c12ea2cdb
Structural diff unavailable

These commits were pushed via plain git push, so no pre-parsed schemas are available. Install git-remote-cospan and re-push via panproto:// to see scope-level changes, breaking change detection, and semantic diffs.

brew install panproto/tap/git-remote-cospan
1 file changed +86 -15
@@ -344,35 +344,106 @@ fn humanize_edge(src: &str, tgt: &str, name: &Option<String>) -> String {
344344 // ─── JSON serialization ────────────────────────────────────────────
345345 
346346 pub fn structural_diff_to_json(diff: &StructuralDiff) -> Value {
347+    // Filter out changes that reference only anonymous vertices ($N IDs).
348+    // These are tree-sitter internal AST nodes (syntax tokens, punctuation)
349+    // that carry no semantic meaning for developers.
350+    let breaking: Vec<Value> = diff
351+        .report
352+        .breaking
353+        .iter()
354+        .filter(|b| !is_anonymous_change_breaking(b))
355+        .map(breaking_json)
356+        .collect();
357+    let non_breaking: Vec<Value> = diff
358+        .report
359+        .non_breaking
360+        .iter()
361+        .filter(|nb| !is_anonymous_change_non_breaking(nb))
362+        .map(non_breaking_json)
363+        .collect();
364+
365+    // Filter raw vertex lists to only named vertices
366+    let added_vertices: Vec<&String> = diff.raw_diff.added_vertices.iter().filter(|v| has_named_segment(v)).collect();
367+    let removed_vertices: Vec<&String> = diff.raw_diff.removed_vertices.iter().filter(|v| has_named_segment(v)).collect();
368+
369+    let compatible = breaking.is_empty();
370+
347371     json!({
348372         "protocol": diff.protocol,
349-        "compatible": diff.report.compatible,
350-        "verdict": if diff.report.compatible { "compatible" } else { "breaking" },
351-        "breakingCount": diff.report.breaking.len(),
352-        "nonBreakingCount": diff.report.non_breaking.len(),
373+        "compatible": compatible,
374+        "verdict": if compatible { "compatible" } else { "breaking" },
375+        "breakingCount": breaking.len(),
376+        "nonBreakingCount": non_breaking.len(),
353377         "oldVertexCount": diff.old_vertex_count,
354378         "newVertexCount": diff.new_vertex_count,
355379         "oldEdgeCount": diff.old_edge_count,
356380         "newEdgeCount": diff.new_edge_count,
357-        "addedVertices": diff.raw_diff.added_vertices,
358-        "removedVertices": diff.raw_diff.removed_vertices,
359-        "kindChanges": diff.raw_diff.kind_changes.iter().map(|kc| json!({
360-            "vertexId": kc.vertex_id,
361-            "oldKind": kc.old_kind,
362-            "newKind": kc.new_kind,
363-        })).collect::<Vec<_>>(),
364-        "addedEdges": diff.raw_diff.added_edges.iter().map(edge_json).collect::<Vec<_>>(),
365-        "removedEdges": diff.raw_diff.removed_edges.iter().map(edge_json).collect::<Vec<_>>(),
381+        "addedVertices": added_vertices,
382+        "removedVertices": removed_vertices,
383+        "kindChanges": diff.raw_diff.kind_changes.iter()
384+            .filter(|kc| has_named_segment(&kc.vertex_id))
385+            .map(|kc| json!({
386+                "vertexId": kc.vertex_id,
387+                "oldKind": kc.old_kind,
388+                "newKind": kc.new_kind,
389+            })).collect::<Vec<_>>(),
390+        "addedEdges": diff.raw_diff.added_edges.iter()
391+            .filter(|e| has_named_segment(&e.src) || has_named_segment(&e.tgt))
392+            .map(edge_json).collect::<Vec<_>>(),
393+        "removedEdges": diff.raw_diff.removed_edges.iter()
394+            .filter(|e| has_named_segment(&e.src) || has_named_segment(&e.tgt))
395+            .map(edge_json).collect::<Vec<_>>(),
366396         "addedNsids": diff.raw_diff.added_nsids,
367397         "removedNsids": diff.raw_diff.removed_nsids,
368398         "changedNsids": diff.raw_diff.changed_nsids.iter().map(|(v, o, n)| json!({
369399             "vertexId": v, "oldNsid": o, "newNsid": n
370400         })).collect::<Vec<_>>(),
371-        "breakingChanges": diff.report.breaking.iter().map(breaking_json).collect::<Vec<_>>(),
372-        "nonBreakingChanges": diff.report.non_breaking.iter().map(non_breaking_json).collect::<Vec<_>>(),
401+        "breakingChanges": breaking,
402+        "nonBreakingChanges": non_breaking,
373403     })
374404 }
375405 
406+/// Check if a vertex ID has at least one named (non-$N, non-file-path) segment.
407+fn has_named_segment(id: &str) -> bool {
408+    if id.contains("::") {
409+        id.split("::").any(|s| {
410+            !s.starts_with('$') && !s.is_empty() && !s.contains('/') && !s.contains('.')
411+        })
412+    } else if id.contains(':') {
413+        // Lexicon-style IDs are always named
414+        true
415+    } else {
416+        !id.starts_with('$') && !id.is_empty()
417+    }
418+}
419+
420+/// Check if a breaking change references only anonymous vertices.
421+fn is_anonymous_change_breaking(b: &BreakingChange) -> bool {
422+    match b {
423+        BreakingChange::RemovedVertex { vertex_id } => !has_named_segment(vertex_id),
424+        BreakingChange::RemovedEdge { src, tgt, .. } => {
425+            !has_named_segment(src) && !has_named_segment(tgt)
426+        }
427+        BreakingChange::KindChanged { vertex_id, .. } => !has_named_segment(vertex_id),
428+        BreakingChange::ConstraintTightened { vertex_id, .. } => !has_named_segment(vertex_id),
429+        BreakingChange::ConstraintAdded { vertex_id, .. } => !has_named_segment(vertex_id),
430+        _ => false, // keep other change types
431+    }
432+}
433+
434+/// Check if a non-breaking change references only anonymous vertices.
435+fn is_anonymous_change_non_breaking(nb: &NonBreakingChange) -> bool {
436+    match nb {
437+        NonBreakingChange::AddedVertex { vertex_id } => !has_named_segment(vertex_id),
438+        NonBreakingChange::AddedEdge { src, tgt, .. } => {
439+            !has_named_segment(src) && !has_named_segment(tgt)
440+        }
441+        NonBreakingChange::ConstraintRelaxed { vertex_id, .. } => !has_named_segment(vertex_id),
442+        NonBreakingChange::ConstraintRemoved { vertex_id, .. } => !has_named_segment(vertex_id),
443+        _ => false,
444+    }
445+}
446+
376447 fn edge_json(e: &panproto_schema::Edge) -> Value {
377448     json!({
378449         "src": e.src,
cospan · schematic version control on atproto · built on AT Protocol