fix: remove server-side schema import, add on-demand fallback parsing

Server-side import via import_git_repo_incremental in receive_pack was parsing every file in every commit through tree-sitter on a 2GB server, taking 30+ minutes and blocking all requests. This is the wrong architecture: panproto's git-remote-cospan does client-side parsing and pushes pre-parsed Schema objects via XRPC.

Changes:
- receive_pack: remove background import_git_repo_incremental call. Raw git push now only updates the git mirror. Schema data requires pushing via git-remote-cospan (panproto:// URL).
- getProjectSchema: add on-demand fallback that parses up to 50 files from HEAD when vcs store has no data. Gives immediate schema stats while the user transitions to git-remote-cospan.

Tracked: panproto/panproto#28 (distribute git-remote-cospan binary)
Author: Aaron Steven White
Commit
c043cb60aeebc5a9cbc7185af76d2a495e6b2de4
Parent: 75f860c20d
Structural diff unavailable
These commits were pushed via plain git push, so no pre-parsed
schemas are available. Install git-remote-cospan and re-push via panproto:// to
see scope-level changes, breaking change detection, and semantic diffs.
brew install panproto/tap/git-remote-cospan
2 files changed +88 -72
@@ -214,68 +214,26 @@ pub async fn git_receive_pack(
214214 215215 drop(store_guard); 216216 217- // 5. Import into panproto-vcs asynchronously using incremental 218- // import. The marks file tracks which git OIDs have already been 219- // translated, so repeated pushes only import new commits. 217+ // 5. Schema import is NOT done server-side. The correct flow is: 218+ // 219+ // a) User installs git-remote-cospan (from panproto) 220+ // b) User pushes via: git push panproto://did/repo main 221+ // c) git-remote-cospan parses files LOCALLY via panproto 222+ // d) Pre-parsed Schema + Commit objects are sent via XRPC 223+ // e) Node stores them directly (zero parsing) 224+ // 225+ // Raw git push (this handler) only updates the git mirror. 226+ // Schema data becomes available when pushed via git-remote-cospan 227+ // or when the on-demand parser in getProjectSchema runs. 228+ // 229+ // See: https://github.com/panproto/panproto/issues/28 220230 if !import_tasks.is_empty() { 221- let store_clone = state.store.clone(); 222- let did_clone = did.clone(); 223- let repo_clone = repo.clone(); 224- tokio::task::spawn_blocking(move || { 225- // Open the stores under the lock, load marks, then DROP the 226- // lock before the expensive import. 
227- let (mirror, mut vcs_store, known) = { 228- let store_guard = store_clone.blocking_lock(); 229- let mirror = match store_guard.open_or_init_git_mirror(&did_clone, &repo_clone) { 230- Ok(m) => m, 231- Err(e) => { 232- tracing::error!(error = %e, "background import: open mirror failed"); 233- return; 234- } 235- }; 236- let vcs_store = match store_guard.open_or_init(&did_clone, &repo_clone) { 237- Ok(s) => s, 238- Err(e) => { 239- tracing::error!(error = %e, "background import: open vcs store failed"); 240- return; 241- } 242- }; 243- let known = store_guard.load_import_marks(&did_clone, &repo_clone); 244- (mirror, vcs_store, known) 245- // store_guard dropped here: lock released 246- }; 247- for (new_oid, refname) in &import_tasks { 248- match panproto_git::import_git_repo_incremental( 249- &mirror, 250- &mut vcs_store, 251- new_oid, 252- &known, 253- ) { 254- Ok(result) => { 255- let _ = panproto_vcs::Store::set_ref(&mut vcs_store, refname, result.head_id); 256- // Persist the new OID mappings for future incremental imports. 257- let store_guard = store_clone.blocking_lock(); 258- store_guard.save_import_marks( 259- &did_clone, 260- &repo_clone, 261- &result.oid_map, 262- ); 263- drop(store_guard); 264- tracing::info!( 265- did = %did_clone, repo = %repo_clone, %refname, 266- commits = result.commit_count, 267- "background: incrementally imported git commits into panproto-vcs" 268- ); 269- } 270- Err(e) => { 271- tracing::warn!( 272- did = %did_clone, repo = %repo_clone, %refname, error = %e, 273- "background: panproto-vcs import failed" 274- ); 275- } 276- } 277- } 278- }); 231+ tracing::info!( 232+ %did, %repo, 233+ refs = import_tasks.len(), 234+ "git push received; git mirror updated. For schema analysis, \ 235+ push via git-remote-cospan (panproto:// URL) to send pre-parsed schemas." 236+ ); 279237 } 280238 281239 let full_response = format!("{}{}0000", pkt_line("unpack ok\n"), response);
@@ -80,8 +80,8 @@ pub async fn get_project_schema(
8080 8181 let registry = panproto_parse::ParserRegistry::new(); 8282 83- // Collect all file paths from the tree. 84- let mut file_paths: Vec<String> = Vec::new(); 83+ // Collect all file paths and blob OIDs from the tree. 84+ let mut file_entries: Vec<(String, git2::Oid)> = Vec::new(); 8585 tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| { 8686 if entry.kind() == Some(git2::ObjectType::Blob) { 8787 let name = entry.name().unwrap_or("");
@@ -90,17 +90,17 @@ pub async fn get_project_schema(
9090 } else { 9191 format!("{dir}{name}") 9292 }; 93- file_paths.push(path); 93+ file_entries.push((path, entry.id())); 9494 } 9595 git2::TreeWalkResult::Ok 9696 }) 9797 .map_err(|e| NodeError::Internal(format!("tree walk: {e}")))?; 9898 99- let file_count = file_paths.len(); 99+ let file_count = file_entries.len(); 100100 101101 // Language detection from file extensions (instant, no parsing). 102102 let mut lang_file_counts: HashMap<String, usize> = HashMap::new(); 103- for path in &file_paths { 103+ for (path, _) in &file_entries { 104104 let p = std::path::Path::new(path); 105105 if let Some(lang) = registry.detect_language(p) { 106106 *lang_file_counts.entry(lang.to_string()).or_default() += 1;
@@ -230,10 +230,68 @@ pub async fn get_project_schema(
230230 }))); 231231 } 232232 233- // Fallback: no vcs store data. Return language stats from extensions only. 233+ // Fallback: no vcs store data. Parse a subset of files on demand 234+ // to give users immediate schema data while the full import runs 235+ // in the background (or they switch to git-remote-cospan). 236+ // Cap at 50 files to keep latency under ~3 seconds. 237+ let on_demand_limit = 50; 238+ let mut total_vc = 0usize; 239+ let mut total_ec = 0usize; 240+ let mut parsed_count = 0usize; 241+ let mut lang_vertex_counts: HashMap<String, usize> = HashMap::new(); 242+ let mut file_schemas: Vec<Value> = Vec::new(); 243+ 244+ for (path, blob_oid) in file_entries.iter().take(on_demand_limit) { 245+ let blob = match mirror.find_blob(*blob_oid) { 246+ Ok(b) => b, 247+ Err(_) => continue, 248+ }; 249+ if let Some((schema, language)) = super::structural::parse_any(&registry, path, blob.content()) { 250+ let vc = schema.vertices.len(); 251+ let ec = schema.edges.len(); 252+ total_vc += vc; 253+ total_ec += ec; 254+ parsed_count += 1; 255+ 256+ *lang_vertex_counts.entry(language.clone()).or_default() += vc; 257+ 258+ let mut top_names: Vec<String> = Vec::new(); 259+ let mut seen = std::collections::HashSet::new(); 260+ for vid in schema.vertices.keys() { 261+ let vid_str: &str = vid; 262+ let human = humanize_vertex(vid_str); 263+ if human != vid_str && !human.contains(" in ") { 264+ if let Some(start) = human.find('`') { 265+ if let Some(end) = human[start + 1..].find('`') { 266+ let name = &human[start + 1..start + 1 + end]; 267+ if !name.is_empty() && !name.starts_with('$') && seen.insert(name.to_string()) { 268+ top_names.push(name.to_string()); 269+ } 270+ } 271+ } 272+ } 273+ } 274+ top_names.truncate(8); 275+ 276+ file_schemas.push(json!({ 277+ "path": path, 278+ "language": language, 279+ "vertexCount": vc, 280+ "edgeCount": ec, 281+ "topNames": top_names, 282+ })); 283+ } 284+ } 285+ 286+ file_schemas.sort_by(|a, b| 
b["vertexCount"].as_u64().cmp(&a["vertexCount"].as_u64())); 287+ 234288 let mut languages: Vec<Value> = lang_file_counts 235289 .iter() 236- .map(|(name, fc)| json!({ "name": name, "fileCount": fc, "vertexCount": 0 })) 290+ .map(|(name, fc)| json!({ 291+ "name": name, 292+ "fileCount": fc, 293+ "vertexCount": lang_vertex_counts.get(name.as_str()).copied().unwrap_or(0), 294+ })) 237295 .collect(); 238296 languages.sort_by(|a, b| b["fileCount"].as_u64().cmp(&a["fileCount"].as_u64())); 239297
@@ -246,11 +304,11 @@ pub async fn get_project_schema(
246304 Ok(Json(json!({ 247305 "commit": commit_oid.to_string(), 248306 "protocol": protocol, 249- "totalVertexCount": 0, 250- "totalEdgeCount": 0, 307+ "totalVertexCount": total_vc, 308+ "totalEdgeCount": total_ec, 251309 "fileCount": file_count, 252- "parsedFileCount": 0, 310+ "parsedFileCount": parsed_count, 253311 "languages": languages, 254- "fileSchemas": [], 312+ "fileSchemas": file_schemas, 255313 }))) 256314 }