From f593ebd1c45ebc501f6457a328aaac58809879a0 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Tue, 10 Feb 2026 19:59:49 -0800 Subject: [PATCH 01/12] Add H3 spatial indexing, two-tier facet loading, and benchmark optimizations (#5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add H3 spatial indexing, two-tier facet loading, and benchmark optimizations ## Changes - isamples_explorer.qmd: Two-tier facet loading (2KB summary for instant counts) - parquet_cesium_isamples_wide.qmd: Zoom-adaptive H3 clustering with LOD - zenodo_isamples_analysis.qmd: Data-driven H3 regional analysis - narrow_vs_wide_performance.qmd: Added geospatial and facet benchmarks ## Fixes Applied (Codex review) - Fixed MODE(n) → MODE(source) for cluster coloring - Added camera listener cleanup to prevent leaks - Added NaN guard for cluster label parsing - Added user-facing warning for facet summary failures Closes #1, #2, #3, #4 --- tutorials/isamples_explorer.qmd | 178 ++++++++-- tutorials/narrow_vs_wide_performance.qmd | 388 +++++++++++++++++++-- tutorials/parquet_cesium_isamples_wide.qmd | 335 ++++++++++++------ tutorials/zenodo_isamples_analysis.qmd | 253 ++++++++++---- 4 files changed, 916 insertions(+), 238 deletions(-) diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index 7b85ab8..5939d9d 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -12,7 +12,7 @@ Search and explore **6.7 million physical samples** from scientific collections ::: {.callout-note} ### Serverless Architecture -This app queries a ~280 MB Parquet file directly in your browser using DuckDB-WASM. No server required! +This app uses a **two-tier loading strategy**: a 2KB pre-computed summary loads instantly for facet counts (source, material, context, specimen type), while the full ~280 MB Parquet file is only queried when drilling into records. All powered by DuckDB-WASM in your browser -- no server required! ::: ## Setup @@ -28,6 +28,9 @@ duckdbModule = import("https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.28.0/+ // Data source configuration parquet_url = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide.parquet" +// Pre-computed facet summaries (2KB - loads instantly) +facet_summaries_url = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_facet_summaries.parquet" + // Source color scheme (consistent with iSamples conventions) SOURCE_COLORS = ({ 'SESAR': '#3366CC', // Blue @@ -79,14 +82,18 @@ viewof searchInput = Inputs.text({ ### Filters +```{ojs} +facetSummariesWarning +``` + **Source** ```{ojs} //| code-fold: true -// Source checkboxes with counts +// Source checkboxes with counts - uses pre-computed summaries for instant load viewof sourceCheckboxes = { - // Get source counts based on current search - const counts = await sourceCounts; + // Use pre-computed facet summaries (instant) instead of scanning full parquet + const counts = facetsByType.source; const options = counts.map(r => r.value); return Inputs.checkbox(options, { @@ -104,6 +111,69 @@ viewof sourceCheckboxes = { } ``` +**Material** + +```{ojs} +//| code-fold: true +// Material filter - loaded from pre-computed summaries +viewof materialCheckboxes = { + const counts = facetsByType.material; + const options = counts.map(r => r.value); + return Inputs.checkbox(options, { + value: [], + format: (x) => { + const r = counts.find(s => s.value === x); + const count = r ? Number(r.count).toLocaleString() : "0"; + return html` + ${x} (${count}) + `; + } + }); +} +``` + +**Sampled Feature** + +```{ojs} +//| code-fold: true +// Context filter - loaded from pre-computed summaries +viewof contextCheckboxes = { + const counts = facetsByType.context; + const options = counts.map(r => r.value); + return Inputs.checkbox(options, { + value: [], + format: (x) => { + const r = counts.find(s => s.value === x); + const count = r ? Number(r.count).toLocaleString() : "0"; + return html` + ${x} (${count}) + `; + } + }); +} +``` + +**Specimen Type** + +```{ojs} +//| code-fold: true +// Object type filter - loaded from pre-computed summaries +viewof objectTypeCheckboxes = { + const counts = facetsByType.object_type; + const options = counts.map(r => r.value); + return Inputs.checkbox(options, { + value: [], + format: (x) => { + const r = counts.find(s => s.value === x); + const count = r ? Number(r.count).toLocaleString() : "0"; + return html` + ${x} (${count}) + `; + } + }); +} +``` + ```{ojs} //| code-fold: true html`Clear All Filters` @@ -131,6 +201,9 @@ viewof maxSamples = Inputs.range([1000, 100000], { const params = new URLSearchParams(); if (searchInput) params.set("q", searchInput); if (sourceCheckboxes?.length) params.set("sources", sourceCheckboxes.join(",")); + if (materialCheckboxes?.length) params.set("material", materialCheckboxes.join(",")); + if (contextCheckboxes?.length) params.set("context", contextCheckboxes.join(",")); + if (objectTypeCheckboxes?.length) params.set("object_type", objectTypeCheckboxes.join(",")); if (viewMode !== "globe") params.set("view", viewMode); const newUrl = params.toString() ? `?${params.toString()}` : window.location.pathname; @@ -264,7 +337,50 @@ async function runQuery(sql) { ```{ojs} //| code-fold: true -// Build WHERE clause from current filters +// Tier 1: Load pre-computed facet summaries (2KB, instant) +facetSummaries = { + facetSummariesError = null; + try { + const rows = await runQuery(`SELECT * FROM read_parquet('${facet_summaries_url}')`); + return rows; + } catch (e) { + console.error("Facet summaries load error:", e); + facetSummariesError = e; + return []; + } +} + +``` + +```{ojs} +//| code-fold: true +facetSummariesWarning = { + if (!facetSummariesError) return null; + return html`
+ Facet summaries failed to load. Filter counts may be missing. Try refreshing. +
`; +} + +// Extract facet counts by type from pre-computed summaries +facetsByType = { + const grouped = { source: [], material: [], context: [], object_type: [] }; + for (const row of facetSummaries) { + const ft = row.facet_type; + if (grouped[ft]) { + grouped[ft].push({ value: row.facet_value, count: Number(row.count), scheme: row.scheme }); + } + } + // Sort each by count descending + for (const key of Object.keys(grouped)) { + grouped[key].sort((a, b) => b.count - a.count); + } + return grouped; +} +``` + +```{ojs} +//| code-fold: true +// Build WHERE clause from current filters (Tier 2: queries full parquet only when filtering) whereClause = { const conditions = [ "otype = 'MaterialSampleRecord'", @@ -288,40 +404,36 @@ whereClause = { conditions.push(`n IN (${sourceList})`); } + // Material filter + const materials = Array.from(materialCheckboxes || []); + if (materials.length > 0) { + const matList = materials.map(m => `'${m.replace(/'/g, "''")}'`).join(", "); + conditions.push(`has_material_category IN (${matList})`); + } + + // Context (sampled feature) filter + const contexts = Array.from(contextCheckboxes || []); + if (contexts.length > 0) { + const ctxList = contexts.map(c => `'${c.replace(/'/g, "''")}'`).join(", "); + conditions.push(`has_context_category IN (${ctxList})`); + } + + // Object type (specimen type) filter + const objectTypes = Array.from(objectTypeCheckboxes || []); + if (objectTypes.length > 0) { + const otList = objectTypes.map(o => `'${o.replace(/'/g, "''")}'`).join(", "); + conditions.push(`has_specimen_category IN (${otList})`); + } + return conditions.join(" AND "); } ``` ```{ojs} //| code-fold: true -// Get source facet counts (respects text search but not source filter) -sourceCounts = { - let baseWhere = "otype = 'MaterialSampleRecord' AND latitude IS NOT NULL"; - - if (searchInput?.trim()) { - const term = searchInput.trim().replace(/'/g, "''"); - baseWhere += ` AND ( - label ILIKE '%${term}%' - OR description ILIKE '%${term}%' - OR CAST(place_name AS VARCHAR) ILIKE '%${term}%' - )`; - } - - const query = ` - SELECT n as value, COUNT(*) as count - FROM samples - WHERE ${baseWhere} - GROUP BY n - ORDER BY count DESC - `; - - try { - return await runQuery(query); - } catch (e) { - console.error("Facet query error:", e); - return []; - } -} +// Source counts now come from pre-computed facet summaries (Tier 1) +// No longer scans the full parquet file on every page load +sourceCounts = facetsByType.source ``` ```{ojs} diff --git a/tutorials/narrow_vs_wide_performance.qmd b/tutorials/narrow_vs_wide_performance.qmd index bfa1621..da5fa03 100644 --- a/tutorials/narrow_vs_wide_performance.qmd +++ b/tutorials/narrow_vs_wide_performance.qmd @@ -1,6 +1,6 @@ --- -title: "Narrow vs Wide Schema Performance Comparison" -categories: [parquet, performance, benchmarking] +title: "Narrow vs Wide vs H3 Schema Performance Comparison" +categories: [parquet, performance, benchmarking, h3] format: html: code-fold: true @@ -8,20 +8,23 @@ format: toc-depth: 3 --- -This page benchmarks the performance difference between **narrow** and **wide** parquet schema formats when accessing data "over the wire" via HTTP range requests in DuckDB-WASM. +This page benchmarks the performance difference between **narrow**, **wide**, and **wide + H3** parquet schema formats when accessing data "over the wire" via HTTP range requests in DuckDB-WASM. ## Introduction ### What are Narrow vs Wide Schemas? -The iSamples property graph data can be serialized in two different parquet formats: +The iSamples property graph data can be serialized in three different parquet formats: | Format | Description | File Size | Row Count | Sources | |--------|-------------|-----------|-----------|---------| | **Narrow** | Stores relationships as separate edge rows (`otype='_edge_'`) | ~850 MB | ~106M rows | All 4 sources | -| **Wide** | Stores relationships as `p__*` columns on entity rows | ~280 MB | ~20M rows | All 4 sources | +| **Wide** | Stores relationships as `p__*` columns on entity rows | ~278 MB | ~20M rows | All 4 sources | +| **Wide + H3** | Wide format + pre-computed H3 spatial indices (`h3_res4/6/8`) | ~292 MB | ~20M rows | All 4 sources | -Both formats represent the **same underlying data** (SESAR, OpenContext, GEOME, Smithsonian) with identical semantics, but the wide format is optimized for analytical queries by eliminating edge rows. +All three formats represent the **same underlying data** (SESAR, OpenContext, GEOME, Smithsonian) with identical semantics. The wide format eliminates edge rows; the H3 variant adds pre-computed hexagonal spatial indices for accelerated geospatial queries. + +A **facet summaries** file (2KB) provides pre-aggregated counts for instant facet lookups. **Data source**: Cloudflare R2 (updated January 2026) @@ -63,6 +66,8 @@ import { DuckDBClient } from "https://cdn.jsdelivr.net/npm/@observablehq/duckdb@ // Updated 2026-01-14: Using Zenodo narrow/wide files on Cloudflare R2 narrowUrl = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202512_narrow.parquet" wideUrl = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide.parquet" +wideH3Url = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide_h3.parquet" +summariesUrl = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_facet_summaries.parquet" ``` ### Environment Info @@ -110,7 +115,7 @@ viewof runBenchmarks = Inputs.button("Run All Benchmarks", { ```{ojs} //| echo: false // Initialize databases only when button is clicked (lazy loading) -// Returns { narrow, wide } or null if not yet clicked +// Returns { narrow, wide, wideH3 } or null if not yet clicked initDatabases = { if (runBenchmarks < 1) return null; @@ -127,8 +132,11 @@ initDatabases = { const wideDb = await DuckDBClient.of(); await wideDb.query(`CREATE VIEW wide AS SELECT * FROM read_parquet('${wideUrl}')`); + const wideH3Db = await DuckDBClient.of(); + await wideH3Db.query(`CREATE VIEW wide_h3 AS SELECT * FROM read_parquet('${wideH3Url}')`); + if (loadingDiv) loadingDiv.style.display = 'none'; - return { narrow: narrowDb, wide: wideDb }; + return { narrow: narrowDb, wide: wideDb, wideH3: wideH3Db }; } catch (e) { const errorMsg = `Failed to initialize databases: ${e.message}. This may be due to network issues or CORS restrictions.`; if (errorDiv) { @@ -142,6 +150,7 @@ initDatabases = { dbNarrow = initDatabases && !initDatabases.error ? initDatabases.narrow : null dbWide = initDatabases && !initDatabases.error ? initDatabases.wide : null +dbWideH3 = initDatabases && !initDatabases.error ? initDatabases.wideH3 : null ``` ## Data Validity Check @@ -155,7 +164,7 @@ Checking data validity... ```{ojs} validityCheck = { // Only run when button clicked AND databases are initialized - if (runBenchmarks < 1 || !dbNarrow || !dbWide) return null; + if (runBenchmarks < 1 || !dbNarrow || !dbWide || !dbWideH3) return null; const loadingDiv = document.getElementById('loading_validity'); const errorDiv = document.getElementById('error_display'); @@ -170,6 +179,10 @@ validityCheck = { const wideCount = await dbWide.query(`SELECT COUNT(*) as cnt FROM wide`); const wideTotal = wideCount[0].cnt; + // Count rows in wide+H3 + const wideH3Count = await dbWideH3.query(`SELECT COUNT(*) as cnt FROM wide_h3`); + const wideH3Total = wideH3Count[0].cnt; + // Count entity types in narrow (excluding edges) const narrowEntities = await dbNarrow.query(` SELECT COUNT(*) as cnt FROM narrow @@ -177,7 +190,7 @@ validityCheck = { `); const narrowEntityCount = narrowEntities[0].cnt; - // Count samples in both + // Count samples in all three const narrowSamples = await dbNarrow.query(` SELECT COUNT(*) as cnt FROM narrow WHERE otype = 'MaterialSampleRecord' @@ -188,13 +201,21 @@ validityCheck = { WHERE otype = 'MaterialSampleRecord' `); + const wideH3Samples = await dbWideH3.query(` + SELECT COUNT(*) as cnt FROM wide_h3 + WHERE otype = 'MaterialSampleRecord' + `); + return { narrowTotal: narrowTotal, wideTotal: wideTotal, + wideH3Total: wideH3Total, narrowEntities: narrowEntityCount, narrowSamples: narrowSamples[0].cnt, wideSamples: wideSamples[0].cnt, + wideH3Samples: wideH3Samples[0].cnt, sampleMatch: narrowSamples[0].cnt === wideSamples[0].cnt + && wideSamples[0].cnt === wideH3Samples[0].cnt }; } catch (e) { if (errorDiv) { @@ -221,12 +242,14 @@ validityCheck ? (validityCheck.error ? html` + - + +
Narrow total rows:${validityCheck.narrowTotal.toLocaleString()}
Wide total rows:${validityCheck.wideTotal.toLocaleString()}
Wide+H3 total rows:${validityCheck.wideH3Total.toLocaleString()}
Narrow entities (non-edge):${validityCheck.narrowEntities.toLocaleString()}
Narrow samples:${validityCheck.narrowSamples.toLocaleString()}
Wide samples:${validityCheck.wideSamples.toLocaleString()}
Sample count match:${validityCheck.sampleMatch ? '✅ Yes' : '❌ No'}
Wide+H3 samples:${validityCheck.wideH3Samples.toLocaleString()}
Sample count match (all 3):${validityCheck.sampleMatch ? '✅ Yes' : '❌ No'}
-

The wide schema has ~79% fewer rows because edge rows are eliminated and stored as columns.

+

The wide schema has ~79% fewer rows because edge rows are eliminated. The H3 variant adds 3 spatial index columns (h3_res4/6/8).

`) : html`

Click "Run All Benchmarks" to check data validity

` ``` @@ -635,22 +658,265 @@ benchmark3 ? (benchmark3.error ? html` `) : html`

Waiting for benchmark...

` ``` +## Benchmark 4: Geospatial Bounding Box Query + +This benchmark counts samples within the western United States (lat 32-49, lon -125 to -104), comparing lat/lon filtering on wide format versus H3-cell-based filtering on the H3-indexed file. + + + +```{ojs} +benchmark4 = { + if (runBenchmarks < 1 || !benchmark3 || benchmark3.error) return null; + + const loadingDiv = document.getElementById('loading_b4'); + const errorDiv = document.getElementById('error_display'); + if (loadingDiv) loadingDiv.hidden = false; + + // Wide: baseline lat/lon bounding box + const wideQuery = ` + SELECT COUNT(*) as cnt FROM wide + WHERE otype = 'MaterialSampleRecord' + AND latitude BETWEEN 32 AND 49 + AND longitude BETWEEN -125 AND -104 + `; + + // Wide+H3: H3-accelerated — first identify cells in the bbox, then filter by cells + const h3Query = ` + WITH cells AS ( + SELECT DISTINCT h3_res4 FROM wide_h3 + WHERE latitude BETWEEN 32 AND 49 + AND longitude BETWEEN -125 AND -104 + AND otype = 'MaterialSampleRecord' + ) + SELECT COUNT(*) as cnt FROM wide_h3 + WHERE h3_res4 IN (SELECT h3_res4 FROM cells) + AND otype = 'MaterialSampleRecord' + `; + + const runs = 3; + + try { + const wideTimes = []; + for (let i = 0; i < runs; i++) { + const start = performance.now(); + await dbWide.query(wideQuery); + wideTimes.push(performance.now() - start); + } + + const h3Times = []; + for (let i = 0; i < runs; i++) { + const start = performance.now(); + await dbWideH3.query(h3Query); + h3Times.push(performance.now() - start); + } + + const median = arr => { + const sorted = [...arr].sort((a, b) => a - b); + if (sorted.length === 2) return (sorted[0] + sorted[1]) / 2; + return sorted[Math.floor(sorted.length / 2)]; + }; + const warmMedian = arr => { + if (arr.length <= 1) return arr[0] || 0; + return median(arr.slice(1)); + }; + + const wideMedian = warmMedian(wideTimes); + const h3Median = warmMedian(h3Times); + + return { + name: "Geospatial BBox (Western US)", + wideCold: wideTimes[0], + wideMedian: wideMedian, + wideAll: wideTimes, + h3Cold: h3Times[0], + h3Median: h3Median, + h3All: h3Times, + speedup: wideMedian / h3Median + }; + } catch (e) { + if (errorDiv) { + errorDiv.textContent = `Benchmark 4 failed: ${e.message}`; + errorDiv.style.display = 'block'; + } + return { error: e.message }; + } finally { + if (loadingDiv) loadingDiv.hidden = true; + } +} +``` + +```{ojs} +//| echo: false +benchmark4 ? (benchmark4.error ? html` +
+

Benchmark 4 Error

+

Benchmark 4 failed: ${benchmark4.error}

+
+` : html` +
+

${benchmark4.name}

+ + + + + + + + + + + + + + + + + + + +
SchemaCold (1st run)Warm (median)All runs
Wide (lat/lon)${benchmark4.wideCold.toFixed(0)} ms${benchmark4.wideMedian.toFixed(0)} ms${benchmark4.wideAll.map(t => t.toFixed(0)).join(', ')} ms
Wide+H3 (cell filter)${benchmark4.h3Cold.toFixed(0)} ms${benchmark4.h3Median.toFixed(0)} ms${benchmark4.h3All.map(t => t.toFixed(0)).join(', ')} ms
+

Speedup: ${benchmark4.speedup.toFixed(2)}x (H3 is ${benchmark4.speedup > 1 ? 'faster' : 'slower'})

+
+`) : html`

Waiting for benchmark...

` +``` + +## Benchmark 5: Facet Aggregation — Full Scan vs Pre-computed Summary + +This benchmark compares full-scan source aggregation on the wide file versus a pre-computed 2KB facet summary file. + + + +```{ojs} +benchmark5 = { + if (runBenchmarks < 1 || !benchmark4 || benchmark4.error) return null; + + const loadingDiv = document.getElementById('loading_b5'); + const errorDiv = document.getElementById('error_display'); + if (loadingDiv) loadingDiv.hidden = false; + + // Full scan: aggregate source counts from full wide file + const fullScanQuery = ` + SELECT n, COUNT(*) as cnt FROM wide + WHERE otype = 'MaterialSampleRecord' + GROUP BY n + `; + + // Pre-computed: read from 2KB summary file + const summaryQuery = ` + SELECT facet_value, count FROM read_parquet('${summariesUrl}') + WHERE facet_type = 'source' + `; + + const runs = 3; + + try { + const fullScanTimes = []; + for (let i = 0; i < runs; i++) { + const start = performance.now(); + await dbWide.query(fullScanQuery); + fullScanTimes.push(performance.now() - start); + } + + const summaryTimes = []; + for (let i = 0; i < runs; i++) { + const start = performance.now(); + await dbWide.query(summaryQuery); + summaryTimes.push(performance.now() - start); + } + + const median = arr => { + const sorted = [...arr].sort((a, b) => a - b); + if (sorted.length === 2) return (sorted[0] + sorted[1]) / 2; + return sorted[Math.floor(sorted.length / 2)]; + }; + const warmMedian = arr => { + if (arr.length <= 1) return arr[0] || 0; + return median(arr.slice(1)); + }; + + const fullScanMedian = warmMedian(fullScanTimes); + const summaryMedian = warmMedian(summaryTimes); + + return { + name: "Facet Aggregation (full scan vs summary)", + fullScanCold: fullScanTimes[0], + fullScanMedian: fullScanMedian, + fullScanAll: fullScanTimes, + summaryCold: summaryTimes[0], + summaryMedian: summaryMedian, + summaryAll: summaryTimes, + speedup: fullScanMedian / summaryMedian + }; + } catch (e) { + if (errorDiv) { + errorDiv.textContent = `Benchmark 5 failed: ${e.message}`; + errorDiv.style.display = 'block'; + } + return { error: e.message }; + } finally { + if (loadingDiv) loadingDiv.hidden = true; + } +} +``` + +```{ojs} +//| echo: false +benchmark5 ? (benchmark5.error ? html` +
+

Benchmark 5 Error

+

Benchmark 5 failed: ${benchmark5.error}

+
+` : html` +
+

${benchmark5.name}

+ + + + + + + + + + + + + + + + + + + +
ApproachCold (1st run)Warm (median)All runs
Full scan (280MB)${benchmark5.fullScanCold.toFixed(0)} ms${benchmark5.fullScanMedian.toFixed(0)} ms${benchmark5.fullScanAll.map(t => t.toFixed(0)).join(', ')} ms
Pre-computed (2KB)${benchmark5.summaryCold.toFixed(0)} ms${benchmark5.summaryMedian.toFixed(0)} ms${benchmark5.summaryAll.map(t => t.toFixed(0)).join(', ')} ms
+

Speedup: ${benchmark5.speedup.toFixed(2)}x (pre-computed is ${benchmark5.speedup > 1 ? 'faster' : 'slower'})

+
+`) : html`

Waiting for benchmark...

` +``` + ## Results Summary ```{ojs} //| echo: false allResults = { - if (!benchmark1 || !benchmark2 || !benchmark3) return null; + if (!benchmark1 || !benchmark2 || !benchmark3 || !benchmark4 || !benchmark5) return null; - const results = [benchmark1, benchmark2, benchmark3]; - const successful = results.filter(r => r && !r.error); - const avgSpeedup = successful.length - ? successful.reduce((sum, r) => sum + r.speedup, 0) / successful.length + // Schema comparison benchmarks (narrow vs wide) + const schemaResults = [benchmark1, benchmark2, benchmark3]; + const schemaSuccessful = schemaResults.filter(r => r && !r.error); + const avgSchemaSpeedup = schemaSuccessful.length + ? schemaSuccessful.reduce((sum, r) => sum + r.speedup, 0) / schemaSuccessful.length : null; return { - benchmarks: results, - avgSpeedup: avgSpeedup + schemaBenchmarks: schemaResults, + geoBenchmark: benchmark4, + facetBenchmark: benchmark5, + avgSchemaSpeedup: avgSchemaSpeedup }; } @@ -658,6 +924,7 @@ allResults ? html`

Summary Results

+

Schema Comparison (Narrow vs Wide)

@@ -668,7 +935,7 @@ allResults ? html` -${allResults.benchmarks.map(b => { +${allResults.schemaBenchmarks.map(b => { const hasError = !b || b.error; return html` @@ -683,26 +950,76 @@ ${allResults.benchmarks.map(b => { - + + + +
Average - -${allResults.avgSpeedup != null ? `${allResults.avgSpeedup.toFixed(2)}x` : 'N/A'}${allResults.avgSchemaSpeedup != null ? `${allResults.avgSchemaSpeedup.toFixed(2)}x` : 'N/A'}
+ +

Geospatial Query (Wide vs Wide+H3)

+ + + + + + + + + +${allResults.geoBenchmark && !allResults.geoBenchmark.error ? html` + + + + + + + + + +` : html``} + +
ApproachWarm (ms)Speedup
Wide (lat/lon bbox)${allResults.geoBenchmark.wideMedian.toFixed(0)}baseline
Wide+H3 (cell filter)${allResults.geoBenchmark.h3Median.toFixed(0)}${allResults.geoBenchmark.speedup.toFixed(2)}x
Error: ${allResults.geoBenchmark?.error || 'N/A'}
+ +

Facet Aggregation (Full Scan vs Pre-computed Summary)

+ + + + + + + + + +${allResults.facetBenchmark && !allResults.facetBenchmark.error ? html` + + + + + + + + + + +` : html``}
ApproachWarm (ms)Speedup
Full scan (280MB wide)${allResults.facetBenchmark.fullScanMedian.toFixed(0)}baseline
Pre-computed summary (2KB)${allResults.facetBenchmark.summaryMedian.toFixed(0)}${allResults.facetBenchmark.speedup.toFixed(2)}x
Error: ${allResults.facetBenchmark?.error || 'N/A'}

Key Findings

Recommendation

-

For browser-based analysis with DuckDB-WASM, the wide format is recommended for: +

For browser-based analysis with DuckDB-WASM:

-

` : html`
@@ -736,7 +1053,18 @@ ${allResults.benchmarks.map(b => { {otype: 'MaterialSampleRecord', p__produced_by: [456], p__has_material_category: [789]} ``` -This eliminates ~9M edge rows, resulting in the 60% file size reduction. +**Wide+H3 schema** adds pre-computed spatial indices: +```sql +-- Same as wide, plus H3 hexagonal index columns +{..., h3_res4: 595536348953485311, h3_res6: 604265133842685951, h3_res8: 613003918731886591} +``` + +The wide format eliminates ~9M edge rows (60% file size reduction). The H3 variant adds ~14MB for spatial index columns that enable cell-based geospatial filtering. + +**Facet summaries** (2KB) pre-compute common aggregations: +```sql +{facet_type: 'source', facet_value: 'SESAR', scheme: null, count: 4600000} +``` ## See Also diff --git a/tutorials/parquet_cesium_isamples_wide.qmd b/tutorials/parquet_cesium_isamples_wide.qmd index e123540..e13de7e 100644 --- a/tutorials/parquet_cesium_isamples_wide.qmd +++ b/tutorials/parquet_cesium_isamples_wide.qmd @@ -3,18 +3,20 @@ title: Using Cesium for display of remote parquet (iSamples Wide Format). categories: [parquet, spatial, recipe, wide, isamples] --- -This page renders points from the **full iSamples wide-format** parquet file (all sources: SESAR, OpenContext, GEOME, Smithsonian) on Cesium using point primitives. +This page renders points from the **full iSamples wide-format** parquet file (all sources: SESAR, OpenContext, GEOME, Smithsonian) on Cesium using point primitives, with **zoom-adaptive H3 clustering** for fast initial load. ::: {.callout-note} -## iSamples Full Dataset (Wide Format) +## iSamples Full Dataset (Wide Format + H3 Spatial Index) This page uses the **iSamples combined dataset** (Jan 2026) which includes: - **6.7M MaterialSampleRecords** from all iSamples sources - **Source breakdown**: SESAR (4.6M), OpenContext (1M), GEOME (605K), Smithsonian (322K) -- **~280 MB** wide format (vs ~850 MB narrow) - 67% smaller -- **20M total rows** (all entity types, no edge rows) -- **47 columns** with flattened latitude/longitude (direct column access, no JSON parsing) +- **~292 MB** wide format with H3 indices (vs ~850 MB narrow) - 66% smaller +- **H3 spatial index**: Pre-computed `h3_res4`, `h3_res6`, `h3_res8` columns for zoom-adaptive clustering +- **Clustered view**: At high altitude shows res4 clusters, medium shows res6, close-up shows res8 +- **Toggle**: Switch between clustered (fast) and all-points (detailed) views +- **Color-coded**: Points/clusters colored by dominant data source ::: @@ -42,8 +44,8 @@ Cesium.Ion.defaultAccessToken = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOi ```{ojs} //| echo: false viewof parquet_path = Inputs.text({ - label:"Source (iSamples Wide Format)", - value:"https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide.parquet", + label:"Source (iSamples Wide Format + H3)", + value:"https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide_h3.parquet", placeholder: "URL or file:///path/to/file.parquet", width:"100%", submit:true @@ -82,6 +84,34 @@ viewof classifyTrigger = { classifyDots = classifyTrigger > 0 ? classifyTrigger : null ``` +```{ojs} +//| echo: false +// View mode toggle: clustered (H3 LOD) vs all points +viewof viewModeToggle = Inputs.radio(["clustered", "all points"], { + label: "View Mode:", + value: "clustered" +}) +``` + +```{ojs} +//| echo: false +// Source color scheme for cluster coloring +CLUSTER_COLORS = ({ + 'SESAR': '#3366CC', + 'OPENCONTEXT': '#DC3912', + 'GEOME': '#109618', + 'SMITHSONIAN': '#FF9900', + 'default': '#808080' +}) + +// H3 resolution based on camera height +function getH3ResForHeight(height) { + if (height > 5000000) return 4; // Continental view + if (height > 500000) return 6; // Regional view + return 8; // Local view +} +``` + ::: {.callout-tip collapse="true"} #### Using a local cached file for faster performance @@ -156,136 +186,244 @@ async function loadData(query, params = [], waiting_id = null, key = "default") } } +// Query H3 clusters at a given resolution +async function queryH3Clusters(h3Res) { + const col = `h3_res${h3Res}`; + const query = ` + SELECT + ${col} as hex_id, + COUNT(*) as n, + AVG(latitude) as lat, + AVG(longitude) as lon, + MODE(source) as dominant_source + FROM nodes + WHERE otype = 'MaterialSampleRecord' + AND ${col} IS NOT NULL + AND latitude IS NOT NULL + AND longitude IS NOT NULL + GROUP BY ${col} + `; + return await loadData(query, [], "loading_1", "clusters"); +} + +// Render clustered points on the globe +async function renderClusters(clusters) { + content.points.removeAll(); + if (!clusters || clusters.length === 0) return; + + const scalar = new Cesium.NearFarScalar(1.5e2, 2, 8.0e6, 0.5); + const maxCount = Math.max(...clusters.map(c => c.n)); + + for (const cluster of clusters) { + const source = cluster.dominant_source || 'default'; + const colorHex = CLUSTER_COLORS[source] || CLUSTER_COLORS.default; + const color = Cesium.Color.fromCssColorString(colorHex); + // Size proportional to log of count (range: 4-20px) + const size = Math.max(4, Math.min(20, 4 + Math.log(Number(cluster.n)) * 2.5)); + + content.points.add({ + id: `cluster_${cluster.hex_id}_n${cluster.n}_${source}`, + position: Cesium.Cartesian3.fromDegrees( + cluster.lon, + cluster.lat, + 0 + ), + pixelSize: size, + color: color, + scaleByDistance: scalar, + }); + } + content.enableTracking(); +} + locations = { // Performance telemetry performance.mark('locations-start'); - // Get loading indicator element for progress updates const loadingDiv = document.getElementById('loading_1'); if (loadingDiv) { loadingDiv.hidden = false; - loadingDiv.innerHTML = 'Loading geocodes...'; + loadingDiv.innerHTML = 'Loading...'; } - // Fast query: just get all distinct geocodes (no classification!) - // The Zenodo wide parquet has latitude/longitude as direct columns (like Eric's OpenContext) - const query = ` - SELECT DISTINCT - pid, - latitude, - longitude - FROM nodes - WHERE otype = 'GeospatialCoordLocation' - AND latitude IS NOT NULL - AND longitude IS NOT NULL - `; - - performance.mark('query-start'); - const data = await loadData(query, [], "loading_1", "locations"); - performance.mark('query-end'); - performance.measure('locations-query', 'query-start', 'query-end'); - const queryTime = performance.getEntriesByName('locations-query')[0].duration; - - // Handle null data (query failed) - if (!data) { - console.error('Query failed - data is null'); - if (loadingDiv) { - loadingDiv.innerHTML = 'Query failed - check console for errors'; - loadingDiv.hidden = false; - } - return []; + // Remove any existing camera change listener to avoid leaks + if (content._cameraChangedHandler) { + content.viewer.camera.changed.removeEventListener(content._cameraChangedHandler); + content._cameraChangedHandler = null; + } + if (content._cameraChangedDebounceTimer) { + clearTimeout(content._cameraChangedDebounceTimer); + content._cameraChangedDebounceTimer = null; } - console.log(`Query executed in ${queryTime.toFixed(0)}ms - retrieved ${data.length} locations`); + if (viewModeToggle === "clustered") { + // Clustered mode: load H3 clusters based on initial zoom level + if (loadingDiv) loadingDiv.innerHTML = 'Loading H3 clusters (res4)...'; - // Clear the existing PointPrimitiveCollection - content.points.removeAll(); + performance.mark('query-start'); + const clusters = await queryH3Clusters(4); + performance.mark('query-end'); + performance.measure('locations-query', 'query-start', 'query-end'); + const queryTime = performance.getEntriesByName('locations-query')[0].duration; - // Single color for all points (blue) - const defaultColor = Cesium.Color.fromCssColorString('#2E86AB'); - const defaultSize = 4; + if (!clusters) { + if (loadingDiv) { + loadingDiv.innerHTML = 'Cluster query failed - check console'; + loadingDiv.hidden = false; + } + return []; + } - // Render points in chunks to keep UI responsive - const CHUNK_SIZE = 500; - const scalar = new Cesium.NearFarScalar(1.5e2, 2, 8.0e6, 0.2); + console.log(`H3 cluster query (res4) in ${queryTime.toFixed(0)}ms - ${clusters.length} clusters`); + + performance.mark('render-start'); + await renderClusters(clusters); + performance.mark('render-end'); + performance.measure('locations-render', 'render-start', 'render-end'); + + if (loadingDiv) loadingDiv.hidden = true; + + // Set up camera change listener for zoom-adaptive LOD + let lastRes = 4; + let debounceTimer = null; + const cameraChangedHandler = () => { + if (viewModeToggle !== "clustered") return; + const height = content.viewer.camera.positionCartographic.height; + const newRes = getH3ResForHeight(height); + if (newRes !== lastRes) { + lastRes = newRes; + clearTimeout(debounceTimer); + debounceTimer = setTimeout(async () => { + const ld = document.getElementById('loading_1'); + if (ld) { ld.hidden = false; ld.innerHTML = `Loading H3 clusters (res${newRes})...`; } + const newClusters = await queryH3Clusters(newRes); + if (newClusters) { + await renderClusters(newClusters); + console.log(`Zoom-adaptive: switched to res${newRes}, ${newClusters.length} clusters`); + } + if (ld) ld.hidden = true; + }, 300); + content._cameraChangedDebounceTimer = debounceTimer; + } + }; + content._cameraChangedHandler = cameraChangedHandler; + content.viewer.camera.changed.addEventListener(cameraChangedHandler); + content.viewer.camera.percentageChanged = 0.1; + + performance.mark('locations-end'); + performance.measure('locations-total', 'locations-start', 'locations-end'); + return clusters; + + } else { + // All points mode: load every geocode (original behavior) + if (loadingDiv) loadingDiv.innerHTML = 'Loading all geocodes...'; + + const query = ` + SELECT DISTINCT + pid, + latitude, + longitude + FROM nodes + WHERE otype = 'GeospatialCoordLocation' + AND latitude IS NOT NULL + AND longitude IS NOT NULL + `; + + performance.mark('query-start'); + const data = await loadData(query, [], "loading_1", "locations"); + performance.mark('query-end'); + performance.measure('locations-query', 'query-start', 'query-end'); + const queryTime = performance.getEntriesByName('locations-query')[0].duration; + + if (!data) { + if (loadingDiv) { + loadingDiv.innerHTML = 'Query failed - check console for errors'; + loadingDiv.hidden = false; + } + return []; + } - performance.mark('render-start'); - for (let i = 0; i < data.length; i += CHUNK_SIZE) { - const chunk = data.slice(i, i + CHUNK_SIZE); - const endIdx = Math.min(i + CHUNK_SIZE, data.length); + console.log(`Query executed in ${queryTime.toFixed(0)}ms - retrieved ${data.length} locations`); - // Update progress indicator - if (loadingDiv) { - const pct = Math.round((endIdx / data.length) * 100); - loadingDiv.innerHTML = `Rendering geocodes... ${endIdx.toLocaleString()}/${data.length.toLocaleString()} (${pct}%)`; - } + content.points.removeAll(); + const defaultColor = Cesium.Color.fromCssColorString('#2E86AB'); + const defaultSize = 4; + const CHUNK_SIZE = 500; + const scalar = new Cesium.NearFarScalar(1.5e2, 2, 8.0e6, 0.2); - // Add points for this chunk - for (const row of chunk) { - content.points.add({ - id: row.pid, - position: Cesium.Cartesian3.fromDegrees( - row.longitude, //longitude - row.latitude, //latitude - 0 //elevation, m - ), - pixelSize: defaultSize, - color: defaultColor, - scaleByDistance: scalar, - }); - } + performance.mark('render-start'); + for (let i = 0; i < data.length; i += CHUNK_SIZE) { + const chunk = data.slice(i, i + CHUNK_SIZE); + const endIdx = Math.min(i + CHUNK_SIZE, data.length); - // Yield to browser between chunks to keep UI responsive - if (i + CHUNK_SIZE < data.length) { - await new Promise(resolve => setTimeout(resolve, 0)); - } - } - performance.mark('render-end'); - performance.measure('locations-render', 'render-start', 'render-end'); - const renderTime = performance.getEntriesByName('locations-render')[0].duration; + if (loadingDiv) { + const pct = Math.round((endIdx / data.length) * 100); + loadingDiv.innerHTML = `Rendering geocodes... ${endIdx.toLocaleString()}/${data.length.toLocaleString()} (${pct}%)`; + } - // Hide loading indicator - if (loadingDiv) { - loadingDiv.hidden = true; - } + for (const row of chunk) { + content.points.add({ + id: row.pid, + position: Cesium.Cartesian3.fromDegrees(row.longitude, row.latitude, 0), + pixelSize: defaultSize, + color: defaultColor, + scaleByDistance: scalar, + }); + } - performance.mark('locations-end'); - performance.measure('locations-total', 'locations-start', 'locations-end'); - const totalTime = performance.getEntriesByName('locations-total')[0].duration; + if (i + CHUNK_SIZE < data.length) { + await new Promise(resolve => setTimeout(resolve, 0)); + } + } + performance.mark('render-end'); + performance.measure('locations-render', 'render-start', 'render-end'); + const renderTime = performance.getEntriesByName('locations-render')[0].duration; - console.log(`Rendering completed in ${renderTime.toFixed(0)}ms`); - console.log(`Total time (query + render): ${totalTime.toFixed(0)}ms`); + if (loadingDiv) loadingDiv.hidden = true; - content.enableTracking(); - return data; + performance.mark('locations-end'); + performance.measure('locations-total', 'locations-start', 'locations-end'); + const totalTime = performance.getEntriesByName('locations-total')[0].duration; + console.log(`Rendering completed in ${renderTime.toFixed(0)}ms, total: ${totalTime.toFixed(0)}ms`); + + content.enableTracking(); + return data; + } } function createShowPrimitive(viewer) { return function(movement) { - // Get the point at the mouse end position const selectPoint = viewer.viewer.scene.pick(movement.endPosition); - // Clear the current selection, if there is one and it is different to the selectPoint + // Clear previous selection if (viewer.currentSelection !== null) { - //console.log(`selected.p ${viewer.currentSelection}`) if (Cesium.defined(selectPoint) && selectPoint !== viewer.currentSelection) { - console.log(`selected.p 2 ${viewer.currentSelection}`) - viewer.currentSelection.primitive.pixelSize = 4; + viewer.currentSelection.primitive.pixelSize = viewer.currentSelection._origSize || 4; viewer.currentSelection.primitive.outlineColor = Cesium.Color.TRANSPARENT; viewer.currentSelection.outlineWidth = 0; viewer.currentSelection = null; } } - // If selectPoint is valid and no currently selected point if (Cesium.defined(selectPoint) && selectPoint.hasOwnProperty("primitive")) { - //console.log(`showPrimitiveId ${selectPoint.id}`); - //const carto = Cesium.Cartographic.fromCartesian(selectPoint.primitive.position) viewer.pointLabel.position = selectPoint.primitive.position; viewer.pointLabel.label.show = true; - //viewer.pointLabel.label.text = `id:${selectPoint.id}, ${carto}`; - viewer.pointLabel.label.text = `${selectPoint.id}`; + + // Parse cluster info from ID (format: cluster__n_) + const id = String(selectPoint.id || ''); + if (id.startsWith('cluster_')) { + const parts = id.split('_'); + const count = parts[2] ? parts[2].replace('n', '') : '?'; + const source = parts.slice(3).join('_') || '?'; + const countNum = Number(count); + const countLabel = Number.isFinite(countNum) ? countNum.toLocaleString() : count; + viewer.pointLabel.label.text = `Cluster: ${countLabel} samples\nSource: ${source}\nCell: ${parts[1]}`; + } else { + viewer.pointLabel.label.text = `${selectPoint.id}`; + } + + selectPoint._origSize = selectPoint.primitive.pixelSize; selectPoint.primitive.pixelSize = 20; selectPoint.primitive.outlineColor = Cesium.Color.YELLOW; selectPoint.primitive.outlineWidth = 3; @@ -556,7 +694,8 @@ selectedSamplesCombined = { } } -md`Retrieved ${pointdata.length} locations from ${parquet_path}.`; +md`Retrieved ${pointdata.length} ${viewModeToggle === "clustered" ? "clusters" : "locations"} from ${parquet_path}. +${viewModeToggle === "clustered" ? "\n*Clustered view: point size reflects sample count, color reflects dominant source. Zoom in/out to change H3 resolution.*" : ""}`; ``` ```{ojs} diff --git a/tutorials/zenodo_isamples_analysis.qmd b/tutorials/zenodo_isamples_analysis.qmd index 48793f9..97b0e56 100644 --- a/tutorials/zenodo_isamples_analysis.qmd +++ b/tutorials/zenodo_isamples_analysis.qmd @@ -27,12 +27,17 @@ This tutorial demonstrates how to efficiently analyze large geospatial datasets ## Dataset Information -**Primary dataset** (Jan 2026): -- **URL**: `https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide.parquet` -- **Size**: ~280 MB wide format, 6.7M MaterialSampleRecords (20M total rows) +**Primary dataset** (Jan 2026, H3-indexed): +- **URL**: `https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide_h3.parquet` +- **Size**: ~292 MB wide format with H3 indices, 6.7M MaterialSampleRecords (20M total rows) +- **H3 columns**: Pre-computed `h3_res4`, `h3_res6`, `h3_res8` (BIGINT) for spatial grouping - **Sources**: SESAR (4.6M), OpenContext (1M), GEOME (605K), Smithsonian (322K) - **Hosting**: Cloudflare R2 with HTTP range request support +**Facet summaries** (2KB, instant): +- **URL**: `https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_facet_summaries.parquet` +- **Schema**: `facet_type`, `facet_value`, `scheme`, `count` + **Note**: *Data was originally archived on Zenodo and is now served from Cloudflare R2 for better performance and reliability.* **Fallback dataset** (if remote data fails): @@ -81,8 +86,11 @@ d3 = require("d3@7") topojson = require("topojson-client@3") // Dataset URLs - try multiple options for CORS compatibility -// Primary: Cloudflare R2 (Jan 2026 wide format) +// Primary: Cloudflare R2 (Jan 2026 wide format with H3 indices) parquet_urls = [ + 'https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide_h3.parquet', + + // Fallback: original wide format without H3 'https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide.parquet', // Fallback: older versions @@ -90,6 +98,9 @@ parquet_urls = [ 'https://zenodo.org/api/records/15278211/files/isamples_export_2025_04_21_16_23_46_geo.parquet/content' ] +// Pre-computed facet summaries (2KB - loads instantly) +facet_summaries_url = 'https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_facet_summaries.parquet' + // Test CORS and find working URL - with rate limiting protection working_parquet_url = { // Check if we've recently failed (to avoid repeated rate limiting) @@ -419,36 +430,89 @@ geo_stats = { }; } -// Regional analysis using bounding boxes +// Data-driven regional analysis using H3 res4 cell grouping +// Replaces hardcoded CASE WHEN bounding boxes with dynamic discovery regional_data = { - const result = await db.query(` - SELECT - CASE - WHEN sample_location_longitude BETWEEN -125 AND -66 - AND sample_location_latitude BETWEEN 24 AND 50 THEN 'North America' - WHEN sample_location_longitude BETWEEN -11 AND 40 - AND sample_location_latitude BETWEEN 35 AND 71 THEN 'Europe' - WHEN sample_location_longitude BETWEEN 95 AND 141 - AND sample_location_latitude BETWEEN 18 AND 54 THEN 'East Asia' - WHEN sample_location_longitude BETWEEN 113 AND 154 - AND sample_location_latitude BETWEEN -44 AND -10 THEN 'Australia' - ELSE 'Other' - END as region, - source_collection, - count(*) as sample_count, - avg(sample_location_latitude) as avg_lat, - avg(sample_location_longitude) as avg_lon - FROM isamples_data - WHERE sample_location_latitude IS NOT NULL - AND sample_location_longitude IS NOT NULL - GROUP BY 1, 2 - ORDER BY region, sample_count DESC - `); - // Convert BigInt values to Numbers - return result.toArray().map(row => ({ - ...row, - sample_count: Number(row.sample_count) - })); + // Check if h3_res4 column exists (H3-indexed file) + let hasH3 = false; + try { + const colCheck = await db.query(`SELECT h3_res4 FROM isamples_data LIMIT 1`); + hasH3 = true; + } catch (e) { + hasH3 = false; + } + + if (hasH3) { + // H3-based regional grouping: discover dense clusters dynamically + const result = await db.query(` + SELECT + h3_res4, + COUNT(*) as sample_count, + AVG(sample_location_latitude) as avg_lat, + AVG(sample_location_longitude) as avg_lon, + COUNT(DISTINCT source_collection) as source_count, + MODE(source_collection) as dominant_source + FROM isamples_data + WHERE sample_location_latitude IS NOT NULL + AND sample_location_longitude IS NOT NULL + AND h3_res4 IS NOT NULL + GROUP BY h3_res4 + HAVING COUNT(*) > 100 + ORDER BY sample_count DESC + `); + // Assign region labels based on centroid location + return result.toArray().map(row => { + const lat = row.avg_lat; + const lon = row.avg_lon; + let region; + if (lon >= -130 && lon <= -60 && lat >= 20 && lat <= 55) region = 'North America'; + else if (lon >= -15 && lon <= 45 && lat >= 30 && lat <= 75) region = 'Europe'; + else if (lon >= 90 && lon <= 150 && lat >= 15 && lat <= 55) region = 'East Asia'; + else if (lon >= 110 && lon <= 160 && lat >= -50 && lat <= -5) region = 'Australia'; + else if (lon >= -90 && lon <= -30 && lat >= -60 && lat <= 15) region = 'South America'; + else if (lon >= -20 && lon <= 55 && lat >= -40 && lat <= 30) region = 'Africa'; + else region = 'Other'; + return { + region, + source_collection: row.dominant_source, + sample_count: Number(row.sample_count), + avg_lat: row.avg_lat, + avg_lon: row.avg_lon, + h3_cell: row.h3_res4, + source_count: Number(row.source_count) + }; + }); + } else { + // Fallback for non-H3 files: use simple lat/lon-based grouping + const result = await db.query(` + SELECT + source_collection, + count(*) as sample_count, + avg(sample_location_latitude) as avg_lat, + avg(sample_location_longitude) as avg_lon + FROM isamples_data + WHERE sample_location_latitude IS NOT NULL + AND sample_location_longitude IS NOT NULL + GROUP BY source_collection + ORDER BY sample_count DESC + `); + return result.toArray().map(row => { + const lat = row.avg_lat; + const lon = row.avg_lon; + let region = 'Other'; + if (lon >= -130 && lon <= -60 && lat >= 20 && lat <= 55) region = 'North America'; + else if (lon >= -15 && lon <= 45 && lat >= 30 && lat <= 75) region = 'Europe'; + else if (lon >= 90 && lon <= 150 && lat >= 15 && lat <= 55) region = 'East Asia'; + else if (lon >= 110 && lon <= 160 && lat >= -50 && lat <= -5) region = 'Australia'; + return { + region, + source_collection: row.source_collection, + sample_count: Number(row.sample_count), + avg_lat: row.avg_lat, + avg_lon: row.avg_lon + }; + }); + } } ``` @@ -461,8 +525,10 @@ md` - **Latitude range**: ${geo_stats.min_lat.toFixed(3)}° to ${geo_stats.max_lat.toFixed(3)}° - **Longitude range**: ${geo_stats.min_lon.toFixed(3)}° to ${geo_stats.max_lon.toFixed(3)}° - **Average location**: ${geo_stats.avg_lat.toFixed(3)}°, ${geo_stats.avg_lon.toFixed(3)}° -- **Total regional records**: ${regional_data.length} -- **Regions found**: ${[...new Set(regional_data.map(d => d.region))].join(', ')} +- **Dense H3 clusters**: ${regional_data.length} (cells with >100 samples) +- **Regions discovered**: ${[...new Set(regional_data.map(d => d.region))].join(', ')} + +*Regional grouping is data-driven using H3 resolution-4 hexagonal cells, replacing hardcoded bounding boxes.* ` ``` @@ -486,27 +552,26 @@ viewof selected_region = Inputs.select( ```{ojs} //| label: regional-chart -// Regional distribution chart +// Regional distribution chart (data-driven from H3 clusters) regional_chart = { - // Validate that regional_data is an array if (!Array.isArray(regional_data)) { return html`
Error: Regional data is not available
`; } - - // Aggregate the regional data by region like we do for source data + + // Aggregate H3 cell data by discovered region const regionTotals = d3.rollup( - regional_data, - v => d3.sum(v, d => d.sample_count), + regional_data, + v => d3.sum(v, d => d.sample_count), d => d.region ); - + const aggregatedData = Array.from(regionTotals, ([region, total]) => ({ region: region, sample_count: total })).sort((a, b) => b.sample_count - a.sample_count); - + return Plot.plot({ - title: `Sample Distribution by Region (${aggregatedData.length} regions)`, + title: `Sample Distribution by Region (H3-derived, ${aggregatedData.length} regions)`, width: 700, height: 300, marginLeft: 120, @@ -527,7 +592,7 @@ regional_chart = { }), Plot.text(aggregatedData, { x: "sample_count", - y: "region", + y: "region", text: d => d3.format("~s")(d.sample_count), dx: 10, textAnchor: "start" @@ -714,42 +779,76 @@ Explore the distribution of material categories across different sources. ```{ojs} //| label: material-analysis -// Get top material categories by source +// Material data: use pre-computed facet summaries for instant results +// Falls back to full-scan if summaries unavailable material_data = { - const result = await db.query(` - SELECT - source_collection, - has_material_category, - count(*) as category_count - FROM isamples_data - WHERE has_material_category IS NOT NULL - GROUP BY source_collection, has_material_category - ORDER BY source_collection, category_count DESC - `); - // Convert BigInt values to Numbers - return result.toArray().map(row => ({ - ...row, - category_count: Number(row.category_count) - })); + try { + // Try pre-computed summaries first (2KB, instant) + const result = await db.query(` + SELECT + facet_value as has_material_category, + 'ALL' as source_collection, + count as category_count + FROM read_parquet('${facet_summaries_url}') + WHERE facet_type = 'material' + ORDER BY count DESC + `); + return result.toArray().map(row => ({ + ...row, + category_count: Number(row.category_count) + })); + } catch (e) { + console.warn("Facet summaries unavailable, falling back to full scan:", e.message); + const result = await db.query(` + SELECT + source_collection, + has_material_category, + count(*) as category_count + FROM isamples_data + WHERE has_material_category IS NOT NULL + GROUP BY source_collection, has_material_category + ORDER BY source_collection, category_count DESC + `); + return result.toArray().map(row => ({ + ...row, + category_count: Number(row.category_count) + })); + } } -// Get top 10 categories overall +// Top categories from pre-computed summaries (instant) top_categories = { - const result = await db.query(` - SELECT - has_material_category, - count(*) as total_count - FROM isamples_data - WHERE has_material_category IS NOT NULL - GROUP BY has_material_category - ORDER BY total_count DESC - LIMIT 10 - `); - // Convert BigInt values to Numbers - return result.toArray().map(row => ({ - ...row, - total_count: Number(row.total_count) - })); + try { + const result = await db.query(` + SELECT + facet_value as has_material_category, + count as total_count + FROM read_parquet('${facet_summaries_url}') + WHERE facet_type = 'material' + ORDER BY count DESC + LIMIT 10 + `); + return result.toArray().map(row => ({ + ...row, + total_count: Number(row.total_count) + })); + } catch (e) { + console.warn("Facet summaries unavailable, falling back to full scan:", e.message); + const result = await db.query(` + SELECT + has_material_category, + count(*) as total_count + FROM isamples_data + WHERE has_material_category IS NOT NULL + GROUP BY has_material_category + ORDER BY total_count DESC + LIMIT 10 + `); + return result.toArray().map(row => ({ + ...row, + total_count: Number(row.total_count) + })); + } } ``` From c96c38442ec000e092773de9d13197b59a6642c8 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 12 Feb 2026 17:36:31 -0800 Subject: [PATCH 02/12] Add progressive globe demo with H3 aggregated loading Loads 580KB H3 res4 summary for instant globe render (<1s), then switches to res6/res8 on zoom with viewport filtering. Click triggers sample detail query from full 280MB parquet. Co-Authored-By: Claude Opus 4.6 --- tutorials/progressive_globe.qmd | 596 ++++++++++++++++++++++++++++++++ 1 file changed, 596 insertions(+) create mode 100644 tutorials/progressive_globe.qmd diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd new file mode 100644 index 0000000..7fe8976 --- /dev/null +++ b/tutorials/progressive_globe.qmd @@ -0,0 +1,596 @@ +--- +title: "Progressive Globe: Instant H3 → Detail on Demand" +categories: [parquet, spatial, h3, performance, isamples] +--- + +Explore **6.7 million material samples** from iSamples — the globe loads instantly with H3 hexagonal aggregates, then fills in individual sample points as you zoom in. + +::: {.callout-note} +## How It Works + +1. **Instant** (<1s): Load pre-aggregated H3 res4 summary (580 KB) → colored circles on globe +2. **On zoom**: Load finer H3 resolutions (res6, res8) for visible area +3. **On click**: Query individual samples at that location from the full dataset + +Circle size = log(sample count). Color = dominant source. +::: + + + + + +```{ojs} +//| output: false +Cesium.Ion.defaultAccessToken = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOiIwNzk3NjkyMy1iNGI1LTRkN2UtODRiMy04OTYwYWE0N2M3ZTkiLCJpZCI6Njk1MTcsImlhdCI6MTYzMzU0MTQ3N30.e70dpNzOCDRLDGxRguQCC-tRzGzA-23Xgno5lNgCeB4'; +``` + +```{ojs} +//| echo: false + +// === Data URLs === +R2_BASE = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev" + +h3_res4_url = `${R2_BASE}/isamples_202601_h3_summary_res4.parquet` +h3_res6_url = `${R2_BASE}/isamples_202601_h3_summary_res6.parquet` +h3_res8_url = `${R2_BASE}/isamples_202601_h3_summary_res8.parquet` +wide_url = `${R2_BASE}/isamples_202601_wide.parquet` + +// Source colors +SOURCE_COLORS = ({ + SESAR: '#3366CC', + OPENCONTEXT: '#DC3912', + GEOME: '#109618', + SMITHSONIAN: '#FF9900' +}) +``` + +```{ojs} +//| echo: false + +// === DuckDB setup === +db = { + const instance = await DuckDBClient.of(); + return instance; +} +``` + +
+ Source: + SESAR + OpenContext + GEOME + Smithsonian +
+ +
+ Phase: Loading... + Points: 0 + Samples: 0 + Time: - +
+ +
+ Phase 1: Loading H3 global overview (580 KB)... +
+ +::: {.panel-tabset} + +## Globe + +
+ +## Details + + + +```{ojs} +//| echo: false +mutable clickedPointId = "unset" +mutable clickedInfo = null +``` + +```{ojs} +//| echo: false +// Show clicked cluster/point info +html`${ + clickedInfo && clickedInfo.type === 'cluster' ? + html`
+

H3 Cluster (Resolution ${clickedInfo.resolution})

+ + + + +
Samples${clickedInfo.count?.toLocaleString()}
Dominant Source${clickedInfo.source}
Center${clickedInfo.lat?.toFixed(4)}°, ${clickedInfo.lng?.toFixed(4)}°
+
` + : clickedInfo && clickedInfo.type === 'point' ? + html`
+

Sample Point

+

${clickedInfo.pid}

+
` + : html`
+ Click a point on the globe to see details. +
` +}` +``` + +```{ojs} +//| echo: false +// Query samples at clicked cluster location +samplesAtCluster = { + if (!clickedInfo || clickedInfo.type !== 'cluster') return []; + + const lat = clickedInfo.lat; + const lng = clickedInfo.lng; + const delta = clickedInfo.resolution === 4 ? 2.0 : clickedInfo.resolution === 6 ? 0.5 : 0.1; + + const q = ` + SELECT pid, label, n as source, latitude, longitude, description + FROM read_parquet('${wide_url}') + WHERE otype = 'MaterialSampleRecord' + AND latitude BETWEEN ${lat - delta} AND ${lat + delta} + AND longitude BETWEEN ${lng - delta} AND ${lng + delta} + LIMIT 50 + `; + try { + return await db.query(q); + } catch(e) { + console.error("Sample query failed:", e); + return []; + } +} +``` + +```{ojs} +//| echo: false +html`${ + samplesAtCluster && samplesAtCluster.length > 0 ? + html`
+

Nearby Samples (${samplesAtCluster.length})

+
+ + + + + + + + + + ${samplesAtCluster.map((s, i) => html` + + + + + + `)} + +
LabelSourceDescription
${s.label || s.pid} + + ${s.source} + + + ${s.description ? (s.description.length > 120 ? s.description.slice(0, 120) + '...' : s.description) : '-'} +
+
+
` + : clickedInfo && clickedInfo.type === 'cluster' ? + html`
+ Loading nearby samples from full dataset... +
` + : html`` +}` +``` + +::: + +```{ojs} +//| echo: false +//| output: false + +// === Cesium Viewer === +viewer = { + const v = new Cesium.Viewer("cesiumContainer", { + timeline: false, + animation: false, + baseLayerPicker: false, + fullscreenElement: "cesiumContainer", + terrain: Cesium.Terrain.fromWorldTerrain() + }); + + // Global view + const globalRect = Cesium.Rectangle.fromDegrees(-180, -60, 180, 80); + Cesium.Camera.DEFAULT_VIEW_RECTANGLE = globalRect; + Cesium.Camera.DEFAULT_VIEW_FACTOR = 0.5; + + const once = () => { + v.camera.setView({ destination: globalRect }); + v.scene.postRender.removeEventListener(once); + }; + v.scene.postRender.addEventListener(once); + + // Point collections for different phases + v.h3Points = new Cesium.PointPrimitiveCollection(); + v.scene.primitives.add(v.h3Points); + + v.detailPoints = new Cesium.PointPrimitiveCollection(); + v.scene.primitives.add(v.detailPoints); + + // Label for hover + v.pointLabel = v.entities.add({ + label: { + show: false, + showBackground: true, + font: "13px monospace", + horizontalOrigin: Cesium.HorizontalOrigin.LEFT, + verticalOrigin: Cesium.VerticalOrigin.BOTTOM, + pixelOffset: new Cesium.Cartesian2(15, 0), + disableDepthTestDistance: Number.POSITIVE_INFINITY, + text: "", + } + }); + + // Hover handler + const hoverHandler = new Cesium.ScreenSpaceEventHandler(v.scene.canvas); + hoverHandler.setInputAction((movement) => { + const picked = v.scene.pick(movement.endPosition); + if (Cesium.defined(picked) && picked.primitive && picked.id) { + v.pointLabel.position = picked.primitive.position; + v.pointLabel.label.show = true; + const meta = picked.id; + if (typeof meta === 'object' && meta.count) { + v.pointLabel.label.text = `${meta.source}: ${meta.count.toLocaleString()} samples`; + } else { + v.pointLabel.label.text = String(meta); + } + } else { + v.pointLabel.label.show = false; + } + }, Cesium.ScreenSpaceEventType.MOUSE_MOVE); + + // Click handler + const clickHandler = new Cesium.ScreenSpaceEventHandler(v.scene.canvas); + clickHandler.setInputAction((e) => { + const picked = v.scene.pick(e.position); + if (Cesium.defined(picked) && picked.primitive && picked.id) { + const meta = picked.id; + if (typeof meta === 'object' && meta.count) { + mutable clickedInfo = { + type: 'cluster', + count: meta.count, + source: meta.source, + lat: meta.lat, + lng: meta.lng, + resolution: meta.resolution + }; + } else { + mutable clickedInfo = { type: 'point', pid: meta }; + } + } + }, Cesium.ScreenSpaceEventType.LEFT_CLICK); + + return v; +} +``` + +```{ojs} +//| echo: false +//| output: false + +// === PHASE 1: Load H3 res4 aggregates (instant) === +phase1 = { + performance.mark('phase1-start'); + + const updateStats = (phase, points, samples, time) => { + document.getElementById('statPhase').textContent = phase; + document.getElementById('statPoints').textContent = points.toLocaleString(); + document.getElementById('statSamples').textContent = samples.toLocaleString(); + document.getElementById('statTime').textContent = time; + }; + + const indicator = document.getElementById('phaseIndicator'); + + // Query pre-aggregated H3 res4 data + const data = await db.query(` + SELECT h3_cell, sample_count, center_lat, center_lng, + dominant_source, source_count, resolution + FROM read_parquet('${h3_res4_url}') + ORDER BY sample_count DESC + `); + + performance.mark('phase1-query-done'); + + // Render as scaled colored circles + const scalar = new Cesium.NearFarScalar(1.5e2, 1.5, 8.0e6, 0.5); + let totalSamples = 0; + + for (const row of data) { + const count = row.sample_count; + totalSamples += count; + const color = SOURCE_COLORS[row.dominant_source] || '#666666'; + + // Circle size: 3px base + log scale, capped at 20 + const size = Math.min(3 + Math.log10(count) * 4, 20); + + viewer.h3Points.add({ + id: { + count: count, + source: row.dominant_source, + lat: row.center_lat, + lng: row.center_lng, + resolution: 4 + }, + position: Cesium.Cartesian3.fromDegrees( + row.center_lng, + row.center_lat, + 0 + ), + pixelSize: size, + color: Cesium.Color.fromCssColorString(color).withAlpha(0.8), + scaleByDistance: scalar, + }); + } + + performance.mark('phase1-end'); + performance.measure('phase1-total', 'phase1-start', 'phase1-end'); + const elapsed = performance.getEntriesByName('phase1-total')[0].duration; + + updateStats('H3 Global', data.length, totalSamples, `${(elapsed/1000).toFixed(1)}s`); + + if (indicator) { + indicator.style.background = '#e8f5e9'; + indicator.style.color = '#2e7d32'; + indicator.innerHTML = `Phase 1 complete: ${data.length.toLocaleString()} H3 clusters covering ${totalSamples.toLocaleString()} samples in ${(elapsed/1000).toFixed(1)}s. Zoom in to see finer detail.`; + } + + console.log(`Phase 1: ${data.length} H3 res4 clusters, ${totalSamples.toLocaleString()} samples in ${elapsed.toFixed(0)}ms`); + + return { count: data.length, samples: totalSamples, elapsed }; +} +``` + +```{ojs} +//| echo: false +//| output: false + +// === PHASE 2: Zoom-triggered detail loading === +// Monitor camera height and load finer resolution when zoomed in +zoomWatcher = { + // Wait for phase 1 to complete + if (!phase1) return; + + let currentResolution = 4; + let loadingRes = false; + + const loadResolution = async (res, url) => { + if (loadingRes) return; + loadingRes = true; + + const indicator = document.getElementById('phaseIndicator'); + if (indicator) { + indicator.style.background = '#e3f2fd'; + indicator.style.color = '#1565c0'; + indicator.innerHTML = `Loading H3 resolution ${res} detail...`; + } + + performance.mark(`res${res}-start`); + + // Get camera bounding box + const rect = viewer.camera.computeViewRectangle(); + let data; + + if (rect) { + const west = Cesium.Math.toDegrees(rect.west); + const south = Cesium.Math.toDegrees(rect.south); + const east = Cesium.Math.toDegrees(rect.east); + const north = Cesium.Math.toDegrees(rect.north); + + data = await db.query(` + SELECT h3_cell, sample_count, center_lat, center_lng, + dominant_source, source_count, resolution + FROM read_parquet('${url}') + WHERE center_lat BETWEEN ${south} AND ${north} + AND center_lng BETWEEN ${west} AND ${east} + ORDER BY sample_count DESC + `); + } else { + data = await db.query(` + SELECT h3_cell, sample_count, center_lat, center_lng, + dominant_source, source_count, resolution + FROM read_parquet('${url}') + ORDER BY sample_count DESC + `); + } + + // Clear old points and add new ones + viewer.h3Points.removeAll(); + + const scalar = new Cesium.NearFarScalar(1.5e2, 1.5, 8.0e6, 0.3); + let totalSamples = 0; + + for (const row of data) { + const count = row.sample_count; + totalSamples += count; + const color = SOURCE_COLORS[row.dominant_source] || '#666666'; + const size = Math.min(3 + Math.log10(count) * 3.5, 18); + + viewer.h3Points.add({ + id: { + count: count, + source: row.dominant_source, + lat: row.center_lat, + lng: row.center_lng, + resolution: res + }, + position: Cesium.Cartesian3.fromDegrees( + row.center_lng, + row.center_lat, + 0 + ), + pixelSize: size, + color: Cesium.Color.fromCssColorString(color).withAlpha(0.85), + scaleByDistance: scalar, + }); + } + + performance.mark(`res${res}-end`); + performance.measure(`res${res}-total`, `res${res}-start`, `res${res}-end`); + const elapsed = performance.getEntriesByName(`res${res}-total`)[0].duration; + + document.getElementById('statPhase').textContent = `H3 Res${res}`; + document.getElementById('statPoints').textContent = data.length.toLocaleString(); + document.getElementById('statSamples').textContent = totalSamples.toLocaleString(); + document.getElementById('statTime').textContent = `${(elapsed/1000).toFixed(1)}s`; + + if (indicator) { + indicator.style.background = '#e8f5e9'; + indicator.style.color = '#2e7d32'; + indicator.innerHTML = `Showing ${data.length.toLocaleString()} H3 res${res} clusters (${totalSamples.toLocaleString()} samples) in viewport. Loaded in ${(elapsed/1000).toFixed(1)}s.`; + } + + currentResolution = res; + loadingRes = false; + + console.log(`Loaded res${res}: ${data.length} clusters, ${totalSamples.toLocaleString()} samples in ${elapsed.toFixed(0)}ms`); + }; + + // Monitor camera movement + let debounceTimer = null; + + viewer.camera.changed.addEventListener(() => { + if (debounceTimer) clearTimeout(debounceTimer); + + debounceTimer = setTimeout(async () => { + const height = viewer.camera.positionCartographic.height; + + // Height thresholds for resolution switching + // > 3,000 km: res4 (continental) + // 300 km - 3,000 km: res6 (city-level) + // < 300 km: res8 (neighborhood) + + let targetRes; + if (height > 3000000) { + targetRes = 4; + } else if (height > 300000) { + targetRes = 6; + } else { + targetRes = 8; + } + + if (targetRes !== currentResolution) { + const urls = { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }; + await loadResolution(targetRes, urls[targetRes]); + } + }, 800); // Debounce: wait 800ms after camera stops + }); + + // Lower the change threshold so it fires more often + viewer.camera.percentageChanged = 0.1; + + return "Zoom watcher active"; +} +``` + +## How This Demo Works + +This demo uses **pre-aggregated H3 hexagonal indices** to achieve near-instant globe rendering: + +| Phase | Data Source | Size | What You See | +|-------|-----------|------|-------------| +| **1. Instant** | H3 res4 summary | 580 KB | 38K colored circles (continental scale) | +| **2. Zoom in** | H3 res6 summary | 1.6 MB | 112K circles (city scale) | +| **3. Zoom more** | H3 res8 summary | 2.5 MB | 176K circles (neighborhood scale) | +| **4. Click** | Full wide parquet | ~280 MB (range request) | Individual sample details | + +**vs. the original approach**: Loading all 6.7M sample coordinates from the 280 MB parquet takes 5-10 seconds. This progressive approach shows meaningful data in under 1 second. + +### Data Pipeline + +``` +zenodo_wide.parquet (280 MB, 20M rows) + ↓ pqg add-h3 (add H3 columns at res 4/6/8) +zenodo_wide_h3.parquet (292 MB) + ↓ DuckDB GROUP BY h3_resN +h3_summary_res4.parquet (580 KB, 38K rows) +h3_summary_res6.parquet (1.6 MB, 112K rows) +h3_summary_res8.parquet (2.5 MB, 176K rows) +``` + +### Source Distribution + +The color of each circle shows which data source dominates that area: + +- **SESAR** (blue): Geological/earth science samples — globally distributed +- **OpenContext** (red): Archaeological samples — concentrated in Mediterranean, Middle East +- **GEOME** (green): Genomic/biological samples — oceanic and tropical regions +- **Smithsonian** (orange): Museum specimens — clustered at collection sites + +## See Also + +- [Cesium Globe (All Points)](/tutorials/parquet_cesium_isamples_wide.html) — Full point-level rendering +- [Interactive Explorer](/tutorials/isamples_explorer.html) — Search and filter with facets +- [Deep-Dive Analysis](/tutorials/zenodo_isamples_analysis.html) — DuckDB-WASM SQL tutorial From 1078fe4b062bab5fac75dc6f027134dd04a16f49 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 12 Feb 2026 17:44:13 -0800 Subject: [PATCH 03/12] Fix progressive globe: render stats bar from OJS cells DOM elements created in raw HTML aren't available when OJS cells execute. Move legend, stats bar, and phase indicator into OJS cells and add null guards on all getElementById calls. Co-Authored-By: Claude Opus 4.6 --- tutorials/progressive_globe.qmd | 48 ++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd index 7fe8976..28dbd64 100644 --- a/tutorials/progressive_globe.qmd +++ b/tutorials/progressive_globe.qmd @@ -119,24 +119,34 @@ db = { } ``` -
+```{ojs} +//| echo: false +// Render legend and stats bar from OJS so they're in the DOM before phase1 runs +legend_html = html`
Source: SESAR OpenContext GEOME Smithsonian -
+
` +``` -
+```{ojs} +//| echo: false +statsBar = html`
Phase: Loading... Points: 0 Samples: 0 Time: - -
+
` +``` -
+```{ojs} +//| echo: false +phaseIndicatorEl = html`
Phase 1: Loading H3 global overview (580 KB)... -
+
` +``` ::: {.panel-tabset} @@ -343,13 +353,18 @@ viewer = { // === PHASE 1: Load H3 res4 aggregates (instant) === phase1 = { + // Ensure OJS-rendered DOM elements are available + void statsBar; + void phaseIndicatorEl; + performance.mark('phase1-start'); const updateStats = (phase, points, samples, time) => { - document.getElementById('statPhase').textContent = phase; - document.getElementById('statPoints').textContent = points.toLocaleString(); - document.getElementById('statSamples').textContent = samples.toLocaleString(); - document.getElementById('statTime').textContent = time; + const el = (id) => document.getElementById(id); + if (el('statPhase')) el('statPhase').textContent = phase; + if (el('statPoints')) el('statPoints').textContent = points.toLocaleString(); + if (el('statSamples')) el('statSamples').textContent = samples.toLocaleString(); + if (el('statTime')) el('statTime').textContent = time; }; const indicator = document.getElementById('phaseIndicator'); @@ -420,8 +435,10 @@ phase1 = { // === PHASE 2: Zoom-triggered detail loading === // Monitor camera height and load finer resolution when zoomed in zoomWatcher = { - // Wait for phase 1 to complete + // Wait for phase 1 to complete and DOM to be ready if (!phase1) return; + void statsBar; + void phaseIndicatorEl; let currentResolution = 4; let loadingRes = false; @@ -501,10 +518,11 @@ zoomWatcher = { performance.measure(`res${res}-total`, `res${res}-start`, `res${res}-end`); const elapsed = performance.getEntriesByName(`res${res}-total`)[0].duration; - document.getElementById('statPhase').textContent = `H3 Res${res}`; - document.getElementById('statPoints').textContent = data.length.toLocaleString(); - document.getElementById('statSamples').textContent = totalSamples.toLocaleString(); - document.getElementById('statTime').textContent = `${(elapsed/1000).toFixed(1)}s`; + const el = (id) => document.getElementById(id); + if (el('statPhase')) el('statPhase').textContent = `H3 Res${res}`; + if (el('statPoints')) el('statPoints').textContent = data.length.toLocaleString(); + if (el('statSamples')) el('statSamples').textContent = totalSamples.toLocaleString(); + if (el('statTime')) el('statTime').textContent = `${(elapsed/1000).toFixed(1)}s`; if (indicator) { indicator.style.background = '#e8f5e9'; From 9c46c21b6f5aa1c6ebefd8c594b99e4a9b79b23a Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 13 Feb 2026 05:33:38 -0800 Subject: [PATCH 04/12] Click cluster dot to fly-to and drill down to next H3 resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clicking an H3 cluster now flies the camera to that location at an altitude that triggers the next resolution level (res4→res6→res8). The zoom watcher then automatically loads finer detail. Co-Authored-By: Claude Opus 4.6 --- tutorials/progressive_globe.qmd | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd index 28dbd64..6798c5b 100644 --- a/tutorials/progressive_globe.qmd +++ b/tutorials/progressive_globe.qmd @@ -322,7 +322,7 @@ viewer = { } }, Cesium.ScreenSpaceEventType.MOUSE_MOVE); - // Click handler + // Click handler: fly to cluster and drill down const clickHandler = new Cesium.ScreenSpaceEventHandler(v.scene.canvas); clickHandler.setInputAction((e) => { const picked = v.scene.pick(e.position); @@ -337,6 +337,20 @@ viewer = { lng: meta.lng, resolution: meta.resolution }; + + // Fly to the cluster at an altitude that triggers the next resolution + // res4 → fly to 1500km (triggers res6) + // res6 → fly to 150km (triggers res8) + // res8 → fly to 30km (close detail) + const altitudes = { 4: 1500000, 6: 150000, 8: 30000 }; + const altitude = altitudes[meta.resolution] || 500000; + + v.camera.flyTo({ + destination: Cesium.Cartesian3.fromDegrees( + meta.lng, meta.lat, altitude + ), + duration: 1.5 + }); } else { mutable clickedInfo = { type: 'point', pid: meta }; } From a7477edef49c52164a93cbdeffdf9d5cde2e44a9 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 13 Feb 2026 05:48:48 -0800 Subject: [PATCH 05/12] Redesign progressive globe: side panel + global data + info-only clicks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Side-by-side layout: globe left, live info panel right (always visible) - Load full H3 files globally (no viewport filtering) — no gaps when panning - Click shows cluster info + nearby samples in side panel (no camera fly-to) - Zoom watcher switches resolution automatically: res4 → res6 → res8 - Stats, legend, cluster card, and sample list all in side panel Co-Authored-By: Claude Opus 4.6 --- tutorials/progressive_globe.qmd | 571 ++++++++++++++++---------------- 1 file changed, 282 insertions(+), 289 deletions(-) diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd index 6798c5b..7a7c8c4 100644 --- a/tutorials/progressive_globe.qmd +++ b/tutorials/progressive_globe.qmd @@ -3,16 +3,16 @@ title: "Progressive Globe: Instant H3 → Detail on Demand" categories: [parquet, spatial, h3, performance, isamples] --- -Explore **6.7 million material samples** from iSamples — the globe loads instantly with H3 hexagonal aggregates, then fills in individual sample points as you zoom in. +Explore **6.7 million material samples** from iSamples — the globe loads instantly with H3 hexagonal aggregates, then refines as you zoom. -::: {.callout-note} +::: {.callout-note collapse="true"} ## How It Works -1. **Instant** (<1s): Load pre-aggregated H3 res4 summary (580 KB) → colored circles on globe -2. **On zoom**: Load finer H3 resolutions (res6, res8) for visible area -3. **On click**: Query individual samples at that location from the full dataset +1. **Instant** (<1s): Pre-aggregated H3 res4 summary (580 KB) → 38K colored circles +2. **Zoom in**: Automatically switches to res6 (112K) then res8 (176K) clusters +3. **Click**: Shows cluster info and queries nearby samples from the full dataset -Circle size = log(sample count). Color = dominant source. +Circle size = log(sample count). Color = dominant data source. ::: @@ -26,60 +26,103 @@ Circle size = log(sample count). Color = dominant source. height: auto; z-index: 999; } + .globe-layout { + display: grid; + grid-template-columns: 1fr 340px; + gap: 12px; + margin-bottom: 16px; + } + @media (max-width: 900px) { + .globe-layout { + grid-template-columns: 1fr; + } + } #cesiumContainer { + width: 100%; + min-height: 500px; aspect-ratio: 4/3; } - .stats-bar { + .side-panel { display: flex; - gap: 24px; - padding: 12px 16px; - background: #1a1a2e; - color: white; + flex-direction: column; + gap: 8px; + max-height: 700px; + overflow-y: auto; + } + .panel-section { + background: #f8f9fa; border-radius: 6px; - margin-bottom: 8px; - font-family: monospace; + padding: 12px; + font-size: 13px; + } + .panel-section h4 { + margin: 0 0 8px 0; font-size: 14px; - flex-wrap: wrap; } - .stats-bar .stat { - display: flex; - align-items: center; + .stats-compact { + display: grid; + grid-template-columns: 1fr 1fr; gap: 6px; } - .stats-bar .stat-value { + .stat-box { + background: #1a1a2e; + color: white; + padding: 8px 10px; + border-radius: 4px; + text-align: center; + } + .stat-box .stat-value { font-weight: bold; font-size: 16px; + font-family: monospace; + display: block; } - .stats-bar .stat-label { + .stat-box .stat-label { color: #aaa; - font-size: 12px; + font-size: 11px; } .legend { display: flex; - gap: 16px; - padding: 8px 16px; - background: #f8f9fa; - border-radius: 4px; - margin-bottom: 8px; - font-size: 13px; + gap: 10px; flex-wrap: wrap; + font-size: 12px; } .legend-item { display: flex; align-items: center; - gap: 4px; + gap: 3px; } .legend-dot { - width: 12px; - height: 12px; + width: 10px; + height: 10px; border-radius: 50%; display: inline-block; } - .phase-indicator { - padding: 8px 16px; + .source-badge { + color: white; + padding: 2px 8px; + border-radius: 10px; + font-size: 0.8em; + white-space: nowrap; + } + .cluster-card { + border-left: 4px solid #ccc; + padding: 10px 12px; + background: white; + border-radius: 0 6px 6px 0; + } + .sample-row { + padding: 6px 0; + border-bottom: 1px solid #eee; + line-height: 1.4; + } + .sample-row:last-child { border-bottom: none; } + .sample-label { font-weight: 600; font-size: 13px; } + .sample-desc { font-size: 12px; color: #555; margin-top: 2px; } + .phase-msg { + padding: 6px 10px; border-radius: 4px; - font-size: 13px; - margin-bottom: 4px; + font-size: 12px; transition: all 0.3s ease; } @@ -91,28 +134,29 @@ Cesium.Ion.defaultAccessToken = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOi ```{ojs} //| echo: false - -// === Data URLs === R2_BASE = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev" - h3_res4_url = `${R2_BASE}/isamples_202601_h3_summary_res4.parquet` h3_res6_url = `${R2_BASE}/isamples_202601_h3_summary_res6.parquet` h3_res8_url = `${R2_BASE}/isamples_202601_h3_summary_res8.parquet` wide_url = `${R2_BASE}/isamples_202601_wide.parquet` -// Source colors SOURCE_COLORS = ({ SESAR: '#3366CC', OPENCONTEXT: '#DC3912', GEOME: '#109618', SMITHSONIAN: '#FF9900' }) + +SOURCE_NAMES = ({ + SESAR: 'SESAR', + OPENCONTEXT: 'OpenContext', + GEOME: 'GEOME', + SMITHSONIAN: 'Smithsonian' +}) ``` ```{ojs} //| echo: false - -// === DuckDB setup === db = { const instance = await DuckDBClient.of(); return instance; @@ -121,76 +165,123 @@ db = { ```{ojs} //| echo: false -// Render legend and stats bar from OJS so they're in the DOM before phase1 runs -legend_html = html`
- Source: - SESAR - OpenContext - GEOME - Smithsonian -
` +mutable clickedInfo = null +mutable globeStatus = ({ phase: "Loading...", points: 0, samples: 0, time: "-", resolution: 4 }) ``` -```{ojs} -//| echo: false -statsBar = html`
- Phase: Loading... - Points: 0 - Samples: 0 - Time: - -
` -``` + ```{ojs} //| echo: false -phaseIndicatorEl = html`
- Phase 1: Loading H3 global overview (580 KB)... -
` -``` - -::: {.panel-tabset} - -## Globe - -
- -## Details +// Side panel: stats + legend + cluster info + samples +sidePanel = { + const status = globeStatus; + const info = clickedInfo; + + // Stats section + const statsHtml = html`
+
+
+ ${status.phase} + Resolution +
+
+ ${status.points.toLocaleString()} + Clusters +
+
+ ${status.samples.toLocaleString()} + Samples +
+
+ ${status.time} + Load Time +
+
+
+
+ SESAR + OpenContext + GEOME + Smithsonian +
+
+
+ Zoom in to see finer detail. +
+
`; + + // Cluster info section + let clusterHtml; + if (info && info.type === 'cluster') { + clusterHtml = html`
+

Selected Cluster

+
+
+ + ${SOURCE_NAMES[info.source] || info.source} + + H3 res${info.resolution} +
+
+ ${info.count.toLocaleString()} samples +
+
+ ${info.lat.toFixed(4)}°, ${info.lng.toFixed(4)}° +
+
+
`; + } else { + clusterHtml = html`
+ Click a cluster on the globe +
`; + } - + return html`
+ ${statsHtml} + ${clusterHtml} +
+
`; +} +``` ```{ojs} //| echo: false -mutable clickedPointId = "unset" -mutable clickedInfo = null +// Build the globe + side panel layout +layout = { + // Need sidePanel to exist first + void sidePanel; + + return html`
+
+
+
`; +} ``` ```{ojs} //| echo: false -// Show clicked cluster/point info -html`${ - clickedInfo && clickedInfo.type === 'cluster' ? - html`
-

H3 Cluster (Resolution ${clickedInfo.resolution})

- - - - -
Samples${clickedInfo.count?.toLocaleString()}
Dominant Source${clickedInfo.source}
Center${clickedInfo.lat?.toFixed(4)}°, ${clickedInfo.lng?.toFixed(4)}°
-
` - : clickedInfo && clickedInfo.type === 'point' ? - html`
-

Sample Point

-

${clickedInfo.pid}

-
` - : html`
- Click a point on the globe to see details. -
` -}` +//| output: false +// Mount the side panel into the layout after both are rendered +mountPanel = { + void layout; + void sidePanel; + + // Wait a tick for DOM + await new Promise(r => setTimeout(r, 50)); + + const mount = document.getElementById('sidePanelMount'); + if (mount && sidePanel) { + mount.innerHTML = ''; + mount.appendChild(sidePanel); + } + return true; +} ``` ```{ojs} //| echo: false -// Query samples at clicked cluster location +// Query samples when a cluster is clicked samplesAtCluster = { if (!clickedInfo || clickedInfo.type !== 'cluster') return []; @@ -204,7 +295,7 @@ samplesAtCluster = { WHERE otype = 'MaterialSampleRecord' AND latitude BETWEEN ${lat - delta} AND ${lat + delta} AND longitude BETWEEN ${lng - delta} AND ${lng + delta} - LIMIT 50 + LIMIT 30 `; try { return await db.query(q); @@ -217,46 +308,44 @@ samplesAtCluster = { ```{ojs} //| echo: false -html`${ - samplesAtCluster && samplesAtCluster.length > 0 ? - html`
-

Nearby Samples (${samplesAtCluster.length})

-
- - - - - - - - - - ${samplesAtCluster.map((s, i) => html` - - - - - - `)} - -
LabelSourceDescription
${s.label || s.pid} - - ${s.source} - - - ${s.description ? (s.description.length > 120 ? s.description.slice(0, 120) + '...' : s.description) : '-'} -
+//| output: false +// Render sample list into the side panel's container +renderSamples = { + void mountPanel; + const samples = samplesAtCluster; + const info = clickedInfo; + const container = document.getElementById('samplesContainer'); + if (!container) return; + + if (!info || info.type !== 'cluster') { + container.innerHTML = ''; + return; + } + + if (!samples || samples.length === 0) { + container.innerHTML = '
Loading nearby samples...
'; + return; + } + + let html = `

Nearby Samples (${samples.length})

`; + for (const s of samples) { + const color = SOURCE_COLORS[s.source] || '#666'; + const name = SOURCE_NAMES[s.source] || s.source; + const desc = s.description + ? (s.description.length > 100 ? s.description.slice(0, 100) + '...' : s.description) + : ''; + html += `
+
+ ${s.label || s.pid} + ${name}
-
` - : clickedInfo && clickedInfo.type === 'cluster' ? - html`
- Loading nearby samples from full dataset... -
` - : html`` -}` -``` + ${desc ? `
${desc}
` : ''} +
`; + } -::: + container.innerHTML = html; +} +``` ```{ojs} //| echo: false @@ -264,6 +353,10 @@ html`${ // === Cesium Viewer === viewer = { + // Wait for layout to render + void layout; + await new Promise(r => setTimeout(r, 100)); + const v = new Cesium.Viewer("cesiumContainer", { timeline: false, animation: false, @@ -272,7 +365,6 @@ viewer = { terrain: Cesium.Terrain.fromWorldTerrain() }); - // Global view const globalRect = Cesium.Rectangle.fromDegrees(-180, -60, 180, 80); Cesium.Camera.DEFAULT_VIEW_RECTANGLE = globalRect; Cesium.Camera.DEFAULT_VIEW_FACTOR = 0.5; @@ -283,13 +375,9 @@ viewer = { }; v.scene.postRender.addEventListener(once); - // Point collections for different phases v.h3Points = new Cesium.PointPrimitiveCollection(); v.scene.primitives.add(v.h3Points); - v.detailPoints = new Cesium.PointPrimitiveCollection(); - v.scene.primitives.add(v.detailPoints); - // Label for hover v.pointLabel = v.entities.add({ label: { @@ -304,7 +392,7 @@ viewer = { } }); - // Hover handler + // Hover: show tooltip const hoverHandler = new Cesium.ScreenSpaceEventHandler(v.scene.canvas); hoverHandler.setInputAction((movement) => { const picked = v.scene.pick(movement.endPosition); @@ -322,7 +410,7 @@ viewer = { } }, Cesium.ScreenSpaceEventType.MOUSE_MOVE); - // Click handler: fly to cluster and drill down + // Click: info only (no camera movement) const clickHandler = new Cesium.ScreenSpaceEventHandler(v.scene.canvas); clickHandler.setInputAction((e) => { const picked = v.scene.pick(e.position); @@ -337,22 +425,6 @@ viewer = { lng: meta.lng, resolution: meta.resolution }; - - // Fly to the cluster at an altitude that triggers the next resolution - // res4 → fly to 1500km (triggers res6) - // res6 → fly to 150km (triggers res8) - // res8 → fly to 30km (close detail) - const altitudes = { 4: 1500000, 6: 150000, 8: 30000 }; - const altitude = altitudes[meta.resolution] || 500000; - - v.camera.flyTo({ - destination: Cesium.Cartesian3.fromDegrees( - meta.lng, meta.lat, altitude - ), - duration: 1.5 - }); - } else { - mutable clickedInfo = { type: 'point', pid: meta }; } } }, Cesium.ScreenSpaceEventType.LEFT_CLICK); @@ -365,25 +437,11 @@ viewer = { //| echo: false //| output: false -// === PHASE 1: Load H3 res4 aggregates (instant) === +// === PHASE 1: Load H3 res4 aggregates globally === phase1 = { - // Ensure OJS-rendered DOM elements are available - void statsBar; - void phaseIndicatorEl; - + void mountPanel; performance.mark('phase1-start'); - const updateStats = (phase, points, samples, time) => { - const el = (id) => document.getElementById(id); - if (el('statPhase')) el('statPhase').textContent = phase; - if (el('statPoints')) el('statPoints').textContent = points.toLocaleString(); - if (el('statSamples')) el('statSamples').textContent = samples.toLocaleString(); - if (el('statTime')) el('statTime').textContent = time; - }; - - const indicator = document.getElementById('phaseIndicator'); - - // Query pre-aggregated H3 res4 data const data = await db.query(` SELECT h3_cell, sample_count, center_lat, center_lng, dominant_source, source_count, resolution @@ -391,9 +449,6 @@ phase1 = { ORDER BY sample_count DESC `); - performance.mark('phase1-query-done'); - - // Render as scaled colored circles const scalar = new Cesium.NearFarScalar(1.5e2, 1.5, 8.0e6, 0.5); let totalSamples = 0; @@ -401,8 +456,6 @@ phase1 = { const count = row.sample_count; totalSamples += count; const color = SOURCE_COLORS[row.dominant_source] || '#666666'; - - // Circle size: 3px base + log scale, capped at 20 const size = Math.min(3 + Math.log10(count) * 4, 20); viewer.h3Points.add({ @@ -413,11 +466,7 @@ phase1 = { lng: row.center_lng, resolution: 4 }, - position: Cesium.Cartesian3.fromDegrees( - row.center_lng, - row.center_lat, - 0 - ), + position: Cesium.Cartesian3.fromDegrees(row.center_lng, row.center_lat, 0), pixelSize: size, color: Cesium.Color.fromCssColorString(color).withAlpha(0.8), scaleByDistance: scalar, @@ -428,16 +477,22 @@ phase1 = { performance.measure('phase1-total', 'phase1-start', 'phase1-end'); const elapsed = performance.getEntriesByName('phase1-total')[0].duration; - updateStats('H3 Global', data.length, totalSamples, `${(elapsed/1000).toFixed(1)}s`); + mutable globeStatus = { + phase: "H3 Res4", + points: data.length, + samples: totalSamples, + time: `${(elapsed/1000).toFixed(1)}s`, + resolution: 4 + }; - if (indicator) { - indicator.style.background = '#e8f5e9'; - indicator.style.color = '#2e7d32'; - indicator.innerHTML = `Phase 1 complete: ${data.length.toLocaleString()} H3 clusters covering ${totalSamples.toLocaleString()} samples in ${(elapsed/1000).toFixed(1)}s. Zoom in to see finer detail.`; + const msg = document.getElementById('phaseMsg'); + if (msg) { + msg.style.background = '#e8f5e9'; + msg.style.color = '#2e7d32'; + msg.textContent = `${data.length.toLocaleString()} clusters, ${totalSamples.toLocaleString()} samples. Zoom in for finer detail.`; } - console.log(`Phase 1: ${data.length} H3 res4 clusters, ${totalSamples.toLocaleString()} samples in ${elapsed.toFixed(0)}ms`); - + console.log(`Phase 1: ${data.length} clusters in ${elapsed.toFixed(0)}ms`); return { count: data.length, samples: totalSamples, elapsed }; } ``` @@ -446,13 +501,9 @@ phase1 = { //| echo: false //| output: false -// === PHASE 2: Zoom-triggered detail loading === -// Monitor camera height and load finer resolution when zoomed in +// === Zoom watcher: switch resolution globally (no viewport filtering) === zoomWatcher = { - // Wait for phase 1 to complete and DOM to be ready if (!phase1) return; - void statsBar; - void phaseIndicatorEl; let currentResolution = 4; let loadingRes = false; @@ -461,43 +512,23 @@ zoomWatcher = { if (loadingRes) return; loadingRes = true; - const indicator = document.getElementById('phaseIndicator'); - if (indicator) { - indicator.style.background = '#e3f2fd'; - indicator.style.color = '#1565c0'; - indicator.innerHTML = `Loading H3 resolution ${res} detail...`; + const msg = document.getElementById('phaseMsg'); + if (msg) { + msg.style.background = '#e3f2fd'; + msg.style.color = '#1565c0'; + msg.textContent = `Loading H3 res${res}...`; } performance.mark(`res${res}-start`); - // Get camera bounding box - const rect = viewer.camera.computeViewRectangle(); - let data; - - if (rect) { - const west = Cesium.Math.toDegrees(rect.west); - const south = Cesium.Math.toDegrees(rect.south); - const east = Cesium.Math.toDegrees(rect.east); - const north = Cesium.Math.toDegrees(rect.north); - - data = await db.query(` - SELECT h3_cell, sample_count, center_lat, center_lng, - dominant_source, source_count, resolution - FROM read_parquet('${url}') - WHERE center_lat BETWEEN ${south} AND ${north} - AND center_lng BETWEEN ${west} AND ${east} - ORDER BY sample_count DESC - `); - } else { - data = await db.query(` - SELECT h3_cell, sample_count, center_lat, center_lng, - dominant_source, source_count, resolution - FROM read_parquet('${url}') - ORDER BY sample_count DESC - `); - } + // Load ALL data globally — files are small enough (max 2.5 MB) + const data = await db.query(` + SELECT h3_cell, sample_count, center_lat, center_lng, + dominant_source, source_count, resolution + FROM read_parquet('${url}') + ORDER BY sample_count DESC + `); - // Clear old points and add new ones viewer.h3Points.removeAll(); const scalar = new Cesium.NearFarScalar(1.5e2, 1.5, 8.0e6, 0.3); @@ -517,11 +548,7 @@ zoomWatcher = { lng: row.center_lng, resolution: res }, - position: Cesium.Cartesian3.fromDegrees( - row.center_lng, - row.center_lat, - 0 - ), + position: Cesium.Cartesian3.fromDegrees(row.center_lng, row.center_lat, 0), pixelSize: size, color: Cesium.Color.fromCssColorString(color).withAlpha(0.85), scaleByDistance: scalar, @@ -532,94 +559,60 @@ zoomWatcher = { performance.measure(`res${res}-total`, `res${res}-start`, `res${res}-end`); const elapsed = performance.getEntriesByName(`res${res}-total`)[0].duration; - const el = (id) => document.getElementById(id); - if (el('statPhase')) el('statPhase').textContent = `H3 Res${res}`; - if (el('statPoints')) el('statPoints').textContent = data.length.toLocaleString(); - if (el('statSamples')) el('statSamples').textContent = totalSamples.toLocaleString(); - if (el('statTime')) el('statTime').textContent = `${(elapsed/1000).toFixed(1)}s`; - - if (indicator) { - indicator.style.background = '#e8f5e9'; - indicator.style.color = '#2e7d32'; - indicator.innerHTML = `Showing ${data.length.toLocaleString()} H3 res${res} clusters (${totalSamples.toLocaleString()} samples) in viewport. Loaded in ${(elapsed/1000).toFixed(1)}s.`; + mutable globeStatus = { + phase: `H3 Res${res}`, + points: data.length, + samples: totalSamples, + time: `${(elapsed/1000).toFixed(1)}s`, + resolution: res + }; + + if (msg) { + msg.style.background = '#e8f5e9'; + msg.style.color = '#2e7d32'; + msg.textContent = `${data.length.toLocaleString()} clusters, ${totalSamples.toLocaleString()} samples. ${res < 8 ? 'Zoom in for finer detail.' : 'Maximum detail.'}`; } currentResolution = res; loadingRes = false; - - console.log(`Loaded res${res}: ${data.length} clusters, ${totalSamples.toLocaleString()} samples in ${elapsed.toFixed(0)}ms`); + console.log(`Loaded res${res}: ${data.length} clusters in ${elapsed.toFixed(0)}ms`); }; - // Monitor camera movement let debounceTimer = null; viewer.camera.changed.addEventListener(() => { if (debounceTimer) clearTimeout(debounceTimer); - debounceTimer = setTimeout(async () => { const height = viewer.camera.positionCartographic.height; - - // Height thresholds for resolution switching - // > 3,000 km: res4 (continental) - // 300 km - 3,000 km: res6 (city-level) - // < 300 km: res8 (neighborhood) - let targetRes; - if (height > 3000000) { - targetRes = 4; - } else if (height > 300000) { - targetRes = 6; - } else { - targetRes = 8; - } + if (height > 3000000) targetRes = 4; + else if (height > 300000) targetRes = 6; + else targetRes = 8; if (targetRes !== currentResolution) { const urls = { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }; await loadResolution(targetRes, urls[targetRes]); } - }, 800); // Debounce: wait 800ms after camera stops + }, 800); }); - // Lower the change threshold so it fires more often viewer.camera.percentageChanged = 0.1; - - return "Zoom watcher active"; + return "active"; } ``` ## How This Demo Works -This demo uses **pre-aggregated H3 hexagonal indices** to achieve near-instant globe rendering: - -| Phase | Data Source | Size | What You See | -|-------|-----------|------|-------------| -| **1. Instant** | H3 res4 summary | 580 KB | 38K colored circles (continental scale) | -| **2. Zoom in** | H3 res6 summary | 1.6 MB | 112K circles (city scale) | -| **3. Zoom more** | H3 res8 summary | 2.5 MB | 176K circles (neighborhood scale) | -| **4. Click** | Full wide parquet | ~280 MB (range request) | Individual sample details | - -**vs. the original approach**: Loading all 6.7M sample coordinates from the 280 MB parquet takes 5-10 seconds. This progressive approach shows meaningful data in under 1 second. - -### Data Pipeline - -``` -zenodo_wide.parquet (280 MB, 20M rows) - ↓ pqg add-h3 (add H3 columns at res 4/6/8) -zenodo_wide_h3.parquet (292 MB) - ↓ DuckDB GROUP BY h3_resN -h3_summary_res4.parquet (580 KB, 38K rows) -h3_summary_res6.parquet (1.6 MB, 112K rows) -h3_summary_res8.parquet (2.5 MB, 176K rows) -``` - -### Source Distribution +Pre-aggregated H3 hexagonal indices achieve near-instant globe rendering: -The color of each circle shows which data source dominates that area: +| Phase | Data | Size | Points | +|-------|------|------|--------| +| **Instant** | H3 res4 | 580 KB | 38K clusters (continental) | +| **Zoom in** | H3 res6 | 1.6 MB | 112K clusters (city) | +| **Zoom more** | H3 res8 | 2.5 MB | 176K clusters (neighborhood) | +| **Click** | Full dataset | ~280 MB (range req.) | Individual samples | -- **SESAR** (blue): Geological/earth science samples — globally distributed -- **OpenContext** (red): Archaeological samples — concentrated in Mediterranean, Middle East -- **GEOME** (green): Genomic/biological samples — oceanic and tropical regions -- **Smithsonian** (orange): Museum specimens — clustered at collection sites +**vs. original**: Loading 6.7M coordinates from 280 MB takes 5-10s. This shows data in <1s. ## See Also From 4fa19fa504d3287cbf839bcfadd00694d41ebd66 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 13 Feb 2026 05:55:31 -0800 Subject: [PATCH 06/12] Fix infinite loop: replace OJS reactivity with imperative DOM updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The side panel was causing a reactive cycle: globeStatus change → sidePanel re-render → layout re-render → viewer re-create → phase1 re-run → globeStatus change → loop Fix: all side panel content is static HTML. Stats, cluster card, and sample list are updated via getElementById/innerHTML only. No OJS mutable variables, no reactive cascade. Co-Authored-By: Claude Opus 4.6 --- tutorials/progressive_globe.qmd | 535 +++++++++++--------------------- 1 file changed, 174 insertions(+), 361 deletions(-) diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd index 7a7c8c4..26150f1 100644 --- a/tutorials/progressive_globe.qmd +++ b/tutorials/progressive_globe.qmd @@ -33,9 +33,7 @@ Circle size = log(sample count). Color = dominant data source. margin-bottom: 16px; } @media (max-width: 900px) { - .globe-layout { - grid-template-columns: 1fr; - } + .globe-layout { grid-template-columns: 1fr; } } #cesiumContainer { width: 100%; @@ -55,10 +53,7 @@ Circle size = log(sample count). Color = dominant data source. padding: 12px; font-size: 13px; } - .panel-section h4 { - margin: 0 0 8px 0; - font-size: 14px; - } + .panel-section h4 { margin: 0 0 8px 0; font-size: 14px; } .stats-compact { display: grid; grid-template-columns: 1fr 1fr; @@ -71,62 +66,51 @@ Circle size = log(sample count). Color = dominant data source. border-radius: 4px; text-align: center; } - .stat-box .stat-value { - font-weight: bold; - font-size: 16px; - font-family: monospace; - display: block; - } - .stat-box .stat-label { - color: #aaa; - font-size: 11px; - } - .legend { - display: flex; - gap: 10px; - flex-wrap: wrap; - font-size: 12px; - } - .legend-item { - display: flex; - align-items: center; - gap: 3px; - } - .legend-dot { - width: 10px; - height: 10px; - border-radius: 50%; - display: inline-block; - } - .source-badge { - color: white; - padding: 2px 8px; - border-radius: 10px; - font-size: 0.8em; - white-space: nowrap; - } - .cluster-card { - border-left: 4px solid #ccc; - padding: 10px 12px; - background: white; - border-radius: 0 6px 6px 0; - } - .sample-row { - padding: 6px 0; - border-bottom: 1px solid #eee; - line-height: 1.4; - } + .stat-box .val { font-weight: bold; font-size: 16px; font-family: monospace; display: block; } + .stat-box .lbl { color: #aaa; font-size: 11px; } + .legend { display: flex; gap: 10px; flex-wrap: wrap; font-size: 12px; } + .legend-item { display: flex; align-items: center; gap: 3px; } + .legend-dot { width: 10px; height: 10px; border-radius: 50%; display: inline-block; } + .source-badge { color: white; padding: 2px 8px; border-radius: 10px; font-size: 0.8em; white-space: nowrap; } + .cluster-card { border-left: 4px solid #ccc; padding: 10px 12px; background: white; border-radius: 0 6px 6px 0; } + .sample-row { padding: 6px 0; border-bottom: 1px solid #eee; line-height: 1.4; } .sample-row:last-child { border-bottom: none; } .sample-label { font-weight: 600; font-size: 13px; } .sample-desc { font-size: 12px; color: #555; margin-top: 2px; } - .phase-msg { - padding: 6px 10px; - border-radius: 4px; - font-size: 12px; - transition: all 0.3s ease; - } + .phase-msg { padding: 6px 10px; border-radius: 4px; font-size: 12px; transition: all 0.3s ease; } + #clusterSection .empty-state { color: #999; text-align: center; padding: 20px; } + +
+
+
+
+
+
Loading...Resolution
+
0Clusters
+
0Samples
+
-Load Time
+
+
+
+ SESAR + OpenContext + GEOME + Smithsonian +
+
+
+Loading H3 global overview... +
+
+
+
Click a cluster on the globe
+
+
+
+
+ ```{ojs} //| output: false Cesium.Ion.defaultAccessToken = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOiIwNzk3NjkyMy1iNGI1LTRkN2UtODRiMy04OTYwYWE0N2M3ZTkiLCJpZCI6Njk1MTcsImlhdCI6MTYzMzU0MTQ3N30.e70dpNzOCDRLDGxRguQCC-tRzGzA-23Xgno5lNgCeB4'; @@ -134,6 +118,9 @@ Cesium.Ion.defaultAccessToken = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOi ```{ojs} //| echo: false +//| output: false + +// === Constants === R2_BASE = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev" h3_res4_url = `${R2_BASE}/isamples_202601_h3_summary_res4.parquet` h3_res6_url = `${R2_BASE}/isamples_202601_h3_summary_res6.parquet` @@ -141,200 +128,70 @@ h3_res8_url = `${R2_BASE}/isamples_202601_h3_summary_res8.parquet` wide_url = `${R2_BASE}/isamples_202601_wide.parquet` SOURCE_COLORS = ({ - SESAR: '#3366CC', - OPENCONTEXT: '#DC3912', - GEOME: '#109618', - SMITHSONIAN: '#FF9900' + SESAR: '#3366CC', OPENCONTEXT: '#DC3912', + GEOME: '#109618', SMITHSONIAN: '#FF9900' }) - SOURCE_NAMES = ({ - SESAR: 'SESAR', - OPENCONTEXT: 'OpenContext', - GEOME: 'GEOME', - SMITHSONIAN: 'Smithsonian' + SESAR: 'SESAR', OPENCONTEXT: 'OpenContext', + GEOME: 'GEOME', SMITHSONIAN: 'Smithsonian' }) -``` -```{ojs} -//| echo: false -db = { - const instance = await DuckDBClient.of(); - return instance; +// === Helper: update stats via DOM (no reactivity) === +function updateStats(phase, points, samples, time) { + const s = (id, v) => { const e = document.getElementById(id); if (e) e.textContent = v; }; + s('sPhase', phase); + s('sPoints', points.toLocaleString()); + s('sSamples', samples.toLocaleString()); + s('sTime', time); } -``` - -```{ojs} -//| echo: false -mutable clickedInfo = null -mutable globeStatus = ({ phase: "Loading...", points: 0, samples: 0, time: "-", resolution: 4 }) -``` - +function updatePhaseMsg(text, type) { + const m = document.getElementById('phaseMsg'); + if (!m) return; + m.textContent = text; + if (type === 'loading') { m.style.background = '#e3f2fd'; m.style.color = '#1565c0'; } + else { m.style.background = '#e8f5e9'; m.style.color = '#2e7d32'; } +} -```{ojs} -//| echo: false -// Side panel: stats + legend + cluster info + samples -sidePanel = { - const status = globeStatus; - const info = clickedInfo; - - // Stats section - const statsHtml = html`
-
-
- ${status.phase} - Resolution -
-
- ${status.points.toLocaleString()} - Clusters -
-
- ${status.samples.toLocaleString()} - Samples -
-
- ${status.time} - Load Time +function updateClusterCard(info) { + const el = document.getElementById('clusterSection'); + if (!el) return; + if (!info) { + el.innerHTML = '
Click a cluster on the globe
'; + return; + } + const color = SOURCE_COLORS[info.source] || '#666'; + const name = SOURCE_NAMES[info.source] || info.source; + el.innerHTML = `

Selected Cluster

+
+
+ ${name} + H3 res${info.resolution}
-
-
-
- SESAR - OpenContext - GEOME - Smithsonian +
+ ${info.count.toLocaleString()} samples
-
-
- Zoom in to see finer detail. -
-
`; - - // Cluster info section - let clusterHtml; - if (info && info.type === 'cluster') { - clusterHtml = html`
-

Selected Cluster

-
-
- - ${SOURCE_NAMES[info.source] || info.source} - - H3 res${info.resolution} -
-
- ${info.count.toLocaleString()} samples -
-
- ${info.lat.toFixed(4)}°, ${info.lng.toFixed(4)}° -
+
+ ${info.lat.toFixed(4)}, ${info.lng.toFixed(4)}
`; - } else { - clusterHtml = html`
- Click a cluster on the globe -
`; - } - - return html`
- ${statsHtml} - ${clusterHtml} -
-
`; -} -``` - -```{ojs} -//| echo: false -// Build the globe + side panel layout -layout = { - // Need sidePanel to exist first - void sidePanel; - - return html`
-
-
-
`; } -``` - -```{ojs} -//| echo: false -//| output: false -// Mount the side panel into the layout after both are rendered -mountPanel = { - void layout; - void sidePanel; - - // Wait a tick for DOM - await new Promise(r => setTimeout(r, 50)); - - const mount = document.getElementById('sidePanelMount'); - if (mount && sidePanel) { - mount.innerHTML = ''; - mount.appendChild(sidePanel); - } - return true; -} -``` - -```{ojs} -//| echo: false -// Query samples when a cluster is clicked -samplesAtCluster = { - if (!clickedInfo || clickedInfo.type !== 'cluster') return []; - - const lat = clickedInfo.lat; - const lng = clickedInfo.lng; - const delta = clickedInfo.resolution === 4 ? 2.0 : clickedInfo.resolution === 6 ? 0.5 : 0.1; - - const q = ` - SELECT pid, label, n as source, latitude, longitude, description - FROM read_parquet('${wide_url}') - WHERE otype = 'MaterialSampleRecord' - AND latitude BETWEEN ${lat - delta} AND ${lat + delta} - AND longitude BETWEEN ${lng - delta} AND ${lng + delta} - LIMIT 30 - `; - try { - return await db.query(q); - } catch(e) { - console.error("Sample query failed:", e); - return []; - } -} -``` - -```{ojs} -//| echo: false -//| output: false -// Render sample list into the side panel's container -renderSamples = { - void mountPanel; - const samples = samplesAtCluster; - const info = clickedInfo; - const container = document.getElementById('samplesContainer'); - if (!container) return; - - if (!info || info.type !== 'cluster') { - container.innerHTML = ''; - return; - } +function updateSamples(samples) { + const el = document.getElementById('samplesSection'); + if (!el) return; if (!samples || samples.length === 0) { - container.innerHTML = '
Loading nearby samples...
'; + el.innerHTML = ''; return; } - - let html = `

Nearby Samples (${samples.length})

`; + let h = `

Nearby Samples (${samples.length})

`; for (const s of samples) { const color = SOURCE_COLORS[s.source] || '#666'; const name = SOURCE_NAMES[s.source] || s.source; const desc = s.description ? (s.description.length > 100 ? s.description.slice(0, 100) + '...' : s.description) : ''; - html += `
+ h += `
${s.label || s.pid} ${name} @@ -342,8 +199,18 @@ renderSamples = { ${desc ? `
${desc}
` : ''}
`; } + el.innerHTML = h; +} +``` + +```{ojs} +//| echo: false +//| output: false - container.innerHTML = html; +// === DuckDB === +db = { + const instance = await DuckDBClient.of(); + return instance; } ``` @@ -351,12 +218,8 @@ renderSamples = { //| echo: false //| output: false -// === Cesium Viewer === +// === Cesium Viewer (created once, never re-created) === viewer = { - // Wait for layout to render - void layout; - await new Promise(r => setTimeout(r, 100)); - const v = new Cesium.Viewer("cesiumContainer", { timeline: false, animation: false, @@ -368,7 +231,6 @@ viewer = { const globalRect = Cesium.Rectangle.fromDegrees(-180, -60, 180, 80); Cesium.Camera.DEFAULT_VIEW_RECTANGLE = globalRect; Cesium.Camera.DEFAULT_VIEW_FACTOR = 0.5; - const once = () => { v.camera.setView({ destination: globalRect }); v.scene.postRender.removeEventListener(once); @@ -378,53 +240,60 @@ viewer = { v.h3Points = new Cesium.PointPrimitiveCollection(); v.scene.primitives.add(v.h3Points); - // Label for hover + // Hover tooltip v.pointLabel = v.entities.add({ label: { - show: false, - showBackground: true, - font: "13px monospace", + show: false, showBackground: true, font: "13px monospace", horizontalOrigin: Cesium.HorizontalOrigin.LEFT, verticalOrigin: Cesium.VerticalOrigin.BOTTOM, pixelOffset: new Cesium.Cartesian2(15, 0), - disableDepthTestDistance: Number.POSITIVE_INFINITY, - text: "", + disableDepthTestDistance: Number.POSITIVE_INFINITY, text: "", } }); - // Hover: show tooltip - const hoverHandler = new Cesium.ScreenSpaceEventHandler(v.scene.canvas); - hoverHandler.setInputAction((movement) => { + new Cesium.ScreenSpaceEventHandler(v.scene.canvas).setInputAction((movement) => { const picked = v.scene.pick(movement.endPosition); if (Cesium.defined(picked) && picked.primitive && picked.id) { v.pointLabel.position = picked.primitive.position; v.pointLabel.label.show = true; const meta = picked.id; - if (typeof meta === 'object' && meta.count) { - v.pointLabel.label.text = `${meta.source}: ${meta.count.toLocaleString()} samples`; - } else { - v.pointLabel.label.text = String(meta); - } + v.pointLabel.label.text = (typeof meta === 'object' && meta.count) + ? `${meta.source}: ${meta.count.toLocaleString()} samples` + : String(meta); } else { v.pointLabel.label.show = false; } }, Cesium.ScreenSpaceEventType.MOUSE_MOVE); - // Click: info only (no camera movement) - const clickHandler = new Cesium.ScreenSpaceEventHandler(v.scene.canvas); - clickHandler.setInputAction((e) => { + // Click: update side panel via DOM (no OJS mutable → no reactivity cascade) + new Cesium.ScreenSpaceEventHandler(v.scene.canvas).setInputAction(async (e) => { const picked = v.scene.pick(e.position); if (Cesium.defined(picked) && picked.primitive && picked.id) { const meta = picked.id; if (typeof meta === 'object' && meta.count) { - mutable clickedInfo = { - type: 'cluster', - count: meta.count, - source: meta.source, - lat: meta.lat, - lng: meta.lng, - resolution: meta.resolution - }; + // Update cluster card immediately + updateClusterCard(meta); + + // Show loading state for samples + const sampEl = document.getElementById('samplesSection'); + if (sampEl) sampEl.innerHTML = '
Loading nearby samples...
'; + + // Query nearby samples from full dataset + const delta = meta.resolution === 4 ? 2.0 : meta.resolution === 6 ? 0.5 : 0.1; + try { + const samples = await db.query(` + SELECT pid, label, n as source, latitude, longitude, description + FROM read_parquet('${wide_url}') + WHERE otype = 'MaterialSampleRecord' + AND latitude BETWEEN ${meta.lat - delta} AND ${meta.lat + delta} + AND longitude BETWEEN ${meta.lng - delta} AND ${meta.lng + delta} + LIMIT 30 + `); + updateSamples(samples); + } catch(err) { + console.error("Sample query failed:", err); + if (sampEl) sampEl.innerHTML = '
Query failed — try again.
'; + } } } }, Cesium.ScreenSpaceEventType.LEFT_CLICK); @@ -437,14 +306,13 @@ viewer = { //| echo: false //| output: false -// === PHASE 1: Load H3 res4 aggregates globally === +// === PHASE 1: Load H3 res4 globally (instant) === phase1 = { - void mountPanel; - performance.mark('phase1-start'); + performance.mark('p1-start'); const data = await db.query(` SELECT h3_cell, sample_count, center_lat, center_lng, - dominant_source, source_count, resolution + dominant_source, source_count FROM read_parquet('${h3_res4_url}') ORDER BY sample_count DESC `); @@ -455,45 +323,25 @@ phase1 = { for (const row of data) { const count = row.sample_count; totalSamples += count; - const color = SOURCE_COLORS[row.dominant_source] || '#666666'; const size = Math.min(3 + Math.log10(count) * 4, 20); - viewer.h3Points.add({ - id: { - count: count, - source: row.dominant_source, - lat: row.center_lat, - lng: row.center_lng, - resolution: 4 - }, + id: { count, source: row.dominant_source, lat: row.center_lat, lng: row.center_lng, resolution: 4 }, position: Cesium.Cartesian3.fromDegrees(row.center_lng, row.center_lat, 0), pixelSize: size, - color: Cesium.Color.fromCssColorString(color).withAlpha(0.8), + color: Cesium.Color.fromCssColorString(SOURCE_COLORS[row.dominant_source] || '#666').withAlpha(0.8), scaleByDistance: scalar, }); } - performance.mark('phase1-end'); - performance.measure('phase1-total', 'phase1-start', 'phase1-end'); - const elapsed = performance.getEntriesByName('phase1-total')[0].duration; - - mutable globeStatus = { - phase: "H3 Res4", - points: data.length, - samples: totalSamples, - time: `${(elapsed/1000).toFixed(1)}s`, - resolution: 4 - }; - - const msg = document.getElementById('phaseMsg'); - if (msg) { - msg.style.background = '#e8f5e9'; - msg.style.color = '#2e7d32'; - msg.textContent = `${data.length.toLocaleString()} clusters, ${totalSamples.toLocaleString()} samples. Zoom in for finer detail.`; - } + performance.mark('p1-end'); + performance.measure('p1', 'p1-start', 'p1-end'); + const elapsed = performance.getEntriesByName('p1')[0].duration; + updateStats('H3 Res4', data.length, totalSamples, `${(elapsed/1000).toFixed(1)}s`); + updatePhaseMsg(`${data.length.toLocaleString()} clusters, ${totalSamples.toLocaleString()} samples. Zoom in for finer detail.`, 'done'); console.log(`Phase 1: ${data.length} clusters in ${elapsed.toFixed(0)}ms`); - return { count: data.length, samples: totalSamples, elapsed }; + + return { count: data.length, samples: totalSamples }; } ``` @@ -501,102 +349,67 @@ phase1 = { //| echo: false //| output: false -// === Zoom watcher: switch resolution globally (no viewport filtering) === +// === Zoom watcher: switch H3 resolution globally === zoomWatcher = { if (!phase1) return; - let currentResolution = 4; - let loadingRes = false; - - const loadResolution = async (res, url) => { - if (loadingRes) return; - loadingRes = true; + let currentRes = 4; + let loading = false; - const msg = document.getElementById('phaseMsg'); - if (msg) { - msg.style.background = '#e3f2fd'; - msg.style.color = '#1565c0'; - msg.textContent = `Loading H3 res${res}...`; - } - - performance.mark(`res${res}-start`); + const loadRes = async (res, url) => { + if (loading) return; + loading = true; + updatePhaseMsg(`Loading H3 res${res}...`, 'loading'); - // Load ALL data globally — files are small enough (max 2.5 MB) + performance.mark(`r${res}-s`); const data = await db.query(` SELECT h3_cell, sample_count, center_lat, center_lng, - dominant_source, source_count, resolution + dominant_source, source_count FROM read_parquet('${url}') ORDER BY sample_count DESC `); viewer.h3Points.removeAll(); - const scalar = new Cesium.NearFarScalar(1.5e2, 1.5, 8.0e6, 0.3); - let totalSamples = 0; + let total = 0; for (const row of data) { - const count = row.sample_count; - totalSamples += count; - const color = SOURCE_COLORS[row.dominant_source] || '#666666'; - const size = Math.min(3 + Math.log10(count) * 3.5, 18); - + total += row.sample_count; + const size = Math.min(3 + Math.log10(row.sample_count) * 3.5, 18); viewer.h3Points.add({ - id: { - count: count, - source: row.dominant_source, - lat: row.center_lat, - lng: row.center_lng, - resolution: res - }, + id: { count: row.sample_count, source: row.dominant_source, lat: row.center_lat, lng: row.center_lng, resolution: res }, position: Cesium.Cartesian3.fromDegrees(row.center_lng, row.center_lat, 0), pixelSize: size, - color: Cesium.Color.fromCssColorString(color).withAlpha(0.85), + color: Cesium.Color.fromCssColorString(SOURCE_COLORS[row.dominant_source] || '#666').withAlpha(0.85), scaleByDistance: scalar, }); } - performance.mark(`res${res}-end`); - performance.measure(`res${res}-total`, `res${res}-start`, `res${res}-end`); - const elapsed = performance.getEntriesByName(`res${res}-total`)[0].duration; - - mutable globeStatus = { - phase: `H3 Res${res}`, - points: data.length, - samples: totalSamples, - time: `${(elapsed/1000).toFixed(1)}s`, - resolution: res - }; - - if (msg) { - msg.style.background = '#e8f5e9'; - msg.style.color = '#2e7d32'; - msg.textContent = `${data.length.toLocaleString()} clusters, ${totalSamples.toLocaleString()} samples. ${res < 8 ? 'Zoom in for finer detail.' : 'Maximum detail.'}`; - } + performance.mark(`r${res}-e`); + performance.measure(`r${res}`, `r${res}-s`, `r${res}-e`); + const elapsed = performance.getEntriesByName(`r${res}`)[0].duration; - currentResolution = res; - loadingRes = false; - console.log(`Loaded res${res}: ${data.length} clusters in ${elapsed.toFixed(0)}ms`); - }; + updateStats(`H3 Res${res}`, data.length, total, `${(elapsed/1000).toFixed(1)}s`); + updatePhaseMsg(`${data.length.toLocaleString()} clusters, ${total.toLocaleString()} samples. ${res < 8 ? 'Zoom in for finer detail.' : 'Maximum detail.'}`, 'done'); - let debounceTimer = null; + currentRes = res; + loading = false; + console.log(`Res${res}: ${data.length} clusters in ${elapsed.toFixed(0)}ms`); + }; + let timer = null; viewer.camera.changed.addEventListener(() => { - if (debounceTimer) clearTimeout(debounceTimer); - debounceTimer = setTimeout(async () => { - const height = viewer.camera.positionCartographic.height; - let targetRes; - if (height > 3000000) targetRes = 4; - else if (height > 300000) targetRes = 6; - else targetRes = 8; - - if (targetRes !== currentResolution) { - const urls = { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }; - await loadResolution(targetRes, urls[targetRes]); + if (timer) clearTimeout(timer); + timer = setTimeout(async () => { + const h = viewer.camera.positionCartographic.height; + const target = h > 3000000 ? 4 : h > 300000 ? 6 : 8; + if (target !== currentRes) { + await loadRes(target, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[target]); } }, 800); }); - viewer.camera.percentageChanged = 0.1; + return "active"; } ``` From 78408994c24153d6efd2fb40d4d029c1112c171e Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 13 Feb 2026 06:20:45 -0800 Subject: [PATCH 07/12] Add sub-res8 individual sample drill-down to progressive globe - New 4th zoom tier: below 120km altitude, switches from H3 clusters to individual sample points loaded from lite parquet (60MB vs 280MB) - Two-stage sample card: instant metadata from lite file, lazy-loaded description from full wide parquet on click - Viewport caching with 30% padding for smooth panning - Stale-request guards for async camera/query flows - Hysteresis thresholds (120km enter / 180km exit) to prevent flicker - Separate PointPrimitiveCollection for samples vs clusters - Cluster click queries now use lite parquet instead of wide (5x faster) Data files on R2: - isamples_202601_samples_map_lite.parquet (60MB, 6M rows, 9 columns) - Still uses H3 summary files for res4/6/8 cluster view Co-Authored-By: Claude Opus 4.6 --- tutorials/progressive_globe.qmd | 339 +++++++++++++++++++++++++++----- 1 file changed, 295 insertions(+), 44 deletions(-) diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd index 26150f1..02ce975 100644 --- a/tutorials/progressive_globe.qmd +++ b/tutorials/progressive_globe.qmd @@ -3,14 +3,15 @@ title: "Progressive Globe: Instant H3 → Detail on Demand" categories: [parquet, spatial, h3, performance, isamples] --- -Explore **6.7 million material samples** from iSamples — the globe loads instantly with H3 hexagonal aggregates, then refines as you zoom. +Explore **6.7 million material samples** from iSamples — the globe loads instantly with H3 hexagonal aggregates, then refines as you zoom down to individual samples. ::: {.callout-note collapse="true"} ## How It Works 1. **Instant** (<1s): Pre-aggregated H3 res4 summary (580 KB) → 38K colored circles 2. **Zoom in**: Automatically switches to res6 (112K) then res8 (176K) clusters -3. **Click**: Shows cluster info and queries nearby samples from the full dataset +3. **Zoom deeper** (<120 km): Individual sample points from 60 MB lite parquet +4. **Click**: Cluster info or individual sample card with full metadata Circle size = log(sample count). Color = dominant data source. ::: @@ -77,8 +78,12 @@ Circle size = log(sample count). Color = dominant data source. .sample-row:last-child { border-bottom: none; } .sample-label { font-weight: 600; font-size: 13px; } .sample-desc { font-size: 12px; color: #555; margin-top: 2px; } + .sample-meta { font-size: 11px; color: #888; margin-top: 2px; } .phase-msg { padding: 6px 10px; border-radius: 4px; font-size: 12px; transition: all 0.3s ease; } #clusterSection .empty-state { color: #999; text-align: center; padding: 20px; } + .detail-loading { text-align: center; color: #999; padding: 8px; font-size: 12px; } + .detail-link { color: #1565c0; text-decoration: none; font-size: 12px; } + .detail-link:hover { text-decoration: underline; } @@ -105,7 +110,7 @@ Loading H3 global overview...
-
Click a cluster on the globe
+
Click a cluster or sample on the globe
@@ -125,6 +130,7 @@ R2_BASE = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev" h3_res4_url = `${R2_BASE}/isamples_202601_h3_summary_res4.parquet` h3_res6_url = `${R2_BASE}/isamples_202601_h3_summary_res6.parquet` h3_res8_url = `${R2_BASE}/isamples_202601_h3_summary_res8.parquet` +lite_url = `${R2_BASE}/isamples_202601_samples_map_lite.parquet` wide_url = `${R2_BASE}/isamples_202601_wide.parquet` SOURCE_COLORS = ({ @@ -136,7 +142,7 @@ SOURCE_NAMES = ({ GEOME: 'GEOME', SMITHSONIAN: 'Smithsonian' }) -// === Helper: update stats via DOM (no reactivity) === +// === Helpers: update DOM imperatively (no OJS reactivity) === function updateStats(phase, points, samples, time) { const s = (id, v) => { const e = document.getElementById(id); if (e) e.textContent = v; }; s('sPhase', phase); @@ -157,7 +163,7 @@ function updateClusterCard(info) { const el = document.getElementById('clusterSection'); if (!el) return; if (!info) { - el.innerHTML = '
Click a cluster on the globe
'; + el.innerHTML = '
Click a cluster or sample on the globe
'; return; } const color = SOURCE_COLORS[info.source] || '#666'; @@ -177,6 +183,48 @@ function updateClusterCard(info) {
`; } +function updateSampleCard(sample) { + const el = document.getElementById('clusterSection'); + if (!el) return; + const color = SOURCE_COLORS[sample.source] || '#666'; + const name = SOURCE_NAMES[sample.source] || sample.source; + const placeParts = sample.place_name; + const placeStr = Array.isArray(placeParts) && placeParts.length > 0 + ? placeParts.filter(Boolean).join(' › ') + : ''; + el.innerHTML = `

Sample

+
+
+ ${name} +
+
+ ${sample.label || sample.pid || 'Unnamed'} +
+
+ ${sample.lat.toFixed(5)}, ${sample.lng.toFixed(5)} +
+ ${placeStr ? `
${placeStr}
` : ''} + ${sample.result_time ? `
Date: ${sample.result_time}
` : ''} +
Loading full details...
+
`; +} + +function updateSampleDetail(detail) { + const el = document.getElementById('sampleDetail'); + if (!el) return; + if (!detail) { + el.innerHTML = 'Detail query failed'; + return; + } + const desc = detail.description + ? (detail.description.length > 300 ? detail.description.slice(0, 300) + '...' : detail.description) + : ''; + el.innerHTML = `${desc ? `
${desc}
` : ''} + `; +} + function updateSamples(samples) { const el = document.getElementById('samplesSection'); if (!el) return; @@ -237,10 +285,15 @@ viewer = { }; v.scene.postRender.addEventListener(once); + // Two separate point collections: clusters and individual samples v.h3Points = new Cesium.PointPrimitiveCollection(); v.scene.primitives.add(v.h3Points); - // Hover tooltip + v.samplePoints = new Cesium.PointPrimitiveCollection(); + v.scene.primitives.add(v.samplePoints); + v.samplePoints.show = false; // hidden until point mode + + // Hover tooltip — works for both clusters and samples v.pointLabel = v.entities.add({ label: { show: false, showBackground: true, font: "13px monospace", @@ -257,43 +310,69 @@ viewer = { v.pointLabel.position = picked.primitive.position; v.pointLabel.label.show = true; const meta = picked.id; - v.pointLabel.label.text = (typeof meta === 'object' && meta.count) - ? `${meta.source}: ${meta.count.toLocaleString()} samples` - : String(meta); + if (typeof meta === 'object' && meta.type === 'sample') { + v.pointLabel.label.text = `${meta.label || meta.pid}`; + } else if (typeof meta === 'object' && meta.count) { + v.pointLabel.label.text = `${meta.source}: ${meta.count.toLocaleString()} samples`; + } else { + v.pointLabel.label.text = String(meta); + } } else { v.pointLabel.label.show = false; } }, Cesium.ScreenSpaceEventType.MOUSE_MOVE); - // Click: update side panel via DOM (no OJS mutable → no reactivity cascade) + // Click handler — routes to cluster card or sample card new Cesium.ScreenSpaceEventHandler(v.scene.canvas).setInputAction(async (e) => { const picked = v.scene.pick(e.position); - if (Cesium.defined(picked) && picked.primitive && picked.id) { - const meta = picked.id; - if (typeof meta === 'object' && meta.count) { - // Update cluster card immediately - updateClusterCard(meta); - - // Show loading state for samples - const sampEl = document.getElementById('samplesSection'); - if (sampEl) sampEl.innerHTML = '
Loading nearby samples...
'; - - // Query nearby samples from full dataset - const delta = meta.resolution === 4 ? 2.0 : meta.resolution === 6 ? 0.5 : 0.1; - try { - const samples = await db.query(` - SELECT pid, label, n as source, latitude, longitude, description - FROM read_parquet('${wide_url}') - WHERE otype = 'MaterialSampleRecord' - AND latitude BETWEEN ${meta.lat - delta} AND ${meta.lat + delta} - AND longitude BETWEEN ${meta.lng - delta} AND ${meta.lng + delta} - LIMIT 30 - `); - updateSamples(samples); - } catch(err) { - console.error("Sample query failed:", err); - if (sampEl) sampEl.innerHTML = '
Query failed — try again.
'; + if (!Cesium.defined(picked) || !picked.primitive || !picked.id) return; + const meta = picked.id; + + if (typeof meta === 'object' && meta.type === 'sample') { + // --- Individual sample click --- + updateSampleCard(meta); + // Clear nearby list + const sampEl = document.getElementById('samplesSection'); + if (sampEl) sampEl.innerHTML = ''; + + // Stage 2: lazy-load full description from wide parquet + try { + const detail = await db.query(` + SELECT description + FROM read_parquet('${wide_url}') + WHERE pid = '${meta.pid.replace(/'/g, "''")}' + LIMIT 1 + `); + if (detail && detail.length > 0) { + updateSampleDetail(detail[0]); + } else { + updateSampleDetail({ description: '' }); } + } catch(err) { + console.error("Detail query failed:", err); + updateSampleDetail(null); + } + + } else if (typeof meta === 'object' && meta.count) { + // --- Cluster click --- + updateClusterCard(meta); + + const sampEl = document.getElementById('samplesSection'); + if (sampEl) sampEl.innerHTML = '
Loading nearby samples...
'; + + const delta = meta.resolution === 4 ? 2.0 : meta.resolution === 6 ? 0.5 : 0.1; + try { + const samples = await db.query(` + SELECT pid, label, n as source, latitude, longitude, description + FROM read_parquet('${lite_url}') + WHERE latitude BETWEEN ${meta.lat - delta} AND ${meta.lat + delta} + AND longitude BETWEEN ${meta.lng - delta} AND ${meta.lng + delta} + LIMIT 30 + `); + updateSamples(samples); + } catch(err) { + console.error("Sample query failed:", err); + if (sampEl) sampEl.innerHTML = '
Query failed — try again.
'; } } }, Cesium.ScreenSpaceEventType.LEFT_CLICK); @@ -349,13 +428,26 @@ phase1 = { //| echo: false //| output: false -// === Zoom watcher: switch H3 resolution globally === +// === Zoom watcher: H3 cluster mode + individual sample point mode === zoomWatcher = { if (!phase1) return; + // --- State --- + let mode = 'cluster'; // 'cluster' or 'point' let currentRes = 4; let loading = false; + let requestId = 0; // stale-request guard + + // Hysteresis thresholds to avoid flicker + const ENTER_POINT_ALT = 120000; // 120 km → enter point mode + const EXIT_POINT_ALT = 180000; // 180 km → exit point mode + const POINT_BUDGET = 5000; + + // Viewport cache: avoid re-querying same area + let cachedBounds = null; // { south, north, west, east } + let cachedData = null; // array of rows + // --- H3 cluster loading (existing logic) --- const loadRes = async (res, url) => { if (loading) return; loading = true; @@ -390,23 +482,181 @@ zoomWatcher = { const elapsed = performance.getEntriesByName(`r${res}`)[0].duration; updateStats(`H3 Res${res}`, data.length, total, `${(elapsed/1000).toFixed(1)}s`); - updatePhaseMsg(`${data.length.toLocaleString()} clusters, ${total.toLocaleString()} samples. ${res < 8 ? 'Zoom in for finer detail.' : 'Maximum detail.'}`, 'done'); + updatePhaseMsg(`${data.length.toLocaleString()} clusters, ${total.toLocaleString()} samples. ${res < 8 ? 'Zoom in for finer detail.' : 'Zoom closer for individual samples.'}`, 'done'); currentRes = res; loading = false; console.log(`Res${res}: ${data.length} clusters in ${elapsed.toFixed(0)}ms`); }; + // --- Get camera viewport bounds --- + function getViewportBounds() { + const rect = viewer.camera.computeViewRectangle(viewer.scene.globe.ellipsoid); + if (!rect) return null; + return { + south: Cesium.Math.toDegrees(rect.south), + north: Cesium.Math.toDegrees(rect.north), + west: Cesium.Math.toDegrees(rect.west), + east: Cesium.Math.toDegrees(rect.east) + }; + } + + // --- Check if viewport is within cached bounds --- + function isWithinCache(bounds) { + if (!cachedBounds || !bounds) return false; + return bounds.south >= cachedBounds.south && + bounds.north <= cachedBounds.north && + bounds.west >= cachedBounds.west && + bounds.east <= cachedBounds.east; + } + + // --- Load individual samples for current viewport --- + async function loadViewportSamples() { + const myReqId = ++requestId; + const bounds = getViewportBounds(); + if (!bounds) return; + + // If viewport is within cached area, just re-render from cache + if (isWithinCache(bounds) && cachedData) { + renderSamplePoints(cachedData, bounds); + return; + } + + // Fetch with 30% padding for smooth panning + const latPad = (bounds.north - bounds.south) * 0.3; + const lngPad = (bounds.east - bounds.west) * 0.3; + const padded = { + south: bounds.south - latPad, + north: bounds.north + latPad, + west: bounds.west - lngPad, + east: bounds.east + lngPad + }; + + updatePhaseMsg('Loading individual samples...', 'loading'); + + try { + performance.mark('sp-s'); + const data = await db.query(` + SELECT pid, label, source, latitude, longitude, + place_name, result_time + FROM read_parquet('${lite_url}') + WHERE latitude BETWEEN ${padded.south} AND ${padded.north} + AND longitude BETWEEN ${padded.west} AND ${padded.east} + LIMIT ${POINT_BUDGET} + `); + performance.mark('sp-e'); + performance.measure('sp', 'sp-s', 'sp-e'); + const elapsed = performance.getEntriesByName('sp').pop().duration; + + // Stale guard: discard if a newer request was issued + if (myReqId !== requestId) { + console.log(`Discarding stale sample response (req ${myReqId}, current ${requestId})`); + return; + } + + // Cache the padded bounds + data + cachedBounds = padded; + cachedData = Array.from(data); + + renderSamplePoints(cachedData, bounds); + + updateStats('Samples', cachedData.length, cachedData.length, `${(elapsed/1000).toFixed(1)}s`); + updatePhaseMsg(`${cachedData.length.toLocaleString()} individual samples. Click one for details.`, 'done'); + console.log(`Point mode: ${cachedData.length} samples in ${elapsed.toFixed(0)}ms`); + + } catch(err) { + if (myReqId !== requestId) return; + console.error("Viewport sample query failed:", err); + updatePhaseMsg('Sample query failed — try again.', 'loading'); + } + } + + // --- Render sample points on globe --- + function renderSamplePoints(data, bounds) { + viewer.samplePoints.removeAll(); + const scalar = new Cesium.NearFarScalar(1e2, 8, 2e5, 3); + + for (const row of data) { + const color = SOURCE_COLORS[row.source] || '#666'; + viewer.samplePoints.add({ + id: { + type: 'sample', + pid: row.pid, + label: row.label, + source: row.source, + lat: row.latitude, + lng: row.longitude, + place_name: row.place_name, + result_time: row.result_time + }, + position: Cesium.Cartesian3.fromDegrees(row.longitude, row.latitude, 0), + pixelSize: 6, + color: Cesium.Color.fromCssColorString(color).withAlpha(0.9), + scaleByDistance: scalar, + }); + } + } + + // --- Mode transitions --- + function enterPointMode() { + mode = 'point'; + viewer.h3Points.show = false; + viewer.samplePoints.show = true; + loadViewportSamples(); + console.log('Entered point mode'); + } + + function exitPointMode() { + mode = 'cluster'; + viewer.samplePoints.show = false; + viewer.samplePoints.removeAll(); + viewer.h3Points.show = true; + cachedBounds = null; + cachedData = null; + + // Restore cluster stats + let clusterCount = viewer.h3Points.length; + updateStats(`H3 Res${currentRes}`, clusterCount, '—', '—'); + updatePhaseMsg(`${clusterCount.toLocaleString()} clusters. Zoom closer for individual samples.`, 'done'); + console.log('Exited point mode'); + } + + // --- Camera change handler --- let timer = null; viewer.camera.changed.addEventListener(() => { if (timer) clearTimeout(timer); timer = setTimeout(async () => { const h = viewer.camera.positionCartographic.height; - const target = h > 3000000 ? 4 : h > 300000 ? 6 : 8; - if (target !== currentRes) { - await loadRes(target, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[target]); + + // Determine target mode with hysteresis + const targetMode = h < ENTER_POINT_ALT ? 'point' + : h > EXIT_POINT_ALT ? 'cluster' + : mode; + + if (targetMode === 'point' && mode !== 'point') { + // Make sure we're at res8 clusters before transitioning + if (currentRes !== 8 && !loading) { + await loadRes(8, h3_res8_url); + } + enterPointMode(); + } else if (targetMode === 'cluster' && mode !== 'cluster') { + exitPointMode(); + // Reload appropriate resolution + const target = h > 3000000 ? 4 : h > 300000 ? 6 : 8; + if (target !== currentRes && !loading) { + await loadRes(target, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[target]); + } + } else if (targetMode === 'point') { + // Already in point mode — update viewport samples + loadViewportSamples(); + } else { + // Cluster mode — check if resolution should change + const target = h > 3000000 ? 4 : h > 300000 ? 6 : 8; + if (target !== currentRes && !loading) { + await loadRes(target, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[target]); + } } - }, 800); + }, 600); }); viewer.camera.percentageChanged = 0.1; @@ -416,16 +666,17 @@ zoomWatcher = { ## How This Demo Works -Pre-aggregated H3 hexagonal indices achieve near-instant globe rendering: +Pre-aggregated H3 hexagonal indices achieve near-instant globe rendering, with seamless drill-down to individual samples: | Phase | Data | Size | Points | |-------|------|------|--------| | **Instant** | H3 res4 | 580 KB | 38K clusters (continental) | | **Zoom in** | H3 res6 | 1.6 MB | 112K clusters (city) | | **Zoom more** | H3 res8 | 2.5 MB | 176K clusters (neighborhood) | -| **Click** | Full dataset | ~280 MB (range req.) | Individual samples | +| **Zoom deep** | Map lite | 60 MB (range req.) | Up to 5K individual samples | +| **Click sample** | Full dataset | ~280 MB (range req.) | Full metadata for 1 sample | -**vs. original**: Loading 6.7M coordinates from 280 MB takes 5-10s. This shows data in <1s. +**4 parquet files, zero backend.** All queries run in your browser via DuckDB-WASM with HTTP range requests — only the bytes you need are transferred. ## See Also From d951c718ecb017a42781b08958ecabac4e410b31 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 13 Feb 2026 06:25:19 -0800 Subject: [PATCH 08/12] Fix bugs from Codex review: deadlock, schema mismatch, timing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - loadRes: wrap in try/catch/finally so `loading` flag always resets on query failure (was permanent deadlock — finding #2) - Schema fix: cluster-click query used `n as source` but the lite parquet has column named `source` (finding #4) - Remove unnecessary ORDER BY on H3 loads (finding #8) - Use .pop() instead of [0] for performance timing entries (finding #11) - Add rel="noopener noreferrer" to target="_blank" link (finding #7) Deferred: XSS escaping (trusted data), antimeridian handling, detail click caching, startup error fallback. Co-Authored-By: Claude Opus 4.6 --- tutorials/progressive_globe.qmd | 72 +++++++++++++++++---------------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd index 02ce975..c4324b2 100644 --- a/tutorials/progressive_globe.qmd +++ b/tutorials/progressive_globe.qmd @@ -221,7 +221,7 @@ function updateSampleDetail(detail) { : ''; el.innerHTML = `${desc ? `
${desc}
` : ''} `; } @@ -363,7 +363,7 @@ viewer = { const delta = meta.resolution === 4 ? 2.0 : meta.resolution === 6 ? 0.5 : 0.1; try { const samples = await db.query(` - SELECT pid, label, n as source, latitude, longitude, description + SELECT pid, label, source, latitude, longitude, description FROM read_parquet('${lite_url}') WHERE latitude BETWEEN ${meta.lat - delta} AND ${meta.lat + delta} AND longitude BETWEEN ${meta.lng - delta} AND ${meta.lng + delta} @@ -393,7 +393,6 @@ phase1 = { SELECT h3_cell, sample_count, center_lat, center_lng, dominant_source, source_count FROM read_parquet('${h3_res4_url}') - ORDER BY sample_count DESC `); const scalar = new Cesium.NearFarScalar(1.5e2, 1.5, 8.0e6, 0.5); @@ -414,7 +413,7 @@ phase1 = { performance.mark('p1-end'); performance.measure('p1', 'p1-start', 'p1-end'); - const elapsed = performance.getEntriesByName('p1')[0].duration; + const elapsed = performance.getEntriesByName('p1').pop().duration; updateStats('H3 Res4', data.length, totalSamples, `${(elapsed/1000).toFixed(1)}s`); updatePhaseMsg(`${data.length.toLocaleString()} clusters, ${totalSamples.toLocaleString()} samples. Zoom in for finer detail.`, 'done'); @@ -453,40 +452,45 @@ zoomWatcher = { loading = true; updatePhaseMsg(`Loading H3 res${res}...`, 'loading'); - performance.mark(`r${res}-s`); - const data = await db.query(` - SELECT h3_cell, sample_count, center_lat, center_lng, - dominant_source, source_count - FROM read_parquet('${url}') - ORDER BY sample_count DESC - `); - - viewer.h3Points.removeAll(); - const scalar = new Cesium.NearFarScalar(1.5e2, 1.5, 8.0e6, 0.3); - let total = 0; + try { + performance.mark(`r${res}-s`); + const data = await db.query(` + SELECT h3_cell, sample_count, center_lat, center_lng, + dominant_source, source_count + FROM read_parquet('${url}') + `); - for (const row of data) { - total += row.sample_count; - const size = Math.min(3 + Math.log10(row.sample_count) * 3.5, 18); - viewer.h3Points.add({ - id: { count: row.sample_count, source: row.dominant_source, lat: row.center_lat, lng: row.center_lng, resolution: res }, - position: Cesium.Cartesian3.fromDegrees(row.center_lng, row.center_lat, 0), - pixelSize: size, - color: Cesium.Color.fromCssColorString(SOURCE_COLORS[row.dominant_source] || '#666').withAlpha(0.85), - scaleByDistance: scalar, - }); - } + viewer.h3Points.removeAll(); + const scalar = new Cesium.NearFarScalar(1.5e2, 1.5, 8.0e6, 0.3); + let total = 0; + + for (const row of data) { + total += row.sample_count; + const size = Math.min(3 + Math.log10(row.sample_count) * 3.5, 18); + viewer.h3Points.add({ + id: { count: row.sample_count, source: row.dominant_source, lat: row.center_lat, lng: row.center_lng, resolution: res }, + position: Cesium.Cartesian3.fromDegrees(row.center_lng, row.center_lat, 0), + pixelSize: size, + color: Cesium.Color.fromCssColorString(SOURCE_COLORS[row.dominant_source] || '#666').withAlpha(0.85), + scaleByDistance: scalar, + }); + } - performance.mark(`r${res}-e`); - performance.measure(`r${res}`, `r${res}-s`, `r${res}-e`); - const elapsed = performance.getEntriesByName(`r${res}`)[0].duration; + performance.mark(`r${res}-e`); + performance.measure(`r${res}`, `r${res}-s`, `r${res}-e`); + const elapsed = performance.getEntriesByName(`r${res}`).pop().duration; - updateStats(`H3 Res${res}`, data.length, total, `${(elapsed/1000).toFixed(1)}s`); - updatePhaseMsg(`${data.length.toLocaleString()} clusters, ${total.toLocaleString()} samples. ${res < 8 ? 'Zoom in for finer detail.' : 'Zoom closer for individual samples.'}`, 'done'); + updateStats(`H3 Res${res}`, data.length, total, `${(elapsed/1000).toFixed(1)}s`); + updatePhaseMsg(`${data.length.toLocaleString()} clusters, ${total.toLocaleString()} samples. ${res < 8 ? 'Zoom in for finer detail.' : 'Zoom closer for individual samples.'}`, 'done'); - currentRes = res; - loading = false; - console.log(`Res${res}: ${data.length} clusters in ${elapsed.toFixed(0)}ms`); + currentRes = res; + console.log(`Res${res}: ${data.length} clusters in ${elapsed.toFixed(0)}ms`); + } catch(err) { + console.error(`Failed to load res${res}:`, err); + updatePhaseMsg(`Failed to load H3 res${res} — try zooming again.`, 'loading'); + } finally { + loading = false; + } }; // --- Get camera viewport bounds --- From 497d5529bdfee8bab9d0d12004525e550eb23e34 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 13 Feb 2026 06:40:19 -0800 Subject: [PATCH 09/12] Add progressive globe to sidebar navigation Co-Authored-By: Claude Opus 4.6 --- _quarto.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/_quarto.yml b/_quarto.yml index 3fe9ffa..1ac7b40 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -54,6 +54,8 @@ website: href: tutorials/zenodo_isamples_analysis.qmd - text: "3D Globe Visualization" href: tutorials/parquet_cesium_isamples_wide.qmd + - text: "Progressive Globe (H3 + Samples)" + href: tutorials/progressive_globe.qmd - text: "Technical: Narrow vs Wide" href: tutorials/narrow_vs_wide_performance.qmd From 986dd7cff6569ff73918b6e9c72c049842569e00 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 13 Feb 2026 06:44:22 -0800 Subject: [PATCH 10/12] Fix cluster-click query: remove description column missing from lite parquet The samples_map_lite.parquet doesn't have a description column. Use place_name for nearby sample cards instead. Co-Authored-By: Claude Opus 4.6 --- tutorials/progressive_globe.qmd | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd index c4324b2..3dcd798 100644 --- a/tutorials/progressive_globe.qmd +++ b/tutorials/progressive_globe.qmd @@ -236,8 +236,9 @@ function updateSamples(samples) { for (const s of samples) { const color = SOURCE_COLORS[s.source] || '#666'; const name = SOURCE_NAMES[s.source] || s.source; - const desc = s.description - ? (s.description.length > 100 ? s.description.slice(0, 100) + '...' : s.description) + const placeParts = s.place_name; + const desc = Array.isArray(placeParts) && placeParts.length > 0 + ? placeParts.filter(Boolean).join(' › ') : ''; h += `
@@ -363,7 +364,7 @@ viewer = { const delta = meta.resolution === 4 ? 2.0 : meta.resolution === 6 ? 0.5 : 0.1; try { const samples = await db.query(` - SELECT pid, label, source, latitude, longitude, description + SELECT pid, label, source, latitude, longitude, place_name FROM read_parquet('${lite_url}') WHERE latitude BETWEEN ${meta.lat - delta} AND ${meta.lat + delta} AND longitude BETWEEN ${meta.lng - delta} AND ${meta.lng + delta} From 900a2fb69e4b8da565185b67af50408cd762c730 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 13 Feb 2026 06:55:33 -0800 Subject: [PATCH 11/12] Add URL state encoding for shareable deep links - Hash-based URL state: lat, lng, alt, heading, pitch, mode, pid - v=1 schema versioning for future compatibility - parseNum with Number.isFinite (avoids lat=0 bug from Codex review) - replaceState for continuous camera movement, pushState for mode transitions and sample/cluster selection - Browser back/forward via hashchange listener with flight animation - Suppress flag prevents hash write loops during navigation restore - Deep-link startup: fly to position and restore sample card from pid - Share View button copies current URL to clipboard with toast - pid takes precedence over h3 (canonicalized on write) Co-Authored-By: Claude Opus 4.6 --- tutorials/progressive_globe.qmd | 192 +++++++++++++++++++++++++++++++- 1 file changed, 189 insertions(+), 3 deletions(-) diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd index 3dcd798..7312e31 100644 --- a/tutorials/progressive_globe.qmd +++ b/tutorials/progressive_globe.qmd @@ -84,6 +84,12 @@ Circle size = log(sample count). Color = dominant data source. .detail-loading { text-align: center; color: #999; padding: 8px; font-size: 12px; } .detail-link { color: #1565c0; text-decoration: none; font-size: 12px; } .detail-link:hover { text-decoration: underline; } + .share-btn { + background: #1565c0; color: white; border: none; padding: 4px 12px; + border-radius: 4px; font-size: 12px; cursor: pointer; + } + .share-btn:hover { background: #0d47a1; } + .share-toast { font-size: 12px; color: #2e7d32; opacity: 0; transition: opacity 0.3s; } @@ -105,6 +111,10 @@ Circle size = log(sample count). Color = dominant data source. Smithsonian
+
+ + +
Loading H3 global overview...
@@ -142,6 +152,48 @@ SOURCE_NAMES = ({ GEOME: 'GEOME', SMITHSONIAN: 'Smithsonian' }) +// === URL State: encode/decode globe state in hash fragment === +function parseNum(val, def, min, max) { + if (val == null) return def; + const n = parseFloat(val); + if (!Number.isFinite(n)) return def; + if (min != null && n < min) return min; + if (max != null && n > max) return max; + return n; +} + +function readHash() { + const params = new URLSearchParams(location.hash.slice(1)); + return { + v: parseInt(params.get('v')) || 0, + lat: parseNum(params.get('lat'), null, -90, 90), + lng: parseNum(params.get('lng'), null, -180, 180), + alt: parseNum(params.get('alt'), null, 100, 40000000), + heading: parseNum(params.get('heading'), 0, 0, 360), + pitch: parseNum(params.get('pitch'), -90, -90, 0), + mode: params.get('mode') || null, + pid: params.get('pid') || null, + }; +} + +function buildHash(v) { + const cam = v.camera; + const carto = cam.positionCartographic; + const params = new URLSearchParams(); + params.set('v', '1'); + params.set('lat', Cesium.Math.toDegrees(carto.latitude).toFixed(4)); + params.set('lng', Cesium.Math.toDegrees(carto.longitude).toFixed(4)); + params.set('alt', Math.round(carto.height).toString()); + const heading = Cesium.Math.toDegrees(cam.heading) % 360; + const pitch = Cesium.Math.toDegrees(cam.pitch); + if (Math.abs(heading) > 1) params.set('heading', heading.toFixed(1)); + if (Math.abs(pitch + 90) > 1) params.set('pitch', pitch.toFixed(1)); + const gs = v._globeState; + if (gs.mode === 'point') params.set('mode', 'point'); + if (gs.selectedPid) params.set('pid', gs.selectedPid); + return '#' + params.toString(); +} + // === Helpers: update DOM imperatively (no OJS reactivity) === function updateStats(phase, points, samples, time) { const s = (id, v) => { const e = document.getElementById(id); if (e) e.textContent = v; }; @@ -280,8 +332,19 @@ viewer = { const globalRect = Cesium.Rectangle.fromDegrees(-180, -60, 180, 80); Cesium.Camera.DEFAULT_VIEW_RECTANGLE = globalRect; Cesium.Camera.DEFAULT_VIEW_FACTOR = 0.5; + const ih = v._initialHash; const once = () => { - v.camera.setView({ destination: globalRect }); + if (ih.lat != null && ih.lng != null) { + v.camera.setView({ + destination: Cesium.Cartesian3.fromDegrees(ih.lng, ih.lat, ih.alt || 20000000), + orientation: { + heading: Cesium.Math.toRadians(ih.heading), + pitch: Cesium.Math.toRadians(ih.pitch) + } + }); + } else { + v.camera.setView({ destination: globalRect }); + } v.scene.postRender.removeEventListener(once); }; v.scene.postRender.addEventListener(once); @@ -294,6 +357,12 @@ viewer = { v.scene.primitives.add(v.samplePoints); v.samplePoints.show = false; // hidden until point mode + // URL deep-link state + v._globeState = { mode: 'cluster', selectedPid: null }; + v._initialHash = readHash(); + v._suppressHashWrite = true; // cleared after zoomWatcher initializes + v._suppressTimer = null; + // Hover tooltip — works for both clusters and samples v.pointLabel = v.entities.add({ label: { @@ -332,6 +401,8 @@ viewer = { if (typeof meta === 'object' && meta.type === 'sample') { // --- Individual sample click --- updateSampleCard(meta); + v._globeState.selectedPid = meta.pid; + history.pushState(null, '', buildHash(v)); // Clear nearby list const sampEl = document.getElementById('samplesSection'); if (sampEl) sampEl.innerHTML = ''; @@ -357,6 +428,8 @@ viewer = { } else if (typeof meta === 'object' && meta.count) { // --- Cluster click --- updateClusterCard(meta); + v._globeState.selectedPid = null; + history.pushState(null, '', buildHash(v)); const sampEl = document.getElementById('samplesSection'); if (sampEl) sampEl.innerHTML = '
Loading nearby samples...
'; @@ -603,19 +676,23 @@ zoomWatcher = { } // --- Mode transitions --- - function enterPointMode() { + function enterPointMode(pushHistory) { mode = 'point'; + viewer._globeState.mode = 'point'; viewer.h3Points.show = false; viewer.samplePoints.show = true; + if (pushHistory !== false) history.pushState(null, '', buildHash(viewer)); loadViewportSamples(); console.log('Entered point mode'); } - function exitPointMode() { + function exitPointMode(pushHistory) { mode = 'cluster'; + viewer._globeState.mode = 'cluster'; viewer.samplePoints.show = false; viewer.samplePoints.removeAll(); viewer.h3Points.show = true; + if (pushHistory !== false) history.pushState(null, '', buildHash(viewer)); cachedBounds = null; cachedData = null; @@ -661,10 +738,119 @@ zoomWatcher = { await loadRes(target, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[target]); } } + + // Update URL hash (replaceState for continuous movement) + if (!viewer._suppressHashWrite) { + history.replaceState(null, '', buildHash(viewer)); + } }, 600); }); viewer.camera.percentageChanged = 0.1; + // --- Handle browser back/forward --- + window.addEventListener('hashchange', async () => { + const state = readHash(); + if (state.lat == null || state.lng == null) return; + + viewer._suppressHashWrite = true; + clearTimeout(viewer._suppressTimer); + viewer.camera.cancelFlight(); + viewer.camera.flyTo({ + destination: Cesium.Cartesian3.fromDegrees(state.lng, state.lat, state.alt || 20000000), + orientation: { + heading: Cesium.Math.toRadians(state.heading), + pitch: Cesium.Math.toRadians(state.pitch) + }, + duration: 1.5, + }); + + // After flight settles, force mode and clear suppress flag + viewer._suppressTimer = setTimeout(() => { + viewer._suppressHashWrite = false; + const s = readHash(); + if (s.mode === 'point' && mode !== 'point') enterPointMode(false); + else if (s.mode !== 'point' && mode === 'point') exitPointMode(false); + }, 2000); + + // Handle pid selection + if (state.pid) { + viewer._globeState.selectedPid = state.pid; + try { + const sample = await db.query(` + SELECT pid, label, source, latitude, longitude, place_name, result_time + FROM read_parquet('${lite_url}') + WHERE pid = '${state.pid.replace(/'/g, "''")}' + LIMIT 1 + `); + if (sample && sample.length > 0) { + const s = sample[0]; + updateSampleCard({ + pid: s.pid, label: s.label, source: s.source, + lat: s.latitude, lng: s.longitude, + place_name: s.place_name, result_time: s.result_time + }); + } + } catch(err) { + console.error("Hash pid query failed:", err); + } + } else { + viewer._globeState.selectedPid = null; + updateClusterCard(null); + } + }); + + // --- Share button --- + const shareBtn = document.getElementById('shareBtn'); + if (shareBtn) { + shareBtn.addEventListener('click', async () => { + history.replaceState(null, '', buildHash(viewer)); + try { + await navigator.clipboard.writeText(location.href); + const toast = document.getElementById('shareToast'); + if (toast) { + toast.style.opacity = '1'; + setTimeout(() => { toast.style.opacity = '0'; }, 2000); + } + } catch(err) { + prompt('Copy this link:', location.href); + } + }); + } + + // --- Deep-link: restore selection from initial hash --- + const ih = viewer._initialHash; + if (ih.pid) { + viewer._globeState.selectedPid = ih.pid; + try { + const sample = await db.query(` + SELECT pid, label, source, latitude, longitude, place_name, result_time + FROM read_parquet('${lite_url}') + WHERE pid = '${ih.pid.replace(/'/g, "''")}' + LIMIT 1 + `); + if (sample && sample.length > 0) { + const s = sample[0]; + updateSampleCard({ + pid: s.pid, label: s.label, source: s.source, + lat: s.latitude, lng: s.longitude, + place_name: s.place_name, result_time: s.result_time + }); + const detail = await db.query(` + SELECT description FROM read_parquet('${wide_url}') + WHERE pid = '${ih.pid.replace(/'/g, "''")}' + LIMIT 1 + `); + if (detail && detail.length > 0) updateSampleDetail(detail[0]); + else updateSampleDetail({ description: '' }); + } + } catch(err) { + console.error("Deep-link pid query failed:", err); + } + } + + // Enable hash writing now that everything is initialized + viewer._suppressHashWrite = false; + return "active"; } ``` From 2a599c3c93fb8ca91ab5b4a05ed5ca06eb4835f7 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 13 Feb 2026 07:00:26 -0800 Subject: [PATCH 12/12] Fix startup crash: move _initialHash before globalRect block that reads it v._initialHash was set after the once() closure that references it, causing undefined.lat TypeError on page load. Co-Authored-By: Claude Opus 4.6 --- tutorials/progressive_globe.qmd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tutorials/progressive_globe.qmd b/tutorials/progressive_globe.qmd index 7312e31..ffa49d5 100644 --- a/tutorials/progressive_globe.qmd +++ b/tutorials/progressive_globe.qmd @@ -329,6 +329,12 @@ viewer = { terrain: Cesium.Terrain.fromWorldTerrain() }); + // URL deep-link state (must be set before globalRect/once block reads it) + v._globeState = { mode: 'cluster', selectedPid: null }; + v._initialHash = readHash(); + v._suppressHashWrite = true; // cleared after zoomWatcher initializes + v._suppressTimer = null; + const globalRect = Cesium.Rectangle.fromDegrees(-180, -60, 180, 80); Cesium.Camera.DEFAULT_VIEW_RECTANGLE = globalRect; Cesium.Camera.DEFAULT_VIEW_FACTOR = 0.5; @@ -357,12 +363,6 @@ viewer = { v.scene.primitives.add(v.samplePoints); v.samplePoints.show = false; // hidden until point mode - // URL deep-link state - v._globeState = { mode: 'cluster', selectedPid: null }; - v._initialHash = readHash(); - v._suppressHashWrite = true; // cleared after zoomWatcher initializes - v._suppressTimer = null; - // Hover tooltip — works for both clusters and samples v.pointLabel = v.entities.add({ label: {