Skip to content

Commit 33d6906

Browse files
authored
Merge pull request #29 from rdhyee/issue-13-parquet-duckdb
Implement Eric Kansa's authoritative iSamples queries in Cesium tutorial
2 parents 2a397b4 + d35f70b commit 33d6906

File tree

1 file changed

+196
-54
lines changed

1 file changed

+196
-54
lines changed

tutorials/parquet_cesium.qmd

Lines changed: 196 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -343,57 +343,190 @@ async function get_samples_at_geo_cord_location_via_sample_event(pid) {
343343
if (pid === null || pid ==="" || pid == "unset") {
344344
return [];
345345
}
346+
// Eric Kansa's authoritative query from open-context-py
347+
// Source: https://github.com/ekansa/open-context-py/blob/staging/opencontext_py/apps/all_items/isamples/isamples_explore.py
346348
const q = `
347-
-- Path 1: Direct event location
348-
SELECT DISTINCT
349-
s.pid as sample_pid,
350-
s.label as sample_label,
351-
s.name as sample_name,
352-
event.label as event_label,
353-
event.pid as event_pid,
354-
site.label as site_label,
355-
site.pid as site_pid,
356-
'direct_event_location' as location_path
357-
FROM nodes s
358-
JOIN nodes e1 ON s.row_id = e1.s AND e1.p = 'produced_by'
359-
JOIN nodes event ON e1.o[1] = event.row_id
360-
JOIN nodes e2 ON event.row_id = e2.s AND e2.p = 'sample_location'
361-
JOIN nodes g ON e2.o[1] = g.row_id
362-
LEFT JOIN nodes e3 ON event.row_id = e3.s AND e3.p = 'sampling_site'
363-
LEFT JOIN nodes site ON e3.o[1] = site.row_id
364-
WHERE s.otype = 'MaterialSampleRecord'
365-
AND event.otype = 'SamplingEvent'
366-
AND g.otype = 'GeospatialCoordLocation'
367-
AND g.pid = ?
349+
SELECT
350+
geo.latitude,
351+
geo.longitude,
352+
site.label AS sample_site_label,
353+
site.pid AS sample_site_pid,
354+
samp.pid AS sample_pid,
355+
samp.alternate_identifiers AS sample_alternate_identifiers,
356+
samp.label AS sample_label,
357+
samp.description AS sample_description,
358+
samp.thumbnail_url AS sample_thumbnail_url,
359+
samp.thumbnail_url IS NOT NULL as has_thumbnail
360+
FROM nodes AS geo
361+
JOIN nodes AS rel_se ON (
362+
rel_se.p = 'sample_location'
363+
AND
364+
list_contains(rel_se.o, geo.row_id)
365+
)
366+
JOIN nodes AS se ON (
367+
rel_se.s = se.row_id
368+
AND
369+
se.otype = 'SamplingEvent'
370+
)
371+
JOIN nodes AS rel_site ON (
372+
se.row_id = rel_site.s
373+
AND
374+
rel_site.p = 'sampling_site'
375+
)
376+
JOIN nodes AS site ON (
377+
rel_site.o[1] = site.row_id
378+
AND
379+
site.otype = 'SamplingSite'
380+
)
381+
JOIN nodes AS rel_samp ON (
382+
rel_samp.p = 'produced_by'
383+
AND
384+
list_contains(rel_samp.o, se.row_id)
385+
)
386+
JOIN nodes AS samp ON (
387+
rel_samp.s = samp.row_id
388+
AND
389+
samp.otype = 'MaterialSampleRecord'
390+
)
391+
WHERE geo.pid = ?
392+
AND geo.otype = 'GeospatialCoordLocation'
393+
ORDER BY has_thumbnail DESC
394+
`;
395+
const result = await loadData(q, [pid], "loading_combined", "samples_combined");
396+
return result ?? [];
397+
}
368398
369-
UNION
399+
/**
 * Fetch a single sample's record together with its coordinates and site.
 *
 * The query walks MaterialSampleRecord -> `produced_by` -> SamplingEvent,
 * then from that event to its GeospatialCoordLocation (`sample_location`)
 * and its SamplingSite (`sampling_site`). Every join is an inner join, so a
 * sample that lacks any one of these links produces no row at all.
 *
 * @param {?string} sample_pid - Sample identifier. `null`, `undefined`, `""`
 *   and the `"unset"` placeholder return early without running a query.
 * @returns {Promise<?Object>} The first matching row, or `null` when the
 *   query yields nothing.
 */
async function get_sample_data_via_sample_pid(sample_pid) {
    // `== null` matches both null and undefined, so a not-yet-set pid is never
    // bound as the SQL parameter (which would run the joins for nothing).
    if (sample_pid == null || sample_pid === "" || sample_pid === "unset") {
        return null;
    }
    // Eric Kansa's query: Get full sample data including geo and site info
    const q = `
        SELECT
            samp.row_id,
            samp.pid AS sample_pid,
            samp.alternate_identifiers AS sample_alternate_identifiers,
            samp.label AS sample_label,
            samp.description AS sample_description,
            samp.thumbnail_url AS sample_thumbnail_url,
            samp.thumbnail_url IS NOT NULL as has_thumbnail,
            geo.latitude,
            geo.longitude,
            site.label AS sample_site_label,
            site.pid AS sample_site_pid
        FROM nodes AS samp
        JOIN nodes AS samp_rel_se ON (
            samp_rel_se.s = samp.row_id
            AND
            samp_rel_se.p = 'produced_by'
        )
        JOIN nodes AS se ON (
            samp_rel_se.o[1] = se.row_id
            AND
            se.otype = 'SamplingEvent'
        )
        JOIN nodes AS geo_rel_se ON (
            geo_rel_se.s = se.row_id
            AND
            geo_rel_se.p = 'sample_location'
        )
        JOIN nodes AS geo ON (
            geo_rel_se.o[1] = geo.row_id
            AND
            geo.otype = 'GeospatialCoordLocation'
        )
        JOIN nodes AS site_rel_se ON (
            site_rel_se.s = se.row_id
            AND
            site_rel_se.p = 'sampling_site'
        )
        JOIN nodes AS site ON (
            site_rel_se.o[1] = site.row_id
            AND
            site.otype = 'SamplingSite'
        )
        WHERE samp.pid = ?
        AND samp.otype = 'MaterialSampleRecord'
    `;
    // loadData is defined outside this view; the last two arguments are
    // presumably a loading-indicator element id and a result label (confirm).
    const result = await loadData(q, [sample_pid], "loading_sample_data", "sample_data");
    // Callers expect one record, so only the first row is handed back.
    return result && result.length ? result[0] : null;
}
370454
371-
-- Path 2: Via site location
372-
SELECT DISTINCT
373-
s.pid as sample_pid,
374-
s.label as sample_label,
375-
s.name as sample_name,
376-
event.label as event_label,
377-
event.pid as event_pid,
378-
site.label as site_label,
379-
site.pid as site_pid,
380-
'via_site_location' as location_path
381-
FROM nodes s
382-
JOIN nodes e1 ON s.row_id = e1.s AND e1.p = 'produced_by'
383-
JOIN nodes event ON e1.o[1] = event.row_id
384-
JOIN nodes e2 ON event.row_id = e2.s AND e2.p = 'sampling_site'
385-
JOIN nodes site ON e2.o[1] = site.row_id
386-
JOIN nodes e3 ON site.row_id = e3.s AND e3.p = 'site_location'
387-
JOIN nodes g ON e3.o[1] = g.row_id
388-
WHERE s.otype = 'MaterialSampleRecord'
389-
AND event.otype = 'SamplingEvent'
390-
AND site.otype = 'SamplingSite'
391-
AND g.otype = 'GeospatialCoordLocation'
392-
AND g.pid = ?
455+
/**
 * List the agents attached to a sample's sampling event.
 *
 * The query walks MaterialSampleRecord -> `produced_by` -> SamplingEvent and
 * then follows the event's `responsibility` and `registrant` edges to Agent
 * nodes. One row is returned per (edge, agent) pair; the `predicate` column
 * tells which of the two edges produced the row.
 *
 * @param {?string} sample_pid - Sample identifier. `null`, `undefined`, `""`
 *   and the `"unset"` placeholder return early without running a query.
 * @returns {Promise<Array<Object>>} Matching rows, or an empty array when
 *   there are none.
 */
async function get_sample_data_agents_sample_pid(sample_pid) {
    // `== null` matches both null and undefined, so a not-yet-set pid is never
    // bound as the SQL parameter (which would run the joins for nothing).
    if (sample_pid == null || sample_pid === "" || sample_pid === "unset") {
        return [];
    }
    // Eric Kansa's query: Get agent info (who collected/registered)
    const q = `
        SELECT
            samp.row_id,
            samp.pid AS sample_pid,
            samp.alternate_identifiers AS sample_alternate_identifiers,
            samp.label AS sample_label,
            samp.description AS sample_description,
            samp.thumbnail_url AS sample_thumbnail_url,
            samp.thumbnail_url IS NOT NULL as has_thumbnail,
            agent_rel_se.p AS predicate,
            agent.pid AS agent_pid,
            agent.name AS agent_name,
            agent.alternate_identifiers AS agent_alternate_identifiers
        FROM nodes AS samp
        JOIN nodes AS samp_rel_se ON (
            samp_rel_se.s = samp.row_id
            AND
            samp_rel_se.p = 'produced_by'
        )
        JOIN nodes AS se ON (
            samp_rel_se.o[1] = se.row_id
            AND
            se.otype = 'SamplingEvent'
        )
        JOIN nodes AS agent_rel_se ON (
            agent_rel_se.s = se.row_id
            AND
            list_contains(['responsibility', 'registrant'], agent_rel_se.p)
        )
        JOIN nodes AS agent ON (
            list_contains(agent_rel_se.o, agent.row_id)
            AND
            agent.otype = 'Agent'
        )
        WHERE samp.pid = ?
        AND samp.otype = 'MaterialSampleRecord'
    `;
    // loadData is defined outside this view; the last two arguments are
    // presumably a loading-indicator element id and a result label (confirm).
    const result = await loadData(q, [sample_pid], "loading_agents", "agents");
    return result ?? [];
}
393500
394-
ORDER BY sample_label
501+
/**
 * List the classification concepts attached to a sample.
 *
 * The query follows the sample's `keywords`, `has_sample_object_type` and
 * `has_material_category` edges to IdentifiedConcept nodes. One row is
 * returned per (edge, concept) pair; the `predicate` column tells which edge
 * produced the row.
 *
 * @param {?string} sample_pid - Sample identifier. `null`, `undefined`, `""`
 *   and the `"unset"` placeholder return early without running a query.
 * @returns {Promise<Array<Object>>} Matching rows, or an empty array when
 *   there are none.
 */
async function get_sample_types_and_keywords_via_sample_pid(sample_pid) {
    // `== null` matches both null and undefined, so a not-yet-set pid is never
    // bound as the SQL parameter (which would run the joins for nothing).
    if (sample_pid == null || sample_pid === "" || sample_pid === "unset") {
        return [];
    }
    // Eric Kansa's query: Get classification keywords and types
    const q = `
        SELECT
            samp.row_id,
            samp.pid AS sample_pid,
            samp.alternate_identifiers AS sample_alternate_identifiers,
            samp.label AS sample_label,
            kw_rel.p AS predicate,
            kw.pid AS keyword_pid,
            kw.label AS keyword
        FROM nodes AS samp
        JOIN nodes AS kw_rel ON (
            kw_rel.s = samp.row_id
            AND
            list_contains(['keywords', 'has_sample_object_type', 'has_material_category'], kw_rel.p)
        )
        JOIN nodes AS kw ON (
            list_contains(kw_rel.o, kw.row_id)
            AND
            kw.otype = 'IdentifiedConcept'
        )
        WHERE samp.pid = ?
        AND samp.otype = 'MaterialSampleRecord'
    `;
    // The statement has a single `?` placeholder, so exactly one parameter is
    // bound. loadData is defined outside this view; its last two arguments are
    // presumably a loading-indicator element id and a result label (confirm).
    const result = await loadData(q, [sample_pid], "loading_keywords", "keywords");
    return result ?? [];
}
399532
@@ -691,18 +824,27 @@ ${JSON.stringify(samples_2, null, 2)}
691824
```
692825

693826

694-
## Combined Samples at Location (Path 1 + Path 2 with Rich Metadata)
827+
## Samples at Location via Sampling Event (Eric Kansa's Query)
828+
829+
<div id="loading_combined" hidden>Loading samples…</div>
830+
831+
This query implements Eric Kansa's authoritative `get_samples_at_geo_cord_location_via_sample_event` function from [open-context-py](https://github.com/ekansa/open-context-py/blob/staging/opencontext_py/apps/all_items/isamples/isamples_explore.py).
695832

696-
<div id="loading_combined" hidden>Loading combined samples…</div>
833+
**Query Strategy (Path 1 Only)**:
834+
- Starts at a GeospatialCoordLocation (clicked point)
835+
- Walks **backward** via `sample_location` edges to find SamplingEvents that reference this location
836+
- From those events, finds MaterialSampleRecords produced by them
837+
- Requires site context (INNER JOIN on `sampling_site` → SamplingSite), so samples whose sampling event has no linked SamplingSite are excluded
697838

698-
This query implements Eric Kansa's `get_samples_at_geo_cord_location_via_sample_event` function, which combines both Path 1 and Path 2 using UNION and returns sample metadata including:
839+
**Returns**:
840+
- Geographic coordinates: `latitude`, `longitude`
841+
- Sample metadata: `sample_pid`, `sample_label`, `sample_description`, `sample_alternate_identifiers`
842+
- Site context: `sample_site_label`, `sample_site_pid`
843+
- Media: `sample_thumbnail_url`, `has_thumbnail`
699844

700-
- Sample metadata: `sample_pid`, `sample_label`, `sample_name`
701-
- Event context: `event_label`, `event_pid`
702-
- Site information: `site_label`, `site_pid` (when available via Path 2)
703-
- Path indicator: `location_path` (direct_event_location or via_site_location)
845+
**Ordering**: Prioritizes samples with images (`ORDER BY has_thumbnail DESC`)
704846

705-
Results are ordered alphabetically by sample label.
847+
**Important**: This query only returns samples whose **sampling events directly reference this geolocation** via `sample_location` (Path 1). Samples that reach this location only through their site's `site_location` (Path 2) are **not included**. This means site marker locations may return 0 results if no events were recorded at that exact coordinate.
706848

707849
```{ojs}
708850
//| echo: false

0 commit comments

Comments
 (0)