Skip to content

Latest commit

 

History

History
150 lines (128 loc) · 4.74 KB

File metadata and controls

150 lines (128 loc) · 4.74 KB

Filtering

WHERE Clauses in SQL

UDTFs support filtering via WHERE clauses in SQL. The filters are pushed down to the CDF API call, improving performance:

-- Filter by external_id
SELECT * FROM small_boat_udtf(
    client_id => SECRET('cdf_sailboat_sailboat', 'client_id'),
    client_secret => SECRET('cdf_sailboat_sailboat', 'client_secret'),
    tenant_id => SECRET('cdf_sailboat_sailboat', 'tenant_id'),
    cdf_cluster => SECRET('cdf_sailboat_sailboat', 'cdf_cluster'),
    project => SECRET('cdf_sailboat_sailboat', 'project'),
    name => NULL,
    description => NULL
)
WHERE external_id = 'my-boat-123'
LIMIT 10;

Predicate Pushdown

Predicate pushdown means that WHERE clause conditions are evaluated in the UDTF's Python code before making CDF API calls. This reduces the amount of data transferred and improves query performance.

Supported Filter Operations:

  • Equality: WHERE external_id = 'value'
  • Inequality: WHERE count > 100, WHERE timestamp < '2025-01-01'
  • NULL checks: WHERE name IS NOT NULL, WHERE description IS NULL
  • Multiple conditions: WHERE space = 'sailboat' AND external_id = 'vessel'

Filter Examples

Equality Filters

-- Filter by single property
SELECT * FROM small_boat_udtf(
    client_id => SECRET('cdf_sailboat_sailboat', 'client_id'),
    client_secret => SECRET('cdf_sailboat_sailboat', 'client_secret'),
    tenant_id => SECRET('cdf_sailboat_sailboat', 'tenant_id'),
    cdf_cluster => SECRET('cdf_sailboat_sailboat', 'cdf_cluster'),
    project => SECRET('cdf_sailboat_sailboat', 'project'),
    name => NULL,
    description => NULL
)
WHERE name = 'MyBoat'
LIMIT 10;

-- Filter by space and external_id
SELECT * FROM vessel_udtf(
    client_id => SECRET('cdf_sailboat_sailboat', 'client_id'),
    client_secret => SECRET('cdf_sailboat_sailboat', 'client_secret'),
    tenant_id => SECRET('cdf_sailboat_sailboat', 'tenant_id'),
    cdf_cluster => SECRET('cdf_sailboat_sailboat', 'cdf_cluster'),
    project => SECRET('cdf_sailboat_sailboat', 'project'),
    name => NULL,
    description => NULL
)
WHERE space = 'sailboat' AND external_id = 'vessel-123'
LIMIT 10;

Range Filters

-- Filter by timestamp range
SELECT * FROM pump_view_udtf(
    client_id => SECRET('cdf_sailboat_sailboat', 'client_id'),
    client_secret => SECRET('cdf_sailboat_sailboat', 'client_secret'),
    tenant_id => SECRET('cdf_sailboat_sailboat', 'tenant_id'),
    cdf_cluster => SECRET('cdf_sailboat_sailboat', 'cdf_cluster'),
    project => SECRET('cdf_sailboat_sailboat', 'project'),
    name => NULL,
    description => NULL
)
WHERE timestamp > '2025-01-01' AND timestamp < '2025-12-31'
ORDER BY timestamp;

-- Filter by numeric range
SELECT * FROM sensor_udtf(
    client_id => SECRET('cdf_sailboat_sailboat', 'client_id'),
    client_secret => SECRET('cdf_sailboat_sailboat', 'client_secret'),
    tenant_id => SECRET('cdf_sailboat_sailboat', 'tenant_id'),
    cdf_cluster => SECRET('cdf_sailboat_sailboat', 'cdf_cluster'),
    project => SECRET('cdf_sailboat_sailboat', 'project'),
    name => NULL,
    description => NULL
)
WHERE value > 100 AND value < 200
LIMIT 10;

NULL Handling

-- Filter out NULL values
SELECT * FROM small_boat_udtf(
    client_id => SECRET('cdf_sailboat_sailboat', 'client_id'),
    client_secret => SECRET('cdf_sailboat_sailboat', 'client_secret'),
    tenant_id => SECRET('cdf_sailboat_sailboat', 'tenant_id'),
    cdf_cluster => SECRET('cdf_sailboat_sailboat', 'cdf_cluster'),
    project => SECRET('cdf_sailboat_sailboat', 'project'),
    name => NULL,
    description => NULL
)
WHERE description IS NOT NULL
LIMIT 10;

-- Find records with NULL values
SELECT * FROM vessel_udtf(
    client_id => SECRET('cdf_sailboat_sailboat', 'client_id'),
    client_secret => SECRET('cdf_sailboat_sailboat', 'client_secret'),
    tenant_id => SECRET('cdf_sailboat_sailboat', 'tenant_id'),
    cdf_cluster => SECRET('cdf_sailboat_sailboat', 'cdf_cluster'),
    project => SECRET('cdf_sailboat_sailboat', 'project'),
    name => NULL,
    description => NULL
)
WHERE name IS NULL
LIMIT 10;

Multiple Conditions

-- Complex filtering with multiple conditions
SELECT * FROM pump_view_udtf(
    client_id => SECRET('cdf_sailboat_sailboat', 'client_id'),
    client_secret => SECRET('cdf_sailboat_sailboat', 'client_secret'),
    tenant_id => SECRET('cdf_sailboat_sailboat', 'tenant_id'),
    cdf_cluster => SECRET('cdf_sailboat_sailboat', 'cdf_cluster'),
    project => SECRET('cdf_sailboat_sailboat', 'project'),
    name => NULL,
    description => NULL
)
WHERE space = 'power'
  AND timestamp > '2025-01-01'
  AND status = 'active'
  AND value > 50
ORDER BY timestamp DESC
LIMIT 100;

Next Steps