Archive: deterministic location jitter in SQL - QutEcoacoustics/baw-server GitHub Wiki
We had an attempt to move our location jitter function into SQL.
It works well in principle, but we decided to simply store a copy of the coordinates in jittered form. Storing coordinates means we have don't have to do expensive compute on every query - just a simple branch based on permissions.
Still the function is worth keeping for reference in case we need something like it in the future.
CREATE EXTENSION IF NOT EXISTS pg_crypto WITH SCHEMA public;
--
-- Name: clamp(numeric, numeric, numeric); Type: FUNCTION; Schema: public; Owner: -
--
CREATE FUNCTION public.clamp(value numeric, minimum numeric, maximum numeric) RETURNS numeric
LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE
AS $$
BEGIN
RETURN GREATEST(minimum, LEAST(maximum, value));
END;
$$;
--
-- Name: FUNCTION clamp(value numeric, minimum numeric, maximum numeric); Type: COMMENT; Schema: public; Owner: -
--
COMMENT ON FUNCTION public.clamp(value numeric, minimum numeric, maximum numeric) IS '
Clamps a numeric value between a minimum and maximum.
Parameters:
value - The numeric value to clamp.
minimum - The lower bound of the clamping range.
maximum - The upper bound of the clamping range.
Returns:
The value constrained to the [minimum, maximum] interval.
';
--
-- Name: obfuscate_location(numeric, numeric, numeric, text, numeric, boolean); Type: FUNCTION; Schema: public; Owner: -
--
CREATE FUNCTION public.obfuscate_location(INOUT latitude numeric, INOUT longitude numeric, jitter_amount numeric, salt text, jitter_exclusion numeric DEFAULT 0.1, INOUT obfuscated boolean DEFAULT true) RETURNS record
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE
AS $$
DECLARE
random_a numeric;
random_b numeric;
BEGIN
-- why include the option to not obfuscate?
-- it makes calling and using this function easier. We can just
-- pass in a result from e.g. a permissions test on whether or not
-- to obfuscate and we don't need a conditional at every calling site
IF obfuscated THEN
-- set seed as simple concatenation of the latitude and longitude and salt
-- our random number is deterministic and can also exclude ranges around 0
SELECT a, b INTO random_a, random_b FROM
sample_two_immutable_randoms(
(COALESCE(latitude, 0.0))::text || (COALESCE(longitude, 0.0))::text || jitter_amount::text || salt,
jitter_exclusion
);
-- apply jitter to each component, and clamp to valid range
-- leave nulls unaffected
IF latitude IS NOT NULL THEN
latitude := round(clamp(latitude + (random_a * jitter_amount), -90.0, 90.0), 6);
END IF;
IF longitude IS NOT NULL THEN
longitude := round(clamp(longitude + (random_b * jitter_amount), -180.0, 180.0), 6);
END IF;
END IF;
END;
$$;
--
-- Name: FUNCTION obfuscate_location(INOUT latitude numeric, INOUT longitude numeric, jitter_amount numeric, salt text, jitter_exclusion numeric, INOUT obfuscated boolean); Type: COMMENT; Schema: public; Owner: -
--
COMMENT ON FUNCTION public.obfuscate_location(INOUT latitude numeric, INOUT longitude numeric, jitter_amount numeric, salt text, jitter_exclusion numeric, INOUT obfuscated boolean) IS '
Obfuscates geographic coordinates (latitude and longitude) by applying a deterministic jitter.
Parameters:
latitude - Original latitude in decimal degrees.
longitude - Original longitude in decimal degrees.
jitter_amount - Maximum magnitude of jitter to apply.
salt - Seed text to make obfuscation deterministic per row or entity.
jitter_exclusion - Fraction of the unit interval [0, 1] representing an exclusion zone around 0
where random jitter values are pushed outward (default 0.1).
obfuscated - Boolean flag; if false, returns original coordinates without modification (default true).
Returns:
A scalar composite type with fields:
latitude - The obfuscated latitude.
longitude - The obfuscated longitude.
obfuscated - Boolean indicating whether the output was actually obfuscated.
Behavior:
- Deterministic: same inputs (including salt) always produce the same output.
- Leaves NULL coordinates unaffected.
- Applies clamping to ensure latitude stays within [-90, 90] and longitude within [-180, 180].
- Rounds results to 6 decimal places.
';
--
-- Name: sample_two_immutable_randoms(text, numeric); Type: FUNCTION; Schema: public; Owner: -
--
CREATE FUNCTION public.sample_two_immutable_randoms(seed text, exclusion numeric DEFAULT 0.0, OUT a numeric, OUT b numeric) RETURNS record
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE
AS $$
DECLARE
hash varchar(40);
int64_max CONSTANT bigint DEFAULT 9223372036854775807;
BEGIN
-- the sha1 is a cryptographic hash function so should be well distributed
-- the digest is 160 bits (40 hex characters)
hash := encode(digest(seed::text,'sha1'), 'hex');
-- samples two 8 byte integers from the digest to get two random-ish numbers
-- we convert to bigint then to numeric and scale to [-1, 1]
a := (('x' || substr(hash, 1, 16))::bit(64)::bigint)::numeric / int64_max;
b := (('x' || substr(hash, 17, 16))::bit(64)::bigint)::numeric / int64_max;
--rescale the random values if both are within the exclusion zone
IF (abs(a) < exclusion) AND (abs(b) < exclusion) THEN
-- what we do here is we take the value that's in the exclusion zone and
-- stretch it out over the valid zone instead.
-- e.g. 0.05 is half of an 0.1 exclusion zone. The valid zone is [0.1, 1.0] (|0.9|).
-- Half way along that interval is 0.45.
a := (sign(a) + (a = 0)::int) * (((1 - exclusion) * (abs(a)/exclusion)) + exclusion);
b := (sign(b) + (b = 0)::int) * (((1 - exclusion) * (abs(b)/exclusion)) + exclusion);
END IF;
END
$$;
--
-- Name: FUNCTION sample_two_immutable_randoms(seed text, exclusion numeric, OUT a numeric, OUT b numeric); Type: COMMENT; Schema: public; Owner: -
--
COMMENT ON FUNCTION public.sample_two_immutable_randoms(seed text, exclusion numeric, OUT a numeric, OUT b numeric) IS '
Generates two deterministic pseudo-random numbers in the range [-1, -exclusion] ∩ [exclusion, 1] based on a seed string.
Parameters:
seed - A text string used to generate deterministic random numbers.
exclusion - Optional fraction of the unit interval (0.0 - 1.0) to define an exclusion zone around 0.
If both random numbers fall inside this exclusion zone, they are scaled outward to avoid it.
Returns:
A scalar composite type with fields:
a - A pseudo-random number in [-1, -exclusion] ∩ [exclusion, 1].
b - A pseudo-random number in [-1, -exclusion] ∩ [exclusion, 1].
Behavior:
- Deterministic: same seed and exclusion always produce the same output.
- Optional exclusion zone ensures that small random numbers near 0 can be pushed outward.
IMMUTABLE.
';
Testing functions
#-- select s.n, r.* #-- from generate_series(-1000, 2000) s(n) #-- cross join LATERAL sample_two_immutable_randoms(s.n::text, 0.1) r
#select id, name, sites.latitude, sites.longitude, location.*, sites.latitude - location.latitude as latitude_delta, sites.longitude - location.longitude as longitude_delta #from sites #CROSS JOIN LATERAL obfuscate_location(latitude::numeric, longitude::numeric, 0.033::numeric, id::text, 0.1, true) AS location #order by id desc #--limit 100;