create table players_scd (
player_name TEXT,
scoring_class scoring_class,
is_active BOOLEAN,
start_season INTEGER,
end_date INTEGER,
current_season INTEGER,
PRIMARY KEY(player_name,start_season)
)
insert into players_scd
with with_previous as (
select
player_name,
current_season,
scoring_class,
is_active,
LAG(scoring_class,1) over (partition by player_name order by current_season) as previous_scoring_class,
LAG(is_active,1) over (partition by player_name order by current_season) as previous_is_active
from players
where current_season <= 2021
),
with_indicator as (
select *,
CASE
WHEN scoring_class <> previous_scoring_class THEN 1
when is_active <> previous_is_active then 1
else 0
end as change_indicator
from with_previous
),
with_streaks as (
select *,
sum(change_indicator)
over (partition by player_name order by current_season) as streak_identifier
from with_indicator
)
select player_name,
scoring_class,
is_active,
MIN(current_season) as start_season,
MAX(current_season) as end_season,
2021 as current_season
from with_streaks
group by player_name,streak_identifier,is_active,scoring_class
order by player_name,streak_identifier
select * from players_scd
create table players_scd (
player_name TEXT,
scoring_class scoring_class,
is_active BOOLEAN,
start_season INTEGER,
end_date INTEGER,
current_season INTEGER,
PRIMARY KEY(player_name,start_season)
)
create type scd_type as (
scoring_class scoring_class,
is_active boolean,
start_season integer,
end_season integer
)
with last_season_scd as (
select * from players_scd
where current_season = 2021
and end_season = 2021
),
historical_scd as (
select * from players_scd
where current_season = 2021
and end_season < 2021
),
this_season_date as(
select * from players
where current_season = 2022
),
unchanged_records as (
select
ts.player_name,
ts.scoring_class,
ts.is_active,
ls.start_season,
ts.current_season as end_season
from this_season_date ts
join last_season_scd ls
on ls.player_name = ts.player_name
where ts.scoring_class = ls.scoring_class
and ts.is_active = ls.is_active
),
changed_records as(
select
ts.player_name,
unnest(ARRAy[
row(
ls.scoring_class,
ls.is_active,
ls.start_season,
ls.end_season
)::scd_type,
row(
ts.scoring_class,
ts.is_active,
ts.current_season,
ts.current_season
)::scd_type
]) as records
from this_season_date ts
left join last_season_scd ls
on ls.player_name = ts.player_name
where (ts.scoring_class <> ls.scoring_class
and ts.is_active <> ls.is_active)
),
unnested_changed_records as (
select player_name,
(records::scd_type).scoring_class,
(records::scd_type).is_active,
(records::scd_type).start_season,
(records::scd_type).end_season
from changed_records
),
new_records as (
select
ts.player_name,
ts.scoring_class,
ts.is_active,
ts.current_season as start season,
ts.current_season as end_season
from this_season_data ts
left join last_season_scd ls
on ts.player_name = ls.player_name
where ls.player_name is null
)
select * from unnested_changed_records