upsert - prayagupa/rdb GitHub Wiki
Upsert
bq
sql_to_merge = """
MERGE INTO `pro1.ad_creative` T
USING `pro1.ad_creative_temp` S
ON T.id = S.id
WHEN MATCHED THEN
UPDATE SET
ad_id = S.ad_id,
ad_type = S.ad_type,
status = S.status,
modified_date = S.modified_date
WHEN NOT MATCHED THEN
INSERT (id, ad_id, name, ad_type, status, created_date, modified_date)
VALUES(S.id, S.ad_id, S.name, S.ad_type, S.status, S.created_date, S.modified_date);"""
Pg update
On UPDATE
, PG rows have infomask
, infomask2
and t_ctid
, where the latter provides the link between the old and the new version of a row.
INSERT INTO visiting_user(user_name)
SELECT ('upaupaupa')
WHERE NOT EXISTS (SELECT user_name from visiting_user where user_name='upaupaupa');
There is some way for PostgreSQL to find the new version of an updated row. That is why the experiment returned a result row.
-- session1
BEGIN;
UPDATE visiting_user SET user_name = 'new username' WHERE user_id = 1;
COMMIT;
-- one row is returned
--session2
SELECT user_id FROM visiting_user WHERE user_id = 1 FOR UPDATE;
-- hangs
There is no connection between the old, deleted row and the newly inserted one in following exp, that’s why we get no result in this case.
--
TRUNCATE visiting_user;
INSERT INTO visiting_user VALUES (1, 'upaupaupa');
-- session1
BEGIN;
DELETE FROM visiting_user WHERE user_id = 1;
INSERT INTO visiting_user VALUES (1, 'username 1');
COMMIT;
-- no row is returned
-- session2
SELECT user_id FROM visiting_user WHERE user_name = 'username 1' FOR UPDATE;
-- hangs
update pageinspect
TRUNCATE visiting_user;
INSERT INTO visiting_user VALUES (1, '42');
CREATE EXTENSION pageinspect;
SELECT lp,
t_xmin AS xmin,
t_xmax AS xmax,
t_ctid,
to_hex(t_infomask2) AS infomask2,
to_hex(t_infomask) AS infomask,
t_attrs
FROM heap_page_item_attrs(get_raw_page('visiting_user', 0), 'visiting_user');
lp | xmin | xmax | t_ctid | infomask2 | infomask | t_attrs
----+------+------+--------+-----------+----------+-----------------------------------------------------------------
1 | 750 | 0 | (0,1) | 3 | 902 | {"\\x01000000","\\x15757061757061757061","\\x9d0e1bc952930200"}
--
--
UPDATE visiting_user SET user_name = '2' WHERE user_id = 1;
SELECT lp,
t_xmin AS xmin,
t_xmax AS xmax,
t_ctid,
to_hex(t_infomask2) AS infomask2,
to_hex(t_infomask) AS infomask,
t_attrs
FROM heap_page_item_attrs(get_raw_page('visiting_user', 0), 'visiting_user');
lp | xmin | xmax | t_ctid | infomask2 | infomask | t_attrs
----+------+------+--------+-----------+----------+---------------------------------------------------
1 | 753 | 754 | (0,2) | 4003 | 102 | {"\\x01000000","\\x073432","\\x73299fef52930200"}
2 | 754 | 0 | (0,2) | 8003 | 2802 | {"\\x01000000","\\x0532","\\x73299fef52930200"}
(2 rows)
The first entry is the old version of the row, the second the new version.
lp
is the line pointer number, which stands for the number of the tuple within the data page.t_ctid
contains the tuple identifier of the updated version of the row.
delete pageinspect
TRUNCATE visiting_user;
INSERT INTO visiting_user VALUES (1, '42');
-- inspect
lp | xmin | xmax | t_ctid | infomask2 | infomask | t_attrs
----+------+------+--------+-----------+----------+---------------------------------------------------
1 | 756 | 0 | (0,1) | 3 | 802 | {"\\x01000000","\\x073432","\\x3a61041253930200"}
--
--
BEGIN;
DELETE FROM visiting_user WHERE user_id = 1;
INSERT INTO visiting_user VALUES (2, '42');
COMMIT;
SELECT lp,
t_xmin AS xmin,
t_xmax AS xmax,
t_ctid,
to_hex(t_infomask2) AS infomask2,
to_hex(t_infomask) AS infomask,
t_attrs
FROM heap_page_item_attrs(get_raw_page('visiting_user', 0), 'visiting_user');
lp | xmin | xmax | t_ctid | infomask2 | infomask | t_attrs
----+------+------+--------+-----------+----------+---------------------------------------------------
1 | 756 | 757 | (0,1) | 2003 | 102 | {"\\x01000000","\\x073432","\\x3a61041253930200"}
2 | 757 | 0 | (0,2) | 3 | 802 | {"\\x02000000","\\x073432","\\x1bbd6c1353930200"}
(2 rows)
-
t_ctid
column from the old, deleted tuple is unchanged and there is no link to the new tuple. -
The second tuple is not found by the
SELECT ... FOR UPDATE
, since it is “invisible” to the “snapshot” used for scanning the table. -
infomask
:HEAP_XMIN_COMMITTED
(x01000000
) means that the tuple was valid before it was removed -
infomask2
: 2 is the number of columns, andHEAP_KEYS_UPDATED
(x02000000
) means that the tuple is deleted or updated