DB upgrade scripts - PanDAWMS/panda-server GitHub Wiki

[WIP] 30 June 2025 (DB version 0.0.29): Alter JEDI_DATASET_CONTENTS table

ALTER SESSION set DDL_LOCK_TIMEOUT = 30;
alter table JEDI_DATASET_CONTENTS MODIFY
(
    FIRSTEVENT               NUMBER(11),
    STARTEVENT               NUMBER(11),
    ENDEVENT                 NUMBER(11)
);

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=29 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=29 where component ='SERVER';
commit;
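
As a quick sanity check after running any of these scripts, the recorded schema version can be read back (this uses only the columns referenced by the UPDATE statements above):

SELECT component, major, minor, patch
FROM "ATLAS_PANDA"."PANDADB_VERSION"
WHERE component IN ('JEDI', 'SERVER');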

23 June 2025 (DB version 0.0.28): Create WORKER_NODE_GPUS table and alter DATA_CAROUSEL_REQUESTS table

CREATE TABLE "ATLAS_PANDA"."WORKER_NODE_GPUS"(
    "SITE" varchar2(128),
    "HOST_NAME" varchar2(128),
    "VENDOR" varchar(128),
    "MODEL" varchar(128),
    "COUNT" number(3, 0),
    "VRAM" number(20, 0),
    "ARCHITECTURE" varchar(128),
    "FRAMEWORK" varchar(128),
    "FRAMEWORK_VERSION" varchar(20),
    "DRIVER_VERSION" varchar(20),
    "LAST_SEEN" date,
    CONSTRAINT PK_WORKER_NODE_GPU PRIMARY KEY ("SITE", "HOST_NAME", "VENDOR", "MODEL")
) ORGANIZATION INDEX COMPRESS 1;

CREATE INDEX IDX_WORKER_NODE_GPU_LAST_SEEN ON "ATLAS_PANDA"."WORKER_NODE_GPUS"("LAST_SEEN");

-- Table Comment
COMMENT ON TABLE "ATLAS_PANDA"."WORKER_NODE_GPUS" IS 'Stores information about the GPUs associated to a worker node seen by PanDA pilots';

-- Column Comments
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."SITE" IS 'The name of the site (not PanDA queue) where the worker node is located.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."HOST_NAME" IS 'The hostname of the worker node.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."VENDOR" IS 'GPU vendor, e.g. NVIDIA.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."MODEL" IS 'GPU model, e.g. A100 80GB.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."COUNT" IS 'Number of GPUs of this type in the worker node.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."VRAM" IS 'VRAM memory in MB.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."ARCHITECTURE" IS 'GPU architecture, e.g. Tesla, Ampere.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."FRAMEWORK" IS 'Driver framework available, e.g. CUDA.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."FRAMEWORK_VERSION" IS 'Version of the driver framework, e.g. 12.2';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."DRIVER_VERSION" IS 'Version of the driver, e.g. 575.51.03.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_GPUS"."LAST_SEEN" IS 'Timestamp of the last time the worker node was active.';


ALTER TABLE "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS" 
ADD (
    "LAST_STAGED_TIME" DATE,
    "LOCKED_BY" VARCHAR2(64 BYTE),
    "LOCK_TIME" DATE
);

COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."LAST_STAGED_TIME" IS 'Last time of update about staged files';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."LOCKED_BY" IS 'The process which locks the request entry';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."LOCK_TIME" IS 'Timestamp when the request entry was locked';

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=28 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=28 where component ='SERVER';
COMMIT; 

30 April 2025 (DB version 0.0.27): add NUCLEUS to JOBS_SHARE_STATS and JOBSDEFINED_SHARE_STATS and adjust the Oracle jobs accordingly

alter table "ATLAS_PANDA"."JOBS_SHARE_STATS" add "NUCLEUS" VARCHAR2(52 BYTE);
COMMENT ON COLUMN "ATLAS_PANDA"."JOBS_SHARE_STATS"."NUCLEUS" IS 'Name of the site where the task is assigned in WORLD cloud';
alter table "ATLAS_PANDA"."JOBSDEFINED_SHARE_STATS" add "NUCLEUS" VARCHAR2(52 BYTE);
COMMENT ON COLUMN "ATLAS_PANDA"."JOBSDEFINED_SHARE_STATS"."NUCLEUS" IS 'Name of the site where the task is assigned in WORLD cloud';

--------------------------------------------------------
--  DDL for Procedure UPDATE_JOBSACT_STATS_BY_GSHARE
--------------------------------------------------------
set define off;

CREATE OR REPLACE PROCEDURE "ATLAS_PANDA"."UPDATE_JOBSACT_STATS_BY_GSHARE"
AS
BEGIN
-- 16th Sept 2024 , ver 1.6
-- 14th Nov 2023 , ver 1.5
-- 27th Nov 2020 , ver 1.4
-- 29th Jan 2018 , ver 1.3
-- to easily identify the session and better view on resource usage by setting a dedicated module for the PanDA jobs
DBMS_APPLICATION_INFO.SET_MODULE( module_name => 'PanDA scheduler job', action_name => 'Aggregates data by global share for the active jobs!');
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => sys_context('userenv', 'host') || ' ( ' || sys_context('userenv', 'ip_address') || ' )' );

DELETE from ATLAS_PANDA.JOBS_SHARE_STATS;

INSERT INTO ATLAS_PANDA.JOBS_SHARE_STATS (TS, GSHARE, WORKQUEUE_ID, RESOURCE_TYPE,
                                          COMPUTINGSITE, JOBSTATUS,
                                          MAXPRIORITY, PRORATED_DISKIO_AVG, PRORATED_MEM_AVG, NJOBS, HS, VO, NUCLEUS)
WITH
    sc_slimmed AS (
    SELECT /*+ MATERIALIZE */ sc.panda_queue AS pq, sc.data.corepower AS cp
    FROM ATLAS_PANDA.schedconfig_json sc
    )
SELECT sysdate, gshare, workqueue_id, ja4.resource_type, computingSite, jobStatus,
      MAX(currentPriority) AS maxPriority,
      AVG(diskIO/NVL(ja4.coreCount, 1)) AS proratedDiskioAvg, AVG(minRamCount/NVL(ja4.coreCount, 1)) AS proratedMemAvg,
      COUNT(*) AS num_of_jobs, COUNT(*) * NVL(ja4.coreCount, 1) * sc_s.cp AS HS, VO, NUCLEUS
FROM ATLAS_PANDA.jobsActive4 ja4, sc_slimmed sc_s
WHERE ja4.computingsite = sc_s.pq
GROUP BY sysdate, gshare, workqueue_id, ja4.resource_type, computingSite, jobStatus, ja4.coreCount, sc_s.cp, VO, NUCLEUS;


COMMIT;

DBMS_APPLICATION_INFO.SET_MODULE( module_name => null, action_name => null);
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => null);

end;
/


--------------------------------------------------------
--  DDL for Procedure UPDATE_JOBSDEF_STATS_BY_GSHARE
--------------------------------------------------------
set define off;

CREATE OR REPLACE PROCEDURE "ATLAS_PANDA"."UPDATE_JOBSDEF_STATS_BY_GSHARE"
AS
BEGIN
-- 16th Sept 2024 , ver 1.2
-- 14th Nov 2023 , ver 1.1
-- 27th Nov 2020 , ver 1.0
-- Based on UPDATE_JOBSACT_STATS_BY_GSHARE
-- to easily identify the session and better view on resource usage by setting a dedicated module for the PanDA jobs
DBMS_APPLICATION_INFO.SET_MODULE( module_name => 'PanDA scheduler job', action_name => 'Aggregates data by global share for the defined jobs!');
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => sys_context('userenv', 'host') || ' ( ' || sys_context('userenv', 'ip_address') || ' )' );


DELETE from ATLAS_PANDA.JOBSDEFINED_SHARE_STATS;

INSERT INTO ATLAS_PANDA.JOBSDEFINED_SHARE_STATS (TS, GSHARE, WORKQUEUE_ID, RESOURCE_TYPE,
                                          COMPUTINGSITE, JOBSTATUS,
                                          MAXPRIORITY, PRORATED_DISKIO_AVG, PRORATED_MEM_AVG, NJOBS, HS, VO, NUCLEUS)
WITH
    sc_slimmed AS (
    SELECT /*+ MATERIALIZE */ sc.panda_queue AS pq, sc.data.corepower AS cp
    FROM ATLAS_PANDA.schedconfig_json sc
    )
SELECT sysdate, gshare, workqueue_id, ja4.resource_type, computingSite, jobStatus,
      MAX(currentPriority) AS maxPriority,
      AVG(diskIO/NVL(ja4.coreCount, 1)) AS proratedDiskioAvg, AVG(minRamCount/NVL(ja4.coreCount, 1)) AS proratedMemAvg,
      COUNT(*) AS num_of_jobs, COUNT(*) * NVL(ja4.coreCount, 1) * sc_s.cp AS HS, VO, NUCLEUS
FROM ATLAS_PANDA.jobsDefined4 ja4, sc_slimmed sc_s
WHERE ja4.computingsite = sc_s.pq
GROUP BY sysdate, gshare, workqueue_id, ja4.resource_type, computingSite, jobStatus, ja4.coreCount, sc_s.cp, VO, NUCLEUS;


COMMIT;

DBMS_APPLICATION_INFO.SET_MODULE( module_name => null, action_name => null);
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => null);

end;
/

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=27 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=27 where component ='SERVER';
COMMIT;

09 April 2025 (DB version 0.0.26): add total local disk to worker node

ALTER TABLE "ATLAS_PANDA"."WORKER_NODE"
ADD "TOTAL_LOCAL_DISK" number(9,0);
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CLOCK_SPEED" IS 'Clock speed of the CPU in MHz.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."TOTAL_LOCAL_DISK" IS 'Total local disk in GB.';

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=26 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=26 where component ='SERVER';
COMMIT;

24 February 2025 (DB version 0.0.25): add worker node map and cpu benchmarks

CREATE TABLE "ATLAS_PANDA"."WORKER_NODE"(
    "SITE" varchar2(128),
    "HOST_NAME" varchar2(128),
    "CPU_MODEL" varchar(128),
    "N_LOGICAL_CPUS" number(9,0),
    "N_SOCKETS" number(9,0),
    "CORES_PER_SOCKET" number(9,0),
    "THREADS_PER_CORE" number(9,0),
    "CPU_ARCHITECTURE" varchar2(20),
    "CPU_ARCHITECTURE_LEVEL" varchar2(20),
    "CLOCK_SPEED" number(9,2),
    "TOTAL_MEMORY" number(9,0),
    "LAST_SEEN" date,
    CONSTRAINT PK_WORKER_NODE PRIMARY KEY ("SITE", "HOST_NAME", "CPU_MODEL")
) ORGANIZATION INDEX COMPRESS 1;

CREATE INDEX IDX_WORKER_NODE_LAST_SEEN ON "ATLAS_PANDA"."WORKER_NODE"("LAST_SEEN");

-- Table Comment
COMMENT ON TABLE "ATLAS_PANDA"."WORKER_NODE" IS 'Stores information about worker nodes seen by PanDA pilots';

-- Column Comments
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."SITE" IS 'The name of the site (not PanDA queue) where the worker node is located.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."HOST_NAME" IS 'The hostname of the worker node.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CPU_MODEL" IS 'The specific model of the CPU.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."N_LOGICAL_CPUS" IS 'Total number of logical CPUs (calculated as sockets * cores per socket * threads per core).';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."N_SOCKETS" IS 'Number of physical CPU sockets.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CORES_PER_SOCKET" IS 'Number of CPU cores per physical socket.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."THREADS_PER_CORE" IS 'Number of threads per CPU core.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CPU_ARCHITECTURE" IS 'The CPU architecture (e.g., x86_64, ARM).';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CPU_ARCHITECTURE_LEVEL" IS 'The specific level/version of the CPU architecture.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CLOCK_SPEED" IS 'Clock speed of the CPU in GHz.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."TOTAL_MEMORY" IS 'Total amount of RAM in MB.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."LAST_SEEN" IS 'Timestamp of the last time the worker node was active.';

CREATE TABLE "ATLAS_PANDA"."CPU_BENCHMARKS" (
    "CPU_TYPE" VARCHAR2(128),
    "SMT_ENABLED" NUMBER(1),  -- 0 or 1
    "SOCKETS" NUMBER(2),
    "CORES_PER_SOCKET" NUMBER(9),
    "NCORES" NUMBER(9),
    "SITE" VARCHAR2(128),
    "SCORE_PER_CORE" NUMBER(10,2),
    "TIMESTAMP" DATE,
    "SOURCE" VARCHAR2(256)
);
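
A hypothetical benchmark entry (all values invented for illustration) would be inserted as:

INSERT INTO "ATLAS_PANDA"."CPU_BENCHMARKS"
    ("CPU_TYPE", "SMT_ENABLED", "SOCKETS", "CORES_PER_SOCKET", "NCORES",
     "SITE", "SCORE_PER_CORE", "TIMESTAMP", "SOURCE")
VALUES ('AMD EPYC 7702 64-Core Processor', 1, 2, 64, 256,
        'CERN-PROD', 14.50, SYSDATE, 'benchmark_import');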

CREATE TABLE "ATLAS_PANDA"."WORKER_NODE_MAP"(
    "ATLAS_SITE" varchar2(128),
    "WORKER_NODE" varchar2(128),
    "CPU_TYPE" varchar(128),
    "LAST_SEEN" date,
    "CORES" number(9,0),
    "ARCHITECTURE_LEVEL" varchar2(20),
    CONSTRAINT PK_WORKER_NODE_MAP PRIMARY KEY ("ATLAS_SITE", "WORKER_NODE")
);


create or replace PROCEDURE UPDATE_WORKER_NODE_MAP
AS
BEGIN

-- 2025 02 24, ver 1.0
-- to easily identify the session and better view on resource usage by setting a dedicated module for the PanDA jobs
DBMS_APPLICATION_INFO.SET_MODULE( module_name => 'PanDA scheduler job', action_name => 'Updates worker node map with last days job data');
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => sys_context('userenv', 'host') || ' ( ' || sys_context('userenv', 'ip_address') || ' )' );

MERGE INTO ATLAS_PANDA.WORKER_NODE_MAP WNM
USING (
    WITH sc_slimmed AS (
        SELECT
            panda_queue,
            scj.data.atlas_site AS atlas_site
        FROM
            atlas_panda.schedconfig_json scj
    )
    SELECT
        DISTINCT
        sc_slimmed.atlas_site,
        CASE
            WHEN INSTR(jobsarchived4.modificationhost, '@') > 0
            THEN REGEXP_SUBSTR(jobsarchived4.modificationhost, '@(.+)', 1, 1, NULL, 1)
            ELSE jobsarchived4.modificationhost
        END AS WORKERNODE,
        REGEXP_SUBSTR(
            cpuconsumptionunit,
            's?\+?(.+?)\s\d+-Core',
            1, 1, NULL, 1
        ) AS CPU_TYPE,
        MAX(
            CASE
                WHEN cpuconsumptionunit IS NULL OR TRIM(cpuconsumptionunit) = ''
                THEN 0
                WHEN cpuconsumptionunit NOT LIKE '%-Core%'
                THEN 0
                ELSE
                    TO_NUMBER(NVL(REGEXP_SUBSTR(cpuconsumptionunit, '(\d+)-Core', 1, 1, NULL, 1), -1))
            END
        ) AS NUM_CORE,
        CPU_ARCHITECTURE_LEVEL
    FROM
        atlas_panda.jobsarchived4
    JOIN
        sc_slimmed
        ON jobsarchived4.computingsite = sc_slimmed.panda_queue
    WHERE
        endtime > sysdate - interval '1' day
        AND jobstatus IN ('finished', 'failed')
        AND modificationhost NOT LIKE 'aipanda%'
        AND CPU_ARCHITECTURE_LEVEL IS NOT NULL
        AND REGEXP_SUBSTR(
            cpuconsumptionunit,
            's?\+?(.+?)\s\d+-Core',
            1, 1, NULL, 1
        ) IS NOT NULL
    GROUP BY
        sc_slimmed.atlas_site,
        CASE
            WHEN INSTR(jobsarchived4.modificationhost, '@') > 0
            THEN REGEXP_SUBSTR(jobsarchived4.modificationhost, '@(.+)', 1, 1, NULL, 1)
            ELSE jobsarchived4.modificationhost
        END,
        REGEXP_SUBSTR(
            cpuconsumptionunit,
            's?\+?(.+?)\s\d+-Core',
            1, 1, NULL, 1
        ),
        CPU_ARCHITECTURE_LEVEL
) source
ON (
    source.ATLAS_SITE = WNM.ATLAS_SITE
    AND source.WORKERNODE = WNM.WORKER_NODE
    AND source.CPU_TYPE = WNM.CPU_TYPE
)
WHEN MATCHED THEN
    UPDATE SET
        WNM.LAST_SEEN = SYSDATE
WHEN NOT MATCHED THEN
    INSERT (
        ATLAS_SITE, WORKER_NODE, CPU_TYPE, CORES, ARCHITECTURE_LEVEL, LAST_SEEN
    )
    VALUES (
        source.ATLAS_SITE, source.WORKERNODE, source.CPU_TYPE, source.NUM_CORE,
        source.CPU_ARCHITECTURE_LEVEL, SYSDATE
    );

COMMIT;

DBMS_APPLICATION_INFO.SET_MODULE( module_name => null, action_name => null);
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => null);

end;
/


BEGIN
    dbms_scheduler.create_job(
        job_name => 'UPDATE_WORKER_NODE_MAP_JOB',
        job_type => 'PLSQL_BLOCK',
        job_action => 'BEGIN ATLAS_PANDA.UPDATE_WORKER_NODE_MAP; END;',
        start_date => SYSTIMESTAMP,
        repeat_interval => 'FREQ=DAILY; BYHOUR=8; BYMINUTE=0; BYSECOND=0;',
        auto_drop => FALSE,
        enabled => TRUE,
        comments => 'Runs every day at 8 AM to refresh WORKER_NODE_MAP data.'
        );
END;
/
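
The job can also be triggered by hand and its schedule checked through the standard scheduler views:

BEGIN
    dbms_scheduler.run_job('UPDATE_WORKER_NODE_MAP_JOB');
END;
/

SELECT job_name, state, next_run_date
FROM user_scheduler_jobs
WHERE job_name = 'UPDATE_WORKER_NODE_MAP_JOB';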

CREATE TABLE "ATLAS_PANDA"."WORKER_NODE_METRICS"(
    "SITE" varchar2(128),
    "HOST_NAME" varchar2(128),
    "TIMESTAMP" TIMESTAMP DEFAULT SYSTIMESTAMP AT TIME ZONE 'UTC',
    "KEY" varchar2(20),
    "STATISTICS" varchar2(500),
    CONSTRAINT wn_metrics_json CHECK ("STATISTICS" IS JSON) ENABLE
)
PARTITION BY RANGE ("TIMESTAMP")
INTERVAL (NUMTOYMINTERVAL(1, 'MONTH')) (
    PARTITION "WN_METRICS_BASE" VALUES LESS THAN (TO_DATE('2025-03-01', 'YYYY-MM-DD'))
);

CREATE INDEX mn_metrics_idx ON "ATLAS_PANDA"."WORKER_NODE_METRICS"("SITE", "HOST_NAME", "TIMESTAMP");

-- Table Comment
COMMENT ON TABLE "ATLAS_PANDA"."WORKER_NODE_METRICS" IS 'Metrics related to a worker node';

-- Column Comments
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_METRICS"."SITE" IS 'The name of the site (not PanDA queue) where the worker node is located.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_METRICS"."HOST_NAME" IS 'The hostname of the worker node.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_METRICS"."TIMESTAMP" IS 'Timestamp the metrics were collected.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_METRICS"."KEY" IS 'Key of the metrics entry.';
COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_METRICS"."STATISTICS" IS 'Metrics in json format.';


create or replace PROCEDURE UPDATE_WORKER_NODE_METRICS
AS
BEGIN

-- 2025 03 19, ver 1.0
-- to easily identify the session and better view on resource usage by setting a dedicated module for the PanDA jobs
DBMS_APPLICATION_INFO.SET_MODULE( module_name => 'PanDA scheduler job', action_name => 'Updates worker node statistics with last days job and worker data');
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => sys_context('userenv', 'host') || ' ( ' || sys_context('userenv', 'ip_address') || ' )' );


INSERT INTO "ATLAS_PANDA"."WORKER_NODE_METRICS" (site, host_name, key, statistics)
WITH sc_slimmed AS (
    SELECT
        panda_queue,
        scj.data.atlas_site AS atlas_site
    FROM
        atlas_panda.schedconfig_json scj
),
pilot_statistics AS(
SELECT 
    sc_slimmed.atlas_site,
    CASE
        WHEN INSTR(jobsarchived4.modificationhost, '@') > 0
        THEN REGEXP_SUBSTR(jobsarchived4.modificationhost, '@(.+)', 1, 1, NULL, 1)
        ELSE jobsarchived4.modificationhost
    END as worker_node,
    'jobs' as KEY,
    JSON_OBJECT(
        KEY 'jobs_failed' VALUE COUNT(CASE WHEN jobstatus = 'failed' THEN 1 END),
        KEY 'jobs_finished' VALUE COUNT(CASE WHEN jobstatus = 'finished' THEN 1 END),
        KEY 'hc_failed' VALUE COUNT(CASE WHEN jobstatus = 'failed' AND produsername = 'gangarbt' THEN 1 END),
        KEY 'hc_finished' VALUE COUNT(CASE WHEN jobstatus = 'finished' AND produsername = 'gangarbt' THEN 1 END),
        KEY 'hssec_failed' VALUE SUM(CASE WHEN jobstatus = 'failed' THEN hs06sec ELSE 0 END),
        KEY 'hssec_finished' VALUE SUM(CASE WHEN jobstatus = 'finished' THEN hs06sec ELSE 0 END)
    ) AS pilot
FROM atlas_panda.jobsarchived4
JOIN sc_slimmed ON computingsite = sc_slimmed.panda_queue
WHERE endtime > CAST(SYSTIMESTAMP AT TIME ZONE 'UTC' AS DATE) - INTERVAL '1' DAY
AND jobstatus IN ('finished', 'failed')
AND modificationhost not like 'aipanda%'
GROUP BY sc_slimmed.atlas_site,
    CASE
        WHEN INSTR(jobsarchived4.modificationhost, '@') > 0
        THEN REGEXP_SUBSTR(jobsarchived4.modificationhost, '@(.+)', 1, 1, NULL, 1)
        ELSE jobsarchived4.modificationhost
    END),
harvester_statistics AS (
SELECT 
    sc_slimmed.atlas_site,
    TO_CHAR(SYSDATE, 'YYYY-MM-DD HH24:MI:SS') AS timestamp, 
    'workers' AS KEY,
    JSON_OBJECT(
        KEY 'worker_failed' VALUE COUNT(CASE WHEN status = 'failed' THEN 1 END),
        KEY 'worker_finished' VALUE COUNT(CASE WHEN status = 'finished' THEN 1 END),
        KEY 'worker_cancelled' VALUE COUNT(CASE WHEN status = 'cancelled' THEN 1 END)
    ) AS harvestor,
    CASE
        WHEN INSTR(nodeid, '@') > 0
        THEN REGEXP_SUBSTR(nodeid, '@(.+)', 1, 1, NULL, 1)
        ELSE nodeid
    END AS workernode
FROM atlas_panda.harvester_workers
JOIN sc_slimmed ON computingsite = sc_slimmed.panda_queue
WHERE endtime > CAST(SYSTIMESTAMP AT TIME ZONE 'UTC' AS DATE) - INTERVAL '1' DAY
AND status IN ('finished', 'failed')
GROUP BY 
    sc_slimmed.atlas_site,
    CASE
        WHEN INSTR(nodeid, '@') > 0
        THEN REGEXP_SUBSTR(nodeid, '@(.+)', 1, 1, NULL, 1)
        ELSE nodeid
    END)
SELECT atlas_site, worker_node, key, pilot FROM pilot_statistics
UNION ALL
SELECT atlas_site, workernode, key, harvestor FROM harvester_statistics;

COMMIT;

DBMS_APPLICATION_INFO.SET_MODULE( module_name => null, action_name => null);
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => null);

end;
/


BEGIN
    dbms_scheduler.create_job(
        job_name => 'UPDATE_WORKER_NODE_METRICS_JOB',
        job_type => 'PLSQL_BLOCK',
        job_action => 'BEGIN ATLAS_PANDA.UPDATE_WORKER_NODE_METRICS; END;',
        start_date => SYSTIMESTAMP,
        repeat_interval => 'FREQ=DAILY; BYHOUR=8; BYMINUTE=0; BYSECOND=0;',
        auto_drop => FALSE,
        enabled => TRUE,
        comments => 'Runs every day at 8 AM to refresh WORKER_NODE_METRICS data.'
        );
END;
/

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=25 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=25 where component ='SERVER';
COMMIT;

14 February 2025 (DB version 0.0.24): add SOURCE_TAPE and PARAMETERS columns to DATA_CAROUSEL_REQUESTS

ALTER TABLE "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS" 
ADD (
    "SOURCE_TAPE" VARCHAR2(64 BYTE),
    "PARAMETERS" CLOB
);

ALTER TABLE "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS" ADD CONSTRAINT ensure_json_parameters CHECK ("PARAMETERS" IS JSON);

COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."SOURCE_TAPE" IS 'Physical tape behind source RSE';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."PARAMETERS" IS 'Extra parameters of staging in JSON';

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=24 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=24 where component ='SERVER';
COMMIT;

04 December 2024 (DB version 0.0.23): add system metric related tables and columns

CREATE TABLE "ATLAS_PANDA"."JOB_METRICS" (
    "PANDAID" NUMBER(11) NOT NULL,
    "JEDITASKID" NUMBER(11),
    "CREATIONTIME" DATE,
    "MODIFICATIONTIME" DATE,
    "DATA" CLOB,
    CONSTRAINT "PK_JOB_METRICS" PRIMARY KEY ("PANDAID")
)
PARTITION BY RANGE ("MODIFICATIONTIME") 
INTERVAL (NUMTOYMINTERVAL(1, 'MONTH')) (
    PARTITION "P_BASE" VALUES LESS THAN (TO_DATE('2024-12-01', 'YYYY-MM-DD'))
);

COMMENT ON TABLE "ATLAS_PANDA"."JOB_METRICS" IS 'System metrics per job';
COMMENT ON COLUMN "ATLAS_PANDA"."JOB_METRICS"."PANDAID" IS 'PandaID for the job';
COMMENT ON COLUMN "ATLAS_PANDA"."JOB_METRICS"."JEDITASKID" IS 'JEDI task ID for the job';
COMMENT ON COLUMN "ATLAS_PANDA"."JOB_METRICS"."CREATIONTIME" IS 'Time of data creation';
COMMENT ON COLUMN "ATLAS_PANDA"."JOB_METRICS"."MODIFICATIONTIME" IS 'Time of last update';
COMMENT ON COLUMN "ATLAS_PANDA"."JOB_METRICS"."DATA" IS 'Serialized dictionary of job metrics';


CREATE TABLE "ATLAS_PANDA"."TASK_METRICS" (
    "JEDITASKID" NUMBER(11) NOT NULL,
    "CREATIONTIME" DATE,
    "MODIFICATIONTIME" DATE,
    "DATA" CLOB,
    CONSTRAINT "PK_TASK_METRICS" PRIMARY KEY ("JEDITASKID")
)
PARTITION BY RANGE ("MODIFICATIONTIME") 
INTERVAL (NUMTOYMINTERVAL(1, 'MONTH')) (
    PARTITION "P_BASE" VALUES LESS THAN (TO_DATE('2024-12-01', 'YYYY-MM-DD'))
);

COMMENT ON TABLE "ATLAS_PANDA"."TASK_METRICS" IS 'System metrics per task';
COMMENT ON COLUMN "ATLAS_PANDA"."TASK_METRICS"."JEDITASKID" IS 'JEDI task ID for the task';
COMMENT ON COLUMN "ATLAS_PANDA"."TASK_METRICS"."CREATIONTIME" IS 'Time of data creation';
COMMENT ON COLUMN "ATLAS_PANDA"."TASK_METRICS"."MODIFICATIONTIME" IS 'Time of last update';
COMMENT ON COLUMN "ATLAS_PANDA"."TASK_METRICS"."DATA" IS 'Serialized dictionary of task metrics';

ALTER SESSION set DDL_LOCK_TIMEOUT = 30;
ALTER TABLE "ATLAS_PANDA"."JEDI_TASKS" ADD "ACTIVATEDTIME" DATE;
ALTER TABLE "ATLAS_PANDA"."JEDI_TASKS" ADD "QUEUEDTIME" DATE;

COMMENT ON COLUMN "ATLAS_PANDA"."JEDI_TASKS"."ACTIVATEDTIME" IS 'Time of activation processing workload';
COMMENT ON COLUMN "ATLAS_PANDA"."JEDI_TASKS"."QUEUEDTIME" IS 'Start time of queuing period ready to generate jobs';

ALTER TABLE "ATLAS_PANDA"."CONFIG"
ADD "VALUE_JSON" CLOB 
CONSTRAINT "VALUE_JSON_IS_JSON" CHECK ("VALUE_JSON" IS JSON);

ALTER TABLE "ATLAS_PANDA"."CONFIG"
DROP CONSTRAINT "CONFIG_VALUE_NN";

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=23 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=23 where component ='SERVER';

21 November 2024 (DB version 0.0.22): add ERROR_CLASSIFICATION table

CREATE TABLE "ATLAS_PANDA"."ERROR_CLASSIFICATION" ( 
"ID" NUMBER GENERATED ALWAYS AS IDENTITY ( START WITH 1000000 INCREMENT BY 1 NOCACHE NOORDER ) NOT NULL, 
"ERROR_SOURCE" VARCHAR2(30 BYTE) NOT NULL, 
"ERROR_CODE" NUMBER(10, 0) NOT NULL, 
"ERROR_DIAG" VARCHAR2(256 BYTE) NOT NULL, 
"DESCRIPTION" VARCHAR2(250 BYTE), 
"ERROR_CLASS" VARCHAR2(30 BYTE) NOT NULL, 
"ACTIVE" CHAR(1 BYTE) NOT NULL,
"REG_DATE" TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP, 
CONSTRAINT "PK_ERROR_CLASSIFICATION" PRIMARY KEY (ID)
);

COMMENT ON TABLE "ATLAS_PANDA"."ERROR_CLASSIFICATION" IS 'Classification of job error codes+messages to system, user or others';
COMMENT ON COLUMN "ATLAS_PANDA"."ERROR_CLASSIFICATION"."ID" IS 'Sequential ID of the request. 1M offset to avoid overlapping IDs with retry module';
COMMENT ON COLUMN "ATLAS_PANDA"."ERROR_CLASSIFICATION"."ERROR_SOURCE" IS 'Source of the error: pilotErrorCode, exeErrorCode, ddmErrorCode...';
COMMENT ON COLUMN "ATLAS_PANDA"."ERROR_CLASSIFICATION"."ERROR_CODE" IS 'Error code number';
COMMENT ON COLUMN "ATLAS_PANDA"."ERROR_CLASSIFICATION"."ERROR_DIAG" IS 'Error message';
COMMENT ON COLUMN "ATLAS_PANDA"."ERROR_CLASSIFICATION"."DESCRIPTION" IS 'Any description or comment on the entry';
COMMENT ON COLUMN "ATLAS_PANDA"."ERROR_CLASSIFICATION"."ERROR_CLASS" IS 'Error class: system, user,...';
COMMENT ON COLUMN "ATLAS_PANDA"."ERROR_CLASSIFICATION"."ACTIVE" IS 'Y or N. Depending on whether the entry is confirmed';
COMMENT ON COLUMN "ATLAS_PANDA"."ERROR_CLASSIFICATION"."REG_DATE" IS 'Registration date, defaults to current timestamp';

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=22 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=22 where component ='SERVER';
COMMIT;

18 November 2024 (DB version 0.0.21): add DATA_CAROUSEL tables and sequence. Add new column JEDI_TASKS.framework

CREATE TABLE "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS" (
    "REQUEST_ID" NUMBER NOT NULL,
    "DATASET" VARCHAR2(256 BYTE) NOT NULL,
    "SOURCE_RSE" VARCHAR2(64 BYTE),
    "DESTINATION_RSE" VARCHAR2(64 BYTE),
    "DDM_RULE_ID" VARCHAR2(64 BYTE),
    "STATUS" VARCHAR2(32 BYTE),
    "TOTAL_FILES" NUMBER(9,0),
    "STAGED_FILES" NUMBER(9,0),
    "DATASET_SIZE" NUMBER(18,0),
    "STAGED_SIZE" NUMBER(18,0),
    "CREATION_TIME" DATE,
    "START_TIME" DATE,
    "END_TIME" DATE,
    "MODIFICATION_TIME" DATE,
    "CHECK_TIME" DATE,
    CONSTRAINT "DATA_CAROU_REQ_PK" PRIMARY KEY ("REQUEST_ID") ENABLE
);

COMMENT ON TABLE "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS" IS 'Table of Data Carousel requests';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."REQUEST_ID" IS 'Sequential ID of the request, generated from Oracle sequence object ATLAS_PANDA.JEDI_DATA_CAROUSEL_REQUEST_ID_SEQ when new request is inserted';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."DATASET" IS 'Dataset to stage';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."SOURCE_RSE" IS 'Source RSE (usually tape) of staging';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."DESTINATION_RSE" IS 'Destination RSE (usually DATADISK) of staging';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."DDM_RULE_ID" IS 'DDM rule ID of the staging rule';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."STATUS" IS 'Status of the request';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."TOTAL_FILES" IS 'Number of total files of the dataset';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."STAGED_FILES" IS 'Number of files already staged';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."DATASET_SIZE" IS 'Size in bytes of the dataset';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."STAGED_SIZE" IS 'Size in bytes of files already staged';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."CREATION_TIME" IS 'Timestamp when the request is created';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."START_TIME" IS 'Timestamp when the request starts staging';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."END_TIME" IS 'Timestamp when the request ended';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."MODIFICATION_TIME" IS 'Timestamp of the last request update';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"."CHECK_TIME" IS 'Last time when the request was checked';

CREATE SEQUENCE  "ATLAS_PANDA"."JEDI_DATA_CAROUSEL_REQUEST_ID_SEQ"  MINVALUE 1 MAXVALUE 9999999999999999999999999999 INCREMENT BY 1 START WITH 1 NOCACHE  NOORDER NOCYCLE;
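
New requests draw their REQUEST_ID from this sequence; a minimal sketch of an insert (dataset, RSE names and status value are hypothetical):

INSERT INTO "ATLAS_PANDA"."DATA_CAROUSEL_REQUESTS"
    ("REQUEST_ID", "DATASET", "SOURCE_RSE", "DESTINATION_RSE", "STATUS", "CREATION_TIME")
VALUES ("ATLAS_PANDA"."JEDI_DATA_CAROUSEL_REQUEST_ID_SEQ".NEXTVAL,
        'mc23_13p6TeV.999999.EXAMPLE.DAOD', 'SOME-RSE_TAPE', 'SOME-RSE_DATADISK',
        'queued', SYSDATE);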

CREATE TABLE "ATLAS_PANDA"."DATA_CAROUSEL_RELATIONS" (
    "REQUEST_ID" NUMBER(12,0) NOT NULL,
    "TASK_ID" NUMBER(12,0) NOT NULL,
    CONSTRAINT "DATA_CAROU_REL_UC" UNIQUE ("REQUEST_ID", "TASK_ID") ENABLE
);

COMMENT ON TABLE "ATLAS_PANDA"."DATA_CAROUSEL_RELATIONS" IS 'Table of mapping between Data Carousel requests and tasks';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_RELATIONS"."REQUEST_ID" IS 'ID of the request';
COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_RELATIONS"."TASK_ID" IS 'ID of the task';

ALTER TABLE "ATLAS_PANDA"."JEDI_TASKS" ADD "FRAMEWORK" VARCHAR2(50);
COMMENT ON COLUMN "ATLAS_PANDA"."JEDI_TASKS"."FRAMEWORK" IS 'Submission framework that was used to generate the task';


UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=21 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=21 where component ='SERVER';

16 September 2024 (DB version 0.0.20): add ATLAS_PANDA.JOBS_SHARE_STATS.PRORATED_MEM_AVG and ATLAS_PANDA.JOBSDEFINED_SHARE_STATS.PRORATED_MEM_AVG

ALTER SESSION set DDL_LOCK_TIMEOUT = 30;
ALTER TABLE "ATLAS_PANDA"."JOBS_SHARE_STATS" ADD "PRORATED_MEM_AVG" NUMBER(11,2);
ALTER TABLE "ATLAS_PANDA"."JOBSDEFINED_SHARE_STATS" ADD "PRORATED_MEM_AVG" NUMBER(11,2);    

COMMENT ON COLUMN "ATLAS_PANDA"."JOBS_SHARE_STATS"."PRORATED_MEM_AVG" IS 'avg(minRamCount/corecount)';
COMMENT ON COLUMN "ATLAS_PANDA"."JOBSDEFINED_SHARE_STATS"."PRORATED_MEM_AVG" IS 'avg(minRamCount/corecount)';

set define off;

CREATE OR REPLACE PROCEDURE "ATLAS_PANDA"."UPDATE_JOBSACT_STATS_BY_GSHARE"
AS
BEGIN
-- 16th Sept 2024 , ver 1.6
-- 14th Nov 2023 , ver 1.5
-- 27th Nov 2020 , ver 1.4
-- 29th Jan 2018 , ver 1.3
-- to easily identify the session and better view on resource usage by setting a dedicated module for the PanDA jobs
DBMS_APPLICATION_INFO.SET_MODULE( module_name => 'PanDA scheduler job', action_name => 'Aggregates data by global share for the active jobs!');
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => sys_context('userenv', 'host') || ' ( ' || sys_context('userenv', 'ip_address') || ' )' );

DELETE from ATLAS_PANDA.JOBS_SHARE_STATS;

INSERT INTO ATLAS_PANDA.JOBS_SHARE_STATS (TS, GSHARE, WORKQUEUE_ID, RESOURCE_TYPE,
                                          COMPUTINGSITE, JOBSTATUS,
                                          MAXPRIORITY, PRORATED_DISKIO_AVG, PRORATED_MEM_AVG, NJOBS, HS, VO)
WITH
    sc_slimmed AS (
    SELECT /*+ MATERIALIZE */ sc.panda_queue AS pq, sc.data.corepower AS cp
    FROM ATLAS_PANDA.schedconfig_json sc
    )
SELECT sysdate, gshare, workqueue_id, ja4.resource_type, computingSite, jobStatus,
      MAX(currentPriority) AS maxPriority,
      AVG(diskIO/NVL(ja4.coreCount, 1)) AS proratedDiskioAvg, AVG(minRamCount/NVL(ja4.coreCount, 1)) AS proratedMemAvg,
      COUNT(*) AS num_of_jobs, COUNT(*) * NVL(ja4.coreCount, 1) * sc_s.cp AS HS, VO
FROM ATLAS_PANDA.jobsActive4 ja4, sc_slimmed sc_s
WHERE ja4.computingsite = sc_s.pq
GROUP BY sysdate, gshare, workqueue_id, ja4.resource_type, computingSite, jobStatus, ja4.coreCount, sc_s.cp, VO;


COMMIT;

DBMS_APPLICATION_INFO.SET_MODULE( module_name => null, action_name => null);
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => null);

end;
/


--------------------------------------------------------
--  DDL for Procedure UPDATE_JOBSDEF_STATS_BY_GSHARE
--------------------------------------------------------
set define off;

CREATE OR REPLACE PROCEDURE "ATLAS_PANDA"."UPDATE_JOBSDEF_STATS_BY_GSHARE"
AS
BEGIN
-- 16th Sept 2024 , ver 1.2
-- 14th Nov 2023 , ver 1.1
-- 27th Nov 2020 , ver 1.0
-- Based on UPDATE_JOBSACT_STATS_BY_GSHARE
-- to easily identify the session and better view on resource usage by setting a dedicated module for the PanDA jobs
DBMS_APPLICATION_INFO.SET_MODULE( module_name => 'PanDA scheduler job', action_name => 'Aggregates data by global share for the defined jobs!');
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => sys_context('userenv', 'host') || ' ( ' || sys_context('userenv', 'ip_address') || ' )' );


DELETE from ATLAS_PANDA.JOBSDEFINED_SHARE_STATS;

INSERT INTO ATLAS_PANDA.JOBSDEFINED_SHARE_STATS (TS, GSHARE, WORKQUEUE_ID, RESOURCE_TYPE,
                                          COMPUTINGSITE, JOBSTATUS,
                                          MAXPRIORITY, PRORATED_DISKIO_AVG, PRORATED_MEM_AVG, NJOBS, HS, VO)
WITH
    sc_slimmed AS (
    SELECT /*+ MATERIALIZE */ sc.panda_queue AS pq, sc.data.corepower AS cp
    FROM ATLAS_PANDA.schedconfig_json sc
    )
SELECT sysdate, gshare, workqueue_id, ja4.resource_type, computingSite, jobStatus,
      MAX(currentPriority) AS maxPriority,
      AVG(diskIO/NVL(ja4.coreCount, 1)) AS proratedDiskioAvg, AVG(minRamCount/NVL(ja4.coreCount, 1)) AS proratedMemAvg,
      COUNT(*) AS num_of_jobs, COUNT(*) * NVL(ja4.coreCount, 1) * sc_s.cp AS HS, VO
FROM ATLAS_PANDA.jobsDefined4 ja4, sc_slimmed sc_s
WHERE ja4.computingsite = sc_s.pq
GROUP BY sysdate, gshare, workqueue_id, ja4.resource_type, computingSite, jobStatus, ja4.coreCount, sc_s.cp, VO;


COMMIT;

DBMS_APPLICATION_INFO.SET_MODULE( module_name => null, action_name => null);
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => null);

end;
/


UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=20 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=20 where component ='SERVER';
COMMIT;

2 September 2024 (DB version 0.0.19): add NUM_OF_CORES column to mv_jobsactive4_stats

ALTER SESSION set DDL_LOCK_TIMEOUT = 30;
ALTER TABLE "ATLAS_PANDA"."MV_JOBSACTIVE4_STATS" ADD "NUM_OF_CORES" NUMBER;

COMMENT ON COLUMN "ATLAS_PANDA"."MV_JOBSACTIVE4_STATS"."NUM_OF_CORES" IS 'Number of cores computed by grouping all set of attributes(columns) listed in that column, ';

set define off;

  CREATE OR REPLACE PROCEDURE "ATLAS_PANDA"."UPDATE_JOBSACTIVE_STATS" 
AS
BEGIN

-- ver 1.3 , last modified on 2nd September 2024
-- added NUM_OF_CORES columns
-- ver 1.2 , last modified on 2nd July 2013
-- added VO and WORKQUEUE_ID columns

-- to easily identify the session and better view on resource usage by setting a dedicated module for the PanDA jobs
DBMS_APPLICATION_INFO.SET_MODULE( module_name => 'PanDA scheduler job', action_name => 'Aggregates data for the active jobs!');
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => sys_context('userenv', 'host') || ' ( ' || sys_context('userenv', 'ip_address') || ' )' );


DELETE from mv_jobsactive4_stats;

INSERT INTO mv_jobsactive4_stats
  (CUR_DATE,
  CLOUD,
  COMPUTINGSITE,
  COUNTRYGROUP,
  WORKINGGROUP,
  RELOCATIONFLAG,
  JOBSTATUS,
  PROCESSINGTYPE,
  PRODSOURCELABEL,
  CURRENTPRIORITY,
  VO,
  WORKQUEUE_ID,
  NUM_OF_JOBS,
  NUM_OF_CORES
  )
  SELECT
    sysdate,
    cloud,
    computingSite,
    countrygroup,
    workinggroup,
    relocationflag,
    jobStatus,
    processingType,
    prodSourceLabel,
    TRUNC(currentPriority, -1) AS currentPriority,
    VO,
    WORKQUEUE_ID,
    COUNT(*)  AS num_of_jobs,
    SUM(COALESCE(actualcorecount, corecount)) AS num_of_cores
  FROM jobsActive4
  GROUP BY
    sysdate,
    cloud,
    computingSite,
    countrygroup,
    workinggroup,
    relocationflag,
    jobStatus,
    processingType,
    prodSourceLabel,
    TRUNC(currentPriority, -1),
    VO,
    WORKQUEUE_ID;
commit;

DBMS_APPLICATION_INFO.SET_MODULE( module_name => null, action_name => null);
DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => null);

end;
/

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=19 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=19 where component ='SERVER';
COMMIT;

10 July 2024 (DB version 0.0.18): add MINRAMCOUNT column to Harvester Worker table

ALTER SESSION set DDL_LOCK_TIMEOUT = 30;
ALTER TABLE "ATLAS_PANDA"."HARVESTER_WORKERS" ADD "MINRAMCOUNT" NUMBER(11,0);

COMMENT ON COLUMN "ATLAS_PANDA"."HARVESTER_WORKERS"."MINRAMCOUNT" IS 'Worker memory requirements';

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=18 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=18 where component ='SERVER';
COMMIT;

21 May 2024 (DB version 0.0.17): add OUTPUTFILETYPE column to JOBS tables

-- As ATLAS_PANDA
ALTER SESSION set DDL_LOCK_TIMEOUT = 30;
ALTER TABLE "ATLAS_PANDA"."JOBSDEFINED4" ADD "OUTPUTFILETYPE" VARCHAR2(32);
ALTER TABLE "ATLAS_PANDA"."JOBSWAITING4" ADD "OUTPUTFILETYPE" VARCHAR2(32);
ALTER TABLE "ATLAS_PANDA"."JOBSACTIVE4" ADD "OUTPUTFILETYPE" VARCHAR2(32);
ALTER TABLE "ATLAS_PANDA"."JOBSARCHIVED4" ADD "OUTPUTFILETYPE" VARCHAR2(32);

COMMENT ON COLUMN "ATLAS_PANDA"."JOBSDEFINED4"."OUTPUTFILETYPE" IS 'Comma-separated list of output file types for the job';
COMMENT ON COLUMN "ATLAS_PANDA"."JOBSWAITING4"."OUTPUTFILETYPE" IS 'Comma-separated list of output file types for the job';
COMMENT ON COLUMN "ATLAS_PANDA"."JOBSACTIVE4"."OUTPUTFILETYPE" IS 'Comma-separated list of output file types for the job';
COMMENT ON COLUMN "ATLAS_PANDA"."JOBSARCHIVED4"."OUTPUTFILETYPE" IS 'Comma-separated list of output file types for the job';

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=17 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=17 where component ='SERVER';
COMMIT;

-- Update BULKCOPY_PANDA_PARTITIONS

-- As ATLAS_PANDAARCH
ALTER TABLE "ATLAS_PANDAARCH"."JOBSARCHIVED" ADD "OUTPUTFILETYPE" VARCHAR2(32);

COMMENT ON COLUMN "ATLAS_PANDAARCH"."JOBSARCHIVED"."OUTPUTFILETYPE" IS 'Comma-separated list of output file types for the job';

28 Aug 2023 (DB version 0.0.16): add REALMODIFICATIONTIME column to JEDI_TASKS table and new SQL_QUEUE table

ALTER SESSION set DDL_LOCK_TIMEOUT = 30;

-- MODIFICATIONTIME TRIGGER
-- New column JEDI_TASKS.REALMODIFICATIONTIME
ALTER TABLE "ATLAS_PANDA"."JEDI_TASKS" ADD ("REALMODIFICATIONTIME" DATE);
COMMENT ON COLUMN "ATLAS_PANDA"."JEDI_TASKS"."REALMODIFICATIONTIME" IS 'Set ALWAYS to last modification time, without any tricks like old timestamps';

CREATE INDEX "ATLAS_PANDA"."JEDI_TASKS_REALMODTIME_IDX" ON "ATLAS_PANDA"."JEDI_TASKS" ("REALMODIFICATIONTIME");

-- Trigger to set JEDI_TASKS.REALMODIFICATIONTIME to current UTC timestamp
CREATE OR REPLACE TRIGGER "ATLAS_PANDA"."UPDATE_REALMODIFICATIONTIME"
BEFORE UPDATE OR INSERT OF MODIFICATIONTIME ON "ATLAS_PANDA"."JEDI_TASKS"
FOR EACH ROW
BEGIN
    :NEW."REALMODIFICATIONTIME" := SYS_EXTRACT_UTC(systimestamp);
END;
/
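
The effect is that REALMODIFICATIONTIME tracks the true update time even when MODIFICATIONTIME is deliberately backdated; a quick check (the JEDITASKID is hypothetical):

UPDATE "ATLAS_PANDA"."JEDI_TASKS"
SET MODIFICATIONTIME = SYSDATE - 30
WHERE JEDITASKID = 12345678;

-- REALMODIFICATIONTIME now holds the current UTC time set by the trigger
SELECT MODIFICATIONTIME, REALMODIFICATIONTIME
FROM "ATLAS_PANDA"."JEDI_TASKS"
WHERE JEDITASKID = 12345678;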

-- SQL_QUEUE TABLE

CREATE TABLE "ATLAS_PANDA"."SQL_QUEUE" 
   (	
    "TOPIC" VARCHAR2(50 BYTE),
    "PANDAID" NUMBER(11,0), 
    "EXECUTION_ORDER" NUMBER(5,0),
    "JEDITASKID" NUMBER(11,0), 
    "CREATIONTIME" DATE,
    "DATA" VARCHAR2(4000 BYTE),
    CONSTRAINT "SQL_QUEUE_PK" PRIMARY KEY ("TOPIC", "PANDAID", "EXECUTION_ORDER")
   );

CREATE INDEX "ATLAS_PANDA"."SQL_QUEUE_TOPIC_TASK_IDX" ON "ATLAS_PANDA"."SQL_QUEUE" ("TOPIC", "JEDITASKID");
CREATE INDEX "ATLAS_PANDA"."SQL_QUEUE_TOPIC_CREATIONTIME_IDX" ON "ATLAS_PANDA"."SQL_QUEUE" ("TOPIC", "CREATIONTIME");
   
COMMENT ON TABLE "ATLAS_PANDA"."SQL_QUEUE" IS 'Queue to send messages between agents';   
COMMENT ON COLUMN "ATLAS_PANDA"."SQL_QUEUE"."TOPIC" IS 'Topic of the message';
COMMENT ON COLUMN "ATLAS_PANDA"."SQL_QUEUE"."PANDAID" IS 'Job ID';
COMMENT ON COLUMN "ATLAS_PANDA"."SQL_QUEUE"."EXECUTION_ORDER" IS 'In case multiple SQLs need to be executed together';
COMMENT ON COLUMN "ATLAS_PANDA"."SQL_QUEUE"."JEDITASKID" IS 'JEDI Task ID in case the messages want to be batched';
COMMENT ON COLUMN "ATLAS_PANDA"."SQL_QUEUE"."CREATIONTIME" IS 'Timestamp when the message was created';
COMMENT ON COLUMN "ATLAS_PANDA"."SQL_QUEUE"."DATA" IS 'CLOB in JSON format containing the SQL query and variables';

-- Update versions
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=16 where component='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=16 where component='SERVER';
COMMIT;

28 Jun 2023 (DB version 0.0.15): add ERROR_DIAG column to JEDI_EVENTS table

-- AS ATLAS_PANDA
ALTER SESSION set DDL_LOCK_TIMEOUT = 30;
ALTER TABLE "ATLAS_PANDA"."JEDI_EVENTS" ADD ("ERROR_DIAG" VARCHAR2(500 BYTE));

DELETE FROM "ATLAS_PANDA"."PANDADB_VERSION" WHERE component='SCHEMA';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=15 where component='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=15 where component='SERVER';
COMMIT;

28 Mar 2023 (DB version 0.0.14): add CPU_ARCHITECTURE_LEVEL column to job tables

-- AS ATLAS_PANDA
ALTER SESSION set DDL_LOCK_TIMEOUT = 30;

ALTER TABLE "ATLAS_PANDA"."JOBSDEFINED4" ADD ("CPU_ARCHITECTURE_LEVEL" VARCHAR2(20));
ALTER TABLE "ATLAS_PANDA"."JOBSWAITING4" ADD ("CPU_ARCHITECTURE_LEVEL" VARCHAR2(20));
ALTER TABLE "ATLAS_PANDA"."JOBSACTIVE4" ADD ("CPU_ARCHITECTURE_LEVEL" VARCHAR2(20));
ALTER TABLE "ATLAS_PANDA"."JOBSARCHIVED4" ADD ("CPU_ARCHITECTURE_LEVEL" VARCHAR2(20));

COMMENT ON COLUMN "ATLAS_PANDA"."JOBSDEFINED4"."CPU_ARCHITECTURE_LEVEL" IS 'Micro architecture level, e.g. x86-64-v1';
COMMENT ON COLUMN "ATLAS_PANDA"."JOBSWAITING4"."CPU_ARCHITECTURE_LEVEL" IS 'Micro architecture level, e.g. x86-64-v1';
COMMENT ON COLUMN "ATLAS_PANDA"."JOBSACTIVE4"."CPU_ARCHITECTURE_LEVEL" IS 'Micro architecture level, e.g. x86-64-v1';
COMMENT ON COLUMN "ATLAS_PANDA"."JOBSARCHIVED4"."CPU_ARCHITECTURE_LEVEL" IS 'Micro architecture level, e.g. x86-64-v1';

-- ADD THE COLUMNS TO THE BULKCOPY_PANDA_PARTITIONS JOB

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=14 where component ='SCHEMA';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=14 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=14 where component ='SERVER';
COMMIT;

-- As ATLAS_PANDAARCH
ALTER TABLE "ATLAS_PANDAARCH"."JOBSARCHIVED" ADD ("CPU_ARCHITECTURE_LEVEL" VARCHAR2(20));

10 Feb 2023 (DB version 0.0.13): add ATLAS_PANDA.CARBON_REGION_EMISSIONS table

-- AS ATLAS_PANDA
CREATE TABLE "ATLAS_PANDA"."CARBON_REGION_EMISSIONS" 
   (	
    "REGION" VARCHAR2(16 BYTE), 
	"TIMESTAMP" DATE, 
	"VALUE" NUMBER(10,0), 
	CONSTRAINT "CARBON_REGION_EMISSIONS_PK" PRIMARY KEY ("REGION", "TIMESTAMP") ENABLE
   ) ORGANIZATION INDEX COMPRESS 1;

COMMENT ON TABLE "ATLAS_PANDA"."CARBON_REGION_EMISSIONS"  IS 'Table to store CO2 emission time series for a region or country';
COMMENT ON COLUMN "ATLAS_PANDA"."CARBON_REGION_EMISSIONS"."REGION" IS 'The name of the region. The region is usually a country, but can be more fine grained in large countries';
COMMENT ON COLUMN "ATLAS_PANDA"."CARBON_REGION_EMISSIONS"."TIMESTAMP" IS 'When the value was taken';
COMMENT ON COLUMN "ATLAS_PANDA"."CARBON_REGION_EMISSIONS"."VALUE" IS 'Emissions value in gCO2/kWh';

ALTER SESSION set DDL_LOCK_TIMEOUT = 30;
ALTER TABLE "ATLAS_PANDA"."JOBSDEFINED4" ADD ("GCO2_REGIONAL" NUMBER(10, 2), "GCO2_GLOBAL NUMBER"(10, 2));
ALTER TABLE "ATLAS_PANDA"."JOBSWAITING4" ADD ("GCO2_REGIONAL" NUMBER(10, 2), "GCO2_GLOBAL NUMBER"(10, 2));
ALTER TABLE "ATLAS_PANDA"."JOBSACTIVE4" ADD ("GCO2_REGIONAL" NUMBER(10, 2), "GCO2_GLOBAL NUMBER"(10, 2));
ALTER TABLE "ATLAS_PANDA"."JOBSARCHIVED4" ADD ("GCO2_REGIONAL" NUMBER(10, 2), "GCO2_GLOBAL NUMBER"(10, 2));

-- ALSO ADD THE COLUMNS TO BULKCOPY_PANDA_PARTITIONS

UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=13 where component ='SCHEMA';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=13 where component ='JEDI';
UPDATE "ATLAS_PANDA"."PANDADB_VERSION" SET major=0, minor=0, patch=13 where component ='SERVER';
COMMIT;

-- As ATLAS_PANDAARCH
ALTER TABLE ATLAS_PANDAARCH.JOBSARCHIVED ADD (GCO2_REGIONAL NUMBER(10, 2), GCO2_GLOBAL NUMBER(10, 2));

16 Sept 2022: add ATLAS_PANDA.TASK_EVALUATION table

CREATE TABLE "ATLAS_PANDA"."TASK_EVALUATION"
(
  "JEDITASKID" NUMBER(11) NOT NULL,
  "METRIC" VARCHAR2(128),
  "VALUE_JSON" CLOB,
  "TIMESTAMP" DATE,
  CONSTRAINT ensure_json_task_evaluation CHECK (value_json IS JSON),
  CONSTRAINT TASK_EVALUATION_PK PRIMARY KEY(jeditaskid, metric)
);

COMMENT ON TABLE "ATLAS_PANDA"."TASK_EVALUATION"  IS 'Evaluation values for active user tasks used to improve analysis job brokerage';
COMMENT ON COLUMN "ATLAS_PANDA"."TASK_EVALUATION"."JEDITASKID" IS 'Task id';
COMMENT ON COLUMN "ATLAS_PANDA"."TASK_EVALUATION"."METRIC" IS 'Metric being stored';
COMMENT ON COLUMN "ATLAS_PANDA"."TASK_EVALUATION"."VALUE_JSON" IS 'Value in JSON format';
COMMENT ON COLUMN "ATLAS_PANDA"."TASK_EVALUATION"."TIMESTAMP" IS 'Timestamp when metric was generated';

20 Jul 2022: add JEDI_DATASETS.NFILESMISSING

ALTER SESSION SET DDL_LOCK_TIMEOUT=600;

ALTER TABLE ATLAS_PANDA.JEDI_DATASETS ADD NFILESMISSING NUMBER(10);
COMMENT ON COLUMN "ATLAS_PANDA"."JEDI_DATASETS"."NFILESMISSING" IS 'The number of missing files for datasets';

commit;

15 Jun 2022: add validation for schedconfig json

ALTER TABLE ATLAS_PANDA.schedconfig_json
ADD CONSTRAINT schedconfig_data_validation check (data is JSON);

24 May 2022: add TASK_ATTEMPTS table and schedule a clean-up job to keep only the last year's data

CREATE TABLE "ATLAS_PANDA"."TASK_ATTEMPTS"
  (
    jeditaskid NUMBER(11),
    attemptnr NUMBER(6),
    starttime DATE,
    endtime DATE,
    startstatus VARCHAR2(32),
    endstatus VARCHAR2(32),
    CONSTRAINT TASK_ATTEMPTS_PK PRIMARY KEY (jeditaskid, attemptnr)
  )
  PARTITION BY RANGE (starttime) INTERVAL (NUMTODSINTERVAL(1, 'DAY')) 
    (PARTITION "DATA_BEFORE_20220524" VALUES LESS THAN (TIMESTAMP '2022-05-24 00:00:00') )
  ;

BEGIN
dbms_scheduler.create_job(
	job_name => 'SL_WINDOW_TASK_ATTEMPTS_1YEAR',
	job_type => 'PLSQL_BLOCK',
	job_action => 'BEGIN ATLAS_PANDA.PANDA_TABLE_SL_WINDOW(''TASK_ATTEMPTS'',''STARTTIME'', 365); END; ',
	start_date => SYSTIMESTAMP,
	repeat_interval => 'FREQ=WEEKLY; BYDAY=TUE; BYHOUR=11; BYMINUTE=0; BYSECOND=0;',
	auto_drop => FALSE,
	enabled => TRUE,
	comments =>  'Sustains 365 days of data sliding window on the TASK_ATTEMPTS table! The table is partitioned daily using the automatic INTERVAL approach');
END;
/
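
Since TASK_ATTEMPTS is interval partitioned by day, the clean-up job maintains the sliding window (presumably by dropping partitions older than 365 days); the current partitions can be listed with:

SELECT partition_name, high_value
FROM user_tab_partitions
WHERE table_name = 'TASK_ATTEMPTS'
ORDER BY partition_position;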

26 Nov 2021: add floating point precision to MEMORY_LEAK_X2 columns

ALTER TABLE ATLAS_PANDAARCH.JOBSARCHIVED modify memory_leak_x2 NUMBER(14,3);

ALTER TABLE ATLAS_PANDA.JOBSDEFINED4 modify memory_leak_x2 NUMBER(14,3);
ALTER TABLE ATLAS_PANDA.JOBSWAITING4 modify memory_leak_x2 NUMBER(14,3);
ALTER TABLE ATLAS_PANDA.JOBSACTIVE4 modify memory_leak_x2 NUMBER(14,3);
ALTER TABLE ATLAS_PANDA.JOBSARCHIVED4 modify memory_leak_x2 NUMBER(14,3);

ALTER TABLE ATLAS_PANDA.JEDI_TASKS modify memory_leak_x2 NUMBER(14,3);

22 Nov 2021: creation of SECRETS table

CREATE TABLE "ATLAS_PANDA"."SECRETS"
   ( "OWNER" VARCHAR2(60) NOT NULL ENABLE,
     "UPDATED_AT" TIMESTAMP (6) NOT NULL ENABLE,
     "DATA" CLOB,
     PRIMARY KEY ("OWNER")
  );

9 Nov 2021: creation of METRICS table and adding new columns to HARVESTER_WORKERS

ALTER TABLE ATLAS_PANDA.HARVESTER_WORKERS ADD (pilotStartTime DATE,
                                               pilotEndTime DATE,
                                               pilotStatus VARCHAR2(80 BYTE),
                                               pilotStatusSyncTime DATE);

CREATE TABLE "DOMA_PANDA"."METRICS"
  (
    computingsite VARCHAR2(128),
    gshare VARCHAR2(32),
    metric VARCHAR2(128),
    value_json CLOB,
    timestamp DATE,
    CONSTRAINT ensure_json_metrics CHECK (value_json IS JSON),
    CONSTRAINT METRICS_SITE_GSHARE_METRIC_UQ UNIQUE (computingsite, gshare, metric)
  );
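
A row satisfying the JSON check constraint would look like this (metric name and payload are hypothetical):

INSERT INTO "DOMA_PANDA"."METRICS"
    (computingsite, gshare, metric, value_json, timestamp)
VALUES ('SOME_QUEUE', 'Analysis', 'queue_pressure',
        '{"running": 120, "queued": 450}', SYSDATE);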