|
34 | 34 | -- IMPORTANT: Please always update to up2date version |
35 | 35 | -------------------------------------------------------- |
36 | 36 |
|
37 | | - INSERT INTO "ATLAS_PANDA"."PANDADB_VERSION" VALUES ('SERVER', 0, 0, 24); |
38 | | - INSERT INTO "ATLAS_PANDA"."PANDADB_VERSION" VALUES ('JEDI', 0, 0, 24); |
| 37 | + INSERT INTO "ATLAS_PANDA"."PANDADB_VERSION" VALUES ('SERVER', 0, 0, 25); |
| 38 | + INSERT INTO "ATLAS_PANDA"."PANDADB_VERSION" VALUES ('JEDI', 0, 0, 25); |
39 | 39 |
|
40 | 40 | -------------------------------------------------------- |
41 | 41 | -- DDL for Sequence FILESTABLE4_ROW_ID_SEQ |
@@ -2937,6 +2937,104 @@ COMMENT ON TABLE "ATLAS_PANDA"."DATA_CAROUSEL_RELATIONS" IS 'Table of mapping be |
2937 | 2937 | COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_RELATIONS"."REQUEST_ID" IS 'ID of the request'; |
2938 | 2938 | COMMENT ON COLUMN "ATLAS_PANDA"."DATA_CAROUSEL_RELATIONS"."TASK_ID" IS 'ID of the task'; |
2939 | 2939 |
|
| 2940 | +-------------------------------------------------------- |
| 2941 | +-- DDL for Table WORKER_NODE |
| 2942 | +-------------------------------------------------------- |
| 2943 | + |
| 2944 | +CREATE TABLE "ATLAS_PANDA"."WORKER_NODE"( |
| 2945 | + "SITE" varchar2(128), |
| 2946 | + "HOST_NAME" varchar2(128), |
| 2947 | + "CPU_MODEL" varchar(128), |
| 2948 | + "N_LOGICAL_CPUS" number(9,0), |
| 2949 | + "N_SOCKETS" number(9,0), |
| 2950 | + "CORES_PER_SOCKET" number(9,0), |
| 2951 | + "THREADS_PER_CORE" number(9,0), |
| 2952 | + "CPU_ARCHITECTURE" varchar2(20), |
| 2953 | + "CPU_ARCHITECTURE_LEVEL" varchar2(20), |
| 2954 | + "CLOCK_SPEED" number(9,2), |
| 2955 | + "TOTAL_MEMORY" number(9,0), |
| 2956 | + "LAST_SEEN" date, |
| 2957 | + CONSTRAINT PK_WORKER_NODE PRIMARY KEY ("SITE", "HOST_NAME", "CPU_MODEL") |
| 2958 | +)ORGANIZATION INDEX COMPRESS 1; |
| 2959 | + |
| 2960 | +CREATE INDEX IDX_WORKER_NODE_LAST_SEEN ON "ATLAS_PANDA"."WORKER_NODE"("LAST_SEEN"); |
| 2961 | + |
| 2962 | +-- Table Comment |
| 2963 | +COMMENT ON TABLE "ATLAS_PANDA"."WORKER_NODE" IS 'Stores information about worker nodes seen by PanDA pilots'; |
| 2964 | + |
| 2965 | +-- Column Comments |
| 2966 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."SITE" IS 'The name of the site (not PanDA queue) where the worker node is located.'; |
| 2967 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."HOST_NAME" IS 'The hostname of the worker node.'; |
| 2968 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CPU_MODEL" IS 'The specific model of the CPU.'; |
| 2969 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."N_LOGICAL_CPUS" IS 'Total number of logical CPUs (calculated as sockets * cores per socket * threads per core).'; |
| 2970 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."N_SOCKETS" IS 'Number of physical CPU sockets.'; |
| 2971 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CORES_PER_SOCKET" IS 'Number of CPU cores per physical socket.'; |
| 2972 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."THREADS_PER_CORE" IS 'Number of threads per CPU core.'; |
| 2973 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CPU_ARCHITECTURE" IS 'The CPU architecture (e.g., x86_64, ARM).'; |
| 2974 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CPU_ARCHITECTURE_LEVEL" IS 'The specific level/version of the CPU architecture.'; |
| 2975 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."CLOCK_SPEED" IS 'Clock speed of the CPU in GHz.'; |
| 2976 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."TOTAL_MEMORY" IS 'Total amount of RAM in MB.'; |
| 2977 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE"."LAST_SEEN" IS 'Timestamp of the last time the worker node was active.'; |
| 2978 | + |
| 2979 | + |
| 2980 | +-------------------------------------------------------- |
| 2981 | +-- DDL for Table CPU_BENCHMARKS |
| 2982 | +-------------------------------------------------------- |
| 2983 | + |
| 2984 | +CREATE TABLE "ATLAS_PANDA"."CPU_BENCHMARKS" ( |
| 2985 | + "CPU_TYPE" VARCHAR2(128), |
| 2986 | + "SMT_ENABLED" NUMBER(1), -- 0 or 1 |
| 2987 | + "SOCKETS" NUMBER(2), |
| 2988 | + "CORES_PER_SOCKET" NUMBER(9), |
| 2989 | + "NCORES" NUMBER(9), |
| 2990 | + "SITE" VARCHAR2(128), |
| 2991 | + "SCORE_PER_CORE" NUMBER(10,2), |
| 2992 | + "TIMESTAMP" DATE, |
| 2993 | + "SOURCE" VARCHAR2(256) |
| 2994 | +); |
| 2995 | + |
| 2996 | +-------------------------------------------------------- |
| 2997 | +-- DDL for Table WORKER_NODE_MAP |
| 2998 | +-------------------------------------------------------- |
| 2999 | + |
| 3000 | +CREATE TABLE "ATLAS_PANDA"."WORKER_NODE_MAP"( |
| 3001 | + "ATLAS_SITE" varchar2(128), |
| 3002 | + "WORKER_NODE" varchar2(128), |
| 3003 | + "CPU_TYPE" varchar(128), |
| 3004 | + "LAST_SEEN" date, |
| 3005 | + "CORES" number(9,0), |
| 3006 | + "ARCHITECTURE_LEVEL" varchar2(20), |
| 3007 | + CONSTRAINT PK_WORKER_NODE_MAP PRIMARY KEY ("ATLAS_SITE", "WORKER_NODE") |
| 3008 | +); |
| 3009 | + |
| 3010 | +-------------------------------------------------------- |
| 3011 | +-- DDL for Table WORKER_NODE_METRICS |
| 3012 | +-------------------------------------------------------- |
| 3013 | +CREATE TABLE "ATLAS_PANDA"."WORKER_NODE_METRICS"( |
| 3014 | + "SITE" varchar2(128), |
| 3015 | + "HOST_NAME" varchar2(128), |
| 3016 | + "TIMESTAMP" TIMESTAMP DEFAULT SYSTIMESTAMP AT TIME ZONE 'UTC', |
| 3017 | + "KEY" varchar2(20), |
| 3018 | + "STATISTICS" varchar2(500), |
| 3019 | + CONSTRAINT wn_metrics_json CHECK ("STATISTICS" IS JSON) ENABLE |
| 3020 | +) |
| 3021 | +PARTITION BY RANGE ("TIMESTAMP") |
| 3022 | +INTERVAL (NUMTOYMINTERVAL(1, 'MONTH')) ( |
| 3023 | + PARTITION "WN_METRICS_BASE" VALUES LESS THAN (TO_DATE('2025-03-01', 'YYYY-MM-DD')) |
| 3024 | +); |
| 3025 | + |
| 3026 | +CREATE INDEX mn_metrics_idx ON "ATLAS_PANDA"."WORKER_NODE_METRICS"("SITE", "HOST_NAME", "TIMESTAMP"); |
| 3027 | + |
| 3028 | +-- Table Comment |
| 3029 | +COMMENT ON TABLE "ATLAS_PANDA"."WORKER_NODE_METRICS" IS 'Metrics related to a worker node'; |
| 3030 | + |
| 3031 | +-- Column Comments |
| 3032 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_METRICS"."SITE" IS 'The name of the site (not PanDA queue) where the worker node is located.'; |
| 3033 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_METRICS"."HOST_NAME" IS 'The hostname of the worker node.'; |
| 3034 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_METRICS"."TIMESTAMP" IS 'Timestamp the metrics were collected.'; |
| 3035 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_METRICS"."KEY" IS 'Key of the metrics entry.'; |
| 3036 | +COMMENT ON COLUMN "ATLAS_PANDA"."WORKER_NODE_METRICS"."STATISTICS" IS 'Metrics in json format.'; |
| 3037 | + |
2940 | 3038 |
|
2941 | 3039 | -------------------------------------------------------- |
2942 | 3040 | -- DDL for Index JEDI_DATASETCONTENT_LFN_IDX |
@@ -6144,6 +6242,199 @@ END; |
6144 | 6242 |
|
6145 | 6243 | / |
6146 | 6244 |
|
| 6245 | + |
| 6246 | +-------------------------------------------------------- |
| 6247 | +-- DDL for Procedure UPDATE_WORKER_NODE_MAP |
| 6248 | +-------------------------------------------------------- |
| 6249 | +set define off; |
| 6250 | + |
| 6251 | +create or replace PROCEDURE UPDATE_WORKER_NODE_MAP |
| 6252 | +AS |
| 6253 | +BEGIN |
| 6254 | + |
| 6255 | +-- 2025 02 24, ver 1.0 |
| 6256 | +-- to easy identify the session and better view on resource usage by setting a dedicated module for the PanDA jobs |
| 6257 | +DBMS_APPLICATION_INFO.SET_MODULE( module_name => 'PanDA scheduler job', action_name => 'Updates worker node map with last days job data'); |
| 6258 | +DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => sys_context('userenv', 'host') || ' ( ' || sys_context('userenv', 'ip_address') || ' )' ); |
| 6259 | + |
| 6260 | +MERGE INTO ATLAS_PANDA.WORKER_NODE_MAP WNM |
| 6261 | +USING ( |
| 6262 | + WITH sc_slimmed AS ( |
| 6263 | + SELECT |
| 6264 | + panda_queue, |
| 6265 | + scj.data.atlas_site AS atlas_site |
| 6266 | + FROM |
| 6267 | + atlas_panda.schedconfig_json scj |
| 6268 | + ) |
| 6269 | + SELECT |
| 6270 | + DISTINCT |
| 6271 | + sc_slimmed.atlas_site, |
| 6272 | + CASE |
| 6273 | + WHEN INSTR(jobsarchived4.modificationhost, '@') > 0 |
| 6274 | + THEN REGEXP_SUBSTR(jobsarchived4.modificationhost, '@(.+)', 1, 1, NULL, 1) |
| 6275 | + ELSE jobsarchived4.modificationhost |
| 6276 | + END AS WORKERNODE, |
| 6277 | + REGEXP_SUBSTR( |
| 6278 | + cpuconsumptionunit, |
| 6279 | + 's?\+?(.+?)\s\d+-Core', |
| 6280 | + 1, 1, NULL, 1 |
| 6281 | + ) AS CPU_TYPE, |
| 6282 | + MAX( |
| 6283 | + CASE |
| 6284 | + WHEN cpuconsumptionunit IS NULL OR TRIM(cpuconsumptionunit) = '' |
| 6285 | + THEN 0 |
| 6286 | + WHEN cpuconsumptionunit NOT LIKE '%-Core%' |
| 6287 | + THEN 0 |
| 6288 | + ELSE |
| 6289 | + TO_NUMBER(NVL(REGEXP_SUBSTR(cpuconsumptionunit, '(\d+)-Core', 1, 1, NULL, 1), -1)) |
| 6290 | + END |
| 6291 | + ) AS NUM_CORE, |
| 6292 | + CPU_ARCHITECTURE_LEVEL |
| 6293 | + FROM |
| 6294 | + atlas_panda.jobsarchived4 |
| 6295 | + JOIN |
| 6296 | + sc_slimmed |
| 6297 | + ON jobsarchived4.computingsite = sc_slimmed.panda_queue |
| 6298 | + WHERE |
| 6299 | + endtime > sysdate - interval '1' day |
| 6300 | + AND jobstatus IN ('finished', 'failed') |
| 6301 | + AND modificationhost NOT LIKE 'aipanda%' |
| 6302 | + AND CPU_ARCHITECTURE_LEVEL IS NOT NULL |
| 6303 | + AND REGEXP_SUBSTR( |
| 6304 | + cpuconsumptionunit, |
| 6305 | + 's?\+?(.+?)\s\d+-Core', |
| 6306 | + 1, 1, NULL, 1 |
| 6307 | + ) IS NOT NULL |
| 6308 | + GROUP BY |
| 6309 | + sc_slimmed.atlas_site, |
| 6310 | + CASE |
| 6311 | + WHEN INSTR(jobsarchived4.modificationhost, '@') > 0 |
| 6312 | + THEN REGEXP_SUBSTR(jobsarchived4.modificationhost, '@(.+)', 1, 1, NULL, 1) |
| 6313 | + ELSE jobsarchived4.modificationhost |
| 6314 | + END, |
| 6315 | + REGEXP_SUBSTR( |
| 6316 | + cpuconsumptionunit, |
| 6317 | + 's?\+?(.+?)\s\d+-Core', |
| 6318 | + 1, 1, NULL, 1 |
| 6319 | + ), |
| 6320 | + CPU_ARCHITECTURE_LEVEL |
| 6321 | +) source |
| 6322 | +ON ( |
| 6323 | + source.ATLAS_SITE = WNM.ATLAS_SITE |
| 6324 | + AND source.WORKERNODE = WNM.WORKER_NODE |
| 6325 | + AND source.CPU_TYPE = WNM.CPU_TYPE |
| 6326 | +) |
| 6327 | +WHEN MATCHED THEN |
| 6328 | + UPDATE SET |
| 6329 | + WNM.LAST_SEEN = SYSDATE |
| 6330 | +WHEN NOT MATCHED THEN |
| 6331 | + INSERT ( |
| 6332 | + ATLAS_SITE, WORKER_NODE, CPU_TYPE, CORES, ARCHITECTURE_LEVEL, LAST_SEEN |
| 6333 | + ) |
| 6334 | + VALUES ( |
| 6335 | + source.ATLAS_SITE, source.WORKERNODE, source.CPU_TYPE, source.NUM_CORE, |
| 6336 | + source.CPU_ARCHITECTURE_LEVEL, SYSDATE |
| 6337 | + ); |
| 6338 | + |
| 6339 | +COMMIT; |
| 6340 | + |
| 6341 | +DBMS_APPLICATION_INFO.SET_MODULE( module_name => null, action_name => null); |
| 6342 | +DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => null); |
| 6343 | + |
| 6344 | +end; |
| 6345 | + |
| 6346 | +/ |
| 6347 | + |
| 6348 | +-------------------------------------------------------- |
| 6349 | +-- DDL for Procedure UPDATE_WORKER_NODE_METRICS |
| 6350 | +-------------------------------------------------------- |
| 6351 | +set define off; |
| 6352 | + |
| 6353 | +create or replace PROCEDURE UPDATE_WORKER_NODE_METRICS |
| 6354 | +AS |
| 6355 | +BEGIN |
| 6356 | + |
| 6357 | +-- 2025 03 19, ver 1.0 |
| 6358 | +-- to easy identify the session and better view on resource usage by setting a dedicated module for the PanDA jobs |
| 6359 | +DBMS_APPLICATION_INFO.SET_MODULE( module_name => 'PanDA scheduler job', action_name => 'Updates worker node statistics with last days job and worker data'); |
| 6360 | +DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => sys_context('userenv', 'host') || ' ( ' || sys_context('userenv', 'ip_address') || ' )' ); |
| 6361 | + |
| 6362 | + |
| 6363 | +INSERT INTO "ATLAS_PANDA"."WORKER_NODE_METRICS" (site, host_name, key, statistics) |
| 6364 | +WITH sc_slimmed AS ( |
| 6365 | + SELECT |
| 6366 | + panda_queue, |
| 6367 | + scj.data.atlas_site AS atlas_site |
| 6368 | + FROM |
| 6369 | + atlas_panda.schedconfig_json scj |
| 6370 | +), |
| 6371 | +pilot_statistics AS( |
| 6372 | +SELECT |
| 6373 | + sc_slimmed.atlas_site, |
| 6374 | + CASE |
| 6375 | + WHEN INSTR(jobsarchived4.modificationhost, '@') > 0 |
| 6376 | + THEN REGEXP_SUBSTR(jobsarchived4.modificationhost, '@(.+)', 1, 1, NULL, 1) |
| 6377 | + ELSE jobsarchived4.modificationhost |
| 6378 | + END as worker_node, |
| 6379 | + 'jobs' as KEY, |
| 6380 | + JSON_OBJECT( |
| 6381 | + KEY 'jobs_failed' VALUE COUNT(CASE WHEN jobstatus = 'failed' THEN 1 END), |
| 6382 | + KEY 'jobs_finished' VALUE COUNT(CASE WHEN jobstatus = 'finished' THEN 1 END), |
| 6383 | + KEY 'hc_failed' VALUE COUNT(CASE WHEN jobstatus = 'failed' AND produsername = 'gangarbt' THEN 1 END), |
| 6384 | + KEY 'hc_finished' VALUE COUNT(CASE WHEN jobstatus = 'finished' AND produsername = 'gangarbt' THEN 1 END), |
| 6385 | + KEY 'hssec_failed' VALUE SUM(CASE WHEN jobstatus = 'failed' THEN hs06sec ELSE 0 END), |
| 6386 | + KEY 'hssec_finished' VALUE SUM(CASE WHEN jobstatus = 'finished' THEN hs06sec ELSE 0 END) |
| 6387 | + ) AS pilot |
| 6388 | +FROM atlas_panda.jobsarchived4 |
| 6389 | +JOIN sc_slimmed ON computingsite = sc_slimmed.panda_queue |
| 6390 | +WHERE endtime > CAST(SYSTIMESTAMP AT TIME ZONE 'UTC' AS DATE) - INTERVAL '1' DAY |
| 6391 | +AND jobstatus IN ('finished', 'failed') |
| 6392 | +AND modificationhost not like 'aipanda%' |
| 6393 | +GROUP BY sc_slimmed.atlas_site, |
| 6394 | + CASE |
| 6395 | + WHEN INSTR(jobsarchived4.modificationhost, '@') > 0 |
| 6396 | + THEN REGEXP_SUBSTR(jobsarchived4.modificationhost, '@(.+)', 1, 1, NULL, 1) |
| 6397 | + ELSE jobsarchived4.modificationhost |
| 6398 | + END), |
| 6399 | +harvester_statistics AS ( |
| 6400 | +SELECT |
| 6401 | + sc_slimmed.atlas_site, |
| 6402 | + TO_CHAR(SYSDATE, 'YYYY-MM-DD HH24:MI:SS') AS timestamp, |
| 6403 | + 'workers' AS KEY, |
| 6404 | + JSON_OBJECT( |
| 6405 | + KEY 'worker_failed' VALUE COUNT(CASE WHEN status = 'failed' THEN 1 END), |
| 6406 | + KEY 'worker_finished' VALUE COUNT(CASE WHEN status = 'finished' THEN 1 END), |
| 6407 | + KEY 'worker_cancelled' VALUE COUNT(CASE WHEN status = 'cancelled' THEN 1 END) |
| 6408 | + ) AS harvestor, |
| 6409 | + CASE |
| 6410 | + WHEN INSTR(nodeid, '@') > 0 |
| 6411 | + THEN REGEXP_SUBSTR(nodeid, '@(.+)', 1, 1, NULL, 1) |
| 6412 | + ELSE nodeid |
| 6413 | + END AS workernode |
| 6414 | +FROM atlas_panda.harvester_workers |
| 6415 | +JOIN sc_slimmed ON computingsite = sc_slimmed.panda_queue |
| 6416 | +WHERE endtime > CAST(SYSTIMESTAMP AT TIME ZONE 'UTC' AS DATE) - INTERVAL '1' DAY |
| 6417 | +AND status IN ('finished', 'failed') |
| 6418 | +GROUP BY |
| 6419 | + sc_slimmed.atlas_site, |
| 6420 | + CASE |
| 6421 | + WHEN INSTR(nodeid, '@') > 0 |
| 6422 | + THEN REGEXP_SUBSTR(nodeid, '@(.+)', 1, 1, NULL, 1) |
| 6423 | + ELSE nodeid |
| 6424 | + END) |
| 6425 | +SELECT atlas_site, worker_node, key, pilot FROM pilot_statistics |
| 6426 | +UNION ALL |
| 6427 | +SELECT atlas_site, workernode, key, harvestor FROM harvester_statistics; |
| 6428 | + |
| 6429 | +COMMIT; |
| 6430 | + |
| 6431 | +DBMS_APPLICATION_INFO.SET_MODULE( module_name => null, action_name => null); |
| 6432 | +DBMS_APPLICATION_INFO.SET_CLIENT_INFO ( client_info => null); |
| 6433 | + |
| 6434 | +end; |
| 6435 | + |
| 6436 | +/ |
| 6437 | + |
6147 | 6438 | -------------------------------------------------------- |
6148 | 6439 | -- Constraints for Table JOBPARAMSTABLE |
6149 | 6440 | -------------------------------------------------------- |
|
0 commit comments