Changes from 6 commits
1,233 changes: 1,233 additions & 0 deletions atlas-2025-odfr-hepmc/EVNT_exotics.data

Large diffs are not rendered by default.

1,946 changes: 1,946 additions & 0 deletions atlas-2025-odfr-hepmc/EVNT_exotics_datasets.txt

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions atlas-2025-odfr-hepmc/EVNT_exotics_extras.txt
@@ -0,0 +1,40 @@
mc23_13p6TeV.561309.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd600_l50.evgen.EVNT.e8529
mc23_13p6TeV.561301.MGPy8EG_Tchan2EJs_Ld40_rho80_pi20_Xd600_l50.evgen.EVNT.e8529
mc23_13p6TeV.561303.MGPy8EG_Tchan2EJs_Ld40_rho80_pi20_Xd1500_l5.evgen.EVNT.e8529
mc23_13p6TeV.561307.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd600_l5.evgen.EVNT.e8529
mc23_13p6TeV.561304.MGPy8EG_Tchan2EJs_Ld40_rho80_pi20_Xd1500_l50.evgen.EVNT.e8529
mc23_13p6TeV.561305.MGPy8EG_Tchan2EJs_Ld40_rho80_pi20_Xd1500_l500.evgen.EVNT.e8529
mc23_13p6TeV.561306.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd600_l1.evgen.EVNT.e8529
mc23_13p6TeV.561300.MGPy8EG_Tchan2EJs_Ld40_rho80_pi20_Xd600_l5.evgen.EVNT.e8529
mc23_13p6TeV.561308.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd600_l10.evgen.EVNT.e8529
mc23_13p6TeV.561302.MGPy8EG_Tchan2EJs_Ld40_rho80_pi20_Xd600_l500.evgen.EVNT.e8529
mc23_13p6TeV.561317.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1000_l50.evgen.EVNT.e8529
mc23_13p6TeV.561313.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd800_l5.evgen.EVNT.e8529
mc23_13p6TeV.561311.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd600_l500.evgen.EVNT.e8529
mc23_13p6TeV.561319.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1200_l5.evgen.EVNT.e8529
mc23_13p6TeV.561318.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1000_l500.evgen.EVNT.e8529
mc23_13p6TeV.561316.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1000_l5.evgen.EVNT.e8529
mc23_13p6TeV.561312.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd600_l1000.evgen.EVNT.e8529
mc23_13p6TeV.561315.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd800_l500.evgen.EVNT.e8529
mc23_13p6TeV.561314.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd800_l50.evgen.EVNT.e8529
mc23_13p6TeV.561310.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd600_l100.evgen.EVNT.e8529
mc23_13p6TeV.561328.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1500_l1000.evgen.EVNT.e8529
mc23_13p6TeV.561326.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1500_l100.evgen.EVNT.e8529
mc23_13p6TeV.561321.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1200_l500.evgen.EVNT.e8529
mc23_13p6TeV.561320.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1200_l50.evgen.EVNT.e8529
mc23_13p6TeV.561329.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1800_l5.evgen.EVNT.e8529
mc23_13p6TeV.561327.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1500_l500.evgen.EVNT.e8529
mc23_13p6TeV.561324.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1500_l10.evgen.EVNT.e8529
mc23_13p6TeV.561323.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1500_l5.evgen.EVNT.e8529
mc23_13p6TeV.561325.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1500_l50.evgen.EVNT.e8529
mc23_13p6TeV.561322.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1500_l1.evgen.EVNT.e8529
mc23_13p6TeV.561334.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd2000_l500.evgen.EVNT.e8529
mc23_13p6TeV.561333.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd2000_l50.evgen.EVNT.e8529
mc23_13p6TeV.561331.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1800_l500.evgen.EVNT.e8529
mc23_13p6TeV.561330.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd1800_l50.evgen.EVNT.e8529
mc23_13p6TeV.561332.MGPy8EG_Tchan2EJs_Ld20_rho40_pi10_Xd2000_l5.evgen.EVNT.e8529
mc23_13p6TeV.561338.MGPy8EG_Tchan2EJs_Ld10_rho20_pi5_Xd1500_l5.evgen.EVNT.e8529
mc23_13p6TeV.561335.MGPy8EG_Tchan2EJs_Ld10_rho20_pi5_Xd600_l5.evgen.EVNT.e8529
mc23_13p6TeV.561337.MGPy8EG_Tchan2EJs_Ld10_rho20_pi5_Xd600_l500.evgen.EVNT.e8529
mc23_13p6TeV.561339.MGPy8EG_Tchan2EJs_Ld10_rho20_pi5_Xd1500_l50.evgen.EVNT.e8529
mc23_13p6TeV.561336.MGPy8EG_Tchan2EJs_Ld10_rho20_pi5_Xd600_l50.evgen.EVNT.e8529
693 changes: 693 additions & 0 deletions atlas-2025-odfr-hepmc/EVNT_list_Alternative.txt

Large diffs are not rendered by default.

1,183 changes: 1,183 additions & 0 deletions atlas-2025-odfr-hepmc/EVNT_list_Baseline.txt

Large diffs are not rendered by default.

964 changes: 964 additions & 0 deletions atlas-2025-odfr-hepmc/EVNT_list_Specialised.txt

Large diffs are not rendered by default.

1,657 changes: 1,657 additions & 0 deletions atlas-2025-odfr-hepmc/EVNT_list_Systematic.txt

Large diffs are not rendered by default.

6,441 changes: 6,441 additions & 0 deletions atlas-2025-odfr-hepmc/EVNT_metadata.csv

Large diffs are not rendered by default.

4,967 changes: 4,967 additions & 0 deletions atlas-2025-odfr-hepmc/EVNT_prod_request15.csv

Large diffs are not rendered by default.

1,475 changes: 1,475 additions & 0 deletions atlas-2025-odfr-hepmc/EVNT_prod_request23.csv

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions atlas-2025-odfr-hepmc/HEPMC_datasets.txt
@@ -0,0 +1,57 @@
mc16_13TeV.301000.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_120M180.evgen.HEPMC.e3649_e8596
mc16_13TeV.301001.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_180M250.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301002.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_250M400.evgen.HEPMC.e3649_e8596
mc16_13TeV.301003.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_400M600.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301004.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_600M800.evgen.HEPMC.e3649_e8596
mc16_13TeV.301005.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_800M1000.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301006.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_1000M1250.evgen.HEPMC.e3649_e8596
mc16_13TeV.301007.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_1250M1500.evgen.HEPMC.e3649_e8596
mc16_13TeV.301008.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_1500M1750.evgen.HEPMC.e3649_e8596
mc16_13TeV.301009.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_1750M2000.evgen.HEPMC.e3649_e8596
mc16_13TeV.301010.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_2000M2250.evgen.HEPMC.e3649_e8596
mc16_13TeV.301011.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_2250M2500.evgen.HEPMC.e3649_e8596
mc16_13TeV.301012.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_2500M2750.evgen.HEPMC.e3649_e8596
mc16_13TeV.301013.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_2750M3000.evgen.HEPMC.e3649_e8596
mc16_13TeV.301014.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_3000M3500.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301015.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_3500M4000.evgen.HEPMC.e3649_e7400_e8596
mc16_13TeV.301016.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_4000M4500.evgen.HEPMC.e3649_e8596
mc16_13TeV.301017.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_4500M5000.evgen.HEPMC.e3649_e8596
mc16_13TeV.301018.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYee_5000M.evgen.HEPMC.e3649_e7400_e8596
mc16_13TeV.301020.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_120M180.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301021.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_180M250.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301022.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_250M400.evgen.HEPMC.e3649_e7400_e8596
mc16_13TeV.301023.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_400M600.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301024.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_600M800.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301025.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_800M1000.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301026.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_1000M1250.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301027.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_1250M1500.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301028.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_1500M1750.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301029.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_1750M2000.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301030.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_2000M2250.evgen.HEPMC.e3649_e7400_e8596
mc16_13TeV.301031.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_2250M2500.evgen.HEPMC.e3649_e7400_e8596
mc16_13TeV.301032.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_2500M2750.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301033.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_2750M3000.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301034.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_3000M3500.evgen.HEPMC.e3649_e7400_e8596
mc16_13TeV.301035.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_3500M4000.evgen.HEPMC.e3649_e7400_e8596
mc16_13TeV.301036.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_4000M4500.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301037.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_4500M5000.evgen.HEPMC.e3649_e7400_e8596
mc16_13TeV.301038.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYmumu_5000M.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301040.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_120M180.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301041.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_180M250.evgen.HEPMC.e3649_e8596
mc16_13TeV.301042.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_250M400.evgen.HEPMC.e3649_e8596
mc16_13TeV.301043.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_400M600.evgen.HEPMC.e3649_e8596
mc16_13TeV.301044.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_600M800.evgen.HEPMC.e3649_e8596
mc16_13TeV.301045.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_800M1000.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301046.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_1000M1250.evgen.HEPMC.e3649_e8596
mc16_13TeV.301047.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_1250M1500.evgen.HEPMC.e3649_e8596
mc16_13TeV.301048.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_1500M1750.evgen.HEPMC.e3649_e8596
mc16_13TeV.301049.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_1750M2000.evgen.HEPMC.e3649_e8596
mc16_13TeV.301050.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_2000M2250.evgen.HEPMC.e3649_e8596
mc16_13TeV.301051.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_2250M2500.evgen.HEPMC.e3649_e8596
mc16_13TeV.301052.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_2500M2750.evgen.HEPMC.e3649_e8596
mc16_13TeV.301053.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_2750M3000.evgen.HEPMC.e3649_e8596
mc16_13TeV.301054.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_3000M3500.evgen.HEPMC.e3649_e8596
mc16_13TeV.301055.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_3500M4000.evgen.HEPMC.e3649_e8596
mc16_13TeV.301056.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_4000M4500.evgen.HEPMC.e3649_e5984_e8596
mc16_13TeV.301057.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_4500M5000.evgen.HEPMC.e3649_e8596
mc16_13TeV.301058.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_5000M.evgen.HEPMC.e3649_e5984_e8596
142 changes: 142 additions & 0 deletions atlas-2025-odfr-hepmc/README.md
@@ -0,0 +1,142 @@
# Scripts for processing Open Event Generation Data

This repository contains scripts used for generating the open event generation data from
the ATLAS experiment. Everything needed should be included here.

## Scripts and files

This is a complete explanation of the function and origin of all scripts in this directory.

* `evgen_list_from_central_pages.sh` generates lists of datasets based on the ATLAS
"central pages" repositories, which record all baseline, systematic-variation, and
alternative recommended samples and their evgen formats. The result is lists of EVNT
datasets in the files `EVNT_list_*.txt`. Four such lists are generated, containing the
Baseline, Systematic, Alternative, and Specialised samples.
* `EVNT_list_*.txt` are the lists of datasets generated by the above script
* `EVNT_exotics.data` is input provided by the exotics search group conveners of datasets
that they wish to have included in the open data.
* `EVNT_exotics_extras.txt` is a second set of input from the exotics group with an
explicit list of datasets for processing.
* `parse_exotics_input.py` is a script to parse that input data from the exotics group and
create a list of EVNT datasets in `EVNT_exotics_datasets.txt`. This script is relatively slow
because it has to check the rucio database for many possible datasets, so it first checks
`EVNT_exotics_datasets.txt` to see what is already there and skips those. It also takes the
`EVNT_exotics_extras.txt` list as input and appends it directly to the output list.
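The pre-check step can be sketched as follows. This is a minimal illustration of the approach, not the actual `parse_exotics_input.py` code, and `datasets_to_query` is a hypothetical helper name:

```python
# Sketch of the pre-check: avoid slow rucio lookups for dataset names that
# are already present in EVNT_exotics_datasets.txt. Illustrative only.

def datasets_to_query(candidates, existing):
    """Return only the candidate dataset names not already in the output list."""
    known = set(existing)
    return [d for d in candidates if d not in known]
```

Only the names returned by such a helper would then need a rucio database query.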
* `EVNT_exotics_datasets.txt` is the list of exotics EVNT datasets generated by the above
script.
* `create_metadata_AMI.py` is a script to separate the input EVNT datasets into the categories
required for production of HEPMC files and to construct a metadata repository for those
datasets. It reads input from `EVNT_list_*.txt` and `EVNT_exotics_datasets.txt` and outputs
the metadata file `EVNT_metadata.csv` and two production spreadsheets in csv format,
`EVNT_prod_request15.csv` and `EVNT_prod_request23.csv` (one for 13 TeV and one for
13.6 TeV EVNT files, because these belong to different campaigns). Any datasets identified
in `HEPMC_datasets.txt` as having already been processed to HEPMC are skipped in the
production spreadsheets but are still included in the metadata file. Note that keywords are
critical for sample sorting in subsequent steps, so in several cases additional keywords are
added to samples where they are missing or insufficient. The script also creates
`EVNT_empty_datasets.txt`, a list (which should be empty) of datasets that were requested
but appear in rucio to be empty; any such cases should be reported via a CENTRPAGE JIRA
ticket.
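The split into the two production spreadsheets follows the campaign prefix of the dataset name (`mc16_13TeV` vs `mc23_13p6TeV` in the lists above). A minimal sketch, assuming the `request15` spreadsheet holds the 13 TeV entries and `request23` the 13.6 TeV ones:

```python
# Sketch of the campaign split behind the two production spreadsheets.
# The prefix convention matches the dataset names in this repository;
# the function itself is illustrative, not the real script's code.

def split_by_campaign(datasets):
    """Split dataset names into (13 TeV, 13.6 TeV) lists by their scope prefix."""
    req15, req23 = [], []
    for name in datasets:
        scope = name.split(".", 1)[0]  # e.g. "mc16_13TeV" or "mc23_13p6TeV"
        if scope.endswith("13p6TeV"):
            req23.append(name)
        else:
            req15.append(name)
    return req15, req23
```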
* `EVNT_metadata.csv` is a metadata csv file generated by the above script that can be
loaded into the ATLAS Open Data website to help users identify samples. It is also useful
for ATLAS Open Magic.
* `EVNT_prod_request*csv` are production spreadsheets created by the above script that can
be used to create requests in the ATLMCPROD JIRA tracker.
* `build_metadata_json.py` is a script to construct a json and js (javascript) file for
loading into our open data website based on the `EVNT_metadata.csv` file. Its output is
`EVNT_metadata.js`, which packages the metadata csv file with a bit of html and javascript.
* `sample_rules.py` is a dictionary of rules by which samples are sorted into collections.
This is meant to avoid having thousands of Open Data Portal records and to help users find
samples that are conceptually connected.
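The rule structure can be sketched roughly as below. The record names, keyword values, and dictionary layout here are invented for illustration and do not reflect the actual contents of `sample_rules.py`:

```python
# Illustrative sketch of keyword-based sorting into collections, in the
# spirit of sample_rules.py. All names and keywords here are hypothetical.

SAMPLE_RULES = {
    "drell_yan":   {"require": {"drellYan"}},
    "exotic_jets": {"require": {"exotic", "jets"}},
}

def sort_sample(keywords, rules=SAMPLE_RULES):
    """Return the first record whose required keywords are all present, else None."""
    kw = set(keywords)
    for record, rule in rules.items():
        if rule["require"] <= kw:
            return record
    return None
```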
* `keyword_sorting.py` is a script that checks for unsorted samples and displays the
balance over the planned records. It also creates a `od_hepmc_sample_map.json` json file
that has a dictionary mapping Open Data Portal records to keywords and EVNT samples.
* `check_keyword.py` is a script to check for samples matching a particular keyword. Its
positional argument is a comma-separated list of keywords to require. Samples matching any
keyword in a second comma-separated list given after `-n` are excluded. The switch `-x`
excludes any samples that have already been sorted, and `-s` shows the full dataset names
of any samples that are identified. This script is very helpful for seeing which samples
have not yet been sorted according to the rules in `sample_rules.py`. It also outputs
`od_hepmc_sample_map.json`, a json file containing a map from expected Open Data Portal
record names to EVNT sample names.
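The include/exclude matching can be sketched as below, assuming all required keywords must be present while any excluded keyword disqualifies a sample; `match_samples` is a hypothetical helper, not the script's actual code:

```python
# Sketch of the require/exclude keyword filter described for check_keyword.py.
# Assumption: required keywords are ANDed, excluded keywords are ORed.

def match_samples(samples, require, exclude=()):
    """samples: dict mapping sample name -> keyword list; return matching names."""
    req, exc = set(require), set(exclude)
    return [
        name for name, kws in samples.items()
        if req <= set(kws) and not (exc & set(kws))
    ]
```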
* `check_for_HEPMC_datasets.py` is a script to check for HEPMC datasets that have been
generated in the production system. It takes as input the two production spreadsheets
generated above, and outputs `HEPMC_datasets.txt`, a list of datasets that are in HEPMC
format and which should be ready for transfer to the open data endpoint. This appends to the
existing list of datasets. This script is also pretty slow because of the number of rucio
database queries required.
* `HEPMC_datasets.txt` is a list of existing datasets in HEPMC format generated by the above.
* `transfer_hepmc.sh` is a script to transfer datasets to the CERN open data endpoint. It reads
from `HEPMC_datasets.txt` and transfers any datasets that don't already have rules over to CERN.
* `create_file_metadata.py` is a script to create metadata files mapping dataset names to
filenames and dataset names to file metadata (names, locations, number of events, etc) at CERN.
It creates `od_hepmc_file_mapping*.json` files containing those two dictionaries, and reads any
previously created metadata files it can find to avoid recreating them.
* `od_hepmc_file_mapping*.json` are metadata files containing maps from dataset names to file
names and dataset names to file metadata (names, locations, number of events, etc) at CERN,
created by the above script.
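The two dictionaries can be sketched as follows. The function name and the metadata field names (`name`, `nevents`) are illustrative assumptions, not the real schema of the `od_hepmc_file_mapping*.json` files:

```python
# Sketch of the two mappings maintained by create_file_metadata.py:
# dataset -> file names, and dataset -> per-file metadata. Field names
# here are assumptions for illustration.

def build_mappings(dataset_files):
    """dataset_files: dict of dataset -> list of (filename, nevents) pairs."""
    name_map = {ds: [f for f, _ in files] for ds, files in dataset_files.items()}
    meta_map = {
        ds: [{"name": f, "nevents": n} for f, n in files]
        for ds, files in dataset_files.items()
    }
    return name_map, meta_map
```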
* `make_od_hepmc_json.py` is a script to generate the full set of json files for upload to the
CERN Open Data Portal. One file is created for every record defined in `sample_rules.py`,
containing all of the individual samples for that record, along with summary files for
13 TeV and 13.6 TeV linking all the child records for easy entry. It also creates
`last_record_creation.json`, containing the list of records and samples that were used in
the last run; this can be used to check for updates when samples are added to the records.
* `reset_doi_recid_assignment.py` is a script that, in case anything ever goes horribly wrong,
allows a full reset of the DOI and record ID assignments back to square one (leaving just the
summary records). It should not be used once the records are public.
* `dump_doi_recid_assignment.py` is a script to dump the current DOI and record ID assignments.
Handy for checking whether there are available DOIs and record IDs, or if more need to be
requested from the CERN team.
* `doi_recid_assignment.json` is a json file containing a list of dictionaries. Each dictionary
has a record ID and a DOI, and those that have been assigned have the short name of the
record to which they were assigned (the name used to generate the open data portal record json
file name).
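Assigning a record to an entry can be sketched as below, following the list-of-dictionaries layout described above; the `record` key name and the helper itself are assumptions for illustration:

```python
# Sketch of claiming the next free DOI/record ID entry for a record short
# name. The "record" key marking an entry as assigned is an assumption.

def assign_next_free(doirecid_list, short_name):
    """Mark the first unassigned entry with short_name and return it."""
    for entry in doirecid_list:
        if "record" not in entry:
            entry["record"] = short_name
            return entry
    raise RuntimeError("No free DOI/recid entries; request more from CERN")
```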

## Workflow for first production

* Run `evgen_list_from_central_pages.sh` to generate `EVNT_list_*.txt`
* Request any updates to `EVNT_exotics.data` from the exotics group
* Run `parse_exotics_input.py` to generate `EVNT_exotics_datasets.txt`
* Run `create_metadata_AMI.py` to generate `EVNT_metadata.csv`, `EVNT_prod_request15.csv`,
and `EVNT_prod_request23.csv`
* Use `EVNT_prod_request15.csv` and `EVNT_prod_request23.csv` to generate production requests
in JIRA (ATLMCPROD project)
* Run `keyword_sorting.py` to identify any samples not sorted into records, and adjust the rules
in `sample_rules.py` accordingly. This will also update `od_hepmc_sample_map.json`.
* Run `check_for_HEPMC_datasets.py` to generate `HEPMC_datasets.txt`
* Run `transfer_hepmc.sh` to transfer the HEPMC files to CERN
* To check on the status of transfers, you can use `rucio list-rules --account opendata | grep HEPMC`
* To check for ongoing transfers, try simply `rucio list-rules --account opendata | grep HEPMC | grep -ve " OK"`
* Generally, transfers of a modest data volume take about an hour to complete.
* Run `create_file_metadata.py` to generate a new `od_hepmc_file_mapping*.json`
* Get a list of record IDs and DOIs for the Open Data Portal for all the new records
* Run `make_od_hepmc_json.py` to test generation of json files for the Open Data Portal and update
the mapping in `doi_recid_assignment.json`
* Create a merge request for all these scripts

## Workflow for updating datasets

We will not delete any datasets on the open data portal. Therefore, this is only about identifying
additional datasets to be included.

* Run `evgen_list_from_central_pages.sh` to generate `EVNT_list_*.txt`. If needed, the old copy
can be retained; `git diff` will also show differences w.r.t. what was previously done.
* Request any updates to `EVNT_exotics.data` from the exotics group
* Run `parse_exotics_input.py` to update `EVNT_exotics_datasets.txt`
* Run `create_metadata_AMI.py` to create new production spreadsheets (should only contain
datasets that still need processing in the production system)
* Create a JIRA ticket for production with `EVNT_prod_request15.csv` and `EVNT_prod_request23.csv`
* Run `keyword_sorting.py` to identify any samples not sorted into records, and adjust the rules
in `sample_rules.py` accordingly. This will also update `od_hepmc_sample_map.json`.
* Run `check_for_HEPMC_datasets.py` to update `HEPMC_datasets.txt`
* Run `transfer_hepmc.sh` to transfer the (additional) HEPMC files to CERN
* Run `create_file_metadata.py` to generate a new `od_hepmc_file_mapping*.json`
* Run `dump_doi_recid_assignment.py` to confirm that sufficient record IDs and DOIs are available
* If they aren't, get additional record IDs and DOIs from the CERN team and add them to the json
file using `add_doi_recids.py`
* Run `make_od_hepmc_json.py` to test generation of new json files for the Open Data Portal and
check that the additions are as expected; this also updates the mapping in `doi_recid_assignment.json`
* Create a merge request for all these scripts and updated files
22 changes: 22 additions & 0 deletions atlas-2025-odfr-hepmc/add_doi_recids.py
@@ -0,0 +1,22 @@
#!/usr/bin/env python3

import json

# Load the existing list of dictionaries
# Each dictionary has a doi and a recid
# We assign a record to it in the dictionary
# The original is from
# https://github.com/cernopendata/opendata.cern.ch/pull/3737
with open('doi_recid_assignment.json','r') as f:
    doirecid_list = json.load(f)

# Add extra dois / record IDs here as needed!
doirecid_list += [
# {
# "doi": "10.7483/OPENDATA.ATLAS.3I7V.FNQQ",
# "recid": "160002"
# },
]

# Finally, record a new DOI + Record ID list
with open('doi_recid_assignment.json','w') as f:
    json.dump(doirecid_list, f)
