#! /usr/bin/env python
###############################################################################
# Developer: Benjamin Trocme (benjamin.trocme at apc.in2p3.fr) - 2024
import os
import subprocess
import glob
import re
from km3dq_common.common_library import get_det_id
###############################################################################
[docs]
def complete_with_zero(integer, total_length):
    """
    Return integer as a string, left-padded with "0" up to total_length
    - integer: value to convert (formatted with str)
    - total_length: minimal number of characters of the output
    The value is returned unchanged (as a string) when it is already at
    least total_length characters long.
    """
    # str.rjust pads on the left without any sign handling, exactly as the
    # former character-by-character loop did (e.g. -5 -> "00-5")
    return str(integer).rjust(total_length, "0")
###############################################################################
[docs]
def get_raw_data_full_path(dataset, run_number):
    """
    Return the full iRODS path of the raw data file of a run
    Functional only at CC-IN2P3 (relies on the "ils" command)
    - dataset: detector name (converted to a detector id by get_det_id)
    - run_number: run number
    Returns a dictionary of strings, all left empty when no file is found:
    - "std": iRODS path of the file
    - "root": "root://ccxroot:1999//hpss/..." URL built from the same
      sub-directory
    - "details": for each "ils -l" line mentioning the file, the text found
      before "& <file name>", each followed by " / "
    """

    def ils_lines(*args):
        # ils is run without a shell and with its arguments as a list: the
        # pipe is always closed and paths are never interpreted by a shell.
        # A missing ils command yields an empty listing instead of raising,
        # which matches what the former os.popen call produced.
        try:
            listing = subprocess.run(["ils", *args],
                                     stdout=subprocess.PIPE,
                                     text=True,
                                     check=False)
        except OSError:
            return []
        return listing.stdout.splitlines()

    raw_data_file = {"std": "",
                     "root": "",
                     "details": ""}

    d_id_str = complete_with_zero(get_det_id(dataset), 8)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id_str}"
    regular_name = (f"KM3NeT_{d_id_str}"
                    f"_{complete_with_zero(run_number, 8)}.root")

    for i_out in ils_lines(regular_dir):
        # Only the lines containing "C- " are treated as sub-directories;
        # what follows the marker is used as the path to list
        sub_dir = i_out.split("C- ")
        if len(sub_dir) < 2:
            continue
        sub_dir_clean = sub_dir[1]

        for i_out2 in ils_lines("-l", sub_dir_clean):
            if regular_name not in i_out2:
                continue
            # Path without the leading /in2p3, shared by both outputs
            d = sub_dir_clean.replace("/in2p3/", "/")
            raw_data_file['std'] = f"/in2p3{d}/{regular_name}"
            raw_data_file['root'] = (f"root://ccxroot:1999//hpss/"
                                     f"in2p3.fr/group{d}/"
                                     f"{regular_name}")
            # Several lines may mention the same file: the details are
            # accumulated, not overwritten
            irods_details = i_out2.split(f"& {regular_name}")[0]
            raw_data_file['details'] += f"{irods_details} / "

    return raw_data_file
###############################################################################
[docs]
def get_dst_full_path(dataset, run_number, data_type, version, where):
    """
    Return the full SPS/irods path
    Functional only at CC-IN2P3
    - dataset: detector name
    - run_number: if 0, returns only the directory
    - data_type: "jppmuon_aashower_dynamic" or "jppmuon_jppshower-upgoing_dynamic"
    - version: dictionary of versions
    Example: {"data":"v9.0","mc_noise":"v9.0", "mc_mupage":"v9.0", "mc_neutr":"v9.0"}
    - where: string containing "sps" and/or "irods"
    Returns a dictionary of strings:
    - run_number != 0 and "sps" in where: one entry per file type ("data",
      "mc_noise", "mc_mupage", "mc_neutr") holding the file path, or "" when
      the pattern does not match exactly one file
    - run_number != 0 and "irods" in where: "data_irods" holding the iRODS
      path, or "" when ils does not list the file
    - run_number == 0: "dir_<file type>" entries holding a directory string
    """
    dst_file = {}
    d_id_str = complete_with_zero(get_det_id(dataset), 8)
    run_number_str = complete_with_zero(run_number, 8)
    sub_directory_list = ["", "_priority", "_secondary"]

    # File name (data) or file name pattern (MC) per file type
    run_prefix = f"KM3NeT_{d_id_str}_{run_number_str}"
    filename = {"data": (f"{run_prefix}.data."
                         f"{data_type}.offline.dst.{version['data']}.root"),
                "mc_noise": f"{run_prefix}.mc.pure_noise*.root",
                "mc_mupage": f"{run_prefix}.mc.mupage*.root",
                "mc_neutr": f"{run_prefix}.mc.gsg*.root"}

    # Check the file availability on sps
    if "sps" in where:
        for i_file, i_name in filename.items():
            sps_workdirs = ("/sps/km3net/repo/data_processing/tag/"
                            f"{version[i_file]}/workdirs/")
            if run_number == 0:
                # NOTE(review): the textual representation of the whole
                # suffix list is embedded in the string, so this is not a
                # usable path as such - kept as is, callers may rely on it
                dst_file[f"dir_{i_file}"] = (f"{sps_workdirs}"
                                             f"KM3NeT_{d_id_str}{sub_directory_list}/"
                                             f"results/{d_id_str}/dst")
                continue

            dst_file[i_file] = ""
            for i_dir_data_type in sub_directory_list:
                full_path = (f"{sps_workdirs}"
                             f"KM3NeT_{d_id_str}{i_dir_data_type}/"
                             f"results/{d_id_str}/dst/{i_name}")
                # The file is retained only when the pattern matches
                # exactly one file; the directory is scanned once
                matches = glob.glob(full_path)
                if len(matches) == 1:
                    dst_file[i_file] = matches[0]

    # Check the file availability on irods
    # Only data implemented so far
    if "irods" in where:
        for i_file in ["data"]:
            if run_number == 0:
                # NOTE(review): same key as in the sps case, hence the sps
                # directory is overwritten when both are requested
                # The "/" after "data" was missing (see the file path below)
                dst_file[f"dir_{i_file}"] = ("/in2p3/km3net/data/"
                                             f"KM3NeT_{d_id_str}{sub_directory_list}/"
                                             f"{version[i_file]}/dst")
                continue

            dst_file[f'{i_file}_irods'] = ""
            for i_dir_data_type in sub_directory_list:
                full_path = ("/in2p3/km3net/data/"
                             f"KM3NeT_{d_id_str}{i_dir_data_type}/"
                             f"{version[i_file]}/dst/{filename[i_file]}")
                # Check that the file exists: its name must show up in the
                # ils output once spaces and line breaks are removed
                sub = subprocess.run(["ils", full_path],
                                     capture_output=True)
                ils_out = sub.stdout.decode("utf-8").replace(" ", "")\
                                                    .replace("\n", "")
                if filename[i_file] in ils_out:
                    dst_file[f'{i_file}_irods'] = full_path

    return dst_file
###############################################################################
# def get_dst_data_full_path(dataset, run_number, suffix, version, where):
# """
# Return the full SPS/irods path
# Functional only at CC-IN2P3
# """
#
# dst_data_file = {"sps": "missing",
# "irods": "missing",
# "sps_mc": ""}
#
# d_id = get_det_id(dataset)
# if d_id < 100:
# d_id_str = f"000000{d_id:2d}"
# else:
# d_id_str = f"00000{d_id:3d}"
#
# if run_number < 10000:
# run_number_str = f"0000{run_number}"
# elif run_number < 100000:
# run_number_str = f"000{run_number}"
# elif run_number < 1000000:
# run_number_str = f"00{run_number}"
#
# filename = (f"KM3NeT_{d_id_str}_{run_number_str}.data."
# f"{suffix}.offline.dst.{version}.root")
# filename_mc_wildc = (f"KM3NeT_{d_id_str}_{run_number_str}.mc."
# "*.root")
#
# if "sps" in where:
# for i_dir_suffix in ("", "_priority", "_secondary", "_priority_viper"):
# sps_path = ("/sps/km3net/repo/data_processing/tag/"
# f"{version}/workdirs/KM3NeT_{d_id_str}{i_dir_suffix}/"
# f"results/{d_id_str}/dst/{filename}")
#
# if os.path.exists(sps_path):
# dst_data_file['sps'] = sps_path
# else:
# sps_path_mc = ("/sps/km3net/repo/data_processing/tag/"
# f"{version}/workdirs/"
# f"KM3NeT_{d_id_str}{i_dir_suffix}/"
# f"results/{d_id_str}/dst/{filename_mc_wildc}")
# if len(glob.glob(sps_path_mc)):
# dst_data_file['sps_mc'] = "some exists"
#
# if "irods" in where:
# irods_path = (f"/in2p3/km3net/data/KM3NeT_{d_id_str}/{version}/"
# f"dst/{filename}")
#
# sub = subprocess.run(["ils", irods_path],
# capture_output=True)
# ils_out = sub.stdout.decode("utf-8").replace(" ", "").replace("\n", "")
# if filename in ils_out:
# dst_data_file['irods'] = irods_path
#
# return dst_data_file
###############################################################################
[docs]
def check_raw_data_availability(dataset, run_number_list):
    """
    Check that the raw data are available on iRODS
    Function optimised for long list (typically JQAQC one)
    - dataset: detector name (converted to a detector id by get_det_id)
    - run_number_list: run numbers to check
    Returns a dictionary restricted to the problematic runs of
    run_number_list:
    - 0 when no file was found for the run
    - the number of files found when there is more than one
    The runs with exactly one file are not reported.
    """

    def ils_lines(path):
        # ils is run without a shell and with its arguments as a list: the
        # pipe is always closed and paths are never interpreted by a shell.
        # A missing ils command yields an empty listing instead of raising,
        # which matches what the former os.popen call produced.
        try:
            listing = subprocess.run(["ils", path],
                                     stdout=subprocess.PIPE,
                                     text=True,
                                     check=False)
        except OSError:
            return []
        return listing.stdout.splitlines()

    d_id = get_det_id(dataset)
    regular_dir = (f"/in2p3/km3net/data/raw/sea/"
                   f"KM3NeT_{complete_with_zero(d_id, 8)}")

    # Compiled once for all the lines; the dot of ".root" is escaped so
    # that it only matches a literal dot. The group captures the run
    # number without its leading zeros.
    re_file = re.compile(rf"\s*KM3NeT_0+{d_id}_0+(\d+)\.root\s*")

    # All the sub-directories are scanned once, whatever the length of
    # run_number_list: number of files found per run (keys are strings)
    nb_of_files = {}
    for i_out in ils_lines(regular_dir):
        # Only the lines containing "C- " are treated as sub-directories
        sub_dir = i_out.split("C- ")
        if len(sub_dir) < 2:
            continue
        for i_out2 in ils_lines(sub_dir[1]):
            re_file_match = re_file.match(i_out2)
            if re_file_match:
                run_number = re_file_match.group(1)
                nb_of_files[run_number] = nb_of_files.get(run_number, 0) + 1

    results = {}
    for v_run in run_number_list:
        nb_found = nb_of_files.get(f"{v_run}", 0)
        # A single file is the nominal case and is not reported
        if nb_found != 1:
            results[v_run] = nb_found
    return results