#! /usr/bin/env python
###############################################################################
# Developer: Benjamin Trocme (benjamin.trocme at apc.in2p3.fr) - 2024
import os
import subprocess
import glob
import re
import km3db
###############################################################################
[docs]
def complete_with_zero(integer, total_length):
    """
    Return the string form of integer, left-padded with "0"
    - integer: value to render
    - total_length: minimal length of the returned string
    The value is never truncated: if it is already total_length characters
    long (or longer), it is returned without padding.
    The padding is purely textual: a minus sign is not moved in front of
    the zeros (-5 padded to 4 characters gives "00-5").
    """
    # str.rjust does the padding in one call instead of concatenating the
    # zeros one by one in a loop
    return str(integer).rjust(total_length, "0")
###############################################################################
[docs]
def get_raw_data_full_path(dataset, run_number):
    """
    Return the full iRODS path of the raw data file of a run
    Functional only at CC-IN2P3 (relies on the "ils" command)
    - dataset: detector name (converted with km3db.tools.todetid)
    - run_number: run number
    Returns a dict of three strings, all left empty if the file is not found:
    - "std": path of the file built from the collection where it was found
    - "root": root:// URL built from the same collection
    - "details": for each matching "ils -l" line, the text preceding
      "& <file name>", each followed by " / "
    If the file is listed in several collections, "std" and "root" refer
    to the last one while "details" accumulates all the matching lines.
    """
    raw_data_file = {"std": "", "root": "", "details": ""}
    d_id_str = complete_with_zero(km3db.tools.todetid(dataset), 8)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id_str}"
    regular_name = f"KM3NeT_{d_id_str}_{complete_with_zero(run_number, 8)}.root"

    # The pipes are used as context managers so that each one is closed as
    # soon as its output has been read, instead of being left open.
    # Sub-collections are the "ils" output lines containing "C- "
    with os.popen(f"ils {regular_dir}") as out:
        sub_dir_list = [
            parts[1].replace("\n", "")
            for parts in (line.split("C- ") for line in out)
            if len(parts) > 1
        ]

    for sub_dir_clean in sub_dir_list:
        with os.popen(f"ils -l {sub_dir_clean}") as out2:
            matching_lines = [line for line in out2 if regular_name in line]
        if not matching_lines:
            continue
        # The leading "/in2p3" is dropped and re-added explicitly
        # when building the two paths
        d = sub_dir_clean.replace("/in2p3/", "/")
        raw_data_file["std"] = f"/in2p3{d}/{regular_name}"
        raw_data_file["root"] = (
            f"root://ccxroot:1999//hpss/in2p3.fr/group{d}/{regular_name}"
        )
        for line in matching_lines:
            irods_details = line.split(f"& {regular_name}")[0]
            raw_data_file["details"] += f"{irods_details} / "
    return raw_data_file
###############################################################################
[docs]
def get_dst_full_path(dataset, run_number, data_type, version, where):
    """
    Return the full SPS/iRODS path of the DST files of a run
    Functional only at CC-IN2P3
    - dataset: detector name
    - run_number: if 0, returns only the directory
    - data_type: "jppmuon_aashower_dynamic" or "jppmuon_jppshower-upgoing_dynamic"
    - version: dict of versions, one entry per file type
    Example: {"data":"v9.0","mc_noise":"v9.0", "mc_mupage":"v9.0", "mc_neutr":"v9.0"}
    - where: string containing "sps" and/or "irods"
    Returns a dict of strings:
    - run_number != 0 and "sps": keys "data", "mc_noise", "mc_mupage" and
      "mc_neutr", each holding the file found on SPS or "" if none
    - run_number != 0 and "irods": key "data_irods", holding the file found
      on iRODS or "" if none (only data implemented so far)
    - run_number == 0: keys "dir_<file type>" holding a directory
      description, without any existence check
    """
    dst_file = {}
    d_id_str = complete_with_zero(km3db.tools.todetid(dataset), 8)
    run_number_str = complete_with_zero(run_number, 8)
    sub_directory_list = ["", "_priority", "_secondary"]
    file_prefix = f"KM3NeT_{d_id_str}_{run_number_str}"
    # The MC names contain a wildcard, resolved with glob on SPS
    filename = {
        "data": f"{file_prefix}.data.{data_type}.offline.dst.{version['data']}.root",
        "mc_noise": f"{file_prefix}.mc.pure_noise*.root",
        "mc_mupage": f"{file_prefix}.mc.mupage*.root",
        "mc_neutr": f"{file_prefix}.mc.gsg*.root",
    }

    # Check the file availability on sps
    if "sps" in where:
        for i_file in filename:
            workdirs = (
                f"/sps/km3net/repo/data_processing/tag/{version[i_file]}/workdirs/"
            )
            if run_number == 0:
                # NOTE(review): the whole list is interpolated here, so the
                # string contains "['', '_priority', '_secondary']" rather
                # than a single directory - confirm that this is intended
                dst_file[f"dir_{i_file}"] = (
                    f"{workdirs}KM3NeT_{d_id_str}{sub_directory_list}/"
                    f"results/{d_id_str}/dst"
                )
                continue
            dst_file[i_file] = ""
            for i_dir_data_type in sub_directory_list:
                # Check that the file exists: exactly one match is required.
                # No break: a match in a later directory replaces an earlier one
                matches = glob.glob(
                    f"{workdirs}KM3NeT_{d_id_str}{i_dir_data_type}/"
                    f"results/{d_id_str}/dst/{filename[i_file]}"
                )
                if len(matches) == 1:
                    dst_file[i_file] = matches[0]

    # Check the file availability on irods
    # Only data implemented so far
    if "irods" in where:
        for i_file in ["data"]:
            if run_number == 0:
                # NOTE(review): same key as in the sps case, hence the sps
                # value is overwritten when both are requested - confirm
                dst_file[f"dir_{i_file}"] = (
                    "/in2p3/km3net/data/"
                    f"KM3NeT_{d_id_str}{sub_directory_list}/"
                    f"{version[i_file]}/dst"
                )
                continue
            dst_file[f"{i_file}_irods"] = ""
            for i_dir_data_type in sub_directory_list:
                full_path = (
                    "/in2p3/km3net/data/"
                    f"KM3NeT_{d_id_str}{i_dir_data_type}/"
                    f"{version[i_file]}/dst/{filename[i_file]}"
                )
                # Check that the file exists: its name must show up in the
                # "ils" output, compared after removing spaces and newlines
                sub = subprocess.run(["ils", full_path], capture_output=True)
                ils_out = sub.stdout.decode("utf-8").replace(" ", "").replace("\n", "")
                if filename[i_file] in ils_out:
                    dst_file[f"{i_file}_irods"] = full_path
    return dst_file
###############################################################################
[docs]
def check_raw_data_availability(dataset, run_number_list):
    """
    Check that the raw data are available on iRODS
    Function optimised for long list (typically JQAQC one): the
    collections are listed once, whatever the number of runs
    - dataset: detector name (converted with km3db.tools.todetid)
    - run_number_list: run numbers to check
    Returns a dict containing only the problematic runs:
    - run number -> 0 if no raw data file was found
    - run number -> number of files if more than one was found
    Runs with exactly one file are not included in the result.
    """
    d_id = km3db.tools.todetid(dataset)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{complete_with_zero(d_id, 8)}"
    # Compiled once, outside the loops. The dot of the extension is escaped
    # so that it no longer matches any character.
    # The captured group is the run number without its leading zeros
    re_file = re.compile(rf"\s*KM3NeT_0+{d_id}_0+(\d+)\.root\s*")

    # The pipes are used as context managers so that each one is closed as
    # soon as its output has been read, instead of being left open.
    # Sub-collections are the "ils" output lines containing "C- "
    with os.popen(f"ils {regular_dir}") as out:
        sub_dir_list = [
            parts[1].replace("\n", "")
            for parts in (line.split("C- ") for line in out)
            if len(parts) > 1
        ]

    # Number of files found per run, summed over all the sub-collections
    nb_of_files = {}
    for sub_dir_clean in sub_dir_list:
        with os.popen(f"ils {sub_dir_clean}") as out2:
            for i_out2 in out2:
                re_file_match = re_file.match(i_out2)
                if re_file_match:
                    run_number = re_file_match.group(1)
                    nb_of_files[run_number] = nb_of_files.get(run_number, 0) + 1

    results = {}
    for v_run in run_number_list:
        # The keys of nb_of_files are strings
        count = nb_of_files.get(f"{v_run}", 0)
        if count != 1:
            results[v_run] = count
    return results