Source code for km3dq_common.aux_library

#! /usr/bin/env python
###############################################################################
# Developer: Benjamin Trocme (benjamin.trocme at apc.in2p3.fr) - 2024

import os
import subprocess
import glob
import re
import km3db


###############################################################################
def complete_with_zero(integer, total_length):
    """
    Left-pad the string form of a value with zeros

    - integer: value to format (used for detector ids and run numbers)
    - total_length: minimal width of the returned string

    Returns the string form of `integer` preceded by as many "0" as needed
    to reach `total_length` characters. A value that is already at least
    that wide is returned unchanged (it is never truncated).
    """
    # rjust (and not zfill) on purpose: zfill would move a leading sign in
    # front of the padding, whereas plain left padding is wanted here.
    return f"{integer}".rjust(total_length, "0")
###############################################################################
def get_raw_data_full_path(dataset, run_number):
    """
    Return the full iRODS path of a raw data file
    Functional only at CC-IN2P3 (relies on the `ils` command)

    - dataset: detector name (converted with km3db.tools.todetid)
    - run_number: run number

    Returns a dictionary with three string entries, all left empty when the
    file is not found in any sub-directory:
    - "std": path of the file below /in2p3
    - "root": matching root:// URL
    - "details": for every `ils -l` line listing the file, the part of the
      line that precedes "& <file name>", each followed by " / "
    """
    raw_data_file = {"std": "", "root": "", "details": ""}

    d_id = km3db.tools.todetid(dataset)
    detector_tag = f"KM3NeT_{complete_with_zero(d_id, 8)}"
    regular_dir = f"/in2p3/km3net/data/raw/sea/{detector_tag}"
    regular_name = f"{detector_tag}_{complete_with_zero(run_number, 8)}.root"

    # The pipes are opened in "with" blocks so that they are closed (and
    # the child processes reaped) as soon as their output has been read.
    with os.popen(f"ils {regular_dir}") as out:
        for i_out in out:
            # Collections (sub-directories) are listed as "C- <path>"
            sub_dir = i_out.split("C- ")
            if len(sub_dir) <= 1:
                continue
            sub_dir_clean = sub_dir[1].replace("\n", "")

            with os.popen(f"ils -l {sub_dir_clean}") as out2:
                for i_out2 in out2:
                    if regular_name not in i_out2:
                        continue
                    d = sub_dir_clean.replace("/in2p3/", "/")
                    raw_data_file["std"] = f"/in2p3{d}/{regular_name}"
                    raw_data_file["root"] = (
                        f"root://ccxroot:1999//hpss/"
                        f"in2p3.fr/group{d}/"
                        f"{regular_name}"
                    )
                    # Several lines may list the same file: the details
                    # of all of them are accumulated.
                    irods_details = i_out2.split(f"& {regular_name}")[0]
                    raw_data_file["details"] += f"{irods_details} / "

    return raw_data_file
###############################################################################
def get_dst_full_path(dataset, run_number, data_type, version, where):
    """
    Return the full SPS/irods path of the DST files
    Functional only at CC-IN2P3

    - dataset: detector name
    - run_number: if 0, returns only the directory
    - data_type: "jppmuon_aashower_dynamic" or
      "jppmuon_jppshower-upgoing_dynamic"
    - version: dictionary of versions. Example:
      {"data":"v9.0","mc_noise":"v9.0", "mc_mupage":"v9.0",
      "mc_neutr":"v9.0"}
    - where: string containing "sps" and/or "irods"

    Returns a dictionary:
    - run_number != 0 and "sps" in where: one entry per file type ("data",
      "mc_noise", "mc_mupage", "mc_neutr") with the path of the file, or ""
      when no sub-directory contains exactly one matching file
    - run_number != 0 and "irods" in where: entry "data_irods" with the
      iRODS path of the data file, or "" when `ils` does not list it
    - run_number == 0: entries "dir_<file type>" with the DST directory
      (when both "sps" and "irods" are requested, "dir_data" ends up
      holding the iRODS directory as it is filled last)
    """
    dst_file = {}

    d_id_str = complete_with_zero(km3db.tools.todetid(dataset), 8)
    run_number_str = complete_with_zero(run_number, 8)
    sub_directory_list = ["", "_priority", "_secondary"]
    # NOTE(review): a single directory is returned when run_number == 0;
    # the unsuffixed one is assumed to be the reference - to be confirmed.
    default_sub_directory = sub_directory_list[0]

    file_prefix = f"KM3NeT_{d_id_str}_{run_number_str}"
    filename = {
        "data": (
            f"{file_prefix}.data."
            f"{data_type}.offline.dst.{version['data']}.root"
        ),
        "mc_noise": f"{file_prefix}.mc.pure_noise*.root",
        "mc_mupage": f"{file_prefix}.mc.mupage*.root",
        "mc_neutr": f"{file_prefix}.mc.gsg*.root",
    }

    # Check the file availability on sps
    if "sps" in where:
        for i_file, i_name in filename.items():
            workdirs = (
                "/sps/km3net/repo/data_processing/tag/"
                f"{version[i_file]}/workdirs"
            )
            if run_number == 0:
                dst_file[f"dir_{i_file}"] = (
                    f"{workdirs}/"
                    f"KM3NeT_{d_id_str}{default_sub_directory}/"
                    f"results/{d_id_str}/dst"
                )
                continue

            dst_file[i_file] = ""
            for i_dir_data_type in sub_directory_list:
                full_path = (
                    f"{workdirs}/"
                    f"KM3NeT_{d_id_str}{i_dir_data_type}/"
                    f"results/{d_id_str}/dst/{i_name}"
                )
                # The MC names contain a wildcard: the file is retained
                # only when the pattern matches exactly one file. No early
                # exit: a later sub-directory overrides an earlier one.
                matches = glob.glob(full_path)
                if len(matches) == 1:
                    dst_file[i_file] = matches[0]

    # Check the file availability on irods
    # Only data implemented so far
    if "irods" in where:
        for i_file in ("data",):
            if run_number == 0:
                dst_file[f"dir_{i_file}"] = (
                    "/in2p3/km3net/data/"
                    f"KM3NeT_{d_id_str}{default_sub_directory}/"
                    f"{version[i_file]}/dst"
                )
                continue

            dst_file[f"{i_file}_irods"] = ""
            for i_dir_data_type in sub_directory_list:
                full_path = (
                    "/in2p3/km3net/data/"
                    f"KM3NeT_{d_id_str}{i_dir_data_type}/"
                    f"{version[i_file]}/dst/{filename[i_file]}"
                )
                # Check that the file exists: its name must show up in
                # the standard output of ils
                sub = subprocess.run(["ils", full_path], capture_output=True)
                ils_out = (
                    sub.stdout.decode("utf-8").replace(" ", "").replace("\n", "")
                )
                if filename[i_file] in ils_out:
                    dst_file[f"{i_file}_irods"] = full_path

    return dst_file
###############################################################################
def check_raw_data_availability(dataset, run_number_list):
    """
    Check that the raw data are available on iRODS
    Function optimised for long list (typically JQAQC one): the iRODS
    sub-directories are listed once, whatever the number of runs

    - dataset: detector name (converted with km3db.tools.todetid)
    - run_number_list: run numbers to check

    Returns a dictionary containing only the problematic runs:
    - run number -> 0 when no raw data file was found
    - run number -> number of files when more than one was found
    Runs with exactly one file are not included.
    """
    d_id = km3db.tools.todetid(dataset)
    regular_dir = (
        f"/in2p3/km3net/data/raw/sea/KM3NeT_{complete_with_zero(d_id, 8)}"
    )

    # Compiled once, outside of the loops. The run number is captured
    # without its leading zeros; the dot of the extension is escaped so
    # that only a literal ".root" is accepted.
    re_file = re.compile(rf"\s*KM3NeT_0+{d_id}_0+(\d+)\.root\s*")

    # Number of files found per run number (keys are strings)
    nb_of_files = {}

    # The pipes are opened in "with" blocks so that they are closed (and
    # the child processes reaped) as soon as their output has been read.
    with os.popen(f"ils {regular_dir}") as out:
        for i_out in out:
            # Collections (sub-directories) are listed as "C- <path>"
            sub_dir = i_out.split("C- ")
            if len(sub_dir) <= 1:
                continue
            sub_dir_clean = sub_dir[1].replace("\n", "")

            with os.popen(f"ils {sub_dir_clean}") as out2:
                for i_out2 in out2:
                    re_file_match = re_file.match(i_out2)
                    if re_file_match:
                        run_number = re_file_match.group(1)
                        nb_of_files[run_number] = (
                            nb_of_files.get(run_number, 0) + 1
                        )

    results = {}
    for v_run in run_number_list:
        count = nb_of_files.get(f"{v_run}", 0)
        # A single file is the nominal case: nothing to report
        if count != 1:
            results[v_run] = count

    return results