Source code for km3dq_common.aux_library

#! /usr/bin/env python
###############################################################################
# Developer: Benjamin Trocme (benjamin.trocme at apc.in2p3.fr) - 2024

import os
import subprocess
import glob
import re

from km3dq_common.common_library import get_det_id


###############################################################################
def complete_with_zero(integer, total_length):
    """
    Left-pad the textual form of a number with "0" characters

    - integer: value to format
    - total_length: minimal number of characters of the returned string

    A value whose textual form is already total_length characters or longer
    is returned as is (never truncated)
    """
    nb_of_missing_char = total_length - len(str(integer))
    # A zero or negative repetition count simply gives an empty prefix
    return "0" * nb_of_missing_char + f"{integer}"
###############################################################################
def get_raw_data_full_path(dataset, run_number):
    """
    Return the full iRODS path of the raw data file of a run
    Functional only at CC-IN2P3 (relies on the iRODS "ils" command)

    - dataset: detector name (converted to a detector id by get_det_id)
    - run_number: run number

    Returns a dictionary with three string entries, all empty when the file
    is not found:
    - "std": path of the file rebuilt from the collection where it was found
    - "root": corresponding root://ccxroot:1999 URL
    - "details": for every "ils -l" line mentioning the file, the text
      preceding "& <file name>", each followed by " / "
    """
    raw_data_file = {"std": "", "root": "", "details": ""}

    d_id_str = complete_with_zero(get_det_id(dataset), 8)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id_str}"
    regular_name = (f"KM3NeT_{d_id_str}"
                    f"_{complete_with_zero(run_number, 8)}.root")

    # The pipes are opened in "with" blocks so that they are closed (and the
    # child processes reaped) even if the parsing raises.
    with os.popen(f"ils {regular_dir}") as out:
        for i_out in out:
            # Sub-collections are listed by ils as "C- <collection path>"
            sub_dir = i_out.split("C- ")
            if len(sub_dir) <= 1:
                continue
            sub_dir_clean = sub_dir[1].replace("\n", "")

            with os.popen(f"ils -l {sub_dir_clean}") as out2:
                for i_out2 in out2:
                    if regular_name not in i_out2:
                        continue
                    d = sub_dir_clean.replace("/in2p3/", "/")
                    raw_data_file['std'] = f"/in2p3{d}/{regular_name}"
                    raw_data_file['root'] = (f"root://ccxroot:1999//hpss/"
                                             f"in2p3.fr/group{d}/"
                                             f"{regular_name}")
                    # Several lines may mention the same file: the details
                    # of all of them are accumulated.
                    irods_details = i_out2.split(f"& {regular_name}")[0]
                    raw_data_file['details'] += f"{irods_details} / "

    return raw_data_file
###############################################################################
def get_dst_full_path(dataset, run_number, data_type, version, where):
    """
    Return the full SPS/iRODS path of the DST files of a run
    Functional only at CC-IN2P3

    - dataset: detector name
    - run_number: if 0, returns only the directory
    - data_type: "jppmuon_aashower_dynamic" or
                 "jppmuon_jppshower-upgoing_dynamic"
    - version: dictionary of versions
      Example: {"data":"v9.0","mc_noise":"v9.0",
                "mc_mupage":"v9.0", "mc_neutr":"v9.0"}
      With "sps", all four keys are read; with "irods", only "data".
    - where: string containing "sps" and/or "irods"

    Returns a dictionary:
    - run_number != 0, "sps": keys "data", "mc_noise", "mc_mupage",
      "mc_neutr" -> path of the file, "" if not found
    - run_number != 0, "irods": key "data_irods" -> path of the file,
      "" if not found
    - run_number == 0: keys "dir_<type>" -> directory built for the
      un-suffixed production directory (the "_priority"/"_secondary"
      variants are not probed in this mode).
      NOTE: "sps" and "irods" both fill "dir_data"; when both are requested,
      the iRODS value overwrites the SPS one.
    """
    dst_file = {}

    d_id_str = complete_with_zero(get_det_id(dataset), 8)
    run_number_str = complete_with_zero(run_number, 8)

    # Possible suffixes of the production directory
    sub_directory_list = ["", "_priority", "_secondary"]

    # The MC names contain a wildcard, resolved by glob on SPS
    filename = {"data": (f"KM3NeT_{d_id_str}_{run_number_str}.data."
                         f"{data_type}.offline.dst.{version['data']}.root"),
                "mc_noise": (f"KM3NeT_{d_id_str}_{run_number_str}.mc."
                             "pure_noise*.root"),
                "mc_mupage": (f"KM3NeT_{d_id_str}_{run_number_str}.mc."
                              "mupage*.root"),
                "mc_neutr": (f"KM3NeT_{d_id_str}_{run_number_str}.mc."
                             "gsg*.root")}

    # Check the file availability on sps
    if "sps" in where:
        for i_file, i_name in filename.items():
            if run_number == 0:
                # The whole list of suffixes used to be interpolated here,
                # producing a path that cannot exist: use the un-suffixed
                # directory instead.
                dst_file[f"dir_{i_file}"] = (
                    "/sps/km3net/repo/data_processing/tag/"
                    f"{version[i_file]}/workdirs/"
                    f"KM3NeT_{d_id_str}{sub_directory_list[0]}/"
                    f"results/{d_id_str}/dst")
                continue

            dst_file[i_file] = ""
            for i_dir_data_type in sub_directory_list:
                full_path = ("/sps/km3net/repo/data_processing/tag/"
                             f"{version[i_file]}/workdirs/"
                             f"KM3NeT_{d_id_str}{i_dir_data_type}/"
                             f"results/{d_id_str}/dst/{i_name}")
                # Accept the file only if the pattern matches exactly one
                # file; the last matching directory wins
                matches = glob.glob(full_path)
                if len(matches) == 1:
                    dst_file[i_file] = matches[0]

    # Check the file availability on irods
    # Only data implemented so far
    if "irods" in where:
        for i_file in ["data"]:
            if run_number == 0:
                # Same directory layout as the file path built below
                dst_file[f"dir_{i_file}"] = (
                    "/in2p3/km3net/data/"
                    f"KM3NeT_{d_id_str}{sub_directory_list[0]}/"
                    f"{version[i_file]}/dst")
                continue

            dst_file[f'{i_file}_irods'] = ""
            for i_dir_data_type in sub_directory_list:
                full_path = ("/in2p3/km3net/data/"
                             f"KM3NeT_{d_id_str}{i_dir_data_type}/"
                             f"{version[i_file]}/dst/{filename[i_file]}")
                # Check that the file exists: its name is looked for in the
                # ils output once spaces and line breaks are stripped
                sub = subprocess.run(["ils", full_path],
                                     capture_output=True)
                ils_out = sub.stdout.decode("utf-8").replace(" ", "")\
                                                    .replace("\n", "")
                if filename[i_file] in ils_out:
                    dst_file[f'{i_file}_irods'] = full_path

    return dst_file
############################################################################### # def get_dst_data_full_path(dataset, run_number, suffix, version, where): # """ # Return the full SPS/irods path # Functional only at CC-IN2P3 # """ # # dst_data_file = {"sps": "missing", # "irods": "missing", # "sps_mc": ""} # # d_id = get_det_id(dataset) # if d_id < 100: # d_id_str = f"000000{d_id:2d}" # else: # d_id_str = f"00000{d_id:3d}" # # if run_number < 10000: # run_number_str = f"0000{run_number}" # elif run_number < 100000: # run_number_str = f"000{run_number}" # elif run_number < 1000000: # run_number_str = f"00{run_number}" # # filename = (f"KM3NeT_{d_id_str}_{run_number_str}.data." # f"{suffix}.offline.dst.{version}.root") # filename_mc_wildc = (f"KM3NeT_{d_id_str}_{run_number_str}.mc." # "*.root") # # if "sps" in where: # for i_dir_suffix in ("", "_priority", "_secondary", "_priority_viper"): # sps_path = ("/sps/km3net/repo/data_processing/tag/" # f"{version}/workdirs/KM3NeT_{d_id_str}{i_dir_suffix}/" # f"results/{d_id_str}/dst/{filename}") # # if os.path.exists(sps_path): # dst_data_file['sps'] = sps_path # else: # sps_path_mc = ("/sps/km3net/repo/data_processing/tag/" # f"{version}/workdirs/" # f"KM3NeT_{d_id_str}{i_dir_suffix}/" # f"results/{d_id_str}/dst/{filename_mc_wildc}") # if len(glob.glob(sps_path_mc)): # dst_data_file['sps_mc'] = "some exists" # # if "irods" in where: # irods_path = (f"/in2p3/km3net/data/KM3NeT_{d_id_str}/{version}/" # f"dst/{filename}") # # sub = subprocess.run(["ils", irods_path], # capture_output=True) # ils_out = sub.stdout.decode("utf-8").replace(" ", "").replace("\n", "") # if filename in ils_out: # dst_data_file['irods'] = irods_path # # return dst_data_file ###############################################################################
def check_raw_data_availability(dataset, run_number_list):
    """
    Check that the raw data are available on iRODS
    Function optimised for long list (typically JQAQC one): the iRODS
    collections are listed once, whatever the number of runs

    - dataset: detector name (converted to a detector id by get_det_id)
    - run_number_list: run numbers to check

    Returns a dictionary containing only the problematic runs:
    - run number -> 0 when no raw data file was found
    - run number -> n (> 1) when n files were found for this run
    Runs with exactly one file are not included.
    """
    d_id = get_det_id(dataset)
    regular_dir = (f"/in2p3/km3net/data/raw/sea/"
                   f"KM3NeT_{complete_with_zero(d_id, 8)}")

    # Compiled once, outside of the loops; the dot of the extension is
    # escaped so that it matches only a literal "."
    re_file = re.compile(rf"\s*KM3NeT_0+{d_id}_0+(\d+)\.root\s*")

    # Number of files found per run number (string without leading zeros)
    nb_of_files = {}

    # The pipes are opened in "with" blocks so that they are closed (and the
    # child processes reaped) even if the parsing raises.
    with os.popen(f"ils {regular_dir}") as out:
        for i_out in out:
            # Sub-collections are listed by ils as "C- <collection path>"
            sub_dir = i_out.split("C- ")
            if len(sub_dir) <= 1:
                continue
            sub_dir_clean = sub_dir[1].replace("\n", "")

            with os.popen(f"ils {sub_dir_clean}") as out2:
                for i_out2 in out2:
                    re_file_match = re_file.match(i_out2)
                    if re_file_match:
                        run_number = re_file_match.group(1)
                        nb_of_files[run_number] = \
                            nb_of_files.get(run_number, 0) + 1

    results = {}
    for v_run in run_number_list:
        nb_found = nb_of_files.get(f"{v_run}", 0)
        # Only missing (0) or duplicated (> 1) runs are reported
        if nb_found != 1:
            results[v_run] = nb_found

    return results