Coverage for src/km3dq_common/aux_library.py: 0%
120 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-16 14:13 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-16 14:13 +0000
1#! /usr/bin/env python
2###############################################################################
3# Developer: Benjamin Trocme (benjamin.trocme at apc.in2p3.fr) - 2024
5import os
6import glob
7import re
8import km3db
11###############################################################################
def complete_with_zero(integer, total_length):
    """
    Return `integer` as a string left-padded with "0" up to `total_length`

    The value is never truncated: when its string form is already
    `total_length` characters or longer, it is returned unchanged.
    Example: complete_with_zero(42, 5) -> "00042"
    """
    # rjust() replaces the character-by-character concatenation loop.
    # Unlike zfill(), it does not move a leading sign: "-5" padded to 4
    # gives "00-5", exactly what prepending zeros produces.
    return str(integer).rjust(total_length, "0")
22###############################################################################
def get_raw_data_full_path(dataset, run_number):
    """
    Return the full iRODS path
    Functional only at CC-IN2P3

    Input:
    - dataset: detector name, converted to a detector id with
      km3db.tools.todetid
    - run_number: integer run number

    Output: dict with three string entries, all empty when the file is not
    found in any sub-collection:
    - "std": iRODS path of the raw file (/in2p3/...)
    - "root": xrootd URL of the raw file (root://ccxroot:1999//hpss/...)
    - "details": for every matching `ils -l` line, the text preceding
      "& <file name>", each followed by " / "
    """

    raw_data_file = {"std": "", "root": "", "details": ""}

    d_id = km3db.tools.todetid(dataset)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id:08d}"
    regular_name = f"KM3NeT_{d_id:08d}_{run_number:08d}.root"

    # The pipes are opened in "with" blocks so that they are closed (and the
    # ils child processes reaped) even if an exception is raised.
    with os.popen(f"ils {regular_dir}") as out:
        for i_out in out:
            # Sub-collections are listed by ils as "  C- <full path>"
            sub_dir = i_out.split("C- ")
            if len(sub_dir) < 2:
                continue

            sub_dir_clean = sub_dir[1].replace("\n", "")
            # Sub-collection path without its leading "/in2p3" prefix
            d = sub_dir_clean.replace("/in2p3/", "/")

            with os.popen(f"ils -l {sub_dir_clean}") as out2:
                for i_out2 in out2:
                    if regular_name not in i_out2:
                        continue

                    # If the file shows up several times, the paths of the
                    # last occurrence are kept and the details accumulate.
                    raw_data_file["std"] = f"/in2p3{d}/{regular_name}"
                    raw_data_file["root"] = (
                        f"root://ccxroot:1999//hpss/"
                        f"in2p3.fr/group{d}/"
                        f"{regular_name}"
                    )
                    irods_details = i_out2.split(f"& {regular_name}")[0]
                    raw_data_file["details"] += f"{irods_details} / "

    return raw_data_file
59###############################################################################
def get_reco_full_path(dataset, run_number, data_type, version, where, what):
    """
    Return the full SPS/irods path
    Functional only at CC-IN2P3
    - dataset: detector name
    - run_number: if 0, returns only the directory
    - data_type: "jppmuon_aashower_dynamic" or "jppmuon_jppshower-upgoing_dynamic"
    - version: dict of version
      Example: {"data":"v9.0","mc_noise":"v9.0", "mc_mupage":"v9.0", "mc_neutr":"v9.0"}
    - where: "sps" to search the standard processing tree below
      /sps/km3net/repo/data_processing/tag/, or any other string containing
      "sps", which is then used as the directory holding the files.
      Any other value gives an empty result.
    - what: iterable of file kinds to look for ("reco" and/or "dst")

    Output: dict of paths
    - run_number == 0: one "dir_<sample>" entry per sample
      (data, mc_noise, mc_mupage, mc_neutr)
    - otherwise: one "<what>_<sample>" entry per requested kind and sample,
      holding the file path, or "" when the search does not return exactly
      one file
    """

    path = {}

    d_id_str = f"{km3db.tools.todetid(dataset):08d}"
    run_number_str = f"{run_number:08d}"

    sub_directory_list = ["", "_priority", "_secondary"]

    # File name patterns (unix wildcards, resolved with glob)
    filename = {
        "data": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.data.{data_type}.offline.{version['data']}.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.data.{data_type}.offline.dst.{version['data']}.root"
        },
        "mc_noise": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.mc.pure_noise*.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.mc.pure_noise*.root"
        },
        "mc_mupage": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.mc.mupage*.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.mc.mupage*.root"
        },
        "mc_neutr": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.mc.gsg*.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.mc.gsg*.root"
        },
    }

    # Check the file availability on sps
    for i_what in what:
        for i_sample in filename:
            if where == "sps":
                # One candidate results directory per work directory flavour
                results_dirs = [
                    "/sps/km3net/repo/data_processing/tag/"
                    f"{version[i_sample]}/workdirs/"
                    f"KM3NeT_{d_id_str}{i_sub_dir}/"
                    f"results/{d_id_str}"
                    for i_sub_dir in sub_directory_list
                ]
                if run_number == 0:
                    # The whole list of suffixes used to be formatted into
                    # the path, giving a directory that cannot exist.
                    # Return the first existing work directory instead,
                    # falling back on the regular one.
                    existing = [d for d in results_dirs if os.path.isdir(d)]
                    path[f"dir_{i_sample}"] = (
                        existing[0] if existing else results_dirs[0]
                    )
                else:
                    path[f"{i_what}_{i_sample}"] = ""
                    for i_results_dir in results_dirs:
                        full_path = (
                            f"{i_results_dir}/{i_what}/"
                            f"{filename[i_sample][i_what]}"
                        )
                        # Check that the file exists (exactly one match);
                        # no break: the last matching directory wins
                        matches = glob.glob(full_path)
                        if len(matches) == 1:
                            path[f"{i_what}_{i_sample}"] = matches[0]
            elif "sps" in where:
                # "where" is a user-provided directory
                if run_number == 0:
                    path[f"dir_{i_sample}"] = where
                else:
                    path[f"{i_what}_{i_sample}"] = ""
                    full_path = f"{where}/{filename[i_sample][i_what]}"
                    # Check that the file exists (exactly one match)
                    matches = glob.glob(full_path)
                    if len(matches) == 1:
                        path[f"{i_what}_{i_sample}"] = matches[0]

    return path
137###############################################################################
def check_raw_data_availability(dataset, run_number_list):
    """
    Check that the raw data are available on iRODS
    Function optimised for long list (typically JQAQC one)

    Input:
    - dataset: dataset name (ex: D0ORCA015)
    - list of runs to be checked

    Output: dict {run: number of raw files found} restricted to the
    problematic runs, i.e. runs with no file (0) or with more than one
    file. Runs with exactly one raw file are not included.
    """

    d_id = km3db.tools.todetid(dataset)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id:08d}"

    # Compiled once, outside the loops. The run number is captured without
    # its leading zeros; the dot of the extension is matched literally.
    re_file = re.compile(rf"\s*KM3NeT_0+{d_id}_0+(\d+)\.root\s*")

    # Number of raw files per run number (string without leading zeros)
    nb_of_files = {}

    # The pipes are opened in "with" blocks so that they are always closed
    with os.popen(f"ils {regular_dir}") as out:
        for i_out in out:
            # Sub-collections are listed by ils as "  C- <full path>"
            sub_dir = i_out.split("C- ")
            if len(sub_dir) < 2:
                continue

            sub_dir_clean = sub_dir[1].replace("\n", "")
            with os.popen(f"ils {sub_dir_clean}") as out2:
                for i_out2 in out2:
                    re_file_match = re_file.match(i_out2)
                    if re_file_match:
                        run_number = re_file_match.group(1)
                        nb_of_files[run_number] = (
                            nb_of_files.get(run_number, 0) + 1
                        )

    results = {}
    for v_run in run_number_list:
        nb_found = nb_of_files.get(f"{v_run}", 0)
        # Only missing (0) or duplicated (> 1) runs are reported
        if nb_found != 1:
            results[v_run] = nb_found

    return results
184############################################################################################
def check_reco_availability(dataset, run_list, data_type, version, where, what):
    """
    Check that the reco/dst/light availability in sps/irods
    Function optimised for long list (typically JQAQC one)

    Input:
    - dataset: dataset name (ex: D0ORCA015)
    - run_list: list of runs to be checked
    - data_type: reconstruction type used in the data file names; may
      contain unix wildcards ("*")
    - version: dict of versions with the keys "data", "mc_noise",
      "mc_mupage" and "mc_neutr"
    - where: iterable of storages to scan ("sps" and/or "irods")
    - what: iterable of file kinds ("reco", "dst", "light"); no directory
      nor file pattern is defined for "light", so its counts stay at 0

    Output: results[run][sample][what][where] = number of matching files,
    with sample in data, mc_noise, mc_mupage, mc_neutr
    """

    d_id_str = f"{km3db.tools.todetid(dataset):08d}"
    sample_list = ["data", "mc_noise", "mc_mupage", "mc_neutr"]

    # Initialize results
    results = {
        i_run: {
            i_type: {
                i_what: {i_where: 0 for i_where in where}
                for i_what in what
            }
            for i_type in sample_list
        }
        for i_run in run_list
    }

    # Raw strings avoid the invalid "\d" escape of a regular string.
    # Literal parts (dots, version) are escaped so that "." only matches a
    # dot; data_type is coded with unix wildcards, not regex ones -> "*" is
    # turned into ".*" after escaping.
    prefix_re = rf"KM3NeT_{d_id_str}_(\d+)"
    data_type_re = re.escape(data_type).replace(r"\*", ".*")
    version_re = re.escape(version["data"])

    # reco and dst MC files share the same name pattern
    mc_noise_re = re.compile(rf"{prefix_re}\.mc\.pure_noise.*\.root")
    mc_mupage_re = re.compile(rf"{prefix_re}\.mc\.mupage.*\.root")
    mc_neutr_re = re.compile(rf"{prefix_re}\.mc\.gsg.*\.root")

    filename_re = {
        "data": {
            "reco": re.compile(
                rf"{prefix_re}\.data\.{data_type_re}\.offline\.{version_re}\.root"
            ),
            "dst": re.compile(
                rf"{prefix_re}\.data\.{data_type_re}\.offline\.dst\.{version_re}\.root"
            ),
        },
        "mc_noise": {"reco": mc_noise_re, "dst": mc_noise_re},
        "mc_mupage": {"reco": mc_mupage_re, "dst": mc_mupage_re},
        "mc_neutr": {"reco": mc_neutr_re, "dst": mc_neutr_re},
    }

    # Base directories. They end with "/" and are joined with "/" below:
    # the resulting "//" is kept so that the paths stay unchanged.
    path_0 = {
        "sps": {
            "data": "/sps/km3net/repo/data_processing/tag/",
            "mc_noise": "/sps/km3net/repo/data_processing/tag/",
            "mc_mupage": "/sps/km3net/repo/data_processing/tag/",
            "mc_neutr": "/sps/km3net/repo/data_processing/tag/"
        },
        "irods": {
            "data": "/in2p3/km3net/data/",
            "mc_noise": "/in2p3/km3net/mc/pure_noise/",
            "mc_mupage": "/in2p3/km3net/mc/atm_muon/",
            "mc_neutr": "/in2p3/km3net/mc/atm_neutrino/"
        }
    }

    for i_type in sample_list:
        sps_workdirs = f"{path_0['sps'][i_type]}/{version[i_type]}/workdirs"
        irods_base = f"{path_0['irods'][i_type]}/KM3NeT_{d_id_str}/{version[i_type]}"

        path = {
            "reco": {
                "sps": [
                    f"{sps_workdirs}/KM3NeT_{d_id_str}/results/{d_id_str}/reco"
                ],
                "irods": f"{irods_base}/reco"
            },
            "dst": {
                "sps": [
                    f"{sps_workdirs}/KM3NeT_{d_id_str}/results/{d_id_str}/dst",
                    f"{sps_workdirs}/KM3NeT_{d_id_str}_priority/results/{d_id_str}/dst",
                    f"{sps_workdirs}/KM3NeT_{d_id_str}_secondary/results/{d_id_str}/dst"
                ],
                "irods": f"{irods_base}/dst"
            },
            "light": {
                # Nothing defined for light files: no sps directory and no
                # irods collection
                "sps": []
            }
        }

        # Loop on what (reco, dst, light)
        for i_what in what:
            # loop on where (sps, irods)
            for i_where in where:
                # Extract the directory content only once as it can be
                # time consuming
                content_list = []
                if i_where == "sps":
                    for i_sps_dir in path[i_what][i_where]:
                        if os.path.exists(i_sps_dir):
                            content_list.extend(os.listdir(i_sps_dir))
                elif i_where == "irods":
                    # .get(): a kind without irods collection ("light") is
                    # counted as 0, as on sps, instead of raising KeyError
                    irods_dir = path[i_what].get(i_where)
                    if irods_dir is not None:
                        # "with" closes the pipe once the listing is read
                        with os.popen(f"ils {irods_dir}") as out:
                            for i_out in out:
                                if "KM3NeT" in i_out:
                                    content_list.append(
                                        i_out.replace(" ", "").replace("\n", "")
                                    )

                # Loop on all content and regex
                for i_content in content_list:
                    re_file_match = filename_re[i_type][i_what].match(i_content)
                    if not re_file_match:
                        continue

                    run_number = int(re_file_match.group(1))
                    # results is keyed by the requested runs: a dict lookup
                    # instead of scanning run_list for every file
                    if run_number in results:
                        results[run_number][i_type][i_what][i_where] += 1

    return results