Coverage for src/km3dq_common/aux_library.py: 0%
118 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-14 11:06 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-14 11:06 +0000
1#! /usr/bin/env python
2###############################################################################
3# Developer: Benjamin Trocme (benjamin.trocme at apc.in2p3.fr) - 2024
5import os
6import glob
7import re
8import km3db
11###############################################################################
def complete_with_zero(integer, total_length):
    """
    Return integer as a string, left-padded with "0" up to total_length

    Input:
    - integer: value to format (converted with str())
    - total_length: minimal length of the returned string

    The string is returned unchanged (never truncated) when it is already
    at least total_length characters long.
    """

    # str.rjust pads on the left exactly like the former character-by-character
    # loop did; a leading "-" is not moved (-5 with length 4 gives "00-5").
    return str(integer).rjust(total_length, "0")
22###############################################################################
def get_raw_data_full_path(dataset, run_number):
    """
    Return the full iRODS path
    Functional only at CC-IN2P3 (relies on the "ils" command)

    Input:
    - dataset: detector name, converted to a detector id by km3db.tools.todetid
    - run_number: run number (integer, formatted on 8 digits in the file name)

    Output: dict with three string entries, left empty when no file is found
    - "std": path rebuilt from the sub-directory and the file name
    - "root": corresponding root:// URL
    - "details": for each matching "ils -l" line, the text preceding
      "& <file name>", followed by " / "
    """

    raw_data_file = {"std": "", "root": "", "details": ""}

    d_id = km3db.tools.todetid(dataset)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id:08d}"
    regular_name = f"KM3NeT_{d_id:08d}_{run_number:08d}.root"

    # Read the whole top-level listing first so that the pipe is closed
    # before the per-sub-directory commands are launched
    with os.popen(f"ils {regular_dir}") as top_listing:
        top_lines = top_listing.readlines()

    for i_out in top_lines:
        # Sub-directories (collections) are the lines containing "C- "
        sub_dir = i_out.split("C- ")
        if len(sub_dir) <= 1:
            continue

        sub_dir_clean = sub_dir[1].replace("\n", "")
        with os.popen(f"ils -l {sub_dir_clean}") as sub_listing:
            for i_out2 in sub_listing:
                if regular_name not in i_out2:
                    continue

                d = sub_dir_clean.replace("/in2p3/", "/")
                raw_data_file["std"] = f"/in2p3{d}/{regular_name}"
                raw_data_file["root"] = (
                    f"root://ccxroot:1999//hpss/"
                    f"in2p3.fr/group{d}/"
                    f"{regular_name}"
                )
                # Several lines may match the file name: the details of
                # all of them are accumulated
                irods_details = i_out2.split(f"& {regular_name}")[0]
                raw_data_file["details"] += f"{irods_details} / "

    return raw_data_file
59###############################################################################
def get_reco_full_path(dataset, run_number, data_type, version, where, what):
    """
    Return the full SPS path of the reco/dst files
    Functional only at CC-IN2P3

    Input:
    - dataset: detector name
    - run_number: if 0, returns only the directory
    - data_type: "jppmuon_aashower_dynamic" or "jppmuon_jppshower-upgoing_dynamic"
    - version: dict of version
      Example: {"data":"v9.0","mc_noise":"v9.0", "mc_mupage":"v9.0", "mc_neutr":"v9.0"}
    - where: "sps" to look in the standard data processing tree, or a
      directory path containing "sps" in which the files are searched
      directly. Any other value leaves the output empty.
    - what: iterable of file kinds, among "reco" and "dst"

    Output: dict, with <sample> in "data", "mc_noise", "mc_mupage", "mc_neutr"
    - run_number == 0: "dir_<sample>" -> directory
    - otherwise: "<what>_<sample>" -> path of the file, or "" when the
      expected name does not match exactly one file. In the standard tree,
      the nominal, "_priority" and "_secondary" workdirs are scanned in
      that order and the last one holding the file wins.
    """

    path = {}

    d_id_str = f"{km3db.tools.todetid(dataset):08d}"
    run_number_str = f"{run_number:08d}"

    sub_directory_list = ["", "_priority", "_secondary"]
    tag_dir = "/sps/km3net/repo/data_processing/tag/"

    filename = {
        "data": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.data.{data_type}.offline.{version['data']}.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.data.{data_type}.offline.dst.{version['data']}.root",
        },
        "mc_noise": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.mc.pure_noise*.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.mc.pure_noise*.root",
        },
        "mc_mupage": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.mc.mupage*.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.mc.mupage*.root",
        },
        "mc_neutr": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.mc.gsg*.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.mc.gsg*.root",
        },
    }

    # Check the file availability on sps
    for i_type in what:
        for i_file in filename:
            if where == "sps":
                if run_number == 0:
                    # The whole list of suffixes used to be interpolated
                    # here, which produced an unusable path: the nominal
                    # (suffix-less) workdir is returned instead.
                    path[f"dir_{i_file}"] = (
                        f"{tag_dir}"
                        f"{version[i_file]}/workdirs/"
                        f"KM3NeT_{d_id_str}{sub_directory_list[0]}/"
                        f"results/{d_id_str}"
                    )
                    continue
                search_patterns = [
                    (
                        f"{tag_dir}"
                        f"{version[i_file]}/workdirs/"
                        f"KM3NeT_{d_id_str}{i_dir_data_type}/"
                        f"results/{d_id_str}/{i_type}/{filename[i_file][i_type]}"
                    )
                    for i_dir_data_type in sub_directory_list
                ]
            elif "sps" in where:
                if run_number == 0:
                    path[f"dir_{i_file}"] = where
                    continue
                search_patterns = [f"{where}/{filename[i_file][i_type]}"]
            else:
                # Unsupported location: nothing to report
                continue

            key = f"{i_type}_{i_file}"
            path[key] = ""
            for i_pattern in search_patterns:
                # Check that the file exists (and is unique, as the MC names
                # contain wildcards)
                matches = glob.glob(i_pattern)
                if len(matches) == 1:
                    path[key] = matches[0]

    return path
134###############################################################################
def check_raw_data_availability(dataset, run_number_list):
    """
    Check that the raw data are available on iRODS
    Function optimised for long list (typically JQAQC one)

    Input:
    - dataset: dataset name (ex: D0ORCA015)
    - list of runs to be checked

    Output: dict restricted to the problematic runs
    - run -> 0 when no raw data file was found
    - run -> n when n > 1 files were found
    Runs with exactly one file are not included.
    """

    d_id = km3db.tools.todetid(dataset)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id:08d}"

    # Compiled once for all listings; the leading zeros of the run number
    # are left out of the captured group
    re_file = re.compile(rf"\s*KM3NeT_0+{d_id}_0+(\d+)\.root\s*")

    # Read the whole top-level listing first so that the pipe is closed
    # before the per-sub-directory commands are launched
    with os.popen(f"ils {regular_dir}") as top_listing:
        top_lines = top_listing.readlines()

    # Number of files found per run number (stored as a string)
    nb_of_files = {}

    for i_out in top_lines:
        # Sub-directories (collections) are the lines containing "C- "
        sub_dir = i_out.split("C- ")
        if len(sub_dir) <= 1:
            continue

        sub_dir_clean = sub_dir[1].replace("\n", "")
        with os.popen(f"ils {sub_dir_clean}") as sub_listing:
            for i_out2 in sub_listing:
                re_file_match = re_file.match(i_out2)
                if re_file_match:
                    run_number = re_file_match.group(1)
                    nb_of_files[run_number] = nb_of_files.get(run_number, 0) + 1

    results = {}
    for v_run in run_number_list:
        nb_found = nb_of_files.get(f"{v_run}", 0)
        # Only missing (0) or duplicated (> 1) runs are reported
        if nb_found != 1:
            results[v_run] = nb_found

    return results
181############################################################################################
def check_reco_availability(dataset, run_list, data_type, version, where, what):
    """
    Check that the reco/dst/light availability in sps/irods
    Function optimised for long list (typically JQAQC one)

    Input:
    - dataset: dataset name (ex: D0ORCA015)
    - run_list: list of runs to be checked
    - data_type: reconstruction type used in the data file names; may
      contain unix wildcards ("*")
    - version: dict of version, with the keys "data", "mc_noise",
      "mc_mupage" and "mc_neutr"
    - where: list of locations to scan, among "sps" and "irods"
    - what: list of file kinds to look for (file name patterns are defined
      for "reco" and "dst" only)

    Output: dict such that results[run][sample][what][where] is the number
    of files found, with sample in "data", "mc_noise", "mc_mupage",
    "mc_neutr"
    """

    d_id_str = f"{km3db.tools.todetid(dataset):08d}"
    sample_list = ["data", "mc_noise", "mc_mupage", "mc_neutr"]

    # Initialize results
    results = {}
    for i_run in run_list:
        results[i_run] = {
            i_type: {
                i_what: {i_where: 0 for i_where in where}
                for i_what in what
            }
            for i_type in sample_list
        }

    # data_type is coded with unix wildcards, not regex ones -> need to replace them
    data_type_re = data_type.replace("*", ".*")
    # Raw strings are mandatory here: "\d" and "\." are regex escapes, not
    # python ones. The version strings are inserted as they are.
    file_start = rf"KM3NeT_{d_id_str}_(\d+)"
    mc_noise_re = re.compile(rf"{file_start}\.mc\.pure_noise.*\.root")
    mc_mupage_re = re.compile(rf"{file_start}\.mc\.mupage.*\.root")
    mc_neutr_re = re.compile(rf"{file_start}\.mc\.gsg.*\.root")
    filename_re = {
        "data": {
            "reco": re.compile(
                rf"{file_start}\.data\.{data_type_re}\.offline\.{version['data']}\.root"
            ),
            "dst": re.compile(
                rf"{file_start}\.data\.{data_type_re}\.offline\.dst\.{version['data']}\.root"
            ),
        },
        "mc_noise": {"reco": mc_noise_re, "dst": mc_noise_re},
        "mc_mupage": {"reco": mc_mupage_re, "dst": mc_mupage_re},
        "mc_neutr": {"reco": mc_neutr_re, "dst": mc_neutr_re},
    }

    # Extract the directory content only once as it can be time consuming
    path_0 = {
        "sps": {
            "data": "/sps/km3net/repo/data_processing/tag/",
            "mc_noise": "/sps/km3net/repo/data_processing/tag/",
            "mc_mupage": "/sps/km3net/repo/data_processing/tag/",
            "mc_neutr": "/sps/km3net/repo/data_processing/tag/",
        },
        "irods": {
            "data": "/in2p3/km3net/data/",
            "mc_noise": "/in2p3/km3net/mc/pure_noise/",
            "mc_mupage": "/in2p3/km3net/mc/atm_muon/",
            "mc_neutr": "/in2p3/km3net/mc/atm_neutrino/",
        },
    }

    for i_type in sample_list:
        sps_workdirs = f"{path_0['sps'][i_type]}/{version[i_type]}/workdirs"
        irods_dir = f"{path_0['irods'][i_type]}/KM3NeT_{d_id_str}/{version[i_type]}"
        path = {
            "reco": {
                "sps": [f"{sps_workdirs}/KM3NeT_{d_id_str}/results/{d_id_str}/reco"],
                "irods": f"{irods_dir}/reco",
            },
            "dst": {
                "sps": [
                    f"{sps_workdirs}/KM3NeT_{d_id_str}/results/{d_id_str}/dst",
                    f"{sps_workdirs}/KM3NeT_{d_id_str}_priority/results/{d_id_str}/dst",
                    f"{sps_workdirs}/KM3NeT_{d_id_str}_secondary/results/{d_id_str}/dst",
                ],
                "irods": f"{irods_dir}/dst",
            },
            "light": {
                "sps": ""
            },
        }

        # Loop on what (reco, dst, light)
        for i_what in what:
            # loop on where (sps, irods)
            for i_where in where:
                # Extract the content
                content_list = []
                if i_where == "sps":
                    for i_sps_dir in path[i_what][i_where]:
                        if os.path.exists(i_sps_dir):
                            content_list.extend(os.listdir(i_sps_dir))
                elif i_where == "irods":
                    # The pipe is closed as soon as the listing is read
                    with os.popen(f"ils {path[i_what][i_where]}") as listing:
                        content_list.extend(
                            i_out.replace(" ", "").replace("\n", "")
                            for i_out in listing
                            if "KM3NeT" in i_out
                        )

                # Loop on all content and regex
                for i_content in content_list:
                    re_file_match = filename_re[i_type][i_what].match(i_content)
                    if not re_file_match:
                        continue

                    run_number = int(re_file_match.group(1))
                    # results is keyed by the requested runs: constant-time
                    # lookup instead of scanning run_list for every file
                    if run_number in results:
                        results[run_number][i_type][i_what][i_where] += 1

    return results