Coverage for src/km3dq_common/aux_library.py: 0%

120 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-16 14:13 +0000

1#! /usr/bin/env python 

2############################################################################### 

3# Developer: Benjamin Trocme (benjamin.trocme at apc.in2p3.fr) - 2024 

4 

5import os 

6import glob 

7import re 

8import km3db 

9 

10 

11############################################################################### 

def complete_with_zero(integer, total_length):
    """
    Return `integer` as a string left-padded with "0" up to `total_length`

    - integer: value to format (converted with str())
    - total_length: minimal length of the returned string

    If the string form of `integer` is already `total_length` characters
    long or longer, it is returned unchanged (never truncated).
    The padding is purely textual: a leading sign is not moved in front of
    the zeros, i.e. complete_with_zero(-5, 4) gives "00-5".
    """

    # str.rjust does the padding in one call instead of concatenating the
    # "0" characters one by one
    return str(integer).rjust(total_length, "0")

20 

21 

22############################################################################### 

def get_raw_data_full_path(dataset, run_number):
    """
    Return the full iRODS path of the raw data file of a run
    Functional only at CC-IN2P3 (relies on the iRODS "ils" command)

    - dataset: detector name, converted to a detector id by km3db.tools.todetid
    - run_number: run number (integer)

    Output: dict with three string entries, left empty if the file is not found
    - "std": "/in2p3..." path of the file
    - "root": corresponding "root://ccxroot:1999//hpss/in2p3.fr/group..." URL
    - "details": text preceding "& <file name>" in the "ils -l" listing, one
      " / "-terminated item per listing line mentioning the file
    """

    raw_data_file = {"std": "", "root": "", "details": ""}

    d_id = km3db.tools.todetid(dataset)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id:08d}"
    regular_name = f"KM3NeT_{d_id:08d}_{run_number:08d}.root"

    # The pipes are opened in "with" blocks so that they are closed as soon
    # as their output has been read
    with os.popen(f"ils {regular_dir}") as out:
        # Keep the listing lines containing "C- " (presumably the way ils
        # flags the sub-collections - to be confirmed) and extract the path
        # that follows this marker
        sub_dir_list = [
            i_out.split("C- ")[1].replace("\n", "")
            for i_out in out
            if "C- " in i_out
        ]

    for sub_dir_clean in sub_dir_list:
        # Same for all the files of the sub-directory -> computed only once
        d = sub_dir_clean.replace("/in2p3/", "/")

        with os.popen(f"ils -l {sub_dir_clean}") as out2:
            for i_out2 in out2:
                if regular_name not in i_out2:
                    continue

                raw_data_file["std"] = f"/in2p3{d}/{regular_name}"
                raw_data_file["root"] = (
                    f"root://ccxroot:1999//hpss/"
                    f"in2p3.fr/group{d}/"
                    f"{regular_name}"
                )
                # Several listing lines may mention the same file: the
                # details of all of them are accumulated
                irods_details = i_out2.split(f"& {regular_name}")[0]
                raw_data_file["details"] += f"{irods_details} / "

    return raw_data_file

57 

58 

59############################################################################### 

def get_reco_full_path(dataset, run_number, data_type, version, where, what):
    """
    Return the full SPS path of the processed files of a run
    Functional only at CC-IN2P3
    - dataset: detector name
    - run_number: if 0, returns only the directory
    - data_type: "jppmuon_aashower_dynamic" or "jppmuon_jppshower-upgoing_dynamic"
    - version: dict of version
    Example: {"data":"v9.0","mc_noise":"v9.0", "mc_mupage":"v9.0", "mc_neutr":"v9.0"}
    - where: "sps" to look in the standard data processing area, or any
      other string containing "sps", which is then used as the directory
      holding the files. For any other value, an empty dict is returned
      (no iRODS lookup is implemented here)
    - what: iterable of processing levels: "reco" and/or "dst"

    Output: dict, <sample> being "data", "mc_noise", "mc_mupage" or "mc_neutr"
    - run_number == 0: "dir_<sample>" -> directory
    - otherwise: "<level>_<sample>" -> path of the file, "" unless exactly
      one file matches the expected name
    """

    path = {}

    d_id_str = f"{km3db.tools.todetid(dataset):08d}"
    run_number_str = f"{run_number:08d}"
    file_prefix = f"KM3NeT_{d_id_str}_{run_number_str}"

    sub_directory_list = ["", "_priority", "_secondary"]

    # The MC file names are unix wildcard patterns resolved with glob; they
    # are the same for the reco and dst levels
    filename = {
        "data": {
            "reco": f"{file_prefix}.data.{data_type}.offline.{version['data']}.root",
            "dst": f"{file_prefix}.data.{data_type}.offline.dst.{version['data']}.root",
        },
        "mc_noise": {
            "reco": f"{file_prefix}.mc.pure_noise*.root",
            "dst": f"{file_prefix}.mc.pure_noise*.root",
        },
        "mc_mupage": {
            "reco": f"{file_prefix}.mc.mupage*.root",
            "dst": f"{file_prefix}.mc.mupage*.root",
        },
        "mc_neutr": {
            "reco": f"{file_prefix}.mc.gsg*.root",
            "dst": f"{file_prefix}.mc.gsg*.root",
        },
    }

    # Check the file availability on sps
    for i_type in what:
        for i_file in filename:
            if where == "sps":
                workdirs = (
                    "/sps/km3net/repo/data_processing/tag/"
                    f"{version[i_file]}/workdirs/"
                )
                if run_number == 0:
                    # NOTE(review): the whole list of sub-directory suffixes
                    # used to be interpolated here, giving an invalid path.
                    # The default work directory (no suffix) is assumed to
                    # be the intended one - to be confirmed.
                    path[f"dir_{i_file}"] = (
                        f"{workdirs}KM3NeT_{d_id_str}/results/{d_id_str}"
                    )
                else:
                    path[f"{i_type}_{i_file}"] = ""
                    # All work directories are scanned: the last one holding
                    # exactly one matching file wins
                    for i_dir_data_type in sub_directory_list:
                        full_path = (
                            f"{workdirs}KM3NeT_{d_id_str}{i_dir_data_type}/"
                            f"results/{d_id_str}/{i_type}/{filename[i_file][i_type]}"
                        )
                        # Check that the file exists
                        matches = glob.glob(full_path)
                        if len(matches) == 1:
                            path[f"{i_type}_{i_file}"] = matches[0]
            elif "sps" in where:
                # User-defined directory
                if run_number == 0:
                    path[f"dir_{i_file}"] = where
                else:
                    path[f"{i_type}_{i_file}"] = ""
                    full_path = f"{where}/{filename[i_file][i_type]}"
                    # Check that the file exists
                    matches = glob.glob(full_path)
                    if len(matches) == 1:
                        path[f"{i_type}_{i_file}"] = matches[0]

    return path

135 

136 

137############################################################################### 

def check_raw_data_availability(dataset, run_number_list):
    """
    Check that the raw data are available on iRODS
    Function optimised for long list (typically JQAQC one)

    Input:
    - dataset: dataset name (ex: D0ORCA015)
    - list of runs to be checked

    Output:
    - dict containing only the problematic runs: run -> number of raw data
      files found, i.e. 0 or more than 1. The runs with exactly one file
      are not reported.
    """

    d_id = km3db.tools.todetid(dataset)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id:08d}"

    # Compiled only once, outside of the loops. The dot of ".root" is
    # escaped in order to match only a literal dot.
    # The captured group is the run number without its leading zeros.
    re_file = re.compile(rf"\s*KM3NeT_0+{d_id}_0+(\d+)\.root\s*")

    # The pipes are opened in "with" blocks so that they are closed as soon
    # as their output has been read
    with os.popen(f"ils {regular_dir}") as out:
        # Keep the listing lines containing "C- " (presumably the way ils
        # flags the sub-collections - to be confirmed) and extract the path
        # that follows this marker
        sub_dir_list = [
            i_out.split("C- ")[1].replace("\n", "")
            for i_out in out
            if "C- " in i_out
        ]

    # Number of files found per run; the keys are strings
    nb_of_files = {}
    for sub_dir_clean in sub_dir_list:
        with os.popen(f"ils {sub_dir_clean}") as out2:
            for i_out2 in out2:
                re_file_match = re_file.match(i_out2)
                if re_file_match:
                    run_number = re_file_match.group(1)
                    nb_of_files[run_number] = nb_of_files.get(run_number, 0) + 1

    results = {}
    for v_run in run_number_list:
        nb_found = nb_of_files.get(f"{v_run}", 0)
        # A single file is the nominal case: nothing to report
        if nb_found != 1:
            results[v_run] = nb_found

    return results

182 

183 

184############################################################################################ 

def check_reco_availability(dataset, run_list, data_type, version, where, what):
    """
    Check the reco/dst/light availability in sps/irods
    Function optimised for long list (typically JQAQC one)

    Input:
    - dataset: dataset name (ex: D0ORCA015)
    - run_list: list of runs (integers) to be checked
    - data_type: reconstruction type entering the data file names; a "*" is
      treated as a unix wildcard
    - version: dict of version with the keys "data", "mc_noise", "mc_mupage"
      and "mc_neutr"
    - where: iterable of storage names: "sps" and/or "irods"
    - what: iterable of processing levels: "reco", "dst", "light"

    Output:
    - results[run][sample][level][storage] = number of files found, <sample>
      being "data", "mc_noise", "mc_mupage" or "mc_neutr"

    No location nor file name is defined for the "light" level: its counters
    always remain at 0.
    """

    d_id_str = f"{km3db.tools.todetid(dataset):08d}"
    type_list = ["data", "mc_noise", "mc_mupage", "mc_neutr"]

    # Initialize results: one counter per run / sample / what / where
    results = {
        i_run: {
            i_type: {
                i_what: {i_where: 0 for i_where in where}
                for i_what in what
            }
            for i_type in type_list
        }
        for i_run in run_list
    }

    # data_type is coded with unix wildcards, not regex ones -> need to replace them
    # All the other characters (dots included) are matched literally
    data_type_re = re.escape(data_type).replace(r"\*", ".*")
    version_re = re.escape(version["data"])
    # The captured group is the run number
    prefix_re = rf"KM3NeT_{d_id_str}_(\d+)"

    filename_re = {
        "data": {
            "reco": re.compile(
                rf"{prefix_re}\.data\.{data_type_re}\.offline\.{version_re}\.root"
            ),
            "dst": re.compile(
                rf"{prefix_re}\.data\.{data_type_re}\.offline\.dst\.{version_re}\.root"
            ),
        },
    }
    # The MC file names are the same for the reco and dst levels
    for i_type, i_tag in (
        ("mc_noise", "pure_noise"),
        ("mc_mupage", "mupage"),
        ("mc_neutr", "gsg"),
    ):
        mc_re = re.compile(rf"{prefix_re}\.mc\.{i_tag}.*\.root")
        filename_re[i_type] = {"reco": mc_re, "dst": mc_re}

    # Extract the directory content only once as it can be time consuming
    path_0 = {
        "sps": {
            "data": "/sps/km3net/repo/data_processing/tag/",
            "mc_noise": "/sps/km3net/repo/data_processing/tag/",
            "mc_mupage": "/sps/km3net/repo/data_processing/tag/",
            "mc_neutr": "/sps/km3net/repo/data_processing/tag/",
        },
        "irods": {
            "data": "/in2p3/km3net/data/",
            "mc_noise": "/in2p3/km3net/mc/pure_noise/",
            "mc_mupage": "/in2p3/km3net/mc/atm_muon/",
            "mc_neutr": "/in2p3/km3net/mc/atm_neutrino/",
        },
    }

    for i_type in type_list:
        sps_workdirs = f"{path_0['sps'][i_type]}/{version[i_type]}/workdirs"
        irods_dir = f"{path_0['irods'][i_type]}/KM3NeT_{d_id_str}/{version[i_type]}"
        # sps: list of directories / irods: a single collection
        path = {
            "reco": {
                "sps": [f"{sps_workdirs}/KM3NeT_{d_id_str}/results/{d_id_str}/reco"],
                "irods": f"{irods_dir}/reco",
            },
            "dst": {
                "sps": [
                    f"{sps_workdirs}/KM3NeT_{d_id_str}/results/{d_id_str}/dst",
                    f"{sps_workdirs}/KM3NeT_{d_id_str}_priority/results/{d_id_str}/dst",
                    f"{sps_workdirs}/KM3NeT_{d_id_str}_secondary/results/{d_id_str}/dst",
                ],
                "irods": f"{irods_dir}/dst",
            },
            "light": {
                "sps": "",
            },
        }

        # Loop on what (reco, dst, light)
        for i_what in what:
            # loop on where (sps, irods)
            for i_where in where:
                # Nothing to scan when no location is defined (light files,
                # unknown storage): the counters remain at 0
                location = path.get(i_what, {}).get(i_where)
                if not location:
                    continue

                # Extract the content
                content_list = []
                if i_where == "sps":
                    for i_sps_dir in location:
                        if os.path.isdir(i_sps_dir):
                            content_list.extend(os.listdir(i_sps_dir))
                elif i_where == "irods":
                    # The pipe is closed as soon as its output has been read
                    with os.popen(f"ils {location}") as out:
                        content_list = [
                            i_out.replace(" ", "").replace("\n", "")
                            for i_out in out
                            if "KM3NeT" in i_out
                        ]

                # Loop on all content and regex
                file_re = filename_re[i_type][i_what]
                for i_content in content_list:
                    re_file_match = file_re.match(i_content)
                    if not re_file_match:
                        continue
                    run_number = int(re_file_match.group(1))
                    # The keys of results are the requested runs: a dict
                    # lookup is faster than scanning a long run list
                    if run_number in results:
                        results[run_number][i_type][i_what][i_where] += 1

    return results

291 

292 return results