Coverage for src/km3dq_common/aux_library.py: 0%

118 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-14 11:06 +0000

1#! /usr/bin/env python 

2############################################################################### 

3# Developer: Benjamin Trocme (benjamin.trocme at apc.in2p3.fr) - 2024 

4 

5import os 

6import glob 

7import re 

8import km3db 

9 

10 

11############################################################################### 

def complete_with_zero(integer, total_length):
    """
    Return `integer` as a string left-padded with "0" up to `total_length`

    - integer: value to format (converted with str())
    - total_length: minimal length of the returned string

    The value is never truncated: if its string representation is already
    at least `total_length` characters long, it is returned as is.
    The padding is purely textual, i.e. a minus sign is not moved in front
    of the zeros (-5 padded to 4 characters gives "00-5").
    """

    # str.rjust does exactly what the former character-by-character loop did
    return str(integer).rjust(total_length, "0")

20 

21 

22############################################################################### 

def get_raw_data_full_path(dataset, run_number):
    """
    Return the full iRODS path of a raw data file
    Functional only at CC-IN2P3 (relies on the "ils" command)

    - dataset: detector name, converted to a detector id with
      km3db.tools.todetid
    - run_number: run number (integer, formatted on 8 digits)

    Returns a dict with three string entries, all left empty when the file
    is not found:
    - "std": path of the file, starting with /in2p3
    - "root": corresponding root://ccxroot:1999 URL
    - "details": beginning of every matching "ils -l" line (what precedes
      "& <file name>"), each followed by " / "
    """

    raw_data_file = {"std": "", "root": "", "details": ""}

    d_id = km3db.tools.todetid(dataset)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id:08d}"
    regular_name = f"KM3NeT_{d_id:08d}_{run_number:08d}.root"

    # Read the top level listing at once and close the pipe immediately
    with os.popen(f"ils {regular_dir}") as out:
        top_listing = out.readlines()

    for i_out in top_listing:
        # Sub-collections are flagged by "C- " in the ils output
        sub_dir = i_out.split("C- ")
        if len(sub_dir) < 2:
            continue
        sub_dir_clean = sub_dir[1].replace("\n", "")

        # The context manager closes the pipe (no leaked file descriptor)
        with os.popen(f"ils -l {sub_dir_clean}") as out2:
            for i_out2 in out2:
                if regular_name not in i_out2:
                    continue
                d = sub_dir_clean.replace("/in2p3/", "/")
                raw_data_file["std"] = f"/in2p3{d}/{regular_name}"
                raw_data_file["root"] = (
                    f"root://ccxroot:1999//hpss/"
                    f"in2p3.fr/group{d}/"
                    f"{regular_name}"
                )
                # A file may be listed several times: all the lines are
                # accumulated in "details"
                irods_details = i_out2.split(f"& {regular_name}")[0]
                raw_data_file["details"] += f"{irods_details} / "

    return raw_data_file

57 

58 

59############################################################################### 

def get_reco_full_path(dataset, run_number, data_type, version, where, what):
    """
    Return the full SPS path of the processed files
    Functional only at CC-IN2P3
    - dataset: detector name
    - run_number: if 0, returns only the directory
    - data_type: "jppmuon_aashower_dynamic" or "jppmuon_jppshower-upgoing_dynamic"
    - version: dict of version
      Example: {"data":"v9.0","mc_noise":"v9.0", "mc_mupage":"v9.0", "mc_neutr":"v9.0"}
    - where: "sps" for the standard processing tree
      (/sps/km3net/repo/data_processing/tag/...) or a directory path
      containing "sps" that directly holds the files.
      Any other value gives an empty dict.
    - what: iterable of file kinds, among "reco" and "dst"

    Returns a dict:
    - run_number == 0: "dir_<file type>" -> directory
    - otherwise: "<what>_<file type>" -> path of the file, or "" when the
      file pattern does not match exactly one file
    with <file type> in "data", "mc_noise", "mc_mupage", "mc_neutr"
    """

    path = {}

    d_id_str = f"{km3db.tools.todetid(dataset):08d}"
    run_number_str = f"{run_number:08d}"

    sub_directory_list = ["", "_priority", "_secondary"]

    # File names; the MC ones contain a unix wildcard resolved with glob
    filename = {
        "data": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.data.{data_type}.offline.{version['data']}.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.data.{data_type}.offline.dst.{version['data']}.root",
        },
        "mc_noise": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.mc.pure_noise*.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.mc.pure_noise*.root",
        },
        "mc_mupage": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.mc.mupage*.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.mc.mupage*.root",
        },
        "mc_neutr": {
            "reco": f"KM3NeT_{d_id_str}_{run_number_str}.mc.gsg*.root",
            "dst": f"KM3NeT_{d_id_str}_{run_number_str}.mc.gsg*.root",
        },
    }

    # Check the file availability on sps
    for i_type in what:
        for i_file in filename:
            if where == "sps":
                if run_number == 0:
                    # The whole list used to be interpolated here, giving
                    # an unusable path. The regular (un-suffixed) working
                    # directory is returned instead.
                    path[f"dir_{i_file}"] = (
                        "/sps/km3net/repo/data_processing/tag/"
                        f"{version[i_file]}/workdirs/"
                        f"KM3NeT_{d_id_str}{sub_directory_list[0]}/"
                        f"results/{d_id_str}"
                    )
                else:
                    path[f"{i_type}_{i_file}"] = ""
                    for i_dir_data_type in sub_directory_list:
                        full_path = (
                            "/sps/km3net/repo/data_processing/tag/"
                            f"{version[i_file]}/workdirs/"
                            f"KM3NeT_{d_id_str}{i_dir_data_type}/"
                            f"results/{d_id_str}/{i_type}/{filename[i_file][i_type]}"
                        )
                        # Check that the file exists (exactly one match).
                        # If several sub directories hold it, the last wins
                        matches = glob.glob(full_path)
                        if len(matches) == 1:
                            path[f"{i_type}_{i_file}"] = matches[0]
            elif "sps" in where:
                if run_number == 0:
                    path[f"dir_{i_file}"] = where
                else:
                    path[f"{i_type}_{i_file}"] = ""
                    full_path = f"{where}/{filename[i_file][i_type]}"
                    # Check that the file exists (exactly one match)
                    matches = glob.glob(full_path)
                    if len(matches) == 1:
                        path[f"{i_type}_{i_file}"] = matches[0]

    return path

132 

133 

134############################################################################### 

def check_raw_data_availability(dataset, run_number_list):
    """
    Check that the raw data are available on iRODS
    Function optimised for long list (typically JQAQC one)

    Input:
    - dataset: dataset name (ex: D0ORCA015)
    - run_number_list: list of runs to be checked

    Output:
    - dict run -> number of raw data files found, filled only for the
      problematic runs, i.e. those with no file (0) or with more than one
      file. Runs with exactly one file are not included.
    """

    d_id = km3db.tools.todetid(dataset)
    regular_dir = f"/in2p3/km3net/data/raw/sea/KM3NeT_{d_id:08d}"

    # Compiled once, outside the loops. The captured run number is stripped
    # of its zero padding and the dot before "root" is matched literally.
    re_file = re.compile(rf"\s*KM3NeT_0*{d_id}_0*(\d+)\.root\s*")

    # Read the top level listing at once and close the pipe immediately
    with os.popen(f"ils {regular_dir}") as out:
        top_listing = out.readlines()

    # Number of files found per run number (string without zero padding)
    nb_of_files = {}

    for i_out in top_listing:
        # Sub-collections are flagged by "C- " in the ils output
        sub_dir = i_out.split("C- ")
        if len(sub_dir) < 2:
            continue
        sub_dir_clean = sub_dir[1].replace("\n", "")

        # The context manager closes the pipe (no leaked file descriptor)
        with os.popen(f"ils {sub_dir_clean}") as out2:
            for i_out2 in out2:
                re_file_match = re_file.match(i_out2)
                if re_file_match:
                    run_number = re_file_match.group(1)
                    nb_of_files[run_number] = nb_of_files.get(run_number, 0) + 1

    # Report only the missing runs and the runs found several times
    results = {}
    for v_run in run_number_list:
        nb_found = nb_of_files.get(f"{v_run}", 0)
        if nb_found != 1:
            results[v_run] = nb_found

    return results

179 

180 

181############################################################################################ 

def check_reco_availability(dataset, run_list, data_type, version, where, what):
    """
    Check that the reco/dst/light availability in sps/irods
    Function optimised for long list (typically JQAQC one)

    Input:
    - dataset: dataset name (ex: D0ORCA015)
    - run_list: list of runs to be checked
    - data_type: data type as used in the data file names; "*" is treated
      as a unix wildcard
    - version: dict of version, with keys "data", "mc_noise", "mc_mupage"
      and "mc_neutr"
    - where: iterable of locations ("sps", "irods")
    - what: iterable of file kinds ("reco", "dst", "light"); no location
      nor file pattern is defined for "light", so its counts stay at 0

    Output:
    - results[run][file type][what][where] = number of files found, with
      file type in "data", "mc_noise", "mc_mupage", "mc_neutr"
    """

    d_id_str = f"{km3db.tools.todetid(dataset):08d}"
    file_types = ("data", "mc_noise", "mc_mupage", "mc_neutr")

    # Initialize results
    results = {
        i_run: {
            i_type: {
                i_what: {i_where: 0 for i_where in where} for i_what in what
            }
            for i_type in file_types
        }
        for i_run in run_list
    }

    # data_type is coded with unix wildcards, not regex ones -> everything
    # is escaped and only "*" is translated to its regex equivalent
    data_type_re = re.escape(data_type).replace(r"\*", ".*")
    version_re = re.escape(version["data"])
    prefix_re = rf"KM3NeT_{d_id_str}_(\d+)"

    # For the MC, the same pattern is used for reco and dst
    re_mc_noise = re.compile(rf"{prefix_re}\.mc\.pure_noise.*\.root")
    re_mc_mupage = re.compile(rf"{prefix_re}\.mc\.mupage.*\.root")
    re_mc_neutr = re.compile(rf"{prefix_re}\.mc\.gsg.*\.root")

    filename_re = {
        "data": {
            "reco": re.compile(
                rf"{prefix_re}\.data\.{data_type_re}\.offline\.{version_re}\.root"
            ),
            "dst": re.compile(
                rf"{prefix_re}\.data\.{data_type_re}\.offline\.dst\.{version_re}\.root"
            ),
        },
        "mc_noise": {"reco": re_mc_noise, "dst": re_mc_noise},
        "mc_mupage": {"reco": re_mc_mupage, "dst": re_mc_mupage},
        "mc_neutr": {"reco": re_mc_neutr, "dst": re_mc_neutr},
    }

    # Extract the directory content only once as it can be time consuming
    path_0 = {
        "sps": {
            "data": "/sps/km3net/repo/data_processing/tag/",
            "mc_noise": "/sps/km3net/repo/data_processing/tag/",
            "mc_mupage": "/sps/km3net/repo/data_processing/tag/",
            "mc_neutr": "/sps/km3net/repo/data_processing/tag/",
        },
        "irods": {
            "data": "/in2p3/km3net/data/",
            "mc_noise": "/in2p3/km3net/mc/pure_noise/",
            "mc_mupage": "/in2p3/km3net/mc/atm_muon/",
            "mc_neutr": "/in2p3/km3net/mc/atm_neutrino/",
        },
    }

    for i_type in file_types:
        path = {
            "reco": {
                "sps": [f"{path_0['sps'][i_type]}/{version[i_type]}/workdirs/KM3NeT_{d_id_str}/results/{d_id_str}/reco"],
                "irods": f"{path_0['irods'][i_type]}/KM3NeT_{d_id_str}/{version[i_type]}/reco",
            },
            "dst": {
                "sps": [
                    f"{path_0['sps'][i_type]}/{version[i_type]}/workdirs/KM3NeT_{d_id_str}/results/{d_id_str}/dst",
                    f"{path_0['sps'][i_type]}/{version[i_type]}/workdirs/KM3NeT_{d_id_str}_priority/results/{d_id_str}/dst",
                    f"{path_0['sps'][i_type]}/{version[i_type]}/workdirs/KM3NeT_{d_id_str}_secondary/results/{d_id_str}/dst",
                ],
                "irods": f"{path_0['irods'][i_type]}/KM3NeT_{d_id_str}/{version[i_type]}/dst",
            },
            # No location defined (yet) for the light files
            "light": {
                "sps": [],
            },
        }

        # Loop on what (reco, dst, light)
        for i_what in what:
            # No pattern defined for this kind of file -> nothing to count
            file_re = filename_re[i_type].get(i_what)

            # loop on where (sps, irods)
            for i_where in where:
                # Extract the content
                content_list = []
                if i_where == "sps":
                    for i_sps_dir in path[i_what].get("sps", []):
                        if os.path.isdir(i_sps_dir):
                            content_list.extend(os.listdir(i_sps_dir))
                elif i_where == "irods":
                    irods_dir = path[i_what].get("irods")
                    if irods_dir:
                        # The context manager closes the pipe
                        with os.popen(f"ils {irods_dir}") as out:
                            content_list = [
                                i_out.replace(" ", "").replace("\n", "")
                                for i_out in out
                                if "KM3NeT" in i_out
                            ]

                if file_re is None:
                    continue

                # Loop on all content and regex
                for i_content in content_list:
                    re_file_match = file_re.match(i_content)
                    if not re_file_match:
                        continue
                    run_number = int(re_file_match.group(1))
                    # results has one key per requested run: constant time
                    # lookup instead of scanning run_list for every file
                    if run_number in results:
                        results[run_number][i_type][i_what][i_where] += 1

    return results