diff --git a/maxwell_integrate_to_h5.py b/maxwell_integrate_to_h5.py
index 7d14761944da0e4d8be61f186a8ad6f7378d4018..c4bf4cd3152821fc62a23298d05bf3faa7c06873 100644
--- a/maxwell_integrate_to_h5.py
+++ b/maxwell_integrate_to_h5.py
@@ -34,6 +34,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
     """
     global NPROC
     global FORBIDDEN
+    seen = []
    fnames_ims = []#= glob(os.path.join(path_im, "*" + dtype_im))
     fnames_metadata = []#= glob(os.path.join(path_im, "*" + ".metadata"))
     path_int_list = []
@@ -47,10 +48,9 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                 else:
                     path_new = path_int
                 path_int_list.append(path_new)
-            elif ".metadata" in name:
+            if "metadata" in name:
                 fnames_metadata.append(os.path.join(path, name))
-                print("Found metadata file: " + os.path.join(path, name))
-
+
     #fnames_ims.sort(key=str.lower)
@@ -64,7 +64,6 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
 
         with open(os.path.join(path, name), 'r') as metadata_file:
             for line in metadata_file:
-                metadata = {}
                 if line.startswith("dateString="):
                     metadata["dateString"] = line.split("=", 1)[1].strip()
                 elif line.startswith("userComment1="):
@@ -87,7 +86,6 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                     metadata["imageSequenceNumber"] = line.split("=", 1)[1].strip()
         metadata["filename"] = name
         # Convert metadata dictionary to a DataFrame and sort by filename
-
         return metadata
 
     def integration_thread(fname_im,path_int):
@@ -139,7 +137,8 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
 
         subdir_fnames = [fname for fname in fnames_ims if os.path.dirname(fname) == subdir]
-        subdir_fnames_metadata = [fname for fname in fnames_metadata if os.path.dirname(fname) == subdir]
+        subdir_fnames_metadata = [fname_meta for fname_meta in fnames_metadata if os.path.dirname(fname_meta) == subdir]
+
         if not subdir_fnames:
             print(f"No images found in subdirectory: {subdir}")
             continue
@@ -189,9 +188,10 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
 
         results_df = results_df.sort_values(by="filename", key=lambda col: col.str.lower())
         results_metadata_df = results_metadata_df.sort_values(by="filename", key=lambda col: col.str.lower())
-        for key, value in results_metadata_df.iloc[0].items():
-            results_df[key] = value
+        for key in results_metadata_df.columns:
+            if key not in results_df.columns:
+                results_df[key] = results_metadata_df[key].values
 
         subdir_name = os.path.basename(os.path.normpath(subdir_path_int))
         results_df.to_csv(os.path.join(subdir_path_int, f"{subdir_name}.csv"), index=False)
@@ -225,15 +224,15 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
 
             if any(results_metadata[idx-1][key] for key in ["userComment1", "userComment2", "userComment3", "userComment4"]):
                 comments = entry.create_group("comments")
-                comments.attrs["NX_class"] = "NXcomments"
-                if results_metadata[idx-1]["userComment1"]:
-                    comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'))
-                if results_metadata[idx-1]["userComment2"]:
-                    comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'))
-                if results_metadata[idx-1]["userComment3"]:
-                    comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'))
-                if results_metadata[idx-1]["userComment4"]:
-                    comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'))
+                comments.attrs["NX_class"] = "NXcomments"
+                if results_metadata[idx-1]["userComment1"]:
+                    comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'))
+                if results_metadata[idx-1]["userComment2"]:
+                    comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'))
+                if results_metadata[idx-1]["userComment3"]:
+                    comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'))
+                if results_metadata[idx-1]["userComment4"]:
+                    comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'))
 
             # Instrument / Detector group
@@ -244,11 +243,24 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
             detector.create_dataset("q", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,))
             detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,))
             detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,))
-            detector.create_dataset("pixels width", data=int(results_metadata[idx-1]["width"]))
-            detector.create_dataset("pixels height", data=int(results_metadata[idx-1]["height"]))
-            detector.create_dataset("exposureTime", data=float(results_metadata[idx-1]["exposureTime"]))
-            detector.create_dataset("summedExposures", data=int(results_metadata[idx-1]["summedExposures"]))
-            detector.create_dataset("imageSequenceNumber", data=int(results_metadata[idx-1]["imageSequenceNumber"]))
+
+            # Guard against missing or non-numeric metadata values: skip the dataset instead of raising
+            width = results_metadata[idx-1].get("width", "").strip()
+            height = results_metadata[idx-1].get("height", "").strip()
+            exposure_time = results_metadata[idx-1].get("exposureTime", "").strip()
+            summed_exposures = results_metadata[idx-1].get("summedExposures", "").strip()
+            image_sequence_number = results_metadata[idx-1].get("imageSequenceNumber", "").strip()
+
+            if width.isdigit():
+                detector.create_dataset("pixels width", data=np.asarray(int(width), dtype=np.int64))
+            if height.isdigit():
+                detector.create_dataset("pixels height", data=np.asarray(int(height), dtype=np.int64))
+            if exposure_time.replace('.', '', 1).isdigit():
+                detector.create_dataset("exposure time", data=np.asarray(float(exposure_time), dtype=np.float64))
+            if summed_exposures.isdigit():
+                detector.create_dataset("summed exposures", data=np.asarray(int(summed_exposures), dtype=np.int64))
+            if image_sequence_number.isdigit():
+                detector.create_dataset("image sequence number", data=np.asarray(int(image_sequence_number), dtype=np.int64))
 
             # Add interpretation info (optional for PyMca)
             detector["I"].attrs["interpretation"] = "spectrum"
@@ -289,94 +301,9 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
 
             if idx == len(results_data):
                 # mark the last one as global default
                 entry["last_plot"] = h5py.SoftLink(f"/{subdir_name}/{entry_name}/measurement")
-
-        # h5 = h5py.File(output_file, "w")
-        # h5["/entry/instrument/q/data"] = result["q"]
-        # h5["/entry/instrument/I/data"] = result["I"]
-        # h5["/entry/instrument/dI/data"] = result["dI"]
-
-        # h5["/entry/title"] = subdir_name
-        # h5["/entry"].attrs["NX_class"] = u"NXentry"
-        # h5["/entry/instrument"].attrs["NX_class"] = u"NXinstrument"
-        # h5["/entry/instrument/q/"].attrs["NX_class"] = u"NXdetector"
-        # h5["/entry/instrument/I/"].attrs["NX_class"] = u"NXdetector"
h5["/entry/instrument/dI/"].attrs["NX_class"] = u"NXdetector" - - # h5["/entry/instrument/q/data"].attrs["interpretation"] = u"Dataset" - # h5["/entry/instrument/I/data"].attrs["interpretation"] = u"Dataset" - # h5["/entry/instrument/dI/data"].attrs["interpretation"] = u"Dataset" - - # h5["/entry/measurement/q"] = h5py.SoftLink("/entry/instrument/q/data") - # h5["/entry/measurement/I"] = h5py.SoftLink("/entry/instrument/I/data") - # h5["/entry/measurement/dI"] = h5py.SoftLink("/entry/instrument/dI/data") - # h5["/entry/measurement"].attrs["NX_class"] = u"NXcollection" - # h5["/entry/measurement"].attrs["signal"] = u"I" - # h5["/entry"].attrs["default"] = u"measurement" - - # # Top-level NXroot - # nxroot = f.create_group(subdir_name) - # nxroot.attrs["NX_class"] = np.string_("NXroot") - # nxroot.attrs["default"] = np.string_("1.1") - - # for idx, result in enumerate(results_data, start=1): - # entry_name = f"{idx}.1" - # entry = nxroot.create_group(entry_name) - # entry.attrs["NX_class"] = np.string_("NXentry") - # entry.attrs["default"] = np.string_("plotdata") - - # # Create full measurement group - # meas = entry.create_group("measurement") - # meas.attrs["NX_class"] = np.string_("NXcollection") - # meas.attrs["filename"] = np.string_(result["filename"]) - - # meas.create_dataset("q", data=result["q"]) - # meas.create_dataset("I", data=result["I"]) - # meas.create_dataset("dI", data=result["dI"]) - - # # Create plotdata group with proper NXdata spec - # plot = entry.create_group("plotdata") - # plot.attrs["NX_class"] = np.string_("NXdata") - # plot.attrs["signal"] = np.string_("I") - # plot.attrs["axes"] = np.string_("q") - # plot.attrs["title"] = np.string_(result["filename"]) - - # plot.create_dataset("q", data=result["q"]) - # dset_I = plot.create_dataset("I", data=result["I"]) - # dset_I.attrs["long_name"] = np.string_(result["filename"]) - # plot["I"].attrs["long_name"] = "Intensity" - # plot["q"].attrs["long_name"] = "Ang^-1" - - # plot.create_dataset("dI", data=result["dI"]) - # plot["dI"].attrs["long_name"] = np.string_("Uncertainty in I") - print(f"✅ HDF5 file '{output_file}' created with {len(results_data)} spectra.") - # # Sort results_data by filename - # def natural_sort_key(item): - # return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', item["filename"])] - - # results_data = sorted(results_data, key=natural_sort_key) - # # Prepare data for HDF5 file using silx - # hdf5_data = {} - # for idx, result in enumerate(results_data, start=1): - # hdf5_data[f"{idx}.1"] = { - # "@NX_class": "NXentry", - # "measurement": { - # "@NX_class": "NXcollection", - # "q": result["q"].tolist(), # Convert numpy arrays to lists for HDF5 compatibility - # "I": result["I"].tolist(), - # "dI": result["dI"].tolist(), - # }, - # "plotselect": { - # "@NX_class": "NXcollection", - # "axes": "q", - # "signal": "I", - # }, - # } - - # Save to HDF5 file using silx - # hdf5_file_path = os.path.join(subdir_path_int, f"{subdir_name}.h5") - # dicttoh5(hdf5_data, hdf5_file_path, mode="w") + del results_df else: