Commit 8884d0b2 authored by Gröne, Tjark Leon Raphael

Update file maxwell_integrate_to_h5.py

parent c4f4d9f1
@@ -34,6 +34,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
     """
     global NPROC
     global FORBIDDEN
+    seen = []
     fnames_ims = []#= glob(os.path.join(path_im, "*" + dtype_im))
     fnames_metadata = []#= glob(os.path.join(path_im, "*" + ".metadata"))
     path_int_list = []
@@ -47,9 +48,8 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
             else:
                 path_new = path_int
             path_int_list.append(path_new)
-            elif ".metadata" in name:
+            if "metadata" in name:
                 fnames_metadata.append(os.path.join(path, name))
-                print("Found metadata file: " + os.path.join(path, name))
     #fnames_ims.sort(key=str.lower)
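This hunk relaxes the metadata match (a dotted `elif ".metadata" in name` becomes a standalone `if "metadata" in name`), so metadata detection no longer depends on the image-extension branch, and it drops the per-file print. A minimal standalone sketch of the collection pattern, under the assumption that the surrounding loop is a plain os.walk (the helper name is illustrative, not from the repository):

    import os

    def collect_files(path_im, dtype_im=".tif"):
        """Walk path_im and split image files from metadata sidecar files."""
        fnames_ims, fnames_metadata = [], []
        for path, _dirs, names in os.walk(path_im):
            for name in names:
                if name.endswith(dtype_im):
                    fnames_ims.append(os.path.join(path, name))
                if "metadata" in name:  # relaxed substring check, as in the commit
                    fnames_metadata.append(os.path.join(path, name))
        return fnames_ims, fnames_metadata

Note that the relaxed substring check is broader than the old `".metadata"` test: any filename containing "metadata" now qualifies, which is harmless only as long as image and sidecar names stay disjoint.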
@@ -64,7 +64,6 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
         with open(os.path.join(path, name), 'r') as metadata_file:
             for line in metadata_file:
-                metadata = {}
                 if line.startswith("dateString="):
                     metadata["dateString"] = line.split("=", 1)[1].strip()
                 elif line.startswith("userComment1="):
@@ -87,7 +86,6 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                 metadata["imageSequenceNumber"] = line.split("=", 1)[1].strip()
         metadata["filename"] = name
-
         # Convert metadata dictionary to a DataFrame and sort by filename
         return metadata

     def integration_thread(fname_im,path_int):
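Deleting `metadata = {}` from inside the per-line loop fixes a real bug: the dict was re-created on every line, discarding all previously parsed keys, so only fields matched on the final line survived. A condensed sketch of the corrected parsing, assuming the dict is now created once per file (the helper name is illustrative; the key list follows the fields visible in the diff):

    import os

    def parse_metadata(filepath):
        """Collect key=value fields from one .metadata sidecar file."""
        metadata = {}  # created once per file, not once per line
        wanted = ("dateString", "userComment1", "width", "height",
                  "exposureTime", "summedExposures", "imageSequenceNumber")
        with open(filepath, "r") as metadata_file:
            for line in metadata_file:
                for key in wanted:
                    if line.startswith(key + "="):
                        metadata[key] = line.split("=", 1)[1].strip()
        metadata["filename"] = os.path.basename(filepath)
        return metadata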
@@ -139,7 +137,8 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
         subdir_fnames = [fname for fname in fnames_ims if os.path.dirname(fname) == subdir]
-        subdir_fnames_metadata = [fname for fname in fnames_metadata if os.path.dirname(fname) == subdir]
+        subdir_fnames_metadata = [fname_meta for fname_meta in fnames_metadata if os.path.dirname(fname_meta) == subdir]
         if not subdir_fnames:
             print(f"No images found in subdirectory: {subdir}")
             continue
@@ -189,9 +188,9 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
         results_df = results_df.sort_values(by="filename", key=lambda col: col.str.lower())
         results_metadata_df = results_metadata_df.sort_values(by="filename", key=lambda col: col.str.lower())
-        for key, value in results_metadata_df.iloc[0].items():
+        for key in results_metadata_df.columns:
             if key not in results_df.columns:
-                results_df[key] = value
+                results_df[key] = results_metadata_df[key].values
         subdir_name = os.path.basename(os.path.normpath(subdir_path_int))
         results_df.to_csv(os.path.join(subdir_path_int, f"{subdir_name}.csv"), index=False)
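The loop now copies whole metadata columns instead of broadcasting scalars from the first row, which is why both frames are sorted by filename immediately beforehand: positional assignment silently misaligns rows if the orders ever diverge. A sketch of the same attachment done as a key-based merge, which does not depend on row order (the frames below are hypothetical stand-ins):

    import pandas as pd

    # Hypothetical stand-ins for results_df / results_metadata_df
    results_df = pd.DataFrame({"filename": ["b.tif", "a.tif"], "I_max": [2.0, 1.0]})
    results_metadata_df = pd.DataFrame({"filename": ["a.tif", "b.tif"],
                                        "exposureTime": ["0.1", "0.1"]})

    # Merging on the shared filename key aligns rows by value, not position.
    results_df = results_df.merge(results_metadata_df, on="filename", how="left")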
@@ -244,11 +243,24 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                 detector.create_dataset("q", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,))
                 detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,))
                 detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,))
-                detector.create_dataset("pixels width", data=int(results_metadata[idx-1]["width"]))
-                detector.create_dataset("pixels height", data=int(results_metadata[idx-1]["height"]))
-                detector.create_dataset("exposureTime", data=float(results_metadata[idx-1]["exposureTime"]))
-                detector.create_dataset("summedExposures", data=int(results_metadata[idx-1]["summedExposures"]))
-                detector.create_dataset("imageSequenceNumber", data=int(results_metadata[idx-1]["imageSequenceNumber"]))
+
+                # Handle missing or invalid metadata values with defaults
+                width = results_metadata[idx-1].get("width", "").strip()
+                height = results_metadata[idx-1].get("height", "").strip()
+                exposure_time = results_metadata[idx-1].get("exposureTime", "").strip()
+                summed_exposures = results_metadata[idx-1].get("summedExposures", "").strip()
+                image_sequence_number = results_metadata[idx-1].get("imageSequenceNumber", "").strip()
+
+                if width.isdigit():
+                    detector.create_dataset("pixels width", data=np.asarray(int(width), dtype=np.int64))
+                if height.isdigit():
+                    detector.create_dataset("pixels height", data=np.asarray(int(height), dtype=np.int64))
+                if exposure_time.replace('.', '', 1).isdigit():
+                    detector.create_dataset("exposure time", data=np.asarray(float(exposure_time), dtype=np.float64))
+                if summed_exposures.isdigit():
+                    detector.create_dataset("summed exposures", data=np.asarray(int(summed_exposures), dtype=np.int64))
+                if image_sequence_number.isdigit():
+                    detector.create_dataset("image sequence number", data=np.asarray(int(image_sequence_number), dtype=np.int64))

                 # Add interpretation info (optional for PyMca)
                 detector["I"].attrs["interpretation"] = "spectrum"
@@ -290,93 +302,8 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                 entry["last_plot"] = h5py.SoftLink(f"/{subdir_name}/{entry_name}/measurement")
-        # h5 = h5py.File(output_file, "w")
-        # h5["/entry/instrument/q/data"] = result["q"]
-        # h5["/entry/instrument/I/data"] = result["I"]
-        # h5["/entry/instrument/dI/data"] = result["dI"]
-        # h5["/entry/title"] = subdir_name
-        # h5["/entry"].attrs["NX_class"] = u"NXentry"
-        # h5["/entry/instrument"].attrs["NX_class"] = u"NXinstrument"
-        # h5["/entry/instrument/q/"].attrs["NX_class"] = u"NXdetector"
-        # h5["/entry/instrument/I/"].attrs["NX_class"] = u"NXdetector"
-        # h5["/entry/instrument/dI/"].attrs["NX_class"] = u"NXdetector"
-        # h5["/entry/instrument/q/data"].attrs["interpretation"] = u"Dataset"
-        # h5["/entry/instrument/I/data"].attrs["interpretation"] = u"Dataset"
-        # h5["/entry/instrument/dI/data"].attrs["interpretation"] = u"Dataset"
-        # h5["/entry/measurement/q"] = h5py.SoftLink("/entry/instrument/q/data")
-        # h5["/entry/measurement/I"] = h5py.SoftLink("/entry/instrument/I/data")
-        # h5["/entry/measurement/dI"] = h5py.SoftLink("/entry/instrument/dI/data")
-        # h5["/entry/measurement"].attrs["NX_class"] = u"NXcollection"
-        # h5["/entry/measurement"].attrs["signal"] = u"I"
-        # h5["/entry"].attrs["default"] = u"measurement"
-        # # Top-level NXroot
-        # nxroot = f.create_group(subdir_name)
-        # nxroot.attrs["NX_class"] = np.string_("NXroot")
-        # nxroot.attrs["default"] = np.string_("1.1")
-        # for idx, result in enumerate(results_data, start=1):
-        #     entry_name = f"{idx}.1"
-        #     entry = nxroot.create_group(entry_name)
-        #     entry.attrs["NX_class"] = np.string_("NXentry")
-        #     entry.attrs["default"] = np.string_("plotdata")
-        #     # Create full measurement group
-        #     meas = entry.create_group("measurement")
-        #     meas.attrs["NX_class"] = np.string_("NXcollection")
-        #     meas.attrs["filename"] = np.string_(result["filename"])
-        #     meas.create_dataset("q", data=result["q"])
-        #     meas.create_dataset("I", data=result["I"])
-        #     meas.create_dataset("dI", data=result["dI"])
-        #     # Create plotdata group with proper NXdata spec
-        #     plot = entry.create_group("plotdata")
-        #     plot.attrs["NX_class"] = np.string_("NXdata")
-        #     plot.attrs["signal"] = np.string_("I")
-        #     plot.attrs["axes"] = np.string_("q")
-        #     plot.attrs["title"] = np.string_(result["filename"])
-        #     plot.create_dataset("q", data=result["q"])
-        #     dset_I = plot.create_dataset("I", data=result["I"])
-        #     dset_I.attrs["long_name"] = np.string_(result["filename"])
-        #     plot["I"].attrs["long_name"] = "Intensity"
-        #     plot["q"].attrs["long_name"] = "Ang^-1"
-        #     plot.create_dataset("dI", data=result["dI"])
-        #     plot["dI"].attrs["long_name"] = np.string_("Uncertainty in I")
print(f"✅ HDF5 file '{output_file}' created with {len(results_data)} spectra.") print(f"✅ HDF5 file '{output_file}' created with {len(results_data)} spectra.")
-        # # Sort results_data by filename
-        # def natural_sort_key(item):
-        #     return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', item["filename"])]
-        # results_data = sorted(results_data, key=natural_sort_key)
-        # # Prepare data for HDF5 file using silx
-        # hdf5_data = {}
-        # for idx, result in enumerate(results_data, start=1):
-        #     hdf5_data[f"{idx}.1"] = {
-        #         "@NX_class": "NXentry",
-        #         "measurement": {
-        #             "@NX_class": "NXcollection",
-        #             "q": result["q"].tolist(),  # Convert numpy arrays to lists for HDF5 compatibility
-        #             "I": result["I"].tolist(),
-        #             "dI": result["dI"].tolist(),
-        #         },
-        #         "plotselect": {
-        #             "@NX_class": "NXcollection",
-        #             "axes": "q",
-        #             "signal": "I",
-        #         },
-        #     }
-        # # Save to HDF5 file using silx
-        # hdf5_file_path = os.path.join(subdir_path_int, f"{subdir_name}.h5")
-        # dicttoh5(hdf5_data, hdf5_file_path, mode="w")
         del results_df
     else:
...
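The bulk of this final hunk deletes commented-out experiments with alternative NeXus layouts; what survives writes one entry per spectrum plus a "last_plot" SoftLink. A minimal sketch of that surviving structure, with illustrative names and only the attributes actually visible in the diff context:

    import h5py
    import numpy as np

    with h5py.File("example.h5", "w") as f:
        subdir_name, entry_name = "sample", "1.1"  # illustrative names
        entry = f.create_group(f"{subdir_name}/{entry_name}")
        entry.attrs["NX_class"] = "NXentry"
        meas = entry.create_group("measurement")
        meas.create_dataset("q", data=np.linspace(0.1, 10.0, 500))
        dset_I = meas.create_dataset("I", data=np.random.rand(500))
        dset_I.attrs["interpretation"] = "spectrum"  # as set in the diff
        # SoftLink gives viewers such as PyMca a stable path to the latest spectrum
        entry["last_plot"] = h5py.SoftLink(f"/{subdir_name}/{entry_name}/measurement")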