From 51dc5ce6c53f83c825a4eec9a1698a4dcb99e50f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=B6ne=2C=20Tjark=20Leon=20Raphael?=
 <tjark.leon.raphael.groene@uni-hamburg.de>
Date: Wed, 18 Jun 2025 16:34:37 +0200
Subject: [PATCH] Update file maxwell_integrate_to_h5.py

---
 maxwell_integrate_to_h5.py | 231 ++++++++++++++++++-------------------
 1 file changed, 115 insertions(+), 116 deletions(-)

diff --git a/maxwell_integrate_to_h5.py b/maxwell_integrate_to_h5.py
index 0897401..0feac4c 100644
--- a/maxwell_integrate_to_h5.py
+++ b/maxwell_integrate_to_h5.py
@@ -181,7 +181,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
-            # Use a lambda function to pass the subdir_path_int to the integration_thread
-            # Map async allows us to run the integration in parallel, with trace back of varibles
+            # Use a lambda function to pass each image filename to the integration_thread
+            # map_async allows us to run the integration in parallel while keeping track of the results
             async_result = pool.map_async(
-                lambda fname_im: integration_thread(fname_im, subdir_path_int),
+                lambda fname_im: integration_thread(fname_im),
                 filtered_fnames
             )
 
@@ -261,121 +261,120 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
 
         for idx, result in enumerate(results_data, start=1):
             # Drop unfinished scans (usually last scan due to closed shutter)
-            if not result["q"].size or not result["I"].size:
-                print(f"Skipping invalid scan data for entry {entry_name}")
+            try:
+                # Here one could use the image sequence number from the metadata; however, we use the index as it seems cleaner
+                entry_name = f"{idx:05d}.1"
+
+                # Create a new entry for each scan
+                entry = h5.create_group(entry_name)
+                entry["title"] = "Collected Q-I scans"
+                entry.attrs["NX_class"] = "NXentry"
+
+                # Store the acquisition time for the entry
+                entry.create_dataset("time", data=results_metadata[idx-1]["dateString"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
+
+                # We log the image sequence number if it is available in the metadata
+                image_sequence_number = results_metadata[idx-1].get("imageSequenceNumber", "").strip()
+                if image_sequence_number.isdigit():
+                    entry.create_dataset("image sequence number", data=np.asarray([int(image_sequence_number)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip")
+
+                # User comments can be added to the entry if available
+                # We check whether any of the user comments are present; if so, we create a comments group
+                # Note: scalar string datasets do not support compression filters, so these are stored uncompressed
+                if any(results_metadata[idx-1][key] for key in ["userComment1", "userComment2", "userComment3", "userComment4"]):
+                    comments = entry.create_group("comments")
+                    comments.attrs["NX_class"] = "NXcomments"
+                    if results_metadata[idx-1]["userComment1"]:
+                        comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
+                    if results_metadata[idx-1]["userComment2"]:
+                        comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
+                    if results_metadata[idx-1]["userComment3"]:
+                        comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
+                    if results_metadata[idx-1]["userComment4"]:
+                        comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
+
+                # Instrument / Detector group (holds all detector data)
entry.create_group("instrument/detector") + detector.attrs["NX_class"] = "NXdetector" + # Compress the data to save space, chunks are used to allow for efficient reading + # Larger chunk sizes increase compression but may slow down reading + # 256 is a common chunk size, (512 is also a good choice for larger datasets), over 1024 may lead to memory issues + chunk_size = 512 + + # Create datasets for q, I, and dI with compression + # We use np.asarray to ensure the data is in the correct format + # and dtype is set to float64 for better precision + detector.create_dataset("q [Å^-1]", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip") + detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip") + detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip") + + # Handle missing or invalid metadata values with defaults + width = results_metadata[idx-1].get("width", "").strip() + height = results_metadata[idx-1].get("height", "").strip() + exposure_time = results_metadata[idx-1].get("exposureTime", "").strip() + summed_exposures = results_metadata[idx-1].get("summedExposures", "").strip() + + # Create detector size dataset if width and height are valid integers + # We check if the width and height are digits (i.e., valid integers) + if width.isdigit() and height.isdigit(): + det_size = detector.create_group("detector size") + det_size.attrs["NX_class"] = "NXcollection" + det_size.create_dataset("detector width [pixel]", data=np.asarray([int(width)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip") + det_size.create_dataset("detector height [pixel]", data=np.asarray([int(height)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip") + + # Also we trac exposure time and summed exposures if they are valid + if exposure_time.isdigit(): + detector.create_dataset("exposure time [s]", data=np.asarray([float(exposure_time)], dtype=np.float32), dtype="f4", compression_opts=4, compression="gzip") + if summed_exposures.replace('.', '', 1).isdigit(): + detector.create_dataset("summed exposures", data=np.asarray([int(summed_exposures)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip") + + # Add interpretation info (optional for PyMca) + detector["I"].attrs["interpretation"] = "spectrum" + + # Measurement group (holds soft links) + meas = entry.create_group("measurement") + meas.attrs["NX_class"] = "NXdata" + meas.attrs["signal"] = "I" + meas.attrs["axes"] = "q" + meas.attrs["filename"] = result["filename"] + + # Create soft links to the detector datasets + # We use soft links to the detector datasets to allow for easy access + # This is useful for PyMca and other tools that expect these links + meas["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I") + meas["q [Å^-1]"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q [Å^-1]") + meas["dI"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/dI") + + # Optional display-friendly names + meas["I"].attrs["long_name"] = "Intensity" + meas["q [Å^-1]"].attrs["long_name"] = "Q [1/Å]" + + # Measurement group (holds soft links) + # We create a plotselect group to allow for easy plotting in h5Web or PyMca + # This group will contain soft links to the datasets in the measurement group + plotselect = entry.create_group("plotselect") + 
plotselect.attrs["NX_class"] = "NXdata" + plotselect.attrs["signal"] = "I" + plotselect.attrs["axes"] = "q" + + plotselect["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I") + plotselect["q [Å^-1]"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q [Å^-1]") + + # Optional display-friendly names + plotselect["I"].attrs["long_name"] = "Intensity" + plotselect["q [Å^-1]"].attrs["long_name"] = "Q [1/Å]" + + # For PyMca auto-plot: + entry.attrs["default"] = "plotselect" + + # Optional global default plot group + if idx == len(results_data): # mark the last one as global default + entry["last_plot"] = h5py.SoftLink(f"/{subdir_name}/{entry_name}/measurement") + + except Exception as e: + print(f"Error processing file {result['filename']}: {e}") continue - # Here one could use the image sequence number from the metadata, however, we use the index as it seemes cleaner - entry_name = f"{idx:05d}.1" - - # Create a new entry for each scan - entry = h5.create_group(entry_name) - entry["title"] = "Collected Q-I scans" - entry.attrs["NX_class"] = "NXentry" - - - - # Set time attributes for the entry - entry.create_dataset("time", data=results_metadata[idx-1]["dateString"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8')) - - # We log the image sequence number if it is available in the metadata - image_sequence_number = results_metadata[idx-1].get("imageSequenceNumber", "").strip() - if image_sequence_number.isdigit(): - entry.create_dataset("image sequence number", data=np.asarray([int(image_sequence_number)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip") - - # User comments can be added to the entry if available - # We check if any of the user comments are available, if so, we create a comments group - if any(results_metadata[idx-1][key] for key in ["userComment1", "userComment2", "userComment3", "userComment4"]): - comments = entry.create_group("comments") - comments.attrs["NX_class"] = "NXcomments" - if results_metadata[idx-1]["userComment1"]: - comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip") - if results_metadata[idx-1]["userComment2"]: - comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip") - if results_metadata[idx-1]["userComment3"]: - comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip") - if results_metadata[idx-1]["userComment4"]: - comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip") - - - # Instrument / Detector group (holds all detector data) - detector = entry.create_group("instrument/detector") - detector.attrs["NX_class"] = "NXdetector" - # Compress the data to save space, chunks are used to allow for efficient reading - # Larger chunk sizes increase compression but may slow down reading - # 256 is a common chunk size, (512 is also a good choice for larger datasets), over 1024 may lead to memory issues - chunk_size = 512 - - # Create datasets for q, I, and dI with compression - # We use np.asarray to ensure the data is in the correct format - # and dtype is set to float64 for better precision - detector.create_dataset("q 
[Å^-1]", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip") - detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip") - detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip") - - # Handle missing or invalid metadata values with defaults - width = results_metadata[idx-1].get("width", "").strip() - height = results_metadata[idx-1].get("height", "").strip() - exposure_time = results_metadata[idx-1].get("exposureTime", "").strip() - summed_exposures = results_metadata[idx-1].get("summedExposures", "").strip() - - # Create detector size dataset if width and height are valid integers - # We check if the width and height are digits (i.e., valid integers) - if width.isdigit() and height.isdigit(): - det_size = detector.create_group("detector size") - det_size.attrs["NX_class"] = "NXcollection" - det_size.create_dataset("detector width [pixel]", data=np.asarray([int(width)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip") - det_size.create_dataset("detector height [pixel]", data=np.asarray([int(height)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip") - - # Also we trac exposure time and summed exposures if they are valid - if exposure_time.isdigit(): - detector.create_dataset("exposure time [s]", data=np.asarray([float(exposure_time)], dtype=np.float32), dtype="f4", compression_opts=4, compression="gzip") - if summed_exposures.replace('.', '', 1).isdigit(): - detector.create_dataset("summed exposures", data=np.asarray([int(summed_exposures)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip") - - # Add interpretation info (optional for PyMca) - detector["I"].attrs["interpretation"] = "spectrum" - - # Measurement group (holds soft links) - meas = entry.create_group("measurement") - meas.attrs["NX_class"] = "NXdata" - meas.attrs["signal"] = "I" - meas.attrs["axes"] = "q" - meas.attrs["filename"] = result["filename"] - - # Create soft links to the detector datasets - # We use soft links to the detector datasets to allow for easy access - # This is useful for PyMca and other tools that expect these links - meas["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I") - meas["q [Å^-1]"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q [Å^-1]") - meas["dI"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/dI") - - # Optional display-friendly names - meas["I"].attrs["long_name"] = "Intensity" - meas["q [Å^-1]"].attrs["long_name"] = "Q [1/Å]" - - # Measurement group (holds soft links) - # We create a plotselect group to allow for easy plotting in h5Web or PyMca - # This group will contain soft links to the datasets in the measurement group - plotselect = entry.create_group("plotselect") - plotselect.attrs["NX_class"] = "NXdata" - plotselect.attrs["signal"] = "I" - plotselect.attrs["axes"] = "q" - - plotselect["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I") - plotselect["q [Å^-1]"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q [Å^-1]") - - # Optional display-friendly names - plotselect["I"].attrs["long_name"] = "Intensity" - plotselect["q [Å^-1]"].attrs["long_name"] = "Q [1/Å]" - - # For PyMca auto-plot: - entry.attrs["default"] = "plotselect" - - # Optional global default plot group - if idx == len(results_data): # mark the last one as global default - 
entry["last_plot"] = h5py.SoftLink(f"/{subdir_name}/{entry_name}/measurement") - print(f"✅ HDF5 file '{output_file}' created with {len(results_data)} spectra.") -- GitLab