From 1dc0d3cc6505cb7a230a1d5814b38b5f11390fff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=B6ne=2C=20Tjark=20Leon=20Raphael?=
 <tjark.leon.raphael.groene@uni-hamburg.de>
Date: Wed, 18 Jun 2025 17:02:14 +0200
Subject: [PATCH] Update file maxwell_integrate_to_h5.py

---
 maxwell_integrate_to_h5.py | 218 +++++++++++++++++++------------------
 1 file changed, 113 insertions(+), 105 deletions(-)

diff --git a/maxwell_integrate_to_h5.py b/maxwell_integrate_to_h5.py
index 0feac4c..60ce8d5 100644
--- a/maxwell_integrate_to_h5.py
+++ b/maxwell_integrate_to_h5.py
@@ -267,114 +267,122 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                         
                         # Create a new entry for each scan
                         entry = h5.create_group(entry_name)
-                        entry["title"] = "Collected Q-I scans"
-                        entry.attrs["NX_class"] = "NXentry"
-                        
-                        # Set time attributes for the entry	
-                        entry.create_dataset("time", data=results_metadata[idx-1]["dateString"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
-                        
-                        # We log the image sequence number if it is available in the metadata
-                        image_sequence_number = results_metadata[idx-1].get("imageSequenceNumber", "").strip()
-                        if image_sequence_number.isdigit():
-                            entry.create_dataset("image sequence number", data=np.asarray([int(image_sequence_number)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip")
-                        
-                        # User comments can be added to the entry if available
-                        # We check if any of the user comments are available, if so, we create a comments group
-                        if any(results_metadata[idx-1][key] for key in ["userComment1", "userComment2", "userComment3", "userComment4"]):
-                            comments = entry.create_group("comments")
-                            comments.attrs["NX_class"] = "NXcomments"
-                            if results_metadata[idx-1]["userComment1"]:
-                                comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip")
-                            if results_metadata[idx-1]["userComment2"]:
-                                comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip")
-                            if results_metadata[idx-1]["userComment3"]:
-                                comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip")
-                            if results_metadata[idx-1]["userComment4"]:
-                                comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip")
-
-
-                        # Instrument / Detector group (holds all detector data)
-                        detector = entry.create_group("instrument/detector")
-                        detector.attrs["NX_class"] = "NXdetector"
-                        # Compress the data to save space, chunks are used to allow for efficient reading
-                        # Larger chunk sizes increase compression but may slow down reading
-                        # 256 is a common chunk size, (512 is also a good choice for larger datasets), over 1024 may lead to memory issues
-                        chunk_size = 512
-
-                        # Create datasets for q, I, and dI with compression
-                        # We use np.asarray to ensure the data is in the correct format
-                        # and dtype is set to float64 for better precision
-                        detector.create_dataset("q [Å^-1]", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip")
-                        detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip")
-                        detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip")
-                        
-                        # Handle missing or invalid metadata values with defaults
-                        width = results_metadata[idx-1].get("width", "").strip()
-                        height = results_metadata[idx-1].get("height", "").strip()
-                        exposure_time = results_metadata[idx-1].get("exposureTime", "").strip()
-                        summed_exposures = results_metadata[idx-1].get("summedExposures", "").strip()
-                        
-                        # Create detector size dataset if width and height are valid integers
-                        # We check if the width and height are digits (i.e., valid integers)	
-                        if width.isdigit() and height.isdigit():
-                            det_size = detector.create_group("detector size")
-                            det_size.attrs["NX_class"] = "NXcollection"
-                            det_size.create_dataset("detector width [pixel]", data=np.asarray([int(width)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip")
-                            det_size.create_dataset("detector height [pixel]", data=np.asarray([int(height)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip")
-                        
-                        # Also we trac exposure time and summed exposures if they are valid
-                        if exposure_time.isdigit():
-                            detector.create_dataset("exposure time [s]", data=np.asarray([float(exposure_time)], dtype=np.float32), dtype="f4", compression_opts=4, compression="gzip")
-                        if summed_exposures.replace('.', '', 1).isdigit():
-                            detector.create_dataset("summed exposures", data=np.asarray([int(summed_exposures)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip")
-
-                        # Add interpretation info (optional for PyMca)
-                        detector["I"].attrs["interpretation"] = "spectrum"
-
-                        # Measurement group (holds soft links)
-                        meas = entry.create_group("measurement")
-                        meas.attrs["NX_class"] = "NXdata"
-                        meas.attrs["signal"] = "I"
-                        meas.attrs["axes"] = "q"
-                        meas.attrs["filename"] = result["filename"]
-                        
-                        # Create soft links to the detector datasets
-                        # We use soft links to the detector datasets to allow for easy access
-                        # This is useful for PyMca and other tools that expect these links
-                        meas["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I")
-                        meas["q [Å^-1]"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q [Å^-1]")
-                        meas["dI"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/dI")
-
-                        # Optional display-friendly names
-                        meas["I"].attrs["long_name"] = "Intensity"
-                        meas["q [Å^-1]"].attrs["long_name"] = "Q [1/Å]"
+
+                        try:
+                            entry["title"] = "Collected Q-I scans"
+                            entry.attrs["NX_class"] = "NXentry"
+                            
+                            # Set time attributes for the entry	
+                            entry.create_dataset("time", data=results_metadata[idx-1]["dateString"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
+                            
+                            # We log the image sequence number if it is available in the metadata
+                            image_sequence_number = results_metadata[idx-1].get("imageSequenceNumber", "").strip()
+                            if image_sequence_number.isdigit():
+                                entry.create_dataset("image sequence number", data=np.asarray([int(image_sequence_number)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip")
+                            
+                            # User comments can be added to the entry if available
+                            # We check if any of the user comments are available, if so, we create a comments group
+                            if any(results_metadata[idx-1][key] for key in ["userComment1", "userComment2", "userComment3", "userComment4"]):
+                                comments = entry.create_group("comments")
+                                comments.attrs["NX_class"] = "NXcomments"
+                                if results_metadata[idx-1]["userComment1"]:
+                                    comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip")
+                                if results_metadata[idx-1]["userComment2"]:
+                                    comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip")
+                                if results_metadata[idx-1]["userComment3"]:
+                                    comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip")
+                                if results_metadata[idx-1]["userComment4"]:
+                                    comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'),compression_opts=4, compression="gzip")
+
+
+                            # Instrument / Detector group (holds all detector data)
+                            detector = entry.create_group("instrument/detector")
+                            detector.attrs["NX_class"] = "NXdetector"
+                            # Compress the data to save space, chunks are used to allow for efficient reading
+                            # Larger chunk sizes increase compression but may slow down reading
+                            # 256 is a common chunk size, (512 is also a good choice for larger datasets), over 1024 may lead to memory issues
+                            chunk_size = 512
+
+                            # Create datasets for q, I, and dI with compression
+                            # We use np.asarray to ensure the data is in the correct format
+                            # and dtype is set to float64 for better precision
+                            detector.create_dataset("q [Å^-1]", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip")
+                            detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip")
+                            detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,), dtype="f8", compression_opts=4, compression="gzip")
+                            
+                            # Handle missing or invalid metadata values with defaults
+                            width = results_metadata[idx-1].get("width", "").strip()
+                            height = results_metadata[idx-1].get("height", "").strip()
+                            exposure_time = results_metadata[idx-1].get("exposureTime", "").strip()
+                            summed_exposures = results_metadata[idx-1].get("summedExposures", "").strip()
+                            
+                            # Create detector size dataset if width and height are valid integers
+                            # We check if the width and height are digits (i.e., valid integers)	
+                            if width.isdigit() and height.isdigit():
+                                det_size = detector.create_group("detector size")
+                                det_size.attrs["NX_class"] = "NXcollection"
+                                det_size.create_dataset("detector width [pixel]", data=np.asarray([int(width)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip")
+                                det_size.create_dataset("detector height [pixel]", data=np.asarray([int(height)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip")
+                            
+                            # Also we track exposure time and summed exposures if they are valid
+                            if exposure_time.isdigit():
+                                detector.create_dataset("exposure time [s]", data=np.asarray([float(exposure_time)], dtype=np.float32), dtype="f4", compression_opts=4, compression="gzip")
+                            if summed_exposures.replace('.', '', 1).isdigit():
+                                detector.create_dataset("summed exposures", data=np.asarray([int(summed_exposures)], dtype=np.int32), dtype="i4", compression_opts=4, compression="gzip")
+
+                            # Add interpretation info (optional for PyMca)
+                            detector["I"].attrs["interpretation"] = "spectrum"
+
+                            # Measurement group (holds soft links)
+                            meas = entry.create_group("measurement")
+                            meas.attrs["NX_class"] = "NXdata"
+                            meas.attrs["signal"] = "I"
+                            meas.attrs["axes"] = "q"
+                            meas.attrs["filename"] = result["filename"]
+                            
+                            # Create soft links to the detector datasets
+                            # We use soft links to the detector datasets to allow for easy access
+                            # This is useful for PyMca and other tools that expect these links
+                            meas["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I")
+                            meas["q [Å^-1]"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q [Å^-1]")
+                            meas["dI"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/dI")
+
+                            # Optional display-friendly names
+                            meas["I"].attrs["long_name"] = "Intensity"
+                            meas["q [Å^-1]"].attrs["long_name"] = "Q [1/Å]"
+                            
+                            # Plotselect group (holds soft links for default plotting)
+                            # We create a plotselect group to allow for easy plotting in h5Web or PyMca
+                            # This group will contain soft links to the datasets in the measurement group
+                            plotselect = entry.create_group("plotselect")
+                            plotselect.attrs["NX_class"] = "NXdata"
+                            plotselect.attrs["signal"] = "I"
+                            plotselect.attrs["axes"] = "q"
+
+                            plotselect["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I")
+                            plotselect["q [Å^-1]"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q [Å^-1]")
+
+                            # Optional display-friendly names
+                            plotselect["I"].attrs["long_name"] = "Intensity"
+                            plotselect["q [Å^-1]"].attrs["long_name"] = "Q [1/Å]"
+
+                            # For PyMca auto-plot:
+                            entry.attrs["default"] = "plotselect"
+
+                            # Optional global default plot group
+                            if idx == len(results_data):  # mark the last one as global default
+                                entry["last_plot"] = h5py.SoftLink(f"/{subdir_name}/{entry_name}/measurement")
                         
-                        # Measurement group (holds soft links)
-                        # We create a plotselect group to allow for easy plotting in h5Web or PyMca
-                        # This group will contain soft links to the datasets in the measurement group
-                        plotselect = entry.create_group("plotselect")
-                        plotselect.attrs["NX_class"] = "NXdata"
-                        plotselect.attrs["signal"] = "I"
-                        plotselect.attrs["axes"] = "q"
-
-                        plotselect["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I")
-                        plotselect["q [Å^-1]"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q [Å^-1]")
-
-                        # Optional display-friendly names
-                        plotselect["I"].attrs["long_name"] = "Intensity"
-                        plotselect["q [Å^-1]"].attrs["long_name"] = "Q [1/Å]"
-
-                        # For PyMca auto-plot:
-                        entry.attrs["default"] = "plotselect"
-
-                        # Optional global default plot group
-                        if idx == len(results_data):  # mark the last one as global default
-                            entry["last_plot"] = h5py.SoftLink(f"/{subdir_name}/{entry_name}/measurement")
+                        except Exception as e:
+                            print(f"Error processing file {result['filename']} in entry {entry_name}: {e}")
+                            # Delete incomplete group if something failed inside it
+                            del h5[entry_name]
+                            continue
                             
-                    except Exception as e:
-                        print(f"Error processing file {result['filename']}: {e}")
+                    except Exception as outer_e:
+                        print(f"Failed to create entry group {entry_name}: {outer_e}")
                         continue
-                    
+                            
 
             print(f"✅ HDF5 file '{output_file}' created with {len(results_data)} spectra.")
 
-- 
GitLab