From 2fe88a76c62f31caa34856d2dfb269d6999b2394 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=B6ne=2C=20Tjark=20Leon=20Raphael?=
 <tjark.leon.raphael.groene@uni-hamburg.de>
Date: Wed, 18 Jun 2025 08:48:03 +0200
Subject: [PATCH] Update file maxwell_integrate_to_h5.py

---
 maxwell_integrate_to_h5.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/maxwell_integrate_to_h5.py b/maxwell_integrate_to_h5.py
index 2871b0b..207ff20 100644
--- a/maxwell_integrate_to_h5.py
+++ b/maxwell_integrate_to_h5.py
@@ -219,20 +219,20 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                     entry.attrs["NX_class"] = "NXentry"
                     
 
-                    entry.create_dataset("time", data=results_metadata[idx-1]["dateString"].encode('utf-8'))
+                    entry.create_dataset("time", data=results_metadata[idx-1]["dateString"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
                     
                     
                     if any(results_metadata[idx-1][key] for key in ["userComment1", "userComment2", "userComment3", "userComment4"]):
                         comments = entry.create_group("comments")
                         comments.attrs["NX_class"] = "NXcomments"
                         if results_metadata[idx-1]["userComment1"]:
-                            comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'))
+                            comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
                         if results_metadata[idx-1]["userComment2"]:
-                            comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'))
+                            comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
                         if results_metadata[idx-1]["userComment3"]:
-                            comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'))
+                            comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
                         if results_metadata[idx-1]["userComment4"]:
-                            comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'))
+                            comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'), dtype=h5py.string_dtype(encoding='utf-8'))
 
 
                     # Instrument / Detector group
@@ -240,9 +240,9 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                     detector.attrs["NX_class"] = "NXdetector"
                     chunk_size = min(len(result["I"]), 1000)
 
-                    detector.create_dataset("q", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,))
-                    detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,))
-                    detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,))
+                    detector.create_dataset("q", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,), dtype=np.float64, compression="gzip")
+                    detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,), dtype=np.float64, compression="gzip")
+                    detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,), dtype=np.float64, compression="gzip")
                     
                     # Handle missing or invalid metadata values with defaults
                     width = results_metadata[idx-1].get("width", "").strip()
@@ -252,15 +252,15 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                     image_sequence_number = results_metadata[idx-1].get("imageSequenceNumber", "").strip()
 
                     if width.isdigit():
-                        detector.create_dataset("pixels width", data=np.asarray([int(width)], dtype=np.int64))
+                        detector.create_dataset("pixels width", data=np.asarray([int(width)], dtype=np.int64), chunks=False)
                     if height.isdigit():
-                        detector.create_dataset("pixels height", data=np.asarray([int(height)], dtype=np.int64))
+                        detector.create_dataset("pixels height", data=np.asarray([int(height)], dtype=np.int64), chunks=False)
                     if exposure_time.isdigit():
-                        detector.create_dataset("exposure time", data=np.asarray([int(exposure_time)], dtype=np.int64))
+                        detector.create_dataset("exposure time", data=np.asarray([float(exposure_time)], dtype=np.float64), chunks=False)
                     if summed_exposures.replace('.', '', 1).isdigit():
-                        detector.create_dataset("summed exposures", data=np.asarray([float(summed_exposures)], dtype=np.float64))
+                        detector.create_dataset("summed exposures", data=np.asarray([float(summed_exposures)], dtype=np.float64), chunks=False)
                     if image_sequence_number.isdigit():
-                        detector.create_dataset("image sequence number", data=np.asarray([int(image_sequence_number)], dtype=np.int64))
+                        detector.create_dataset("image sequence number", data=np.asarray([int(image_sequence_number)], dtype=np.int64), chunks=False)
 
                     # Add interpretation info (optional for PyMca)
                     detector["I"].attrs["interpretation"] = "spectrum"
-- 
GitLab