From 57014b2c45eed4e1e533f51cdebfd435744e6273 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=B6ne=2C=20Tjark=20Leon=20Raphael?=
 <tjark.leon.raphael.groene@uni-hamburg.de>
Date: Mon, 16 Jun 2025 23:56:06 +0200
Subject: [PATCH] Write HDF5 output with h5py instead of silx dicttoh5

---
 maxwell_integrate_to_h5.py | 248 +++++++++----------------------------
 1 file changed, 58 insertions(+), 190 deletions(-)

diff --git a/maxwell_integrate_to_h5.py b/maxwell_integrate_to_h5.py
index 9181d12..3332b64 100644
--- a/maxwell_integrate_to_h5.py
+++ b/maxwell_integrate_to_h5.py
@@ -15,7 +15,8 @@ from watchdog.observers.polling import PollingObserver
 from watchdog.events import PatternMatchingEventHandler
 from multiprocessing.pool import ThreadPool as Pool
 import pandas as pd
-from silx.io.dictdump import h5todict, dicttoh5
+#from silx.io.dictdump import h5todict, dicttoh5
+import h5py
 import re
 
 
@@ -124,198 +125,65 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
             subdir_name = os.path.basename(os.path.normpath(subdir_path_int))
             results_df.to_csv(os.path.join(subdir_path_int, f"{subdir_name}.csv"), index=False)
 
-
-            # Sort results_data by filename
-            def natural_sort_key(item):
-                return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', item["filename"])]
-
-            results_data = sorted(results_data, key=natural_sort_key)
-            # Prepare data for HDF5 file using silx
-            hdf5_data = {
-                subdir_name: {
-                    "@NX_class": "NXentry",
-                    "description": f"{subdir_name}",
-                    "files": [
-                        {
-                            "name": f"{idx}.1",
-                            "path": f"/{idx}.1",
-                            "attributes": [
-                                {
-                                    "name": "NX_class",
-                                    "shape": [],
-                                    "type": {
-                                        "class": "String",
-                                        "charSet": "UTF-8",
-                                        "strPad": "null-terminated"
-                                    },
-                                    "value": "NXentry"
-                                },
-                                {
-                                    "name": "default",
-                                    "shape": [],
-                                    "type": {
-                                        "class": "String",
-                                        "charSet": "UTF-8",
-                                        "strPad": "null-terminated"
-                                    },
-                                    "value": "measurement"
-                                },
-                                {
-                                    "name": "plotselect",
-                                    "shape": [],
-                                    "type": {
-                                        "class": "String",
-                                        "charSet": "UTF-8",
-                                        "strPad": "null-terminated"
-                                    },
-                                    "value": "q,I"
-                                }
-                            ],
-                            "kind": "group",
-                            "children": [
-                                {
-                                    "name": "measurement",
-                                    "path": f"/{idx}.1/measurement",
-                                    "attributes": [
-                                        {
-                                            "name": "NX_class",
-                                            "shape": [],
-                                            "type": {
-                                                "class": "String",
-                                                "charSet": "UTF-8",
-                                                "strPad": "null-terminated"
-                                            },
-                                            "value": "NXcollection"
-                                        }
-                                    ],
-                                    "kind": "group",
-                                    "children": [
-                                        {
-                                            "name": "q",
-                                            "path": f"/{idx}.1/measurement/q",
-                                            "attributes": [],
-                                            "kind": "dataset",
-                                            "shape": [len(result["q"])],
-                                            "type": {
-                                                "class": "Float",
-                                                "endianness": "little-endian",
-                                                "size": 64
-                                            },
-                                            "chunks": [min(len(result["q"]), 1000)],
-                                            "filters": [],
-                                            "rawType": {
-                                                "signed": False,
-                                                "type": 1,
-                                                "vlen": False,
-                                                "littleEndian": True,
-                                                "size": 8,
-                                                "total_size": len(result["q"])
-                                            },
-                                            "value": result["q"]
-                                        },
-                                        {
-                                            "name": "I",
-                                            "path": f"/{idx}.1/measurement/I",
-                                            "attributes": [],
-                                            "kind": "dataset",
-                                            "shape": [len(result["I"])],
-                                            "type": {
-                                                "class": "Float",
-                                                "endianness": "little-endian",
-                                                "size": 64
-                                            },
-                                            "chunks": [min(len(result["I"]), 1000)],
-                                            "filters": [],
-                                            "rawType": {
-                                                "signed": False,
-                                                "type": 1,
-                                                "vlen": False,
-                                                "littleEndian": True,
-                                                "size": 8,
-                                                "total_size": len(result["I"])
-                                            },
-                                            "value": result["I"]
-                                        },
-                                        {
-                                            "name": "dI",
-                                            "path": f"/{idx}.1/measurement/dI",
-                                            "attributes": [],
-                                            "kind": "dataset",
-                                            "shape": [len(result["dI"])],
-                                            "type": {
-                                                "class": "Float",
-                                                "endianness": "little-endian",
-                                                "size": 64
-                                            },
-                                            "chunks": [min(len(result["dI"]), 1000)],
-                                            "filters": [],
-                                            "rawType": {
-                                                "signed": False,
-                                                "type": 1,
-                                                "vlen": False,
-                                                "littleEndian": True,
-                                                "size": 8,
-                                                "total_size": len(result["dI"])
-                                            },
-                                            "value": result["dI"]
-                                        }
-                                    ]
-                                },
-                                {
-                                    "name": "plotselect",
-                                    "path": f"/{idx}.1/plotselect",
-                                    "attributes": [
-                                        {
-                                            "name": "NX_class",
-                                            "shape": [],
-                                            "type": {
-                                                "class": "String",
-                                                "charSet": "UTF-8",
-                                                "strPad": "null-terminated"
-                                            },
-                                            "value": "NXcollection"
-                                        },
-                                        {
-                                            "name": "axes",
-                                            "shape": [1],
-                                            "type": {
-                                                "class": "String",
-                                                "charSet": "UTF-8",
-                                                "strPad": "null-terminated"
-                                            },
-                                            "value": "q"
-                                        },
-                                        {
-                                            "name": "signal",
-                                            "shape": [],
-                                            "type": {
-                                                "class": "String",
-                                                "charSet": "UTF-8",
-                                                "strPad": "null-terminated"
-                                            },
-                                            "value": "I"
-                                        }
-                                    ],
-                                    "kind": "group"
-                                }
-                            ]
-                        }
-                        for idx, result in enumerate(results_data, start=1)
-                    ]
-                }
-            }
-
-            # Save to HDF5 file using silx
-            hdf5_file_path = os.path.join(subdir_path_int, f"{subdir_name}.h5")
-            dicttoh5(hdf5_data, hdf5_file_path, mode="w")
-
-            print(f"Results for subdirectory {subdir_name} saved to HDF5 file using silx.")
+            # Write the results next to the CSV as <subdir_name>.h5 so each
+            # subdirectory gets its own file instead of overwriting one in the CWD.
+            hdf5_file_path = os.path.join(subdir_path_int, f"{subdir_name}.h5")
+            with h5py.File(hdf5_file_path, "w") as f:
+                subdir_grp = f.create_group(subdir_name)
+                # np.bytes_ replaces np.string_, which was removed in NumPy 2.0
+                subdir_grp.attrs["NX_class"] = np.bytes_("NXentry")
+                subdir_grp.attrs["description"] = np.bytes_(subdir_name)
+
+                # One "<idx>.1" group per entry of results_data, numbered from 1
+                for idx, result in enumerate(results_data, start=1):
+                    scan_grp = subdir_grp.create_group(f"{idx}.1")
+                    scan_grp.attrs["NX_class"] = np.bytes_("NXentry")
+                    scan_grp.attrs["default"] = np.bytes_("measurement")
+                    scan_grp.attrs["plotselect"] = np.bytes_("q,I")
+
+                    # --- Measurement group ---
+                    meas_grp = scan_grp.create_group("measurement")
+                    meas_grp.attrs["NX_class"] = np.bytes_("NXcollection")
+
+                    # Datasets q, I, dI stored as float64, chunked at <= 1000 values
+                    for name in ["q", "I", "dI"]:
+                        data = result[name]
+                        meas_grp.create_dataset(name, data=data, chunks=(min(len(data), 1000),), dtype='f8')
+
+                    # --- Plotselect group ---
+                    plotselect_grp = scan_grp.create_group("plotselect")
+                    plotselect_grp.attrs["NX_class"] = np.bytes_("NXcollection")
+                    plotselect_grp.attrs["axes"] = np.bytes_("q")
+                    plotselect_grp.attrs["signal"] = np.bytes_("I")
+            # # Sort results_data by filename
+            # def natural_sort_key(item):
+            #     return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', item["filename"])]
+
+            # results_data = sorted(results_data, key=natural_sort_key)
+            # # Prepare data for HDF5 file using silx
+            # hdf5_data = {}
+            # for idx, result in enumerate(results_data, start=1):
+            #     hdf5_data[f"{idx}.1"] = {
+            #         "@NX_class": "NXentry",
+            #         "measurement": {
+            #             "@NX_class": "NXcollection",
+            #             "q": result["q"].tolist(),  # Convert numpy arrays to lists for HDF5 compatibility
+            #             "I": result["I"].tolist(),
+            #             "dI": result["dI"].tolist(),
+            #         },
+            #         "plotselect": {
+            #             "@NX_class": "NXcollection",
+            #             "axes": "q",
+            #             "signal": "I",
+            #         },
+            #     }
+
+            print(f"Results for subdirectory {subdir_name} saved to HDF5 file using h5py.")
 
             # Save to HDF5 file using silx
-            hdf5_file_path = os.path.join(subdir_path_int, f"{subdir_name}.h5")
-            dicttoh5(hdf5_data, hdf5_file_path, mode="w")
+            # hdf5_file_path = os.path.join(subdir_path_int, f"{subdir_name}.h5")
+            # dicttoh5(hdf5_data, hdf5_file_path, mode="w")
 
-            print(f"Results for subdirectory {subdir_name} saved to CSV and HDF5 files using silx.")
             del results_df
         else:
             print(f"No images were integrated in subdirectory {subdir}. No results DataFrame created.")
-- 
GitLab