Commit f6511130 authored by Gröne, Tjark Leon Raphael

Update file maxwell_integrate_to_h5.py

parent 17a12b81
@@ -35,6 +35,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
    global NPROC
    global FORBIDDEN
    fnames_ims = []  #= glob(os.path.join(path_im, "*" + dtype_im))
    fnames_metadata = []  #= glob(os.path.join(path_im, "*" + ".metadata"))
    path_int_list = []
    for path, subdirs, files in os.walk(path_im):
        for name in files:
@@ -46,9 +47,48 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                else:
                    path_new = path_int
                path_int_list.append(path_new)
            elif name.endswith(".metadata"):
                fnames_metadata.append(os.path.join(path, name))
    #fnames_ims.sort(key=str.lower)
    def metadata_thread(fname_meta):
        """
        Extract metadata from a file with the .metadata extension.
        :param fname_meta: Path of a .metadata file.
        :return: Dictionary containing the extracted metadata.
        """
        # Keys read verbatim from the "key=value" lines of the .metadata file.
        keys = (
            "dateString", "userComment1", "userComment2", "userComment3",
            "userComment4", "width", "height", "exposureTime",
            "summedExposures", "imageSequenceNumber",
        )
        # Initialise once, outside the loop, so earlier matches are not discarded.
        metadata = {}
        # Open the path that was passed in; callers supply the full file path.
        with open(fname_meta, 'r') as metadata_file:
            for line in metadata_file:
                for key in keys:
                    if line.startswith(key + "="):
                        metadata[key] = line.split("=", 1)[1].strip()
                        break
        metadata["filename"] = fname_meta
        return metadata
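    # Illustrative sketch (file layout assumed, not taken from the commit): a
    # .metadata file is expected to hold one "key=value" pair per line, e.g.
    #   dateString=2024-01-01 12:00:00
    #   exposureTime=0.05
    # for which metadata_thread("/data/run1/frame_0001.tif.metadata") would return
    #   {"dateString": "2024-01-01 12:00:00", "exposureTime": "0.05",
    #    "filename": "/data/run1/frame_0001.tif.metadata"}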
    def integration_thread(fname_im, path_int):
        global NPT
        global UNIT
@@ -104,6 +144,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
            os.mkdir(subdir_path_int)
        filtered_fnames = [fname_im for fname_im in subdir_fnames if "metadata" not in fname_im]
        filtered_metadata = [fname_im for fname_im in subdir_fnames if "metadata" in fname_im]
        if filtered_fnames:
            # Use map_async to apply the integration_thread function to all filtered filenames
@@ -113,15 +154,40 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                filtered_fnames
            )
        if filtered_metadata:
            print(f"Extracting metadata in subdirectory: {subdir}")
            # Metadata extraction could reuse the same pool, but it must only start
            # after the image-integration tasks have finished.
            pool.close()
            pool.join()  # Ensure image integration is complete before extracting metadata
            # Create a new pool for metadata extraction. map_async needs a picklable
            # callable, so pass metadata_thread itself rather than wrapping it in a lambda.
            metadata_pool = Pool(int(NPROC))
            async_metadata_result = metadata_pool.map_async(
                metadata_thread,
                filtered_metadata
            )
            metadata_pool.close()
            metadata_pool.join()
        else:
            # No .metadata files in this subdirectory; leave a sentinel so the
            # checks below know there is no metadata result to wait for.
            async_metadata_result = None
            pool.close()
            pool.join()
        # Export the DataFrame to a CSV file with the name of the subdirectory
        if async_result.ready() and (async_metadata_result is None or async_metadata_result.ready()):
            # Retrieve results from async_result
            results_data = async_result.get()
            results_metadata = async_metadata_result.get() if async_metadata_result else []
            results_df = pd.DataFrame(results_data)
            results_df = results_df.sort_values(by="filename", key=lambda col: col.str.lower())
            if results_metadata:
                results_metadata_df = pd.DataFrame(results_metadata)
                results_metadata_df = results_metadata_df.sort_values(by="filename", key=lambda col: col.str.lower())
                # Broadcast the per-run metadata of the first file into constant
                # columns of the integration results table.
                for key, value in results_metadata_df.iloc[0].items():
                    if key not in results_df.columns:
                        results_df[key] = value
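                # Illustrative effect (values assumed): if the first metadata row holds
                # exposureTime="0.05", every row of results_df gains a constant
                # "exposureTime" column with that value.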
            subdir_name = os.path.basename(os.path.normpath(subdir_path_int))
            results_df.to_csv(os.path.join(subdir_path_int, f"{subdir_name}.csv"), index=False)
@@ -129,6 +195,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', item["filename"])]
            results_data = sorted(results_data, key=natural_sort_key)
            results_metadata = sorted(results_metadata, key=natural_sort_key)
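            # Example: natural sorting places {"filename": "img_2.tif"} before
            # {"filename": "img_10.tif"}, whereas plain lexicographic order would not.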
            output_file = os.path.join(subdir_path_int, f"{subdir_name}.h5")
            if os.path.exists(output_file):
@@ -148,6 +215,22 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                    entry.attrs["NX_class"] = "NXentry"
entry.create_dataset("time", data=results_metadata[idx-1]["dateString"].encode('utf-8'))
if any(results_metadata[idx-1][key] for key in ["userComment1", "userComment2", "userComment3", "userComment4"]):
comments = entry.create_group("comments")
comments.attrs["NX_class"] = "NXcomments"
if results_metadata[idx-1]["userComment1"]:
comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'))
if results_metadata[idx-1]["userComment2"]:
comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'))
if results_metadata[idx-1]["userComment3"]:
comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'))
if results_metadata[idx-1]["userComment4"]:
comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'))
                    # Instrument / Detector group
                    detector = entry.create_group("instrument/detector")
                    detector.attrs["NX_class"] = "NXdetector"
@@ -156,6 +239,11 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
detector.create_dataset("q", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,)) detector.create_dataset("q", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,))
detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,)) detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,))
detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,)) detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,))
detector.create_dataset("pixels width", data=int(results_metadata[idx-1]["width"]))
detector.create_dataset("pixels height", data=int(results_metadata[idx-1]["height"]))
detector.create_dataset("exposureTime", data=float(results_metadata[idx-1]["exposureTime"]))
detector.create_dataset("summedExposures", data=int(results_metadata[idx-1]["summedExposures"]))
detector.create_dataset("imageSequenceNumber", data=int(results_metadata[idx-1]["imageSequenceNumber"]))
                    # Add interpretation info (optional for PyMca)
                    detector["I"].attrs["interpretation"] = "spectrum"
@@ -167,6 +255,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
meas.attrs["axes"] = "q" meas.attrs["axes"] = "q"
meas.attrs["filename"] = result["filename"] meas.attrs["filename"] = result["filename"]
meas["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I") meas["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I")
meas["q"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q") meas["q"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q")
meas["dI"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/dI") meas["dI"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/dI")
...