From bed9e2c6db2e45efc3d55706abee7ea3e9bd48c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=B6ne=2C=20Tjark=20Leon=20Raphael?=
 <tjark.leon.raphael.groene@uni-hamburg.de>
Date: Mon, 16 Jun 2025 16:50:12 +0200
Subject: [PATCH] maxwell_integrate_to_h5.py: collect per-subdirectory integration results via map_async and export to CSV/HDF5

---
 maxwell_integrate_to_h5.py | 110 ++++++++++++++++++++-----------------
 1 file changed, 59 insertions(+), 51 deletions(-)

diff --git a/maxwell_integrate_to_h5.py b/maxwell_integrate_to_h5.py
index 1e218a6..e636bbf 100644
--- a/maxwell_integrate_to_h5.py
+++ b/maxwell_integrate_to_h5.py
@@ -33,7 +33,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
     path_int_list = []
     for path, subdirs, files in os.walk(path_im):
         for name in files:
-            if FORBIDDEN not in name:
+            if not any(forbidden in name for forbidden in FORBIDDEN):
                 fnames_ims.append(os.path.join(path, name))
                 if path_im != str(path):
                     path_new = str(path).replace(path_im,'')      
@@ -57,70 +57,77 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
         basename_int = os.path.basename(fname_im)[:-len(dtype_im)] + dtype_int
         fname_int = os.path.join(path_int, basename_int)
 
-        if ("metadata" not in fname_im):
-            if not os.path.isfile(fname_int):
-                # Perform integration and return results instead of saving to file
-                if ERRORMODE == "none":
-                    q, I = ai.integrate1d(
-                        data=im,
-                        npt=NPT,
-                        mask=mask,
-                        polarization_factor=POLARIZATION,
-                        correctSolidAngle=True,
-                        error_model=ERRORMODE,
-                        unit=UNIT,
-                    )
-                    dI = np.zeros_like(I)
-                else:
-                    q, I, dI = ai.integrate1d(
-                        data=im,
-                        npt=NPT,
-                        mask=mask,
-                        polarization_factor=POLARIZATION,
-                        correctSolidAngle=True,
-                        error_model=ERRORMODE,
-                        unit=UNIT,
-                    )
+        if not os.path.isfile(fname_int):
+            # Perform integration and return results instead of saving to file
+            if ERRORMODE == "none":
+                q, I = ai.integrate1d(
+                    data=im,
+                    npt=NPT,
+                    mask=mask,
+                    polarization_factor=POLARIZATION,
+                    correctSolidAngle=True,
+                    error_model=ERRORMODE,
+                    unit=UNIT,
+                )
+                dI = np.zeros_like(I)
+            else:
+                q, I, dI = ai.integrate1d(
+                    data=im,
+                    npt=NPT,
+                    mask=mask,
+                    polarization_factor=POLARIZATION,
+                    correctSolidAngle=True,
+                    error_model=ERRORMODE,
+                    unit=UNIT,
+                )
+        
+            data = {
+                "q": q,
+                "I": I,
+                "dI": dI,
+                "filename": fname_im
+            }
             
-                data = {
-                    "q": q,
-                    "I": I,
-                    "dI": dI,
-                    "filename": fname_im
-                }
+            return data
 
-                # Check if the DataFrame exists, otherwise create it
-                if 'results_df' not in globals():
-                    results_df = pd.DataFrame(data)
-                else:
-                    results_df = pd.concat([results_df, pd.DataFrame(data)], ignore_index=True)
 
     pool = Pool(int(NPROC))
     for subdir in set(os.path.dirname(fname) for fname in fnames_ims):
         subdir_fnames = [fname for fname in fnames_ims if os.path.dirname(fname) == subdir]
         subdir_path_int = path_int_list[fnames_ims.index(subdir_fnames[0])]
 
-        for fname_im in subdir_fnames:
-            pool.apply_async(integration_thread, (fname_im, subdir_path_int))
-            print(f"Integrating {fname_im}.")
+        results = []
+        filtered_fnames = [fname_im for fname_im in subdir_fnames if "metadata" not in fname_im]
+
+        if filtered_fnames:
+            # Use map_async to apply the integration_thread function to all filtered filenames
+            async_result = pool.map_async(
+                lambda fname_im: integration_thread(fname_im, subdir_path_int),
+                filtered_fnames
+            )
 
-        pool.close()
-        pool.join()
+            pool.close()
+            pool.join()
 
-        # Export the DataFrame to a CSV file with the name of the subdirectory
-        if 'results_df' in globals():
-            results_df = results_df.sort_values(by="filename", key=lambda col: col.str.lower())
-            subdir_name = os.path.basename(os.path.normpath(subdir_path_int))
-            results_df.to_csv(os.path.join(subdir_path_int, f"{subdir_name}.csv"), index=False)
-            results_df.to_hdf(os.path.join(subdir_path_int, f"{subdir_name}.h5"), key='data', mode='w')
-            print(f"Results for subdirectory {subdir_name} saved to CSV and HDF5 files.")
-            del results_df
+            # Export the DataFrame to a CSV file with the name of the subdirectory
+            if async_result.ready():
+                # Retrieve results from async_result
+                results_data = async_result.get()
+                results_df = pd.DataFrame(results_data)
+                results_df = results_df.sort_values(by="filename", key=lambda col: col.str.lower())
+                subdir_name = os.path.basename(os.path.normpath(subdir_path_int))
+                results_df.to_csv(os.path.join(subdir_path_int, f"{subdir_name}.csv"), index=False)
+                results_df.to_hdf(os.path.join(subdir_path_int, f"{subdir_name}.h5"), key='data', mode='w')
+                print(f"Results for subdirectory {subdir_name} saved to CSV and HDF5 files.")
+                del results_df
+            else:
+                print(f"No images were integrated in subdirectory {subdir}. No results DataFrame created.")
         else:
-            print(f"No images were integrated in subdirectory {subdir}. No results DataFrame created.")
+            print(f"No valid filenames found in subdirectory {subdir}.")
 
         # Reset the pool for the next subdirectory
         pool = Pool(int(NPROC))
-    
+
         
 def integrate_on_created(event, path_int, dtype_im=".tif", dtype_int=".dat"):
     """
@@ -220,6 +227,7 @@ if __name__ == '__main__':
     if not sys.argv[7].isdigit():
         raise ValueError("NPT must be a positive integer")
     if not sys.argv[8].isalpha():
+        
         raise ValueError("UNIT must be a string representing the unit (e.g., 'q_A^-1', 'q_nm^-1', 'q_ang^-1')")
     if not sys.argv[9].isalpha():
         raise ValueError("ERRORMODE must be a string representing the error model (e.g., 'poisson', 'azimuthal', 'none')")
-- 
GitLab