From bed9e2c6db2e45efc3d55706abee7ea3e9bd48c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=B6ne=2C=20Tjark=20Leon=20Raphael?= <tjark.leon.raphael.groene@uni-hamburg.de> Date: Mon, 16 Jun 2025 16:50:12 +0200 Subject: [PATCH] Update file maxwell_integrate_to_h5.py --- maxwell_integrate_to_h5.py | 110 ++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 51 deletions(-) diff --git a/maxwell_integrate_to_h5.py b/maxwell_integrate_to_h5.py index 1e218a6..e636bbf 100644 --- a/maxwell_integrate_to_h5.py +++ b/maxwell_integrate_to_h5.py @@ -33,7 +33,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"): path_int_list = [] for path, subdirs, files in os.walk(path_im): for name in files: - if FORBIDDEN not in name: + if not any(forbidden in name for forbidden in FORBIDDEN): fnames_ims.append(os.path.join(path, name)) if path_im != str(path): path_new = str(path).replace(path_im,'') @@ -57,70 +57,77 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"): basename_int = os.path.basename(fname_im)[:-len(dtype_im)] + dtype_int fname_int = os.path.join(path_int, basename_int) - if ("metadata" not in fname_im): - if not os.path.isfile(fname_int): - # Perform integration and return results instead of saving to file - if ERRORMODE == "none": - q, I = ai.integrate1d( - data=im, - npt=NPT, - mask=mask, - polarization_factor=POLARIZATION, - correctSolidAngle=True, - error_model=ERRORMODE, - unit=UNIT, - ) - dI = np.zeros_like(I) - else: - q, I, dI = ai.integrate1d( - data=im, - npt=NPT, - mask=mask, - polarization_factor=POLARIZATION, - correctSolidAngle=True, - error_model=ERRORMODE, - unit=UNIT, - ) + if not os.path.isfile(fname_int): + # Perform integration and return results instead of saving to file + if ERRORMODE == "none": + q, I = ai.integrate1d( + data=im, + npt=NPT, + mask=mask, + polarization_factor=POLARIZATION, + correctSolidAngle=True, + error_model=ERRORMODE, + unit=UNIT, + ) + dI = np.zeros_like(I) + else: + q, I, dI = ai.integrate1d( + data=im, + npt=NPT, + mask=mask, + polarization_factor=POLARIZATION, + correctSolidAngle=True, + error_model=ERRORMODE, + unit=UNIT, + ) + + data = { + "q": q, + "I": I, + "dI": dI, + "filename": fname_im + } - data = { - "q": q, - "I": I, - "dI": dI, - "filename": fname_im - } + return data - # Check if the DataFrame exists, otherwise create it - if 'results_df' not in globals(): - results_df = pd.DataFrame(data) - else: - results_df = pd.concat([results_df, pd.DataFrame(data)], ignore_index=True) pool = Pool(int(NPROC)) for subdir in set(os.path.dirname(fname) for fname in fnames_ims): subdir_fnames = [fname for fname in fnames_ims if os.path.dirname(fname) == subdir] subdir_path_int = path_int_list[fnames_ims.index(subdir_fnames[0])] - for fname_im in subdir_fnames: - pool.apply_async(integration_thread, (fname_im, subdir_path_int)) - print(f"Integrating {fname_im}.") + results = [] + filtered_fnames = [fname_im for fname_im in subdir_fnames if "metadata" not in fname_im] + + if filtered_fnames: + # Use map_async to apply the integration_thread function to all filtered filenames + async_result = pool.map_async( + lambda fname_im: integration_thread(fname_im, subdir_path_int), + filtered_fnames + ) - pool.close() - pool.join() + pool.close() + pool.join() - # Export the DataFrame to a CSV file with the name of the subdirectory - if 'results_df' in globals(): - results_df = results_df.sort_values(by="filename", key=lambda col: col.str.lower()) - subdir_name = os.path.basename(os.path.normpath(subdir_path_int)) - results_df.to_csv(os.path.join(subdir_path_int, f"{subdir_name}.csv"), index=False) - results_df.to_hdf(os.path.join(subdir_path_int, f"{subdir_name}.h5"), key='data', mode='w') - print(f"Results for subdirectory {subdir_name} saved to CSV and HDF5 files.") - del results_df + # Export the DataFrame to a CSV file with the name of the subdirectory + if async_result.ready(): + # Retrieve results from async_result + results_data = async_result.get() + results_df = pd.DataFrame(results_data) + results_df = results_df.sort_values(by="filename", key=lambda col: col.str.lower()) + subdir_name = os.path.basename(os.path.normpath(subdir_path_int)) + results_df.to_csv(os.path.join(subdir_path_int, f"{subdir_name}.csv"), index=False) + results_df.to_hdf(os.path.join(subdir_path_int, f"{subdir_name}.h5"), key='data', mode='w') + print(f"Results for subdirectory {subdir_name} saved to CSV and HDF5 files.") + del results_df + else: + print(f"No images were integrated in subdirectory {subdir}. No results DataFrame created.") else: - print(f"No images were integrated in subdirectory {subdir}. No results DataFrame created.") + print(f"No valid filenames found in subdirectory {subdir}.") # Reset the pool for the next subdirectory pool = Pool(int(NPROC)) - + def integrate_on_created(event, path_int, dtype_im=".tif", dtype_int=".dat"): """ @@ -220,6 +227,7 @@ if __name__ == '__main__': if not sys.argv[7].isdigit(): raise ValueError("NPT must be a positive integer") if not sys.argv[8].isalpha(): + raise ValueError("UNIT must be a string representing the unit (e.g., 'q_A^-1', 'q_nm^-1', 'q_ang^-1')") if not sys.argv[9].isalpha(): raise ValueError("ERRORMODE must be a string representing the error model (e.g., 'poisson', 'azimuthal', 'none')") -- GitLab