diff --git a/maxwell_integrate_to_h5.py b/maxwell_integrate_to_h5.py
index 69acb6a75ffafe421fca20e256e4a0e150c13176..07a12025a70dca34d98ecba58371b1f98be292d0 100644
--- a/maxwell_integrate_to_h5.py
+++ b/maxwell_integrate_to_h5.py
@@ -35,6 +35,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
global NPROC
global FORBIDDEN
fnames_ims = []#= glob(os.path.join(path_im, "*" + dtype_im))
+ fnames_metadata = []#= glob(os.path.join(path_im, "*" + ".metadata"))
path_int_list = []
for path, subdirs, files in os.walk(path_im):
for name in files:
@@ -46,8 +47,47 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
else:
path_new = path_int
path_int_list.append(path_new)
+ elif name.endswith(".metadata"):
+ fnames_metadata.append(os.path.join(path, name))
+
#fnames_ims.sort(key=str.lower)
+
+    def metadata_thread(fname_meta):
+        """
+        Parse a single .metadata file into a dictionary.
+        :param fname_meta: Path to one file with the .metadata extension.
+        :return: Dictionary mapping the known metadata keys to string values.
+        """
+        known_keys = (
+            "dateString", "userComment1", "userComment2", "userComment3",
+            "userComment4", "width", "height", "exposureTime",
+            "summedExposures", "imageSequenceNumber",
+        )
+        # Pre-fill every key so downstream lookups never raise KeyError.
+        metadata = {key: "" for key in known_keys}
+
+        with open(fname_meta, 'r') as metadata_file:
+            for line in metadata_file:
+                key, sep, value = line.partition("=")
+                key = key.strip()
+                if sep and key in known_keys:
+                    metadata[key] = value.strip()
+        # Store the basename so sorting lines up with the integrated results.
+        metadata["filename"] = os.path.basename(fname_meta)
+        return metadata
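+
+    # Illustrative sketch (hypothetical values): a .metadata file containing
+    #     dateString=2024-06-01 12:00:00
+    #     exposureTime=0.5
+    #     summedExposures=10
+    # comes back as {"dateString": "2024-06-01 12:00:00", "exposureTime": "0.5",
+    # "summedExposures": "10", ..., "filename": "<basename>.metadata"}, with the
+    # remaining known keys left as empty strings.
+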
def integration_thread(fname_im,path_int):
global NPT
@@ -104,24 +144,50 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
os.mkdir(subdir_path_int)
filtered_fnames = [fname_im for fname_im in subdir_fnames if "metadata" not in fname_im]
+        filtered_metadata = [fname for fname in subdir_fnames if "metadata" in fname]
if filtered_fnames:
# Use map_async to apply the integration_thread function to all filtered filenames
print(f"Integrating images in subdirectory: {subdir}")
async_result = pool.map_async(
                    lambda fname_im: integration_thread(fname_im, subdir_path_int),
                    filtered_fnames
                )
-
-                pool.close()
-                pool.join()
+
+                if filtered_metadata:
+                    print(f"Extracting metadata in subdirectory: {subdir}")
+                    # Wait for the integration jobs to finish before starting the
+                    # metadata pass; close()/join() blocks until the pool drains.
+                    pool.close()
+                    pool.join()
+
+                    # Extract metadata in a fresh pool of its own.
+                    metadata_pool = Pool(int(NPROC))
+                    async_metadata_result = metadata_pool.map_async(
+                        metadata_thread,
+                        filtered_metadata
+                    )
+                    metadata_pool.close()
+                    metadata_pool.join()
+                else:
+                    # No .metadata files in this subdirectory.
+                    async_metadata_result = None
+                    pool.close()
+                    pool.join()
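+
+                # Design note: calling async_result.get() would equally block until
+                # the integration jobs finish; the explicit close()/join() plus a
+                # second pool keeps the two phases separate, at the cost of
+                # re-spawning workers for the metadata pass.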
# Export the DataFrame to a CSV file with the name of the subdirectory
-                if async_result.ready():
+                if async_result.ready() and (async_metadata_result is None or async_metadata_result.ready()):
# Retrieve results from async_result
results_data = async_result.get()
+                    results_metadata = async_metadata_result.get() if async_metadata_result else []
+
                    results_df = pd.DataFrame(results_data)
+                    results_metadata_df = pd.DataFrame(results_metadata)
+
                    results_df = results_df.sort_values(by="filename", key=lambda col: col.str.lower())
+                    if not results_metadata_df.empty:
+                        results_metadata_df = results_metadata_df.sort_values(by="filename", key=lambda col: col.str.lower())
+
+                        # Broadcast the first file's metadata onto every row of the
+                        # integrated results; acquisition settings are assumed to be
+                        # constant within a subdirectory.
+                        for key, value in results_metadata_df.iloc[0].items():
+                            if key not in results_df.columns:
+                                results_df[key] = value
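+                        # For instance, a results_df with columns [q, I, dI, filename]
+                        # gains dateString, userComment1..4, width, height, exposureTime,
+                        # summedExposures and imageSequenceNumber columns, each holding
+                        # the first file's value.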
+
subdir_name = os.path.basename(os.path.normpath(subdir_path_int))
results_df.to_csv(os.path.join(subdir_path_int, f"{subdir_name}.csv"), index=False)
@@ -129,6 +195,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', item["filename"])]
results_data = sorted(results_data, key=natural_sort_key)
+ results_metadata = sorted(results_metadata, key=natural_sort_key)
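+            # natural_sort_key orders numbered files numerically, e.g. "im_2.tif"
+            # sorts before "im_10.tif", where a plain lexicographic sort would not.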
output_file = os.path.join(subdir_path_int, f"{subdir_name}.h5")
if os.path.exists(output_file):
@@ -148,6 +215,22 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
entry.attrs["NX_class"] = "NXentry"
+ entry.create_dataset("time", data=results_metadata[idx-1]["dateString"].encode('utf-8'))
+
+
+ if any(results_metadata[idx-1][key] for key in ["userComment1", "userComment2", "userComment3", "userComment4"]):
+ comments = entry.create_group("comments")
+ comments.attrs["NX_class"] = "NXcomments"
+ if results_metadata[idx-1]["userComment1"]:
+ comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'))
+ if results_metadata[idx-1]["userComment2"]:
+ comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'))
+ if results_metadata[idx-1]["userComment3"]:
+ comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'))
+ if results_metadata[idx-1]["userComment4"]:
+ comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'))
+
+
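+
+                # Note: "NXcomments" is not a standard NeXus base class; NXnote is
+                # the closest standard fit if strict NeXus validation matters.
+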
# Instrument / Detector group
detector = entry.create_group("instrument/detector")
detector.attrs["NX_class"] = "NXdetector"
@@ -156,6 +239,11 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
detector.create_dataset("q", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,))
detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,))
detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,))
+ detector.create_dataset("pixels width", data=int(results_metadata[idx-1]["width"]))
+ detector.create_dataset("pixels height", data=int(results_metadata[idx-1]["height"]))
+ detector.create_dataset("exposureTime", data=float(results_metadata[idx-1]["exposureTime"]))
+ detector.create_dataset("summedExposures", data=int(results_metadata[idx-1]["summedExposures"]))
+ detector.create_dataset("imageSequenceNumber", data=int(results_metadata[idx-1]["imageSequenceNumber"]))
# Add interpretation info (optional for PyMca)
detector["I"].attrs["interpretation"] = "spectrum"