Commit f6511130 authored by Gröne, Tjark Leon Raphael

Update file maxwell_integrate_to_h5.py

parent 17a12b81
@@ -35,6 +35,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
    global NPROC
    global FORBIDDEN
    fnames_ims = []  #= glob(os.path.join(path_im, "*" + dtype_im))
    fnames_metadata = []  #= glob(os.path.join(path_im, "*" + ".metadata"))
    path_int_list = []
    for path, subdirs, files in os.walk(path_im):
        for name in files:
@@ -46,9 +47,48 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                else:
                    path_new = path_int
                path_int_list.append(path_new)
            elif name.endswith(".metadata"):
                fnames_metadata.append(os.path.join(path, name))
    #fnames_ims.sort(key=str.lower)
    def metadata_thread(fname_meta):
        """
        Extract metadata from a file with the .metadata extension.
        :param fname_meta: Path of a .metadata file.
        :return: Dictionary containing the extracted metadata.
        """
        # Keys read verbatim from the "key=value" lines of the .metadata file.
        keys = (
            "dateString", "userComment1", "userComment2", "userComment3",
            "userComment4", "width", "height", "exposureTime",
            "summedExposures", "imageSequenceNumber",
        )
        # Initialise once, outside the loop, so earlier matches are not discarded.
        metadata = {}
        # Open the path that was passed in; callers supply the full file path.
        with open(fname_meta, 'r') as metadata_file:
            for line in metadata_file:
                for key in keys:
                    if line.startswith(key + "="):
                        metadata[key] = line.split("=", 1)[1].strip()
                        break
        metadata["filename"] = fname_meta
        return metadata
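    # Illustrative sketch (file layout assumed, not taken from the commit): a
    # .metadata file is expected to hold one "key=value" pair per line, e.g.
    #   dateString=2024-01-01 12:00:00
    #   exposureTime=0.05
    # for which metadata_thread("/data/run1/frame_0001.tif.metadata") would return
    #   {"dateString": "2024-01-01 12:00:00", "exposureTime": "0.05",
    #    "filename": "/data/run1/frame_0001.tif.metadata"}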
    def integration_thread(fname_im, path_int):
        global NPT
        global UNIT
@@ -104,6 +144,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
            os.mkdir(subdir_path_int)
        filtered_fnames = [fname_im for fname_im in subdir_fnames if "metadata" not in fname_im]
        filtered_metadata = [fname_im for fname_im in subdir_fnames if "metadata" in fname_im]
        if filtered_fnames:
            # Use map_async to apply the integration_thread function to all filtered filenames
@@ -113,15 +154,40 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                filtered_fnames
            )
        if filtered_metadata:
            print(f"Extracting metadata in subdirectory: {subdir}")
            # Metadata extraction could reuse the same pool, but it must only start
            # after the image-integration tasks have finished.
            pool.close()
            pool.join()  # Ensure image integration is complete before extracting metadata
            # Create a new pool for metadata extraction. map_async needs a picklable
            # callable, so pass metadata_thread itself rather than wrapping it in a lambda.
            metadata_pool = Pool(int(NPROC))
            async_metadata_result = metadata_pool.map_async(
                metadata_thread,
                filtered_metadata
            )
            metadata_pool.close()
            metadata_pool.join()
        else:
            # No .metadata files in this subdirectory; leave a sentinel so the
            # checks below know there is no metadata result to wait for.
            async_metadata_result = None
            pool.close()
            pool.join()
        # Export the DataFrame to a CSV file with the name of the subdirectory
        if async_result.ready() and (async_metadata_result is None or async_metadata_result.ready()):
            # Retrieve results from async_result
            results_data = async_result.get()
            results_metadata = async_metadata_result.get() if async_metadata_result else []
            results_df = pd.DataFrame(results_data)
            results_df = results_df.sort_values(by="filename", key=lambda col: col.str.lower())
            if results_metadata:
                results_metadata_df = pd.DataFrame(results_metadata)
                results_metadata_df = results_metadata_df.sort_values(by="filename", key=lambda col: col.str.lower())
                # Broadcast the per-run metadata of the first file into constant
                # columns of the integration results table.
                for key, value in results_metadata_df.iloc[0].items():
                    if key not in results_df.columns:
                        results_df[key] = value
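                # Illustrative effect (values assumed): if the first metadata row holds
                # exposureTime="0.05", every row of results_df gains a constant
                # "exposureTime" column with that value.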
            subdir_name = os.path.basename(os.path.normpath(subdir_path_int))
            results_df.to_csv(os.path.join(subdir_path_int, f"{subdir_name}.csv"), index=False)
@@ -129,6 +195,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', item["filename"])]
            results_data = sorted(results_data, key=natural_sort_key)
            results_metadata = sorted(results_metadata, key=natural_sort_key)
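            # Example: natural sorting places {"filename": "img_2.tif"} before
            # {"filename": "img_10.tif"}, whereas plain lexicographic order would not.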
            output_file = os.path.join(subdir_path_int, f"{subdir_name}.h5")
            if os.path.exists(output_file):
@@ -148,6 +215,22 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
                    entry.attrs["NX_class"] = "NXentry"
entry.create_dataset("time", data=results_metadata[idx-1]["dateString"].encode('utf-8'))
if any(results_metadata[idx-1][key] for key in ["userComment1", "userComment2", "userComment3", "userComment4"]):
comments = entry.create_group("comments")
comments.attrs["NX_class"] = "NXcomments"
if results_metadata[idx-1]["userComment1"]:
comments.create_dataset("userComment1", data=results_metadata[idx-1]["userComment1"].encode('utf-8'))
if results_metadata[idx-1]["userComment2"]:
comments.create_dataset("userComment2", data=results_metadata[idx-1]["userComment2"].encode('utf-8'))
if results_metadata[idx-1]["userComment3"]:
comments.create_dataset("userComment3", data=results_metadata[idx-1]["userComment3"].encode('utf-8'))
if results_metadata[idx-1]["userComment4"]:
comments.create_dataset("userComment4", data=results_metadata[idx-1]["userComment4"].encode('utf-8'))
                    # Instrument / Detector group
                    detector = entry.create_group("instrument/detector")
                    detector.attrs["NX_class"] = "NXdetector"
@@ -156,6 +239,11 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
detector.create_dataset("q", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,)) detector.create_dataset("q", data=np.asarray(result["q"], dtype=np.float64), chunks=(chunk_size,))
detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,)) detector.create_dataset("I", data=np.asarray(result["I"], dtype=np.float64), chunks=(chunk_size,))
detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,)) detector.create_dataset("dI", data=np.asarray(result["dI"], dtype=np.float64), chunks=(chunk_size,))
detector.create_dataset("pixels width", data=int(results_metadata[idx-1]["width"]))
detector.create_dataset("pixels height", data=int(results_metadata[idx-1]["height"]))
detector.create_dataset("exposureTime", data=float(results_metadata[idx-1]["exposureTime"]))
detector.create_dataset("summedExposures", data=int(results_metadata[idx-1]["summedExposures"]))
detector.create_dataset("imageSequenceNumber", data=int(results_metadata[idx-1]["imageSequenceNumber"]))
                    # Add interpretation info (optional for PyMca)
                    detector["I"].attrs["interpretation"] = "spectrum"
@@ -167,6 +255,7 @@ def integrate_ims_in_dir(path_im, path_int, dtype_im=".tif", dtype_int=".dat"):
meas.attrs["axes"] = "q" meas.attrs["axes"] = "q"
meas.attrs["filename"] = result["filename"] meas.attrs["filename"] = result["filename"]
meas["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I") meas["I"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/I")
meas["q"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q") meas["q"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/q")
meas["dI"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/dI") meas["dI"] = h5py.SoftLink(f"/{entry_name}/instrument/detector/dI")
...