Source code for syconn.processing.objectextraction

# -*- coding: utf-8 -*-
# SyConn - Synaptic connectivity inference toolkit
#
# Copyright (c) 2016 - now
# Max-Planck-Institute for Medical Research, Heidelberg, Germany
# Authors: Sven Dorkenwald, Philipp Schubert, Jörgen Kornfeld

import numpy as np
import os
import time
import cPickle as pickle
import networkx as nx
import glob

from ..processing import objectextraction_helper as oeh
from ..multi_proc import multi_proc_main as mpm
from ..utils import datahandler, segmentationdataset


[docs]def calculate_chunk_numbers_for_box(cset, offset, size):
    """ Calculates the chunk ids that are (partly) contained in the defined
    volume

    Parameters
    ----------
    cset : ChunkDataset
    offset : np.array
        offset of the volume to the origin
    size: np.array
        size of the volume

    Returns
    -------
    chunk_list: list
        chunk ids
    translator: dict
        reverse mapping from chunk id to position in chunk_list
    """
    for dim in range(3):
        offset_overlap = offset[dim] % cset.chunk_size[dim]
        offset[dim] -= offset_overlap
        size[dim] += offset_overlap
        size[dim] += (cset.chunk_size[dim] - size[dim]) % cset.chunk_size[dim]

    chunk_list = []
    translator = {}
    for x in range(offset[0], offset[0] + size[0], cset.chunk_size[0]):
        for y in range(offset[1], offset[1] + size[1], cset.chunk_size[1]):
            for z in range(offset[2], offset[2] + size[2], cset.chunk_size[2]):
                chunk_list.append(cset.coord_dict[tuple([x, y, z])])
                translator[chunk_list[-1]] = len(chunk_list) - 1
    print "Chunk List contains %d elements." % len(chunk_list)
    return chunk_list, translator

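# Worked example (illustration only, with hypothetical numbers assuming a
# chunk_size of [128, 128, 128]): for offset = [100, 0, 0] and
# size = [200, 128, 128] the box is first snapped back to the chunk grid
# (offset becomes [0, 0, 0], size grows to [300, 128, 128]) and then padded to
# the next chunk multiple ([384, 128, 128]), so the loops visit the chunk
# coordinates (0, 0, 0), (128, 0, 0) and (256, 0, 0) and collect their ids
# from cset.coord_dict.
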
[docs]def gauss_threshold_connected_components(cset, filename, hdf5names,
                                         overlap="auto", sigmas=None,
                                         thresholds=None, chunk_list=None,
                                         debug=False, swapdata=False,
                                         label_density=np.ones(3),
                                         membrane_filename=None,
                                         membrane_kd_path=None,
                                         hdf5_name_membrane=None,
                                         fast_load=False, suffix="",
                                         qsub_pe=None, qsub_queue=None):
    """ Extracts connected components from probability maps

    1. Gaussian filter (defined by sigma)
    2. Thresholding (defined by threshold)
    3. Connected components analysis

    In case of vesicle clouds (hdf5_name in ["p4", "vc"]) the membrane
    segmentation is used to cut connected vesicle clouds across cells apart
    (only if a membrane segmentation is provided).

    Parameters
    ----------
    cset : chunkdataset instance
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    overlap: str or np.array
        Defines the overlap with neighbouring chunks that is left for later
        processing steps; if 'auto' the overlap is calculated from the sigma
        and the stitch_overlap (here: [1., 1., 1.])
    sigmas: list of lists or None
        Defines the sigmas of the gaussian filters applied to the probability
        maps. Has to be the same length as hdf5names. If None no gaussian
        filter is applied
    thresholds: list of float
        Threshold for cutting the probability map. Has to be the same length
        as hdf5names. If None zeros are used instead (not recommended!)
    chunk_list: list of int
        Selective list of chunks for which this function should work on. If
        None all chunks are used.
    debug: boolean
        If true multiprocessed steps only operate on one core using 'map'
        which allows for better error messages
    swapdata: boolean
        If true an x-z swap is applied to the data prior to processing
    label_density: np.array
        Defines the density of the data. If the data was downsampled prior to
        saving, it has to be interpolated first before processing due to
        alignment issues with the coordinate system. Two-times downsampled
        data would have a label_density of [2, 2, 2]
    membrane_filename: str
        One way to allow access to a membrane segmentation when processing
        vesicle clouds. Filename of the prediction in the chunkdataset. The
        threshold is currently set at 0.4.
    membrane_kd_path: str
        One way to allow access to a membrane segmentation when processing
        vesicle clouds. Path to the knossosdataset containing a membrane
        segmentation. The threshold is currently set at 0.4.
    hdf5_name_membrane: str
        When using the membrane_filename this key has to be given to access
        the data in the saved chunk
    fast_load: boolean
        If true the data of a chunk is blindly loaded without checking for
        enough offset to compute the overlap area. Faster, because no
        neighbouring chunk has to be accessed since the default case loads
        the overlap area from them.
    suffix: str
        Suffix for the intermediate results
    qsub_pe: str or None
        qsub parallel environment
    qsub_queue: str or None
        qsub queue

    Returns
    -------
    results_as_list: list
        list containing information about the number of connected components
        in each chunk
    overlap: np.array
    stitch overlap: np.array
    """
    label_density = np.array(label_density)

    if thresholds is None:
        thresholds = np.zeros(len(hdf5names))
    if sigmas is None:
        sigmas = np.zeros(len(hdf5names))

    if not len(sigmas) == len(thresholds) == len(hdf5names):
        raise Exception("Number of thresholds, sigmas and HDF5 names does not "
                        "match!")

    stitch_overlap = np.array([1, 1, 1])
    if overlap == "auto":
        # Truncation of gaussian kernel is 4 per standard deviation
        # (per default). One overlap for matching of connected components
        if sigmas is None:
            max_sigma = np.zeros(3)
        else:
            max_sigma = np.array([np.max(sigmas)] * 3)

        overlap = np.ceil(max_sigma * 4) + stitch_overlap

    print "overlap:", overlap
    print "thresholds:", thresholds

    multi_params = []
    for nb_chunk in chunk_list:
        multi_params.append(
            [cset.chunk_dict[nb_chunk], cset.path_head_folder, filename,
             hdf5names, overlap, sigmas, thresholds, swapdata, label_density,
             membrane_filename, membrane_kd_path, hdf5_name_membrane,
             fast_load, suffix])

    if qsub_pe is None and qsub_queue is None:
        results = mpm.start_multiprocess(
            oeh.gauss_threshold_connected_components_thread,
            multi_params, debug=debug)

        results_as_list = []
        for result in results:
            for entry in result:
                results_as_list.append(entry)
    elif mpm.__QSUB__:
        path_to_out = mpm.QSUB_script(multi_params,
                                      "gauss_threshold_connected_components",
                                      pe=qsub_pe, queue=qsub_queue)

        out_files = glob.glob(path_to_out + "/*")
        results_as_list = []
        for out_file in out_files:
            with open(out_file) as f:
                for entry in pickle.load(f):
                    results_as_list.append(entry)
    else:
        raise Exception("QSUB not available")

    return results_as_list, [overlap, stitch_overlap]

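# Minimal sketch of the per-chunk processing (illustration only, assuming
# scipy.ndimage is available): it shows the three steps named above for a
# single probability map. The actual worker,
# oeh.gauss_threshold_connected_components_thread, additionally handles
# overlap loading, data swapping and membrane-based splitting of vesicle
# clouds; the helper name below is made up.
from scipy import ndimage


def _gauss_threshold_cc_sketch(prob_map, sigma, threshold):
    # 1. Gaussian filter (defined by sigma)
    smoothed = ndimage.gaussian_filter(prob_map, sigma)
    # 2. Thresholding (defined by threshold)
    binary = smoothed > threshold
    # 3. Connected components analysis
    labels, nb_components = ndimage.label(binary)
    return labels, nb_components
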
[docs]def make_unique_labels(cset, filename, hdf5names, chunk_list, max_nb_dict,
                       chunk_translator, debug, suffix="",
                       qsub_pe=None, qsub_queue=None):
    """ Makes labels unique across chunks

    Parameters
    ----------
    cset : chunkdataset instance
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    chunk_list: list of int
        Selective list of chunks for which this function should work on. If
        None all chunks are used.
    max_nb_dict: dictionary
        Maps each hdf5name to an array of per-chunk integer offsets that need
        to be added to all labels of the respective chunk
    chunk_translator: dict
        Remapping from chunk ids to positions in chunk_list
    debug: boolean
        If true multiprocessed steps only operate on one core using 'map'
        which allows for better error messages
    suffix: str
        Suffix for the intermediate results
    qsub_pe: str or None
        qsub parallel environment
    qsub_queue: str or None
        qsub queue
    """
    multi_params = []
    for nb_chunk in chunk_list:
        this_max_nb_dict = {}
        for hdf5_name in hdf5names:
            this_max_nb_dict[hdf5_name] = max_nb_dict[hdf5_name][
                chunk_translator[nb_chunk]]

        multi_params.append([cset.chunk_dict[nb_chunk], filename, hdf5names,
                             this_max_nb_dict, suffix])

    if qsub_pe is None and qsub_queue is None:
        results = mpm.start_multiprocess(oeh.make_unique_labels_thread,
                                         multi_params, debug=debug)
    elif mpm.__QSUB__:
        path_to_out = mpm.QSUB_script(multi_params,
                                      "make_unique_labels",
                                      pe=qsub_pe, queue=qsub_queue)
    else:
        raise Exception("QSUB not available")

[docs]def make_stitch_list(cset, filename, hdf5names, chunk_list, stitch_overlap,
                     overlap, debug, suffix="", qsub_pe=None,
                     qsub_queue=None):
    """ Creates a stitch list for the overlap region between chunks

    Parameters
    ----------
    cset : chunkdataset instance
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    chunk_list: list of int
        Selective list of chunks for which this function should work on. If
        None all chunks are used.
    overlap: np.array
        Defines the overlap with neighbouring chunks that is left for later
        processing steps
    stitch_overlap: np.array
        Defines the overlap with neighbouring chunks that is left for
        stitching
    debug: boolean
        If true multiprocessed steps only operate on one core using 'map'
        which allows for better error messages
    suffix: str
        Suffix for the intermediate results
    qsub_pe: str or None
        qsub parallel environment
    qsub_queue: str or None
        qsub queue

    Returns
    -------
    stitch_list: dict
        Maps each hdf5name to a list of overlapping component id pairs
    """
    multi_params = []
    for nb_chunk in chunk_list:
        multi_params.append([cset, nb_chunk, filename, hdf5names,
                             stitch_overlap, overlap, suffix, chunk_list])

    if qsub_pe is None and qsub_queue is None:
        results = mpm.start_multiprocess(oeh.make_stitch_list_thread,
                                         multi_params, debug=debug)

        stitch_list = {}
        for hdf5_name in hdf5names:
            stitch_list[hdf5_name] = []

        for result in results:
            for hdf5_name in hdf5names:
                elems = result[hdf5_name]
                for elem in elems:
                    stitch_list[hdf5_name].append(elem)
    elif mpm.__QSUB__:
        path_to_out = mpm.QSUB_script(multi_params,
                                      "make_stitch_list",
                                      pe=qsub_pe, queue=qsub_queue)

        out_files = glob.glob(path_to_out + "/*")
        stitch_list = {}
        for hdf5_name in hdf5names:
            stitch_list[hdf5_name] = []

        for out_file in out_files:
            with open(out_file) as f:
                result = pickle.load(f)
                for hdf5_name in hdf5names:
                    elems = result[hdf5_name]
                    for elem in elems:
                        stitch_list[hdf5_name].append(elem)
    else:
        raise Exception("QSUB not available")

    return stitch_list

[docs]def make_merge_list(hdf5names, stitch_list, max_labels):
    """ Creates a merge list from a stitch list by mapping all connected ids
    to one id

    Parameters
    ----------
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    stitch_list: dictionary
        Contains pairs of overlapping component ids for each hdf5name
    max_labels: dictionary
        Contains the number of different component ids for each hdf5name

    Returns
    -------
    merge_dict: dictionary
        Maps, for each hdf5name, every component id involved in a merge to
        the id of its merged component
    merge_list_dict: dictionary
        Contains, for each hdf5name, an array mapping every component id to
        its merged id
    """
    merge_dict = {}
    merge_list_dict = {}
    for hdf5_name in hdf5names:
        this_stitch_list = stitch_list[hdf5_name]
        max_label = max_labels[hdf5_name]
        graph = nx.from_edgelist(this_stitch_list)
        cc = nx.connected_components(graph)
        merge_dict[hdf5_name] = {}
        merge_list_dict[hdf5_name] = np.arange(max_label + 1)
        for this_cc in cc:
            this_cc = list(this_cc)
            for id in this_cc:
                merge_dict[hdf5_name][id] = this_cc[0]
                merge_list_dict[hdf5_name][id] = this_cc[0]

    return merge_dict, merge_list_dict

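# Illustration only (not part of the original module): how stitch pairs
# collapse into a single id per connected component. The hdf5 name "example"
# and all ids below are made up; the chosen representative depends on the
# iteration order within each component.
def _make_merge_list_example():
    stitch_list = {"example": [(1, 2), (2, 3), (4, 5)]}
    max_labels = {"example": 5}
    merge_dict, merge_list_dict = make_merge_list(["example"], stitch_list,
                                                  max_labels)
    # merge_list_dict["example"] has length 6: labels 1, 2 and 3 now share one
    # representative id, labels 4 and 5 share another, and the unreferenced
    # label 0 keeps its own id.
    return merge_dict, merge_list_dict
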
[docs]def apply_merge_list(cset, chunk_list, filename, hdf5names, merge_list_dict,
                     debug, suffix="", qsub_pe=None, qsub_queue=None):
    """ Applies merge list to all chunks

    Parameters
    ----------
    cset : chunkdataset instance
    chunk_list: list of int
        Selective list of chunks for which this function should work on. If
        None all chunks are used.
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    merge_list_dict: dictionary
        mergedict for each hdf5name
    debug: boolean
        If true multiprocessed steps only operate on one core using 'map'
        which allows for better error messages
    suffix: str
        Suffix for the intermediate results
    qsub_pe: str or None
        qsub parallel environment
    qsub_queue: str or None
        qsub queue
    """
    multi_params = []
    merge_list_dict_path = cset.path_head_folder + "merge_list_dict.pkl"
    f = open(merge_list_dict_path, "w")
    pickle.dump(merge_list_dict, f)
    f.close()

    for nb_chunk in chunk_list:
        multi_params.append([cset.chunk_dict[nb_chunk], filename, hdf5names,
                             merge_list_dict_path, suffix])

    # results = mpm.start_multiprocess(oeh.apply_merge_list_thread,
    #                                  multi_params, debug=debug)

    if qsub_pe is None and qsub_queue is None:
        results = mpm.start_multiprocess(oeh.apply_merge_list_thread,
                                         multi_params, debug=debug)
    elif mpm.__QSUB__:
        path_to_out = mpm.QSUB_script(multi_params,
                                      "apply_merge_list",
                                      pe=qsub_pe, queue=qsub_queue)
    else:
        raise Exception("QSUB not available")

[docs]def extract_voxels(cset, filename, hdf5names, debug=False, chunk_list=None,
                   suffix="", qsub_pe=None, qsub_queue=None):
    """ Extracts voxels for each component id

    Parameters
    ----------
    cset : chunkdataset instance
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    chunk_list: list of int
        Selective list of chunks for which this function should work on. If
        None all chunks are used.
    debug: boolean
        If true multiprocessed steps only operate on one core using 'map'
        which allows for better error messages
    suffix: str
        Suffix for the intermediate results
    qsub_pe: str or None
        qsub parallel environment
    qsub_queue: str or None
        qsub queue
    """
    if chunk_list is None:
        chunk_list = [ii for ii in range(len(cset.chunk_dict))]

    multi_params = []
    for nb_chunk in chunk_list:
        multi_params.append([cset.chunk_dict[nb_chunk], cset.path_head_folder,
                             filename, hdf5names, suffix])

    if qsub_pe is None and qsub_queue is None:
        results = mpm.start_multiprocess(oeh.extract_voxels_thread,
                                         multi_params, debug=debug)
    elif mpm.__QSUB__:
        path_to_out = mpm.QSUB_script(multi_params,
                                      "extract_voxels",
                                      pe=qsub_pe, queue=qsub_queue)
    else:
        raise Exception("QSUB not available")

[docs]def concatenate_mappings(cset, filename, hdf5names, debug=False, suffix=""):
    """ Combines all map dicts

    Parameters
    ----------
    cset : chunkdataset instance
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    debug: boolean
        If true multiprocessed steps only operate on one core using 'map'
        which allows for better error messages
    suffix: str
        Suffix for the intermediate results
    """
    multi_params = []
    for hdf5_name in hdf5names:
        rel_path = segmentationdataset.get_rel_path(hdf5_name, filename,
                                                    suffix)
        map_dict_paths = glob.glob(cset.path_head_folder + rel_path +
                                   "/map_dicts/*")
        multi_params.append([cset.path_head_folder, rel_path, map_dict_paths])

    mpm.start_multiprocess(oeh.concatenate_mappings_thread,
                           multi_params, debug=debug)

[docs]def create_objects_from_voxels(cset, filename, hdf5names, granularity=15,
                               debug=False, suffix="",
                               qsub_pe=None, qsub_queue=None):
    """ Creates object instances from extracted voxels

    Parameters
    ----------
    cset : chunkdataset instance
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    granularity: int
        Defines granularity for partitioning data for multiprocessing
    debug: boolean
        If true multiprocessed steps only operate on one core using 'map'
        which allows for better error messages
    suffix: str
        Suffix for the intermediate results
    qsub_pe: str or None
        qsub parallel environment
    qsub_queue: str or None
        qsub queue
    """
    multi_params = []
    for nb_hdf5_name in range(len(hdf5names)):
        counter = 0
        hdf5_name = hdf5names[nb_hdf5_name]
        path_dataset = cset.path_head_folder + \
                       segmentationdataset.get_rel_path(hdf5_name, filename,
                                                        suffix)
        if not os.path.exists(path_dataset + "/object_dicts/"):
            os.makedirs(path_dataset + "/object_dicts/")

        map_dict_paths = glob.glob(path_dataset + "/map_dicts/map_*")
        for step in range(
                int(np.ceil(len(map_dict_paths) / float(granularity)))):
            this_map_dict_paths = map_dict_paths[step * granularity:
                                                 (step + 1) * granularity]
            save_path = path_dataset + "/object_dicts/dict_%d.pkl" % counter
            multi_params.append([cset.path_head_folder, this_map_dict_paths,
                                 filename, hdf5_name, save_path, suffix,
                                 counter])
            counter += 1

    if qsub_pe is None and qsub_queue is None:
        mpm.start_multiprocess(oeh.create_objects_from_voxels_thread,
                               multi_params, debug=debug)
    elif mpm.__QSUB__:
        path_to_out = mpm.QSUB_script(multi_params,
                                      "create_objects_from_voxels",
                                      pe=qsub_pe, queue=qsub_queue)
    else:
        raise Exception("QSUB not available")

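# Worked example (illustration only, with a hypothetical file count): for one
# hdf5 name with 47 map dict files and the default granularity of 15, the loop
# above builds ceil(47 / 15) = 4 jobs covering 15, 15, 15 and 2 map dicts and
# writes their results to object_dicts/dict_0.pkl ... dict_3.pkl; the counter
# restarts at 0 for each hdf5 name.
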
[docs]def create_datasets_from_objects(cset, filename, hdf5names, debug=False,
                                 suffix="", qsub_pe=None, qsub_queue=None):
    """ Creates dataset instances from objects

    Parameters
    ----------
    cset : chunkdataset instance
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    debug: boolean
        If true multiprocessed steps only operate on one core using 'map'
        which allows for better error messages
    suffix: str
        Suffix for the intermediate results
    qsub_pe: str or None
        qsub parallel environment
    qsub_queue: str or None
        qsub queue
    """
    multi_params = []
    for hdf5_name in hdf5names:
        multi_params.append(
            [cset.path_head_folder, hdf5_name, filename, suffix])

    if qsub_pe is None and qsub_queue is None:
        mpm.start_multiprocess(oeh.create_datasets_from_objects_thread,
                               multi_params, debug=debug)
    elif mpm.__QSUB__:
        path_to_out = mpm.QSUB_script(multi_params,
                                      "create_datasets_from_objects",
                                      pe=qsub_pe, queue=qsub_queue)
    else:
        raise Exception("QSUB not available")

[docs]def from_probabilities_to_objects(cset, filename, hdf5names,
                                  overlap="auto", sigmas=None,
                                  thresholds=None, chunk_list=None,
                                  debug=False, swapdata=0,
                                  label_density=np.ones(3),
                                  offset=None, size=None,
                                  membrane_filename=None,
                                  membrane_kd_path=None,
                                  hdf5_name_membrane=None,
                                  suffix="", qsub_pe=None, qsub_queue=None):
    """ Main function for the object extraction step; combines all needed
    steps

    Parameters
    ----------
    cset : chunkdataset instance
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    overlap: str or np.array
        Defines the overlap with neighbouring chunks that is left for later
        processing steps; if 'auto' the overlap is calculated from the sigma
        and the stitch_overlap (here: [1., 1., 1.])
    sigmas: list of lists or None
        Defines the sigmas of the gaussian filters applied to the probability
        maps. Has to be the same length as hdf5names. If None no gaussian
        filter is applied
    thresholds: list of float
        Threshold for cutting the probability map. Has to be the same length
        as hdf5names. If None zeros are used instead (not recommended!)
    chunk_list: list of int
        Selective list of chunks for which this function should work on. If
        None all chunks are used.
    debug: boolean
        If true multiprocessed steps only operate on one core using 'map'
        which allows for better error messages
    swapdata: boolean
        If true an x-z swap is applied to the data prior to processing
    label_density: np.array
        Defines the density of the data. If the data was downsampled prior to
        saving, it has to be interpolated first before processing due to
        alignment issues with the coordinate system. Two-times downsampled
        data would have a label_density of [2, 2, 2]
    offset : np.array
        offset of the volume to the origin
    size: np.array
        size of the volume
    membrane_filename: str
        One way to allow access to a membrane segmentation when processing
        vesicle clouds. Filename of the prediction in the chunkdataset. The
        threshold is currently set at 0.4.
    membrane_kd_path: str
        One way to allow access to a membrane segmentation when processing
        vesicle clouds. Path to the knossosdataset containing a membrane
        segmentation. The threshold is currently set at 0.4.
    hdf5_name_membrane: str
        When using the membrane_filename this key has to be given to access
        the data in the saved chunk
    suffix: str
        Suffix for the intermediate results
    qsub_pe: str or None
        qsub parallel environment
    qsub_queue: str or None
        qsub queue
    """
    all_times = []
    step_names = []
    if size is not None and offset is not None:
        chunk_list, chunk_translator = \
            calculate_chunk_numbers_for_box(cset, offset, size)
    else:
        chunk_translator = {}
        if chunk_list is None:
            chunk_list = [ii for ii in range(len(cset.chunk_dict))]
            for ii in range(len(cset.chunk_dict)):
                chunk_translator[ii] = ii
        else:
            for ii in range(len(chunk_list)):
                chunk_translator[chunk_list[ii]] = ii

    if thresholds is not None and thresholds[0] <= 1.:
        thresholds = np.array(thresholds)
        thresholds *= 255

    if sigmas is not None and swapdata == 1:
        for nb_sigma in range(len(sigmas)):
            if len(sigmas[nb_sigma]) == 3:
                sigmas[nb_sigma] = \
                    datahandler.switch_array_entries(sigmas[nb_sigma], [0, 2])

    time_start = time.time()
    cc_info_list, overlap_info = gauss_threshold_connected_components(
        cset, filename, hdf5names, overlap, sigmas, thresholds, chunk_list,
        debug, swapdata, label_density=label_density,
        membrane_filename=membrane_filename,
        membrane_kd_path=membrane_kd_path,
        hdf5_name_membrane=hdf5_name_membrane, fast_load=True, suffix=suffix,
        qsub_pe=qsub_pe, qsub_queue=qsub_queue)

    stitch_overlap = overlap_info[1]
    overlap = overlap_info[0]
    all_times.append(time.time() - time_start)
    step_names.append("connected components")
    print "\nTime needed for connected components: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    time_start = time.time()
    nb_cc_dict = {}
    max_nb_dict = {}
    max_labels = {}
    for hdf5_name in hdf5names:
        nb_cc_dict[hdf5_name] = np.zeros(len(chunk_list), dtype=np.int32)
        max_nb_dict[hdf5_name] = np.zeros(len(chunk_list), dtype=np.int32)
    for cc_info in cc_info_list:
        nb_cc_dict[cc_info[1]][chunk_translator[cc_info[0]]] = cc_info[2]
    for hdf5_name in hdf5names:
        max_nb_dict[hdf5_name][0] = 0
        for nb_chunk in range(1, len(chunk_list)):
            max_nb_dict[hdf5_name][nb_chunk] = \
                max_nb_dict[hdf5_name][nb_chunk - 1] + \
                nb_cc_dict[hdf5_name][nb_chunk - 1]
        max_labels[hdf5_name] = int(max_nb_dict[hdf5_name][-1] +
                                    nb_cc_dict[hdf5_name][-1])
    all_times.append(time.time() - time_start)
    step_names.append("extracting max labels")
    print "\nTime needed for extracting max labels: %.6fs" % all_times[-1]
    print "Max labels: ", max_labels

    # --------------------------------------------------------------------------

    time_start = time.time()
    make_unique_labels(cset, filename, hdf5names, chunk_list, max_nb_dict,
                       chunk_translator, debug, suffix=suffix,
                       qsub_pe=qsub_pe, qsub_queue=qsub_queue)
    all_times.append(time.time() - time_start)
    step_names.append("unique labels")
    print "\nTime needed for unique labels: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    time_start = time.time()
    stitch_list = make_stitch_list(cset, filename, hdf5names, chunk_list,
                                   stitch_overlap, overlap, debug,
                                   suffix=suffix, qsub_pe=qsub_pe,
                                   qsub_queue=qsub_queue)
    all_times.append(time.time() - time_start)
    step_names.append("stitch list")
    print "\nTime needed for stitch list: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    time_start = time.time()
    merge_dict, merge_list_dict = make_merge_list(hdf5names, stitch_list,
                                                  max_labels)
    all_times.append(time.time() - time_start)
    step_names.append("merge list")
    print "\nTime needed for merge list: %.3fs" % all_times[-1]

    # if all_times[-1] < 0.01:
    #     raise Exception("That was too fast!")

    # -------------------------------------------------------------------------

    time_start = time.time()
    apply_merge_list(cset, chunk_list, filename, hdf5names, merge_list_dict,
                     debug, suffix=suffix, qsub_pe=qsub_pe,
                     qsub_queue=qsub_queue)
    all_times.append(time.time() - time_start)
    step_names.append("apply merge list")
    print "\nTime needed for applying merge list: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    for hdf5_name in hdf5names:
        path = cset.path_head_folder + "/" + \
               segmentationdataset.get_rel_path(hdf5_name, filename, suffix)
        if not os.path.exists(path + "/map_dicts/"):
            os.makedirs(path + "/map_dicts/")
        if not os.path.exists(path + "/voxels/"):
            os.makedirs(path + "/voxels/")
        if not os.path.exists(path + "/hull_voxels/"):
            os.makedirs(path + "/hull_voxels/")

    # --------------------------------------------------------------------------

    time_start = time.time()
    extract_voxels(cset, filename, hdf5names, debug=debug,
                   chunk_list=chunk_list, suffix=suffix, qsub_pe=qsub_pe,
                   qsub_queue=qsub_queue)
    all_times.append(time.time() - time_start)
    step_names.append("voxel extraction")
    print "\nTime needed for extracting voxels: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    time_start = time.time()
    concatenate_mappings(cset, filename, hdf5names, debug=debug,
                         suffix=suffix)
    all_times.append(time.time() - time_start)
    step_names.append("concatenate mappings")
    print "\nTime needed for concatenating mappings: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    time_start = time.time()
    create_objects_from_voxels(cset, filename, hdf5names, granularity=15,
                               debug=debug, suffix=suffix, qsub_pe=qsub_pe,
                               qsub_queue=qsub_queue)
    all_times.append(time.time() - time_start)
    step_names.append("create objects from voxels")
    print "\nTime needed for creating objects: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    time_start = time.time()
    create_datasets_from_objects(cset, filename, hdf5names, debug=debug,
                                 suffix=suffix, qsub_pe=qsub_pe,
                                 qsub_queue=qsub_queue)
    all_times.append(time.time() - time_start)
    step_names.append("create datasets from objects")
    print "\nTime needed for creating datasets: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    print "\nTime overview:"
    for ii in range(len(all_times)):
        print "%s: %.3fs" % (step_names[ii], all_times[ii])
    print "--------------------------"
    print "Total Time: %.1f min" % (np.sum(all_times) / 60)
    print "--------------------------\n\n"

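# Hypothetical usage sketch (illustration only): cset is assumed to be an
# initialised chunkdataset that already holds a prediction named
# "example_prediction" with the labels "sj", "vc" and "mi"; all names and
# parameter values below are made up.
def _example_object_extraction(cset):
    from_probabilities_to_objects(cset, "example_prediction",
                                  hdf5names=["sj", "vc", "mi"],
                                  thresholds=[0.5, 0.5, 0.5],
                                  sigmas=[[1, 1, 1], [1, 1, 1], [1, 1, 1]])
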
[docs]def from_probabilities_to_objects_parameter_sweeping(
        cset, filename, hdf5names, nb_thresholds, overlap="auto", sigmas=None,
        chunk_list=None, swapdata=0, label_density=np.ones(3), offset=None,
        size=None, membrane_filename=None, membrane_kd_path=None,
        hdf5_name_membrane=None, qsub_pe=None, qsub_queue=None):
    """ Sweeps over different thresholds. The results of each object
    extraction run are saved in a separate folder; all intermediate steps are
    saved with a different suffix

    Parameters
    ----------
    cset : chunkdataset instance
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    nb_thresholds: integer
        number of thresholds and therefore runs of object extractions to do;
        the actual thresholds are equally spaced
    overlap: str or np.array
        Defines the overlap with neighbouring chunks that is left for later
        processing steps; if 'auto' the overlap is calculated from the sigma
        and the stitch_overlap (here: [1., 1., 1.])
    sigmas: list of lists or None
        Defines the sigmas of the gaussian filters applied to the probability
        maps. Has to be the same length as hdf5names. If None no gaussian
        filter is applied
    chunk_list: list of int
        Selective list of chunks for which this function should work on. If
        None all chunks are used.
    swapdata: boolean
        If true an x-z swap is applied to the data prior to processing
    label_density: np.array
        Defines the density of the data. If the data was downsampled prior to
        saving, it has to be interpolated first before processing due to
        alignment issues with the coordinate system. Two-times downsampled
        data would have a label_density of [2, 2, 2]
    offset : np.array
        offset of the volume to the origin
    size: np.array
        size of the volume
    membrane_filename: str
        One way to allow access to a membrane segmentation when processing
        vesicle clouds. Filename of the prediction in the chunkdataset. The
        threshold is currently set at 0.4.
    membrane_kd_path: str
        One way to allow access to a membrane segmentation when processing
        vesicle clouds. Path to the knossosdataset containing a membrane
        segmentation. The threshold is currently set at 0.4.
    hdf5_name_membrane: str
        When using the membrane_filename this key has to be given to access
        the data in the saved chunk
    suffix: str
        Suffix for the intermediate results
    qsub_pe: str
        qsub parallel environment name
    qsub_queue: str or None
        qsub queue name
    """
    thresholds = np.array(
        255. / (nb_thresholds + 1) * np.array(range(1, nb_thresholds + 1)),
        dtype=np.uint8)

    all_times = []
    for nb, t in enumerate(thresholds):
        print "\n\n ======= t = %.2f =======" % t
        time_start = time.time()
        from_probabilities_to_objects(cset, filename, hdf5names,
                                      overlap=overlap, sigmas=sigmas,
                                      thresholds=[t] * len(hdf5names),
                                      chunk_list=chunk_list,
                                      swapdata=swapdata,
                                      label_density=label_density,
                                      offset=offset, size=size,
                                      membrane_filename=membrane_filename,
                                      membrane_kd_path=membrane_kd_path,
                                      hdf5_name_membrane=hdf5_name_membrane,
                                      suffix=str(nb),
                                      qsub_pe=qsub_pe,
                                      qsub_queue=qsub_queue,
                                      debug=False)
        all_times.append(time.time() - time_start)

    print "\n\nTime overview:"
    for ii in range(len(all_times)):
        print "t = %.2f: %.1f min" % (thresholds[ii], all_times[ii] / 60)
    print "--------------------------"
    print "Total Time: %.1f min" % (np.sum(all_times) / 60)
    print "--------------------------\n"

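# Worked example (illustration only): for nb_thresholds = 3 the sweep uses the
# equally spaced uint8 thresholds
#   255. / (3 + 1) * [1, 2, 3] = [63.75, 127.5, 191.25]  ->  [63, 127, 191]
# and the three runs store their intermediate results with the suffixes
# "0", "1" and "2".
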
[docs]def from_ids_to_objects(cset, filename, hdf5names, chunk_list=None,
                        debug=False, offset=None, size=None, suffix="",
                        qsub_pe=None, qsub_queue=None):
    """ Main function for the object extraction step; combines all needed
    steps

    Parameters
    ----------
    cset : chunkdataset instance
    filename : str
        Filename of the prediction in the chunkdataset
    hdf5names: list of str
        List of names/labels to be extracted and processed from the
        prediction file
    chunk_list: list of int
        Selective list of chunks for which this function should work on. If
        None all chunks are used.
    debug: boolean
        If true multiprocessed steps only operate on one core using 'map'
        which allows for better error messages
    offset : np.array
        offset of the volume to the origin
    size: np.array
        size of the volume
    suffix: str
        Suffix for the intermediate results
    qsub_pe: str or None
        qsub parallel environment
    qsub_queue: str or None
        qsub queue
    """
    all_times = []
    step_names = []
    if size is not None and offset is not None:
        chunk_list, chunk_translator = \
            calculate_chunk_numbers_for_box(cset, offset, size)
    else:
        chunk_translator = {}
        if chunk_list is None:
            chunk_list = [ii for ii in range(len(cset.chunk_dict))]
            for ii in range(len(cset.chunk_dict)):
                chunk_translator[ii] = ii
        else:
            for ii in range(len(chunk_list)):
                chunk_translator[chunk_list[ii]] = ii

    for hdf5_name in hdf5names:
        path = cset.path_head_folder + "/" + \
               segmentationdataset.get_rel_path(hdf5_name, filename, suffix)
        if not os.path.exists(path + "/map_dicts/"):
            os.makedirs(path + "/map_dicts/")
        if not os.path.exists(path + "/voxels/"):
            os.makedirs(path + "/voxels/")
        if not os.path.exists(path + "/hull_voxels/"):
            os.makedirs(path + "/hull_voxels/")

    # --------------------------------------------------------------------------

    time_start = time.time()
    extract_voxels(cset, filename, hdf5names, debug=debug,
                   chunk_list=chunk_list, suffix=suffix, qsub_pe=qsub_pe,
                   qsub_queue=qsub_queue)
    all_times.append(time.time() - time_start)
    step_names.append("voxel extraction")
    print "\nTime needed for extracting voxels: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    time_start = time.time()
    concatenate_mappings(cset, filename, hdf5names, debug=debug,
                         suffix=suffix)
    all_times.append(time.time() - time_start)
    step_names.append("concatenate mappings")
    print "\nTime needed for concatenating mappings: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    time_start = time.time()
    create_objects_from_voxels(cset, filename, hdf5names, granularity=15,
                               debug=debug, suffix=suffix, qsub_pe=qsub_pe,
                               qsub_queue=qsub_queue)
    all_times.append(time.time() - time_start)
    step_names.append("create objects from voxels")
    print "\nTime needed for creating objects: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    time_start = time.time()
    create_datasets_from_objects(cset, filename, hdf5names, debug=debug,
                                 suffix=suffix, qsub_pe=qsub_pe,
                                 qsub_queue=qsub_queue)
    all_times.append(time.time() - time_start)
    step_names.append("create datasets from objects")
    print "\nTime needed for creating datasets: %.3fs" % all_times[-1]

    # --------------------------------------------------------------------------

    print "\nTime overview:"
    for ii in range(len(all_times)):
        print "%s: %.3fs" % (step_names[ii], all_times[ii])
    print "--------------------------"
    print "Total Time: %.1f min" % (np.sum(all_times) / 60)
    print "--------------------------\n\n"