Source code for syconn.brainqueries

# -*- coding: utf-8 -*-
# SyConn - Synaptic connectivity inference toolkit
#
# Copyright (c) 2016 - now
# Max-Planck-Institute for Medical Research, Heidelberg, Germany
# Authors: Sven Dorkenwald, Philipp Schubert, Joergen Kornfeld

from itertools import combinations
from contactsites import write_summaries
from multi_proc.multi_proc_main import __QSUB__, start_multiprocess, QSUB_script
from processing.features import calc_prop_feat_dict
from processing.learning_rfc import write_feat2csv, load_rfcs
from processing.mapper import SkeletonMapper, prepare_syns_btw_annos, \
    similarity_check_star
from syconn.processing.cell_types import predict_celltype_label
from knossos_utils.skeleton import Skeleton
from utils.datahandler import *
from syconn.conmatrix import type_sorted_wiring
import time
import sys
import datetime


def analyze_dataset(wd):
    """Analyze dataset, i.e. predict barrier and cell objects, calculate
    hulls, map cell objects to tracings and predict synapses together with
    the ultrastructure of cells to create a connectivity matrix.

    Parameters
    ----------
    wd : str
        path to working directory
    """
    enrich_tracings_all(wd, overwrite=False)
    # remap_tracings_all(wd)
    detect_synapses(wd)
    predict_celltype_label(wd)
    type_sorted_wiring(wd)

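# A minimal usage sketch (the path below is hypothetical): the pipeline
# assumes a prepared working directory that already contains the tracings
# in 'tracings/' and the trained random forest models under 'models/'
# (see the paths used in enrich_tracings below):
#
#     from syconn.brainqueries import analyze_dataset
#     analyze_dataset('/mnt/data/my_dataset/')
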
def enrich_tracings_all(wd, overwrite=False, qsub_pe=None, qsub_queue=None):
    """Run :func:`enrich_tracings` on available cluster nodes via QSUB or
    with local multiprocessing.

    Parameters
    ----------
    wd : str
        Path to working directory
    overwrite : bool
        enforce overwriting of existing files
    qsub_pe : str or None
        qsub parallel environment
    qsub_queue : str or None
        qsub queue
    """
    use_qsub = qsub_pe is not None or qsub_queue is not None
    start = time.time()
    skel_dir = wd + '/tracings/'
    if not os.path.isdir(wd + '/neurons/'):
        os.makedirs(wd + '/neurons/')
    anno_list = [os.path.join(skel_dir, f) for f in
                 next(os.walk(skel_dir))[2] if 'k.zip' in f]
    print "------------------------------\n" \
          "Starting enrichment of %d cell tracings" % len(anno_list)
    np.random.shuffle(anno_list)
    if use_qsub and __QSUB__:
        nb_lists = 30
        list_of_lists = [[anno_list[i::nb_lists], wd, overwrite]
                         for i in xrange(nb_lists)]
        QSUB_script(list_of_lists, 'skeleton_mapping', queue=qsub_queue,
                    pe=qsub_pe, n_cores=np.max((int(cpu_count() / 2), 1)))
    elif use_qsub and not __QSUB__:
        raise RuntimeError("QSUB not available. Please make sure QSUB is "
                           "configured correctly.")
    else:
        nb_lists = np.max((int(cpu_count() / 2), 1))
        list_of_lists = [[anno_list[i::nb_lists], wd, overwrite]
                         for i in xrange(nb_lists)]
        start_multiprocess(enrich_tracings_star, list_of_lists,
                           nb_cpus=nb_lists)
    predict_celltype_label(wd)
    diff = time.time() - start
    print "Finished tracing enrichment (cell object mapping, prediction of " \
          "sub-cellular compartments and cell types) after %s." % \
          str(datetime.timedelta(seconds=diff))

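# Note on the work distribution above: extended slicing assigns tracings to
# workers round-robin, i.e. anno_list[i::nb_lists] for i = 0 .. nb_lists - 1
# partitions the list into interleaved, near-equally sized chunks:
#
#     >>> lst = range(7)
#     >>> [lst[i::3] for i in xrange(3)]
#     [[0, 3, 6], [1, 4], [2, 5]]
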
def enrich_tracings_star(params):
    """Helper function for multiprocessed enrich_tracings."""
    enrich_tracings(params[0], params[1], overwrite=params[2])

def enrich_tracings(anno_list, wd, map_objects=True, method='hull',
                    radius=1200, thresh=2.2, filter_size=(2786, 1594, 250),
                    create_hull=True, max_dist_mult=1.4, detect_outlier=True,
                    dh=None, overwrite=False, nb_neighbors=20,
                    nb_hull_vox=500, context_range=6000, neighbor_radius=220,
                    nb_rays=20, nb_voting_neighbors=100,
                    write_obj_voxel=True):
    """Enriches a list of paths (to tracings) using the dataset in the
    working directory. Writes enriched tracings to the 'neurons' folder in
    the working directory, or, if specified, to DataHandler().data_path.

    Parameters
    ----------
    anno_list : list
        paths to tracing .k.zip's
    wd : str
        Path to working directory
    map_objects : bool
        Map cell components to skeleton
    method : str
        Either 'kd' for fixed radius or 'hull'/'supervoxel' if a membrane
        prediction is available.
    radius : int
        Radius in nm. Single integer if the radius is the same for all
        objects. If a list of three integers, stick to the ordering
        [mitos, vc, sj].
    thresh : float
        Factor which is multiplied with the maximum membrane probability.
        The resulting value is used as the threshold above which the
        membrane is assumed to exist.
    filter_size : list, tuple of int
        minimum cell object sizes for each object (mitos, vesicle clouds,
        synaptic clefts)
    create_hull : bool
        create skeleton membrane estimation
    max_dist_mult : float
        Multiplier for radius to generate the maximal distance of hull
        points to the source node.
    detect_outlier : bool
        Use outlier detection if True
    dh : DataHandler
        object containing SegmentationDataObjects mitos, vc, sj
    overwrite : bool
        Overwrite existing .k.zip's of mapped skeletons
    nb_neighbors : int
        minimum number of neighbors needed during outlier detection for a
        single hull point to survive.
    nb_hull_vox : int
        Number of object hull voxels which are used to estimate spatial
        proximity to the skeleton.
    context_range : int
        Range of property features
    neighbor_radius : int
        Radius of the ball in which to look for supporting hull voxels.
        Used during outlier detection.
    nb_rays : int
        Number of rays sent at each skeleton node (multiplied by a factor
        of 5). Defines the angle between two rays (360 / nb_rays) in the
        orthogonal plane.
    nb_voting_neighbors : int
        Number of votes of skeleton hull voxels (membrane representation)
        for object mapping.
    write_obj_voxel : bool
        write object voxel coordinates to .k.zip
    """
    rf_axoness_p = wd + '/models/rf_axoness/rf.pkl'
    rf_spiness_p = wd + '/models/rf_spiness/rf.pkl'
    rfc_axoness, rfc_spiness = None, None
    if dh is None:
        dh = DataHandler(wd)
    if not os.path.isdir(dh.myelin_ds_path):
        print "Could not find myelin dataset. Enriched tracings won't " \
              "contain myelin prediction."
        dh.myelin_ds_path = None
    output_dir = dh.data_path
    if not overwrite:
        existing_skel = [re.findall('[^/]+$', os.path.join(dp, f))[0]
                         for dp, dn, filenames in os.walk(output_dir)
                         for f in filenames if 'k.zip' in f]
    else:
        existing_skel = []
    if anno_list == [] and dh is not None:
        anno_list = [os.path.join(dp, f) for dp, dn, filenames in
                     os.walk(dh.skeleton_path) for f in filenames
                     if 'consensus.nml' in f]
        anno_list += [os.path.join(dp, f) for dp, dn, filenames in
                      os.walk(dh.skeleton_path) for f in filenames
                      if 'k.zip' in f]
    elif anno_list == [] and dh is None:
        raise RuntimeError("Aborting mapping procedure. Either a DataHandler "
                           "with source path (dh.skeleton_path) or "
                           "anno_list must be given!")
    todo_skel = list(set([re.findall('[^/]+$', f)[0] for f in anno_list])
                     - set(existing_skel))
    todo_skel = [f for f in anno_list
                 if re.findall('[^/]+$', f)[0] in todo_skel]
    if len(todo_skel) == 0:
        print "Nothing to do. Aborting."
        return
    cnt = 0
    if map_objects:
        rfc_axoness, rfc_spiness = load_rfcs(rf_axoness_p, rf_spiness_p)
    for ii, filepath in enumerate(list(todo_skel)[::-1]):
        dh.skeletons = {}
        cnt += 1
        _list = load_ordered_mapped_skeleton(filepath)
        annotation = _list[0]
        soma = connect_soma_tracing(_list[4])
        try:
            ix = int(re.findall('.*?([\d]+)', filepath)[-3])
        except IndexError:
            ix = cnt
        path = dh.data_path + re.findall('[^/]+$', filepath)[0]
        skel = SkeletonMapper(annotation, dh, ix=ix, soma=soma,
                              context_range=context_range)
        skel.write_obj_voxel = write_obj_voxel
        if create_hull:
            skel.hull_sampling(detect_outlier=detect_outlier, thresh=thresh,
                               nb_neighbors=nb_neighbors,
                               neighbor_radius=neighbor_radius,
                               max_dist_mult=max_dist_mult)
            if map_objects:
                skel.annotate_objects(
                    dh, radius, method, thresh, filter_size,
                    nb_hull_vox=nb_hull_vox,
                    nb_voting_neighbors=nb_voting_neighbors,
                    nb_rays=nb_rays, nb_neighbors=nb_neighbors,
                    neighbor_radius=neighbor_radius,
                    max_dist_mult=max_dist_mult)
        if map_objects:
            if rfc_spiness is not None:
                skel.predict_property(rfc_spiness, 'spiness')
            skel._property_features = None
            if rfc_spiness is not None and rfc_axoness is not None:
                skel.predict_property(rfc_axoness, 'axoness')
        if not os.path.exists(dh.data_path):
            os.makedirs(dh.data_path)
        skel.write2kzip(path)
    print "Finished enrichment of %d cell tracings" % len(anno_list)

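# Hedged example (hypothetical paths): enriching two selected tracings with
# the fixed-radius mapping instead of the hull-based one; 'kd', 'radius' and
# 'overwrite' are taken from the signature above:
#
#     enrich_tracings(['/wd/tracings/skel_1.k.zip',
#                      '/wd/tracings/skel_2.k.zip'],
#                     '/wd/', method='kd', radius=1200, overwrite=True)
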
def remap_tracings_all(wd, dest_dir=None, recalc_prop_only=False,
                       method='hull', context_range=6000, qsub_pe=None,
                       qsub_queue=None):
    """Run :func:`remap_tracings` on available cluster nodes via QSUB or
    using single-node multiprocessing.

    Parameters
    ----------
    wd : str
        Path to working directory, which contains the mapped skeleton
        nml / k.zip files
    dest_dir : str
        Directory path to store mapped skeletons
    recalc_prop_only : bool
        Recalculate properties (spiness, axoness) only, without
        calculating the hull
    method : str
        Method for object mapping procedure
    context_range : int
        Context range for property features
    qsub_pe : str or None
        qsub parallel environment
    qsub_queue : str or None
        qsub queue
    """
    use_qsub = qsub_pe is not None or qsub_queue is not None
    anno_list = get_filepaths_from_dir(wd + '/neurons/')
    np.random.shuffle(anno_list)
    if dest_dir is not None and not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    nb_processes = np.max((len(anno_list) / 3, 3))
    list_of_lists = [[wd, anno_list[i::nb_processes], dest_dir,
                      recalc_prop_only, method, context_range]
                     for i in xrange(nb_processes)]
    if use_qsub and __QSUB__:
        QSUB_script(list_of_lists, 'skeleton_remapping', queue=qsub_queue,
                    pe=qsub_pe, n_cores=np.max((int(cpu_count()), 1)))
    elif use_qsub and not __QSUB__:
        raise RuntimeError("QSUB not available. Please make sure QSUB is "
                           "configured correctly.")
    else:
        start_multiprocess(remap_tracings_star, list_of_lists, debug=False)

def remap_tracings_star(params):
    """Helper function for multiprocessed remap_tracings."""
    remap_tracings(params[0], params[1], output_dir=params[2],
                   recalc_prop_only=params[3], method=params[4],
                   context_range=params[5])

def remap_tracings(wd, mapped_skel_paths, dh=None, method='hull',
                   radius=1200, thresh=2.2, filter_size=(2786, 1594, 250),
                   max_dist_mult=1.4, nb_neighbors=20, nb_hull_vox=500,
                   neighbor_radius=220, nb_rays=20, nb_voting_neighbors=100,
                   output_dir=None, write_obj_voxel=True, mito_min_votes=235,
                   vc_min_votes=191, sj_min_votes=346, recalc_prop_only=True,
                   context_range=6000):
    """Remaps objects to skeleton without recalculating the hull.

    Parameters
    ----------
    wd : str
        Path to working directory
    mapped_skel_paths : list of str
        Paths to tracings
    dh : DataHandler
        object containing SegmentationDataObjects mitos, vc, sj
    method : str
        Either 'kd' for fixed radius or 'hull'/'supervoxel' if a membrane
        prediction is available.
    radius : int
        Radius in nm. Single integer if the radius is the same for all
        objects. If a list of three integers, stick to the ordering
        [mitos, vc, sj].
    thresh : float
        Factor which is multiplied with the maximum membrane probability.
        The resulting value is used as the threshold above which the
        membrane is assumed to exist.
    filter_size : list of int
        minimum object sizes, one for each object [mitos, vc, sj]
    max_dist_mult : float
        Multiplier for radius to generate the maximal distance of hull
        points to the source node.
    nb_neighbors : int
        minimum number of neighbors needed during outlier detection for a
        single hull point to survive.
    nb_hull_vox : int
        Number of object hull voxels which are used to estimate spatial
        proximity to the skeleton.
    neighbor_radius : int
        Radius of the ball in which to look for supporting hull voxels.
        Used during outlier detection.
    nb_rays : int
        Number of rays sent at each skeleton node (multiplied by a factor
        of 5). Defines the angle between two rays (360 / nb_rays) in the
        orthogonal plane.
    nb_voting_neighbors : int
        Number of votes of skeleton hull voxels (membrane representation)
        for object mapping.
    output_dir : str
        Path to output directory. If None, the input skeleton path is used.
    write_obj_voxel : bool
        write object voxel coordinates to .k.zip
    mito_min_votes : int
        Best F1 score found by evaluation
    vc_min_votes : int
        Best F2 score found by evaluation
    sj_min_votes : int
        Best F2 score found by evaluation
    recalc_prop_only : bool
        Recalculate only features and prediction of properties
        (axoness, spiness)
    context_range : int
        Range of property features
    """
    rf_axoness_p = wd + '/models/rf_axoness/rf.pkl'
    rf_spiness_p = wd + '/models/rf_spiness/rf.pkl'
    rfc_axoness, rfc_spiness = load_rfcs(rf_axoness_p, rf_spiness_p)
    cnt = 0
    if dh is None:
        dh = DataHandler(wd)
    for skel_path in mapped_skel_paths:
        cnt += 1
        # get first element in list and only skeletonAnnotation
        mapped_skel_old, mito, vc, sj, soma = \
            load_anno_list([skel_path], load_mitos=False)[0]
        if output_dir is not None:
            for fpath in mapped_skel_paths:
                shutil.copyfile(fpath, output_dir + os.path.split(fpath)[1])
            path = output_dir + re.findall('[^/]+$', skel_path)[0]
        else:
            path = skel_path
        try:
            ix = int(re.findall('.*?([\d]+)', skel_path)[-3])
        except IndexError:
            ix = cnt
        new_skel = SkeletonMapper(mapped_skel_old, dh, ix=ix, soma=soma)
        if recalc_prop_only:
            new_skel.old_anno.filename = skel_path
        else:
            new_skel.obj_min_votes['mitos'] = mito_min_votes
            new_skel.obj_min_votes['vc'] = vc_min_votes
            new_skel.obj_min_votes['sj'] = sj_min_votes
            new_skel.write_obj_voxel = write_obj_voxel
            new_skel.annotate_objects(
                dh, radius, method, thresh, filter_size,
                nb_hull_vox=nb_hull_vox,
                nb_voting_neighbors=nb_voting_neighbors, nb_rays=nb_rays,
                nb_neighbors=nb_neighbors, neighbor_radius=neighbor_radius,
                max_dist_mult=max_dist_mult)
        if rfc_spiness is not None:
            new_skel._property_features, new_skel.property_feat_names, \
                spiness_given = calc_prop_feat_dict(new_skel, context_range)
            new_skel.predict_property(rfc_spiness, 'spiness')
        new_skel._property_features = None
        if rfc_spiness is not None and rfc_axoness is not None:
            new_skel.predict_property(rfc_axoness, 'axoness')
        if not recalc_prop_only:
            new_skel.write2kzip(path)
        else:
            dummy_skel = Skeleton()
            dummy_skel.add_annotation(new_skel.old_anno)
            dummy_skel.add_annotation(mito)
            dummy_skel.add_annotation(vc)
            dummy_skel.add_annotation(sj)
            if soma is not None:
                dummy_skel.add_annotation(soma)
            dummy_skel.toNml(path[:-5] + 'nml')
            write_data2kzip(path, path[:-5] + 'nml')
            write_feat2csv(path + 'axoness_feat.csv',
                           new_skel.property_features['axoness'])
            write_feat2csv(path + 'spiness_feat.csv',
                           new_skel.property_features['spiness'])
            write_data2kzip(path, path + 'axoness_feat.csv')
            write_data2kzip(path, path + 'spiness_feat.csv')

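# Sketch (hypothetical paths): recalculating only the spiness/axoness
# predictions of already mapped skeletons, keeping hulls and object
# mappings untouched:
#
#     remap_tracings('/wd/', get_filepaths_from_dir('/wd/neurons/'),
#                    recalc_prop_only=True)
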
def detect_synapses(wd, qsub_pe=None, qsub_queue=None):
    """Detects contact sites between enriched tracings and writes contact
    site summary and synapse summary to working directory.

    Parameters
    ----------
    wd : str
        Path to working directory
    qsub_pe : str or None
        qsub parallel environment
    qsub_queue : str or None
        qsub queue
    """
    print "------------------------------\n" \
          "Starting contact site detection with synapse classification."
    use_qsub = qsub_pe is not None or qsub_queue is not None
    start = time.time()
    nml_list = get_filepaths_from_dir(wd + '/neurons/')
    cs_path = wd + '/contactsites/'
    for ending in ['', 'cs', 'cs_vc', 'cs_sj', 'cs_vc_sj', 'pairwise',
                   'overlap_vx']:
        if not os.path.exists(cs_path + ending):
            os.makedirs(cs_path + ending)
    anno_permutations = list(combinations(nml_list, 2))
    if use_qsub and __QSUB__:
        list_of_lists = [[list(anno_permutations[i::300]), cs_path]
                         for i in xrange(300)]
        QSUB_script(list_of_lists, 'synapse_mapping', queue=qsub_queue,
                    pe=qsub_pe, n_cores=np.max((int(cpu_count()), 1)))
    elif use_qsub and not __QSUB__:
        raise RuntimeError("QSUB not available. Please make sure QSUB is "
                           "configured correctly.")
    else:
        prepare_syns_btw_annos(anno_permutations, cs_path)
    write_summaries(wd)
    diff = time.time() - start
    print "Finished connectivity analysis after %s.\n" \
          "---------------------------" % str(datetime.timedelta(seconds=diff))

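# Note: contact site candidates are all unordered tracing pairs, i.e.
# n * (n - 1) / 2 jobs for n enriched tracings, which is why the QSUB branch
# splits them into 300 chunks:
#
#     >>> from itertools import combinations
#     >>> len(list(combinations(range(1000), 2)))
#     499500
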
def detect_similar_tracings(wd):
    """Print similar skeleton file paths.

    Parameters
    ----------
    wd : str
        path to working directory
    """
    skel_paths = get_filepaths_from_dir(wd + '/tracings/')
    start_multiprocess(similarity_check_star,
                       list(combinations(skel_paths, 2)))

def get_connectivity_list(wd):
    """Find synapses between tracing pairs.

    Parameters
    ----------
    wd : str
        path to working directory

    Returns
    -------
    np.array of shape (n,), np.array of shape (n, 2)
        number of axo-dendritic synapses per tracing pair, sorted in
        descending order (a count of zero means only axo-axonic touches
        are present), and the corresponding tracing id pairs
    """
    conn_dict_path = wd + '/contactsites/connectivity_dict.pkl'
    assert os.path.exists(conn_dict_path)
    conn_dict = load_pkl2obj(conn_dict_path)
    cell_ids = []
    synapse_touches = []
    for pair_name, pair in conn_dict.iteritems():
        if pair['total_size_area'] == 0:
            continue
        skel_id1, skel_id2 = re.findall('(\d+)_(\d+)', pair_name)[0]
        cell_ids.append(np.array([int(skel_id1), int(skel_id2)]))
        indiv_syn_sizes = np.array(pair['sizes_area'])
        indiv_syn_axoness = np.array(pair['partner_axoness']) == 1
        pair['sizes_area'] = indiv_syn_sizes[~indiv_syn_axoness]
        synapse_touches.append(len(pair['sizes_area']))
    synapse_touches = np.array(synapse_touches)
    cell_ids = np.array(cell_ids)
    sorted_ixs = np.argsort(synapse_touches)[::-1]
    synapse_touches = synapse_touches[sorted_ixs]
    cell_ids = cell_ids[sorted_ixs]
    return synapse_touches, cell_ids

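# Usage sketch (hypothetical working directory): listing the most strongly
# connected tracing pairs after detect_synapses() has written
# 'contactsites/connectivity_dict.pkl':
#
#     syn_counts, pair_ids = get_connectivity_list('/wd/')
#     for n_syn, (id1, id2) in zip(syn_counts[:10], pair_ids[:10]):
#         print "%d axo-dendritic synapses between %d and %d" \
#               % (n_syn, id1, id2)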