Source code for syconn.processing.features

# -*- coding: utf-8 -*-
# SyConn - Synaptic connectivity inference toolkit
#
# Copyright (c) 2016 - now
# Max-Planck-Institute for Medical Research, Heidelberg, Germany
# Authors: Sven Dorkenwald, Philipp Schubert, Jörgen Kornfeld

import copy
import numpy as np
import os
import zipfile
from collections import Counter
from numpy import array as arr
from scipy import spatial
import networkx as nx
from learning_rfc import write_feat2csv, cell_classification
from knossos_utils import skeleton_utils as su
from ..utils.basics import euclidian_distance
from ..utils.datahandler import load_objpkl_from_kzip, \
    load_ordered_mapped_skeleton
from knossos_utils.skeleton import remove_from_zip


[docs]def update_property_feat_kzip_star(args): """Helper function for update_property_feat_kzip """ update_property_feat_kzip(*args)
[docs]def update_property_feat_kzip(path2kzip, dist=6000): """Recompute axoness feature of skeleton at path2kzip and writes it to .k.zip Parameters ---------- path2kzip : str Path to mapped skeleton dist : int """ prop_dict, property_feat_names = calc_prop_feat_dict(path2kzip, dist) for prop, feat in prop_dict.iteritems(): path2csv = path2kzip[:-6] + '_%s_feat.csv' % prop write_feat2csv(path2csv, feat, property_feat_names[prop]) file_name = '%s_feat.csv' % prop try: remove_from_zip(path2kzip, file_name) with zipfile.ZipFile(path2kzip, "a", zipfile.ZIP_DEFLATED) as zf: zf.write(path2csv, file_name) # print "Wrote new %s feature to %s." % (prop, path2kzip) except Exception, e: print 'Could not write %s to zip file.' % file_name print e os.remove(path2csv)
[docs]def calc_prop_feat_dict(source, dist=6000): """Calculates property feature Parameters ---------- source : SkeletonAnnotation dist : int Returns ------- dict, list of str, bool Dictionary of property features, list of feature names, bool if spiness feature are given """ property_features = {} property_feat_names = {} morph_feat, spinehead_feats, node_ids, spiness_given\ = morphology_feature(source, dist) morph_info = np.concatenate((node_ids[:, None], morph_feat.astype(np.float32)), axis=1) property_features["axoness"] = np.concatenate((morph_info, spinehead_feats), axis=1) morph_feat_names = ['nodeID', 'rad_mean', 'rad_std'] + \ ['rad_hist'+str(i) for i in range(10)] +\ ['mito_nb', 'mito_size_mean', 'vc_nb', 'vc_size_mean', 'sj_nb', 'sj_size_mean', 'branch_dist', 'endpoint_dist'] property_features["spiness"] = morph_info property_feat_names["axoness"] = morph_feat_names +\ ['nb_spinehead', 'sh_rad_mean', 'sh_rad_std', 'sh_proba_mean'] property_feat_names["spiness"] = morph_feat_names return property_features, property_feat_names, spiness_given
[docs]def morphology_feature(source, max_nn_dist=6000): """Calculates features for discrimination tasks of neurite identities, such as axon vs. dendrite or cell types classification. Estimated on interpolated skeleton nodes. Features are calculated with a sliding window approach for each node. Window is 2*max_nn_dist (nm). Parameters ---------- source : str Path to anno or MappedSkeletonObject max_nn_dist : float Radius in which neighboring nodes are found and used for calculating features in nm. Returns ------- numpy.array, numpary.array, list of int, bool two arrays of features for each node. number of nodes x 28 (22 radius feature and 6 object features), bool if spiness feature are given """ if isinstance(source, basestring): anno = load_ordered_mapped_skeleton(source)[0] # build mito sample tree mitos, vc, sj = load_objpkl_from_kzip(source) else: anno = source.old_anno if source.mitos is None: mitos, vc, sj = load_objpkl_from_kzip(anno.filename) else: mitos = source.mitos vc = source.vc sj = source.sj m_dict, vc_dict, sj_dict = (mitos.object_dict, vc.object_dict, sj.object_dict) nearby_node_list = nodes_in_pathlength(anno, max_nn_dist) node_coords = [] node_radii = [] node_ids = [] for nodes in nearby_node_list: node_coords.append(nodes[0].getCoordinate_scaled()) node_radii.append(nodes[0].getDataElem("radius")) node_ids.append(nodes[0].getID()) m_feat = objfeat2skelnode(node_coords, node_radii, node_ids, nearby_node_list, m_dict, anno.scaling) vc_feat = objfeat2skelnode(node_coords, node_radii, node_ids, nearby_node_list, vc_dict, anno.scaling) sj_feat = objfeat2skelnode(node_coords, node_radii, node_ids, nearby_node_list, sj_dict, anno.scaling) rad_feat, spinehead_feat, spiness_given = radfeat2skelnode(nearby_node_list) morph_feat = np.concatenate((rad_feat, m_feat, vc_feat, sj_feat), axis=1) dist_feature, ids = node_branch_end_distance(anno, max_nn_dist) sort_ix = np.argsort(ids) ids = ids[sort_ix] dist_feature = dist_feature[sort_ix] node_ids = arr(node_ids) sort_ix2 = np.argsort(node_ids) node_ids = node_ids[sort_ix2] morph_feat = morph_feat[sort_ix2] assert np.all(node_ids == ids), 'Node IDs are different.' morph_feat = np.concatenate((morph_feat, dist_feature), axis=1) if np.any(np.isnan(morph_feat)): # print "Found nans in morphological features of %s: %s" % \ # (source, np.where(np.isnan(morph_feat))) morph_feat = np.nan_to_num(morph_feat.astype(np.float32)) spinehead_feat = spinehead_feat[sort_ix2] if np.any(np.isnan(spinehead_feat)): # print "Found nans in spinhead features of %s: %s" % \ # (source, np.where(np.isnan(spinehead_feat))) spinehead_feat = np.nan_to_num(spinehead_feat.astype(np.float32)) return morph_feat, spinehead_feat, ids, spiness_given
[docs]def radfeat2skelnode(nearby_node_list): """Calculate nodewise radius feature Parameters ---------- nearby_node_list : list of list of SkeletonNodes grouped tracing nodes Returns ------- np.array, np.array, bool array of number of nodes times 22 features, containing mean radius, sigma of radii, 20 hist features, spinehead features and whether spiness features are returned """ radius_feat = np.zeros((len(nearby_node_list), 12)) spiness_given = True try: _ = nearby_node_list[0][0].data["spiness_pred"] except KeyError: # print "Spiness prediction not found in Annotation Object. Axoness" \ # "feature reduced to morphological features." spiness_given = False spinehead_feats = np.zeros((len(nearby_node_list), 4)) for k, neighbor_nodes in enumerate(nearby_node_list): radius_feat[k] = radius_feats_from_nodes(neighbor_nodes) if spiness_given: spinehead_feats[k] = spiness_feats_from_nodes(neighbor_nodes)[:4] return radius_feat, spinehead_feats, spiness_given
[docs]def radius_feats_from_nodes(nodes, nb_bins=10, max_rad=5000): """Calculates mean, std and histogram features Parameters ---------- nodes : list of SkeletonNodes nb_bins : int Number of bins for histogram features max_rad : int maximum radius to plot on histogram x-axis Returns ------- np.array radius features with dim. nb_bins+2 """ radius_feat = np.zeros((12)) nn_radius = arr([nn.getDataElem("radius") for nn in nodes]) radius_feat[0] = np.mean(nn_radius) radius_feat[1] = np.std(nn_radius) hist, bin_edges = np.histogram(nn_radius*10, bins=nb_bins, range=(0, max_rad), normed=True) radius_feat[2:(nb_bins+2)] = hist return radius_feat
[docs]def spiness_feats_from_nodes(nodes): """ Calculates spiness feats including abs. number of spineheads, mean and standard deviation (std) of spinehead size, mean spinehead probability and mean and std of spineneck lengths. Parameters ---------- nodes : list of SkeletonNodes Returns ------- np.array spiness features, dim. of 6 """ spinehead_feats = np.zeros((6)) spinehead_radius = [] spinehead_proba = [] spine_neck_lengths = [] for node in nodes: if node.degree() == 1 and int(node.data["spiness_pred"]) == 1: spinehead_proba.append(float(node.data["spiness_proba1"])) spinehead_radius.append(node.data["radius"]) node1 = node neck_length = 0 while (len(node1.getParents()) == 1) and (int(node1.data\ ["spiness_pred"]) != 2): neck_length += euclidian_distance(node1.getCoordinate_scaled(), list(node1.getParents())[0].getCoordinate_scaled()) node1 = list(node1.getParents())[0] spine_neck_lengths.append(neck_length) if len(spinehead_radius) != 0: spinehead_mean = np.mean(spinehead_radius) spinhead_std = np.std(spinehead_radius) spinehead_proba_mean = np.mean(spinehead_proba) spinehead_feats[:4] = arr([len(spinehead_radius), spinehead_mean, spinhead_std, spinehead_proba_mean]) if len(spine_neck_lengths) != 0: spinehead_feats[4:6] = arr([np.mean(spine_neck_lengths), np.std(spine_neck_lengths)]) return spinehead_feats
[docs]def sj_per_spinehead(anno): """Calculate number of sj per spinehead. Iterate over all mapped sj objects and find nearest skeleton node. If skeleton node has spiness prediction == 1 (spinehead) then increment counter of this node by one. After the loop sum over all counter and divide by the number of nodes which have at least one sj assigned. Parameters ---------- :param anno: SkeletonAnnotation Returns ------- float Average number of sj per spinehead (assumes there is no spinehead without sj) """ _, _, sj = load_objpkl_from_kzip(anno.filename) sj_dict = sj.object_dict nb_sj = len(sj_dict.keys()) hull_samples = np.zeros((nb_sj, 100, 3)) skel_nodes = [n for n in anno.getNodes()] node_coords = arr([n.getCoordinate_scaled() for n in skel_nodes]) node_sj_counter = np.zeros((len(skel_nodes), )) skeleton_tree = spatial.cKDTree(node_coords) for i, sj_key in enumerate(sj_dict.keys()): sj = sj_dict[sj_key] m_hull = sj.hull_voxels * arr(anno.scaling) random_ixs = np.random.choice(np.arange(len(m_hull)), size=100) hull_samples[i] = m_hull[random_ixs] for i in range(nb_sj): dists, nearest_skel_ixs = skeleton_tree.query(hull_samples[i], 1) majority_ix = cell_classification(nearest_skel_ixs) if int(skel_nodes[majority_ix].data["spiness_pred"]) == 1: node_sj_counter[majority_ix] += 1 sj_per_sh = np.sum(node_sj_counter) / float(np.sum(node_sj_counter != 0)) return np.nan_to_num(sj_per_sh)
[docs]def propertyfeat2skelnode(node_list): """Calculate nodewise radius feature Parameters ---------- node_list : list grouped nodes Returns ------- np.array number of nodes times 22 features, containing mean radius, sigma of radii, 20 hist features """ radius_feat = np.zeros((1, 12)) n_radius = [] type_feat = np.zeros((1, 6)) n_axoness = [] for node in node_list: n_radius.append(float(node.getDataElem("radius"))) n_axoness.append(int(node.data["axoness_pred"])) n_radius = arr(n_radius) radius_feat[0, 0] = np.mean(n_radius) radius_feat[0, 1] = np.std(n_radius) hist, bin_edges = np.histogram(n_radius*10, bins=10, range=(0, 5000), normed=True) radius_feat[0, 2:12] = hist n_axoness = arr(n_axoness) nb_axonnodes = np.sum(n_axoness == 1) nb_dennodes = np.sum(n_axoness == 0) nb_somanodes = np.sum(n_axoness == 2) type_feat[0, :3] = [nb_axonnodes, nb_dennodes, nb_somanodes] type_feat[0, 3:] = type_feat[0, :3] / float(len(n_axoness)) return radius_feat, type_feat
[docs]def celltype_axoness_feature(anno): """ Calculates axones feature of mapped sekeleton for cell type prediction. These include proportion of axon, dendrite and soma pathlengths and maximum degree of soma nodes. Returns ------- np.array (n x 4) axoness features """ type_feats = np.zeros((1, 4)) all_path_length = anno.physical_length() / 1000. if all_path_length != 0: for i in range(3): type_feats[0, i] = pathlength_of_property(anno, 'axoness_pred', i) / \ all_path_length for n in anno.getNodes(): if int(n.data["axoness_pred"]) == 2: type_feats[0, 3] = np.max((n.degree(), type_feats[0, 3])) return type_feats
[docs]def pathlength_of_property(anno, property, value): """Calculate pathlength of nodes with certain property value Parameters ---------- anno : SkeletonAnnotation mapped cell tracing property : str spiness / axoness value : int classification result, e.g. 0, 1, 2 Returns ------- int length (in um) """ pathlength = 0 for from_node, to_node in anno.iter_edges(): if int(from_node.data[property]) == value and\ int(to_node.data[property]) == value: pathlength += euclidian_distance(from_node.getCoordinate_scaled(), to_node.getCoordinate_scaled()) return pathlength / 1000.
[docs]def objfeat2skelnode(node_coords, node_radii, node_ids, nearby_node_list, obj_dict, scaling): """Calculate features of UltrastructuralDatasetObjects along Skeleton Parameters ---------- node_coords : np.array node_radii : np.array node_ids : np.array nearby_node_list : list of list of SkeletonNodes obj_dict : UltrastructuralDataset scaling : tuple Returns ------- np.array (dimension nb_skelnodes x 2) The two features are absolute number of assigned objects and mean voxel size of the objects """ skeleton_tree = spatial.cKDTree(node_coords) nb_skelnodes = len(node_coords) axoness_features = np.zeros((nb_skelnodes, 2)) obj_assignment = [[] for i in range(nb_skelnodes)] nb_objs = len(obj_dict.keys()) hull_samples = np.zeros((nb_objs, 100, 3)) key_list = [] for i, obj_key in enumerate(obj_dict.keys()): obj_object = obj_dict[obj_key] m_hull = obj_object.hull_voxels * scaling random_ixs = np.random.choice(np.arange(len(m_hull)), size=100) hull_samples[i] = m_hull[random_ixs] key_list.append(obj_key) for i in range(nb_objs): dists, nearest_skel_ixs = skeleton_tree.query(hull_samples[i], 1) for ix in list(set(nearest_skel_ixs)): if np.min(dists[nearest_skel_ixs == ix]) > node_radii[ix]*10: continue obj_assignment[ix] += [key_list[i]] for k in range(nb_skelnodes): nn_nodes = nearby_node_list[k] nn_ids = [nn.getID() for nn in nn_nodes] assigned_objs = [] for nn_id in nn_ids: assigned_objs += obj_assignment[node_ids.index(nn_id)] axoness_features[k, 0] = len(assigned_objs) if len(assigned_objs) == 0: continue obj_sizes = [obj_dict[key].size for key in assigned_objs] axoness_features[k, 1] = np.mean(obj_sizes) return axoness_features
[docs]def nodes_in_pathlength(anno, max_path_len): """Find nodes reachable in max_path_len from source node, calculated for every node in anno. Parameters ---------- anno : AnnotationObject max_path_len : float Maximum distance from source node Returns ------- list of lists containing reachable nodes in max_path_len where outer list has length len(anno.getNodes()) """ skel_graph = su.annotation_to_nx_graph(anno) list_reachable_nodes = [] for source_node in anno.getNodes(): source_node_coord = arr(source_node.getCoordinate_scaled()) reachable_nodes = [source_node] for edge in nx.bfs_edges(skel_graph, source_node): next_node = edge[1] next_node_coord = arr(next_node.getCoordinate_scaled()) if np.linalg.norm(next_node_coord - source_node_coord) > max_path_len: break reachable_nodes.append(next_node) list_reachable_nodes.append(reachable_nodes) return list_reachable_nodes
[docs]def assign_property2node(node, pred, property): """Assign prediction of property to node Parameters ---------- node : NewSkeletonNode pred : prediction appropriate to property property : property to change """ node.data["%s_pred" % property] = str(pred) node_comment = node.getComment() ax_ix = node_comment.find(property) if ax_ix == -1: node.appendComment(property+str(pred)) else: help_list = list(node_comment) help_list[ax_ix+7] = str(pred) node.setComment("".join(help_list))
[docs]def majority_vote(anno, property='axoness', max_dist=6000): """ Smoothes (average using sliding window of 2 times max_dist and majority vote) property prediction in annotation, whereas for axoness somata are untouched. Parameters ---------- anno : SkeletonAnnotation property : str which property to average max_dist : int maximum distance (in nm) for sliding window used in majority voting """ # print "Performing smoothing of %s using sliding window average of max " \ # "dist %d nm." % (property, max_dist) old_anno = copy.deepcopy(anno) nearest_nodes_list = nodes_in_pathlength(old_anno, max_dist) for nodes in nearest_nodes_list: curr_node_id = nodes[0].getID() new_node = anno.getNodeByID(curr_node_id) if int(new_node.data["axoness_pred"]) == 2: new_node.data["axoness_pred"] = 2 continue property_val = [int(n.data[property+'_pred']) for n in nodes] counter = Counter(property_val) new_ax = counter.most_common()[0][0] node_comment = new_node.getComment() ax_ix = node_comment.find(property) help_list = list(node_comment) help_list[ax_ix+len(property)] = str(new_ax) new_node.setComment("".join(help_list)) new_node.setDataElem(property+'_pred', new_ax)
[docs]def get_obj_density(source, property='axoness_pred', value=1, obj='mito', return_abs_density=True): """Calculate pathlength of nodes using edges Parameters ---------- anno: list of SkeletonAnnotation property : str e.g. 'axoness_pred' value : int value of property to check obj : str mito/vc/sj return_abs_density : bool Returns ------- :return: length in um """ obj_dict = {'mito': 0, 'vc': 1, 'sj':2} if isinstance(source, basestring): anno = load_ordered_mapped_skeleton(source)[0] # build mito sample tree mitos, vc, sj = load_objpkl_from_kzip(source) else: anno = source.old_anno if source.mitos is None: mitos, vc, sj = load_objpkl_from_kzip(anno.filename) else: mitos = source.mitos vc = source.vc sj = source.sj m_dict, vc_dict, sj_dict = (mitos.object_dict, vc.object_dict, sj.object_dict) obj_dict = [m_dict, vc_dict, sj_dict][obj_dict[obj]] node_coords = [] node_radii = [] node_ids = [] for node in anno.getNodes(): node_coords.append(node.getCoordinate_scaled()) node_radii.append(node.getDataElem("radius")) node_ids.append(node.getID()) pathlength = 0 nodes_of_value = [] for from_node, to_node in anno.iter_edges(): if int(from_node.data[property]) == value: nodes_of_value.append(from_node.getID()) if int(from_node.data[property]) == value and\ int(to_node.data[property]) == value: pathlength += euclidian_distance(from_node.getCoordinate_scaled(), to_node.getCoordinate_scaled()) skeleton_tree = spatial.cKDTree(node_coords) nb_skelnodes = len(node_coords) obj_assignment = [[] for i in range(nb_skelnodes)] nb_objs = len(obj_dict.keys()) hull_samples = np.zeros((nb_objs, 500, 3)) key_list = [] for i, obj_key in enumerate(obj_dict.keys()): obj_object = obj_dict[obj_key] m_hull = obj_object.hull_voxels * anno.scaling random_ixs = np.random.choice(np.arange(len(m_hull)), size=500) hull_samples[i] = m_hull[random_ixs] key_list.append(obj_key) for i in range(nb_objs): dists, nearest_skel_ixs = skeleton_tree.query(hull_samples[i], 1) for ix in list(set(nearest_skel_ixs)): if np.min(dists[nearest_skel_ixs == ix]) > node_radii[ix]*10: continue obj_assignment[ix] += [key_list[i]] assigned_objs = [] for k in range(nb_skelnodes): if not node_ids[k] in nodes_of_value: continue assigned_objs += obj_assignment[node_ids.index(node_ids[k])] assigned_objs = list(set(assigned_objs)) if pathlength == 0: return 0 if return_abs_density: return len(assigned_objs) / pathlength * 1000. obj_vols = [] for key in assigned_objs: obj_vols.append(obj_dict[key].size * (9*9*20)) obj_density = np.sum(obj_vols) / pathlength * 1000. return obj_density
[docs]def node_branch_end_distance(nml, dist): """Set distances to next branch resp. end point for each node (distance is capped by given parameter dist) in .data dictionary of each node and returns values with node ids Parameters ---------- nml : SkeletonAnnotation dist : int maximum distance value to occur Returns ------- np.array, np.array distances to nearest end/branch point, node ids """ graph = su.annotation_to_nx_graph(nml) dic = su.nx.degree(graph) end = [] for key, value in dic.items(): if value == 1: end.append(key) bran = [] for key, value in dic.items(): if value >= 3: bran.append(key) features = [] Y = [] for node in graph.nodes(): Y.append(node.getID()) node_to_all_endnode = [dist] node_to_all_branchpoint = [dist] single_node_feature = [] for endnode in end: node_to_all_endnode.append(node.distance_scaled(endnode)) if len(node_to_all_endnode) != 0: distance2endpoint = min(node_to_all_endnode) else: distance2endpoint = np.float32(99999999) node.data["endpointdistance"] = distance2endpoint single_node_feature.append(distance2endpoint) for branchpoint in bran: node_to_all_branchpoint.append(node.distance_scaled(branchpoint)) if len(node_to_all_branchpoint) != 0: distance2branchpoint = min(node_to_all_branchpoint) else: distance2branchpoint = np.float32(99999999) single_node_feature.append(distance2branchpoint) features.append(single_node_feature) node.data["branchpointdistance"] = distance2branchpoint X = np.array(features) Y = np.array(Y) return X, Y