Source code for syconn.processing.features

# -*- coding: utf-8 -*-
# SyConn - Synaptic connectivity inference toolkit
#
# Copyright (c) 2016 - now
# Max-Planck-Institute for Medical Research, Heidelberg, Germany
# Authors: Sven Dorkenwald, Philipp Schubert, Jörgen Kornfeld

import copy
import numpy as np
import os
import zipfile
from collections import Counter
from numpy import array as arr
from scipy import spatial
import networkx as nx
from learning_rfc import write_feat2csv, cell_classification
from knossos_utils import skeleton_utils as su
from ..utils.basics import euclidian_distance
from ..utils.datahandler import load_objpkl_from_kzip, \
    load_ordered_mapped_skeleton
from knossos_utils.skeleton import remove_from_zip


[docs]def update_property_feat_kzip_star(args):
    """Helper function for update_property_feat_kzip
    """
    update_property_feat_kzip(*args)


[docs]def update_property_feat_kzip(path2kzip, dist=6000):
    """Recompute axoness feature of skeleton at path2kzip and writes it to
    .k.zip

    Parameters
    ----------
    path2kzip : str
        Path to mapped skeleton
    dist : int
    """
    prop_dict, property_feat_names = calc_prop_feat_dict(path2kzip, dist)
    for prop, feat in prop_dict.iteritems():
        path2csv = path2kzip[:-6] + '_%s_feat.csv' % prop
        write_feat2csv(path2csv, feat, property_feat_names[prop])
        file_name = '%s_feat.csv' % prop
        try:
            remove_from_zip(path2kzip, file_name)
            with zipfile.ZipFile(path2kzip, "a", zipfile.ZIP_DEFLATED) as zf:
                zf.write(path2csv, file_name)
            # print "Wrote new %s feature to %s." % (prop, path2kzip)
        except Exception, e:
            print 'Could not write %s to zip file.' % file_name
            print e
        os.remove(path2csv)


[docs]def calc_prop_feat_dict(source, dist=6000):
    """Calculates property feature

    Parameters
    ----------
    source : SkeletonAnnotation
    dist : int

    Returns
    -------
    dict, list of str, bool
    Dictionary of property features, list of feature names, bool if spiness
    feature are given
    """
    property_features = {}
    property_feat_names = {}
    morph_feat, spinehead_feats, node_ids, spiness_given\
        = morphology_feature(source, dist)
    morph_info = np.concatenate((node_ids[:, None],
                                 morph_feat.astype(np.float32)), axis=1)
    property_features["axoness"] = np.concatenate((morph_info,
                                                  spinehead_feats), axis=1)
    morph_feat_names = ['nodeID', 'rad_mean', 'rad_std'] + \
                       ['rad_hist'+str(i) for i in range(10)] +\
                       ['mito_nb', 'mito_size_mean', 'vc_nb', 'vc_size_mean',
                        'sj_nb', 'sj_size_mean', 'branch_dist', 'endpoint_dist']
    property_features["spiness"] = morph_info
    property_feat_names["axoness"] = morph_feat_names +\
                                    ['nb_spinehead', 'sh_rad_mean',
                                    'sh_rad_std', 'sh_proba_mean']
    property_feat_names["spiness"] = morph_feat_names
    return property_features, property_feat_names, spiness_given


[docs]def morphology_feature(source, max_nn_dist=6000):
    """Calculates features for discrimination tasks of neurite identities, such as
    axon vs. dendrite or cell types classification. Estimated on interpolated
    skeleton nodes. Features are calculated with a sliding window approach for
    each node. Window is 2*max_nn_dist (nm).

    Parameters
    ----------
    source : str
        Path to anno or MappedSkeletonObject
    max_nn_dist : float
        Radius in which neighboring nodes are found and
        used for calculating features in nm.

    Returns
    -------
    numpy.array, numpary.array, list of int, bool
        two arrays of features for each node. number of nodes x 28 (22 radius
        feature and 6 object features), bool if spiness feature are given
    """
    if isinstance(source, basestring):
        anno = load_ordered_mapped_skeleton(source)[0]
        # build mito sample tree
        mitos, vc, sj = load_objpkl_from_kzip(source)
    else:
        anno = source.old_anno
        if source.mitos is None:
            mitos, vc, sj = load_objpkl_from_kzip(anno.filename)
        else:
            mitos = source.mitos
            vc = source.vc
            sj = source.sj
    m_dict, vc_dict, sj_dict = (mitos.object_dict, vc.object_dict,
                                sj.object_dict)
    nearby_node_list = nodes_in_pathlength(anno, max_nn_dist)
    node_coords = []
    node_radii = []
    node_ids = []
    for nodes in nearby_node_list:
        node_coords.append(nodes[0].getCoordinate_scaled())
        node_radii.append(nodes[0].getDataElem("radius"))
        node_ids.append(nodes[0].getID())
    m_feat = objfeat2skelnode(node_coords, node_radii, node_ids,
                              nearby_node_list, m_dict, anno.scaling)
    vc_feat = objfeat2skelnode(node_coords, node_radii, node_ids,
                               nearby_node_list, vc_dict, anno.scaling)
    sj_feat = objfeat2skelnode(node_coords, node_radii, node_ids,
                               nearby_node_list, sj_dict, anno.scaling)
    rad_feat, spinehead_feat, spiness_given = radfeat2skelnode(nearby_node_list)
    morph_feat = np.concatenate((rad_feat, m_feat, vc_feat, sj_feat),
                                axis=1)
    dist_feature, ids = node_branch_end_distance(anno, max_nn_dist)
    sort_ix = np.argsort(ids)
    ids = ids[sort_ix]
    dist_feature = dist_feature[sort_ix]
    node_ids = arr(node_ids)
    sort_ix2 = np.argsort(node_ids)
    node_ids = node_ids[sort_ix2]
    morph_feat = morph_feat[sort_ix2]
    assert np.all(node_ids == ids), 'Node IDs are different.'
    morph_feat = np.concatenate((morph_feat, dist_feature), axis=1)
    if np.any(np.isnan(morph_feat)):
        # print "Found nans in morphological features of %s: %s" % \
        #       (source, np.where(np.isnan(morph_feat)))
        morph_feat = np.nan_to_num(morph_feat.astype(np.float32))
    spinehead_feat = spinehead_feat[sort_ix2]
    if np.any(np.isnan(spinehead_feat)):
        # print "Found nans in spinhead features of %s: %s" % \
        #       (source, np.where(np.isnan(spinehead_feat)))
        spinehead_feat = np.nan_to_num(spinehead_feat.astype(np.float32))
    return morph_feat, spinehead_feat, ids, spiness_given


[docs]def radfeat2skelnode(nearby_node_list):
    """Calculate nodewise radius feature

    Parameters
    ----------
    nearby_node_list : list of list of SkeletonNodes
        grouped tracing nodes

    Returns
    -------
    np.array, np.array, bool
        array of number of nodes times 22 features, containing mean radius,
        sigma of radii, 20 hist features, spinehead features and whether
        spiness features are returned
    """
    radius_feat = np.zeros((len(nearby_node_list), 12))
    spiness_given = True
    try:
        _ = nearby_node_list[0][0].data["spiness_pred"]
    except KeyError:
        # print "Spiness prediction not found in Annotation Object. Axoness" \
        #       "feature reduced to morphological features."
        spiness_given = False
    spinehead_feats = np.zeros((len(nearby_node_list), 4))
    for k, neighbor_nodes in enumerate(nearby_node_list):
        radius_feat[k] = radius_feats_from_nodes(neighbor_nodes)
        if spiness_given:
            spinehead_feats[k] = spiness_feats_from_nodes(neighbor_nodes)[:4]
    return radius_feat, spinehead_feats, spiness_given


[docs]def radius_feats_from_nodes(nodes, nb_bins=10, max_rad=5000):
    """Calculates mean, std and histogram features

    Parameters
    ----------
    nodes : list of SkeletonNodes
    nb_bins : int
        Number of bins for histogram features
    max_rad : int
        maximum radius to plot on histogram x-axis

    Returns
    -------
    np.array
        radius features with dim. nb_bins+2
    """
    radius_feat = np.zeros((12))
    nn_radius = arr([nn.getDataElem("radius") for nn in nodes])
    radius_feat[0] = np.mean(nn_radius)
    radius_feat[1] = np.std(nn_radius)
    hist, bin_edges = np.histogram(nn_radius*10, bins=nb_bins, range=(0, max_rad),
                                   normed=True)
    radius_feat[2:(nb_bins+2)] = hist
    return radius_feat


[docs]def spiness_feats_from_nodes(nodes):
    """
    Calculates spiness feats including abs. number of spineheads, mean and
    standard deviation (std) of spinehead size, mean spinehead probability and
    mean and std of spineneck lengths.

    Parameters
    ----------
    nodes : list of SkeletonNodes

    Returns
    -------
    np.array
        spiness features, dim. of 6
    """
    spinehead_feats = np.zeros((6))
    spinehead_radius = []
    spinehead_proba = []
    spine_neck_lengths = []
    for node in nodes:
        if node.degree() == 1 and int(node.data["spiness_pred"]) == 1:
            spinehead_proba.append(float(node.data["spiness_proba1"]))
            spinehead_radius.append(node.data["radius"])
            node1 = node
            neck_length = 0
            while (len(node1.getParents()) == 1) and (int(node1.data\
            ["spiness_pred"]) != 2):
                neck_length += euclidian_distance(node1.getCoordinate_scaled(),
                               list(node1.getParents())[0].getCoordinate_scaled())
                node1 = list(node1.getParents())[0]
            spine_neck_lengths.append(neck_length)
    if len(spinehead_radius) != 0:
        spinehead_mean = np.mean(spinehead_radius)
        spinhead_std = np.std(spinehead_radius)
        spinehead_proba_mean = np.mean(spinehead_proba)
        spinehead_feats[:4] = arr([len(spinehead_radius), spinehead_mean,
                               spinhead_std, spinehead_proba_mean])
    if len(spine_neck_lengths) != 0:
        spinehead_feats[4:6] = arr([np.mean(spine_neck_lengths),
                                   np.std(spine_neck_lengths)])
    return spinehead_feats


[docs]def sj_per_spinehead(anno):
    """Calculate number of sj per spinehead. Iterate over all mapped sj objects and
    find nearest skeleton node. If skeleton node has spiness prediction == 1
    (spinehead) then increment counter of this node by one.
    After the loop sum over all counter and divide by the number of nodes which
    have at least one sj assigned.

    Parameters
    ----------
    :param anno: SkeletonAnnotation

    Returns
    -------
    float
        Average number of sj per spinehead (assumes there is no spinehead
        without sj)
    """
    _, _, sj = load_objpkl_from_kzip(anno.filename)
    sj_dict = sj.object_dict
    nb_sj = len(sj_dict.keys())
    hull_samples = np.zeros((nb_sj, 100, 3))
    skel_nodes = [n for n in anno.getNodes()]
    node_coords = arr([n.getCoordinate_scaled() for n in skel_nodes])
    node_sj_counter = np.zeros((len(skel_nodes), ))
    skeleton_tree = spatial.cKDTree(node_coords)
    for i, sj_key in enumerate(sj_dict.keys()):
        sj = sj_dict[sj_key]
        m_hull = sj.hull_voxels * arr(anno.scaling)
        random_ixs = np.random.choice(np.arange(len(m_hull)), size=100)
        hull_samples[i] = m_hull[random_ixs]
    for i in range(nb_sj):
        dists, nearest_skel_ixs = skeleton_tree.query(hull_samples[i], 1)
        majority_ix = cell_classification(nearest_skel_ixs)
        if int(skel_nodes[majority_ix].data["spiness_pred"]) == 1:
            node_sj_counter[majority_ix] += 1
    sj_per_sh = np.sum(node_sj_counter) / float(np.sum(node_sj_counter != 0))
    return np.nan_to_num(sj_per_sh)


[docs]def propertyfeat2skelnode(node_list):
    """Calculate nodewise radius feature

    Parameters
    ----------
    node_list : list
        grouped nodes

    Returns
    -------
    np.array
        number of nodes times 22 features, containing mean radius,
        sigma of radii, 20 hist features
    """
    radius_feat = np.zeros((1, 12))
    n_radius = []
    type_feat = np.zeros((1, 6))
    n_axoness = []
    for node in node_list:
        n_radius.append(float(node.getDataElem("radius")))
        n_axoness.append(int(node.data["axoness_pred"]))
    n_radius = arr(n_radius)
    radius_feat[0, 0] = np.mean(n_radius)
    radius_feat[0, 1] = np.std(n_radius)
    hist, bin_edges = np.histogram(n_radius*10, bins=10, range=(0, 5000),
                                   normed=True)
    radius_feat[0, 2:12] = hist
    n_axoness = arr(n_axoness)
    nb_axonnodes = np.sum(n_axoness == 1)
    nb_dennodes = np.sum(n_axoness == 0)
    nb_somanodes = np.sum(n_axoness == 2)
    type_feat[0, :3] = [nb_axonnodes, nb_dennodes, nb_somanodes]
    type_feat[0, 3:] = type_feat[0, :3] / float(len(n_axoness))
    return radius_feat, type_feat


[docs]def celltype_axoness_feature(anno):
    """
    Calculates axones feature of mapped sekeleton for cell type prediction.
    These include proportion of axon, dendrite and soma pathlengths and maximum
    degree of soma nodes.

    Returns
    -------
    np.array (n x 4)
        axoness features
    """
    type_feats = np.zeros((1, 4))
    all_path_length = anno.physical_length() / 1000.
    if all_path_length != 0:
        for i in range(3):
            type_feats[0, i] = pathlength_of_property(anno, 'axoness_pred', i) / \
                            all_path_length
    for n in anno.getNodes():
        if int(n.data["axoness_pred"]) == 2:
            type_feats[0, 3] = np.max((n.degree(), type_feats[0, 3]))
    return type_feats


[docs]def pathlength_of_property(anno, property, value):
    """Calculate pathlength of nodes with certain property value

    Parameters
    ----------
    anno : SkeletonAnnotation
        mapped cell tracing
    property : str
        spiness / axoness
    value : int
        classification result, e.g. 0, 1, 2

    Returns
    -------
    int
        length (in um)
    """
    pathlength = 0
    for from_node, to_node in anno.iter_edges():
        if int(from_node.data[property]) == value and\
                        int(to_node.data[property]) == value:
            pathlength += euclidian_distance(from_node.getCoordinate_scaled(),
                                             to_node.getCoordinate_scaled())
    return pathlength / 1000.


[docs]def objfeat2skelnode(node_coords, node_radii, node_ids, nearby_node_list,
                     obj_dict, scaling):
    """Calculate features of UltrastructuralDatasetObjects along Skeleton

    Parameters
    ----------
    node_coords : np.array
    node_radii : np.array
    node_ids : np.array
    nearby_node_list : list of list of SkeletonNodes
    obj_dict : UltrastructuralDataset
    scaling : tuple

    Returns
    -------
    np.array (dimension nb_skelnodes x 2)
        The two features are absolute number of assigned objects and
        mean voxel size of the objects
    """
    skeleton_tree = spatial.cKDTree(node_coords)
    nb_skelnodes = len(node_coords)
    axoness_features = np.zeros((nb_skelnodes, 2))
    obj_assignment = [[] for i in range(nb_skelnodes)]
    nb_objs = len(obj_dict.keys())
    hull_samples = np.zeros((nb_objs, 100, 3))
    key_list = []
    for i, obj_key in enumerate(obj_dict.keys()):
        obj_object = obj_dict[obj_key]
        m_hull = obj_object.hull_voxels * scaling
        random_ixs = np.random.choice(np.arange(len(m_hull)), size=100)
        hull_samples[i] = m_hull[random_ixs]
        key_list.append(obj_key)
    for i in range(nb_objs):
        dists, nearest_skel_ixs = skeleton_tree.query(hull_samples[i], 1)
        for ix in list(set(nearest_skel_ixs)):
            if np.min(dists[nearest_skel_ixs == ix]) > node_radii[ix]*10:
                continue
            obj_assignment[ix] += [key_list[i]]
    for k in range(nb_skelnodes):
        nn_nodes = nearby_node_list[k]
        nn_ids = [nn.getID() for nn in nn_nodes]
        assigned_objs = []
        for nn_id in nn_ids:
            assigned_objs += obj_assignment[node_ids.index(nn_id)]
        axoness_features[k, 0] = len(assigned_objs)
        if len(assigned_objs) == 0:
            continue
        obj_sizes = [obj_dict[key].size for key in assigned_objs]
        axoness_features[k, 1] = np.mean(obj_sizes)
    return axoness_features


[docs]def nodes_in_pathlength(anno, max_path_len):
    """Find nodes reachable in max_path_len from source node, calculated for
    every node in anno.

    Parameters
    ----------
    anno : AnnotationObject
    max_path_len : float
        Maximum distance from source node

    Returns
    -------
    list of lists containing reachable nodes in max_path_len where
    outer list has length len(anno.getNodes())
    """
    skel_graph = su.annotation_to_nx_graph(anno)
    list_reachable_nodes = []
    for source_node in anno.getNodes():
        source_node_coord = arr(source_node.getCoordinate_scaled())
        reachable_nodes = [source_node]
        for edge in nx.bfs_edges(skel_graph, source_node):
            next_node = edge[1]
            next_node_coord = arr(next_node.getCoordinate_scaled())
            if np.linalg.norm(next_node_coord - source_node_coord) > max_path_len:
                break
            reachable_nodes.append(next_node)
        list_reachable_nodes.append(reachable_nodes)
    return list_reachable_nodes


[docs]def assign_property2node(node, pred, property):
    """Assign prediction of property to node

    Parameters
    ----------
    node : NewSkeletonNode
    pred : prediction appropriate to property
    property : property to change

    """
    node.data["%s_pred" % property] = str(pred)
    node_comment = node.getComment()
    ax_ix = node_comment.find(property)
    if ax_ix == -1:
        node.appendComment(property+str(pred))
    else:
        help_list = list(node_comment)
        help_list[ax_ix+7] = str(pred)
        node.setComment("".join(help_list))


[docs]def majority_vote(anno, property='axoness', max_dist=6000):
    """
    Smoothes (average using sliding window of 2 times max_dist and majority
    vote) property prediction in annotation, whereas for axoness somata are
    untouched.

    Parameters
    ----------
    anno : SkeletonAnnotation
    property : str
        which property to average
    max_dist : int
        maximum distance (in nm) for sliding window used in majority voting
    """
    # print "Performing smoothing of %s using sliding window average of max " \
    #       "dist %d nm." % (property, max_dist)
    old_anno = copy.deepcopy(anno)
    nearest_nodes_list = nodes_in_pathlength(old_anno, max_dist)
    for nodes in nearest_nodes_list:
        curr_node_id = nodes[0].getID()
        new_node = anno.getNodeByID(curr_node_id)
        if int(new_node.data["axoness_pred"]) == 2:
            new_node.data["axoness_pred"] = 2
            continue
        property_val = [int(n.data[property+'_pred']) for n in nodes]
        counter = Counter(property_val)
        new_ax = counter.most_common()[0][0]
        node_comment = new_node.getComment()
        ax_ix = node_comment.find(property)
        help_list = list(node_comment)
        help_list[ax_ix+len(property)] = str(new_ax)
        new_node.setComment("".join(help_list))
        new_node.setDataElem(property+'_pred', new_ax)


[docs]def get_obj_density(source, property='axoness_pred', value=1, obj='mito',
                    return_abs_density=True):
    """Calculate pathlength of nodes using edges

    Parameters
    ----------
    anno: list of SkeletonAnnotation
    property : str
        e.g. 'axoness_pred'
    value : int
        value of property to check
    obj : str
        mito/vc/sj
    return_abs_density : bool

    Returns
    -------
    :return: length in um
    """
    obj_dict = {'mito': 0, 'vc': 1, 'sj':2}
    if isinstance(source, basestring):
        anno = load_ordered_mapped_skeleton(source)[0]
        # build mito sample tree
        mitos, vc, sj = load_objpkl_from_kzip(source)
    else:
        anno = source.old_anno
        if source.mitos is None:
            mitos, vc, sj = load_objpkl_from_kzip(anno.filename)
        else:
            mitos = source.mitos
            vc = source.vc
            sj = source.sj
    m_dict, vc_dict, sj_dict = (mitos.object_dict, vc.object_dict,
                                sj.object_dict)
    obj_dict = [m_dict, vc_dict, sj_dict][obj_dict[obj]]
    node_coords = []
    node_radii = []
    node_ids = []
    for node in anno.getNodes():
        node_coords.append(node.getCoordinate_scaled())
        node_radii.append(node.getDataElem("radius"))
        node_ids.append(node.getID())
    pathlength = 0
    nodes_of_value = []
    for from_node, to_node in anno.iter_edges():
        if int(from_node.data[property]) == value:
            nodes_of_value.append(from_node.getID())
        if int(from_node.data[property]) == value and\
        int(to_node.data[property]) == value:
            pathlength += euclidian_distance(from_node.getCoordinate_scaled(),
                                             to_node.getCoordinate_scaled())
    skeleton_tree = spatial.cKDTree(node_coords)
    nb_skelnodes = len(node_coords)
    obj_assignment = [[] for i in range(nb_skelnodes)]
    nb_objs = len(obj_dict.keys())
    hull_samples = np.zeros((nb_objs, 500, 3))
    key_list = []
    for i, obj_key in enumerate(obj_dict.keys()):
        obj_object = obj_dict[obj_key]
        m_hull = obj_object.hull_voxels * anno.scaling
        random_ixs = np.random.choice(np.arange(len(m_hull)), size=500)
        hull_samples[i] = m_hull[random_ixs]
        key_list.append(obj_key)
    for i in range(nb_objs):
        dists, nearest_skel_ixs = skeleton_tree.query(hull_samples[i], 1)
        for ix in list(set(nearest_skel_ixs)):
            if np.min(dists[nearest_skel_ixs == ix]) > node_radii[ix]*10:
                continue
            obj_assignment[ix] += [key_list[i]]
    assigned_objs = []
    for k in range(nb_skelnodes):
        if not node_ids[k] in nodes_of_value:
            continue
        assigned_objs += obj_assignment[node_ids.index(node_ids[k])]
    assigned_objs = list(set(assigned_objs))
    if pathlength == 0:
        return 0
    if return_abs_density:
        return len(assigned_objs) / pathlength * 1000.
    obj_vols = []
    for key in assigned_objs:
        obj_vols.append(obj_dict[key].size * (9*9*20))
    obj_density = np.sum(obj_vols) / pathlength * 1000.
    return obj_density


[docs]def node_branch_end_distance(nml, dist):
    """Set distances to next branch resp. end point for each node (distance is
    capped by given parameter dist) in .data dictionary of each node
    and returns values with node ids

    Parameters
    ----------
    nml : SkeletonAnnotation
    dist : int
        maximum distance value to occur

    Returns
    -------
    np.array, np.array
        distances to nearest end/branch point, node ids
    """
    graph = su.annotation_to_nx_graph(nml)
    dic = su.nx.degree(graph)
    end = []
    for key, value in dic.items():
        if value == 1:
            end.append(key)
    bran = []
    for key, value in dic.items():
        if value >= 3:
            bran.append(key)
    features = []
    Y = []
    for node in graph.nodes():
        Y.append(node.getID())
        node_to_all_endnode = [dist]
        node_to_all_branchpoint = [dist]
        single_node_feature = []
        for endnode in end:
            node_to_all_endnode.append(node.distance_scaled(endnode))

        if len(node_to_all_endnode) != 0:
            distance2endpoint = min(node_to_all_endnode)
        else:
            distance2endpoint = np.float32(99999999)
        node.data["endpointdistance"] = distance2endpoint
        single_node_feature.append(distance2endpoint)

        for branchpoint in bran:
            node_to_all_branchpoint.append(node.distance_scaled(branchpoint))
        if len(node_to_all_branchpoint) != 0:
            distance2branchpoint = min(node_to_all_branchpoint)
        else:
            distance2branchpoint = np.float32(99999999)
        single_node_feature.append(distance2branchpoint)
        features.append(single_node_feature)
        node.data["branchpointdistance"] = distance2branchpoint
    X = np.array(features)
    Y = np.array(Y)
    return X, Y