Source code for syconn.utils.datahandler

# -*- coding: utf-8 -*-
# SyConn - Synaptic connectivity inference toolkit
#
# Copyright (c) 2016 - now
# Max-Planck-Institute for Medical Research, Heidelberg, Germany
# Authors: Sven Dorkenwald, Philipp Schubert, Joergen Kornfeld

import numpy as np
import re
import h5py
import os
import shutil
import tempfile
import zipfile
from multiprocessing import cpu_count
from scipy import spatial  # used by connect_soma_tracing (cKDTree)
from .basics import *
from .segmentationdataset import UltrastructuralDataset, load_dataset
from knossos_utils import skeleton_utils as su
from knossos_utils.skeleton import SkeletonAnnotation
import cPickle as pickle


class DataHandler(object):
    """Initialized with paths or cell components (SegmentationObjects), path
    to membrane prediction and source path of traced skeletons (to be
    computed). DataHandler is needed for further processing.

    Attributes
    ----------
    wd : str
        path to working directory, by default all other paths will lead to
        subfolders of this directory
    mem_path : str
        path to barrier prediction
    cs_path : str
        path to supervoxel data
    myelin_ds_path : str
        path to myelin prediction
    skeleton_path : str
        path to cell tracings
    data_path : str
        output directory for mapped cell tracings
    scaling : np.array
        scaling of dataset, s.t. after applying coordinates are isotropic
        and in nm
    mem : KnossosDataset
        will be assigned automatically
    mem_offset : optional
        offset of dataset
    mitos/vc/sj : UltrastructuralDataset
        Datasets containing cell objects of mitochondria, vesicle clouds and
        synaptic junctions, respectively
    """
    def __init__(self, wd, scaling=(9., 9., 20.)):
        """
        Parameters
        ----------
        wd : str
            path to working directory
        scaling : tuple of ints
            scaling of data set, s.t. data is isotropic
        """
        self.wd = wd
        vc_source = wd + '/chunkdataset/obj_vc_ARGUS_5/'
        sj_source = wd + '/chunkdataset/obj_sj_ARGUS_3/'
        mito_source = wd + '/chunkdataset/obj_mi_ARGUS_8/'
        self.mem_path = wd + '/knossosdatasets/rrbarrier/'
        self.cs_path = wd + '/chunkdataset/j0126_watershed_map/'
        self.myelin_ds_path = wd + "/knossosdatasets/myelin/"
        self.data_path = wd + '/neurons/'
        self.skeleton_path = wd + '/tracings/'
        self.nb_cpus = np.min((cpu_count(), 2))
        if not os.path.exists(self.data_path):
            os.makedirs(self.data_path)
        self.scaling = arr(scaling)
        self.mem = None
        self.mem_offset = arr([0, 0, 0])
        object_dict = {0: "mitos", 1: "vc", 2: "sj"}
        objects = [None, None, None]
        for i, source in enumerate([mito_source, vc_source, sj_source]):
            if type(source) is str:
                try:
                    obj = load_dataset(source)
                    obj.init_properties()
                    # print "Initialized %s objects." % object_dict[i]
                except IOError:
                    obj = None
            else:
                obj = source
            objects[i] = obj
        self.mitos = objects[0]
        self.vc = objects[1]
        self.sj = objects[2]

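A minimal usage sketch, assuming a working directory that already contains the subfolder layout hard-coded in __init__ (the path is a placeholder):

# Placeholder working directory; must contain the chunkdataset/ and
# knossosdatasets/ subfolders expected by DataHandler.__init__.
dh = DataHandler("/path/to/working_dir", scaling=(9., 9., 20.))
# each object dataset is None if its source could not be loaded
available = [ds is not None for ds in (dh.mitos, dh.vc, dh.sj)]
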
def load_ordered_mapped_skeleton(path):
    """Load nml of mapped skeleton and order trees

    Parameters
    ----------
    path : str
        Path to nml

    Returns
    -------
    list
        List of trees with order [skeleton, mitos, vc, sj, soma]
    """
    anno_dict = {"skeleton": 0, "mitos": 1, "vc": 2, "sj": 3, "soma": 4}
    annotation = su.loadj0126NML(path)
    ordered_annotation = list([[], [], [], [], []])
    for name in anno_dict.keys():
        init_anno = SkeletonAnnotation()
        init_anno.scaling = [9, 9, 20]
        init_anno.appendComment(name)
        ordered_annotation[anno_dict[name]] = init_anno
    if len(annotation) == 1 and not np.any(
            [key in annotation[0].getComment() for key in anno_dict.keys()]):
        ordered_annotation[0] = annotation[0]
    else:
        for anno in annotation:
            c = anno.getComment()
            if c == '':
                continue
            if ('vc' in c) or ('p4' in c):
                c = 'vc'
            elif ('sj' in c) or ('az' in c):
                c = 'sj'
            elif 'mitos' in c:
                c = 'mitos'
            elif 'soma' in c:
                c = 'soma'
            elif 'skeleton' in c:
                c = 'skeleton'
            else:
                print "Using %s as skeleton tree." % c
                c = 'skeleton'
            try:
                ordered_annotation[anno_dict[c]] = anno
            except KeyError:
                print "Found strange tree %s in skeleton at %s." % (c, path)
    return ordered_annotation

def load_files_from_kzip(path, load_mitos):
    """Load all available files from annotation kzip

    Parameters
    ----------
    path : str
        path to kzip
    load_mitos : bool
        load mito hull points and ids

    Returns
    -------
    list of np.array
        coordinates [hull, mitos, vc, sj], id lists, normals
    """
    coord_list = [np.zeros((0, 3)), np.zeros((0, 3)), np.zeros((0, 3)),
                  np.zeros((0, 3))]
    hull_normals = [np.zeros((0, 3)), np.zeros((0, 3)), np.zeros((0, 3)),
                    np.zeros((0, 3))]
    # id lists of cell objects
    id_list = [np.zeros((0, )), np.zeros((0, )), np.zeros((0, ))]
    zf = zipfile.ZipFile(path, 'r')
    for i, filename in enumerate(['hull_points.xyz', 'mitos.txt', 'vc.txt',
                                  'sj.txt']):
        try:
            if i == 1 and load_mitos is not True:
                continue
            data = np.fromstring(zf.read(filename), sep=' ')
            if len(data) == 0:
                continue
            if np.isscalar(data[0]) and data[0] == -1:
                continue
            if i == 0:
                hull_normals = data.reshape(data.shape[0]/6, 6)[:, 3:]
                data = data.reshape(data.shape[0]/6, 6)[:, :3]
            else:
                data = data.reshape(data.shape[0]/3, 3)
            coord_list[i] = data.astype(np.uint32)
        except (IOError, ImportError, KeyError):
            pass
    for i, filename in enumerate(['mitos_id.txt', 'vc_id.txt', 'sj_id.txt']):
        try:
            if i == 0 and load_mitos is not True:
                continue
            data = np.fromstring(zf.read(filename), sep=' ')
            if len(data) == 0:
                continue
            if data[0] == -1:
                continue
            id_list[i] = data.astype(np.uint32)
        except (IOError, ImportError, KeyError):
            pass
    zf.close()
    return coord_list, id_list, hull_normals

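A sketch of unpacking the return values for a single mapped annotation (the kzip path is hypothetical):

coord_list, id_list, hull_normals = load_files_from_kzip(
    "/path/to/iter_0_42.k.zip", load_mitos=True)
hull_coords = coord_list[0]   # (N, 3) cell hull coordinates
mito_coords = coord_list[1]   # (M, 3) mito hull coordinates
mito_ids = id_list[0]         # object id per mito hull point
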
def load_objpkl_from_kzip(path):
    """Loads object pickle files from mapped skeleton kzip

    Parameters
    ----------
    path : str
        Path to kzip

    Returns
    -------
    list of UltrastructuralDataset
    """
    zf = zipfile.ZipFile(path, 'r')
    object_datasets = []
    for ix, filename in enumerate(['mitos.pkl', 'vc.pkl', 'sj.pkl']):
        object_datasets.append(UltrastructuralDataset('', '', ''))
        try:
            temp = tempfile.TemporaryFile()
            temp.write(zf.read(filename))
            temp.seek(0)
            obj_ds = pickle.load(temp)
            object_datasets[ix] = obj_ds
        except (IOError, ImportError, KeyError):
            pass
    zf.close()
    return object_datasets

def load_anno_list(fpaths, load_mitos=True, append_obj=True):
    """Load nml's given in fpaths and append according hull information from
    xyz-file, as well as object coordinates from txt files in kzip (if given).

    Parameters
    ----------
    fpaths : list of str
        paths to tracings
    load_mitos : bool
        load mito hull points and ids
    append_obj : bool
        load mapped cell objects

    Returns
    -------
    list of list of SkeletonAnnotations
        list of mapped cell tracings
    """
    anno_list = []
    for path in fpaths:
        mapped_skel = load_mapped_skeleton(path, append_obj, load_mitos)
        if mapped_skel is None:
            continue
        anno_list.append(mapped_skel)
    return anno_list

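Together with get_filepaths_from_dir (defined below) this gives the usual loading loop for a folder of mapped tracings; a sketch with a hypothetical directory:

fpaths = get_filepaths_from_dir("/path/to/neurons/")
annos = load_anno_list(fpaths)
# each element is a list [skeleton, mitos, vc, sj, soma] of SkeletonAnnotations
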
def load_mapped_skeleton(path, append_obj, load_mitos):
    """Load mapped cell tracing and append mapped cell objects

    Parameters
    ----------
    path : str
        path to tracing kzip
    append_obj : bool
        append cell objects
    load_mitos : bool
        load mitochondria objects

    Returns
    -------
    list of SkeletonAnnotations
        mapped cell tracings (skel, mito, vc, sj, soma)
    """
    mapped_skel = load_ordered_mapped_skeleton(path)
    if append_obj:
        skel = mapped_skel[0]
        obj_dicts = load_objpkl_from_kzip(path)
        skel.mitos = obj_dicts[0]
        skel.vc = obj_dicts[1]
        skel.sj = obj_dicts[2]
        if 'k.zip' in os.path.basename(path):
            path = path[:-5]
        if 'nml' in os.path.basename(path):
            path = path[:-3]
        coord_list, id_list, hull_normals = \
            load_files_from_kzip(path + 'k.zip', load_mitos)
        mito_hull_ids, vc_hull_ids, sj_hull_ids = id_list
        hull, mitos, vc, sj = coord_list
        skel.hull_coords = hull
        skel.mito_hull_coords = mitos
        skel.mito_hull_ids = mito_hull_ids
        skel.vc_hull_coords = vc
        skel.vc_hull_ids = vc_hull_ids
        skel.sj_hull_coords = sj
        skel.sj_hull_ids = sj_hull_ids
        skel.hull_normals = hull_normals
    return mapped_skel

def get_filepaths_from_dir(directory, ending='k.zip', recursively=False):
    """Collect all files with certain ending from directory.

    Parameters
    ----------
    directory : str
        path to lookup directory
    ending : str
        ending of files
    recursively : bool
        add files from subdirectories

    Returns
    -------
    list of str
        paths to files
    """
    if recursively:
        files = [os.path.join(r, f) for r, s, fs in os.walk(directory)
                 for f in fs if f.endswith(ending)]
    else:
        files = [os.path.join(directory, f)
                 for f in next(os.walk(directory))[2] if f.endswith(ending)]
    return files

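For example, to collect every k.zip below a tracing directory (hypothetical path), including subdirectories:

kzip_paths = get_filepaths_from_dir("/path/to/tracings/", ending="k.zip",
                                    recursively=True)
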
def get_paths_of_skelID(id_list, traced_skel_dir):
    """Gather paths to kzip of skeletons with ID in id_list

    Parameters
    ----------
    id_list : list of str
        skeleton ID's
    traced_skel_dir : str
        directory of mapped skeletons

    Returns
    -------
    list of str
        paths of skeletons in id_list
    """
    mapped_skel_paths = get_filepaths_from_dir(traced_skel_dir)
    mapped_skel_ids = re.findall(r'iter_\d+_(\d+)',
                                 ''.join(mapped_skel_paths))
    wanted_paths = []
    for skelID in id_list:
        try:
            path = mapped_skel_paths[mapped_skel_ids.index(str(skelID))]
            wanted_paths.append(path)
        except ValueError:
            # ID not found among the mapped skeleton files
            wanted_paths.append(None)
    return wanted_paths

def supp_fname_from_fpath(fpath):
    """Returns supported filename from path to written file to write it to
    kzip

    Parameters
    ----------
    fpath : str
        path to file

    Returns
    -------
    str
        filename if supported, otherwise a ValueError is raised
    """
    file_name = os.path.basename(fpath)
    if '.nml' in file_name or '.xml' in file_name:
        return 'annotation.xml'
    if '.xyz' in file_name:
        return 'hull_points.xyz'
    supported = ['mitos.txt', 'vc.txt', 'sj.txt', 'mitos_id.txt',
                 'vc_id.txt', 'sj_id.txt', 'mitos.pkl', 'vc.pkl', 'sj.pkl',
                 'axoness_feat.csv', 'spiness_feat.csv', 'mergelist.txt']
    for supp_name in supported:
        if supp_name in file_name:
            return supp_name
    raise ValueError("Unsupported file %s for kzip." % file_name)

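For instance (hypothetical input paths):

supp_fname_from_fpath("/tmp/skel42_mitos.txt")    # returns 'mitos.txt'
supp_fname_from_fpath("/tmp/skel42_tracing.nml")  # returns 'annotation.xml'
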
def write_data2kzip(kzip_path, fpath, force_overwrite=False):
    """Write supported files to kzip of mapped annotation

    Parameters
    ----------
    kzip_path : str
    fpath : str
    force_overwrite : bool
    """
    file_name = supp_fname_from_fpath(fpath)
    if os.path.isfile(kzip_path):
        try:
            if force_overwrite:
                with zipfile.ZipFile(kzip_path, "w",
                                     zipfile.ZIP_DEFLATED) as zf:
                    zf.write(fpath, file_name)
            else:
                remove_from_zip(kzip_path, file_name)
                with zipfile.ZipFile(kzip_path, "a",
                                     zipfile.ZIP_DEFLATED) as zf:
                    zf.write(fpath, file_name)
        except Exception, e:
            print "Couldn't open file %s for reading and" \
                  " overwriting." % kzip_path, e
    else:
        try:
            with zipfile.ZipFile(kzip_path, "w", zipfile.ZIP_DEFLATED) as zf:
                zf.write(fpath, file_name)
        except Exception, e:
            print "Couldn't open file %s for writing." % kzip_path, e
    os.remove(fpath)

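A sketch of archiving a freshly written feature table into a mapped annotation (paths hypothetical; note that the source file is removed afterwards):

np.savetxt("/tmp/axoness_feat.csv", np.zeros((2, 3)), delimiter=",")
# stores the file as 'axoness_feat.csv' inside the kzip, then deletes it
write_data2kzip("/path/to/iter_0_42.k.zip", "/tmp/axoness_feat.csv")
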
def remove_from_zip(zipfname, *filenames):
    """Removes filenames from zipfile

    Parameters
    ----------
    zipfname : str
        Path to zipfile
    filenames : list of str
        files to delete
    """
    tempdir = tempfile.mkdtemp()
    try:
        tempname = os.path.join(tempdir, 'new.zip')
        with zipfile.ZipFile(zipfname, 'r') as zipread:
            with zipfile.ZipFile(tempname, 'w') as zipwrite:
                for item in zipread.infolist():
                    if item.filename not in filenames:
                        data = zipread.read(item.filename)
                        zipwrite.writestr(item, data)
        shutil.move(tempname, zipfname)
    finally:
        shutil.rmtree(tempdir)

def get_skelID_from_path(skel_path):
    """Parse skeleton id from filename

    Parameters
    ----------
    skel_path : str
        path to skeleton

    Returns
    -------
    int
        skeleton ID
    """
    return int(re.findall(r'iter_0_(\d+)', skel_path)[0])

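The file naming convention assumed here is 'iter_<iteration>_<skeletonID>'; for example:

get_skelID_from_path("/path/to/iter_0_1234.k.zip")  # returns 1234
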
def connect_soma_tracing(soma):
    """Densify the soma tracing so that many rays are emitted from it later
    on. Connects nearby nodes (within 4000 nm, found via kd-tree) of the soma
    tracings with edges.

    Parameters
    ----------
    soma : SkeletonAnnotation
        Soma tracing

    Returns
    -------
    SkeletonAnnotation
        Sparsely connected soma tracing
    """
    node_list = np.array([node for node in soma.getNodes()])
    coords = np.array([node.getCoordinate_scaled() for node in node_list])
    if len(coords) == 0:
        return soma
    tree = spatial.cKDTree(coords)
    near_nodes_list = tree.query_ball_point(coords, 4000)
    for ii, node in enumerate(node_list):
        near_nodes_ids = near_nodes_list[ii]
        near_nodes = node_list[near_nodes_ids]
        for nn in near_nodes:
            soma.addEdge(node, nn)
    return soma

def cell_object_id_parser(obj_trees):
    """Extracts unique object ids from object tree list for cell objects
    'mitos', 'vc' and 'sj'

    Parameters
    ----------
    obj_trees : list of annotation objects
        ['mitos', 'vc', 'sj']

    Returns
    -------
    dict
        keys 'mitos', 'vc' and 'sj' containing unique object ID's as values
    """
    obj_id_dict = {}
    for key, tree in zip(['mitos', 'vc', 'sj'], obj_trees):
        ids = []
        for node in tree.getNodes():
            comment = node.getComment()
            match = re.search('%s-([^,]+)' % key, comment)
            ids.append(int(match.group(1)))
        obj_id_dict[key] = ids
    return obj_id_dict

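The per-node comment convention this relies on looks like 'mitos-<objectID>, ...'; the extraction step in isolation:

match = re.search('%s-([^,]+)' % 'mitos', "mitos-1234, further tags")
obj_id = int(match.group(1))  # -> 1234
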
def write_obj2pkl(objects, path):
    """Writes object to pickle file

    Parameters
    ----------
    objects : UltrastructuralDataset
    path : str
        destination
    """
    with open(path, 'wb') as output:
        pickle.dump(objects, output, -1)

def load_pkl2obj(path):
    """Loads pickle file of object

    Parameters
    ----------
    path : str
        path of source file

    Returns
    -------
    UltrastructuralDataset
    """
    with open(path, 'rb') as inp:
        objects = pickle.load(inp)
    return objects

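The two pickle helpers form a simple round trip for any picklable object (path hypothetical):

write_obj2pkl({"ids": np.array([1, 2, 3])}, "/tmp/objects.pkl")
objects = load_pkl2obj("/tmp/objects.pkl")
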
def helper_get_voxels(obj):
    """Helper function to retrieve object voxels

    Parameters
    ----------
    obj : SegmentationObject

    Returns
    -------
    np.array
        object voxels
    """
    try:
        voxels = obj.voxels
    except KeyError:
        return np.array([])
    return voxels

def helper_get_hull_voxels(obj):
    """Helper function to retrieve object hull voxels.

    Parameters
    ----------
    obj : SegmentationObject

    Returns
    -------
    np.array
        object hull voxels
    """
    return obj.hull_voxels

def hull2text(hull_coords, normals, path):
    """Writes hull coordinates and normals to xyz file. Each line corresponds
    to coordinates and normal vector of one point: x y z n_x n_y n_z

    Parameters
    ----------
    hull_coords : np.array
    normals : np.array
    path : str
    """
    # write hull points (ray end points) together with their normals
    with open(path, 'wb') as f:
        for i in range(hull_coords.shape[0]):
            end_point = hull_coords[i]
            normal = normals[i]
            f.write("%d %d %d %0.4f %0.4f %0.4f\n" %
                    (end_point[0], end_point[1], end_point[2],
                     normal[0], normal[1], normal[2]))

def obj_hull2text(id_list, hull_coords_list, path):
    """Writes object hull coordinates to a txt file, one 'x y z' line per
    point, and, if id_list is non-empty, the corresponding object ids to a
    second file with suffix '_id.txt', one id per line.

    Parameters
    ----------
    id_list : np.array
    hull_coords_list : np.array
    path : str
    """
    with open(path, 'wb') as f:
        for i in range(len(hull_coords_list)):
            coord = hull_coords_list[i]
            f.write("%d %d %d\n" % (coord[0], coord[1], coord[2]))
    if len(id_list) == 0:
        return
    with open(path[:-4] + '_id.txt', 'wb') as f:
        for i in range(len(hull_coords_list)):
            ix = id_list[i]
            f.write("%d\n" % ix)

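A usage sketch for both text writers (paths hypothetical):

coords = np.array([[1, 2, 3], [4, 5, 6]])
normals = np.array([[0., 0., 1.], [1., 0., 0.]])
hull2text(coords, normals, "/tmp/hull_points.xyz")
# writes /tmp/mitos.txt and, because ids are given, /tmp/mitos_id.txt
obj_hull2text(np.array([7, 8]), coords, "/tmp/mitos.txt")
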
def load_from_h5py(path, hdf5_names=None, as_dict=False):
    """Loads data from a h5py File

    Parameters
    ----------
    path : str
    hdf5_names : list of str
        if None, all keys will be loaded
    as_dict : bool
        if False a list is returned

    Returns
    -------
    data : dict or list of np.array
    """
    if as_dict:
        data = {}
    else:
        data = []
    f = h5py.File(path, 'r')
    try:
        if hdf5_names is None:
            hdf5_names = f.keys()
        for hdf5_name in hdf5_names:
            if as_dict:
                data[hdf5_name] = f[hdf5_name].value
            else:
                data.append(f[hdf5_name].value)
    except KeyError:
        raise Exception("Error at path: %s, with labels: %s"
                        % (path, hdf5_names))
    finally:
        f.close()
    return data

def save_to_h5py(data, path, hdf5_names=None):
    """Saves data to h5py File

    Parameters
    ----------
    data : list or dict of np.arrays
    path : str
    hdf5_names : list of str
        has to be the same length as data; only required if data is a list

    Returns
    -------
    nothing
    """
    if (not type(data) is dict) and hdf5_names is None:
        raise Exception("hdf5_names has to be set when data is a list")
    if os.path.isfile(path):
        os.remove(path)
    f = h5py.File(path, "w")
    if type(data) is dict:
        for key in data.keys():
            f.create_dataset(key, data=data[key], compression="gzip")
    else:
        if len(hdf5_names) != len(data):
            f.close()
            raise Exception("Not enough or too many hdf5 names given!")
        for nb_data in range(len(data)):
            f.create_dataset(hdf5_names[nb_data], data=data[nb_data],
                             compression="gzip")
    f.close()

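A round trip through both h5py helpers (file path hypothetical):

raw = np.random.randint(0, 255, size=(16, 16, 16)).astype(np.uint8)
save_to_h5py([raw], "/tmp/example.h5", hdf5_names=["raw"])
raw_loaded = load_from_h5py("/tmp/example.h5", hdf5_names=["raw"])[0]
assert np.array_equal(raw, raw_loaded)
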
def switch_array_entries(this_array, entries):
    """Swaps two array entries

    Parameters
    ----------
    this_array : np.array
    entries : list of int

    Returns
    -------
    this_array : np.array
    """
    entry_0 = this_array[entries[0]]
    this_array[entries[0]] = this_array[entries[1]]
    this_array[entries[1]] = entry_0
    return this_array

def cut_array_in_one_dim(array, start, end, dim):
    """Cuts an array along a dimension

    Parameters
    ----------
    array : np.array
    start : int
    end : int
    dim : int

    Returns
    -------
    array : np.array
    """
    start = int(start)
    end = int(end)
    if dim == 0:
        array = array[start:end, :, :]
    elif dim == 1:
        array = array[:, start:end, :]
    elif dim == 2:
        array = array[:, :, start:end]
    else:
        raise NotImplementedError()
    return array

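For example, to extract a slab along the second axis:

vol = np.zeros((10, 10, 10))
sub = cut_array_in_one_dim(vol, 2, 5, dim=1)  # sub.shape == (10, 3, 10)
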