# -*- coding: utf-8 -*-
# SyConn - Synaptic connectivity inference toolkit
#
# Copyright (c) 2016 - now
# Max-Planck-Institute for Medical Research, Heidelberg, Germany
# Authors: Sven Dorkenwald, Philipp Schubert, Joergen Kornfeld
import numpy as np
import re
import h5py
import os
import shutil
import tempfile
import zipfile
from multiprocessing import cpu_count
from scipy import spatial
from basics import *
from segmentationdataset import load_dataset
from knossos_utils import skeleton_utils as su
from ..utils.segmentationdataset import UltrastructuralDataset
from knossos_utils.skeleton import SkeletonAnnotation
import cPickle as pickle


class DataHandler(object):
"""Initialized with paths or cell components (SegmentationObjects), path to
membrane prediction and source path of traced skeletons (to be computed).
DataHandler is needed for further processing.
Attributes
----------
wd : str
path to working directory, by default all other paths will lead to
subfolders of this directory
mem_path : str
path to barrier prediction
cs_path : str
path to supervoxel data
myelin_ds_path : str
path to myelin prediction
skeleton_path : str
path to cell tracings
data_path : str
output directory for mapped cell tracings
scaling : np.array
        scaling of the dataset, s.t. after applying it coordinates are
        isotropic and given in nm
mem : KnossosDataset
will be assigned automatically
    mem_offset : np.array
        optional offset of dataset
mitos/vc/sj : UltrastructuralDataset
Dataset which contains cell objects of mitochondria, vesicle clouds and
synaptic junctions respectively
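
    Examples
    --------
    Hypothetical usage sketch (the working directory path is illustrative and
    is expected to contain the subfolder layout set up in ``__init__``):
    >>> dh = DataHandler('/path/to/working_dir', scaling=(9., 9., 20.))
    >>> objs = (dh.mitos, dh.vc, dh.sj)  # UltrastructuralDataset or None each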
"""
def __init__(self, wd, scaling=(9., 9., 20.)):
"""
Parameters
----------
wd : str
path to working directory
        scaling : tuple of float
            scaling of the data set, s.t. after applying it the data is
            isotropic
"""
self.wd = wd
vc_source = wd + '/chunkdataset/obj_vc_ARGUS_5/'
sj_source = wd + '/chunkdataset/obj_sj_ARGUS_3/'
mito_source = wd + '/chunkdataset/obj_mi_ARGUS_8/'
self.mem_path = wd + '/knossosdatasets/rrbarrier/'
self.cs_path = wd + '/chunkdataset/j0126_watershed_map/'
self.myelin_ds_path = wd + "/knossosdatasets/myelin/"
self.data_path = wd + '/neurons/'
self.skeleton_path = wd + '/tracings/'
self.nb_cpus = np.min((cpu_count(), 2))
if not os.path.exists(self.data_path):
os.makedirs(self.data_path)
self.scaling = arr(scaling)
self.mem = None
self.mem_offset = arr([0, 0, 0])
object_dict = {0: "mitos", 1: "vc", 2: "sj"}
objects = [None, None, None]
for i, source in enumerate([mito_source, vc_source, sj_source]):
if type(source) is str:
try:
obj = load_dataset(source)
obj.init_properties()
# print "Initialized %s objects." % object_dict[i]
except IOError:
obj = None
else:
obj = source
objects[i] = obj
self.mitos = objects[0]
self.vc = objects[1]
self.sj = objects[2]


def load_ordered_mapped_skeleton(path):
"""Load nml of mapped skeleton and order trees
Parameters
----------
path : str
Path to nml
Returns
-------
list
List of trees with order [skeleton, mitos, vc, sj, soma]
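
    Examples
    --------
    Minimal sketch (the path is illustrative):
    >>> skel, mitos, vc, sj, soma = load_ordered_mapped_skeleton(
    ...     '/path/to/tracings/example.k.zip')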
"""
anno_dict = {"skeleton": 0, "mitos": 1, "vc": 2, "sj": 3, "soma": 4}
annotation = su.loadj0126NML(path)
ordered_annotation = list([[], [], [], [], []])
for name in anno_dict.keys():
init_anno = SkeletonAnnotation()
init_anno.scaling = [9, 9, 20]
init_anno.appendComment(name)
ordered_annotation[anno_dict[name]] = init_anno
if len(annotation) == 1 and not np.any([key in annotation[0].getComment()
for key in anno_dict.keys()]):
ordered_annotation[0] = annotation[0]
else:
for anno in annotation:
c = anno.getComment()
if c == '':
continue
if ('vc' in c) or ('p4' in c):
c = 'vc'
elif ('sj' in c) or ('az' in c):
c = 'sj'
elif 'mitos' in c:
c = 'mitos'
elif 'soma' in c:
c = 'soma'
elif 'skeleton' in c:
c = 'skeleton'
else:
print "Using %s as skeleton tree." % c
c = 'skeleton'
try:
ordered_annotation[anno_dict[c]] = anno
except KeyError:
print "Found strange tree %s in skeleton at %s." % (c, path)
return ordered_annotation


def load_files_from_kzip(path, load_mitos):
"""Load all available files from annotation kzip
Parameters
----------
path : str
path to kzip
load_mitos : bool
load mito hull points and ids
Returns
-------
list of np.array
coordinates [hull, mitos, vc, sj], id lists, normals
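
    Examples
    --------
    Hypothetical call (the path is illustrative); the returned lists follow
    the order [hull, mitos, vc, sj]:
    >>> coords, ids, normals = load_files_from_kzip(
    ...     '/path/to/example.k.zip', load_mitos=True)
    >>> hull_coords = coords[0]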
"""
coord_list = [np.zeros((0, 3)), np.zeros((0, 3)),
np.zeros((0, 3)), np.zeros((0, 3))]
hull_normals = [np.zeros((0, 3)), np.zeros((0, 3)),
np.zeros((0, 3)), np.zeros((0, 3))]
# id_list of cell objects
id_list = [np.zeros((0, )), np.zeros((0, )), np.zeros((0, ))]
zf = zipfile.ZipFile(path, 'r')
for i, filename in enumerate(['hull_points.xyz', 'mitos.txt', 'vc.txt',
'sj.txt']):
try:
if i == 1 and load_mitos is not True:
continue
data = np.fromstring(zf.read(filename), sep=' ')
if len(data) == 0:
continue
            if data[0] == -1:
continue
if i == 0:
                hull_normals = data.reshape(-1, 6)[:, 3:]
                data = data.reshape(-1, 6)[:, :3]
            else:
                data = data.reshape(-1, 3)
coord_list[i] = data.astype(np.uint32)
except (IOError, ImportError, KeyError):
pass
for i, filename in enumerate(['mitos_id.txt', 'vc_id.txt', 'sj_id.txt']):
try:
if i == 0 and load_mitos is not True:
continue
data = np.fromstring(zf.read(filename), sep=' ')
if len(data) == 0:
continue
if data[0] == -1:
continue
id_list[i] = data.astype(np.uint32)
except (IOError, ImportError, KeyError):
pass
zf.close()
return coord_list, id_list, hull_normals


def load_objpkl_from_kzip(path):
"""Loads object pickle file from mapped skeleton kzip
Parameters
----------
path : str
Path to kzip
Returns
-------
    list of UltrastructuralDataset
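
    Examples
    --------
    Hypothetical call (the path is illustrative); the returned order is
    [mitos, vc, sj]:
    >>> mitos, vc, sj = load_objpkl_from_kzip('/path/to/example.k.zip')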
"""
    object_datasets = []
    with zipfile.ZipFile(path, 'r') as zf:
        for ix, filename in enumerate(['mitos.pkl', 'vc.pkl', 'sj.pkl']):
            object_datasets.append(UltrastructuralDataset('', '', ''))
            try:
                temp = tempfile.TemporaryFile()
                temp.write(zf.read(filename))
                temp.seek(0)
                object_datasets[ix] = pickle.load(temp)
            except (IOError, ImportError, KeyError):
                pass
    return object_datasets


def load_anno_list(fpaths, load_mitos=True, append_obj=True):
    """Load nml's given in fpaths and append the corresponding hull information
    from the xyz-file, as well as object coordinates from txt files in the
    kzip (if available).
Parameters
----------
fpaths : list of str
paths to tracings
load_mitos: bool
load mito hull points and ids
append_obj : bool
load mapped cell objects
Returns
-------
list of list of SkeletonAnnotations
list of mapped cell tracings
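
    Examples
    --------
    Sketch combining this with ``get_filepaths_from_dir`` (the directory is
    illustrative):
    >>> fpaths = get_filepaths_from_dir('/path/to/tracings/')
    >>> annos = load_anno_list(fpaths, load_mitos=True)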
"""
    anno_list = []
    for path in fpaths:
        mapped_skel = load_mapped_skeleton(path, append_obj, load_mitos)
        if mapped_skel is None:
            continue
        anno_list.append(mapped_skel)
return anno_list


def load_mapped_skeleton(path, append_obj, load_mitos):
"""Load mapped cell tracing and append mapped cell objects
Parameters
----------
path : str
path to tracing kzip
append_obj : bool
append cell objects
load_mitos : bool
load mitochondria objects
Returns
-------
list of SkeletonAnnotations
mapped cell tracings (skel, mito, vc, sj, soma)
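
    Examples
    --------
    Sketch (the path is illustrative):
    >>> skel, mitos, vc, sj, soma = load_mapped_skeleton(
    ...     '/path/to/iter_0_1.k.zip', append_obj=True, load_mitos=True)
    >>> hull = skel.hull_coords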
"""
mapped_skel = load_ordered_mapped_skeleton(path)
if append_obj:
skel = mapped_skel[0]
obj_dicts = load_objpkl_from_kzip(path)
skel.mitos = obj_dicts[0]
skel.vc = obj_dicts[1]
skel.sj = obj_dicts[2]
if 'k.zip' in os.path.basename(path):
path = path[:-5]
if 'nml' in os.path.basename(path):
path = path[:-3]
coord_list, id_list, hull_normals = \
load_files_from_kzip(path + 'k.zip', load_mitos)
mito_hull_ids, vc_hull_ids, sj_hull_ids = id_list
hull, mitos, vc, sj = coord_list
skel.hull_coords = hull
skel.mito_hull_coords = mitos
skel.mito_hull_ids = mito_hull_ids
skel.vc_hull_coords = vc
skel.vc_hull_ids = vc_hull_ids
skel.sj_hull_coords = sj
skel.sj_hull_ids = sj_hull_ids
skel.hull_normals = hull_normals
return mapped_skel


def get_filepaths_from_dir(directory, ending='k.zip', recursively=False):
"""Collect all files with certain ending from directory.
Parameters
----------
directory: str
path to lookup directory
ending: str
ending of files
recursively: boolean
add files from subdirectories
Returns
-------
list of str
paths to files
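
    Examples
    --------
    Collect all k.zip files directly inside a directory (path illustrative):
    >>> paths = get_filepaths_from_dir('/path/to/tracings/', ending='k.zip')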
"""
    if recursively:
        files = [os.path.join(r, f) for r, s, fs in os.walk(directory)
                 for f in fs if f.endswith(ending)]
    else:
        files = [os.path.join(directory, f)
                 for f in next(os.walk(directory))[2] if f.endswith(ending)]
return files


def get_paths_of_skelID(id_list, traced_skel_dir):
"""Gather paths to kzip of skeletons with ID in id_list
Parameters
----------
id_list: list of str
skeleton ID's
traced_skel_dir: str
directory of mapped skeletons
Returns
-------
list of str
paths of skeletons in id_list
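
    Examples
    --------
    Sketch (the directory is illustrative; file names are expected to follow
    the 'iter_<i>_<ID>' convention):
    >>> paths = get_paths_of_skelID([5, 7], '/path/to/tracings/')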
"""
mapped_skel_paths = get_filepaths_from_dir(traced_skel_dir)
mapped_skel_ids = re.findall('iter_\d+_(\d+)', ''.join(mapped_skel_paths))
wanted_paths = []
    for skelID in id_list:
        try:
            path = mapped_skel_paths[mapped_skel_ids.index(str(skelID))]
            wanted_paths.append(path)
        except ValueError:
            wanted_paths.append(None)
return wanted_paths


def supp_fname_from_fpath(fpath):
    """Returns the supported filename inferred from the path of a written
    file, s.t. it can be added to a kzip
Parameters
----------
fpath : str
path to file
Returns
-------
str
        filename if supported; raises ValueError otherwise
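
    Examples
    --------
    >>> supp_fname_from_fpath('/some/dir/iter_0_1_mitos.txt')
    'mitos.txt'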
"""
    file_name = os.path.basename(fpath)
    if '.nml' in file_name or '.xml' in file_name:
        return 'annotation.xml'
    if '.xyz' in file_name:
        return 'hull_points.xyz'
    supported_names = ['mitos.txt', 'vc.txt', 'sj.txt', 'mitos_id.txt',
                       'vc_id.txt', 'sj_id.txt', 'mitos.pkl', 'vc.pkl',
                       'sj.pkl', 'axoness_feat.csv', 'spiness_feat.csv',
                       'mergelist.txt']
    for supp_name in supported_names:
        if supp_name in file_name:
            return supp_name
    raise ValueError("Unsupported file: %s" % file_name)


def write_data2kzip(kzip_path, fpath, force_overwrite=False):
"""Write supported files to kzip of mapped annotation
Parameters
----------
kzip_path : str
fpath : str
force_overwrite : bool
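
    Examples
    --------
    Hypothetical call (paths are illustrative); note that fpath is deleted
    after it has been written to the kzip:
    >>> write_data2kzip('/path/to/example.k.zip', '/tmp/mitos.txt')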
"""
file_name = supp_fname_from_fpath(fpath)
if os.path.isfile(kzip_path):
try:
            if force_overwrite:
                # mode "w" recreates the archive; only file_name will remain
                with zipfile.ZipFile(kzip_path, "w", zipfile.ZIP_DEFLATED) as zf:
                    zf.write(fpath, file_name)
else:
remove_from_zip(kzip_path, file_name)
with zipfile.ZipFile(kzip_path, "a", zipfile.ZIP_DEFLATED) as zf:
zf.write(fpath, file_name)
except Exception, e:
print "Couldn't open file %s for reading and" \
" overwriting." % kzip_path, e
else:
try:
with zipfile.ZipFile(kzip_path, "w", zipfile.ZIP_DEFLATED) as zf:
zf.write(fpath, file_name)
except Exception, e:
print "Couldn't open file %s for writing." % kzip_path, e
os.remove(fpath)


def remove_from_zip(zipfname, *filenames):
"""Removes filenames from zipfile
Parameters
----------
zipfname : str
Path to zipfile
    *filenames : str
        names of the files to remove from the archive
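
    Examples
    --------
    Remove two object files from a kzip (the path is illustrative):
    >>> remove_from_zip('/path/to/example.k.zip', 'mitos.txt', 'mitos_id.txt')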
"""
tempdir = tempfile.mkdtemp()
try:
tempname = os.path.join(tempdir, 'new.zip')
with zipfile.ZipFile(zipfname, 'r') as zipread:
with zipfile.ZipFile(tempname, 'w') as zipwrite:
for item in zipread.infolist():
if item.filename not in filenames:
data = zipread.read(item.filename)
zipwrite.writestr(item, data)
shutil.move(tempname, zipfname)
finally:
shutil.rmtree(tempdir)


def get_skelID_from_path(skel_path):
"""Parse skeleton id from filename
Parameters
----------
skel_path : str
path to skeleton
Returns
-------
int
skeleton ID
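
    Examples
    --------
    >>> get_skelID_from_path('/path/to/iter_0_1234.k.zip')
    1234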
"""
return int(re.findall('iter_0_(\d+)', skel_path)[0])


def connect_soma_tracing(soma):
    """Connect tracings of a soma, s.t. a sufficient number of rays can be
    emitted. Nearby nodes of the soma tracings (found via kd-tree query within
    4000 nm) are connected by edges
Parameters
----------
soma: SkeletonAnnotation
Soma tracing
Returns
-------
SkeletonAnnotation
Sparsely connected soma tracing
"""
node_list = np.array([node for node in soma.getNodes()])
coords = np.array([node.getCoordinate_scaled() for node in node_list])
if len(coords) == 0:
return soma
tree = spatial.cKDTree(coords)
near_nodes_list = tree.query_ball_point(coords, 4000)
for ii, node in enumerate(node_list):
near_nodes_ids = near_nodes_list[ii]
near_nodes = node_list[near_nodes_ids]
for nn in near_nodes:
soma.addEdge(node, nn)
return soma


def cell_object_id_parser(obj_trees):
"""Extracts unique object ids from object tree list for cell objects
'mitos', 'vc' and 'sj'
Parameters
----------
obj_trees : list of annotation objects ['mitos', 'vc', 'sj']
Returns
-------
dict
keys 'mitos', 'vc' and 'sj' containing unique object ID's as values
"""
    obj_id_dict = {}
    for key, tree in zip(['mitos', 'vc', 'sj'], obj_trees):
        ids = []
        for node in tree.getNodes():
            comment = node.getComment()
            match = re.search('%s-([^,]+)' % key, comment)
            ids.append(int(match.group(1)))
        obj_id_dict[key] = ids
    return obj_id_dict


def write_obj2pkl(objects, path):
"""Writes object to pickle file
Parameters
----------
objects : UltrastructuralDatasetObject
path : str
        destination path
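
    Examples
    --------
    Round trip with ``load_pkl2obj`` (path and object are illustrative):
    >>> write_obj2pkl(my_dataset, '/tmp/objects.pkl')
    >>> my_dataset = load_pkl2obj('/tmp/objects.pkl')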
"""
with open(path, 'wb') as output:
pickle.dump(objects, output, -1)


def load_pkl2obj(path):
"""Loads pickle file of object
Parameters
----------
path: str
path of source file
Returns
-------
UltrastructuralDatasetObject
"""
with open(path, 'rb') as inp:
objects = pickle.load(inp)
return objects


def helper_get_voxels(obj):
    """Helper function to retrieve the voxels of an object
Parameters
----------
obj : SegmentationObject
Returns
-------
np.array
object voxels
"""
try:
voxels = obj.voxels
except KeyError:
return np.array([])
return voxels


def helper_get_hull_voxels(obj):
    """Helper function to retrieve the hull voxels of an object.
Parameters
----------
obj : SegmentationObject
Returns
-------
np.array
object hull voxels
"""
return obj.hull_voxels


def hull2text(hull_coords, normals, path):
"""Writes hull coordinates and normals to xyz file. Each line corresponds to
coordinates and normal vector of one point x y z n_x n_y n_z.
Parameters
----------
hull_coords : np.array
normals : np.array
path : str
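
    Examples
    --------
    Write three hull points with dummy normals (the path is illustrative):
    >>> pts = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]])
    >>> ns = np.ones((3, 3))
    >>> hull2text(pts, ns, '/tmp/hull_points.xyz')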
"""
    # write ray-end-points to txt file (incl. normals)
    with open(path, 'wb') as f:
        for i in range(hull_coords.shape[0]):
            end_point = hull_coords[i]
            normal = normals[i]
            f.write("%d %d %d %0.4f %0.4f %0.4f\n" %
                    (end_point[0], end_point[1], end_point[2],
                     normal[0], normal[1], normal[2]))


def obj_hull2text(id_list, hull_coords_list, path):
"""Writes object hull coordinates and corresponding object ids to txt file.
Each line corresponds to id and coordinate vector of one object: id x y z
Parameters
----------
id_list : np.array
hull_coords_list : np.array
path : str
"""
    # write object hull coordinates
    with open(path, 'wb') as f:
        for i in range(len(hull_coords_list)):
            coord = hull_coords_list[i]
            f.write("%d %d %d\n" % (coord[0], coord[1], coord[2]))
    if len(id_list) == 0:
        return
    # write corresponding object ids
    with open(path[:-4] + '_id.txt', 'wb') as f:
        for i in range(len(hull_coords_list)):
            f.write("%d\n" % id_list[i])


def load_from_h5py(path, hdf5_names=None, as_dict=False):
"""
Loads data from a h5py File
Parameters
----------
path: str
hdf5_names: list of str
if None, all keys will be loaded
as_dict: boolean
if False a list is returned
Returns
-------
data: dict or np.array
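
    Examples
    --------
    Round trip with ``save_to_h5py`` (the path is illustrative):
    >>> save_to_h5py([np.zeros((2, 2))], '/tmp/example.h5', ['raw'])
    >>> data = load_from_h5py('/tmp/example.h5', hdf5_names=['raw'])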
"""
if as_dict:
data = {}
else:
data = []
    f = h5py.File(path, 'r')
    try:
        if hdf5_names is None:
            hdf5_names = f.keys()
        for hdf5_name in hdf5_names:
            if as_dict:
                data[hdf5_name] = f[hdf5_name].value
            else:
                data.append(f[hdf5_name].value)
    except KeyError:
        raise Exception("Error at path: %s, with labels: %s"
                        % (path, hdf5_names))
    finally:
        f.close()
    return data


def save_to_h5py(data, path, hdf5_names=None):
"""
Saves data to h5py File
Parameters
----------
data: list of np.arrays
path: str
hdf5_names: list of str
has to be the same length as data
Returns
-------
nothing
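
    Examples
    --------
    Dict input does not require hdf5_names (the path is illustrative):
    >>> save_to_h5py({'raw': np.zeros((2, 2))}, '/tmp/example.h5')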
"""
    if not isinstance(data, dict) and hdf5_names is None:
        raise Exception("hdf5_names has to be set when data is a list")
if os.path.isfile(path):
os.remove(path)
f = h5py.File(path, "w")
if type(data) is dict:
for key in data.keys():
f.create_dataset(key, data=data[key],
compression="gzip")
else:
        if len(hdf5_names) != len(data):
            f.close()
            raise Exception("Not enough or too many hdf5_names given!")
for nb_data in range(len(data)):
f.create_dataset(hdf5_names[nb_data], data=data[nb_data],
compression="gzip")
f.close()


def switch_array_entries(this_array, entries):
    """
    Swaps two array entries
Parameters
----------
this_array: np.array
entries: list of int
Returns
-------
this_array: np.array
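
    Examples
    --------
    >>> switch_array_entries(np.array([1, 2, 3]), [0, 2])
    array([3, 2, 1])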
"""
entry_0 = this_array[entries[0]]
this_array[entries[0]] = this_array[entries[1]]
this_array[entries[1]] = entry_0
return this_array


def cut_array_in_one_dim(array, start, end, dim):
"""
Cuts an array along a dimension
Parameters
----------
array: np.array
start: int
end: int
dim: int
Returns
-------
array: np.array
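
    Examples
    --------
    >>> cut_array_in_one_dim(np.zeros((4, 4, 4)), 1, 3, dim=0).shape
    (2, 4, 4)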
"""
start = int(start)
end = int(end)
if dim == 0:
array = array[start: end, :, :]
elif dim == 1:
array = array[:, start: end, :]
elif dim == 2:
array = array[:, :, start:end]
else:
raise NotImplementedError()
return array