Source code for neurom.io.datawrapper

# Copyright (c) 2015, Ecole Polytechnique Federale de Lausanne, Blue Brain Project
# All rights reserved.
#
# This file is part of NeuroM <https://github.com/BlueBrain/NeuroM>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     1. Redistributions of source code must retain the above copyright
#        notice, this list of conditions and the following disclaimer.
#     2. Redistributions in binary form must reproduce the above copyright
#        notice, this list of conditions and the following disclaimer in the
#        documentation and/or other materials provided with the distribution.
#     3. Neither the name of the copyright holder nor the names of
#        its contributors may be used to endorse or promote products
#        derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Fast neuron IO module."""

import logging
from collections import defaultdict, namedtuple

import numpy as np
from neurom.core.dataformat import COLS, POINT_TYPE, ROOT_ID

L = logging.getLogger(__name__)


TYPE, ID, PID = 0, 1, 2


[docs]class DataWrapper(object): """Class holding a raw data block and section information.""" def __init__(self, data_block, fmt, sections=None): """Section Data Wrapper. data_block is np.array-like with the following columns: [X, Y, Z, R, TYPE, ID, P] X(float): x-coordinate Y(float): y-coordinate Z(float): z-coordinate R(float): radius TYPE(integer): one of the types described by POINT_TYPE ID(integer): unique integer given to each point, the `ROOT_ID` is -1 P(integer): the ID of the parent Args: data_block: as defined above fmt: File format designation, eg: SWC sections: Already extracted sections, otherwise data_block will be used Notes: - there is no ordering constraint: a child can reference a parent ID that comes later in the block - there is no requirement that the IDs are dense - there is no upper bound on the number of rows with the same 'P'arent: in other words, multifurcations are allowed """ self.data_block = data_block self.fmt = fmt # list of DataBlockSection self.sections = sections if sections is not None else _extract_sections(data_block)
[docs] def neurite_root_section_ids(self): """Get the section IDs of the intitial neurite sections.""" sec = self.sections return [i for i, ss in enumerate(sec) if ss.pid > -1 and (sec[ss.pid].ntype == POINT_TYPE.SOMA and ss.ntype != POINT_TYPE.SOMA)]
[docs] def soma_points(self): """Get the soma points.""" db = self.data_block return db[db[:, COLS.TYPE] == POINT_TYPE.SOMA]
def _merge_sections(sec_a, sec_b): """Merge two sections. Merges sec_a into sec_b and sets sec_a attributes to default """ sec_b.ids = list(sec_a.ids) + list(sec_b.ids[1:]) sec_b.ntype = sec_a.ntype sec_b.pid = sec_a.pid sec_a.ids = [] sec_a.pid = -1 sec_a.ntype = 0 def _section_end_points(structure_block, id_map): """Get the section end-points.""" soma_idx = structure_block[:, TYPE] == POINT_TYPE.SOMA soma_ids = structure_block[soma_idx, ID] neurite_idx = structure_block[:, TYPE] != POINT_TYPE.SOMA neurite_rows = structure_block[neurite_idx, :] soma_end_pts = set(id_map[id_] for id_ in soma_ids[np.in1d(soma_ids, neurite_rows[:, PID])]) # end points have either no children or more than one # ie: leaf or multifurcation nodes n_children = defaultdict(int) for row in structure_block: n_children[row[PID]] += 1 end_pts = set(i for i, row in enumerate(structure_block) if n_children[row[ID]] != 1) return end_pts.union(soma_end_pts) class DataBlockSection(object): """Sections ((ids), type, parent_id).""" def __init__(self, ids=None, ntype=0, pid=-1): """Initialize a DataBlockSection object.""" self.ids = [] if ids is None else ids self.ntype = ntype self.pid = pid def __eq__(self, other): """Test for equality.""" return (self.ids == other.ids and self.ntype == other.ntype and self.pid == other.pid) def __str__(self): """Return a string representation.""" return ('%s: ntype=%s, pid=%s: n_ids=%d' % (self.__class__, self.ntype, self.pid, len(self.ids))) __repr__ = __str__ def _extract_sections(data_block): """Make a list of sections from an SWC-style data wrapper block.""" structure_block = data_block[:, COLS.TYPE:COLS.COL_COUNT].astype(np.int) # SWC ID -> structure_block position id_map = {-1: -1} for i, row in enumerate(structure_block): id_map[row[ID]] = i # end points have either no children, more than one, or are the start # of a new gap sec_end_pts = _section_end_points(structure_block, id_map) # a 'gap' is when a section has part of it's segments interleaved # with those of another section gap_sections = set() sections = [] def new_section(): """A new_section.""" sections.append(DataBlockSection()) return sections[-1] curr_section = new_section() parent_section = {-1: -1} for row in structure_block: row_id = id_map[row[ID]] parent_id = id_map[row[PID]] if not curr_section.ids: # first in section point is parent curr_section.ids.append(parent_id) curr_section.ntype = row[TYPE] gap = parent_id != curr_section.ids[-1] # If parent is not the previous point, create a section end-point. # Else add the point to this section if gap: sec_end_pts.add(row_id) else: curr_section.ids.append(row_id) if row_id in sec_end_pts: parent_section[curr_section.ids[-1]] = len(sections) - 1 # Parent-child discontinuity section if gap: curr_section = new_section() curr_section.ids.extend((parent_id, row_id)) curr_section.ntype = row[TYPE] gap_sections.add(len(sections) - 2) elif row_id != len(data_block) - 1: # avoid creating an extra DataBlockSection for last row if it's a leaf curr_section = new_section() for sec in sections: # get the section parent ID from the id of the first point. if sec.ids: sec.pid = parent_section[sec.ids[0]] # join gap sections and "disable" first half if sec.pid in gap_sections: _merge_sections(sections[sec.pid], sec) # TODO find a way to remove empty sections. Currently they are # required to maintain tree integrity. return sections
[docs]class BlockNeuronBuilder(object): """Helper to create DataWrapper for 'block' sections. This helps create a new DataWrapper when one already has 'blocks' (ie: contiguous points, forming all the segments) of a section, and they just need to connect them together based on their parent. Example: >>> builder = BlockNeuronBuilder() >>> builder.add_section(segment_id, parent_id, segment_type, points) ... >>> morph = builder.get_datawrapper() Note: This will re-number the IDs if they are not 'dense' (ie: have gaps) """ BlockSection = namedtuple('BlockSection', 'parent_id section_type points') def __init__(self): """Initialize a BlockNeuronBuilder object.""" self.sections = {}
[docs] def add_section(self, id_, parent_id, section_type, points): """Add a section. Args: id_(int): identifying number of the section parent_id(int): identifying number of the parent of this section section_type(int): the section type as defined by POINT_TYPE points: an array of [X, Y, Z, R] """ # L.debug('Adding section %d, with parent %d, of type: %d with count: %d', # id_, parent_id, section_type, len(points)) assert id_ not in self.sections, 'id %s already exists in sections' % id_ self.sections[id_] = BlockNeuronBuilder.BlockSection(parent_id, section_type, points)
def _make_datablock(self): """Make a data_block and sections list as required by DataWrapper.""" section_ids = sorted(self.sections) # create all insertion id's, this needs to be done ahead of time # as some of the children may have a lower id than their parents id_to_insert_id = {} row_count = 0 for section_id in section_ids: row_count += len(self.sections[section_id].points) id_to_insert_id[section_id] = row_count - 1 datablock = np.empty((row_count, COLS.COL_COUNT), dtype=np.float) datablock[:, COLS.ID] = np.arange(len(datablock)) datablock[:, COLS.P] = datablock[:, COLS.ID] - 1 sections = [] insert_index = 0 for id_ in section_ids: sec = self.sections[id_] points, section_type, parent_id = sec.points, sec.section_type, sec.parent_id idx = slice(insert_index, insert_index + len(points)) datablock[idx, COLS.XYZR] = points datablock[idx, COLS.TYPE] = section_type datablock[idx.start, COLS.P] = id_to_insert_id.get(parent_id, ROOT_ID) sections.append(DataBlockSection(idx, section_type, parent_id)) insert_index = idx.stop return datablock, sections def _check_consistency(self): """See if the sections have obvious errors.""" type_count = defaultdict(int) for _, section in sorted(self.sections.items()): type_count[section.section_type] += 1 if type_count[POINT_TYPE.SOMA] != 1: L.info('Have %d somas, expected 1', type_count[POINT_TYPE.SOMA])
[docs] def get_datawrapper(self, file_format='BlockNeuronBuilder', data_wrapper=DataWrapper): """Returns a DataWrapper.""" self._check_consistency() datablock, sections = self._make_datablock() return data_wrapper(datablock, file_format, sections)