Source code for neurom.io.datawrapper

# Copyright (c) 2015, Ecole Polytechnique Federale de Lausanne, Blue Brain Project
# All rights reserved.
#
# This file is part of NeuroM <https://github.com/BlueBrain/NeuroM>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     1. Redistributions of source code must retain the above copyright
#        notice, this list of conditions and the following disclaimer.
#     2. Redistributions in binary form must reproduce the above copyright
#        notice, this list of conditions and the following disclaimer in the
#        documentation and/or other materials provided with the distribution.
#     3. Neither the name of the copyright holder nor the names of
#        its contributors may be used to endorse or promote products
#        derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Fast neuron IO module."""

import logging
from collections import defaultdict, namedtuple

import numpy as np
from neurom.core.dataformat import COLS, POINT_TYPE, ROOT_ID

L = logging.getLogger(__name__)


TYPE, ID, PID = 0, 1, 2


[docs]class DataWrapper(object):
    """Class holding a raw data block and section information."""

    def __init__(self, data_block, fmt, sections=None):
        """Section Data Wrapper.

        data_block is np.array-like with the following columns:
            [X, Y, Z, R, TYPE, ID, P]
            X(float): x-coordinate
            Y(float): y-coordinate
            Z(float): z-coordinate
            R(float): radius
            TYPE(integer): one of the types described by POINT_TYPE
            ID(integer): unique integer given to each point, the `ROOT_ID` is -1
            P(integer): the ID of the parent

        Args:
            data_block: as defined above
            fmt: File format designation, eg: SWC
            sections: Already extracted sections, otherwise data_block will be used

        Notes:
            - there is no ordering constraint: a child can reference a parent ID that comes
              later in the block
            - there is no requirement that the IDs are dense
            - there is no upper bound on the number of rows with the same 'P'arent: in other
              words, multifurcations are allowed
        """
        self.data_block = data_block
        self.fmt = fmt
        # list of DataBlockSection
        self.sections = sections if sections is not None else _extract_sections(data_block)

[docs]    def neurite_root_section_ids(self):
        """Get the section IDs of the intitial neurite sections."""
        sec = self.sections
        return [i for i, ss in enumerate(sec)
                if ss.pid > -1 and (sec[ss.pid].ntype == POINT_TYPE.SOMA and
                                    ss.ntype != POINT_TYPE.SOMA)]

[docs]    def soma_points(self):
        """Get the soma points."""
        db = self.data_block
        return db[db[:, COLS.TYPE] == POINT_TYPE.SOMA]


def _merge_sections(sec_a, sec_b):
    """Merge two sections.

    Merges sec_a into sec_b and sets sec_a attributes to default
    """
    sec_b.ids = list(sec_a.ids) + list(sec_b.ids[1:])
    sec_b.ntype = sec_a.ntype
    sec_b.pid = sec_a.pid

    sec_a.ids = []
    sec_a.pid = -1
    sec_a.ntype = 0


def _section_end_points(structure_block, id_map):
    """Get the section end-points."""
    soma_idx = structure_block[:, TYPE] == POINT_TYPE.SOMA
    soma_ids = structure_block[soma_idx, ID]
    neurite_idx = structure_block[:, TYPE] != POINT_TYPE.SOMA
    neurite_rows = structure_block[neurite_idx, :]
    soma_end_pts = set(id_map[id_]
                       for id_ in soma_ids[np.in1d(soma_ids, neurite_rows[:, PID])])

    # end points have either no children or more than one
    # ie: leaf or multifurcation nodes
    n_children = defaultdict(int)
    for row in structure_block:
        n_children[row[PID]] += 1
    end_pts = set(i for i, row in enumerate(structure_block)
                  if n_children[row[ID]] != 1)

    return end_pts.union(soma_end_pts)


class DataBlockSection(object):
    """Sections ((ids), type, parent_id)."""
    def __init__(self, ids=None, ntype=0, pid=-1):
        """Initialize a DataBlockSection object."""
        self.ids = [] if ids is None else ids
        self.ntype = ntype
        self.pid = pid

    def __eq__(self, other):
        """Test for equality."""
        return (self.ids == other.ids and
                self.ntype == other.ntype and
                self.pid == other.pid)

    def __str__(self):
        """Return a string representation."""
        return ('%s: ntype=%s, pid=%s: n_ids=%d' %
                (self.__class__, self.ntype, self.pid, len(self.ids)))

    __repr__ = __str__


def _extract_sections(data_block):
    """Make a list of sections from an SWC-style data wrapper block."""
    structure_block = data_block[:, COLS.TYPE:COLS.COL_COUNT].astype(np.int)

    # SWC ID -> structure_block position
    id_map = {-1: -1}
    for i, row in enumerate(structure_block):
        id_map[row[ID]] = i

    # end points have either no children, more than one, or are the start
    # of a new gap
    sec_end_pts = _section_end_points(structure_block, id_map)

    # a 'gap' is when a section has part of it's segments interleaved
    # with those of another section
    gap_sections = set()

    sections = []

    def new_section():
        """A new_section."""
        sections.append(DataBlockSection())
        return sections[-1]

    curr_section = new_section()

    parent_section = {-1: -1}

    for row in structure_block:
        row_id = id_map[row[ID]]
        parent_id = id_map[row[PID]]
        if not curr_section.ids:
            # first in section point is parent
            curr_section.ids.append(parent_id)
            curr_section.ntype = row[TYPE]

        gap = parent_id != curr_section.ids[-1]

        # If parent is not the previous point, create a section end-point.
        # Else add the point to this section
        if gap:
            sec_end_pts.add(row_id)
        else:
            curr_section.ids.append(row_id)

        if row_id in sec_end_pts:
            parent_section[curr_section.ids[-1]] = len(sections) - 1
            # Parent-child discontinuity section
            if gap:
                curr_section = new_section()
                curr_section.ids.extend((parent_id, row_id))
                curr_section.ntype = row[TYPE]
                gap_sections.add(len(sections) - 2)
            elif row_id != len(data_block) - 1:
                # avoid creating an extra DataBlockSection for last row if it's a leaf
                curr_section = new_section()

    for sec in sections:
        # get the section parent ID from the id of the first point.
        if sec.ids:
            sec.pid = parent_section[sec.ids[0]]

        # join gap sections and "disable" first half
        if sec.pid in gap_sections:
            _merge_sections(sections[sec.pid], sec)

    # TODO find a way to remove empty sections.  Currently they are
    # required to maintain tree integrity.
    return sections


[docs]class BlockNeuronBuilder(object):
    """Helper to create DataWrapper for 'block' sections.

    This helps create a new DataWrapper when one already has 'blocks'
    (ie: contiguous points, forming all the segments) of a section, and they
    just need to connect them together based on their parent.

    Example:
        >>> builder = BlockNeuronBuilder()
        >>> builder.add_section(segment_id, parent_id, segment_type, points)
        ...
        >>> morph = builder.get_datawrapper()

    Note:
        This will re-number the IDs if they are not 'dense' (ie: have gaps)
    """
    BlockSection = namedtuple('BlockSection', 'parent_id section_type points')

    def __init__(self):
        """Initialize a BlockNeuronBuilder object."""
        self.sections = {}

[docs]    def add_section(self, id_, parent_id, section_type, points):
        """Add a section.

        Args:
            id_(int): identifying number of the section
            parent_id(int): identifying number of the parent of this section
            section_type(int): the section type as defined by POINT_TYPE
            points: an array of [X, Y, Z, R]
        """
        # L.debug('Adding section %d, with parent %d, of type: %d with count: %d',
        #         id_, parent_id, section_type, len(points))
        assert id_ not in self.sections, 'id %s already exists in sections' % id_
        self.sections[id_] = BlockNeuronBuilder.BlockSection(parent_id, section_type, points)

    def _make_datablock(self):
        """Make a data_block and sections list as required by DataWrapper."""
        section_ids = sorted(self.sections)

        # create all insertion id's, this needs to be done ahead of time
        # as some of the children may have a lower id than their parents
        id_to_insert_id = {}
        row_count = 0
        for section_id in section_ids:
            row_count += len(self.sections[section_id].points)
            id_to_insert_id[section_id] = row_count - 1

        datablock = np.empty((row_count, COLS.COL_COUNT), dtype=np.float)
        datablock[:, COLS.ID] = np.arange(len(datablock))
        datablock[:, COLS.P] = datablock[:, COLS.ID] - 1

        sections = []
        insert_index = 0
        for id_ in section_ids:
            sec = self.sections[id_]
            points, section_type, parent_id = sec.points, sec.section_type, sec.parent_id

            idx = slice(insert_index, insert_index + len(points))
            datablock[idx, COLS.XYZR] = points
            datablock[idx, COLS.TYPE] = section_type
            datablock[idx.start, COLS.P] = id_to_insert_id.get(parent_id, ROOT_ID)
            sections.append(DataBlockSection(idx, section_type, parent_id))
            insert_index = idx.stop

        return datablock, sections

    def _check_consistency(self):
        """See if the sections have obvious errors."""
        type_count = defaultdict(int)
        for _, section in sorted(self.sections.items()):
            type_count[section.section_type] += 1

        if type_count[POINT_TYPE.SOMA] != 1:
            L.info('Have %d somas, expected 1', type_count[POINT_TYPE.SOMA])

[docs]    def get_datawrapper(self, file_format='BlockNeuronBuilder', data_wrapper=DataWrapper):
        """Returns a DataWrapper."""
        self._check_consistency()
        datablock, sections = self._make_datablock()
        return data_wrapper(datablock, file_format, sections)