# Source code for pyhpo.term

from operator import or_
from functools import reduce, lru_cache
from typing import Any, Dict, List, Optional, Set, Tuple

from pydantic import BaseModel, Field
from backports.cached_property import cached_property

from pyhpo.config import MODIFIER_IDS
from pyhpo.similarity import SimScore
from pyhpo.annotations import GeneSingleton
from pyhpo.annotations import OmimDisease, DecipherDisease, OrphaDisease
from pyhpo.parser.generics import id_from_string



# [docs]
class InformationContent(BaseModel):
    """
    InformationContent contains automatically calculated IC based on
    direct/indirect associations with genes, omim, orpha and decipher.
    IC instances are created automatically and accessed through
    :class:`pyhpo.term.HPOTerm` instances.

    Users can also register and calculate custom IC scores via
    :func:`pyhpo.term.InformationContent.set_custom`.
    """

    gene: float = 0.0  # Gene based IC
    omim: float = 0.0  # OMIM based IC
    orpha: float = 0.0  # OrphaNet based IC
    decipher: float = 0.0  # Decipher based IC
    custom: Dict[str, float] = Field(default_factory=dict)  # User-defined ICs

    def __getitem__(self, key: str) -> float:
        """
        The IC is frequently accessed dynamically. e.g. in PyHPOAPI the
        kind of IC is specified in the query (omim / gene)
        Due to this, a dynamic access method is provided

        .. code-block:: python

            ic_kind = 'omim'
            term.information_content[ic_kind]

        Built-in scores (attributes of this model) take precedence over
        entries of the same name registered via :func:`set_custom`.

        Parameters
        ----------
        key: str
            Name of the information-content metric

        Returns
        -------
        float
            The information content for ``key``

        Raises
        ------
        AttributeError
            ``key`` is neither a numeric attribute of this model
            nor a registered custom score
        """
        try:
            return float(getattr(self, key))
        except (AttributeError, TypeError, ValueError) as err:
            # TypeError / ValueError: ``key`` names an attribute that is not
            # a number (e.g. the ``custom`` dict itself or a method). Treat
            # it like an unknown built-in kind and fall back to custom scores.
            try:
                return self.custom[key]
            except KeyError:
                raise AttributeError(
                    f"No information content of kind {key!r} available"
                ) from err

    def set_custom(self, key: str, value: float) -> None:
        """
        Set the IC of a custom score

        Parameters
        ----------
        key: str
            The name of the information-content metric
        value: float
            The actual information content


        **Example:** ::

            for term in Ontology:
                # For some reason, you want to base the information content
                # on the depths of the Term in the ontology
                term.information_content.set_custom(
                    'depth', term.shortest_path_to_root()
                )

            # and now calculate similarity of two sets
            my_similarity = term_set_1.similarity(term_set_2, kind='depth')

        """
        self.custom[key] = value





# [docs]
class HPOTerm(BaseModel):
    """
    An HPOTerm instance can be built solely by itself,
    without knowledge of the actual Ontology. This is not recommended
    because it would miss all ontology features, such as parents, children,
    associated genes and diseases etc.

    An HPOTerm instance should always be derived from the :class:`pyhpo.Ontology`
    """

    ###
    # Always present and mandatory
    ###

    id: str
    """
    The HPO identifier, e.g. ``HP:0000118``
    """

    name: str
    """
    The name of the HPO term, e.g. ``Abnormal axial skeleton morphology``
    """

    ###
    # Mandatory, calculated during initialization
    ###

    index: int
    """
    The integer representation of the HPO identifier
    """

    # Precomputed in ``__init__`` from ``(index, name)``
    _hash: int

    ###
    # Mandatory for HPOTerm, but not always present in input
    ###

    comment: str = ""
    """
    The comment from the OBO source file
    """

    definition: str = ""
    """
    The definition from the OBO source file
    """

    # Raw ``is_a`` entries; assigned in ``__init__`` from the ``is_a`` kwarg
    _is_a: List[str] = []
    synonym: List[str] = Field(default_factory=list)
    """
    A list of synonymous names for the term
    """

    xref: List[str] = Field(default_factory=list)
    alt_id: List[str] = Field(default_factory=list)

    ###
    # Special logic for some obsolete terms
    ###

    is_obsolete: bool = False
    replaced_by: Optional[str] = None
    consider: List[str] = Field(default_factory=list)

    ###
    # Computed once all HPO Terms are present in the Ontology
    ###

    parents: Set["HPOTerm"] = Field(default_factory=set)
    """
    A set of all direct parent terms
    """

    children: Set["HPOTerm"] = Field(default_factory=set)
    """
    A set of all direct child terms
    """

    genes: Set[GeneSingleton] = Field(default_factory=set)
    """
    A set of all associated genes. Associated genes are inversely inherited from
    child terms as well
    """

    omim_diseases: Set[OmimDisease] = Field(default_factory=set)
    """
    A set of all associated Omim diseases. Associated diseases are inversely inherited from
    child terms as well
    """

    omim_excluded_diseases: Set[OmimDisease] = Field(default_factory=set)
    """
    A set of all explicitly non-associated Omim diseases. Non-associated diseases are inherited from
    parent terms as well
    """

    orpha_diseases: Set[OrphaDisease] = Field(default_factory=set)
    """
    A set of all associated Orpha diseases. Associated diseases are inversely inherited from
    child terms as well
    """

    orpha_excluded_diseases: Set[OrphaDisease] = Field(default_factory=set)
    """
    A set of all explicitly non-associated Orpha diseases.
    Non-associated diseases are inherited from parent terms as well
    """

    decipher_diseases: Set[DecipherDisease] = Field(default_factory=set)
    """
    A set of all associated Decipher diseases. Associated diseases are inversely inherited from
    child terms as well
    """

    decipher_excluded_diseases: Set[DecipherDisease] = Field(default_factory=set)
    """
    A set of all explicitly non-associated Decipher diseases.
    Non-associated diseases are inherited from parent terms as well
    """

    information_content: InformationContent = Field(
        default_factory=InformationContent
    )
    """
    The :class:`.InformationContent` of the HPO term.
    Multiple kinds of IC are automatically calculated,
    others can be manually calculated.
    """

    def __init__(self, **kwargs) -> None:  # type: ignore
        """
        Build the term from keyword arguments.

        ``index`` is always derived from ``id`` (any ``index`` passed by the
        caller is overwritten). The optional ``is_a`` argument is read
        directly from ``kwargs`` and stored in the private ``_is_a``
        attribute.
        """
        kwargs["index"] = id_from_string(kwargs["id"])
        super().__init__(**kwargs)
        self._hash = hash((self.index, self.name))
        self._is_a = kwargs.get("is_a", [])

    @cached_property
    def all_parents(self) -> Set["HPOTerm"]:
        """
        All direct and indirect parents of the term, i.e. every term that
        occurs on any path of :attr:`hierarchy`, excluding the term itself.
        The result is cached.
        """
        return reduce(or_, (set(path) for path in self.hierarchy)) - {self}

    @cached_property
    def hierarchy(self) -> Tuple[Tuple["HPOTerm", ...], ...]:
        """
        Calculates all paths from current term to Root term
        and returns each path as a Tuple of HPOTerms

        .. note::

            This function is expensive. To ensure better performance, the
            result is cached and all subsequent calls utilize the cache. Don't
            call ``hierarchy`` before the Ontology is fully built with all
            items.

        Returns
        -------
        tuple of tuple of :class:`.HPOTerm` s
            Tuple of paths. Each path is another tuple made up of HPOTerms,
            starting with the term itself
        """
        if not self.parents:
            # A term without parents is the end of the path
            return ((self,),)

        return tuple(
            (self,) + path for parent in self.parents for path in parent.hierarchy
        )

    @cached_property
    def is_modifier(self) -> bool:
        """
        ``True`` if the term itself or any of its (indirect) parents is
        listed in ``MODIFIER_IDS``. The result is cached.
        """
        return int(self) in MODIFIER_IDS or bool(
            MODIFIER_IDS & {int(x) for x in self.all_parents}
        )

    def parent_ids(self) -> List[int]:
        """
        Returns the integer ids of the direct parents, parsed from the raw
        ``is_a`` entries the term was created with.
        """
        return [id_from_string(item) for item in self._is_a]

    def parent_of(self, other: "HPOTerm") -> bool:
        """
        Checks if ``self`` is a direct or indirect parent of ``other``.

        Parameters
        ----------
        other: :class:`.HPOTerm`
            HPOTerm to check for lineage dependency

        Returns
        -------
        bool
            Is the HPOTerm a direct or indirect parent of another HPOTerms

        Raises
        ------
        RuntimeError
            ``self`` and ``other`` are the same term
        """
        return other.child_of(self)

    def child_of(self, other: "HPOTerm") -> bool:
        """
        Checks if ``self`` is a direct or indirect child of ``other``.

        Parameters
        ----------
        other: :class:`.HPOTerm`
            HPOTerm to check for lineage dependency

        Returns
        -------
        bool
            Is the HPOTerm a direct or indirect child of another HPOTerms

        Raises
        ------
        RuntimeError
            ``self`` and ``other`` are the same term
        """
        if self == other:
            raise RuntimeError("An HPO term cannot be parent/child of itself")

        return other in self.all_parents

    def common_ancestors(self, other: "HPOTerm") -> Set["HPOTerm"]:
        """
        Identifies all common ancestors
        of two HPO terms

        Parameters
        ----------
        other: :class:`.HPOTerm`
            Target HPO term for path finding

        Returns
        -------
        set
            Set of common ancestor HPOTerms
        """

        # Return the intersection of all ancestors of self and other.
        # Consider the following edge cases:
        # - self is in other.all_parents
        # - other is in self.all_parents
        # To account for these edge cases,
        # we first add self to self.all_parents
        # and other to other.all_parents
        self_ancestors: Set["HPOTerm"] = self.all_parents | {self}
        other_ancestors: Set["HPOTerm"] = other.all_parents | {other}
        return self_ancestors & other_ancestors

    def longest_path_to_root(self) -> int:
        """
        Calculates the longest path to root

        Returns
        -------
        int
            Maximum number of steps until the root HPOTerm
        """
        return max(len(path) - 1 for path in self.hierarchy)

    def shortest_path_to_root(self) -> int:
        """
        Calculates the shortest path to root

        Returns
        -------
        int
            Minimum number of steps until the root HPOTerm
        """
        return min(len(path) - 1 for path in self.hierarchy)

    def shortest_path_to_parent(
        self, other: "HPOTerm"
    ) -> Tuple[int, Tuple["HPOTerm", ...]]:
        """
        Calculates the shortest path to another HPO Term

        Parameters
        ----------
        other: HPOTerm
            parent HPOTerm instance

        Returns
        -------
        int
            Minimum number of steps until the specified HPOTerm
            (``0`` if ``other`` is the term itself)
        tuple
            Tuple of all HPOTerm instances on the path, starting with
            ``self`` and ending with ``other``

        Raises
        ------
        RuntimeError
            ``other`` is neither the term itself nor one of its parents,
            or no path to ``other`` could be determined
        """
        if other not in self.all_parents and self != other:
            raise RuntimeError(f"{other.id} is not a parent of {self.id}")

        candidates: List[Tuple[int, Tuple["HPOTerm", ...]]] = []
        for path in self.hierarchy:
            try:
                steps = path.index(other)
            except ValueError:
                # ``other`` is not part of this particular path to root
                continue
            candidates.append((steps, path[: steps + 1]))

        if not candidates:
            raise RuntimeError(
                f"Unable to determine path to parent term {other.name}"
            )
        # ``min`` returns the first of equally short paths, like the
        # stable sort used previously
        return min(candidates, key=lambda candidate: candidate[0])

    def longest_path_to_bottom(self, level: int = 0) -> int:
        """
        Calculates how far the most distant child is apart

        Parameters
        ----------
        level: int
            Offset level to indicate for calculation
            Default: 0

        Returns
        -------
        int:
            Number of steps to most distant child

        """
        if not self.children:
            return level

        return max(child.longest_path_to_bottom(level + 1) for child in self.children)

    def path_to_other(
        self, other: "HPOTerm"
    ) -> Tuple[int, Tuple["HPOTerm", ...], int, int]:
        """
        Identifies the shortest connection between
        two HPO terms

        Parameters
        ----------
        other: HPOTerm
            Target HPO term for path finding

        Returns
        -------
        int
            Length of path
        tuple
            Tuple of HPOTerms in the path
        int
            Number of steps from term-1 to the common parent
        int
            Number of steps from term-2 to the common parent

        Raises
        ------
        IndexError
            The two terms do not share any common ancestor
        """
        paths = []
        for term in self.common_ancestors(other):
            steps_self, path_self = self.shortest_path_to_parent(term)
            steps_other, path_other = other.shortest_path_to_parent(term)

            # ``path_other`` is reversed to lead from the common ancestor
            # down to ``other``; its first item (the common ancestor) is
            # already the last item of ``path_self`` and thus dropped.
            total_path = path_self + tuple(reversed(path_other))[1:]
            paths.append(
                (
                    int(steps_self + steps_other),
                    total_path,
                    int(steps_self),
                    int(steps_other),
                )
            )
        return sorted(paths, key=lambda x: x[0])[0]

    def count_parents(self) -> int:
        """
        Calculates total number of ancestral HPO Terms

        .. note::

            The count is accumulated recursively along every path to the
            root. An ancestor that can be reached via several paths is
            counted once per path. Use ``len(term.all_parents)`` for the
            number of distinct ancestors.

        Returns
        -------
        int
            The number of all ancestral HPO Terms
        """
        return sum(parent.count_parents() + 1 for parent in self.parents)

    def similarity_score(
        self, other: "HPOTerm", kind: Optional[str] = None, method: Optional[str] = None
    ) -> float:
        """
        Calculate the similarity between this and another HPO-Term
        It uses :class:`pyhpo.similarity.base._Similarity` underneath

        Parameters
        ----------
        other: `HPOTerm`
            Other HPO term to compare similarity to

        kind: str, default ``None``
            Which kind of information content should be calculated.
            Default option is defined in `pyhpo.similarity.base._Similarity`

            Available options:

            * **omim**
            * **orpha**
            * **decipher**
            * **gene**

        method: string, default ``None``
            The method to use to calculate the similarity.
            Default option is defined in `pyhpo.similarity.base._Similarity`

            Available options:

            * **resnik** - :class:`pyhpo.similarity.defaults.Resnik`
            * **lin** - :class:`pyhpo.similarity.defaults.Lin`
            * **jc** - :class:`pyhpo.similarity.defaults.JC`
            * **jc2** - :class:`pyhpo.similarity.defaults.JC` (**deprecated**)
            * **rel** - :class:`pyhpo.similarity.defaults.Relevance`
            * **ic** - :class:`pyhpo.similarity.defaults.InformationCoefficient`
            * **dist** - :class:`pyhpo.similarity.defaults.Distance`
            * Additional methods can be registered separately (
              see :ref:`custom-similarity-methods`)

        Returns
        -------
        float
            The similarity score of the two terms

        Raises
        ------
        RuntimeError
            The specified ``method`` does not exist
        NotImplementedError
            This error can only occur with custom Similarity-Score
            methods that do not have a ``similarity`` method defined.
        AttributeError
            The information content for ``kind`` does not exist
        """
        return SimScore(self, other, kind, method)

    # NOTE: ``lru_cache`` on a method keys on ``self`` and ``other`` and keeps
    # the cached terms referenced for as long as the entries stay in the cache.
    @lru_cache(maxsize=128)
    def cached_similarity_score(
        self, other: "HPOTerm", kind: str = "", method: str = ""
    ) -> float:
        """
        This is a LRU-cached alias of
        :func:`pyhpo.term.HPOTerm.similarity_score`
        """
        return self.similarity_score(other, kind, method)

    def toJSON(self, verbose: bool = False) -> dict:
        """
        Creates a JSON-like object of the HPOTerm

        Parameters
        ----------
        verbose: bool, default ``False``
            Include extra properties

        Returns
        -------
        dict
            A dictionary with the main properties of the HPOTerm


        **Example:** ::

            >>> terms[2].toJSON()
            {
                'int': 2,
                'id': 'HP:0000002',
                'name': 'Abnormality of body height'
            }

            >>> terms[2].toJSON(verbose=True)
            {
                'int': 2,
                'id': 'HP:0000002',
                'name': 'Abnormality of body height',
                'definition': '"Deviation from the norm of height with respect [...]',
                'comment': '',
                'synonym': ['Abnormality of body height'],
                'xref': ['UMLS:C4025901'],
                'is_a': ['HP:0001507 ! Growth abnormality'],
                'ic': {...}  # InformationContent.model_dump()
            }

        """
        res: Dict[str, Any] = {"int": int(self), "id": self.id, "name": self.name}

        if verbose:
            res["definition"] = self.definition
            res["comment"] = self.comment
            res["synonym"] = self.synonym
            res["xref"] = self.xref
            res["is_a"] = self._is_a
            res["ic"] = self.information_content.model_dump()

        return res

    def to_obo(self) -> str:
        """
        Not implemented, always raises ``NotImplementedError``
        """
        raise NotImplementedError("Method is missing")

    def __hash__(self) -> int:
        """
        The hash is precalculated during initialization
        """
        return self._hash

    def __int__(self) -> int:
        return self.index

    def __eq__(self, t2: Any) -> bool:
        # Check the type first: hashing an arbitrary (possibly unhashable)
        # object would raise a TypeError instead of yielding ``False``.
        return isinstance(t2, HPOTerm) and hash(self) == hash(t2)

    def __lt__(self, other: Any) -> bool:
        return int(self) < int(other)

    def __str__(self) -> str:
        return "{} | {}".format(self.id, self.name)

    def __repr__(self) -> str:
        return f"HPOTerm(id='{self.id}', name='{self.name}', " f"is_a={self._is_a})"

    class Config:
        arbitrary_types_allowed = True
        ignored_types = (cached_property,)