from operator import or_
from functools import reduce, lru_cache
from typing import Any, Dict, List, Optional, Set, Tuple
from pydantic import BaseModel, Field
from backports.cached_property import cached_property
from pyhpo.config import MODIFIER_IDS
from pyhpo.similarity import SimScore
from pyhpo.annotations import GeneSingleton
from pyhpo.annotations import OmimDisease, DecipherDisease, OrphaDisease
from pyhpo.parser.generics import id_from_string
class InformationContent(BaseModel):
    """
    InformationContent contains automatically calculated IC based on
    direct/indirect associations with genes, omim, orpha and decipher.

    IC instances are created automatically and accessed through
    :class:`pyhpo.term.HPOTerm` instances.

    Users can also register and calculate custom IC scores via
    :func:`pyhpo.term.InformationContent.set_custom`.
    """

    gene: float = 0.0  # Gene based IC
    omim: float = 0.0  # OMIM based IC
    orpha: float = 0.0  # OrphaNet based IC
    decipher: float = 0.0  # Decipher based IC
    # User-defined scores, keyed by the name passed to ``set_custom``
    custom: Dict[str, float] = Field(default_factory=dict)

    def __getitem__(self, key: str) -> float:
        """
        Dynamic, name-based access to an information content score.

        The IC is frequently accessed dynamically, e.g. when the
        kind of IC is specified in a query (omim / gene).
        Due to this, a dynamic access method is provided

        .. code-block:: python

            ic_kind = 'omim'
            term.information_content[ic_kind]

        Attributes of the instance take precedence; scores registered
        via :func:`set_custom` are used as fallback.

        Parameters
        ----------
        key: str
            The name of the information-content metric

        Returns
        -------
        float
            The information content for ``key``

        Raises
        ------
        AttributeError
            No built-in or custom information content named ``key`` exists
        """
        try:
            return float(self.__getattribute__(key))
        except (AttributeError, TypeError) as err:
            # AttributeError: ``key`` is not an attribute of the instance.
            # TypeError: ``key`` names an attribute that is not numeric
            # (e.g. ``custom`` itself or a method); it must not shadow a
            # custom score of the same name.
            if key in self.custom:
                return self.custom[key]
            raise AttributeError(
                f"No information content of kind {key!r} available"
            ) from err

    def set_custom(self, key: str, value: float) -> None:
        """
        Set the IC of a custom score

        Parameters
        ----------
        key: str
            The name of the information-content metric
        value: float
            The actual information content

        **Example:** ::

            for term in Ontology:
                # For some reason, you want to base the information content
                # on the depths of the Term in the ontology
                term.information_content.set_custom(
                    'depth', term.shortest_path_to_root()
                )

            # and now calculate similarity of two sets
            my_similarity = term_set_1.similarity(term_set_2, kind='depth')
        """
        self.custom[key] = value
class HPOTerm(BaseModel):
    """
    An HPOTerm instance can be built solely by itself,
    without knowledge of the actual Ontology. This is not recommended
    because it would miss all ontology features, such as parents, children,
    associated genes and diseases etc.

    An HPOTerm instance should always be derived from the :class:`pyhpo.Ontology`
    """

    ###
    # Always present and mandatory
    ###
    id: str
    """
    The HPO identifier, e.g. ``HP:0000118``
    """

    name: str
    """
    The name of the HPO term, e.g. ``Abnormal axial skeleton morphology``
    """

    ###
    # Mandatory, calculated during initialization
    ###
    index: int
    """
    The integer representation of the HPO identifier
    """

    _hash: int

    ###
    # Mandatory for HPOTerm, but not always present in input
    ###
    comment: str = ""
    """
    The comment from the OBO source file
    """

    definition: str = ""
    """
    The definition from the OBO source file
    """

    _is_a: List[str] = []

    synonym: List[str] = []
    """
    A list of synonymous names for the term
    """

    xref: List[str] = []
    alt_id: List[str] = []

    ###
    # Special logic for some obsolete terms
    ###
    is_obsolete: bool = False
    replaced_by: Optional[str] = None
    consider: List[str] = []

    ###
    # Computed once all HPO Terms are present in the Ontology
    ###
    parents: Set["HPOTerm"] = set()
    """
    A set of all direct parent terms
    """

    children: Set["HPOTerm"] = set()
    """
    A set of all direct child terms
    """

    genes: Set[GeneSingleton] = set()
    """
    A set of all associated genes. Associated genes are inversely inherited from
    child terms as well
    """

    omim_diseases: Set[OmimDisease] = set()
    """
    A set of all associated Omim diseases. Associated diseases are inversely inherited from
    child terms as well
    """

    omim_excluded_diseases: Set[OmimDisease] = set()
    """
    A set of all explicitly non-associated Omim diseases. Non-associated diseases are inherited from
    parent terms as well
    """

    orpha_diseases: Set[OrphaDisease] = set()
    """
    A set of all associated Orpha diseases. Associated diseases are inversely inherited from
    child terms as well
    """

    orpha_excluded_diseases: Set[OrphaDisease] = set()
    """
    A set of all explicitly non-associated Orpha diseases.
    Non-associated diseases are inherited from parent terms as well
    """

    decipher_diseases: Set[DecipherDisease] = set()
    """
    A set of all associated Decipher diseases. Associated diseases are inversely inherited from
    child terms as well
    """

    decipher_excluded_diseases: Set[DecipherDisease] = set()
    """
    A set of all explicitly non-associated Decipher diseases.
    Non-associated diseases are inherited from parent terms as well
    """

    information_content: InformationContent = InformationContent()
    """
    The :class:`.InformationContent` of the HPO term.
    Multiple kinds of IC are automatically calculated,
    others can be manually calculated.
    """

    def __init__(self, **kwargs) -> None:  # type: ignore
        """
        Build the term from keyword arguments.

        ``id`` is required; ``index`` is derived from it. The optional
        ``is_a`` list is stored privately and used by :func:`parent_ids`.
        """
        kwargs["index"] = id_from_string(kwargs["id"])
        super().__init__(**kwargs)
        # Private attributes can only be assigned once the model is initialized
        self._hash = hash((self.index, self.name))
        self._is_a = kwargs.get("is_a", [])

    @cached_property
    def all_parents(self) -> Set["HPOTerm"]:
        """
        All direct and indirect parent terms, not including the term itself.

        The result is derived from :attr:`hierarchy` and cached.
        """
        ancestors = reduce(or_, (set(path) for path in self.hierarchy))
        # Every path starts with the term itself, so it is always present
        ancestors.remove(self)
        return ancestors

    @cached_property
    def hierarchy(self) -> Tuple[Tuple["HPOTerm", ...], ...]:
        """
        Calculates all paths from current term to Root term
        and returns each path as a Tuple of HPOTerms

        .. note::

            This function is expensive. To ensure better performance, the
            result is cached and all subsequent calls utilize the cache. Don't
            call ``hierarchy`` before the Ontology is fully built with all
            items.

        Returns
        -------
        tuple of tuple of :class:`.HPOTerm` s
            Tuple of paths. Each path is another tuple made up of HPOTerms,
            starting with the term itself
        """
        if not self.parents:
            # A term without parents is the end of a path
            return ((self,),)
        return tuple(
            (self,) + path for parent in self.parents for path in parent.hierarchy
        )

    @cached_property
    def is_modifier(self) -> bool:
        """
        ``True`` if the term itself or any of its ancestors
        is listed in ``MODIFIER_IDS``
        """
        return int(self) in MODIFIER_IDS or bool(
            MODIFIER_IDS & {int(x) for x in self.all_parents}
        )

    def parent_ids(self) -> List[int]:
        """
        Converts every ``is_a`` entry of the term via ``id_from_string``

        Returns
        -------
        list of int
            One converted identifier per ``is_a`` entry
        """
        return [id_from_string(item) for item in self._is_a]

    def parent_of(self, other: "HPOTerm") -> bool:
        """
        Checks if ``self`` is a direct or indirect parent of ``other``.

        Parameters
        ----------
        other: :class:`.HPOTerm`
            HPOTerm to check for lineage dependency

        Returns
        -------
        bool
            Is the HPOTerm a direct or indirect parent of another HPOTerms

        Raises
        ------
        RuntimeError
            ``self`` and ``other`` are the same term
        """
        return other.child_of(self)

    def child_of(self, other: "HPOTerm") -> bool:
        """
        Checks if ``self`` is a direct or indirect child of ``other``.

        Parameters
        ----------
        other: :class:`.HPOTerm`
            HPOTerm to check for lineage dependency

        Returns
        -------
        bool
            Is the HPOTerm a direct or indirect child of another HPOTerms

        Raises
        ------
        RuntimeError
            ``self`` and ``other`` are the same term
        """
        if self == other:
            raise RuntimeError("An HPO term cannot be parent/child of itself")
        return other in self.all_parents

    def common_ancestors(self, other: "HPOTerm") -> Set["HPOTerm"]:
        """
        Identifies all common ancestors
        of two HPO terms

        Parameters
        ----------
        other: :class:`.HPOTerm`
            Target HPO term for path finding

        Returns
        -------
        set
            Set of common ancestor HPOTerms
        """
        # Return the intersection of all ancestors of self and other.
        # Consider the following edge cases:
        # - self is in other.all_parents
        # - other is in self.all_parents
        # To account for these edge cases,
        # we first add self to self.all_parents
        # and other to other.all_parents
        self_ancestors: Set["HPOTerm"] = self.all_parents | {self}
        other_ancestors: Set["HPOTerm"] = other.all_parents | {other}
        return self_ancestors & other_ancestors

    def longest_path_to_root(self) -> int:
        """
        Calculates the longest path to root

        Returns
        -------
        int
            Maximum number of steps until the root HPOTerm
        """
        return max(len(path) - 1 for path in self.hierarchy)

    def shortest_path_to_root(self) -> int:
        """
        Calculates the shortest path to root

        Returns
        -------
        int
            Minimum number of steps until the root HPOTerm
        """
        return min(len(path) - 1 for path in self.hierarchy)

    def shortest_path_to_parent(
        self, other: "HPOTerm"
    ) -> Tuple[int, Tuple["HPOTerm", ...]]:
        """
        Calculates the shortest path to another HPO Term

        Parameters
        ----------
        other: HPOTerm
            parent HPOTerm instance (or the term itself)

        Returns
        -------
        int
            Minimum number of steps until the specified HPOTerm
            (``0`` if ``other`` is the term itself)
        tuple
            Tuple of all HPOTerm instances on the path,
            starting with ``self`` and ending with ``other``

        Raises
        ------
        RuntimeError
            ``other`` is neither the term itself nor one of its ancestors,
            or no path to ``other`` could be determined
        """
        if other not in self.all_parents and self != other:
            raise RuntimeError(f"{other.id} is not a parent of {self.id}")

        candidates: List[Tuple[int, Tuple["HPOTerm", ...]]] = []
        for path in self.hierarchy:
            try:
                steps = path.index(other)
            except ValueError:
                # ``other`` is not part of this particular path
                continue
            candidates.append((steps, path[: steps + 1]))

        try:
            # ``min`` returns the first of several equally short paths
            return min(candidates, key=lambda x: x[0])
        except ValueError as err:
            raise RuntimeError(
                f"Unable to determine path to parent term {other.name}"
            ) from err

    def longest_path_to_bottom(self, level: int = 0) -> int:
        """
        Calculates how far the most distant child is apart

        Parameters
        ----------
        level: int
            Offset level to indicate for calculation
            Default: 0

        Returns
        -------
        int:
            Number of steps to most distant child
        """
        if self.children:
            return max(
                child.longest_path_to_bottom(level + 1) for child in self.children
            )
        return level

    def path_to_other(
        self, other: "HPOTerm"
    ) -> Tuple[int, Tuple["HPOTerm", ...], int, int]:
        """
        Identifies the shortest connection between
        two HPO terms

        Parameters
        ----------
        other: HPOTerm
            Target HPO term for path finding

        Returns
        -------
        int
            Length of path
        tuple
            Tuple of HPOTerms in the path
        int
            Number of steps from term-1 to the common parent
        int
            Number of steps from term-2 to the common parent

        Raises
        ------
        IndexError
            The two terms do not share any common ancestor
        """
        paths = []
        for term in self.common_ancestors(other):
            steps1, path1 = self.shortest_path_to_parent(term)
            steps2, path2 = other.shortest_path_to_parent(term)
            # Both partial paths end at ``term``; drop it from the
            # reversed second path so it is not listed twice
            total_path = path1 + tuple(reversed(path2))[1:]
            paths.append((int(steps1 + steps2), total_path, int(steps1), int(steps2)))
        return sorted(paths, key=lambda x: x[0])[0]

    def count_parents(self) -> int:
        """
        Calculates total number of ancestral HPO Terms

        .. note::

            The count is accumulated recursively per parent. An ancestor
            that can be reached via several paths is counted once for
            every path.

        Returns
        -------
        int
            The number of all ancestral HPO Terms
        """
        return sum(parent.count_parents() + 1 for parent in self.parents)

    def similarity_score(
        self, other: "HPOTerm", kind: Optional[str] = None, method: Optional[str] = None
    ) -> float:
        """
        Calculate the similarity between this and another HPO-Term
        It uses :class:`pyhpo.similarity.base._Similarity` underneath

        Parameters
        ----------
        other: `HPOTerm`
            Other HPO term to compare similarity to

        kind: str, default ``None``
            Which kind of information content should be calculated.
            Default option is defined in `pyhpo.similarity.base._Similarity`

            Available options:

            * **omim**
            * **orpha**
            * **decipher**
            * **gene**

        method: string, default ``None``
            The method to use to calculate the similarity.
            Default option is defined in `pyhpo.similarity.base._Similarity`

            Available options:

            * **resnik** - :class:`pyhpo.similarity.defaults.Resnik`
            * **lin** - :class:`pyhpo.similarity.defaults.Lin`
            * **jc** - :class:`pyhpo.similarity.defaults.JC`
            * **jc2** - :class:`pyhpo.similarity.defaults.JC` (**deprecated**)
            * **rel** - :class:`pyhpo.similarity.defaults.Relevance`
            * **ic** - :class:`pyhpo.similarity.defaults.InformationCoefficient`
            * **dist** - :class:`pyhpo.similarity.defaults.Distance`
            * Additional methods can be registered separately (
              see :ref:`custom-similarity-methods`)

        Raises
        ------
        RuntimeError
            The specified ``method`` does not exist
        NotImplementedError
            This error can only occur with custom Similarity-Score
            methods that do not have a ``similarity`` method defined.
        AttributeError
            The information content for ``kind`` does not exist
        """
        return SimScore(self, other, kind, method)

    # NOTE: ``lru_cache`` on a method creates one cache shared by all
    # instances; it holds strong references to the terms of up to 128 calls.
    @lru_cache(maxsize=128)
    def cached_similarity_score(
        self, other: "HPOTerm", kind: str = "", method: str = ""
    ) -> float:
        """
        This is a LRU-cached alias of
        :func:`pyhpo.term.HPOTerm.similarity_score`
        """
        return self.similarity_score(other, kind, method)

    def toJSON(self, verbose: bool = False) -> dict:
        """
        Creates a JSON-like object of the HPOTerm

        Parameters
        ----------
        verbose: bool, default ``False``
            Include extra properties

        Returns
        -------
        dict
            A dictionary with the main properties of the HPOTerm


        **Example:** ::

            >>> terms[2].toJSON()
            {
                'int': 2,
                'id': 'HP:0000002',
                'name': 'Abnormality of body height'
            }

            >>> terms[2].toJSON(verbose=True)
            {
                'int': 2,
                'id': 'HP:0000002',
                'name': 'Abnormality of body height',
                'definition': '"Deviation from the norm of height with respect [...]',
                'comment': '',
                'synonym': ['Abnormality of body height'],
                'xref': ['UMLS:C4025901'],
                'is_a': ['HP:0001507 ! Growth abnormality'],
                'ic': {'gene': ..., 'omim': ..., 'orpha': ..., 'decipher': ..., 'custom': {}}
            }

        """
        res: Dict[str, Any] = {"int": int(self), "id": self.id, "name": self.name}
        if verbose:
            res["definition"] = self.definition
            res["comment"] = self.comment
            res["synonym"] = self.synonym
            res["xref"] = self.xref
            res["is_a"] = self._is_a
            res["ic"] = self.information_content.model_dump()
        return res

    def to_obo(self) -> str:
        """
        Not implemented, always raises ``NotImplementedError``
        """
        raise NotImplementedError("Method is missing")

    def __hash__(self) -> int:
        """
        The hash is precalculated during initialization
        """
        return self._hash

    def __int__(self) -> int:
        """
        The integer representation of the HPO identifier
        """
        return self.index

    def __eq__(self, t2: Any) -> bool:
        """
        Two terms are equal if both are HPOTerms with the same hash
        """
        # Check the type first: hashing an unhashable object (e.g. a list)
        # would raise a TypeError instead of evaluating to ``False``
        return isinstance(t2, HPOTerm) and hash(self) == hash(t2)

    def __lt__(self, other: Any) -> bool:
        """
        Terms are ordered by their integer representation
        """
        return int(self) < int(other)

    def __str__(self) -> str:
        return f"{self.id} | {self.name}"

    def __repr__(self) -> str:
        return f"HPOTerm(id='{self.id}', name='{self.name}', is_a={self._is_a})"

    class Config:
        arbitrary_types_allowed = True
        # cached_property attributes must not be treated as model fields
        ignored_types = (cached_property,)