statistics

Functions to compute various graph statistics.

The functions in this module compute various statistics on graphs.

Example

import pathpyG as pp

# Generate a toy example graph.
g = pp.Graph.from_edge_list([
    ('b', 'c'),
    ('a', 'b'),
    ('c', 'd'),
    ('d', 'a'),
    ('b', 'd')
])

# Calculate degree distribution and raw moments
d_dist = pp.statistics.degree_distribution(g)
k_1 = pp.statistics.degree_raw_moment(g, k=1)
k_2 = pp.statistics.degree_raw_moment(g, k=2)

`Graph` ¶

A graph object storing nodes, edges, and attributes.

An object that can be used to store directed or undirected graphs with node and edge attributes. Data on nodes and edges are stored in an underlying instance of torch_geometric.Data.

Source code in src/pathpyG/core/graph.py

class Graph:
    """
    A graph object storing nodes, edges, and attributes.

    An object that can be used to store directed or undirected graphs with node
    and edge attributes. Data on nodes and edges are stored in an underlying instance of
    [`torch_geometric.Data`](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.data.Data.html#torch_geometric.data.Data).
    """

    def __init__(self, data: Data, mapping: Optional[IndexMap] = None):
        """Generate graph instance from a pyG `Data` object.

        Generate a Graph instance from a `torch_geometric.Data` object that contains an EdgeIndex as well as
        optional node-, edge- or graph-level attributes. An optional mapping can be used to transparently map
        node indices to string identifiers.

        Note that the passed `data` object is modified in place: `num_nodes` is set if missing, `edge_index`
        is converted to a row-sorted `EdgeIndex`, and a `node_sequence` attribute is added if missing.

        Args:
            data: A pyG Data object containing an EdgeIndex and additional attributes
            mapping: `IndexMap` object that maps node indices to string identifiers

        Raises:
            ValueError: If the sparse size of the `EdgeIndex` does not match the number of nodes.

        Example:
            ```py
            import pathpyG as pp
            from torch_geometric.data import Data
            from torch_geometric import EdgeIndex

            data = Data(edge_index=EdgeIndex([[1,1,2],[0,2,1]], sparse_size=(3,3)))
            g = pp.Graph(data)

            g = pp.Graph(data, mapping=pp.IndexMap(['a', 'b', 'c']))
            ```
        """
        self.mapping = IndexMap() if mapping is None else mapping

        # set num_nodes property; an empty edge index has no maximum, so it yields a graph without nodes
        if "num_nodes" not in data:
            if data.edge_index.numel() > 0:
                data.num_nodes = data.edge_index.max().item() + 1
            else:
                data.num_nodes = 0

        # turn edge index tensor into EdgeIndex object
        if not isinstance(data.edge_index, EdgeIndex):
            data.edge_index = EdgeIndex(data=data.edge_index, sparse_size=(data.num_nodes, data.num_nodes))

        if (
            data.edge_index.get_sparse_size(dim=0) != data.num_nodes
            or data.edge_index.get_sparse_size(dim=1) != data.num_nodes
        ):
            # ValueError is a subclass of Exception, so callers catching Exception keep working
            raise ValueError("sparse size of EdgeIndex should match number of nodes!")

        # sort EdgeIndex and validate
        data.edge_index = data.edge_index.sort_by("row").values
        data.edge_index.validate()

        self.data = data

        # create mapping between edge tuples and edge indices; a single `tolist()` call
        # avoids two `.item()` calls per edge
        self.edge_to_index = {
            (src, dst): i for i, (src, dst) in enumerate(self.data.edge_index.as_tensor().t().tolist())
        }

        ((self.row_ptr, self.col), _) = self.data.edge_index.get_csr()
        ((self.col_ptr, self.row), _) = self.data.edge_index.get_csc()

        # create node_sequence mapping for higher-order graphs
        if "node_sequence" not in self.data:
            self.data.node_sequence = torch.arange(data.num_nodes).reshape(-1, 1)

    @staticmethod
    def from_edge_index(
        edge_index: torch.Tensor, mapping: Optional[IndexMap] = None, num_nodes: Optional[int] = None
    ) -> Graph:
        """Construct a graph from a torch Tensor containing an edge index.

        An optional mapping can be used to transparently map node indices to string identifiers.

        Args:
            edge_index: torch.Tensor or torch_geometric.EdgeIndex object containing an edge_index
            mapping: `IndexMap` object that maps node indices to string identifiers
            num_nodes: optional number of nodes (default: None). If None, the number of nodes will be
                inferred based on the maximum node index in the edge index, i.e. there will be no
                trailing isolated nodes. An explicit value of 0 is respected.

        Returns:
            Graph: graph instance built from the edge index

        Examples:
            You can create a graph from an edge index tensor as follows:

            >>> import torch
            >>> import pathpyG as pp
            >>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]))
            >>> print(g)
            Directed graph with 3 nodes and 3 edges ...

            You can also include a mapping of node IDs:

            >>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]),
            ...                              mapping=pp.IndexMap(['a', 'b', 'c']))
            >>> print(g.mapping)
            a -> 0
            b -> 1
            c -> 2
        """
        # compare against None explicitly: `not num_nodes` would silently discard an explicit 0
        if num_nodes is None:
            d = Data(edge_index=edge_index)
        else:
            d = Data(edge_index=edge_index, num_nodes=num_nodes)
        return Graph(d, mapping=mapping)

    @staticmethod
    def from_edge_list(
        edge_list: Iterable[Tuple[str, str]],
        is_undirected: bool = False,
        mapping: Optional[IndexMap] = None,
        num_nodes: Optional[int] = None,
    ) -> Graph:
        """Generate a Graph based on an edge list.

        Edges can be given as string or integer tuples. If no mapping is given, a mapping of node IDs
        to indices is created automatically from the sorted unique node IDs. If all node IDs are
        numeric strings, they are ordered by their integer value instead of lexicographically.

        Args:
            edge_list: Iterable of edges represented as tuples. One-shot iterables such as
                generators are accepted and materialized into a list.
            is_undirected: Whether the edge list contains all bidirectional edges
            mapping: optional mapping of string IDs to node indices
            num_nodes: optional number of nodes (useful in case not all nodes have incident edges)

        Returns:
            Graph: graph instance built from the edge list. For an empty edge list, a graph with
                zero nodes and a fresh `IndexMap` is returned.

        Examples:
            >>> import pathpyG as pp
            >>> l = [('a', 'b'), ('a', 'c'), ('b', 'c')]
            >>> g = pp.Graph.from_edge_list(l)
            >>> print(list(g.edges))
            [('a', 'b'), ('a', 'c'), ('b', 'c')]
        """
        # materialize one-shot iterables (e.g. generators): they have no len() and the
        # edges are traversed more than once below
        if not hasattr(edge_list, "__len__"):
            edge_list = list(edge_list)

        # handle empty graph
        if len(edge_list) == 0:
            return Graph(Data(edge_index=torch.tensor([[], []], dtype=torch.int32), num_nodes=0), mapping=IndexMap())

        if mapping is None:
            edge_array = np.array(edge_list)
            node_ids = np.unique(edge_array)
            # numeric string IDs are sorted by value, so that e.g. '10' comes after '2'
            if np.issubdtype(node_ids.dtype, str) and np.char.isnumeric(node_ids).all():
                node_ids = np.sort(node_ids.astype(int)).astype(str)
            mapping = IndexMap(node_ids)

        if num_nodes is None:
            num_nodes = mapping.num_ids()

        edge_index = EdgeIndex(
            mapping.to_idxs(edge_list).T.contiguous(),
            sparse_size=(num_nodes, num_nodes),
            is_undirected=is_undirected,
        )
        return Graph(Data(edge_index=edge_index, num_nodes=num_nodes), mapping=mapping)

    def to_undirected(self) -> Graph:
        """Return an undirected version of this directed graph.

        A new undirected Graph is built from the current instance by adding every
        directed edge in the opposite direction as well. Node attributes are copied,
        and edge attributes are re-indexed to follow the new edge order.

        Returns:
            Graph: new graph whose `EdgeIndex` is flagged as undirected

        Examples:
            >>> import pathpyG as pp
            >>> g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('c', 'a')])
            >>> g_u = g.to_undirected()
            >>> print(g_u)
            Undirected graph with 3 nodes and 6 (directed) edges
        """
        n_nodes = self.data.num_nodes

        # positions of the original edges are passed along as a pseudo edge attribute,
        # so that each coalesced edge remembers which original edge its attributes come from
        source_pos = torch.arange(self.data.num_edges, device=self.data.edge_index.device)
        sym_index, source_pos = to_undirected(
            self.data.edge_index,
            edge_attr=source_pos,
            num_nodes=n_nodes,
            reduce="min",
        )

        undirected = Data(
            edge_index=EdgeIndex(data=sym_index, sparse_size=(n_nodes, n_nodes), is_undirected=True),
            num_nodes=n_nodes,
        )

        # Attributes are copied by hand: the torch_geometric.transforms.ToUndirected transform
        # would do this automatically, but it cannot handle numpy arrays as edge attributes.
        for name in self.node_attrs():
            undirected[name] = self.data[name]
        for name in self.edge_attrs():
            if name == "edge_index":
                continue
            undirected[name] = self.data[name][source_pos]

        return Graph(undirected, self.mapping)

    def to_weighted_graph(self) -> Graph:
        """Coalesce multi-edges into single edges carrying a weight attribute.

        Every edge starts with a weight of one. The edge index and these weights are then
        passed to `torch_geometric.utils.coalesce`, and the result is stored in a new graph
        as `edge_index` and `edge_weight`. Only the number of nodes and the mapping are
        carried over from this graph.

        Returns:
            Graph: Graph with coalesced edges
        """
        unit_weights = torch.ones(self.m, device=self.data.edge_index.device)
        coalesced_index, multiplicity = torch_geometric.utils.coalesce(
            self.data.edge_index.as_tensor(), unit_weights
        )
        weighted = Data(edge_index=coalesced_index, edge_weight=multiplicity, num_nodes=self.data.num_nodes)
        return Graph(weighted, mapping=self.mapping)

    def node_attrs(self) -> List[str]:
        """
        Return a list of node attributes.

        This method returns a list containing the names of all node-level attributes,
        ignoring the special `node_sequence` attribute.

        Returns:
            list: list of node attributes
        """
        attrs = []
        for k in self.data.keys():
            if k != "node_sequence" and k.startswith("node_"):
                attrs.append(k)
        return attrs

    def edge_attrs(self) -> List[str]:
        """
        Return a list of edge attributes.

        This method returns a list containing the names of all edge-level attributes,
        ignoring the special `edge_index` attribute.

        Returns:
            list: list of edge attributes
        """
        attrs = []
        for k in self.data.keys():
            if k != "edge_index" and k.startswith("edge_"):
                attrs.append(k)
        return attrs

    @property
    def nodes(self) -> list:
        """
        Return indices or IDs of all nodes in the graph.

        This method returns a list object that contains all nodes.
        If an IndexMap is used, nodes are returned as string IDs.
        If no IndexMap is used, nodes are returned as integer indices.

        Returns:
            list: list of all nodes using IDs or indices (if no mapping is used)
        """
        node_list = self.mapping.to_ids(np.arange(self.n)).tolist()
        if self.order > 1:
            return list(map(tuple, node_list))
        return node_list

    @property
    def edges(self) -> list:
        """Return all edges in the graph.

        This method returns a list object that contains all edges, where each
        edge is a tuple of two elements. If an IndexMap is used to map node
        indices to string IDs, edges are returned as tuples of string IDs.
        If no mapping is used, edges are returned as tuples of integer indices.

        Returns:
            list: list object yielding all edges using IDs or indices (if no mapping is used)
        """
        edge_list = self.mapping.to_ids(self.data.edge_index.t()).tolist()
        if self.order > 1:
            return [tuple(map(tuple, x)) for x in edge_list]
        return list(map(tuple, edge_list))

    def get_successors(self, row_idx: int) -> torch.Tensor:
        """Return a tensor containing the indices of all successor nodes for a given node identified by an index.

        Args:
            row_idx:   Index of node for which predecessors shall be returned.

        Returns:
            tensor: tensor containing indices of all successor nodes of the node indexed by `row_idx`
        """

        if row_idx + 1 < self.row_ptr.size(0):
            row_start = self.row_ptr[row_idx]
            row_end = self.row_ptr[row_idx + 1]
            return self.col[row_start:row_end]
        else:
            return torch.tensor([], device=self.data.edge_index.device)

    def get_predecessors(self, col_idx: int) -> torch.Tensor:
        """Return a tensor containing the indices of all predecessor nodes for a given node identified by an index.

        Args:
            col_idx:   Index of node for which predecessors shall be returned.

        Returns:
            tensor: tensor containing indices of all predecessor nodes of the node indexed by `col_idx`
        """
        if col_idx + 1 < self.col_ptr.size(0):
            col_start = self.col_ptr[col_idx]
            col_end = self.col_ptr[col_idx + 1]
            return self.row[col_start:col_end]
        else:
            return torch.tensor([], device=self.data.edge_index.device)

    def successors(self, node: Union[int, str] | tuple) -> list:
        """Return all successors of a given node.

        This method returns a generator object that yields all successors of a
        given node. If an IndexMap is used, successors are returned
        as string IDs. If no mapping is used, successors are returned as indices.

        Args:
            node:   Index or string ID of node for which successors shall be returned.

        Returns:
            list: list with all successors of the node identified
                by `node` using ID or index (if no mapping is used)
        """

        node_list = self.mapping.to_ids(self.get_successors(self.mapping.to_idx(node))).tolist()  # type: ignore

        if self.order > 1:
            return list(map(tuple, node_list))
        return node_list

    def predecessors(self, node: Union[str, int] | tuple) -> list:
        """Return the predecessors of a given node.

        This method returns a generator object that yields all predecessors of a
        given node. If a `node_id` mapping is used, predecessors will be returned
        as string IDs. If no mapping is used, predecessors are returned as indices.

        Args:
            node:   Index or string ID of node for which predecessors shall be returned.

        Returns:
            list: list with all predecessors of the node identified
                by `node` using ID or index (if no mapping is used)
        """
        node_list = self.mapping.to_ids(self.get_predecessors(self.mapping.to_idx(node))).tolist()  # type: ignore

        if self.order > 1:
            return list(map(tuple, node_list))
        return node_list

    def is_edge(self, v: Union[str, int], w: Union[str, int]) -> bool:
        """Return whether edge $(v,w)$ exists in the graph.

        If an index to ID mapping is used, nodes are assumed to be string IDs. If no
        mapping is used, nodes are assumed to be integer indices.

        Args:
            v: source node of edge as integer index or string ID
            w: target node of edge as integer index or string ID

        Returns:
            bool: True if edge exists, False otherwise
        """
        row = self.mapping.to_idx(v)
        row_start = self.row_ptr[row]
        row_end = self.row_ptr[row + 1]

        return self.mapping.to_idx(w) in self.col[row_start:row_end]

    def sparse_adj_matrix(self, edge_attr: Any = None) -> Any:
        """Return sparse adjacency matrix representation of (weighted) graph.

        The matrix is built by `torch_geometric.utils.to_scipy_sparse_matrix` for
        all `n` nodes of the graph.

        Args:
            edge_attr: the edge attribute that shall be used as edge weight

        Returns:
            scipy.sparse.coo_matrix: sparse adjacency matrix representation of graph
        """
        index = self.data.edge_index.as_tensor()
        if edge_attr is not None:
            return torch_geometric.utils.to_scipy_sparse_matrix(
                index, edge_attr=self.data[edge_attr], num_nodes=self.n
            )
        return torch_geometric.utils.to_scipy_sparse_matrix(index, num_nodes=self.n)

    @property
    def in_degrees(self) -> Dict[str, float]:
        """Return in-degrees of nodes in directed network.

        Returns:
            dict: dictionary containing in-degrees of nodes
        """
        return self.degrees(mode="in")

    @property
    def out_degrees(self) -> Dict[str, float]:
        """Return out-degrees of nodes in directed network.

        Returns:
            dict: dictionary containing out-degrees of nodes
        """
        return self.degrees(mode="out")

    def degrees(self, mode: str = "in") -> Dict[str, float]:
        """
        Return degrees of nodes.

        Degrees are counted with `torch_geometric.utils.degree` over the target row of
        the edge index for mode `in`, and over the source row for any other mode.

        Args:
            mode: `in` or `out` to calculate the in- or out-degree for
                directed networks.

        Returns:
            dict: dictionary containing degrees of nodes
        """
        # row 1 of the edge index holds the targets (in-degree), row 0 the sources (out-degree)
        endpoint_row = 1 if mode == "in" else 0
        counts = torch_geometric.utils.degree(
            self.data.edge_index[endpoint_row], num_nodes=self.n, dtype=torch.int
        )
        per_node = counts.tolist()
        return {self.mapping.to_id(i): per_node[i] for i in range(self.n)}

    def weighted_outdegrees(self) -> torch.Tensor:
        """
        Compute the weighted outdegrees of each node in the graph.

        The weights of all edges are summed per source node. If the graph has no
        `edge_weight` attribute, every edge counts with a weight of one.

        Returns:
            tensor: Weighted outdegrees of nodes.
        """
        weights = getattr(self.data, "edge_weight", None)
        if weights is None:
            weights = torch.ones(self.data.num_edges, device=self.data.edge_index.device)
        sources = self.data.edge_index[0]
        return scatter(weights, sources, dim=0, dim_size=self.data.num_nodes, reduce="sum")

    def transition_probabilities(self) -> torch.Tensor:
        """
        Compute transition probabilities based on weighted outdegrees.

        Returns:
            tensor: Transition probabilities.
        """
        weighted_outdegree = self.weighted_outdegrees()
        source_ids = self.data.edge_index[0]
        edge_weight = getattr(self.data, 'edge_weight', None)
        if edge_weight is None:
            edge_weight = torch.ones(self.data.num_edges, device=self.data.edge_index.device)
        return edge_weight / weighted_outdegree[source_ids]

    def laplacian(self, normalization: Any = None, edge_attr: Any = None) -> Any:
        """Return Laplacian matrix for a given graph.

        This wrapper method will use [`torch_geometric.utils.get_laplacian`](https://pytorch-geometric.readthedocs.io/en/latest/modules/utils.html#torch_geometric.utils.get_laplacian)
        to return a Laplacian matrix representation of a given graph. The number of nodes of the
        graph is passed on explicitly, so that the matrix has one row and column per node
        even if the nodes with the largest indices have no incident edges.

        Args:
            normalization: normalization parameter passed to pyG `get_laplacian`
                function
            edge_attr: optional name of numerical edge attribute that shall
                be passed to pyG `get_laplacian` function as edge weight

        Returns:
            scipy.sparse.coo_matrix: Laplacian matrix representation of graph
        """
        # `edge_weight=None` is the default of `get_laplacian`, i.e. the unweighted case
        edge_weight = None if edge_attr is None else self.data[edge_attr]
        index, weight = torch_geometric.utils.get_laplacian(
            self.data.edge_index.as_tensor(),
            edge_weight=edge_weight,
            normalization=normalization,
            num_nodes=self.n,
        )
        return torch_geometric.utils.to_scipy_sparse_matrix(index, weight, num_nodes=self.n)

    def __getitem__(self, key: Union[tuple, str]) -> Any:
        """Return node, edge, or graph attribute.

        Args:
            key: name of attribute to be returned
        """
        if not isinstance(key, tuple):
            if key in self.data.keys():
                return self.data[key]
            else:
                raise KeyError(key + " is not a graph attribute")
        elif key[0] in self.node_attrs():
            return self.data[key[0]][self.mapping.to_idx(key[1])]
        elif key[0] in self.edge_attrs():
            return self.data[key[0]][self.edge_to_index[self.mapping.to_idx(key[1]), self.mapping.to_idx(key[2])]]
        else:
            raise KeyError(key[0] + " is not a node or edge attribute")

    def __setitem__(self, key: str, val: torch.Tensor) -> None:
        """Store node, edge, or graph attribute.

        Args:
            key: name of attribute to be stored
            val: value of attribute
        """
        if not isinstance(key, tuple):
            if key.startswith("node_"):
                if val.size(0) != self.n:
                    raise ValueError("Attribute must have same length as number of nodes")
                self.data[key] = val
            elif key.startswith("edge_"):
                if val.size(0) != self.m:
                    raise ValueError("Attribute must have same length as number of edges")
                self.data[key] = val
            else:
                self.data[key] = val
        elif key[0].startswith("node_"):  # type: ignore
            if key[0] not in self.data.keys():
                raise KeyError(
                    "Attribute does not yet exist. Setting the value of a specific node attribute"
                    + "requires that the attribute already exists."
                )
            self.data[key[0]][self.mapping.to_idx(key[1])] = val
        elif key[0].startswith("edge_"):  # type: ignore
            if key[0] not in self.data.keys():
                raise KeyError(
                    "Attribute does not yet exist. Setting the value of a specific node attribute"
                    + "requires that the attribute already exists."
                )
            self.data[key[0]][self.edge_to_index[self.mapping.to_idx(key[1]), self.mapping.to_idx(key[2])]] = val
        else:
            raise KeyError("node and edge specific attributes should be prefixed with 'node_' or 'edge_'")

    @property
    def n(self) -> int:
        """
        Return number of nodes.

        Returns:
            int: number of nodes in the graph
        """
        return self.data.num_nodes  # type: ignore

    @property
    def m(self) -> int:
        """
        Return number of edges.

        Returns the number of edges in the graph. For an undirected graph, the number of directed edges is returned.

        Returns:
            int: number of edges in the graph
        """
        return self.data.num_edges  # type: ignore

    @property
    def order(self) -> int:
        """
        Return order of graph.

        Returns:
            int: order of the (De Bruijn) graph
        """
        return self.data.node_sequence.size(1)  # type: ignore

    def is_directed(self) -> bool:
        """Return whether graph is directed.

        Returns:
            bool: True if graph is directed, False otherwise
        """
        return not self.data.edge_index.is_undirected

    def is_undirected(self) -> bool:
        """Return whether graph is undirected.

        Returns:
            bool: True if graph is undirected, False otherwise
        """
        return self.data.edge_index.is_undirected

    def has_self_loops(self) -> bool:
        """Return whether graph contains self-loops.

        Returns:
            bool: True if graph contains self-loops, False otherwise
        """
        return self.data.has_self_loops()

    def __add__(self, other: Graph) -> Graph:
        """Combine Graph object with other Graph object.

        The two graphs are merged based on the node IDs in their IndexMaps. Nodes of this
        graph keep their indices. Nodes of `other` whose ID also exists in this graph are
        mapped to the index of that node. All remaining nodes of `other` are appended after
        the nodes of this graph, in the order of their indices in `other`. The underlying
        data objects are then concatenated and a new mapping for the union of node IDs in
        both graphs is created.

        Node IDs of graphs to be combined can be identical, partly overlapping or disjoint.

        Args:
            other: graph to be combined with this graph

        Returns:
            Graph: new graph containing the nodes and edges of both graphs

        Raises:
            NotImplementedError: If this graph has an order larger than one.

        Examples:
            Adding two graphs without node IDs:

            >>> g1 = pp.Graph.from_edge_index(torch.LongTensor([[0,1,1],[1,2,3]]))
            >>> g2 = pp.Graph.from_edge_index(torch.LongTensor([[0,2,3],[3,2,1]]))
            >>> print(g1 + g2)
            Graph with 4 nodes and 6 edges

            Adding two graphs with identical node IDs:

            >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
            >>> g2 = pp.Graph.from_edge_list([('a', 'c'), ('c', 'b')])
            >>> print(g1 + g2)
            Graph with 3 nodes and 4 edges

            Adding two graphs with non-overlapping node IDs:

            >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
            >>> g2 = pp.Graph.from_edge_list([('d', 'e'), ('e', 'f')])
            >>> print(g1 + g2)
            Graph with 6 nodes and 4 edges

            Adding two graphs with partly overlapping node IDs:

            >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
            >>> g2 = pp.Graph.from_edge_list([('b', 'd'), ('d', 'e')])
            >>> print(g1 + g2)
            Graph with 5 nodes and 4 edges
        """

        if self.order > 1:
            raise NotImplementedError("Add operator can only be applied to order 1 graphs")

        d1 = self.data.clone()
        m1 = self.mapping

        d2 = other.data.clone()
        m2 = other.mapping

        # NOTE(review): `node_ids` of both mappings is read unconditionally below; confirm how
        # graphs without node IDs (first docstring example) behave here.
        # compute overlap and additional nodes in g2 over g1
        overlap = set(m2.node_ids).intersection(m1.node_ids)
        additional_nodes = set(m2.node_ids).difference(m1.node_ids)

        d2_idx_translation = {}
        node_ids = [""] * (self.n + len(additional_nodes))
        # keep mappings of nodes in g1
        for v in m1.node_ids:
            node_ids[m1.to_idx(v)] = v
        for v in m2.node_ids:
            d2_idx_translation[m2.to_idx(v)] = m2.to_idx(v)
        # for overlapping node IDs we must correct node indices in m2
        for v in overlap:
            d2_idx_translation[m2.to_idx(v)] = m1.to_idx(v)
        # Append nodes in g2 that are not in g1 after the nodes of g1, ordered by their index
        # in g2. Numbering them consecutively avoids collisions with existing indices, which an
        # offset of the g2 index would produce whenever an overlapping node has a higher index
        # in g2 than an additional node.
        for offset, v in enumerate(sorted(additional_nodes, key=m2.to_idx)):
            new_idx = self.n + offset
            node_ids[new_idx] = v
            d2_idx_translation[m2.to_idx(v)] = new_idx
        # apply index translation to d2
        # fast dictionary based mapping using torch; `bucketize` needs sorted boundaries,
        # so the translation table is sorted by the original index
        palette, key = zip(*sorted(d2_idx_translation.items()))
        key = torch.tensor(key)
        palette = torch.tensor(palette)

        index = torch.bucketize(d2.edge_index.ravel(), palette)
        d2.edge_index = key[index].reshape(d2.edge_index.shape)
        d = d1.concat(d2)
        mapping = IndexMap(node_ids)
        d.num_nodes = self.n + len(additional_nodes)
        d.edge_index = EdgeIndex(d.edge_index, sparse_size=(d.num_nodes, d.num_nodes))
        return Graph(d, mapping=mapping)

    def __str__(self) -> str:
        """Return a string representation of the graph.

        The first line states whether the graph is directed, and its number of nodes and edges.
        It is followed by a pretty-printed dictionary that lists the types of all node, edge
        and graph attributes.
        """
        from pprint import pformat

        # describe each attribute by its type; the size is appended only for values whose
        # type is exactly `torch.Tensor`
        type_summary = {}
        for name, value in self.data.to_dict().items():
            kind = type(value)
            if kind == torch.Tensor:
                type_summary[name] = str(kind) + " -> " + str(value.size())
            else:
                type_summary[name] = str(kind)

        if self.is_undirected():
            header = "Undirected graph with {0} nodes and {1} (directed) edges\n".format(self.n, self.m)
        else:
            header = "Directed graph with {0} nodes and {1} edges\n".format(self.n, self.m)

        sections = {
            "Node Attributes": {name: type_summary[name] for name in self.node_attrs()},
            "Edge Attributes": {name: type_summary[name] for name in self.edge_attrs()},
            "Graph Attributes": {
                name: type_summary[name]
                for name in self.data.keys()
                if not self.data.is_node_attr(name) and not self.data.is_edge_attr(name)
            },
        }
        return header + pformat(sections, indent=4, width=160)

`edges` `property` ¶

Return all edges in the graph.

This method returns a list object that contains all edges, where each edge is a tuple of two elements. If an IndexMap is used to map node indices to string IDs, edges are returned as tuples of string IDs. If no mapping is used, edges are returned as tuples of integer indices.

Returns:

Name	Type	Description
`list`	`list`	list object yielding all edges using IDs or indices (if no mapping is used)

`in_degrees` `property` ¶

Return in-degrees of nodes in directed network.

Returns:

Name	Type	Description
`dict`	`typing.Dict[str, float]`	dictionary containing in-degrees of nodes

`m` `property` ¶

Return number of edges.

Returns the number of edges in the graph. For an undirected graph, the number of directed edges is returned.

Returns:

Name	Type	Description
`int`	`int`	number of edges in the graph

`n` `property` ¶

Return number of nodes.

Returns:

Name	Type	Description
`int`	`int`	number of nodes in the graph

`nodes` `property` ¶

Return indices or IDs of all nodes in the graph.

This method returns a list object that contains all nodes. If an IndexMap is used, nodes are returned as string IDs. If no IndexMap is used, nodes are returned as integer indices.

Returns:

Name	Type	Description
`list`	`list`	list of all nodes using IDs or indices (if no mapping is used)

`order` `property` ¶

Return order of graph.

Returns:

Name	Type	Description
`int`	`int`	order of the (De Bruijn) graph

`out_degrees` `property` ¶

Return out-degrees of nodes in directed network.

Returns:

Name	Type	Description
`dict`	`typing.Dict[str, float]`	dictionary containing out-degrees of nodes

`add` ¶

Combine Graph object with other Graph object.

The semantics of this operation depend on the optional IndexMap of both graphs. If no IndexMap is included, the two underlying data objects are concatenated, thus merging edges from both graphs while leaving node indices unchanged. If both graphs include IndexMaps that assign node IDs to indices, indices will be adjusted, creating a new mapping for the union of node IDs in both graphs.

Node IDs of graphs to be combined can be identical, partly overlapping or disjoint.

Examples:

Adding two graphs without node IDs:

>>> g1 = pp.Graph.from_edge_index(torch.LongTensor([[0,1,1],[1,2,3]]))
>>> g2 = pp.Graph.from_edge_index(torch.LongTensor([[0,2,3],[3,2,1]]))
>>> print(g1 + g2)
Graph with 4 nodes and 6 edges

Adding two graphs with identical node IDs:

>>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
>>> g2 = pp.Graph.from_edge_list([('a', 'c'), ('c', 'b')])
>>> print(g1 + g2)
Graph with 3 nodes and 4 edges

Adding two graphs with non-overlapping node IDs:

>>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
>>> g2 = pp.Graph.from_edge_list([('d', 'e'), ('e', 'f')])
>>> print(g1 + g2)
Graph with 6 nodes and 4 edges

Adding two graphs with partly overlapping node IDs:

>>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
>>> g2 = pp.Graph.from_edge_list([('b', 'd'), ('d', 'e')])
>>> print(g1 + g2)
Graph with 5 nodes and 4 edges

Source code in src/pathpyG/core/graph.py

def __add__(self, other: Graph) -> Graph:
    """Combine Graph object with other Graph object.

    The node IDs of both graphs are merged into a single mapping. Nodes of
    `self` keep their indices. Nodes of `other` whose ID already exists in
    `self` are translated to the index used by `self`; nodes that only exist
    in `other` are appended after the nodes of `self`, in the order of their
    index in `other`. The edge index of `other` is rewritten accordingly and
    the two underlying data objects are concatenated.

    Node IDs of graphs to be combined can be identical, partly overlapping or disjoint.

    NOTE(review): this implementation reads `mapping.node_ids` of both graphs;
    confirm that graphs created without node IDs are supported by `IndexMap`.

    Args:
        other: the graph whose nodes and edges shall be added to this graph

    Returns:
        Graph: new graph containing the union of nodes and all edges of both graphs

    Raises:
        NotImplementedError: if this graph has an order larger than one

    Examples:
        Adding two graphs without node IDs:

        >>> g1 = pp.Graph.from_edge_index(torch.LongTensor([[0,1,1],[1,2,3]]))
        >>> g2 = pp.Graph.from_edge_index(torch.LongTensor([[0,2,3],[3,2,1]]))
        >>> print(g1 + g2)
        Directed graph with 4 nodes and 6 edges ...

        Adding two graphs with identical node IDs:

        >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
        >>> g2 = pp.Graph.from_edge_list([('a', 'c'), ('c', 'b')])
        >>> print(g1 + g2)
        Directed graph with 3 nodes and 4 edges ...

        Adding two graphs with non-overlapping node IDs:

        >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
        >>> g2 = pp.Graph.from_edge_list([('d', 'e'), ('e', 'f')])
        >>> print(g1 + g2)
        Directed graph with 6 nodes and 4 edges ...

        Adding two graphs with partly overlapping node IDs:

        >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
        >>> g2 = pp.Graph.from_edge_list([('b', 'd'), ('d', 'e')])
        >>> print(g1 + g2)
        Directed graph with 5 nodes and 4 edges ...
    """

    if self.order > 1:
        raise NotImplementedError("Add operator can only be applied to order 1 graphs")

    d1 = self.data.clone()
    m1 = self.mapping

    d2 = other.data.clone()
    m2 = other.mapping

    # compute overlap and additional nodes in g2 over g1
    overlap = set(m2.node_ids).intersection(m1.node_ids)
    additional_nodes = set(m2.node_ids).difference(m1.node_ids)

    # maps node indices used in d2 to node indices in the combined graph
    d2_idx_translation = {}
    node_ids = [""] * (self.n + len(additional_nodes))
    # keep mappings of nodes in g1
    for v in m1.node_ids:
        node_ids[m1.to_idx(v)] = v
    # overlapping node IDs reuse the index that the node has in g1
    for v in overlap:
        d2_idx_translation[m2.to_idx(v)] = m1.to_idx(v)
    # Nodes that only exist in g2 are appended behind the nodes of g1. Indices are
    # assigned by rank rather than by shifting the g2 index: shifting collides with
    # indices of g1 whenever a new node precedes an overlapping node in g2.
    for offset, v in enumerate(sorted(additional_nodes, key=m2.to_idx)):
        new_idx = self.n + offset
        node_ids[new_idx] = v
        d2_idx_translation[m2.to_idx(v)] = new_idx

    # apply index translation to d2
    # fast dictionary based mapping using torch; bucketize requires the
    # boundaries (old indices) to be sorted, so sort the translation explicitly
    palette, key = zip(*sorted(d2_idx_translation.items()))
    key = torch.tensor(key)
    palette = torch.tensor(palette)

    index = torch.bucketize(d2.edge_index.ravel(), palette)
    d2.edge_index = key[index].reshape(d2.edge_index.shape)
    d = d1.concat(d2)
    mapping = IndexMap(node_ids)
    d.num_nodes = self.n + len(additional_nodes)
    d.edge_index = EdgeIndex(d.edge_index, sparse_size=(d.num_nodes, d.num_nodes))
    return Graph(d, mapping=mapping)

`getitem` ¶

Return node, edge, or graph attribute.

Parameters:

Name	Type	Description	Default
`key`	`typing.Union[tuple, str]`	name of attribute to be returned	required

Source code in src/pathpyG/core/graph.py

def __getitem__(self, key: Union[tuple, str]) -> Any:
    """Return node, edge, or graph attribute.

    A plain (non-tuple) key is looked up as an attribute of the underlying data
    object. A tuple key is interpreted as `(attribute, node)` for node attributes
    or `(attribute, source, target)` for edge attributes.

    Args:
        key: name of attribute to be returned, optionally combined with the
            node or the edge for which the value shall be returned

    Raises:
        KeyError: if the requested attribute does not exist
    """
    if isinstance(key, tuple):
        attr = key[0]
        if attr in self.node_attrs():
            node_idx = self.mapping.to_idx(key[1])
            return self.data[attr][node_idx]
        if attr in self.edge_attrs():
            edge = (self.mapping.to_idx(key[1]), self.mapping.to_idx(key[2]))
            return self.data[attr][self.edge_to_index[edge]]
        raise KeyError(attr + " is not a node or edge attribute")

    if key not in self.data.keys():
        raise KeyError(key + " is not a graph attribute")
    return self.data[key]

`init` ¶

Generate graph instance from a pyG Data object.

Generate a Graph instance from a torch_geometric.Data object that contains an EdgeIndex as well as optional node-, edge- or graph-level attributes. An optional mapping can be used to transparently map node indices to string identifiers.

Parameters:

Name	Type	Description	Default
`data`	`torch_geometric.data.Data`	A pyG Data object containing an EdgeIndex and additional attributes	required
`mapping`	`typing.Optional[pathpyG.core.index_map.IndexMap]`	`IndexMap` object that maps node indices to string identifiers	`None`

Example

import pathpyG as pp
from torch_geometric.data import Data
from torch_geometric import EdgeIndex

data = Data(edge_index=EdgeIndex([[1,1,2],[0,2,1]], sparse_size=(3,3)))
g = pp.Graph(data)

g = pp.Graph(data, mapping=pp.IndexMap(['a', 'b', 'c']))

Source code in src/pathpyG/core/graph.py

def __init__(self, data: Data, mapping: Optional[IndexMap] = None):
    """Generate graph instance from a pyG `Data` object.

    Wraps a `torch_geometric.Data` object that contains an edge index as well as
    optional node-, edge- or graph-level attributes. The edge index is converted
    to a row-sorted, validated `EdgeIndex`, and lookup structures (edge tuple to
    edge position, CSR and CSC representations) are derived from it. Note that the
    passed `data` object is modified in place.

    Args:
        data: A pyG Data object containing an EdgeIndex and additional attributes
        mapping: `IndexMap` object that maps node indices to string identifiers.
            If omitted, an empty `IndexMap` is created.

    Raises:
        Exception: if the sparse size of the edge index does not match the number of nodes

    Example:
        ```py
        import pathpyG as pp
        from torch_geometric.data import Data
        from torch_geometric import EdgeIndex

        data = Data(edge_index=EdgeIndex([[1,1,2],[0,2,1]], sparse_size=(3,3)))
        g = pp.Graph(data)

        g = pp.Graph(data, mapping=pp.IndexMap(['a', 'b', 'c']))
        ```
    """
    self.mapping = IndexMap() if mapping is None else mapping

    # without an explicit node count, infer it from the largest node index
    if "num_nodes" not in data:
        data.num_nodes = data.edge_index.max().item() + 1
    num_nodes = data.num_nodes

    # wrap plain tensors in an EdgeIndex of matching sparse size
    if not isinstance(data.edge_index, EdgeIndex):
        data.edge_index = EdgeIndex(data=data.edge_index, sparse_size=(num_nodes, num_nodes))

    sparse_sizes = (
        data.edge_index.get_sparse_size(dim=0),
        data.edge_index.get_sparse_size(dim=1),
    )
    if sparse_sizes != (num_nodes, num_nodes):
        raise Exception("sparse size of EdgeIndex should match number of nodes!")

    # edges are stored sorted by source node
    data.edge_index = data.edge_index.sort_by("row").values
    data.edge_index.validate()

    self.data = data

    # map (source index, target index) tuples to positions in the sorted edge index
    self.edge_to_index = {}
    for position, edge in enumerate(self.data.edge_index.t()):
        self.edge_to_index[(edge[0].item(), edge[1].item())] = position

    # compressed row/column representations used for neighbor lookups
    (self.row_ptr, self.col), _ = self.data.edge_index.get_csr()
    (self.col_ptr, self.row), _ = self.data.edge_index.get_csc()

    # create node_sequence mapping for higher-order graphs
    if "node_sequence" not in self.data:
        self.data.node_sequence = torch.arange(data.num_nodes).reshape(-1, 1)

`setitem` ¶

Store node, edge, or graph attribute.

Parameters:

Name	Type	Description	Default
`key`	`str`	name of attribute to be stored	required
`val`	`torch.Tensor`	value of attribute	required

Source code in src/pathpyG/core/graph.py

def __setitem__(self, key: Union[str, tuple], val: torch.Tensor) -> None:
    """Store node, edge, or graph attribute.

    A plain (non-tuple) key stores `val` as a whole attribute. Attributes whose
    name starts with `node_` or `edge_` must have as many entries along the first
    dimension as the graph has nodes or edges, respectively. A tuple key of the
    form `(attribute, node)` or `(attribute, source, target)` overwrites the value
    of a single node or edge for an attribute that already exists.

    Args:
        key: name of attribute to be stored, optionally combined with the
            node or the edge for which the value shall be set
        val: value of attribute

    Raises:
        ValueError: if a whole node or edge attribute has the wrong length
        KeyError: if a node- or edge-specific value is set for an attribute that
            does not exist yet, or if the attribute name in a tuple key is not
            prefixed with `node_` or `edge_`
    """
    if not isinstance(key, tuple):
        if key.startswith("node_") and val.size(0) != self.n:
            raise ValueError("Attribute must have same length as number of nodes")
        if key.startswith("edge_") and val.size(0) != self.m:
            raise ValueError("Attribute must have same length as number of edges")
        self.data[key] = val
        return

    attr = key[0]
    if attr.startswith("node_"):
        kind = "node"
    elif attr.startswith("edge_"):
        kind = "edge"
    else:
        raise KeyError("node and edge specific attributes should be prefixed with 'node_' or 'edge_'")

    # single values can only be written into an attribute that already exists
    if attr not in self.data.keys():
        raise KeyError(
            "Attribute does not yet exist. Setting the value of a specific "
            + kind
            + " attribute requires that the attribute already exists."
        )

    if kind == "node":
        self.data[attr][self.mapping.to_idx(key[1])] = val
    else:
        edge = (self.mapping.to_idx(key[1]), self.mapping.to_idx(key[2]))
        self.data[attr][self.edge_to_index[edge]] = val

`str` ¶

Return a string representation of the graph.

Source code in src/pathpyG/core/graph.py

def __str__(self) -> str:
    """Return a string representation of the graph.

    The first line states whether the graph is directed and gives the number of
    nodes and edges. It is followed by a pretty-printed overview of the types of
    all node, edge and graph attributes.
    """
    from pprint import pformat

    # describe every attribute by its type; plain tensors additionally by their size
    attr_types = {}
    for name, value in self.data.to_dict().items():
        value_type = type(value)
        description = str(value_type)
        if value_type == torch.Tensor:
            description = description + " -> " + str(value.size())
        attr_types[name] = description

    if self.is_undirected():
        header = "Undirected graph with {0} nodes and {1} (directed) edges\n".format(self.n, self.m)
    else:
        header = "Directed graph with {0} nodes and {1} edges\n".format(self.n, self.m)

    attribute_info = {
        "Node Attributes": {name: attr_types[name] for name in self.node_attrs()},
        "Edge Attributes": {name: attr_types[name] for name in self.edge_attrs()},
        "Graph Attributes": {
            name: attr_types[name]
            for name in self.data.keys()
            if not (self.data.is_node_attr(name) or self.data.is_edge_attr(name))
        },
    }
    return header + pformat(attribute_info, indent=4, width=160)

`degrees` ¶

Return degrees of nodes.

Parameters:

Name	Type	Description	Default
`mode`	`str`	`in` or `out` to calculate the in- or out-degree for directed networks.	`'in'`

Returns:

Name	Type	Description
`dict`	`typing.Dict[str, float]`	dictionary containing degrees of nodes

Source code in src/pathpyG/core/graph.py

def degrees(self, mode: str = "in") -> Dict[str, float]:
    """
    Return degrees of nodes.

    Degrees are counted from the target row of the edge index for `mode="in"`
    and from the source row for any other value of `mode`.

    Args:
        mode: `in` or `out` to calculate the in- or out-degree for
            directed networks.

    Returns:
        dict: dictionary containing degrees of nodes
    """
    # row 1 of the edge index holds edge targets, row 0 holds edge sources
    endpoint_row = 1 if mode == "in" else 0
    counts = torch_geometric.utils.degree(
        self.data.edge_index[endpoint_row], num_nodes=self.n, dtype=torch.int
    )
    return {self.mapping.to_id(idx): counts[idx].item() for idx in range(self.n)}

`edge_attrs` ¶

Return a list of edge attributes.

This method returns a list containing the names of all edge-level attributes, ignoring the special edge_index attribute.

Returns:

Name	Type	Description
`list`	`typing.List[str]`	list of edge attributes

Source code in src/pathpyG/core/graph.py

def edge_attrs(self) -> List[str]:
    """
    Return a list of edge attributes.

    Edge-level attributes are recognized by the prefix `edge_` in their name.
    The special `edge_index` attribute is not included.

    Returns:
        list: list of edge attributes
    """
    return [name for name in self.data.keys() if name != "edge_index" and name.startswith("edge_")]

`from_edge_index` `staticmethod` ¶

Construct a graph from a torch Tensor containing an edge index. An optional mapping can be used to transparently map node indices to string identifiers.

Parameters:

Name	Type	Description	Default
`edge_index`	`torch.Tensor`	torch.Tensor or torch_geometric.EdgeIndex object containing an edge_index	required
`mapping`	`typing.Optional[pathpyG.core.index_map.IndexMap]`	`IndexMap` object that maps node indices to string identifiers	`None`
`num_nodes`	`int`	optional number of nodes (default: None). If None, the number of nodes will be inferred based on the maximum node index in the edge index, i.e. there will be no isolated nodes.	`None`

Examples:

You can create a graph from an edge index tensor as follows:

>>> import torch
>>> import pathpyG as pp
>>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]))
>>> print(g)
Directed graph with 3 nodes and 3 edges ...

You can also include a mapping of node IDs:

>>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]),
>>>                              mapping=pp.IndexMap(['a', 'b', 'c']))
>>> print(g.mapping)
a -> 0
b -> 1
c -> 2

Source code in src/pathpyG/core/graph.py

@staticmethod
def from_edge_index(
    edge_index: torch.Tensor, mapping: Optional[IndexMap] = None, num_nodes: Optional[int] = None
) -> Graph:
    """Construct a graph from a torch Tensor containing an edge index.

    An optional mapping can be used to transparently map node indices to string identifiers.

    Args:
        edge_index:  torch.Tensor or torch_geometric.EdgeIndex object containing an edge_index
        mapping: `IndexMap` object that maps node indices to string identifiers
        num_nodes: optional number of nodes (default: None). If None, the number of nodes will be
            inferred based on the maximum node index in the edge index, i.e. there will be no
            isolated nodes with an index larger than the maximum index. An explicit value of 0
            is respected, which allows to create an empty graph from an empty edge index.

    Returns:
        Graph: graph with the given edges

    Examples:
        You can create a graph from an edge index tensor as follows:

        >>> import torch
        >>> import pathpyG as pp
        >>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]))
        >>> print(g)
        Directed graph with 3 nodes and 3 edges ...

        You can also include a mapping of node IDs:

        >>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]),
        ...                              mapping=pp.IndexMap(['a', 'b', 'c']))
        >>> print(g.mapping)
        a -> 0
        b -> 1
        c -> 2
    """

    # Compare against None explicitly: a truthiness test would treat num_nodes=0
    # as "not given" and fall back to inferring the node count from the edge index.
    if num_nodes is None:
        d = Data(edge_index=edge_index)
    else:
        d = Data(edge_index=edge_index, num_nodes=num_nodes)
    return Graph(d, mapping=mapping)

`from_edge_list` `staticmethod` ¶

Generate a Graph based on an edge list.

Edges can be given as string or integer tuples. If strings are used and no mapping is given, a mapping of node IDs to indices will be automatically created based on a lexicographic ordering of node IDs.

Parameters:

Name	Type	Description	Default
`edge_list`	`typing.Iterable[typing.Tuple[str, str]]`	Iterable of edges represented as tuples	required
`is_undirected`	`bool`	Whether the edge list contains all bidirectional edges	`False`
`mapping`	`typing.Optional[pathpyG.core.index_map.IndexMap]`	optional mapping of string IDs to node indices	`None`
`num_nodes`	`typing.Optional[int]`	optional number of nodes (useful in case not all nodes have incident edges)	`None`

Examples:

>>> import pathpyG as pp
>>> l = [('a', 'b'), ('a', 'c'), ('b', 'c')]
>>> g = pp.Graph.from_edge_list(l)
>>> print(list(g.edges))
[('a', 'b'), ('a', 'c'), ('b', 'c')]

Source code in src/pathpyG/core/graph.py

@staticmethod
def from_edge_list(
    edge_list: Iterable[Tuple[str, str]],
    is_undirected: bool = False,
    mapping: Optional[IndexMap] = None,
    num_nodes: Optional[int] = None,
) -> Graph:
    """Generate a Graph based on an edge list.

    Edges can be given as string or integer tuples. If no mapping is given, a mapping of
    node IDs to indices is created automatically from the sorted unique node IDs found in
    the edge list. String IDs that are all numeric are ordered by their numeric value
    rather than lexicographically.

    Args:
        edge_list: Iterable of edges represented as tuples
        is_undirected: Whether the edge list contains all bidirectional edges
        mapping: optional mapping of string IDs to node indices
        num_nodes: optional number of nodes (useful in case not all nodes have incident edges)

    Returns:
        Graph: graph with the given edges. An empty edge list yields a graph
            without nodes and with an empty mapping.

    Examples:
        >>> import pathpyG as pp
        >>> l = [('a', 'b'), ('a', 'c'), ('b', 'c')]
        >>> g = pp.Graph.from_edge_list(l)
        >>> print(list(g.edges))
        [('a', 'b'), ('a', 'c'), ('b', 'c')]
    """

    # One-shot iterables such as generators neither support len() nor can they be
    # traversed twice (once for the node IDs, once for the edges): materialize them.
    if not hasattr(edge_list, "__len__"):
        edge_list = list(edge_list)

    # handle empty graph
    if len(edge_list) == 0:
        return Graph(Data(edge_index=torch.tensor([[], []], dtype=torch.int32), num_nodes=0), mapping=IndexMap())

    if mapping is None:
        edge_array = np.array(edge_list)
        node_ids = np.unique(edge_array)
        # numeric string IDs are sorted by value, so that e.g. '2' precedes '10'
        if np.issubdtype(node_ids.dtype, str) and np.char.isnumeric(node_ids).all():
            node_ids = np.sort(node_ids.astype(int)).astype(str)
        mapping = IndexMap(node_ids)

    if num_nodes is None:
        num_nodes = mapping.num_ids()

    edge_index = EdgeIndex(
        mapping.to_idxs(edge_list).T.contiguous(),
        sparse_size=(num_nodes, num_nodes),
        is_undirected=is_undirected,
    )
    return Graph(Data(edge_index=edge_index, num_nodes=num_nodes), mapping=mapping)

`get_predecessors` ¶

Return a tensor containing the indices of all predecessor nodes for a given node identified by an index.

Parameters:

Name	Type	Description	Default
`col_idx`	`int`	Index of node for which predecessors shall be returned.	required

Returns:

Name	Type	Description
`tensor`	`torch.Tensor`	tensor containing indices of all predecessor nodes of the node indexed by `col_idx`

Source code in src/pathpyG/core/graph.py

def get_predecessors(self, col_idx: int) -> torch.Tensor:
    """Return a tensor containing the indices of all predecessor nodes for a given node identified by an index.

    Predecessors are read from the compressed column representation of the edge index.

    Args:
        col_idx:   Index of node for which predecessors shall be returned.

    Returns:
        tensor: tensor containing indices of all predecessor nodes of the node indexed by `col_idx`.
            An empty tensor is returned if `col_idx` lies beyond the range covered by the
            column pointer.
    """
    if col_idx + 1 >= self.col_ptr.size(0):
        # Use the dtype of the index tensor: torch.tensor([]) would default to a
        # floating point dtype, unlike every other value returned by this method.
        return torch.tensor([], dtype=self.row.dtype, device=self.data.edge_index.device)

    col_start = self.col_ptr[col_idx]
    col_end = self.col_ptr[col_idx + 1]
    return self.row[col_start:col_end]

`get_successors` ¶

Return a tensor containing the indices of all successor nodes for a given node identified by an index.

Parameters:

Name	Type	Description	Default
`row_idx`	`int`	Index of node for which successors shall be returned.	required

Returns:

Name	Type	Description
`tensor`	`torch.Tensor`	tensor containing indices of all successor nodes of the node indexed by `row_idx`

Source code in src/pathpyG/core/graph.py

def get_successors(self, row_idx: int) -> torch.Tensor:
    """Return a tensor containing the indices of all successor nodes for a given node identified by an index.

    Successors are read from the compressed row representation of the edge index.

    Args:
        row_idx:   Index of node for which successors shall be returned.

    Returns:
        tensor: tensor containing indices of all successor nodes of the node indexed by `row_idx`.
            An empty tensor is returned if `row_idx` lies beyond the range covered by the
            row pointer.
    """
    if row_idx + 1 >= self.row_ptr.size(0):
        # Use the dtype of the index tensor: torch.tensor([]) would default to a
        # floating point dtype, unlike every other value returned by this method.
        return torch.tensor([], dtype=self.col.dtype, device=self.data.edge_index.device)

    row_start = self.row_ptr[row_idx]
    row_end = self.row_ptr[row_idx + 1]
    return self.col[row_start:row_end]

`has_self_loops` ¶

Return whether graph contains self-loops.

Returns:

Name	Type	Description
`bool`	`bool`	True if graph contains self-loops, False otherwise

Source code in src/pathpyG/core/graph.py

def has_self_loops(self) -> bool:
    """Return whether graph contains self-loops.

    The check is delegated to the underlying data object.

    Returns:
        bool: True if graph contains self-loops, False otherwise
    """
    underlying_data = self.data
    return underlying_data.has_self_loops()

`is_directed` ¶

Return whether graph is directed.

Returns:

Name	Type	Description
`bool`	`bool`	True if graph is directed, False otherwise

Source code in src/pathpyG/core/graph.py

def is_directed(self) -> bool:
    """Return whether graph is directed.

    A graph counts as directed unless its edge index is flagged as undirected.

    Returns:
        bool: True if graph is directed, False otherwise
    """
    flagged_undirected = self.data.edge_index.is_undirected
    return not flagged_undirected

`is_edge` ¶

Return whether edge \((v,w)\) exists in the graph.

If an index to ID mapping is used, nodes are assumed to be string IDs. If no mapping is used, nodes are assumed to be integer indices.

Parameters:

Name	Type	Description	Default
`v`	`typing.Union[str, int]`	source node of edge as integer index or string ID	required
`w`	`typing.Union[str, int]`	target node of edge as integer index or string ID	required

Returns:

Name	Type	Description
`bool`	`bool`	True if edge exists, False otherwise

Source code in src/pathpyG/core/graph.py

def is_edge(self, v: Union[str, int], w: Union[str, int]) -> bool:
    """Return whether edge $(v,w)$ exists in the graph.

    Both nodes are translated to indices via the graph's mapping. The target is
    then searched among the successors of the source in the compressed row
    representation of the edge index.

    Args:
        v: source node of edge as integer index or string ID
        w: target node of edge as integer index or string ID

    Returns:
        bool: True if edge exists, False otherwise
    """
    source = self.mapping.to_idx(v)
    # successors of `source` occupy this slice of the column index
    successors_of_source = self.col[self.row_ptr[source] : self.row_ptr[source + 1]]
    return self.mapping.to_idx(w) in successors_of_source

`is_undirected` ¶

Return whether graph is undirected.

Returns:

Name	Type	Description
`bool`	`bool`	True if graph is undirected, False otherwise

Source code in src/pathpyG/core/graph.py

def is_undirected(self) -> bool:
    """Return whether graph is undirected.

    The result is the `is_undirected` flag of the graph's edge index.

    Returns:
        bool: True if graph is undirected, False otherwise
    """
    edge_index = self.data.edge_index
    return edge_index.is_undirected

`laplacian` ¶

Return Laplacian matrix for a given graph.

This wrapper method will use torch_geometric.utils.get_laplacian to return a Laplacian matrix representation of a given graph.

Parameters:

Name	Type	Description	Default
`normalization`	`typing.Any`	normalization parameter passed to pyG `get_laplacian` function	`None`
`edge_attr`	`typing.Any`	optional name of numerical edge attribute that shall be passed to pyG `get_laplacian` function as edge weight	`None`

Returns:

Type	Description
`typing.Any`	scipy.sparse.coo_matrix: Laplacian matrix representation of graph

Source code in src/pathpyG/core/graph.py

def laplacian(self, normalization: Any = None, edge_attr: Any = None) -> Any:
    """Return Laplacian matrix for a given graph.

    This wrapper method will use [`torch_geometric.utils.get_laplacian`](https://pytorch-geometric.readthedocs.io/en/latest/modules/utils.html#torch_geometric.utils.get_laplacian)
    to return a Laplacian matrix representation of a given graph.

    Args:
        normalization: normalization parameter passed to pyG `get_laplacian`
            function
        edge_attr: optional name of numerical edge attribute that shall
            be passed to pyG `get_laplacian` function as edge weight

    Returns:
        scipy.sparse.coo_matrix: Laplacian matrix representation of graph
    """
    edge_weight = None if edge_attr is None else self.data[edge_attr]
    # Pass the number of nodes explicitly. Otherwise pyG infers it from the largest
    # node index that occurs in an edge, so isolated nodes with larger indices would
    # be missing from the matrix.
    index, weight = torch_geometric.utils.get_laplacian(
        self.data.edge_index.as_tensor(),
        edge_weight=edge_weight,
        normalization=normalization,
        num_nodes=self.n,
    )
    return torch_geometric.utils.to_scipy_sparse_matrix(index, weight, num_nodes=self.n)

`node_attrs` ¶

Return a list of node attributes.

This method returns a list containing the names of all node-level attributes, ignoring the special node_sequence attribute.

Returns:

Name	Type	Description
`list`	`typing.List[str]`	list of node attributes

Source code in src/pathpyG/core/graph.py

def node_attrs(self) -> List[str]:
    """
    Return a list of node attributes.

    Node-level attributes are recognized by the prefix `node_` in their name.
    The special `node_sequence` attribute is not included.

    Returns:
        list: list of node attributes
    """
    return [name for name in self.data.keys() if name != "node_sequence" and name.startswith("node_")]

`predecessors` ¶

Return the predecessors of a given node.

This method returns a list that contains all predecessors of a given node. If a node_id mapping is used, predecessors will be returned as string IDs. If no mapping is used, predecessors are returned as indices.

Parameters:

Name	Type	Description	Default
`node`	`typing.Union[str, int] \| tuple`	Index or string ID of node for which predecessors shall be returned.	required

Returns:

Name	Type	Description
`list`	`list`	list with all predecessors of the node identified by `node` using ID or index (if no mapping is used)

Source code in src/pathpyG/core/graph.py

def predecessors(self, node: Union[str, int] | tuple) -> list:
    """Return the predecessors of a given node.

    This method returns a generator object that yields all predecessors of a
    given node. If a `node_id` mapping is used, predecessors will be returned
    as string IDs. If no mapping is used, predecessors are returned as indices.

    Args:
        node:   Index or string ID of node for which predecessors shall be returned.

    Returns:
        list: list with all predecessors of the node identified
            by `node` using ID or index (if no mapping is used)
    """
    node_list = self.mapping.to_ids(self.get_predecessors(self.mapping.to_idx(node))).tolist()  # type: ignore

    if self.order > 1:
        return list(map(tuple, node_list))
    return node_list

`sparse_adj_matrix` ¶

Return sparse adjacency matrix representation of (weighted) graph.

Parameters:

Name	Type	Description	Default
`edge_attr`	`typing.Any`	the edge attribute that shall be used as edge weight	`None`

Returns:

Type	Description
`typing.Any`	scipy.sparse.coo_matrix: sparse adjacency matrix representation of graph

Source code in src/pathpyG/core/graph.py

def sparse_adj_matrix(self, edge_attr: Any = None) -> Any:
    """Return sparse adjacency matrix representation of (weighted) graph.

    Args:
        edge_attr: the edge attribute that shall be used as edge weight.
            If omitted, no edge attribute is passed on to pyG.

    Returns:
        scipy.sparse.coo_matrix: sparse adjacency matrix representation of graph
    """
    edge_index = self.data.edge_index.as_tensor()
    if edge_attr is not None:
        weights = self.data[edge_attr]
        return torch_geometric.utils.to_scipy_sparse_matrix(edge_index, edge_attr=weights, num_nodes=self.n)
    return torch_geometric.utils.to_scipy_sparse_matrix(edge_index, num_nodes=self.n)

`successors` ¶

Return all successors of a given node.

This method returns a list that contains all successors of a given node. If an IndexMap is used, successors are returned as string IDs. If no mapping is used, successors are returned as indices.

Parameters:

Name	Type	Description	Default
`node`	`typing.Union[int, str] \| tuple`	Index or string ID of node for which successors shall be returned.	required

Returns:

Name	Type	Description
`list`	`list`	list with all successors of the node identified by `node` using ID or index (if no mapping is used)

Source code in src/pathpyG/core/graph.py

def successors(self, node: Union[int, str] | tuple) -> list:
    """Return all successors of a given node.

    This method returns a generator object that yields all successors of a
    given node. If an IndexMap is used, successors are returned
    as string IDs. If no mapping is used, successors are returned as indices.

    Args:
        node:   Index or string ID of node for which successors shall be returned.

    Returns:
        list: list with all successors of the node identified
            by `node` using ID or index (if no mapping is used)
    """

    node_list = self.mapping.to_ids(self.get_successors(self.mapping.to_idx(node))).tolist()  # type: ignore

    if self.order > 1:
        return list(map(tuple, node_list))
    return node_list

`to_undirected` ¶

Return an undirected version of this directed graph.

This method creates a new undirected Graph from the current graph instance by adding all directed edges in opposite direction.

Examples:

>>> import pathpyG as pp
>>> g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('c', 'a')])
>>> g_u = g.to_undirected()
>>> print(g_u)
Undirected graph with 3 nodes and 6 (directed) edges

Source code in src/pathpyG/core/graph.py

def to_undirected(self) -> Graph:
    """Return an undirected version of this directed graph.

    A new undirected Graph is created from the current graph instance by adding
    all directed edges in opposite direction. Node attributes are carried over
    unchanged; edge attributes are carried over by looking up, for every edge of
    the new graph, the value of an edge in the current graph.

    Examples:
        >>> import pathpyG as pp
        >>> g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('c', 'a')])
        >>> g_u = g.to_undirected()
        >>> print(g_u)
        Undirected graph with 3 nodes and 6 (directed) edges
    """
    num_nodes = self.data.num_nodes

    # Pass the positions of the original edges through the conversion as an edge
    # attribute. Afterwards they tell, for every undirected edge, which original
    # edge its attributes are taken from ("min": the smallest position wins).
    original_positions = torch.arange(self.data.num_edges, device=self.data.edge_index.device)
    undirected_index, original_positions = to_undirected(
        self.data.edge_index,
        edge_attr=original_positions,
        num_nodes=num_nodes,
        reduce="min",
    )

    undirected_edge_index = EdgeIndex(
        data=undirected_index, sparse_size=(num_nodes, num_nodes), is_undirected=True
    )
    undirected_data = Data(edge_index=undirected_edge_index, num_nodes=num_nodes)

    # Note that while the torch_geometric.transforms.ToUndirected function would do this automatically,
    # we do it manually since the transform cannot handle numpy arrays as edge attributes.
    for name in self.node_attrs():
        undirected_data[name] = self.data[name]
    for name in self.edge_attrs():
        if name == "edge_index":
            continue
        undirected_data[name] = self.data[name][original_positions]

    return Graph(undirected_data, self.mapping)

`to_weighted_graph` ¶

Coalesces multi-edges to single-edges with an additional weight attribute

If the graph contains multiple edges between the same nodes, this method will coalesce them into a single edge with an additional weight attribute called edge_weight that contains the number of coalesced edges. The method returns a new graph instance with the coalesced edges.

Returns:

Name	Type	Description
`Graph`	`pathpyG.core.graph.Graph`	Graph with coalesced edges

Source code in src/pathpyG/core/graph.py

def to_weighted_graph(self) -> Graph:
    """Coalesces multi-edges to single-edges with an additional weight attribute

    Every edge of the graph is assigned a weight of one before the edge index is
    coalesced with pyG's `coalesce` function. The coalesced weights are stored in
    the attribute `edge_weight` of a new graph instance that uses the same mapping
    and the same number of nodes.

    Returns:
        Graph: Graph with coalesced edges
    """
    unit_weights = torch.ones(self.m, device=self.data.edge_index.device)
    coalesced_index, coalesced_weights = torch_geometric.utils.coalesce(
        self.data.edge_index.as_tensor(), unit_weights
    )
    weighted_data = Data(
        edge_index=coalesced_index, edge_weight=coalesced_weights, num_nodes=self.data.num_nodes
    )
    return Graph(weighted_data, mapping=self.mapping)

`transition_probabilities` ¶

Compute transition probabilities based on weighted outdegrees.

Returns:

Name	Type	Description
`tensor`	`torch.Tensor`	Transition probabilities.

Source code in src/pathpyG/core/graph.py

def transition_probabilities(self) -> torch.Tensor:
    """
    Compute per-edge transition probabilities.

    Each edge weight is divided by the weighted outdegree of the edge's
    source node. If the underlying data object has no `edge_weight`
    attribute (or it is `None`), every edge is given weight one.

    Returns:
        tensor: Transition probabilities, one entry per edge.
    """
    graph_data = self.data
    weights = getattr(graph_data, 'edge_weight', None)
    if weights is None:
        # unweighted graph: fall back to unit weights on the edge index device
        weights = torch.ones(graph_data.num_edges, device=graph_data.edge_index.device)
    sources = graph_data.edge_index[0]
    out_strength = self.weighted_outdegrees()
    return weights / out_strength[sources]

`weighted_outdegrees` ¶

Compute the weighted outdegrees of each node in the graph.

Parameters:

Name	Type	Description	Default
`graph`	`pathpyG.core.graph.Graph`	pathpy graph object.	required

Returns:

Name	Type	Description
`tensor`	`torch.Tensor`	Weighted outdegrees of nodes.

Source code in src/pathpyG/core/graph.py

def weighted_outdegrees(self) -> torch.Tensor:
    """
    Compute the weighted outdegrees of each node in the graph.

    Edge weights are summed per source node index via `scatter` with
    `reduce="sum"`. If the underlying data object has no `edge_weight`
    attribute (or it is `None`), every edge is given weight one.

    Returns:
        tensor: Weighted outdegrees of nodes, with one entry per node.
    """
    graph_data = self.data
    weights = getattr(graph_data, 'edge_weight', None)
    if weights is None:
        # unweighted graph: fall back to unit weights on the edge index device
        weights = torch.ones(graph_data.num_edges, device=graph_data.edge_index.device)
    sources = graph_data.edge_index[0]
    return scatter(weights, sources, dim=0, dim_size=graph_data.num_nodes, reduce="sum")

`closed_triads` ¶

Calculates the set of edges that represent a closed triad around a given node v.

Parameters¶

g : Graph

The graph in which to calculate the set of closed triads

v : str

The node around which closed triads are calculated

Source code in src/pathpyG/statistics/clustering.py

def closed_triads(g: Graph, v: str) -> Set:
    """Calculate the set of edges that close a triad around a given node v.

    An edge `(x, y)` is returned if `x` is a successor of `v`, `y` is a
    successor of `x`, and `y` is also a successor of `v`.

    Args:
        g: The graph in which to search for closed triads
        v: The node around which closed triads are searched

    Returns:
        Set of `(x, y)` tuples, one per edge between two successors of `v`.
        The set is empty if `v` has no successors.
    """
    # Materialise the successors of v once. Querying the graph again for every
    # candidate edge repeats the same traversal and makes the membership test
    # linear instead of constant time.
    neighbours = set(g.successors(v))
    return {(x, y) for x in neighbours for y in g.successors(x) if y in neighbours}

`degree_assortativity` ¶

Calculate the degree assortativity

Source code in src/pathpyG/statistics/degrees.py

def degree_assortativity(g: Graph, mode: str = "total") -> float:
    """Calculate the degree assortativity of a graph.

    With adjacency matrix `A`, degrees `d` and edge count `m`, the value is the
    ratio of

        sum_ij (A_ij - d_i * d_j / (2m)) * d_i * d_j

    to

        sum_ij (d_i * [i == j] - d_i * d_j / (2m)) * d_i * d_j

    `m` is the sum of all adjacency matrix entries, halved for undirected graphs.

    Args:
        g: The graph for which to calculate the degree assortativity
        mode: Which degrees to use for directed graphs: "in", "out" or "total".
            Any other value, and any undirected graph, uses `g.degrees()`.

    Returns:
        The degree assortativity. No guard is applied when the denominator is
        zero (for example when every node has the same degree).
    """
    A = g.sparse_adj_matrix().todense()
    m = _np.sum(A)

    # Only compute the degrees that are actually used.
    directed = g.is_directed()
    if directed and mode == "in":
        d = g.in_degrees
    elif directed and mode == "out":
        d = g.out_degrees
    else:
        d = g.degrees()
    if not directed:
        # every undirected edge appears twice in the adjacency matrix
        m = m / 2.0

    # Hoist loop-invariant work out of the quadratic loop: the normalisation
    # constant and each node's matrix index and degree are resolved once.
    two_m = 2 * m
    entries = [(v, g.mapping.to_idx(v), d[v]) for v in g.nodes]

    cov = 0.0
    var = 0.0
    for i, idx_i, d_i in entries:
        for j, idx_j, d_j in entries:
            expected = (d_i * d_j) / two_m
            cov += (A[idx_i, idx_j] - expected) * d_i * d_j
            if i != j:
                var -= expected * d_i * d_j
            else:
                var += (d_i - expected) * d_i * d_j
    return cov / var

`degree_central_moment` ¶

Calculates the k-th central moment of the degree distribution.

Parameters:

Name	Type	Description	Default
`graph`	`pathpyG.core.graph.Graph`	The graph for which to calculate the k-th central moment	required

Source code in src/pathpyG/statistics/degrees.py

def degree_central_moment(graph: Graph, k: int = 1, mode: str = "total") -> float:
    """Calculates the k-th central moment of the degree distribution.

    The moment is the sum of `(degree - mean) ** k * P(degree)` over all
    degrees in the degree distribution, where `mean` is the mean of the
    degree sequence.

    Args:
        graph: The graph for which to calculate the k-th central moment
        k: Order of the moment
        mode: Degree mode passed on to `degree_distribution` and `degree_sequence`

    Returns:
        The k-th central moment of the degree distribution.
    """
    mean_degree = _np.mean(degree_sequence(graph, mode=mode))
    distribution = degree_distribution(graph, mode=mode)
    moment = 0.0
    for degree, probability in distribution.items():
        moment += (degree - mean_degree) ** k * probability
    return moment

`degree_distribution` ¶

Calculates the degree distribution of a graph

Source code in src/pathpyG/statistics/degrees.py

def degree_distribution(g: Graph, mode: str = "total") -> Dict[int, float]:
    """Calculates the degree distribution of a graph.

    Args:
        g: The graph for which to calculate the degree distribution
        mode: Which degrees to use for directed graphs: "in", "out" or "total".
            Any other value, and any undirected graph, uses `g.degrees()`.

    Returns:
        A `defaultdict` mapping each occurring degree to the fraction of nodes
        with that degree. Degrees that do not occur map to `0.0`.
    """
    # Only compute the degrees that are actually used.
    if g.is_directed() and mode == "in":
        d = g.in_degrees
    elif g.is_directed() and mode == "out":
        d = g.out_degrees
    else:
        d = g.degrees()

    # Count first and divide once per degree. Adding 1/n repeatedly accumulates
    # rounding error, so the probabilities would not sum to exactly one.
    counts: defaultdict = defaultdict(int)
    for v in g.nodes:
        counts[d[v]] += 1

    cnt: defaultdict = defaultdict(float)
    for degree, count in counts.items():
        cnt[degree] = count / g.n
    return cnt

`degree_generating_function` ¶

Returns the generating function of the degree distribution of a network, calculated for either a single argument x or a list or numpy array of arguments x

Returns f(x) where f is the probability generating function for the degree distribution P(k) for a graph. The function is defined in the interval [0,1]. The value returned is from the range [0,1]. The following properties hold:

[1/k! d^k/dx f]_{x=0} = P(k) with d^k/dx f being the k-th derivative of f by x

$f'(1) = \langle k \rangle$ with $f'$ being the first derivative and $\langle k \rangle$ the mean degree

$[(x \, d/dx)^m f]_{x=1} = \langle k^m \rangle$ with $\langle k^m \rangle$ being the m-th raw moment of P

Parameters:

Name	Type	Description	Default
`graph`	`pathpyG.core.graph.Graph`	The graph for which the generating function shall be computed	required

x : float, list, numpy.ndarray

The argument(s) for which value(s) f(x) shall be computed.

Example:

```py
# Generate simple network
import pathpyG as pp
import numpy as np
import matplotlib.pyplot as plt

g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('a', 'c'), ('c', 'd'),
                            ('d', 'e'), ('d', 'f'), ('e', 'f')]).to_undirected()

# Return single function value
val = pp.statistics.degree_generating_function(g, 0.3)
print(val)
0.069

# Plot generating function of degree distribution
x = np.linspace(0, 1, 20)
y = pp.statistics.degree_generating_function(g, x)
x = plt.plot(x, y)
# [Function plot]

# Plot generating function based on degree sequence
# Note: the implementation listed below derives the distribution from a
# Graph, so a raw degree sequence such as [1,2,1,2] cannot be passed directly.
x = np.linspace(0, 1, 20)
y = pp.statistics.degree_generating_function(g, x)
x = plt.plot(x, y)
# [Function plot]
```

Source code in src/pathpyG/statistics/degrees.py

def degree_generating_function(
    graph: Graph, x: float | list[float] | _np.ndarray, mode: str = "total"
) -> float | _np.ndarray:
    """Returns the generating function of the degree distribution of a network,
        calculated for either a single argument x or a list or numpy array of arguments x


    Returns f(x) = sum_k P(k) * x^k where f is the probability generating
    function for the degree distribution P(k) for a graph. The function is
    defined in the interval [0,1].  The value returned is from the range [0,1].
    The following properties hold:

    [1/k! d^k/dx f]_{x=0} = P(k)
    with d^k/dx f being the k-th derivative of f by x

    f'(1) = <k>
    with f' being the first derivative and <k> the mean degree

    [(x d/dx)^m f]_{x=1} = <k^m>
    with <k^m> being the m-th raw moment of P

    Args:
        graph: The graph for which the generating function shall be computed.
            A `Graph` is required; raw degree sequences are not accepted.
        x: The argument(s) for which value(s) f(x) shall be computed. May be a
            scalar (int, float or numpy scalar), a list, or a numpy array.
        mode: Degree mode passed on to `degree_distribution`.

    Returns:
        A float if `x` is a scalar. Otherwise a numpy array with the same shape
        as `x`, holding f evaluated at each entry. Repeated entries in `x` are
        evaluated independently.

    Example:
    ```py
        # Generate simple network
        import pathpyG as pp
        import numpy as np
        import matplotlib.pyplot as plt

        g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('a', 'c'), ('c', 'd'),
                                    ('d', 'e'), ('d', 'f'), ('e', 'f')]).to_undirected()

        # Return single function value
        val = pp.statistics.degree_generating_function(g, 0.3)
        print(val)
        0.069

        # Plot generating function of degree distribution

        x = np.linspace(0, 1, 20)
        y = pp.statistics.degree_generating_function(g, x)
        x = plt.plot(x, y)
        # [Function plot]
    ```
    """

    p_k = degree_distribution(graph, mode=mode)

    # np.ndim is 0 for every scalar, so integer arguments such as 0 or 1 are
    # handled like floats instead of being iterated over.
    is_scalar = _np.ndim(x) == 0
    points = _np.atleast_1d(_np.asarray(x, dtype=float))

    # Accumulate by position rather than by value: keying the results on the
    # argument value merges repeated arguments into one (doubled) entry.
    values = _np.zeros(points.shape, dtype=float)
    for k, prob in p_k.items():
        values += prob * points**k

    if is_scalar:
        return float(values[0])
    return values

`degree_raw_moment` ¶

Calculates the k-th raw moment of the degree distribution of a network

Parameters:

Name	Type	Description	Default
`graph`	`pathpyG.core.graph.Graph`	The graph in which to calculate the k-th raw moment	required

Source code in src/pathpyG/statistics/degrees.py

def degree_raw_moment(graph: Graph, k: int = 1, mode: str = "total") -> float:
    """Calculates the k-th raw moment of the degree distribution of a network

    The moment is the sum of `degree ** k * P(degree)` over all degrees in
    the degree distribution.

    Args:
        graph:  The graph in which to calculate the k-th raw moment
        k: Order of the moment
        mode: Degree mode passed on to `degree_distribution`

    Returns:
        The k-th raw moment of the degree distribution.
    """
    distribution = degree_distribution(graph, mode=mode)
    moment = 0.0
    for degree, probability in distribution.items():
        moment += degree**k * probability
    return moment

`degree_sequence` ¶

Calculates the degree sequence of a network. For directed networks, `mode` selects whether in-, out-, or total degrees are used.

Parameters:

Name	Type	Description	Default
`graph`		The `Graph` object for which degrees are calculated	required

Source code in src/pathpyG/statistics/degrees.py

def degree_sequence(g: Graph, mode: str = "total") -> _np.ndarray:
    """Calculates the degree sequence of a network.

    Args:
        g: The `Graph` object for which degrees are calculated
        mode: Which degrees to use for directed graphs: "in", "out" or "total".
            Any other value, and any undirected graph, uses `g.degrees()`.

    Returns:
        A float array of length `g.n` in which the entry at position
        `g.mapping.to_idx(v)` holds the degree of node `v`.
    """
    # Only compute the degrees that are actually used.
    if g.is_directed() and mode == "in":
        d = g.in_degrees
    elif g.is_directed() and mode == "out":
        d = g.out_degrees
    else:
        d = g.degrees()

    _degrees = _np.zeros(g.n, dtype=float)
    for v in g.nodes:
        # position follows the node index mapping, not the iteration order
        _degrees[g.mapping.to_idx(v)] = d[v]
    return _degrees

statistics

Graph ¶

edges property ¶

in_degrees property ¶

m property ¶

n property ¶

nodes property ¶

order property ¶

out_degrees property ¶

__add__ ¶

__getitem__ ¶

__init__ ¶

__setitem__ ¶

__str__ ¶

degrees ¶

edge_attrs ¶

from_edge_index staticmethod ¶

from_edge_list staticmethod ¶

get_predecessors ¶

get_successors ¶

has_self_loops ¶

is_directed ¶

is_edge ¶

is_undirected ¶

laplacian ¶

node_attrs ¶

predecessors ¶

sparse_adj_matrix ¶

successors ¶

to_undirected ¶

to_weighted_graph ¶

transition_probabilities ¶

weighted_outdegrees ¶

closed_triads ¶

Parameters¶

degree_assortativity ¶

degree_central_moment ¶

degree_distribution ¶

degree_generating_function ¶

degree_raw_moment ¶

degree_sequence ¶

`Graph` ¶

`edges` `property` ¶

`in_degrees` `property` ¶

`m` `property` ¶

`n` `property` ¶

`nodes` `property` ¶

`order` `property` ¶

`out_degrees` `property` ¶

`add` ¶

`getitem` ¶

`init` ¶

`setitem` ¶

`str` ¶

`degrees` ¶

`edge_attrs` ¶

`from_edge_index` `staticmethod` ¶

`from_edge_list` `staticmethod` ¶

`get_predecessors` ¶

`get_successors` ¶

`has_self_loops` ¶

`is_directed` ¶

`is_edge` ¶

`is_undirected` ¶

`laplacian` ¶

`node_attrs` ¶

`predecessors` ¶

`sparse_adj_matrix` ¶

`successors` ¶

`to_undirected` ¶

`to_weighted_graph` ¶

`transition_probabilities` ¶

`weighted_outdegrees` ¶

`closed_triads` ¶

`degree_assortativity` ¶

`degree_central_moment` ¶

`degree_distribution` ¶

`degree_generating_function` ¶

`degree_raw_moment` ¶

`degree_sequence` ¶