Skip to content

statistics

Functions to compute various graph statistics.

The functions in this module compute various statistics on graphs.

Example
import pathpyG as pp

# Generate a toy example graph.
g = pp.Graph.from_edge_list([
    ('b', 'c'),
    ('a', 'b'),
    ('c', 'd'),
    ('d', 'a'),
    ('b', 'd')
])

# Calculate degree distribution and raw moments
d_dist = pp.statistics.degree_distribution(g)
k_1 = pp.statistics.degree_raw_moment(g, k=1)
k_2 = pp.statistics.degree_raw_moment(g, k=2)

Graph

A graph object storing nodes, edges, and attributes.

An object that can be used to store directed or undirected graphs with node and edge attributes. Data on nodes and edges are stored in an underlying instance of torch_geometric.Data.

Source code in src/pathpyG/core/graph.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
class Graph:
    """
    A graph object storing nodes, edges, and attributes.

    An object than be be used to store directed or undirected graphs with node
    and edge attributes. Data on nodes and edges are stored in an underlying instance of
    [`torch_geometric.Data`](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.data.Data.html#torch_geometric.data.Data).
    """

    def __init__(self, data: Data, mapping: Optional[IndexMap] = None):
        """Generate graph instance from a pyG `Data` object.

        Generate a Graph instance from a `torch_geometric.Data` object that contains an EdgeIndex as well as
        optional node-, edge- or graph-level attributes. An optional mapping can be used to transparently map
        node indices to string identifiers.

        Note that `data` is modified in place (`num_nodes`, `edge_index` and `node_sequence` may be set)
        and is stored as `self.data`.

        Args:
            data: A pyG Data object containing an EdgeIndex and additional attributes
            mapping: `IndexMap` object that maps node indices to string identifiers

        Raises:
            ValueError: if the sparse size of the EdgeIndex does not match the number of nodes

        Example:
            ```py
            import pathpyG as pp
            from torch_geometric.data import Data
            from torch_geometric import EdgeIndex

            data = Data(edge_index=EdgeIndex([[1,1,2],[0,2,1]], sparse_size=(3,3)))
            g = pp.Graph(data)

            g = pp.Graph(data, mapping=pp.IndexMap(['a', 'b', 'c']))
            ```
        """
        self.mapping = IndexMap() if mapping is None else mapping

        # set num_nodes property; an empty edge index has no maximum, so it yields zero nodes
        if "num_nodes" not in data:
            if data.edge_index.numel() > 0:
                data.num_nodes = data.edge_index.max().item() + 1
            else:
                data.num_nodes = 0

        # turn edge index tensor into EdgeIndex object
        if not isinstance(data.edge_index, EdgeIndex):
            data.edge_index = EdgeIndex(data=data.edge_index, sparse_size=(data.num_nodes, data.num_nodes))

        if (
            data.edge_index.get_sparse_size(dim=0) != data.num_nodes
            or data.edge_index.get_sparse_size(dim=1) != data.num_nodes
        ):
            # ValueError is a subclass of Exception, so callers catching Exception keep working
            raise ValueError("sparse size of EdgeIndex should match number of nodes!")

        # sort EdgeIndex and validate
        data.edge_index = data.edge_index.sort_by("row").values
        data.edge_index.validate()

        self.data = data

        # create mapping between edge tuples and edge indices; a single tolist() call
        # avoids one .item() round trip per edge endpoint
        self.edge_to_index = {
            (src, dst): i for i, (src, dst) in enumerate(self.data.edge_index.as_tensor().t().tolist())
        }

        ((self.row_ptr, self.col), _) = self.data.edge_index.get_csr()
        ((self.col_ptr, self.row), _) = self.data.edge_index.get_csc()

        # create node_sequence mapping for higher-order graphs
        if "node_sequence" not in self.data:
            self.data.node_sequence = torch.arange(data.num_nodes).reshape(-1, 1)

    @staticmethod
    def from_edge_index(
        edge_index: torch.Tensor, mapping: Optional[IndexMap] = None, num_nodes: Optional[int] = None
    ) -> Graph:
        """Construct a graph from a torch Tensor containing an edge index. An optional mapping can
        be used to transparently map node indices to string identifiers.

        Args:
            edge_index:  torch.Tensor or torch_geometric.EdgeIndex object containing an edge_index
            mapping: `IndexMap` object that maps node indices to string identifiers
            num_nodes: optional number of nodes (default: None). If None, the number of nodes will be
                inferred based on the maximum node index in the edge index, i.e. there will be no
                isolated nodes with an index above that maximum.

        Returns:
            Graph: graph built from the given edge index

        Examples:
            You can create a graph from an edge index tensor as follows:

            >>> import torch
            >>> import pathpyG as pp
            >>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]))
            >>> print(g)
            Directed graph with 3 nodes and 3 edges ...

            You can also include a mapping of node IDs:

            >>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]),
            ...                              mapping=pp.IndexMap(['a', 'b', 'c']))
            >>> print(g.mapping)
            a -> 0
            b -> 1
            c -> 2
        """
        # compare against None explicitly so that an explicit num_nodes=0 is passed on
        # instead of being silently treated as "not given"
        if num_nodes is None:
            d = Data(edge_index=edge_index)
        else:
            d = Data(edge_index=edge_index, num_nodes=num_nodes)
        return Graph(d, mapping=mapping)

    @staticmethod
    def from_edge_list(
        edge_list: Iterable[Tuple[str, str]],
        is_undirected: bool = False,
        mapping: Optional[IndexMap] = None,
        num_nodes: Optional[int] = None,
    ) -> Graph:
        """Generate a Graph based on an edge list.

        Edges can be given as string or integer tuples. If no mapping is given, a mapping of node IDs
        to indices is created automatically from the sorted unique node IDs; string IDs that are all
        numeric are sorted by their integer value.

        Args:
            edge_list: Iterable of edges represented as tuples
            is_undirected: Whether the edge list contains all bidirectional edges
            mapping: optional mapping of string IDs to node indices
            num_nodes: optional number of nodes (useful in case not all nodes have incident edges)

        Returns:
            Graph: graph containing the given edges

        Examples:
            >>> import pathpyG as pp
            >>> l = [('a', 'b'), ('a', 'c'), ('b', 'c')]
            >>> g = pp.Graph.from_edge_list(l)
            >>> print(list(g.edges))
            [('a', 'b'), ('a', 'c'), ('b', 'c')]
        """
        # generators and other one-shot iterables have no length and can only be consumed
        # once, but the edges are needed several times below, so materialize them first
        if not hasattr(edge_list, "__len__"):
            edge_list = list(edge_list)

        # handle empty graph
        if len(edge_list) == 0:
            return Graph(Data(edge_index=torch.tensor([[], []], dtype=torch.int32), num_nodes=0), mapping=IndexMap())

        if mapping is None:
            edge_array = np.array(edge_list)
            node_ids = np.unique(edge_array)
            # numeric string IDs are ordered by value ("2" before "10") rather than lexicographically
            if np.issubdtype(node_ids.dtype, str) and np.char.isnumeric(node_ids).all():
                node_ids = np.sort(node_ids.astype(int)).astype(str)
            mapping = IndexMap(node_ids)

        if num_nodes is None:
            num_nodes = mapping.num_ids()

        edge_index = EdgeIndex(
            mapping.to_idxs(edge_list).T.contiguous(),
            sparse_size=(num_nodes, num_nodes),
            is_undirected=is_undirected,
        )
        return Graph(Data(edge_index=edge_index, num_nodes=num_nodes), mapping=mapping)

    def to_undirected(self) -> Graph:
        """
        Build an undirected copy of this graph.

        The [`ToUndirected`](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.transforms.ToUndirected.html#torch_geometric.transforms.ToUndirected)
        transform is applied to the underlying [`torch_geometric.Data`](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.data.Data.html#torch_geometric.data.Data) object,
        so that every directed edge is also present in the opposite direction. The result is wrapped
        in a new Graph that shares the mapping of this graph.

        Returns:
            Graph: undirected version of this graph

        Examples:
            >>> import pathpyG as pp
            >>> g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('c', 'a')])
            >>> g_u = g.to_undirected()
            >>> print(g_u)
            Undirected graph with 3 nodes and 6 (directed) edges
        """
        undirected_data = ToUndirected()(self.data)
        num_nodes = self.data.num_nodes

        # the transform leaves a plain tensor as edge index, so it is wrapped into an
        # EdgeIndex again (the Graph constructor takes care of sorting it)
        undirected_data.edge_index = EdgeIndex(
            data=undirected_data.edge_index,
            sparse_size=(num_nodes, num_nodes),
            is_undirected=True,
        )
        undirected_data.num_nodes = num_nodes
        return Graph(undirected_data, self.mapping)

    def to_weighted_graph(self) -> Graph:
        """Merge multi-edges into single edges that carry a weight.

        Every edge starts with weight one; edges between the same pair of nodes are coalesced
        and the result is stored in an edge attribute called `edge_weight`. A new graph instance
        with the same number of nodes and the same mapping is returned.

        Returns:
            Graph: Graph with coalesced edges
        """
        plain_index = self.data.edge_index.as_tensor()
        unit_weights = torch.ones(self.m, device=self.data.edge_index.device)
        coalesced_index, weights = torch_geometric.utils.coalesce(plain_index, unit_weights)
        weighted_data = Data(edge_index=coalesced_index, edge_weight=weights, num_nodes=self.data.num_nodes)
        return Graph(weighted_data, mapping=self.mapping)

    def node_attrs(self) -> List[str]:
        """
        Return a list of node attributes.

        This method returns a list containing the names of all node-level attributes,
        ignoring the special `node_sequence` attribute.

        Returns:
            list: list of node attributes
        """
        attrs = []
        for k in self.data.keys():
            if k != "node_sequence" and k.startswith("node_"):
                attrs.append(k)
        return attrs

    def edge_attrs(self) -> List[str]:
        """
        Return a list of edge attributes.

        This method returns a list containing the names of all edge-level attributes,
        ignoring the special `edge_index` attribute.

        Returns:
            list: list of edge attributes
        """
        attrs = []
        for k in self.data.keys():
            if k != "edge_index" and k.startswith("edge_"):
                attrs.append(k)
        return attrs

    @property
    def nodes(self) -> list:
        """
        Return indices or IDs of all nodes in the graph.

        This method returns a list object that contains all nodes.
        If an IndexMap is used, nodes are returned as string IDs.
        If no IndexMap is used, nodes are returned as integer indices.

        Returns:
            list: list of all nodes using IDs or indices (if no mapping is used)
        """
        node_list = self.mapping.to_ids(np.arange(self.n)).tolist()
        if self.order > 1:
            return list(map(tuple, node_list))
        return node_list

    @property
    def edges(self) -> list:
        """Return all edges in the graph.

        This method returns a list object that contains all edges, where each
        edge is a tuple of two elements. If an IndexMap is used to map node
        indices to string IDs, edges are returned as tuples of string IDs.
        If no mapping is used, edges are returned as tuples of integer indices.

        Returns:
            list: list object yielding all edges using IDs or indices (if no mapping is used)
        """
        edge_list = self.mapping.to_ids(self.data.edge_index.t()).tolist()
        if self.order > 1:
            return [tuple(map(tuple, x)) for x in edge_list]
        return list(map(tuple, edge_list))

    def get_successors(self, row_idx: int) -> torch.Tensor:
        """Return a tensor containing the indices of all successor nodes for a given node identified by an index.

        Args:
            row_idx:   Index of node for which predecessors shall be returned.

        Returns:
            tensor: tensor containing indices of all successor nodes of the node indexed by `row_idx`
        """

        if row_idx + 1 < self.row_ptr.size(0):
            row_start = self.row_ptr[row_idx]
            row_end = self.row_ptr[row_idx + 1]
            return self.col[row_start:row_end]
        else:
            return torch.tensor([], device=self.data.edge_index.device)

    def get_predecessors(self, col_idx: int) -> torch.Tensor:
        """Return a tensor containing the indices of all predecessor nodes for a given node identified by an index.

        Args:
            col_idx:   Index of node for which predecessors shall be returned.

        Returns:
            tensor: tensor containing indices of all predecessor nodes of the node indexed by `col_idx`
        """
        if col_idx + 1 < self.col_ptr.size(0):
            col_start = self.col_ptr[col_idx]
            col_end = self.col_ptr[col_idx + 1]
            return self.row[col_start:col_end]
        else:
            return torch.tensor([], device=self.data.edge_index.device)

    def successors(self, node: Union[int, str] | tuple) -> list:
        """Return all successors of a given node.

        This method returns a generator object that yields all successors of a
        given node. If an IndexMap is used, successors are returned
        as string IDs. If no mapping is used, successors are returned as indices.

        Args:
            node:   Index or string ID of node for which successors shall be returned.

        Returns:
            list: list with all successors of the node identified
                by `node` using ID or index (if no mapping is used)
        """

        node_list = self.mapping.to_ids(self.get_successors(self.mapping.to_idx(node))).tolist()  # type: ignore

        if self.order > 1:
            return list(map(tuple, node_list))
        return node_list

    def predecessors(self, node: Union[str, int] | tuple) -> list:
        """Return the predecessors of a given node.

        This method returns a generator object that yields all predecessors of a
        given node. If a `node_id` mapping is used, predecessors will be returned
        as string IDs. If no mapping is used, predecessors are returned as indices.

        Args:
            node:   Index or string ID of node for which predecessors shall be returned.

        Returns:
            list: list with all predecessors of the node identified
                by `node` using ID or index (if no mapping is used)
        """
        node_list = self.mapping.to_ids(self.get_predecessors(self.mapping.to_idx(node))).tolist()  # type: ignore

        if self.order > 1:
            return list(map(tuple, node_list))
        return node_list

    def is_edge(self, v: Union[str, int], w: Union[str, int]) -> bool:
        """Return whether edge $(v,w)$ exists in the graph.

        If an index to ID mapping is used, nodes are assumed to be string IDs. If no
        mapping is used, nodes are assumed to be integer indices.

        Args:
            v: source node of edge as integer index or string ID
            w: target node of edge as integer index or string ID

        Returns:
            bool: True if edge exists, False otherwise
        """
        row = self.mapping.to_idx(v)
        row_start = self.row_ptr[row]
        row_end = self.row_ptr[row + 1]

        return self.mapping.to_idx(w) in self.col[row_start:row_end]

    def sparse_adj_matrix(self, edge_attr: Any = None) -> Any:
        """Build a sparse adjacency matrix of the (optionally weighted) graph.

        Args:
            edge_attr: name of the edge attribute that shall be used as edge weight;
                if None, no edge weights are passed on

        Returns:
            scipy.sparse.coo_matrix: sparse adjacency matrix representation of graph
        """
        plain_index = self.data.edge_index.as_tensor()
        if edge_attr is not None:
            return torch_geometric.utils.to_scipy_sparse_matrix(
                plain_index, edge_attr=self.data[edge_attr], num_nodes=self.n
            )
        return torch_geometric.utils.to_scipy_sparse_matrix(plain_index, num_nodes=self.n)

    @property
    def in_degrees(self) -> Dict[str, float]:
        """In-degree of every node.

        Shorthand for `degrees(mode="in")`.

        Returns:
            dict: dictionary containing in-degrees of nodes
        """
        incoming = self.degrees(mode="in")
        return incoming

    @property
    def out_degrees(self) -> Dict[str, float]:
        """Out-degree of every node.

        Shorthand for `degrees(mode="out")`.

        Returns:
            dict: dictionary containing out-degrees of nodes
        """
        outgoing = self.degrees(mode="out")
        return outgoing

    def degrees(self, mode: str = "in") -> Dict[str, float]:
        """
        Count the edges incident to each node.

        Args:
            mode: `in` counts incoming edges; any other value counts outgoing edges.

        Returns:
            dict: dictionary containing degrees of nodes, keyed by what the graph's
                `IndexMap` returns for each node index
        """
        # row 1 of the edge index holds the targets, row 0 the sources
        endpoint_row = 1 if mode == "in" else 0
        counts = torch_geometric.utils.degree(self.data.edge_index[endpoint_row], num_nodes=self.n, dtype=torch.int)
        result = {}
        for node_index in range(self.n):
            result[self.mapping.to_id(node_index)] = counts[node_index].item()
        return result

    def weighted_outdegrees(self) -> torch.Tensor:
        """
        Sum the `edge_weight` attribute over the outgoing edges of each node.

        The graph must provide an `edge_weight` attribute.

        Returns:
            tensor: Weighted outdegrees of nodes.
        """
        sources = self.data.edge_index[0]
        return scatter(self.data.edge_weight, sources, dim=0, dim_size=self.data.num_nodes, reduce="sum")

    def transition_probabilities(self) -> torch.Tensor:
        """
        Normalize edge weights by the weighted outdegree of each edge's source node.

        Returns:
            tensor: Transition probabilities, one entry per edge.
        """
        outdegree_per_node = self.weighted_outdegrees()
        # look up, for every edge, the weighted outdegree of its source node
        outdegree_per_edge = outdegree_per_node[self.data.edge_index[0]]
        return self.data.edge_weight / outdegree_per_edge

    def laplacian(self, normalization: Any = None, edge_attr: Any = None) -> Any:
        """Return Laplacian matrix for a given graph.

        This wrapper method uses [`torch_geometric.utils.get_laplacian`](https://pytorch-geometric.readthedocs.io/en/latest/modules/utils.html#torch_geometric.utils.get_laplacian)
        to compute a Laplacian matrix representation of the graph. The number of nodes
        of the graph is passed on explicitly so that the matrix has one row and column per node
        even if the nodes with the highest indices have no incident edges, which matches
        `sparse_adj_matrix`.

        Args:
            normalization: normalization parameter passed to pyG `get_laplacian`
                function
            edge_attr: optional name of numerical edge attribute that shall
                be passed to pyG `get_laplacian` function as edge weight

        Returns:
            scipy.sparse.coo_matrix: Laplacian matrix representation of graph
        """
        # edge_weight=None is the default of get_laplacian, i.e. an unweighted Laplacian
        edge_weight = None if edge_attr is None else self.data[edge_attr]
        index, weight = torch_geometric.utils.get_laplacian(
            self.data.edge_index.as_tensor(),
            edge_weight=edge_weight,
            normalization=normalization,
            num_nodes=self.n,
        )
        return torch_geometric.utils.to_scipy_sparse_matrix(index, weight, num_nodes=self.n)

    def __getitem__(self, key: Union[tuple, str]) -> Any:
        """Return node, edge, or graph attribute.

        Args:
            key: name of attribute to be returned
        """
        if not isinstance(key, tuple):
            if key in self.data.keys():
                return self.data[key]
            else:
                raise KeyError(key + " is not a graph attribute")
        elif key[0] in self.node_attrs():
            return self.data[key[0]][self.mapping.to_idx(key[1])]
        elif key[0] in self.edge_attrs():
            return self.data[key[0]][self.edge_to_index[self.mapping.to_idx(key[1]), self.mapping.to_idx(key[2])]]
        else:
            raise KeyError(key[0] + " is not a node or edge attribute")

    def __setitem__(self, key: str, val: torch.Tensor) -> None:
        """Store node, edge, or graph attribute.

        Args:
            key: name of attribute to be stored
            val: value of attribute
        """
        if not isinstance(key, tuple):
            if key.startswith("node_"):
                if val.size(0) != self.n:
                    raise ValueError("Attribute must have same length as number of nodes")
                self.data[key] = val
            elif key.startswith("edge_"):
                if val.size(0) != self.m:
                    raise ValueError("Attribute must have same length as number of edges")
                self.data[key] = val
            else:
                self.data[key] = val
        elif key[0].startswith("node_"):  # type: ignore
            if key[0] not in self.data.keys():
                raise KeyError(
                    "Attribute does not yet exist. Setting the value of a specific node attribute"
                    + "requires that the attribute already exists."
                )
            self.data[key[0]][self.mapping.to_idx(key[1])] = val
        elif key[0].startswith("edge_"):  # type: ignore
            if key[0] not in self.data.keys():
                raise KeyError(
                    "Attribute does not yet exist. Setting the value of a specific node attribute"
                    + "requires that the attribute already exists."
                )
            self.data[key[0]][self.edge_to_index[self.mapping.to_idx(key[1]), self.mapping.to_idx(key[2])]] = val
        else:
            raise KeyError("node and edge specific attributes should be prefixed with 'node_' or 'edge_'")

    @property
    def n(self) -> int:
        """
        Return number of nodes.

        Returns:
            int: number of nodes in the graph
        """
        return self.data.num_nodes  # type: ignore

    @property
    def m(self) -> int:
        """
        Return number of edges.

        Returns the number of edges in the graph. For an undirected graph, the number of directed edges is returned.

        Returns:
            int: number of edges in the graph
        """
        return self.data.num_edges  # type: ignore

    @property
    def order(self) -> int:
        """
        Return order of graph.

        Returns:
            int: order of the (De Bruijn) graph
        """
        return self.data.node_sequence.size(1)  # type: ignore

    def is_directed(self) -> bool:
        """Return whether graph is directed.

        Returns:
            bool: True if graph is directed, False otherwise
        """
        return not self.data.edge_index.is_undirected

    def is_undirected(self) -> bool:
        """Return whether graph is undirected.

        Returns:
            bool: True if graph is undirected, False otherwise
        """
        return self.data.edge_index.is_undirected

    def has_self_loops(self) -> bool:
        """Return whether graph contains self-loops.

        Returns:
            bool: True if graph contains self-loops, False otherwise
        """
        return self.data.has_self_loops()

    def __add__(self, other: Graph) -> Graph:
        """Combine Graph object with other Graph object.

        The semantics of this operation depends on the optional IndexMap
        of both graphs. If no IndexMap is included, the two underlying data objects
        are concatenated, thus merging edges from both graphs while leaving node indices
        unchanged. If both graphs include IndexMaps that assign node IDs to indices,
        indices will be adjusted, creating a new mapping for the union of node IDs in both graphs.

        Nodes of this graph keep their indices. Nodes that only occur in `other` are appended
        after them, in the order of their indices in `other`.

        Node IDs of graphs to be combined can be identical, partly overlapping or non-overlapping.

        Args:
            other: graph whose nodes and edges are added to those of this graph

        Returns:
            Graph: new graph with the union of nodes and the edges of both graphs

        Raises:
            NotImplementedError: if this graph has an order larger than one

        Examples:
            Adding two graphs without node IDs:

            >>> g1 = pp.Graph.from_edge_index(torch.Tensor([[0,1,1],[1,2,3]]))
            >>> g2 = pp.Graph.from_edge_index(torch.Tensor([[0,2,3],[3,2,1]]))
            >>> print(g1 + g2)
            Directed graph with 4 nodes and 6 edges ...

            Adding two graphs with identical node IDs:

            >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
            >>> g2 = pp.Graph.from_edge_list([('a', 'c'), ('c', 'b')])
            >>> print(g1 + g2)
            Directed graph with 3 nodes and 4 edges ...

            Adding two graphs with non-overlapping node IDs:

            >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
            >>> g2 = pp.Graph.from_edge_list([('d', 'e'), ('e', 'f')])
            >>> print(g1 + g2)
            Directed graph with 6 nodes and 4 edges ...

            Adding two graphs with partly overlapping node IDs:

            >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
            >>> g2 = pp.Graph.from_edge_list([('b', 'd'), ('d', 'e')])
            >>> print(g1 + g2)
            Directed graph with 5 nodes and 4 edges ...
        """

        if self.order > 1:
            raise NotImplementedError("Add operator can only be applied to order 1 graphs")

        d1 = self.data.clone()
        m1 = self.mapping

        d2 = other.data.clone()
        m2 = other.mapping

        # compute overlap and additional nodes in g2 over g1
        overlap = set(m2.node_ids).intersection(m1.node_ids)
        additional_nodes = set(m2.node_ids).difference(m1.node_ids)

        d2_idx_translation = {}
        node_ids = [""] * (self.n + len(additional_nodes))
        # keep mappings of nodes in g1
        for v in m1.node_ids:
            node_ids[m1.to_idx(v)] = v
        # start from the identity translation; the entries are inserted in the order of
        # m2.node_ids and only their values are overwritten below
        for v in m2.node_ids:
            d2_idx_translation[m2.to_idx(v)] = m2.to_idx(v)
        # for overlapping node IDs we must correct node indices in m2
        for v in overlap:
            d2_idx_translation[m2.to_idx(v)] = m1.to_idx(v)
        # add mapping for nodes in g2 that are not in g1 and correct indices in g2.
        # New nodes get consecutive indices after those of g1. Deriving the new index from
        # the old one (old index + n - number of overlapping nodes) is only correct if all
        # overlapping nodes precede the new nodes in m2, and otherwise collides with an
        # index that is already taken by a node of g1.
        for offset, v in enumerate(sorted(additional_nodes, key=m2.to_idx)):
            new_idx = self.n + offset
            node_ids[new_idx] = v
            d2_idx_translation[m2.to_idx(v)] = new_idx
        # apply index translation to d2
        # fast dictionary based mapping using torch
        palette, key = zip(*d2_idx_translation.items())
        key = torch.tensor(key)
        palette = torch.tensor(palette)

        index = torch.bucketize(d2.edge_index.ravel(), palette)
        d2.edge_index = key[index].reshape(d2.edge_index.shape)
        d = d1.concat(d2)
        mapping = IndexMap(node_ids)
        d.num_nodes = self.n + len(additional_nodes)
        d.edge_index = EdgeIndex(d.edge_index, sparse_size=(d.num_nodes, d.num_nodes))
        return Graph(d, mapping=mapping)

    def __str__(self) -> str:
        """Build a human-readable summary of the graph.

        The summary consists of a header line with the number of nodes and edges,
        followed by the node, edge and graph attributes together with their types.
        """
        from pprint import pformat

        # describe the type of every attribute; plain tensors additionally show their size
        type_info = {}
        for name, value in self.data.to_dict().items():
            value_type = type(value)
            description = str(value_type)
            if value_type == torch.Tensor:
                description = description + " -> " + str(value.size())
            type_info[name] = description

        if self.is_undirected():
            header = "Undirected graph with {0} nodes and {1} (directed) edges\n".format(self.n, self.m)
        else:
            header = "Directed graph with {0} nodes and {1} edges\n".format(self.n, self.m)

        node_section = {name: type_info[name] for name in self.node_attrs()}
        edge_section = {name: type_info[name] for name in self.edge_attrs()}
        graph_section = {}
        for name in self.data.keys():
            if self.data.is_node_attr(name) or self.data.is_edge_attr(name):
                continue
            graph_section[name] = type_info[name]

        sections = {
            "Node Attributes": node_section,
            "Edge Attributes": edge_section,
            "Graph Attributes": graph_section,
        }
        return header + pformat(sections, indent=4, width=160)

edges property

Return all edges in the graph.

This method returns a list object that contains all edges, where each edge is a tuple of two elements. If an IndexMap is used to map node indices to string IDs, edges are returned as tuples of string IDs. If no mapping is used, edges are returned as tuples of integer indices.

Returns:

Name Type Description
list list

list object yielding all edges using IDs or indices (if no mapping is used)

in_degrees property

Return in-degrees of nodes in directed network.

Returns:

Name Type Description
dict typing.Dict[str, float]

dictionary containing in-degrees of nodes

m property

Return number of edges.

Returns the number of edges in the graph. For an undirected graph, the number of directed edges is returned.

Returns:

Name Type Description
int int

number of edges in the graph

n property

Return number of nodes.

Returns:

Name Type Description
int int

number of nodes in the graph

nodes property

Return indices or IDs of all nodes in the graph.

This method returns a list object that contains all nodes. If an IndexMap is used, nodes are returned as string IDs. If no IndexMap is used, nodes are returned as integer indices.

Returns:

Name Type Description
list list

list of all nodes using IDs or indices (if no mapping is used)

order property

Return order of graph.

Returns:

Name Type Description
int int

order of the (De Bruijn) graph

out_degrees property

Return out-degrees of nodes in directed network.

Returns:

Name Type Description
dict typing.Dict[str, float]

dictionary containing out-degrees of nodes

__add__

Combine Graph object with other Graph object.

The semantics of this operation depend on the optional IndexMap of both graphs. If no IndexMap is included, the two underlying data objects are concatenated, thus merging edges from both graphs while leaving node indices unchanged. If both graphs include IndexMaps that assign node IDs to indices, indices will be adjusted, creating a new mapping for the union of node IDs in both graphs.

Node IDs of graphs to be combined can be identical, partly overlapping or non-overlapping.

Examples:

Adding two graphs without node IDs:

>>> g1 = pp.Graph.from_edge_index(torch.Tensor([[0,1,1],[1,2,3]]))
>>> g2 = pp.Graph.from_edge_index(torch.Tensor([[0,2,3],[3,2,1]]))
>>> print(g1 + g2)
Graph with 3 nodes and 6 edges

Adding two graphs with identical node IDs:

>>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
>>> g2 = pp.Graph.from_edge_list([('a', 'c'), ('c', 'b')])
>>> print(g1 + g2)
Graph with 3 nodes and 4 edges

Adding two graphs with non-overlapping node IDs:

>>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
>>> g2 = pp.Graph.from_edge_list([('d', 'e'), ('e', 'f')])
>>> print(g1 + g2)
Graph with 6 nodes and 4 edges

Adding two graphs with partly overlapping node IDs:

>>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
>>> g2 = pp.Graph.from_edge_list([('b', 'd'), ('d', 'e')])
>>> print(g1 + g2)
Graph with 5 nodes and 4 edges
Source code in src/pathpyG/core/graph.py
def __add__(self, other: Graph) -> Graph:
    """Combine Graph object with other Graph object.

    The semantics of this operation depend on the optional IndexMap
    of both graphs. If no IndexMap is included, the two underlying data objects
    are concatenated, thus merging edges from both graphs while leaving node indices
    unchanged. If both graphs include IndexMaps that assign node IDs to indices,
    indices will be adjusted, creating a new mapping for the union of node IDs in both graphs.

    Nodes of this graph keep their indices. Nodes that only occur in `other` are appended
    after them, in the order of their indices in `other`.

    Node IDs of graphs to be combined can be identical, partly overlapping or non-overlapping.

    Args:
        other: graph whose nodes and edges are merged into a copy of this graph

    Returns:
        Graph: new graph containing the edges of both graphs and the union of their node IDs

    Raises:
        NotImplementedError: if this graph has an order larger than one

    Examples:
        Adding two graphs without node IDs:

        >>> g1 = pp.Graph.from_edge_index(torch.Tensor([[0,1,1],[1,2,3]]))
        >>> g2 = pp.Graph.from_edge_index(torch.Tensor([[0,2,3],[3,2,1]]))
        >>> print(g1 + g2)
        Graph with 4 nodes and 6 edges

        Adding two graphs with identical node IDs:

        >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
        >>> g2 = pp.Graph.from_edge_list([('a', 'c'), ('c', 'b')])
        >>> print(g1 + g2)
        Graph with 3 nodes and 4 edges

        Adding two graphs with non-overlapping node IDs:

        >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
        >>> g2 = pp.Graph.from_edge_list([('d', 'e'), ('e', 'f')])
        >>> print(g1 + g2)
        Graph with 6 nodes and 4 edges

        Adding two graphs with partly overlapping node IDs:

        >>> g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])
        >>> g2 = pp.Graph.from_edge_list([('b', 'd'), ('d', 'e')])
        >>> print(g1 + g2)
        Graph with 5 nodes and 4 edges
    """

    if self.order > 1:
        raise NotImplementedError("Add operator can only be applied to order 1 graphs")

    d1 = self.data.clone()
    m1 = self.mapping

    d2 = other.data.clone()
    m2 = other.mapping

    # node IDs of g2 that also occur in g1, and node IDs that only occur in g2
    overlap = set(m2.node_ids).intersection(m1.node_ids)
    additional_nodes = set(m2.node_ids).difference(m1.node_ids)

    node_ids = [""] * (self.n + len(additional_nodes))
    # nodes of g1 keep their indices
    for v in m1.node_ids:
        node_ids[m1.to_idx(v)] = v

    # maps every node index used in g2 to its index in the combined graph
    d2_idx_translation = {}
    # overlapping node IDs take over the index they have in g1
    for v in overlap:
        d2_idx_translation[m2.to_idx(v)] = m1.to_idx(v)
    # New nodes get consecutive indices after those of g1. Deriving the new index from
    # the g2 index (as `m2.to_idx(v) + self.n - len(overlap)`) is only correct if all
    # overlapping nodes precede all new nodes in g2; otherwise it collides with indices
    # of g1 or leaves unused slots in `node_ids`.
    for offset, v in enumerate(sorted(additional_nodes, key=m2.to_idx)):
        new_idx = self.n + offset
        node_ids[new_idx] = v
        d2_idx_translation[m2.to_idx(v)] = new_idx

    # apply index translation to d2 (fast dictionary based mapping using torch)
    if d2_idx_translation:
        # torch.bucketize expects sorted boundaries, so sort the translation by old index
        old_indices, new_indices = zip(*sorted(d2_idx_translation.items()))
        device = d2.edge_index.device
        old_indices = torch.tensor(old_indices, device=device)
        new_indices = torch.tensor(new_indices, device=device)

        index = torch.bucketize(d2.edge_index.ravel(), old_indices)
        d2.edge_index = new_indices[index].reshape(d2.edge_index.shape)

    d = d1.concat(d2)
    mapping = IndexMap(node_ids)
    d.num_nodes = self.n + len(additional_nodes)
    d.edge_index = EdgeIndex(d.edge_index, sparse_size=(d.num_nodes, d.num_nodes))
    return Graph(d, mapping=mapping)

__getitem__

Return node, edge, or graph attribute.

Parameters:

Name Type Description Default
key typing.Union[tuple, str]

name of attribute to be returned

required
Source code in src/pathpyG/core/graph.py
def __getitem__(self, key: Union[tuple, str]) -> Any:
    """Look up a graph attribute or the attribute value of a single node or edge.

    Args:
        key: name of an attribute stored in the underlying data object, a tuple
            `(attribute, node)` for a node attribute, or a tuple
            `(attribute, source, target)` for an edge attribute

    Returns:
        the stored attribute for a plain key, or the entry that belongs to the
        given node or edge for a tuple key

    Raises:
        KeyError: if the requested attribute does not exist
    """
    if isinstance(key, tuple):
        attr = key[0]
        if attr in self.node_attrs():
            node_idx = self.mapping.to_idx(key[1])
            return self.data[attr][node_idx]
        if attr in self.edge_attrs():
            # edges are addressed by their (source index, target index) pair
            edge = (self.mapping.to_idx(key[1]), self.mapping.to_idx(key[2]))
            return self.data[attr][self.edge_to_index[edge]]
        raise KeyError(attr + " is not a node or edge attribute")

    if key in self.data.keys():
        return self.data[key]
    raise KeyError(key + " is not a graph attribute")

__init__

Generate graph instance from a pyG Data object.

Generate a Graph instance from a torch_geometric.Data object that contains an EdgeIndex as well as optional node-, edge- or graph-level attributes. An optional mapping can be used to transparently map node indices to string identifiers.

Parameters:

Name Type Description Default
data torch_geometric.data.Data

A pyG Data object containing an EdgeIndex and additional attributes

required
mapping typing.Optional[pathpyG.core.index_map.IndexMap]

IndexMap object that maps node indices to string identifiers

None
Example
import pathpyG as pp
from torch_geometric.data import Data
from torch_geometric import EdgeIndex

data = Data(edge_index=EdgeIndex([[1,1,2],[0,2,1]], sparse_size=(3,3)))
g = pp.Graph(data)

g = pp.Graph(data, mapping=pp.IndexMap(['a', 'b', 'c']))
Source code in src/pathpyG/core/graph.py
def __init__(self, data: Data, mapping: Optional[IndexMap] = None):
    """Wrap a pyG `Data` object in a Graph instance.

    The given `torch_geometric.Data` object must contain an edge index and may carry
    additional node-, edge- or graph-level attributes. The edge index is converted to an
    `EdgeIndex` if necessary, sorted by row and validated. Note that `data` is modified
    in place and stored as `self.data`.

    Args:
        data: A pyG Data object containing an EdgeIndex and additional attributes
        mapping: `IndexMap` object that maps node indices to string identifiers;
            an empty `IndexMap` is used if omitted

    Raises:
        Exception: if the sparse size of the edge index does not match the number of nodes

    Example:
        ```py
        import pathpyG as pp
        from torch_geometric.data import Data
        from torch_geometric import EdgeIndex

        data = Data(edge_index=EdgeIndex([[1,1,2],[0,2,1]], sparse_size=(3,3)))
        g = pp.Graph(data)

        g = pp.Graph(data, mapping=pp.IndexMap(['a', 'b', 'c']))
        ```
    """
    self.mapping = IndexMap() if mapping is None else mapping

    # without an explicit node count, use the largest node index in the edge index plus one
    if "num_nodes" not in data:
        data.num_nodes = data.edge_index.max().item() + 1

    # a plain tensor is wrapped into an EdgeIndex of size num_nodes x num_nodes
    if not isinstance(data.edge_index, EdgeIndex):
        data.edge_index = EdgeIndex(data=data.edge_index, sparse_size=(data.num_nodes, data.num_nodes))

    # both dimensions of the sparse size have to agree with the node count
    size_matches = all(data.edge_index.get_sparse_size(dim=axis) == data.num_nodes for axis in (0, 1))
    if not size_matches:
        raise Exception("sparse size of EdgeIndex should match number of nodes!")

    # sort EdgeIndex and validate
    data.edge_index = data.edge_index.sort_by("row").values
    data.edge_index.validate()

    self.data = data

    # Lookup from (source index, target index) to the position of the edge in the sorted
    # edge index; for repeated edges the last position wins.
    self.edge_to_index = {}
    for position, edge in enumerate(self.data.edge_index.t()):
        self.edge_to_index[(edge[0].item(), edge[1].item())] = position

    # pointer and index tensors of the CSR and CSC representations of the edge index
    (self.row_ptr, self.col), _ = self.data.edge_index.get_csr()
    (self.col_ptr, self.row), _ = self.data.edge_index.get_csc()

    # default node_sequence for graphs that do not bring their own: one column with the node indices
    if "node_sequence" not in self.data:
        self.data.node_sequence = torch.arange(data.num_nodes).reshape(-1, 1)

__setitem__

Store node, edge, or graph attribute.

Parameters:

Name Type Description Default
key str

name of attribute to be stored

required
val torch.Tensor

value of attribute

required
Source code in src/pathpyG/core/graph.py
def __setitem__(self, key: Union[str, tuple], val: torch.Tensor) -> None:
    """Store node, edge, or graph attribute.

    A plain string key stores a whole attribute. Attributes whose name starts with
    `node_` or `edge_` must have one entry per node or edge, respectively; any other
    name is stored without a size check. A tuple key `(attribute, node)` or
    `(attribute, source, target)` overwrites the value of a single node or edge of an
    attribute that already exists.

    Args:
        key: name of attribute to be stored, or tuple addressing a single node or edge
        val: value of attribute

    Raises:
        ValueError: if a `node_`/`edge_` attribute does not have as many entries as
            there are nodes/edges
        KeyError: if a tuple key refers to an attribute that does not exist yet, or to
            an attribute that is not prefixed with `node_` or `edge_`
    """
    if not isinstance(key, tuple):
        if key.startswith("node_"):
            if val.size(0) != self.n:
                raise ValueError("Attribute must have same length as number of nodes")
        elif key.startswith("edge_"):
            if val.size(0) != self.m:
                raise ValueError("Attribute must have same length as number of edges")
        self.data[key] = val
        return

    attr = key[0]
    if attr.startswith("node_"):  # type: ignore
        if attr not in self.data.keys():
            raise KeyError(
                "Attribute does not yet exist. Setting the value of a specific node attribute "
                + "requires that the attribute already exists."
            )
        self.data[attr][self.mapping.to_idx(key[1])] = val
    elif attr.startswith("edge_"):  # type: ignore
        if attr not in self.data.keys():
            raise KeyError(
                "Attribute does not yet exist. Setting the value of a specific edge attribute "
                + "requires that the attribute already exists."
            )
        # edges are addressed by their (source index, target index) pair
        edge = (self.mapping.to_idx(key[1]), self.mapping.to_idx(key[2]))
        self.data[attr][self.edge_to_index[edge]] = val
    else:
        raise KeyError("node and edge specific attributes should be prefixed with 'node_' or 'edge_'")

__str__

Return a string representation of the graph.

Source code in src/pathpyG/core/graph.py
def __str__(self) -> str:
    """Describe the graph as text.

    The first line states whether the graph is directed and how many nodes and edges it
    has. It is followed by a pretty-printed overview of the types of all node, edge and
    graph attributes.
    """
    from pprint import pformat

    def describe(value) -> str:
        # plain tensors are additionally described by their size
        kind = type(value)
        if kind == torch.Tensor:
            return str(kind) + " -> " + str(value.size())
        return str(kind)

    attr_types = {name: describe(value) for name, value in self.data.to_dict().items()}

    if self.is_undirected():
        s = "Undirected graph with {0} nodes and {1} (directed) edges\n".format(self.n, self.m)
    else:
        s = "Directed graph with {0} nodes and {1} edges\n".format(self.n, self.m)

    attribute_info = {
        "Node Attributes": {a: attr_types[a] for a in self.node_attrs()},
        "Edge Attributes": {a: attr_types[a] for a in self.edge_attrs()},
        # everything that the data object reports as neither node nor edge attribute
        "Graph Attributes": {
            a: attr_types[a]
            for a in self.data.keys()
            if not (self.data.is_node_attr(a) or self.data.is_edge_attr(a))
        },
    }
    return s + pformat(attribute_info, indent=4, width=160)

degrees

Return degrees of nodes.

Parameters:

Name Type Description Default
mode str

in or out to calculate the in- or out-degree for directed networks.

'in'

Returns:

Name Type Description
dict typing.Dict[str, float]

dictionary containing degrees of nodes

Source code in src/pathpyG/core/graph.py
def degrees(self, mode: str = "in") -> Dict[str, float]:
    """
    Count the edges incident to each node.

    Args:
        mode: `in` counts how often a node occurs in the second row of the edge index
            (in-degree); any other value counts occurrences in the first row (out-degree).

    Returns:
        dict: dictionary that maps the ID of each node (as given by the mapping) to its degree
    """
    # row 1 of the edge index is used for in-degrees, row 0 for out-degrees
    endpoint_row = 1 if mode == "in" else 0
    counts = torch_geometric.utils.degree(self.data.edge_index[endpoint_row], num_nodes=self.n, dtype=torch.int)

    result = {}
    for idx in range(self.n):
        result[self.mapping.to_id(idx)] = counts[idx].item()
    return result

edge_attrs

Return a list of edge attributes.

This method returns a list containing the names of all edge-level attributes, ignoring the special edge_index attribute.

Returns:

Name Type Description
list typing.List[str]

list of edge attributes

Source code in src/pathpyG/core/graph.py
def edge_attrs(self) -> List[str]:
    """
    Collect the names of all edge-level attributes.

    An attribute counts as edge-level if its name starts with `edge_`.
    The special `edge_index` attribute is left out.

    Returns:
        list: names of the edge attributes, in the order in which the data object lists its keys
    """
    return [name for name in self.data.keys() if name.startswith("edge_") and name != "edge_index"]

from_edge_index staticmethod

Construct a graph from a torch Tensor containing an edge index. An optional mapping can be used to transparently map node indices to string identifiers.

Parameters:

Name Type Description Default
edge_index torch.Tensor

torch.Tensor or torch_geometric.EdgeIndex object containing an edge_index

required
mapping typing.Optional[pathpyG.core.index_map.IndexMap]

IndexMap object that maps node indices to string identifiers

None
num_nodes int

optional number of nodes (default: None). If None, the number of nodes will be inferred based on the maximum node index in the edge index, i.e. there will be no isolated nodes.

None

Examples:

You can create a graph from an edge index tensor as follows:

>>> import torch
>>> import pathpyG as pp
>>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]))
>>> print(g)
Directed graph with 3 nodes and 3 edges ...

You can also include a mapping of node IDs:

>>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]),
>>>                              mapping=pp.IndexMap(['a', 'b', 'c']))
>>> print(g.mapping)
a -> 0
b -> 1
c -> 2
Source code in src/pathpyG/core/graph.py
@staticmethod
def from_edge_index(
    edge_index: torch.Tensor, mapping: Optional[IndexMap] = None, num_nodes: Optional[int] = None
) -> Graph:
    """Construct a graph from a torch Tensor containing an edge index. An optional mapping can
    be used to transparently map node indices to string identifiers.

    Args:
        edge_index:  torch.Tensor or torch_geometric.EdgeIndex object containing an edge_index
        mapping: `IndexMap` object that maps node indices to string identifiers
        num_nodes: optional number of nodes (default: None). If None, the number of nodes will be
            inferred based on the maximum node index in the edge index, i.e. there will be no isolated nodes.
            An explicit value, including 0, is passed on to the data object unchanged.

    Returns:
        Graph: graph built from the given edge index

    Examples:
        You can create a graph from an edge index tensor as follows:

        >>> import torch
        >>> import pathpyG as pp
        >>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]))
        >>> print(g)
        Directed graph with 3 nodes and 3 edges ...

        You can also include a mapping of node IDs:

        >>> g = pp.Graph.from_edge_index(torch.LongTensor([[1, 1, 2], [0, 2, 1]]),
        >>>                              mapping=pp.IndexMap(['a', 'b', 'c']))
        >>> print(g.mapping)
        a -> 0
        b -> 1
        c -> 2
    """

    # Compare against None instead of relying on truthiness: an explicit `num_nodes=0`
    # is a valid node count and must not be treated as "not given".
    if num_nodes is None:
        d = Data(edge_index=edge_index)
    else:
        d = Data(edge_index=edge_index, num_nodes=num_nodes)
    return Graph(d, mapping=mapping)

from_edge_list staticmethod

Generate a Graph based on an edge list.

Edges can be given as string or integer tuples. If strings are used and no mapping is given, a mapping of node IDs to indices will be automatically created based on a lexicographic ordering of node IDs.

Parameters:

Name Type Description Default
edge_list typing.Iterable[typing.Tuple[str, str]]

Iterable of edges represented as tuples

required
is_undirected bool

Whether the edge list contains all bidirectional edges

False
mapping typing.Optional[pathpyG.core.index_map.IndexMap]

optional mapping of string IDs to node indices

None
num_nodes typing.Optional[int]

optional number of nodes (useful in case not all nodes have incident edges)

None

Examples:

>>> import pathpyG as pp
>>> l = [('a', 'b'), ('a', 'c'), ('b', 'c')]
>>> g = pp.Graph.from_edge_list(l)
>>> print(list(g.edges))
[('a', 'b'), ('a', 'c'), ('b', 'c')]
Source code in src/pathpyG/core/graph.py
@staticmethod
def from_edge_list(
    edge_list: Iterable[Tuple[str, str]],
    is_undirected: bool = False,
    mapping: Optional[IndexMap] = None,
    num_nodes: Optional[int] = None,
) -> Graph:
    """Build a Graph from a list of edges.

    Edges can be given as string or integer tuples. If no mapping is given, a mapping of
    node IDs to indices is created from the sorted unique node IDs in the edge list. If all
    node IDs are numeric strings, they are ordered by their integer value instead.

    Args:
        edge_list: Iterable of edges represented as tuples
        is_undirected: Whether the edge list contains all bidirectional edges
        mapping: optional mapping of string IDs to node indices
        num_nodes: optional number of nodes (useful in case not all nodes have incident edges);
            defaults to the number of IDs in the mapping

    Returns:
        Graph: graph containing the given edges; an empty edge list yields a graph without nodes

    Examples:
        >>> import pathpyG as pp
        >>> l = [('a', 'b'), ('a', 'c'), ('b', 'c')]
        >>> g = pp.Graph.from_edge_list(l)
        >>> print(list(g.edges))
        [('a', 'b'), ('a', 'c'), ('b', 'c')]
    """

    # an empty edge list results in a graph with zero nodes and an empty mapping
    if len(edge_list) == 0:
        empty_index = torch.tensor([[], []], dtype=torch.int32)
        return Graph(Data(edge_index=empty_index, num_nodes=0), mapping=IndexMap())

    if mapping is None:
        node_ids = np.unique(np.array(edge_list))
        all_numeric = np.issubdtype(node_ids.dtype, str) and np.char.isnumeric(node_ids).all()
        if all_numeric:
            # order numeric string IDs by value, so that e.g. '2' comes before '10'
            node_ids = np.sort(node_ids.astype(int)).astype(str)
        mapping = IndexMap(node_ids)

    if num_nodes is None:
        num_nodes = mapping.num_ids()

    index_tensor = mapping.to_idxs(edge_list).T.contiguous()
    edge_index = EdgeIndex(
        index_tensor,
        sparse_size=(num_nodes, num_nodes),
        is_undirected=is_undirected,
    )
    graph_data = Data(edge_index=edge_index, num_nodes=num_nodes)
    return Graph(graph_data, mapping=mapping)

get_predecessors

Return a tensor containing the indices of all predecessor nodes for a given node identified by an index.

Parameters:

Name Type Description Default
col_idx int

Index of node for which predecessors shall be returned.

required

Returns:

Name Type Description
tensor torch.Tensor

tensor containing indices of all predecessor nodes of the node indexed by col_idx

Source code in src/pathpyG/core/graph.py
def get_predecessors(self, col_idx: int) -> torch.Tensor:
    """Look up the indices of all predecessors of the node with a given index.

    Args:
        col_idx:   Index of node for which predecessors shall be returned.

    Returns:
        tensor: slice of `self.row` delimited by the column pointers of `col_idx`, or an
            empty tensor if `col_idx` lies beyond the column pointer
    """
    # indices without an entry in the column pointer have no predecessors
    if col_idx + 1 >= self.col_ptr.size(0):
        return torch.tensor([], device=self.data.edge_index.device)

    begin, end = self.col_ptr[col_idx], self.col_ptr[col_idx + 1]
    return self.row[begin:end]

get_successors

Return a tensor containing the indices of all successor nodes for a given node identified by an index.

Parameters:

Name Type Description Default
row_idx int

Index of node for which successors shall be returned.

required

Returns:

Name Type Description
tensor torch.Tensor

tensor containing indices of all successor nodes of the node indexed by row_idx

Source code in src/pathpyG/core/graph.py
def get_successors(self, row_idx: int) -> torch.Tensor:
    """Look up the indices of all successors of the node with a given index.

    Args:
        row_idx:   Index of node for which successors shall be returned.

    Returns:
        tensor: slice of `self.col` delimited by the row pointers of `row_idx`, or an
            empty tensor if `row_idx` lies beyond the row pointer
    """
    # indices without an entry in the row pointer have no successors
    if row_idx + 1 >= self.row_ptr.size(0):
        return torch.tensor([], device=self.data.edge_index.device)

    begin, end = self.row_ptr[row_idx], self.row_ptr[row_idx + 1]
    return self.col[begin:end]

has_self_loops

Return whether graph contains self-loops.

Returns:

Name Type Description
bool bool

True if graph contains self-loops, False otherwise

Source code in src/pathpyG/core/graph.py
def has_self_loops(self) -> bool:
    """Check for self-loops by asking the underlying data object.

    Returns:
        bool: the result of `has_self_loops()` of the underlying data object
    """
    loops_present = self.data.has_self_loops()
    return loops_present

is_directed

Return whether graph is directed.

Returns:

Name Type Description
bool bool

True if graph is directed, False otherwise

Source code in src/pathpyG/core/graph.py
def is_directed(self) -> bool:
    """Tell whether the graph is directed.

    Returns:
        bool: False if the edge index is flagged as undirected, True otherwise
    """
    flagged_undirected = self.data.edge_index.is_undirected
    return not flagged_undirected

is_edge

Return whether edge \((v,w)\) exists in the graph.

If an index to ID mapping is used, nodes are assumed to be string IDs. If no mapping is used, nodes are assumed to be integer indices.

Parameters:

Name Type Description Default
v typing.Union[str, int]

source node of edge as integer index or string ID

required
w typing.Union[str, int]

target node of edge as integer index or string ID

required

Returns:

Name Type Description
bool bool

True if edge exists, False otherwise

Source code in src/pathpyG/core/graph.py
def is_edge(self, v: Union[str, int], w: Union[str, int]) -> bool:
    """Check whether the graph contains the edge $(v,w)$.

    Both nodes are translated to indices by the graph's mapping. If an index to ID
    mapping is used, nodes are assumed to be string IDs. If no mapping is used,
    nodes are assumed to be integer indices.

    Args:
        v: source node of edge as integer index or string ID
        w: target node of edge as integer index or string ID

    Returns:
        bool: True if edge exists, False otherwise
    """
    source = self.mapping.to_idx(v)
    # the row pointers delimit the targets of all edges that start in `source`
    start, stop = self.row_ptr[source], self.row_ptr[source + 1]
    return self.mapping.to_idx(w) in self.col[start:stop]

is_undirected

Return whether graph is undirected.

Returns:

Name Type Description
bool bool

True if graph is undirected, False otherwise

Source code in src/pathpyG/core/graph.py
def is_undirected(self) -> bool:
    """Tell whether the graph is undirected.

    Returns:
        bool: value of the `is_undirected` flag of the edge index
    """
    flagged_undirected = self.data.edge_index.is_undirected
    return flagged_undirected

laplacian

Return Laplacian matrix for a given graph.

This wrapper method will use torch_geometric.utils.get_laplacian to return a Laplacian matrix representation of a given graph.

Parameters:

Name Type Description Default
normalization typing.Any

normalization parameter passed to pyG get_laplacian function

None
edge_attr typing.Any

optional name of numerical edge attribute that shall be passed to pyG get_laplacian function as edge weight

None

Returns:

Type Description
typing.Any

scipy.sparse.coo_matrix: Laplacian matrix representation of graph

Source code in src/pathpyG/core/graph.py
def laplacian(self, normalization: Any = None, edge_attr: Any = None) -> Any:
    """Compute the Laplacian matrix of the graph.

    This wrapper method uses [`torch_geometric.utils.get_laplacian`](https://pytorch-geometric.readthedocs.io/en/latest/modules/utils.html#torch_geometric.utils.get_laplacian)
    and converts the result with `torch_geometric.utils.to_scipy_sparse_matrix`.

    Args:
        normalization: normalization parameter passed to pyG `get_laplacian`
            function
        edge_attr: optional name of numerical edge attribute that shall
            be passed to pyG `get_laplacian` function as edge weight

    Returns:
        scipy.sparse.coo_matrix: Laplacian matrix representation of graph
    """
    edges = self.data.edge_index.as_tensor()

    # edge weights are only handed over if an edge attribute was requested
    options = {"normalization": normalization}
    if edge_attr is not None:
        options["edge_weight"] = self.data[edge_attr]

    index, weight = torch_geometric.utils.get_laplacian(edges, **options)
    return torch_geometric.utils.to_scipy_sparse_matrix(index, weight)

node_attrs

Return a list of node attributes.

This method returns a list containing the names of all node-level attributes, ignoring the special node_sequence attribute.

Returns:

Name Type Description
list typing.List[str]

list of node attributes

Source code in src/pathpyG/core/graph.py
def node_attrs(self) -> List[str]:
    """
    Collect the names of all node-level attributes.

    An attribute counts as node-level if its name starts with `node_`.
    The special `node_sequence` attribute is left out.

    Returns:
        list: names of the node attributes, in the order in which the data object lists its keys
    """
    return [name for name in self.data.keys() if name.startswith("node_") and name != "node_sequence"]

predecessors

Return the predecessors of a given node.

This method returns a list that contains all predecessors of a given node. If a node_id mapping is used, predecessors will be returned as string IDs. If no mapping is used, predecessors are returned as indices.

Parameters:

Name Type Description Default
node typing.Union[str, int] | tuple

Index or string ID of node for which predecessors shall be returned.

required

Returns:

Name Type Description
list list

list with all predecessors of the node identified by node using ID or index (if no mapping is used)

Source code in src/pathpyG/core/graph.py
def predecessors(self, node: Union[str, int] | tuple) -> list:
    """Return the predecessors of a given node.

    This method returns a generator object that yields all predecessors of a
    given node. If a `node_id` mapping is used, predecessors will be returned
    as string IDs. If no mapping is used, predecessors are returned as indices.

    Args:
        node:   Index or string ID of node for which predecessors shall be returned.

    Returns:
        list: list with all predecessors of the node identified
            by `node` using ID or index (if no mapping is used)
    """
    node_list = self.mapping.to_ids(self.get_predecessors(self.mapping.to_idx(node))).tolist()  # type: ignore

    if self.order > 1:
        return list(map(tuple, node_list))
    return node_list

sparse_adj_matrix

Return sparse adjacency matrix representation of (weighted) graph.

Parameters:

Name Type Description Default
edge_attr typing.Any

the edge attribute that shall be used as edge weight

None

Returns:

Type Description
typing.Any

scipy.sparse.coo_matrix: sparse adjacency matrix representation of graph

Source code in src/pathpyG/core/graph.py
def sparse_adj_matrix(self, edge_attr: Any = None) -> Any:
    """Build a sparse adjacency matrix of the (weighted) graph.

    The conversion is done by `torch_geometric.utils.to_scipy_sparse_matrix`, using the
    number of nodes of the graph as matrix size.

    Args:
        edge_attr: the edge attribute that shall be used as edge weight

    Returns:
        scipy.sparse.coo_matrix: sparse adjacency matrix representation of graph
    """
    edges = self.data.edge_index.as_tensor()

    # the values of the edge attribute are only handed over if one was requested
    options = {}
    if edge_attr is not None:
        options["edge_attr"] = self.data[edge_attr]

    return torch_geometric.utils.to_scipy_sparse_matrix(edges, num_nodes=self.n, **options)

successors

Return all successors of a given node.

This method returns a list that contains all successors of a given node. If an IndexMap is used, successors are returned as string IDs. If no mapping is used, successors are returned as indices.

Parameters:

Name Type Description Default
node typing.Union[int, str] | tuple

Index or string ID of node for which successors shall be returned.

required

Returns:

Name Type Description
list list

list with all successors of the node identified by node using ID or index (if no mapping is used)

Source code in src/pathpyG/core/graph.py
def successors(self, node: Union[int, str] | tuple) -> list:
    """Return all successors of a given node.

    This method returns a generator object that yields all successors of a
    given node. If an IndexMap is used, successors are returned
    as string IDs. If no mapping is used, successors are returned as indices.

    Args:
        node:   Index or string ID of node for which successors shall be returned.

    Returns:
        list: list with all successors of the node identified
            by `node` using ID or index (if no mapping is used)
    """

    node_list = self.mapping.to_ids(self.get_successors(self.mapping.to_idx(node))).tolist()  # type: ignore

    if self.order > 1:
        return list(map(tuple, node_list))
    return node_list

to_undirected

Returns an undirected version of a directed graph.

This method transforms the current graph instance into an undirected graph by adding all directed edges in opposite direction. It applies ToUndirected transform to the underlying torch_geometric.Data object, which automatically duplicates edge attributes for newly created directed edges.

Examples:

>>> import pathpyG as pp
>>> g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('c', 'a')])
>>> g_u = g.to_undirected()
>>> print(g_u)
Undirected graph with 3 nodes and 6 (directed) edges
Source code in src/pathpyG/core/graph.py
def to_undirected(self) -> Graph:
    """
    Create an undirected version of the graph.

    The [`ToUndirected`](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.transforms.ToUndirected.html#torch_geometric.transforms.ToUndirected)
    transform is applied to the underlying [`torch_geometric.Data`](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.data.Data.html#torch_geometric.data.Data) object, which adds
    all directed edges in opposite direction and duplicates edge attributes for the newly
    created directed edges. The result is wrapped in a new Graph that uses the same mapping.

    Returns:
        Graph: undirected graph with the node count and mapping of this graph

    Examples:
        >>> import pathpyG as pp
        >>> g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('c', 'a')])
        >>> g_u = g.to_undirected()
        >>> print(g_u)
        Undirected graph with 3 nodes and 6 (directed) edges
    """
    undirected = ToUndirected()(self.data)

    # The transform leaves a plain tensor as edge index, so it is wrapped into an
    # EdgeIndex again; sorting happens when the new Graph is constructed.
    num_nodes = self.data.num_nodes
    undirected.edge_index = EdgeIndex(
        data=undirected.edge_index, sparse_size=(num_nodes, num_nodes), is_undirected=True
    )
    undirected.num_nodes = self.data.num_nodes
    return Graph(undirected, self.mapping)

to_weighted_graph

Coalesces multi-edges to single-edges with an additional weight attribute

If the graph contains multiple edges between the same nodes, this method will coalesce them into a single edge with an additional weight attribute called edge_weight that contains the number of coalesced edges. The method returns a new graph instance with the coalesced edges.

Returns:

Name Type Description
Graph pathpyG.core.graph.Graph

Graph with coalesced edges

Source code in src/pathpyG/core/graph.py
def to_weighted_graph(self) -> Graph:
    """Coalesce multi-edges into single edges carrying an `edge_weight` attribute.

    Each edge of the current graph is given a unit weight and the edge index is
    passed to `torch_geometric.utils.coalesce`, so that parallel edges between
    the same pair of nodes are merged into one edge whose `edge_weight` holds
    the number of merged edges. The current graph is left untouched; the node
    mapping and number of nodes are carried over to the new graph.

    Returns:
        Graph: Graph with coalesced edges
    """
    edge_index = self.data.edge_index
    # One unit of weight per existing edge, created on the edge index's device.
    unit_weights = torch.ones(self.m, device=edge_index.device)
    merged_index, edge_counts = torch_geometric.utils.coalesce(edge_index.as_tensor(), unit_weights)

    weighted_data = Data(edge_index=merged_index, edge_weight=edge_counts, num_nodes=self.data.num_nodes)
    return Graph(weighted_data, mapping=self.mapping)

transition_probabilities

Compute transition probabilities based on weighted outdegrees.

Returns:

Name Type Description
tensor torch.Tensor

Transition probabilities.

Source code in src/pathpyG/core/graph.py
def transition_probabilities(self) -> torch.Tensor:
    """Compute per-edge transition probabilities from weighted outdegrees.

    Every edge weight is divided by the weighted outdegree of the edge's
    source node (row 0 of the edge index).

    Returns:
        tensor: Transition probabilities, one entry per edge.
    """
    out_strength = self.weighted_outdegrees()
    # Look up, for each edge, the weighted outdegree of its source node.
    per_edge_out_strength = out_strength[self.data.edge_index[0]]
    return self.data.edge_weight / per_edge_out_strength

weighted_outdegrees

Compute the weighted outdegrees of each node in the graph.

Parameters:

Name Type Description Default
graph pathpyG.core.graph.Graph

pathpy graph object.

required

Returns:

Name Type Description
tensor torch.Tensor

Weighted outdegrees of nodes.

Source code in src/pathpyG/core/graph.py
def weighted_outdegrees(self) -> torch.Tensor:
    """Compute the weighted outdegree of each node in the graph.

    The `edge_weight` values are summed per source node index (row 0 of the
    edge index). The result has one entry per node (`num_nodes` entries).

    Returns:
        tensor: Weighted outdegrees of nodes.
    """
    graph_data = self.data
    source_indices = graph_data.edge_index[0]
    return scatter(
        graph_data.edge_weight,
        source_indices,
        dim=0,
        dim_size=graph_data.num_nodes,
        reduce="sum",
    )

closed_triads

Calculates the set of edges that represent a closed triad around a given node v.

Parameters

g : Graph

The network in which to calculate the list of closed triads
Source code in src/pathpyG/statistics/clustering.py
def closed_triads(g: Graph, v: str) -> Set:
    """Calculates the set of edges that represent a closed triad
    around a given node v.

    An edge (x, y) is part of the result if x is a successor of v, y is a
    successor of x, and y is also a successor of v.

    Parameters
    ----------

    g : Graph

        The graph in which to calculate the closed triads

    v : str

        The node around which closed triads are collected

    Returns
    -------

    set

        Set of (x, y) tuples, one per edge closing a triad around v

    """
    # Materialise the successors of v once: the set is both the iteration
    # source and the membership test, instead of querying the graph again
    # for every candidate edge.
    successors_of_v = set(g.successors(v))

    return {(x, y) for x in successors_of_v for y in g.successors(x) if y in successors_of_v}

degree_assortativity

Calculate the degree assortativity

Source code in src/pathpyG/statistics/degrees.py
def degree_assortativity(g: Graph, mode: str = "total") -> float:
    """Calculate the degree assortativity of a graph.

    The coefficient is computed as cov / var, where both sums run over all
    ordered node pairs (i, j):

        cov = sum_ij (A_ij - d_i d_j / 2m) d_i d_j
        var = sum_ij (d_i delta_ij - d_i d_j / 2m) d_i d_j

    with A the dense adjacency matrix, d the node degrees and m the sum of all
    entries of A (halved for undirected graphs).

    Args:
        g: The graph for which the degree assortativity is calculated.
        mode: For directed graphs, "in" uses `g.in_degrees` and "out" uses
            `g.out_degrees`; any other value uses `g.degrees()`. Undirected
            graphs always use `g.degrees()`.

    Returns:
        The ratio cov / var. No special handling is applied when var or m is
        zero (e.g. graphs without edges or with identical degrees).
    """
    A = g.sparse_adj_matrix().todense()
    m = _np.sum(A)

    directed = g.is_directed()
    if directed and mode == "in":
        d = g.in_degrees
    elif directed and mode == "out":
        d = g.out_degrees
    else:
        d = g.degrees()
        if not directed:
            # Each undirected edge contributes two entries to A.
            m = m / 2.0

    # Resolve matrix index and degree once per node instead of once per pair.
    node_info = [(v, g.mapping.to_idx(v), d[v]) for v in g.nodes]
    two_m = 2 * m

    cov = 0.0
    var = 0.0
    for i, idx_i, d_i in node_info:
        for j, idx_j, d_j in node_info:
            expected = (d_i * d_j) / two_m
            cov += (A[idx_i, idx_j] - expected) * d_i * d_j
            if i != j:
                var -= expected * d_i * d_j
            else:
                var += (d_i - expected) * d_i * d_j
    return cov / var

degree_central_moment

Calculates the k-th central moment of the degree distribution.

Parameters:

Name Type Description Default
graph pathpyG.core.graph.Graph

The graph for which to calculate the k-th central moment

required
Source code in src/pathpyG/statistics/degrees.py
def degree_central_moment(graph: Graph, k: int = 1, mode: str = "total") -> float:
    """Calculates the k-th central moment of the degree distribution.

    The moment is the sum over all observed degrees x of
    (x - mean)^k * P(x), where P is the degree distribution and mean is the
    mean of the degree sequence, both computed for the given mode.

    Args:
        graph: The graph for which to calculate the k-th central moment
        k: Order of the central moment
        mode: Degree mode forwarded to `degree_distribution` and `degree_sequence`

    Returns:
        The k-th central moment of the degree distribution.
    """
    distribution = degree_distribution(graph, mode=mode)
    mean_degree = _np.mean(degree_sequence(graph, mode=mode))

    moment = 0.0
    for degree, probability in distribution.items():
        moment += (degree - mean_degree) ** k * probability
    return moment

degree_distribution

Calculates the degree distribution of a graph

Source code in src/pathpyG/statistics/degrees.py
def degree_distribution(g: Graph, mode: str = "total") -> Dict[int, float]:
    """Calculates the degree distribution of a graph.

    Args:
        g: The graph for which the degree distribution is calculated.
        mode: For directed graphs, "in" uses `g.in_degrees` and "out" uses
            `g.out_degrees`; any other value uses `g.degrees()`. Undirected
            graphs always use `g.degrees()`.

    Returns:
        A `defaultdict(float)` mapping each observed degree to the fraction of
        nodes having that degree. Note that looking up an unobserved degree
        in the returned defaultdict yields 0.0 and inserts that key.
    """
    # Select the degree dictionary once instead of computing `g.degrees()`
    # up front and then overriding or recomputing it.
    directed = g.is_directed()
    if directed and mode == "in":
        degrees = g.in_degrees
    elif directed and mode == "out":
        degrees = g.out_degrees
    else:
        degrees = g.degrees()

    cnt: defaultdict = defaultdict(float)
    for v in g.nodes:
        # The division stays inside the loop so a graph without nodes never
        # divides by g.n.
        cnt[degrees[v]] += 1.0 / g.n
    return cnt

degree_generating_function

Returns the generating function of the degree distribution of a network, calculated for either a single argument x or a list or numpy array of arguments x

Returns f(x) where f is the probability generating function for the degree distribution P(k) for a graph. The function is defined in the interval [0,1]. The value returned is from the range [0,1]. The following properties hold:

[1/k! d^k/dx f]_{x=0} = P(k) with d^k/dx f being the k-th derivative of f by x

f'(1) = <k> with f' being the first derivative and <k> the mean degree

[(x d/dx)^m f]_{x=1} = <k^m> with <k^m> being the m-th raw moment of P

Parameters:

Name Type Description Default
graph pathpyG.core.graph.Graph

The graph for which the generating function shall be computed

required
float, list, numpy.ndarray

The argument(s) for which value(s) f(x) shall be computed.

Example:

    # Generate simple network
    import pathpyG as pp
    import numpy as np
    import matplotlib.pyplot as plt

    g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('a', 'c'), ('c', 'd'),
                                ('d', 'e'), ('d', 'f'), ('e', 'f')]).to_undirected()

    # Return single function value
    val = pp.statistics.degree_generating_function(g, 0.3)
    print(val)
    0.069

    # Plot generating function of degree distribution

    x = np.linspace(0, 1, 20)
    y = pp.statistics.degree_generating_function(g, x)
    x = plt.plot(x, y)
    # [Function plot]

    # Plot generating function based on degree sequence

    x = np.linspace(0, 1, 20)
    y = pp.statistics.degree_generating_func([1,2,1,2], x)
    x = plt.plot(x, y)
    # [Function plot]

Source code in src/pathpyG/statistics/degrees.py
def degree_generating_function(
    graph: Graph, x: float | list[float] | _np.ndarray, mode: str = "total"
) -> float | _np.ndarray:
    """Returns the generating function of the degree distribution of a network,
        calculated for either a single argument x or a list or numpy array of arguments x


    Returns f(x) = sum_k P(k) x^k where f is the probability generating function
    for the degree distribution P(k) for a graph. The function is defined in the
    interval [0,1].  The value returned is from the range [0,1]. The following
    properties hold:

    [1/k! d^k/dx f]_{x=0} = P(k)
    with d^k/dx f being the k-th derivative of f by x

    f'(1) = <k>
    with f' being the first derivative and <k> the mean degree

    [(x d/dx)^m f]_{x=1} = <k^m>
    with <k^m> being the m-th raw moment of P

    Args:
        graph: The graph for which the generating function shall be computed
        x: The argument(s) for which value(s) f(x) shall be computed. May be
            a scalar (int, float or NumPy scalar), a list, or a numpy array.
        mode: Degree mode forwarded to `degree_distribution`

    Returns:
        A float if x is a scalar; otherwise a numpy array holding f(x_i) for
        every entry of x, in input order and with the same shape as x
        (duplicate arguments are kept).

    Example:
    ```py
        # Generate simple network
        import pathpyG as pp
        import numpy as np
        import matplotlib.pyplot as plt

        g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('a', 'c'), ('c', 'd'),
                                    ('d', 'e'), ('d', 'f'), ('e', 'f')]).to_undirected()

        # Return single function value
        val = pp.statistics.degree_generating_function(g, 0.3)
        print(val)
        0.069

        # Plot generating function of degree distribution

        x = np.linspace(0, 1, 20)
        y = pp.statistics.degree_generating_function(g, x)
        x = plt.plot(x, y)
        # [Function plot]
    ```
    """

    p_k = degree_distribution(graph, mode=mode)

    # ndim == 0 covers Python ints/floats, NumPy scalars and 0-d arrays alike,
    # so integer arguments such as 0 or 1 are treated as scalars too.
    scalar_input = _np.ndim(x) == 0
    points = _np.atleast_1d(_np.asarray(x, dtype=float))

    # Accumulate positionally (not keyed by argument value) so that repeated
    # arguments and single-element sequences keep their place in the output.
    values = _np.zeros_like(points)
    for k, p in p_k.items():
        values += p * points**k

    if scalar_input:
        return float(values[0])
    return values

degree_raw_moment

Calculates the k-th raw moment of the degree distribution of a network

Parameters:

Name Type Description Default
graph pathpyG.core.graph.Graph

The graph in which to calculate the k-th raw moment

required
Source code in src/pathpyG/statistics/degrees.py
def degree_raw_moment(graph: Graph, k: int = 1, mode: str = "total") -> float:
    """Calculates the k-th raw moment of the degree distribution of a network

    The moment is the sum over all observed degrees x of x^k * P(x), where P
    is the degree distribution computed for the given mode.

    Args:
        graph:  The graph in which to calculate the k-th raw moment
        k: Order of the raw moment
        mode: Degree mode forwarded to `degree_distribution`

    Returns:
        The k-th raw moment of the degree distribution.
    """
    distribution = degree_distribution(graph, mode=mode)

    moment = 0.0
    for degree, probability in distribution.items():
        moment += degree**k * probability
    return moment

degree_sequence

Calculates the degree sequence of a network (for directed graphs, the in-, out- or total degrees selected by mode).

Parameters:

Name Type Description Default
graph

The Graph object for which degrees are calculated

required
Source code in src/pathpyG/statistics/degrees.py
def degree_sequence(g: Graph, mode: str = "total") -> _np.ndarray:
    """Calculates the degree sequence of a network.

    Args:
        g: The `Graph` object for which degrees are calculated
        mode: For directed graphs, "in" uses `g.in_degrees` and "out" uses
            `g.out_degrees`; any other value uses `g.degrees()`. Undirected
            graphs always use `g.degrees()`.

    Returns:
        A float array of length `g.n` in which the entry at position
        `g.mapping.to_idx(v)` holds the degree of node `v`.
    """
    # Select the degree dictionary once instead of computing `g.degrees()`
    # up front and then overriding or recomputing it.
    directed = g.is_directed()
    if directed and mode == "in":
        degrees = g.in_degrees
    elif directed and mode == "out":
        degrees = g.out_degrees
    else:
        degrees = g.degrees()

    sequence = _np.zeros(g.n, dtype=float)
    for v in g.nodes:
        # Position by mapped node index, which need not follow g.nodes order.
        sequence[g.mapping.to_idx(v)] = degrees[v]
    return sequence