Skip to content

netzschleuder

list_netzschleuder_records

Read a list of data sets available at the netzschleuder repository.

Parameters:

Name Type Description Default
base_url str

Base URL of netzschleuder repository

'https://networks.skewed.de'
**kwargs typing.Any

Keyword arguments that will be passed to the netzschleuder repository as HTTP GET parameters. For supported parameters see https://networks.skewed.de/api

{}

Examples:

Return a list of all data sets

>>> import pathpy as pp
>>> pp.io.graphtool.list_netzschleuder_records()
['karate', 'reality_mining', 'sp_hypertext', ...]

Return a list of all data sets with a given tag

>>> pp.io.graphtool.list_netzschleuder_records(tags='temporal')
['reality_mining', 'sp_hypertext', ...]

Return a dictionary containing all data set names (keys) as well as all network attributes

>>> pp.io.graphtool.list_netzschleuder_records(full=True)
{ 'reality_mining': [...], 'karate': [...] }

Returns:

Type Description
typing.Union[list, dict]

Either a list of data set names or a dictionary containing all data set names and network attributes.

Source code in src/pathpyG/io/netzschleuder.py
def list_netzschleuder_records(base_url: str='https://networks.skewed.de', **kwargs: Any) -> Union[list, dict]:
    """
    Read a list of data sets available at the netzschleuder repository.

    Args:
        base_url: Base URL of netzschleuder repository
        **kwargs: Keyword arguments that will be passed to the netzschleuder repository as HTTP GET parameters.
            Names and values are URL-encoded and joined into one query string.
            For supported parameters see https://networks.skewed.de/api

    Examples:
        Return a list of all data sets

        >>> import pathpy as pp
        >>> pp.io.graphtool.list_netzschleuder_records()
        ['karate', 'reality_mining', 'sp_hypertext', ...]

        Return a list of all data sets with a given tag

        >>> pp.io.graphtool.list_netzschleuder_records(tags='temporal')
        ['reality_mining', 'sp_hypertext', ...]

        Return a dictionary containing all data set names (keys) as well as all network attributes

        >>> pp.io.graphtool.list_netzschleuder_records(full=True)
        { 'reality_mining': [...], 'karate': [...] }

    Returns:
        Either a list of data set names or a dictionary containing all data set names and network attributes.

    Raises:
        Exception: If the request fails with an HTTP error.
    """
    # imported locally so the function does not depend on additional module-level imports
    from urllib.parse import urlencode

    url = '/api/nets'
    if kwargs:
        # a query string has a single '?' followed by '&'-separated, escaped key=value pairs
        url += '?' + urlencode(kwargs)
    try:
        # the context manager closes the HTTP response once the body has been read
        with request.urlopen(base_url + url) as response:
            return json.loads(response.read())
    except HTTPError as err:
        msg = 'Could not connect to netzschleuder repository at {0}'.format(base_url)
        raise Exception(msg) from err

parse_graphtool_format

Decodes data in graphtool binary format and returns a Graph. For documentation of the graphtool binary format, see https://graph-tool.skewed.de/static/doc/gt_format.html

Parameters:

Name Type Description Default
data bytes

Array of bytes to be decoded

required
ignore_temporal

If False, this function will return a static or temporal network depending on whether edges contain a time attribute. If True, pathpy will not interpret time attributes and thus always return a static network.

required

Returns:

Type Description
pathpyG.core.Graph.Graph

Network or TemporalNetwork: a static or temporal network object

Source code in src/pathpyG/io/netzschleuder.py
def parse_graphtool_format(data: bytes, id_node_attr=None) -> Graph:
    """
    Decodes data in graphtool binary format and returns a [`Graph`][pathpyG.Graph]. For documentation of
    the graphtool binary format, see https://graph-tool.skewed.de/static/doc/gt_format.html

    Args:
        data: Array of bytes to be decoded
        id_node_attr: Optional name of a node-level property. If given (and truthy), the values
            parsed for that property are passed to `pp.IndexMap` and used as node mapping of the
            returned graph. Otherwise the graph is created without a mapping.

    Returns:
        Graph built from the decoded adjacency lists. Node-level properties are stored in `g.data`
        as `node_<name>`, edge-level properties as float tensors named `edge_<name>`, and graph-level
        properties under their own name. If the data marks the network as undirected, the result
        of `to_undirected()` is returned.

    Raises:
        Exception: If the data does not start with the graphtool magic bytes.
    """

    # check magic bytes
    if data[0:6] != b'\xe2\x9b\xbe\x20\x67\x74':
        raise Exception('Invalid graphtool file. Wrong magic bytes.')
    ptr = 6

    # skip graphtool version byte, it is not needed for decoding
    ptr += 1

    # read endianness
    graphtool_endianness = '>' if data[ptr] else '<'
    ptr += 1

    # precompiled readers for the fixed-size fields used throughout the format
    uint8 = struct.Struct(graphtool_endianness + 'B')
    uint64 = struct.Struct(graphtool_endianness + 'Q')

    # read length of comment and skip the comment itself
    str_len = uint64.unpack_from(data, ptr)[0]
    ptr += 8 + str_len

    # read network directedness
    directed = bool(data[ptr])
    ptr += 1

    # read number of nodes
    n_nodes = uint64.unpack_from(data, ptr)[0]
    ptr += 8

    # determine binary representation of neighbour lists:
    # node indices use the smallest unsigned integer type that can hold n_nodes
    if n_nodes < 2**8:
        fmt = 'B'
    elif n_nodes < 2**16:
        fmt = 'H'
    elif n_nodes < 2**32:
        fmt = 'I'
    else:
        fmt = 'Q'
    neighbor = struct.Struct(graphtool_endianness + fmt)

    # parse lists of out-neighbors for all n nodes
    sources = []
    targets = []
    for v in range(n_nodes):
        # read number of neighbors
        num_neighbors = uint64.unpack_from(data, ptr)[0]
        ptr += 8

        # add edges to record
        for _ in range(num_neighbors):
            sources.append(v)
            targets.append(neighbor.unpack_from(data, ptr)[0])
            ptr += neighbor.size
    n_edges = len(sources)

    # collect attributes from property maps
    graph_attr = dict()
    node_attr = dict()
    edge_attr = dict()

    # parse property maps
    property_maps = uint64.unpack_from(data, ptr)[0]
    ptr += 8

    for _ in range(property_maps):
        key_type = uint8.unpack_from(data, ptr)[0]
        ptr += 1

        property_len = uint64.unpack_from(data, ptr)[0]
        ptr += 8

        property_name = data[ptr:ptr+property_len].decode('ascii')
        ptr += property_len

        property_type = uint8.unpack_from(data, ptr)[0]
        ptr += 1

        # _parse_property_value returns the decoded value at index 0
        # and the number of consumed bytes at index 1
        if key_type == 0: # graph-level property
            res = _parse_property_value(data, ptr, property_type, graphtool_endianness)
            graph_attr[property_name] = res[0]
            ptr += res[1]
        elif key_type == 1: # node-level property, one value per node
            values = node_attr.setdefault(property_name, [])
            for _ in range(n_nodes):
                res = _parse_property_value(data, ptr, property_type, graphtool_endianness)
                # each node value is stored wrapped in a single-element list
                values.append([res[0]])
                ptr += res[1]
        elif key_type == 2: # edge-level property, one value per edge
            values = edge_attr.setdefault(property_name, [])
            for _ in range(n_edges):
                res = _parse_property_value(data, ptr, property_type, graphtool_endianness)
                values.append(res[0])
                ptr += res[1]
        else:
            # NOTE(review): the values of this property map are not skipped, so the read
            # offset is not advanced past them before the next property map is parsed
            print('Unknown key type {0}'.format(key_type))

    mapping = pp.IndexMap(node_attr[id_node_attr]) if id_node_attr else None

    device = config['torch']['device']
    g = Graph.from_edge_index(torch.LongTensor([sources, targets]).to(device), mapping=mapping)

    # properties whose name already starts with the respective prefix are not copied
    for a in node_attr:
        if not a.startswith('node_'):
            g.data['node_{0}'.format(a)] = node_attr[a]
    for a in edge_attr:
        if not a.startswith('edge_'):
            g.data['edge_{0}'.format(a)] = torch.tensor(edge_attr[a], dtype=torch.float).to(device)
    for a in graph_attr:
        g.data[a] = graph_attr[a]

    if not directed:
        return g.to_undirected()
    return g

read_graphtool

Read a file in graphtool binary format.

Parameters:

Name Type Description Default
file str

Path to graphtool file to be read

required
Source code in src/pathpyG/io/netzschleuder.py
def read_graphtool(file: str, ignore_temporal: bool=False, multiedges: bool=False) -> Optional[Union[Graph, TemporalGraph]]:
    """
    Read a file in graphtool binary format.

    Files whose name ends in `.zst` are decompressed with the `zstandard` package
    before they are parsed.

    Args:
        file: Path to graphtool file to be read
        ignore_temporal: Not used by the current implementation; kept for interface compatibility.
        multiedges: Not used by the current implementation; kept for interface compatibility.

    Returns:
        The graph returned by `parse_graphtool_format` for the contents of the file.

    Raises:
        Exception: If the file is zstandard-compressed and the `zstandard` package is not installed.
    """
    with open(file, 'rb') as f:
        if not str(file).endswith('.zst'):
            return parse_graphtool_format(f.read())

        # only the import can raise ModuleNotFoundError, so only the import is guarded
        try:
            import zstandard as zstd
        except ModuleNotFoundError as err:
            msg = 'Package zstandard is required to decompress graphtool files. Please install module, e.g., using "pip install zstandard".'
            raise Exception(msg) from err

        # stream-decompress so that the output size is not limited to the size of the compressed input
        data = zstd.ZstdDecompressor().stream_reader(f).readall()
    return parse_graphtool_format(data)

read_netzschleuder_network

Read a pathpy network record from the netzschleuder repository.

Parameters:

Name Type Description Default
name str

Name of the network data set to read from

required
net typing.Optional[str]

Identifier of the network within the data set to read. For data sets containing a single network only, this can be set to None.

None
ignore_temporal bool

If False, this function will return a static or temporal network depending on whether edges contain a time attribute. If True, pathpy will not interpret time attributes and thus always return a static network.

False
base_url str

Base URL of netzschleuder repository

'https://networks.skewed.de'

Examples:

Read network '77' from karate club data set

>>> import pathpy as pp
>>> n = pp.io.graphtool.read_netzschleuder_network('karate', '77')
>>> print(type(n))
>>> pp.plot(n)
pp.Network

Read a temporal network from a data set containing a single network only (i.e. net can be omitted):

>>> n = pp.io.graphtool.read_netzschleuder_network('reality_mining')
>>> print(type(n))
>>> pp.plot(n)
pp.TemporalNetwork

Read temporal network but ignore time attribute of edges:

>>> n = pp.io.graphtool.read_netzschleuder_network('reality_mining', ignore_temporal=True)
>>> print(type(n))
>>> pp.plot(n)
pp.Network

Returns:

Type Description
typing.Union[pathpyG.core.Graph.Graph, pathpyG.core.TemporalGraph.TemporalGraph]

Depending on whether the network data set contains an edge attribute

typing.Union[pathpyG.core.Graph.Graph, pathpyG.core.TemporalGraph.TemporalGraph]

time (and whether ignore_temporal is set to True), this function

typing.Union[pathpyG.core.Graph.Graph, pathpyG.core.TemporalGraph.TemporalGraph]

returns an instance of Network or TemporalNetwork

Source code in src/pathpyG/io/netzschleuder.py
def read_netzschleuder_network(name: str, net: Optional[str]=None,
        ignore_temporal: bool=False, multiedges: bool=False,
        base_url: str='https://networks.skewed.de') -> Union[Graph, TemporalGraph]:
    """Read a pathpy network record from the netzschleuder repository.

    The zstandard-compressed graphtool file of the requested network is downloaded,
    decompressed and passed to `parse_graphtool_format`.

    Args:
        name: Name of the network data set to read from
        net: Identifier of the network within the data set to read. For data sets
            containing a single network only, this can be set to None, in which
            case the data set name is used as network identifier.
        ignore_temporal: Not used by the current implementation; kept for interface compatibility.
        multiedges: Not used by the current implementation; kept for interface compatibility.
        base_url: Base URL of netzschleuder repository

    Examples:
        Read network '77' from karate club data set

        >>> import pathpy as pp
        >>> n = pp.io.graphtool.read_netzschleuder_network('karate', '77')

        Read a network from a data set containing a single network only
        (i.e. net can be omitted):

        >>> n = pp.io.graphtool.read_netzschleuder_network('reality_mining')

    Returns:
        The graph returned by `parse_graphtool_format` for the downloaded data.

    Raises:
        Exception: If the `zstandard` package is not installed, or if the download
            fails with an HTTP error.
    """
    # only the import can raise ModuleNotFoundError, so only the import is guarded
    try:
        import zstandard as zstd
    except ModuleNotFoundError as err:
        msg = 'Package zstandard is required to decompress graphtool files. Please install module, e.g., using "pip install zstandard".'
        raise Exception(msg) from err

    # retrieve data
    if not net:
        net = name
    url = '/net/{0}/files/{1}.gt.zst'.format(name, net)
    try:
        response = request.urlopen(base_url + url)
    except HTTPError as err:
        msg = 'Could not connect to netzschleuder repository at {0}'.format(base_url)
        raise Exception(msg) from err

    # decompress data; the context manager closes the HTTP response afterwards
    with response:
        decompressed = zstd.ZstdDecompressor().stream_reader(response).readall()

    # parse graphtool binary format
    return parse_graphtool_format(bytes(decompressed))

read_netzschleuder_record

Read metadata of a single data record with given name from the netzschleuder repository

Parameters:

Name Type Description Default
name str

Name of the data set for which to retrieve the metadata

required
base_url str

Base URL of netzschleuder repository

'https://networks.skewed.de'

Examples:

Retrieve metadata of karate club network

>>> import pathpy as pp
>>> metadata = pp.io.graphtool.read_netzschleuder_record('karate')
>>> print(metadata)
{
    'analyses': {'77': {'average_degree': 4.52... } }
}

Returns:

Type Description
dict

Dictionary containing key-value pairs of metadata

Source code in src/pathpyG/io/netzschleuder.py
def read_netzschleuder_record(name: str, base_url: str='https://networks.skewed.de') -> dict:
    """
    Read metadata of a single data record with given name from the netzschleuder repository

    Args:
        name: Name of the data set for which to retrieve the metadata
        base_url: Base URL of netzschleuder repository

    Examples:
        Retrieve metadata of karate club network

        >>> import pathpy as pp
        >>> metadata = pp.io.graphtool.read_netzschleuder_record('karate')
        >>> print(metadata)
        {
            'analyses': {'77': {'average_degree': 4.52... } }
        }

    Returns:
        Dictionary containing key-value pairs of metadata

    Raises:
        Exception: If the request fails with an HTTP error.
    """
    url = '/api/net/{0}'.format(name)
    try:
        # the context manager closes the HTTP response once the body has been read
        with request.urlopen(base_url + url) as response:
            return json.loads(response.read())
    except HTTPError as err:
        msg = 'Could not connect to netzschleuder repository at {0}'.format(base_url)
        raise Exception(msg) from err