Skip to content

netzschleuder

list_netzschleuder_records

Read a list of data sets available at the netzschleuder repository.

Parameters:

Name Type Description Default
base_url str

Base URL of netzschleuder repository

'https://networks.skewed.de'
**kwargs typing.Any

Keyword arguments that will be passed to the netzschleuder repository as HTTP GET parameters. For supported parameters see https://networks.skewed.de/api

{}

Examples:

Return a list of all data sets

>>> import pathpyG as pp
>>> pp.io.list_netzschleuder_records()
['karate', 'reality_mining', 'sp_hypertext', ...]

Return a list of all data sets with a given tag

>>> pp.io.list_netzschleuder_records(tags='temporal')
['reality_mining', 'sp_hypertext', ...]

Return a dictionary containing all data set names (keys) as well as all network attributes

>>> pp.io.list_netzschleuder_records(full=True)
{ 'reality_mining': [...], 'karate': [...] }

Returns:

Type Description
typing.Union[list, dict]

Either a list of data set names or a dictionary containing all data set names and network attributes.

Source code in src/pathpyG/io/netzschleuder.py
def list_netzschleuder_records(base_url: str = "https://networks.skewed.de", **kwargs: Any) -> Union[list, dict]:
    """
    Read a list of data sets available at the netzschleuder repository.

    Args:
        base_url: Base URL of netzschleuder repository
        **kwargs: Keyword arguments that will be passed to the netzschleuder repository as HTTP GET parameters.
            For supported parameters see https://networks.skewed.de/api


    Examples:
        Return a list of all data sets

        >>> import pathpyG as pp
        >>> pp.io.list_netzschleuder_records()
        ['karate', 'reality_mining', 'sp_hypertext', ...]

        Return a list of all data sets with a given tag

        >>> pp.io.list_netzschleuder_records(tags='temporal')
        ['reality_mining', 'sp_hypertext', ...]

        Return a dictionary containing all data set names (keys) as well as all network attributes

        >>> pp.io.list_netzschleuder_records(full=True)
        { 'reality_mining': [...], 'karate': [...] }


    Returns:
        Either a list of data set names or a dictionary containing all data set names and network attributes.

    Raises:
        Exception: If the netzschleuder repository cannot be reached.
    """
    # Local import keeps this fix self-contained; urlencode joins multiple
    # parameters with '&'. The previous implementation prefixed every
    # parameter with '?', producing invalid URLs such as '/api/nets?a=1?b=2'
    # whenever more than one keyword argument was passed.
    from urllib.parse import urlencode

    url = "/api/nets"
    if kwargs:
        url += "?" + urlencode(kwargs)
    try:
        f = request.urlopen(base_url + url).read()
        return json.loads(f)
    except HTTPError as error:
        msg = "Could not connect to netzschleuder repository at {0}".format(base_url)
        # chain the original HTTPError so the root cause stays visible
        raise Exception(msg) from error

read_netzschleuder_graph

Read a pathpyG graph or temporal graph from the netzschleuder repository.

Parameters:

Name Type Description Default
name str

Name of the network data set to read from

required
net typing.Optional[str]

Identifier of the network within the data set to read. For data sets containing a single network only, this can be set to None.

None
multiedges bool

If True, multiple edges between the same pair of nodes are retained instead of being merged into a single edge.

False
time_attr typing.Optional[str]

Name of the edge attribute that shall be interpreted as a timestamp. If set, a temporal graph is returned; if None, a static graph is returned.

None
base_url str

Base URL of netzschleuder repository

'https://networks.skewed.de'
format

for 'csv' a zipped csv file will be downloaded, for 'gt' the binary graphtool format will be retrieved via the API

'csv'

Examples:

Read network '77' from karate club data set

>>> import pathpyG as pp
>>> n = pp.io.read_netzschleuder_graph('karate', '77')
>>> print(type(n))
>>> pp.plot(n)
pp.Graph

Returns:

Type Description
pathpyG.core.graph.Graph

an instance of Graph

Source code in src/pathpyG/io/netzschleuder.py
def read_netzschleuder_graph(
    name: str,
    net: Optional[str] = None,
    multiedges: bool = False,
    time_attr: Optional[str] = None,
    base_url: str = "https://networks.skewed.de",
    format: str = "csv",
) -> Graph:
    """Read a pathpyG graph or temporal graph from the netzschleuder repository.

    Args:
        name: Name of the network data set to read from
        net: Identifier of the network within the data set to read. For data sets
            containing a single network only, this can be set to None.
        multiedges: If True, multiple edges between the same pair of nodes are
            retained instead of being merged into a single edge.
        time_attr: Name of the edge attribute that shall be interpreted as a
            timestamp. If set, a temporal graph is returned; if None, a static
            graph is returned.
        base_url: Base URL of netzschleuder repository
        format: for 'csv' a zipped csv file will be downloaded, for 'gt' the binary graphtool format will be retrieved via the API

    Examples:
        Read network '77' from karate club data set

        >>> import pathpyG as pp
        >>> n = pp.io.read_netzschleuder_graph('karate', '77')
        >>> print(type(n))
        >>> pp.plot(n)
        pp.Graph


    Returns:
        an instance of Graph

    Raises:
        Exception: If the record cannot be retrieved, if a record containing
            multiple networks is requested without specifying `net`, if an
            unknown `format` is given, or if the optional `zstandard` package
            is missing for format 'gt'.
    """
    try:
        # retrieve properties of the data record via the netzschleuder API
        properties = json.loads(request.urlopen(f"{base_url}/api/net/{name}").read())

        # a time attribute implies that a temporal graph shall be constructed
        timestamps = time_attr is not None

        if not net:
            analyses = properties["analyses"]
            net = name
        else:
            analyses = properties["analyses"][net]

        try:
            is_directed = analyses["is_directed"]
            num_nodes = analyses["num_vertices"]
        except KeyError as error:
            raise Exception(f"Record {name} contains multiple networks, please specify network name.") from error

        if format == "csv":
            url = f"{base_url}/net/{name}/files/{net}.csv.zip"
            try:
                response = request.urlopen(url)

                # decompress zip into temporary folder
                data = BytesIO(response.read())

                with zipfile.ZipFile(data, "r") as zip_ref:
                    with tempfile.TemporaryDirectory() as temp_dir:
                        zip_ref.extractall(path=temp_dir)

                        # edges.csv contains source/target node indices plus edge attributes
                        edges = pd.read_csv(
                            f"{temp_dir}/edges.csv", header=0, sep=",", skip_blank_lines=True, skipinitialspace=True
                        )

                        # rename columns to the pathpyG convention (v, w, t)
                        edges.rename(columns={"# source": "v", "target": "w"}, inplace=True)
                        if timestamps and time_attr:
                            edges.rename(columns={time_attr: "t"}, inplace=True)

                        # construct graph and assign edge attributes
                        if timestamps:
                            g = df_to_temporal_graph(df=edges, is_undirected=not is_directed, num_nodes=num_nodes)
                        else:
                            # bug fix: `multiedges=True` was previously hard-coded here,
                            # silently ignoring the function's `multiedges` argument
                            g = df_to_graph(df=edges, multiedges=multiedges, num_nodes=num_nodes)
                            if not is_directed:
                                g = g.to_undirected()

                        # nodes.csv contains node indices with node properties (like name)
                        node_attrs = pd.read_csv(
                            f"{temp_dir}/nodes.csv", header=0, sep=",", skip_blank_lines=True, skipinitialspace=True
                        )
                        node_attrs.rename(columns={"# index": "index"}, inplace=True)

                        add_node_attributes(node_attrs, g)

                        # add graph-level attributes computed by netzschleuder
                        for key in analyses:
                            g.data["analyses_" + key] = analyses[key]

                        return g
            except HTTPError as error:
                raise Exception(f"Could not retrieve netzschleuder record at {url}") from error

        elif format == "gt":
            try:
                # zstandard is an optional dependency, only needed for the binary format
                import zstandard as zstd

                url = f"/net/{name}/files/{net}.gt.zst"
                try:
                    f = request.urlopen(base_url + url)
                    # decompress zstandard-compressed data
                    dctx = zstd.ZstdDecompressor()
                    reader = dctx.stream_reader(f)
                    decompressed = reader.readall()

                    # parse graphtool binary format
                    return parse_graphtool_format(bytes(decompressed))
                except HTTPError as error:
                    raise Exception(f"Could not retrieve netzschleuder record at {url}") from error
            except ModuleNotFoundError as error:
                msg = 'Package zstandard is required to decompress graphtool files. Please install module, e.g., using "pip install zstandard".'
                raise Exception(msg) from error
        else:
            # previously an unknown format silently returned None despite the
            # `-> Graph` annotation; fail loudly instead
            raise Exception(f"Unknown format '{format}', expected 'csv' or 'gt'.")
    except HTTPError as error:
        raise Exception(f"Could not retrieve netzschleuder record at {base_url}/api/net/{name}") from error

read_netzschleuder_record

Read metadata of a single data record with given name from the netzschleuder repository

Parameters:

Name Type Description Default
name str

Name of the data set for which to retrieve the metadata

required
base_url str

Base URL of netzschleuder repository

'https://networks.skewed.de'

Examples:

Retrieve metadata of karate club network

>>> import pathpyG as pp
>>> metadata = pp.io.read_netzschleuder_record('karate')
>>> print(metadata)
{
    'analyses': {'77': {'average_degree': 4.52... } }
}

Returns:

Type Description
dict

Dictionary containing key-value pairs of metadata

Source code in src/pathpyG/io/netzschleuder.py
def read_netzschleuder_record(name: str, base_url: str = "https://networks.skewed.de") -> dict:
    """
    Read metadata of a single data record with given name from the netzschleuder repository

    Args:
        name: Name of the data set for which to retrieve the metadata
        base_url: Base URL of netzschleuder repository

    Examples:
        Retrieve metadata of karate club network

        >>> import pathpyG as pp
        >>> metadata = pp.io.read_netzschleuder_record('karate')
        >>> print(metadata)
        {
            'analyses': {'77': {'average_degree': 4.52... } }
        }

    Returns:
        Dictionary containing key-value pairs of metadata
    """
    # query the per-record API endpoint and decode the JSON response
    record_url = base_url + "/api/net/{0}".format(name)
    try:
        response = request.urlopen(record_url).read()
        return json.loads(response)
    except HTTPError:
        raise Exception("Could not connect to netzschleuder repository at {0}".format(base_url))