Source code for percolate.percolate

#!/usr/bin/env python
# encoding: utf-8

"""
Low-level routines to implement the Newman-Ziff algorithm

See also
--------

percolate : The high-level module
"""

from __future__ import (absolute_import, division,
                        print_function, unicode_literals)
from future.builtins import (ascii, bytes, chr, dict, filter, hex, input,
                             int, map, next, oct, open, pow, range, round,
                             str, super, zip)

import copy
import numpy as np
import scipy.stats
import networkx as nx


alpha_1sigma = 2 * scipy.stats.norm.cdf(-1.0)
"""
The alpha for the 1 sigma confidence level
"""


def percolation_graph(graph, spanning_cluster=True):
    """
    Prepare the (internal) percolation graph from a given graph

    Helper function to prepare the given graph for spanning cluster
    detection (if required).
    Basically it strips off the auxiliary nodes and edges again.
    It also returns fundamental graph quantities (number of nodes and
    edges).

    Parameters
    ----------
    graph : networkx.Graph
        The substrate graph on which percolation is to take place

    spanning_cluster : bool, optional
        Whether to detect a spanning cluster or not.
        Defaults to ``True``.

    Returns
    -------
    ret : dict
        The percolation graph and its fundamental quantities
    """
    ret = dict()
    ret['graph'] = graph
    ret['spanning_cluster'] = bool(spanning_cluster)

    # initialize percolation graph
    if spanning_cluster:
        spanning_auxiliary_node_attributes = nx.get_node_attributes(
            graph, 'span'
        )
        ret['auxiliary_node_attributes'] = spanning_auxiliary_node_attributes
        auxiliary_nodes = spanning_auxiliary_node_attributes.keys()
        if not list(auxiliary_nodes):
            raise ValueError(
                'Spanning cluster is to be detected, but no auxiliary nodes '
                'given.'
            )

        spanning_sides = list(
            set(spanning_auxiliary_node_attributes.values())
        )
        if len(spanning_sides) != 2:
            raise ValueError(
                'Spanning cluster is to be detected, but auxiliary nodes '
                'of fewer or more than 2 types (sides) given.'
            )

        ret['spanning_sides'] = spanning_sides

        ret['auxiliary_edge_attributes'] = nx.get_edge_attributes(
            graph, 'span'
        )

    # get subgraph on which percolation is to take place (strip off the
    # auxiliary nodes)
    if spanning_cluster:
        perc_graph = graph.subgraph(
            [
                node for node in graph.nodes_iter()
                if 'span' not in graph.node[node]
            ]
        )
    else:
        perc_graph = graph

    ret['perc_graph'] = perc_graph

    # number of nodes N
    ret['num_nodes'] = nx.number_of_nodes(perc_graph)

    # number of edges M
    ret['num_edges'] = nx.number_of_edges(perc_graph)

    return ret

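# Usage sketch (not part of the original module): how percolation_graph()
# splits a substrate graph with 'span'-labelled auxiliary nodes.  The tiny
# path graph and its labelling are arbitrary example choices; the NetworkX
# 1.x attribute API (graph.node) matches the rest of this module.
def _example_percolation_graph():
    g = nx.path_graph(4)  # nodes 0..3; 0 and 3 become auxiliary nodes
    g.node[0]['span'] = 0
    g[0][1]['span'] = 0
    g.node[3]['span'] = 1
    g[2][3]['span'] = 1
    info = percolation_graph(g, spanning_cluster=True)
    # the auxiliary nodes are stripped off the percolation subgraph
    assert info['num_nodes'] == 2
    assert sorted(info['spanning_sides']) == [0, 1]
    return info
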
def sample_states(
    graph, spanning_cluster=True, model='bond', copy_result=True
):
    r'''
    Generate successive sample states of the percolation model

    This is a :ref:`generator function <python:tut-generators>` to
    successively add one edge at a time from the graph to the percolation
    model.
    At each iteration, it calculates and returns the cluster statistics.

    Parameters
    ----------
    graph : networkx.Graph
        The substrate graph on which percolation is to take place

    spanning_cluster : bool, optional
        Whether to detect a spanning cluster or not.
        Defaults to ``True``.

    model : str, optional
        The percolation model (either ``'bond'`` or ``'site'``).
        Defaults to ``'bond'``.

        .. note:: Models other than ``'bond'`` are not supported yet.

    copy_result : bool, optional
        Whether to return a copy or a reference to the result dictionary.
        Defaults to ``True``.

    Yields
    ------
    ret : dict
        Cluster statistics

    ret['n'] : int
        Number of occupied bonds

    ret['N'] : int
        Total number of sites

    ret['M'] : int
        Total number of bonds

    ret['has_spanning_cluster'] : bool
        ``True`` if there is a spanning cluster, ``False`` otherwise.
        Only exists if the `spanning_cluster` argument is set to ``True``.

    ret['max_cluster_size'] : int
        Size of the largest cluster (absolute number of sites)

    ret['moments'] : 1-D :py:class:`numpy.ndarray` of int
        Array of size ``5``.
        The ``k``-th entry is the ``k``-th raw moment of the (absolute)
        cluster size distribution, with ``k`` ranging from ``0`` to ``4``.

    Raises
    ------
    ValueError
        If `model` does not equal ``'bond'``.

    ValueError
        If `spanning_cluster` is ``True``, but `graph` does not contain any
        auxiliary nodes to detect spanning clusters.

    See also
    --------
    microcanonical_averages : Evolves multiple sample states in parallel

    Notes
    -----
    Iterating through this generator is a single run of the Newman-Ziff
    algorithm. [2]_
    The first iteration yields the trivial state with :math:`n = 0` occupied
    bonds.

    Spanning cluster

        In order to detect a spanning cluster, `graph` needs to contain
        auxiliary nodes and edges, cf. Reference [2]_, Figure 6.
        The auxiliary nodes and edges have the ``'span'`` `attribute
        <http://networkx.github.io/documentation/latest/tutorial/tutorial.html#node-attributes>`_.
        The value is either ``0`` or ``1``, distinguishing the two sides of
        the graph to span.

    Raw moments of the cluster size distribution

        The :math:`k`-th raw moment of the (absolute) cluster size
        distribution is :math:`\sum_s' s^k N_s`, where :math:`s` is the
        cluster size and :math:`N_s` is the number of clusters of size
        :math:`s`. [3]_
        The primed sum :math:`\sum'` signifies that the largest cluster is
        excluded from the sum. [4]_

    References
    ----------
    .. [2] Newman, M. E. J. & Ziff, R. M. Fast Monte Carlo algorithm for
       site or bond percolation. Physical Review E 64, 016706+ (2001),
       `doi:10.1103/physreve.64.016706
       <http://dx.doi.org/10.1103/physreve.64.016706>`_.

    .. [3] Stauffer, D. & Aharony, A. Introduction to Percolation Theory
       (Taylor & Francis, London, 1994), second edn.

    .. [4] Binder, K. & Heermann, D. W. Monte Carlo Simulation in
       Statistical Physics (Springer, Berlin, Heidelberg, 2010),
       `doi:10.1007/978-3-642-03163-2
       <http://dx.doi.org/10.1007/978-3-642-03163-2>`_.
    '''

    if model != 'bond':
        raise ValueError('Only bond percolation supported.')

    if spanning_cluster:
        auxiliary_node_attributes = nx.get_node_attributes(graph, 'span')
        auxiliary_nodes = auxiliary_node_attributes.keys()
        if not list(auxiliary_nodes):
            raise ValueError(
                'Spanning cluster is to be detected, but no auxiliary nodes '
                'given.'
            )

        spanning_sides = list(set(auxiliary_node_attributes.values()))
        if len(spanning_sides) != 2:
            raise ValueError(
                'Spanning cluster is to be detected, but auxiliary nodes '
                'of fewer or more than 2 types (sides) given.'
            )

        auxiliary_edge_attributes = nx.get_edge_attributes(graph, 'span')

    # get subgraph on which percolation is to take place (strip off the
    # auxiliary nodes)
    if spanning_cluster:
        perc_graph = graph.subgraph(
            [
                node for node in graph.nodes_iter()
                if 'span' not in graph.node[node]
            ]
        )
    else:
        perc_graph = graph

    # get a list of edges for easy access in later iterations
    perc_edges = perc_graph.edges()

    # number of nodes N
    num_nodes = nx.number_of_nodes(perc_graph)

    # number of edges M
    num_edges = nx.number_of_edges(perc_graph)

    # initial iteration: no edges added yet (n == 0)
    ret = dict()
    ret['n'] = 0
    ret['N'] = num_nodes
    ret['M'] = num_edges
    ret['max_cluster_size'] = 1
    ret['moments'] = np.ones(5) * (num_nodes - 1)

    if spanning_cluster:
        ret['has_spanning_cluster'] = False

    if copy_result:
        yield copy.deepcopy(ret)
    else:
        yield ret

    # permute edges
    perm_edges = np.random.permutation(num_edges)

    # set up disjoint set (union-find) data structure
    ds = nx.utils.union_find.UnionFind()
    if spanning_cluster:
        ds_spanning = nx.utils.union_find.UnionFind()

        # merge all auxiliary nodes for each side
        side_roots = dict()
        for side in spanning_sides:
            nodes = [
                node for (node, node_side)
                in auxiliary_node_attributes.items()
                if node_side == side
            ]
            ds_spanning.union(*nodes)
            side_roots[side] = ds_spanning[nodes[0]]

        for (edge, edge_side) in auxiliary_edge_attributes.items():
            ds_spanning.union(side_roots[edge_side], *edge)

        side_roots = [
            ds_spanning[side_root] for side_root in side_roots.values()
        ]

    # get first node
    max_cluster_root = next(perc_graph.nodes_iter())

    # loop over all edges (n == 1..M)
    for n in range(num_edges):
        ret['n'] = n + 1

        # draw new edge from permutation
        edge_index = perm_edges[n]
        edge = perc_edges[edge_index]
        ret['edge'] = edge

        # find roots and weights
        roots = [ds[node] for node in edge]
        weights = [ds.weights[root] for root in roots]

        if roots[0] is not roots[1]:
            # not same cluster: union!
            ds.union(*roots)
            if spanning_cluster:
                ds_spanning.union(*roots)
                ret['has_spanning_cluster'] = (
                    ds_spanning[side_roots[0]] == ds_spanning[side_roots[1]]
                )

            # find new root and weight
            root = ds[edge[0]]
            weight = ds.weights[root]

            # moments and maximum cluster size
            # deduct the previous sub-maximum clusters from moments
            for i in [0, 1]:
                if roots[i] is max_cluster_root:
                    continue
                ret['moments'] -= weights[i] ** np.arange(5)

            if max_cluster_root in roots:
                # merged with maximum cluster
                max_cluster_root = root
                ret['max_cluster_size'] = weight
            else:
                # merged previously sub-maximum clusters
                if ret['max_cluster_size'] >= weight:
                    # previously largest cluster remains largest cluster
                    # add merged cluster to moments
                    ret['moments'] += weight ** np.arange(5)
                else:
                    # merged cluster overtook previously largest cluster
                    # add previously largest cluster to moments
                    max_cluster_root = root
                    ret['moments'] += ret['max_cluster_size'] ** np.arange(5)
                    ret['max_cluster_size'] = weight

        if copy_result:
            yield copy.deepcopy(ret)
        else:
            yield ret

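# Usage sketch (not part of the original module): a single Newman-Ziff run
# of bond percolation on a short chain.  spanning_1d_chain() is defined
# further down in this module; the chain length 10 is an arbitrary example
# value.
def _example_sample_states():
    graph = spanning_1d_chain(10)
    for state in sample_states(graph, spanning_cluster=True):
        # a chain spans only once all of its M bonds are occupied
        print(state['n'], state['max_cluster_size'],
              state['has_spanning_cluster'])
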
def single_run_arrays(spanning_cluster=True, **kwargs):
    r'''
    Generate statistics for a single run

    This is a stand-alone helper function to evolve a single sample state
    (realization) and return the cluster statistics.

    Parameters
    ----------
    spanning_cluster : bool, optional
        Whether to detect a spanning cluster or not.
        Defaults to ``True``.

    kwargs : keyword arguments
        Piped through to :func:`sample_states`

    Returns
    -------
    ret : dict
        Cluster statistics

    ret['N'] : int
        Total number of sites

    ret['M'] : int
        Total number of bonds

    ret['max_cluster_size'] : 1-D :py:class:`numpy.ndarray` of int, size ``ret['M'] + 1``
        Array of the sizes of the largest cluster (absolute number of
        sites) at the respective occupation number.

    ret['has_spanning_cluster'] : 1-D :py:class:`numpy.ndarray` of bool, size ``ret['M'] + 1``
        Array of booleans for each occupation number.
        The respective entry is ``True`` if there is a spanning cluster,
        ``False`` otherwise.
        Only exists if the `spanning_cluster` argument is set to ``True``.

    ret['moments'] : 2-D :py:class:`numpy.ndarray` of int
        Array of shape ``(5, ret['M'] + 1)``.
        The ``(k, m)``-th entry is the ``k``-th raw moment of the
        (absolute) cluster size distribution, with ``k`` ranging from ``0``
        to ``4``, at occupation number ``m``.

    See Also
    --------
    sample_states
    '''

    # initial iteration
    # we do not need a copy of the result dictionary since we copy the
    # values anyway
    kwargs['copy_result'] = False
    ret = dict()

    for n, state in enumerate(sample_states(
        spanning_cluster=spanning_cluster, **kwargs
    )):

        # merge cluster statistics
        if 'N' in ret:
            assert ret['N'] == state['N']
        else:
            ret['N'] = state['N']

        if 'M' in ret:
            assert ret['M'] == state['M']
        else:
            ret['M'] = state['M']

            # allocate the output arrays on the first iteration only
            number_of_states = state['M'] + 1
            max_cluster_size = np.empty(number_of_states)
            if spanning_cluster:
                has_spanning_cluster = np.empty(
                    number_of_states, dtype=bool
                )
            moments = np.empty((5, number_of_states))

        max_cluster_size[n] = state['max_cluster_size']
        for k in range(5):
            moments[k, n] = state['moments'][k]
        if spanning_cluster:
            has_spanning_cluster[n] = state['has_spanning_cluster']

    ret['max_cluster_size'] = max_cluster_size
    ret['moments'] = moments
    if spanning_cluster:
        ret['has_spanning_cluster'] = has_spanning_cluster

    return ret

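# Usage sketch (not part of the original module): collect the whole
# evolution of one run into arrays, e.g. to find the occupation number at
# which the sample first spans.  The grid size 8 is an arbitrary example
# value; np.argmax returns the index of the first True entry.
def _example_single_run_arrays():
    result = single_run_arrays(graph=spanning_2d_grid(8))
    first_spanning_n = np.argmax(result['has_spanning_cluster'])
    return result['max_cluster_size'], first_spanning_n
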
def _microcanonical_average_spanning_cluster(has_spanning_cluster, alpha):
    r'''
    Compute the average number of runs that have a spanning cluster

    Helper function for :func:`microcanonical_averages`

    Parameters
    ----------
    has_spanning_cluster : 1-D :py:class:`numpy.ndarray` of bool
        Each entry is the ``has_spanning_cluster`` field of the output of
        :func:`sample_states`:
        An entry is ``True`` if there is a spanning cluster in that
        respective run, and ``False`` otherwise.

    alpha : float
        Significance level.

    Returns
    -------
    ret : dict
        Spanning cluster statistics

    ret['spanning_cluster'] : float
        The average relative number (Binomial proportion) of runs that have
        a spanning cluster.
        This is the Bayesian point estimate of the posterior mean, with a
        uniform prior.

    ret['spanning_cluster_ci'] : 1-D :py:class:`numpy.ndarray` of float, size 2
        The lower and upper bounds of the Binomial proportion confidence
        interval with uniform prior.

    See Also
    --------
    sample_states : spanning cluster detection

    microcanonical_averages : spanning cluster statistics

    Notes
    -----
    Averages and confidence intervals for Binomial proportions

        As Cameron [8]_ puts it, the normal approximation to the confidence
        interval for a Binomial proportion :math:`p` "suffers a *systematic*
        decline in performance (...) towards extreme values of :math:`p`
        near :math:`0` and :math:`1`, generating binomial [confidence
        intervals] with effective coverage far below the desired level."
        (see also References [6]_ and [7]_)

        A different approach to quantifying uncertainty is Bayesian
        inference. [5]_
        For :math:`n` independent Bernoulli trials with common success
        probability :math:`p`, the *likelihood* of :math:`k` successes
        given :math:`p` is the binomial distribution

        .. math::

            P(k|p) = \binom{n}{k} p^k (1-p)^{n-k},

        which, viewed as a function of :math:`p`, is proportional to the
        *Beta distribution* :math:`B(a, b)` with parameters
        :math:`a = k + 1` and :math:`b = n - k + 1`.
        Assuming a uniform prior :math:`P(p) = 1`, the *posterior* is this
        very Beta distribution [5]_

        .. math::

            P(p|k) \propto P(k|p) \propto B(a, b).

        A point estimate is the posterior mean

        .. math::

            \bar{p} = \frac{k + 1}{n + 2}

        with the :math:`1 - \alpha` credible interval :math:`(p_l, p_u)`
        given by

        .. math::

            \int_0^{p_l} dp \, B(a, b) = \int_{p_u}^1 dp \, B(a, b)
            = \frac{\alpha}{2}.

    References
    ----------
    .. [5] Wasserman, L. All of Statistics (Springer New York, 2004),
       `doi:10.1007/978-0-387-21736-9
       <http://dx.doi.org/10.1007/978-0-387-21736-9>`_.

    .. [6] DasGupta, A., Cai, T. T. & Brown, L. D. Interval Estimation for
       a Binomial Proportion. Statistical Science 16, 101-133 (2001),
       `doi:10.1214/ss/1009213286 <http://dx.doi.org/10.1214/ss/1009213286>`_.

    .. [7] Agresti, A. & Coull, B. A. Approximate is Better than "Exact"
       for Interval Estimation of Binomial Proportions. The American
       Statistician 52, 119-126 (1998),
       `doi:10.2307/2685469 <http://dx.doi.org/10.2307/2685469>`_.

    .. [8] Cameron, E. On the Estimation of Confidence Intervals for
       Binomial Population Proportions in Astronomy: The Simplicity and
       Superiority of the Bayesian Approach. Publications of the
       Astronomical Society of Australia 28, 128-139 (2011),
       `doi:10.1071/as10046 <http://dx.doi.org/10.1071/as10046>`_.
    '''
    ret = dict()
    runs = has_spanning_cluster.size

    # Bayesian posterior mean for Binomial proportion (uniform prior)
    k = has_spanning_cluster.sum(dtype=float)
    ret['spanning_cluster'] = (k + 1) / (runs + 2)

    # Bayesian credible interval for Binomial proportion (uniform prior)
    ret['spanning_cluster_ci'] = scipy.stats.beta.ppf(
        [alpha / 2, 1 - alpha / 2], k + 1, runs - k + 1
    )

    return ret


def _microcanonical_average_max_cluster_size(max_cluster_size, alpha):
    """
    Compute the average size of the largest cluster

    Helper function for :func:`microcanonical_averages`

    Parameters
    ----------
    max_cluster_size : 1-D :py:class:`numpy.ndarray` of int
        Each entry is the ``max_cluster_size`` field of the output of
        :func:`sample_states`:
        The size of the largest cluster (absolute number of sites).

    alpha : float
        Significance level.

    Returns
    -------
    ret : dict
        Largest cluster statistics

    ret['max_cluster_size'] : float
        Average size of the largest cluster (absolute number of sites)

    ret['max_cluster_size_ci'] : 1-D :py:class:`numpy.ndarray` of float, size 2
        Lower and upper bounds of the normal confidence interval of the
        average size of the largest cluster (absolute number of sites)

    See Also
    --------
    sample_states : largest cluster detection

    microcanonical_averages : largest cluster statistics
    """
    ret = dict()
    runs = max_cluster_size.size
    sqrt_n = np.sqrt(runs)

    max_cluster_size_sample_mean = max_cluster_size.mean()
    ret['max_cluster_size'] = max_cluster_size_sample_mean

    max_cluster_size_sample_std = max_cluster_size.std(ddof=1)
    if max_cluster_size_sample_std:
        old_settings = np.seterr(all='raise')
        ret['max_cluster_size_ci'] = scipy.stats.t.interval(
            1 - alpha,
            df=runs - 1,
            loc=max_cluster_size_sample_mean,
            scale=max_cluster_size_sample_std / sqrt_n
        )
        np.seterr(**old_settings)
    else:
        ret['max_cluster_size_ci'] = (
            max_cluster_size_sample_mean * np.ones(2)
        )

    return ret


def _microcanonical_average_moments(moments, alpha):
    """
    Compute the average moments of the cluster size distributions

    Helper function for :func:`microcanonical_averages`

    Parameters
    ----------
    moments : 2-D :py:class:`numpy.ndarray` of int
        ``moments.shape[1] == 5``.
        Each array ``moments[r, :]`` is the ``moments`` field of the output
        of :func:`sample_states`:
        The ``k``-th entry is the ``k``-th raw moment of the (absolute)
        cluster size distribution.

    alpha : float
        Significance level.

    Returns
    -------
    ret : dict
        Moment statistics

    ret['moments'] : 1-D :py:class:`numpy.ndarray` of float, size 5
        The ``k``-th entry is the average ``k``-th raw moment of the
        (absolute) cluster size distribution, with ``k`` ranging from ``0``
        to ``4``.

    ret['moments_ci'] : 2-D :py:class:`numpy.ndarray` of float, shape (5, 2)
        ``ret['moments_ci'][k]`` are the lower and upper bounds of the
        normal confidence interval of the average ``k``-th raw moment of
        the (absolute) cluster size distribution, with ``k`` ranging from
        ``0`` to ``4``.

    See Also
    --------
    sample_states : computation of moments

    microcanonical_averages : moment statistics
    """
    ret = dict()
    runs = moments.shape[0]
    sqrt_n = np.sqrt(runs)

    moments_sample_mean = moments.mean(axis=0)
    ret['moments'] = moments_sample_mean

    moments_sample_std = moments.std(axis=0, ddof=1)
    ret['moments_ci'] = np.empty((5, 2))
    for k in range(5):
        if moments_sample_std[k]:
            old_settings = np.seterr(all='raise')
            ret['moments_ci'][k] = scipy.stats.t.interval(
                1 - alpha,
                df=runs - 1,
                loc=moments_sample_mean[k],
                scale=moments_sample_std[k] / sqrt_n
            )
            np.seterr(**old_settings)
        else:
            ret['moments_ci'][k] = (
                moments_sample_mean[k] * np.ones(2)
            )

    return ret

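# Worked example (not part of the original module) of the Bayesian estimate
# implemented in _microcanonical_average_spanning_cluster above: with k = 7
# spanning runs out of n = 10, the posterior mean is
# (k + 1) / (n + 2) = 8 / 12, and the credible interval is read off the
# Beta(8, 4) posterior.
def _example_spanning_cluster_average():
    has_spanning_cluster = np.array([True] * 7 + [False] * 3)
    stats = _microcanonical_average_spanning_cluster(
        has_spanning_cluster, alpha=alpha_1sigma
    )
    assert np.isclose(stats['spanning_cluster'], 8.0 / 12.0)
    return stats
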
def microcanonical_averages(
    graph, runs=40, spanning_cluster=True, model='bond', alpha=alpha_1sigma,
    copy_result=True
):
    r'''
    Generate successive microcanonical percolation ensemble averages

    This is a :ref:`generator function <python:tut-generators>` to
    successively add one edge at a time from the graph to the percolation
    model for a number of independent runs in parallel.
    At each iteration, it calculates and returns the averaged cluster
    statistics.

    Parameters
    ----------
    graph : networkx.Graph
        The substrate graph on which percolation is to take place

    runs : int, optional
        Number of independent runs.
        Defaults to ``40``.

    spanning_cluster : bool, optional
        Whether to detect a spanning cluster or not.
        Defaults to ``True``.

    model : str, optional
        The percolation model (either ``'bond'`` or ``'site'``).
        Defaults to ``'bond'``.

        .. note:: Models other than ``'bond'`` are not supported yet.

    alpha : float, optional
        Significance level.
        Defaults to 1 sigma of the normal distribution.
        ``1 - alpha`` is the confidence level.

    copy_result : bool, optional
        Whether to return a copy or a reference to the result dictionary.
        Defaults to ``True``.

    Yields
    ------
    ret : dict
        Cluster statistics

    ret['n'] : int
        Number of occupied bonds

    ret['N'] : int
        Total number of sites

    ret['M'] : int
        Total number of bonds

    ret['spanning_cluster'] : float
        The average relative number (Binomial proportion) of runs that have
        a spanning cluster.
        This is the Bayesian point estimate of the posterior mean, with a
        uniform prior.
        Only exists if `spanning_cluster` is set to ``True``.

    ret['spanning_cluster_ci'] : 1-D :py:class:`numpy.ndarray` of float, size 2
        The lower and upper bounds of the Binomial proportion confidence
        interval with uniform prior.
        Only exists if `spanning_cluster` is set to ``True``.

    ret['max_cluster_size'] : float
        Average size of the largest cluster (absolute number of sites)

    ret['max_cluster_size_ci'] : 1-D :py:class:`numpy.ndarray` of float, size 2
        Lower and upper bounds of the normal confidence interval of the
        average size of the largest cluster (absolute number of sites)

    ret['moments'] : 1-D :py:class:`numpy.ndarray` of float, size 5
        The ``k``-th entry is the average ``k``-th raw moment of the
        (absolute) cluster size distribution, with ``k`` ranging from ``0``
        to ``4``.

    ret['moments_ci'] : 2-D :py:class:`numpy.ndarray` of float, shape (5, 2)
        ``ret['moments_ci'][k]`` are the lower and upper bounds of the
        normal confidence interval of the average ``k``-th raw moment of
        the (absolute) cluster size distribution, with ``k`` ranging from
        ``0`` to ``4``.

    Raises
    ------
    ValueError
        If `runs` is not a positive integer

    ValueError
        If `alpha` is not a float in the interval (0, 1)

    See also
    --------
    sample_states

    percolate.percolate._microcanonical_average_spanning_cluster

    percolate.percolate._microcanonical_average_max_cluster_size

    Notes
    -----
    Iterating through this generator corresponds to several parallel runs
    of the Newman-Ziff algorithm.
    Each iteration yields a microcanonical percolation ensemble for the
    number :math:`n` of occupied bonds. [9]_
    The first iteration yields the trivial microcanonical percolation
    ensemble with :math:`n = 0` occupied bonds.

    Spanning cluster

        .. seealso:: :py:func:`sample_states`

    Raw moments of the cluster size distribution

        .. seealso:: :py:func:`sample_states`

    References
    ----------
    .. [9] Newman, M. E. J. & Ziff, R. M. Fast Monte Carlo algorithm for
       site or bond percolation. Physical Review E 64, 016706+ (2001),
       `doi:10.1103/physreve.64.016706
       <http://dx.doi.org/10.1103/physreve.64.016706>`_.
    '''
    try:
        runs = int(runs)
    except (TypeError, ValueError):
        raise ValueError("runs needs to be a positive integer")

    if runs <= 0:
        raise ValueError("runs needs to be a positive integer")

    try:
        alpha = float(alpha)
    except (TypeError, ValueError):
        raise ValueError("alpha needs to be a float in the interval (0, 1)")

    if alpha <= 0.0 or alpha >= 1.0:
        raise ValueError("alpha needs to be a float in the interval (0, 1)")

    # initial iteration
    # we do not need a copy of the result dictionary since we copy the
    # values anyway
    run_iterators = [
        sample_states(
            graph, spanning_cluster=spanning_cluster, model=model,
            copy_result=False
        )
        for _ in range(runs)
    ]

    ret = dict()
    for microcanonical_ensemble in zip(*run_iterators):
        # merge cluster statistics
        ret['n'] = microcanonical_ensemble[0]['n']
        ret['N'] = microcanonical_ensemble[0]['N']
        ret['M'] = microcanonical_ensemble[0]['M']

        max_cluster_size = np.empty(runs)
        moments = np.empty((runs, 5))
        if spanning_cluster:
            has_spanning_cluster = np.empty(runs)

        for r, state in enumerate(microcanonical_ensemble):
            assert state['n'] == ret['n']
            assert state['N'] == ret['N']
            assert state['M'] == ret['M']
            max_cluster_size[r] = state['max_cluster_size']
            moments[r] = state['moments']
            if spanning_cluster:
                has_spanning_cluster[r] = state['has_spanning_cluster']

        ret.update(_microcanonical_average_max_cluster_size(
            max_cluster_size, alpha
        ))

        ret.update(_microcanonical_average_moments(moments, alpha))

        if spanning_cluster:
            ret.update(_microcanonical_average_spanning_cluster(
                has_spanning_cluster, alpha
            ))

        if copy_result:
            yield copy.deepcopy(ret)
        else:
            yield ret

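# Usage sketch (not part of the original module): ensemble-averaged cluster
# statistics with confidence intervals at each occupation number n.  The
# grid size 5 and the 10 runs are arbitrary small example values.
def _example_microcanonical_averages():
    for average in microcanonical_averages(spanning_2d_grid(5), runs=10):
        ci_low, ci_high = average['max_cluster_size_ci']
        print(average['n'], average['max_cluster_size'], ci_low, ci_high)
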
def spanning_1d_chain(length):
    """
    Generate a linear chain with auxiliary nodes for spanning cluster
    detection

    Parameters
    ----------
    length : int
        Number of nodes in the chain, excluding the auxiliary nodes.

    Returns
    -------
    networkx.Graph
        A linear chain graph with auxiliary nodes for spanning cluster
        detection

    See Also
    --------
    sample_states : spanning cluster detection
    """
    ret = nx.grid_graph(dim=[int(length + 2)])

    ret.node[0]['span'] = 0
    ret[0][1]['span'] = 0
    ret.node[length + 1]['span'] = 1
    ret[length][length + 1]['span'] = 1

    return ret

def spanning_2d_grid(length):
    """
    Generate a square lattice with auxiliary nodes for spanning detection

    Parameters
    ----------
    length : int
        Number of nodes in one dimension, excluding the auxiliary nodes.

    Returns
    -------
    networkx.Graph
        A square lattice graph with auxiliary nodes for spanning cluster
        detection

    See Also
    --------
    sample_states : spanning cluster detection
    """
    ret = nx.grid_2d_graph(length + 2, length)

    for i in range(length):
        # side 0
        ret.node[(0, i)]['span'] = 0
        ret[(0, i)][(1, i)]['span'] = 0

        # side 1
        ret.node[(length + 1, i)]['span'] = 1
        ret[(length + 1, i)][(length, i)]['span'] = 1

    return ret

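# Quick check (not part of the original module) of the auxiliary-node
# layout produced above: a grid of side 3 gains 2 * 3 auxiliary nodes,
# 3 on each of the two spanning sides (NetworkX 1.x attribute API).
def _example_spanning_2d_grid():
    g = spanning_2d_grid(3)
    sides = nx.get_node_attributes(g, 'span').values()
    assert sorted(sides) == [0, 0, 0, 1, 1, 1]
    assert nx.number_of_nodes(g) == (3 + 2) * 3
    return g
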
def microcanonical_averages_arrays(microcanonical_averages):
    """
    Compile microcanonical averages over all iteration steps into single
    arrays

    Helper function to aggregate the microcanonical averages over all
    iteration steps into single arrays for further processing

    Parameters
    ----------
    microcanonical_averages : iterable
        Typically, this is the :func:`microcanonical_averages` generator

    Returns
    -------
    ret : dict
        Aggregated cluster statistics

    ret['N'] : int
        Total number of sites

    ret['M'] : int
        Total number of bonds

    ret['spanning_cluster'] : 1-D :py:class:`numpy.ndarray` of float, size ``M + 1``
        The percolation probability:
        The normalized average number of runs that have a spanning cluster.

    ret['spanning_cluster_ci'] : 2-D :py:class:`numpy.ndarray` of float, shape ``(M + 1, 2)``
        The lower and upper bounds of the percolation probability.

    ret['max_cluster_size'] : 1-D :py:class:`numpy.ndarray` of float, size ``M + 1``
        The percolation strength:
        Average relative size of the largest cluster

    ret['max_cluster_size_ci'] : 2-D :py:class:`numpy.ndarray` of float, shape ``(M + 1, 2)``
        Lower and upper bounds of the normal confidence interval of the
        percolation strength.

    ret['moments'] : 2-D :py:class:`numpy.ndarray` of float, shape ``(5, M + 1)``
        Average raw moments of the (relative) cluster size distribution.

    ret['moments_ci'] : 3-D :py:class:`numpy.ndarray` of float, shape ``(5, M + 1, 2)``
        Lower and upper bounds of the normal confidence interval of the raw
        moments of the (relative) cluster size distribution.

    See Also
    --------
    microcanonical_averages
    """
    ret = dict()

    for n, microcanonical_average in enumerate(microcanonical_averages):
        assert n == microcanonical_average['n']
        if n == 0:
            num_edges = microcanonical_average['M']
            num_sites = microcanonical_average['N']
            spanning_cluster = ('spanning_cluster' in microcanonical_average)
            ret['max_cluster_size'] = np.empty(num_edges + 1)
            ret['max_cluster_size_ci'] = np.empty((num_edges + 1, 2))

            if spanning_cluster:
                ret['spanning_cluster'] = np.empty(num_edges + 1)
                ret['spanning_cluster_ci'] = np.empty((num_edges + 1, 2))

            ret['moments'] = np.empty((5, num_edges + 1))
            ret['moments_ci'] = np.empty((5, num_edges + 1, 2))

        ret['max_cluster_size'][n] = (
            microcanonical_average['max_cluster_size']
        )
        ret['max_cluster_size_ci'][n] = (
            microcanonical_average['max_cluster_size_ci']
        )

        if spanning_cluster:
            ret['spanning_cluster'][n] = (
                microcanonical_average['spanning_cluster']
            )
            ret['spanning_cluster_ci'][n] = (
                microcanonical_average['spanning_cluster_ci']
            )

        ret['moments'][:, n] = microcanonical_average['moments']
        ret['moments_ci'][:, n] = microcanonical_average['moments_ci']

    # normalize by number of sites
    for key in ret:
        if 'spanning_cluster' in key:
            continue
        ret[key] /= num_sites

    ret['M'] = num_edges
    ret['N'] = num_sites
    return ret

def _binomial_pmf(n, p):
    """
    Compute the binomial PMF according to Newman and Ziff

    Helper function for :func:`canonical_averages`

    See Also
    --------
    canonical_averages

    Notes
    -----
    See Newman & Ziff, Equation (10) [10]_

    References
    ----------
    .. [10] Newman, M. E. J. & Ziff, R. M. Fast Monte Carlo algorithm for
       site or bond percolation. Physical Review E 64, 016706+ (2001),
       `doi:10.1103/physreve.64.016706
       <http://dx.doi.org/10.1103/physreve.64.016706>`_.
    """
    n = int(n)
    ret = np.empty(n + 1)

    nmax = int(np.round(p * n))

    ret[nmax] = 1.0

    old_settings = np.seterr(under='ignore')  # seterr to known value

    for i in range(nmax + 1, n + 1):
        ret[i] = ret[i - 1] * (n - i + 1.0) / i * p / (1.0 - p)

    for i in range(nmax - 1, -1, -1):
        ret[i] = ret[i + 1] * (i + 1.0) / (n - i) * (1.0 - p) / p

    np.seterr(**old_settings)  # restore previous settings

    return ret / ret.sum()

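# Sanity check (not part of the original module): the iterative PMF from
# Newman & Ziff's Equation (10) should agree with scipy's binomial PMF.
# n = 20 and p = 0.4 are arbitrary test values.
def _example_binomial_pmf():
    n, p = 20, 0.4
    reference = scipy.stats.binom.pmf(np.arange(n + 1), n, p)
    assert np.allclose(_binomial_pmf(n, p), reference)
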
def canonical_averages(ps, microcanonical_averages_arrays):
    """
    Compute the canonical cluster statistics from microcanonical statistics

    This is according to Newman and Ziff, Equation (2).
    Note that we also simply average the bounds of the confidence intervals
    according to this formula.

    Parameters
    ----------
    ps : 1-D :py:class:`numpy.ndarray` of float
        Each entry is a probability for which to form the canonical
        ensemble and compute the weighted statistics from the
        microcanonical statistics

    microcanonical_averages_arrays
        Typically the output of :func:`microcanonical_averages_arrays`

    Returns
    -------
    ret : dict
        Canonical ensemble cluster statistics

    ret['ps'] : 1-D :py:class:`numpy.ndarray` of float
        The parameter `ps`

    ret['N'] : int
        Total number of sites

    ret['M'] : int
        Total number of bonds

    ret['spanning_cluster'] : 1-D :py:class:`numpy.ndarray` of float, size ``len(ps)``
        The percolation probability:
        The normalized average number of runs that have a spanning cluster.

    ret['spanning_cluster_ci'] : 2-D :py:class:`numpy.ndarray` of float, shape ``(len(ps), 2)``
        The lower and upper bounds of the percolation probability.

    ret['max_cluster_size'] : 1-D :py:class:`numpy.ndarray` of float, size ``len(ps)``
        The percolation strength:
        Average relative size of the largest cluster

    ret['max_cluster_size_ci'] : 2-D :py:class:`numpy.ndarray` of float, shape ``(len(ps), 2)``
        Lower and upper bounds of the normal confidence interval of the
        percolation strength.

    ret['moments'] : 2-D :py:class:`numpy.ndarray` of float, shape ``(5, len(ps))``
        Average raw moments of the (relative) cluster size distribution.

    ret['moments_ci'] : 3-D :py:class:`numpy.ndarray` of float, shape ``(5, len(ps), 2)``
        Lower and upper bounds of the normal confidence interval of the raw
        moments of the (relative) cluster size distribution.

    See Also
    --------
    microcanonical_averages

    microcanonical_averages_arrays
    """
    num_sites = microcanonical_averages_arrays['N']
    num_edges = microcanonical_averages_arrays['M']
    spanning_cluster = ('spanning_cluster' in microcanonical_averages_arrays)

    ret = dict()
    ret['ps'] = ps
    ret['N'] = num_sites
    ret['M'] = num_edges

    ret['max_cluster_size'] = np.empty(ps.size)
    ret['max_cluster_size_ci'] = np.empty((ps.size, 2))

    if spanning_cluster:
        ret['spanning_cluster'] = np.empty(ps.size)
        ret['spanning_cluster_ci'] = np.empty((ps.size, 2))

    ret['moments'] = np.empty((5, ps.size))
    ret['moments_ci'] = np.empty((5, ps.size, 2))

    for p_index, p in enumerate(ps):
        binomials = _binomial_pmf(n=num_edges, p=p)

        for key, value in microcanonical_averages_arrays.items():
            # skip the scalar entries 'N' and 'M'
            if len(key) <= 1:
                continue

            if key in ['max_cluster_size', 'spanning_cluster']:
                ret[key][p_index] = np.sum(binomials * value)
            elif key in ['max_cluster_size_ci', 'spanning_cluster_ci']:
                ret[key][p_index] = np.sum(
                    np.tile(binomials, (2, 1)).T * value, axis=0
                )
            elif key == 'moments':
                ret[key][:, p_index] = np.sum(
                    np.tile(binomials, (5, 1)) * value, axis=1
                )
            elif key == 'moments_ci':
                ret[key][:, p_index] = np.sum(
                    np.rollaxis(np.tile(binomials, (5, 2, 1)), 2, 1) * value,
                    axis=1
                )
            else:
                raise NotImplementedError(
                    '{}-dimensional array'.format(value.ndim)
                )

    return ret

def statistics(
    graph, ps, spanning_cluster=True, model='bond', alpha=alpha_1sigma,
    runs=40
):
    """
    Helper function to compute percolation statistics

    See Also
    --------
    canonical_averages

    microcanonical_averages

    sample_states
    """
    my_microcanonical_averages = microcanonical_averages(
        graph=graph, runs=runs, spanning_cluster=spanning_cluster,
        model=model, alpha=alpha
    )

    my_microcanonical_averages_arrays = microcanonical_averages_arrays(
        my_microcanonical_averages
    )

    return canonical_averages(ps, my_microcanonical_averages_arrays)

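# End-to-end sketch (not part of the original module): canonical (fixed-p)
# percolation curves on a small square lattice.  The grid size, number of
# runs, and probability grid are arbitrary example values.
def _example_statistics():
    ps = np.linspace(0.0, 1.0, num=51)
    result = statistics(graph=spanning_2d_grid(10), ps=ps, runs=20)
    # percolation probability and strength as functions of p
    return ps, result['spanning_cluster'], result['max_cluster_size']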