#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Common plotting utilities for brutus visualization functions.
This module provides low-level plotting utilities shared across
multiple plotting functions.
"""
import logging
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, colorConverter
from scipy.ndimage import gaussian_filter as norm_kde
from ..utils.sampling import quantile
__all__ = ["hist2d"]
[docs]
def hist2d(
x,
y,
smooth=0.02,
span=None,
weights=None,
levels=None,
ax=None,
color="gray",
plot_datapoints=False,
plot_density=True,
plot_contours=True,
no_fill_contours=False,
fill_contours=True,
contour_kwargs=None,
contourf_kwargs=None,
data_kwargs=None,
**kwargs,
):
"""
Internal function used to generate a 2-D histogram/contour of samples.
This is the refactored version of _hist2d from the original plotting.py.
Parameters
----------
x : interable with shape `(nsamps,)`
Sample positions in the first dimension.
y : iterable with shape `(nsamps,)`
Sample positions in the second dimension.
span : iterable with shape `(ndim,)`, optional
A list where each element is either a length-2 tuple containing
lower and upper bounds or a float from `(0., 1.]` giving the
fraction of (weighted) samples to include. If a fraction is provided,
the bounds are chosen to be equal-tailed. An example would be::
span = [(0., 10.), 0.95, (5., 6.)]
Default is `0.99` (99% credible interval).
weights : iterable with shape `(nsamps,)`
Weights associated with the samples. Default is `None` (no weights).
levels : iterable, optional
The contour levels to draw. Default are `[0.5, 1, 1.5, 2]`-sigma.
ax : `~matplotlib.axes.Axes`, optional
An `~matplotlib.axes.axes` instance on which to add the 2-D histogram.
If not provided, a figure will be generated.
color : str, optional
The `~matplotlib`-style color used to draw lines and color cells
and contours. Default is `'gray'`.
plot_datapoints : bool, optional
Whether to plot the individual data points. Default is `False`.
plot_density : bool, optional
Whether to draw the density colormap. Default is `True`.
plot_contours : bool, optional
Whether to draw the contours. Default is `True`.
no_fill_contours : bool, optional
Whether to add absolutely no filling to the contours. This differs
from `fill_contours=False`, which still adds a white fill at the
densest points. Default is `False`.
fill_contours : bool, optional
Whether to fill the contours. Default is `True`.
contour_kwargs : dict
Any additional keyword arguments to pass to the `contour` method.
contourf_kwargs : dict
Any additional keyword arguments to pass to the `contourf` method.
data_kwargs : dict
Any additional keyword arguments to pass to the `plot` method when
adding the individual data points.
"""
if ax is None:
ax = plt.gca()
# Determine plotting bounds.
data = [x, y]
if span is None:
span = [0.99 for i in range(2)]
span = list(span)
if len(span) != 2:
raise ValueError("Dimension mismatch between samples and span.")
for i, _ in enumerate(span):
try:
xmin, xmax = span[i]
except (TypeError, ValueError):
q = [0.5 - 0.5 * span[i], 0.5 + 0.5 * span[i]]
span[i] = quantile(data[i], q, weights=weights)
# The default "sigma" contour levels.
if levels is None:
levels = 1.0 - np.exp(-0.5 * np.arange(0.5, 2.1, 0.5) ** 2)
# Color map for the density plot, over-plotted to indicate the
# density of the points near the center.
density_cmap = LinearSegmentedColormap.from_list(
"density_cmap", [color, (1, 1, 1, 0)]
)
# Color map used to hide the points at the high density areas.
white_cmap = LinearSegmentedColormap.from_list(
"white_cmap", [(1, 1, 1), (1, 1, 1)], N=2
)
# This "color map" is the list of colors for the contour levels if the
# contours are filled.
rgba_color = colorConverter.to_rgba(color)
contour_cmap = [list(rgba_color) for level in levels] + [rgba_color]
for i, level in enumerate(levels):
contour_cmap[i][-1] *= float(i) / (len(levels) + 1)
# Initialize smoothing.
if isinstance(smooth, int) or isinstance(smooth, float):
smooth = [smooth, smooth]
bins = []
svalues = []
for s in smooth:
if isinstance(s, int):
# If `s` is an integer, the weighted histogram has
# `s` bins within the provided bounds.
bins.append(s)
svalues.append(0.0)
else:
# If `s` is a float, oversample the data relative to the
# smoothing filter by a factor of 2, then use a Gaussian
# filter to smooth the results.
bins.append(int(round(2.0 / s)))
svalues.append(2.0)
# We'll make the 2D histogram to directly estimate the density.
try:
H, X, Y = np.histogram2d(
x.flatten(),
y.flatten(),
bins=bins,
range=list(map(np.sort, span)),
weights=weights,
)
except ValueError:
raise ValueError(
"It looks like at least one of your sample columns "
"have no dynamic range."
)
# Smooth the results. (svalues is a Python list, so guard on an array to
# make the comparison element-wise rather than a scalar list-vs-float that
# always evaluated truthy.)
if not np.all(np.asarray(svalues) == 0.0):
H = norm_kde(H, svalues)
# Compute the density levels.
Hflat = H.flatten()
inds = np.argsort(Hflat)[::-1]
Hflat = Hflat[inds]
sm = np.cumsum(Hflat)
# Check for degenerate case (all zeros or no variation)
if sm[-1] == 0 or H.max() == 0:
logging.warning("Data has no density variation; skipping contour plots.")
V = np.array([0]) # Minimal valid contour level
degenerate_data = True
else:
sm /= sm[-1]
V = np.empty(len(levels))
for i, v0 in enumerate(levels):
try:
V[i] = Hflat[sm <= v0][-1]
except IndexError:
V[i] = Hflat[0]
V.sort()
m = np.diff(V) == 0
if np.any(m) and plot_contours:
logging.warning("Too few points to create valid contours.")
# Fix infinite loop when all values are zero or identical
safety_counter = 0
max_iterations = 100 # Prevent infinite loop
while np.any(m) and safety_counter < max_iterations:
idx = np.where(m)[0][0]
if V[idx] == 0:
# If value is zero, add small increment instead of multiplying
V[idx] = 1e-10 * (safety_counter + 1)
else:
V[idx] *= 1.0 - 1e-4
m = np.diff(V) == 0
safety_counter += 1
if safety_counter >= max_iterations:
logging.warning(
"Could not resolve duplicate contour levels after %d iterations.",
max_iterations,
)
V.sort()
degenerate_data = False
# Compute the bin centers.
X1, Y1 = 0.5 * (X[1:] + X[:-1]), 0.5 * (Y[1:] + Y[:-1])
# Extend the array for the sake of the contours at the plot edges.
H2 = H.min() + np.zeros((H.shape[0] + 4, H.shape[1] + 4))
H2[2:-2, 2:-2] = H
H2[2:-2, 1] = H[:, 0]
H2[2:-2, -2] = H[:, -1]
H2[1, 2:-2] = H[0]
H2[-2, 2:-2] = H[-1]
H2[1, 1] = H[0, 0]
H2[1, -2] = H[0, -1]
H2[-2, 1] = H[-1, 0]
H2[-2, -2] = H[-1, -1]
X2 = np.concatenate(
[
X1[0] + np.array([-2, -1]) * np.diff(X1[:2]),
X1,
X1[-1] + np.array([1, 2]) * np.diff(X1[-2:]),
]
)
Y2 = np.concatenate(
[
Y1[0] + np.array([-2, -1]) * np.diff(Y1[:2]),
Y1,
Y1[-1] + np.array([1, 2]) * np.diff(Y1[-2:]),
]
)
# Plot the data points.
if plot_datapoints:
if data_kwargs is None:
data_kwargs = dict()
data_kwargs["color"] = data_kwargs.get("color", color)
data_kwargs["ms"] = data_kwargs.get("ms", 2.0)
data_kwargs["mec"] = data_kwargs.get("mec", "none")
data_kwargs["alpha"] = data_kwargs.get("alpha", 0.1)
ax.plot(x, y, "o", zorder=-1, rasterized=True, **data_kwargs)
# Plot the base fill to hide the densest data points.
if (plot_contours or plot_density) and not no_fill_contours and not degenerate_data:
# Ensure contour levels are valid and increasing
base_levels = [V.min(), H.max()]
if base_levels[0] >= base_levels[1]:
# If levels are not increasing, create a minimal valid range
if H.max() > 0:
base_levels = [H.max() * 0.9, H.max()]
else:
base_levels = [0, 1e-10]
ax.contourf(X2, Y2, H2.T, base_levels, cmap=white_cmap, antialiased=False)
if plot_contours and fill_contours and not degenerate_data:
if contourf_kwargs is None:
contourf_kwargs = dict()
contourf_kwargs["colors"] = contourf_kwargs.get("colors", contour_cmap)
contourf_kwargs["antialiased"] = contourf_kwargs.get("antialiased", False)
ax.contourf(
X2,
Y2,
H2.T,
np.concatenate([[0], V, [H.max() * (1 + 1e-4)]]),
**contourf_kwargs,
)
# Plot the density map. This can't be plotted at the same time as the
# contour fills. Use sqrt scaling to make low-density bins more visible.
elif plot_density:
H_scaled = np.sqrt(np.maximum(H, 0))
ax.pcolor(X, Y, H_scaled.max() - H_scaled.T, cmap=density_cmap)
# Plot the contour edge colors.
if plot_contours and not degenerate_data:
if contour_kwargs is None:
contour_kwargs = dict()
contour_kwargs["colors"] = contour_kwargs.get("colors", color)
ax.contour(X2, Y2, H2.T, V, **contour_kwargs)
ax.set_xlim(span[0])
ax.set_ylim(span[1])