You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
558 lines
19 KiB
558 lines
19 KiB
"""
|
|
This type stub file was generated by pyright.
|
|
"""
|
|
|
|
from .axisgrid import Grid
|
|
from ._decorators import _deprecate_positional_args
|
|
|
|
"""Functions to visualize matrices of data."""
|
|
__all__ = ["heatmap", "clustermap"]
|
|
class _HeatMapper:
|
|
"""Draw a heatmap plot of a matrix with nice labels and colormaps."""
|
|
def __init__(self, data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, cbar, cbar_kws, xticklabels=..., yticklabels=..., mask=...) -> None:
|
|
"""Initialize the plotting object."""
|
|
...
|
|
|
|
def plot(self, ax, cax, kws): # -> None:
|
|
"""Draw the heatmap on the provided Axes."""
|
|
...
|
|
|
|
|
|
|
|
@_deprecate_positional_args
|
|
def heatmap(data, *, vmin=..., vmax=..., cmap=..., center=..., robust=..., annot=..., fmt=..., annot_kws=..., linewidths=..., linecolor=..., cbar=..., cbar_kws=..., cbar_ax=..., square=..., xticklabels=..., yticklabels=..., mask=..., ax=..., **kwargs): # -> Axes:
|
|
"""Plot rectangular data as a color-encoded matrix.
|
|
|
|
This is an Axes-level function and will draw the heatmap into the
|
|
currently-active Axes if none is provided to the ``ax`` argument. Part of
|
|
this Axes space will be taken and used to plot a colormap, unless ``cbar``
|
|
is False or a separate Axes is provided to ``cbar_ax``.
|
|
|
|
Parameters
|
|
----------
|
|
data : rectangular dataset
|
|
2D dataset that can be coerced into an ndarray. If a Pandas DataFrame
|
|
is provided, the index/column information will be used to label the
|
|
columns and rows.
|
|
vmin, vmax : floats, optional
|
|
Values to anchor the colormap, otherwise they are inferred from the
|
|
data and other keyword arguments.
|
|
cmap : matplotlib colormap name or object, or list of colors, optional
|
|
The mapping from data values to color space. If not provided, the
|
|
default will depend on whether ``center`` is set.
|
|
center : float, optional
|
|
The value at which to center the colormap when plotting divergant data.
|
|
Using this parameter will change the default ``cmap`` if none is
|
|
specified.
|
|
robust : bool, optional
|
|
If True and ``vmin`` or ``vmax`` are absent, the colormap range is
|
|
computed with robust quantiles instead of the extreme values.
|
|
annot : bool or rectangular dataset, optional
|
|
If True, write the data value in each cell. If an array-like with the
|
|
same shape as ``data``, then use this to annotate the heatmap instead
|
|
of the data. Note that DataFrames will match on position, not index.
|
|
fmt : str, optional
|
|
String formatting code to use when adding annotations.
|
|
annot_kws : dict of key, value mappings, optional
|
|
Keyword arguments for :meth:`matplotlib.axes.Axes.text` when ``annot``
|
|
is True.
|
|
linewidths : float, optional
|
|
Width of the lines that will divide each cell.
|
|
linecolor : color, optional
|
|
Color of the lines that will divide each cell.
|
|
cbar : bool, optional
|
|
Whether to draw a colorbar.
|
|
cbar_kws : dict of key, value mappings, optional
|
|
Keyword arguments for :meth:`matplotlib.figure.Figure.colorbar`.
|
|
cbar_ax : matplotlib Axes, optional
|
|
Axes in which to draw the colorbar, otherwise take space from the
|
|
main Axes.
|
|
square : bool, optional
|
|
If True, set the Axes aspect to "equal" so each cell will be
|
|
square-shaped.
|
|
xticklabels, yticklabels : "auto", bool, list-like, or int, optional
|
|
If True, plot the column names of the dataframe. If False, don't plot
|
|
the column names. If list-like, plot these alternate labels as the
|
|
xticklabels. If an integer, use the column names but plot only every
|
|
n label. If "auto", try to densely plot non-overlapping labels.
|
|
mask : bool array or DataFrame, optional
|
|
If passed, data will not be shown in cells where ``mask`` is True.
|
|
Cells with missing values are automatically masked.
|
|
ax : matplotlib Axes, optional
|
|
Axes in which to draw the plot, otherwise use the currently-active
|
|
Axes.
|
|
kwargs : other keyword arguments
|
|
All other keyword arguments are passed to
|
|
:meth:`matplotlib.axes.Axes.pcolormesh`.
|
|
|
|
Returns
|
|
-------
|
|
ax : matplotlib Axes
|
|
Axes object with the heatmap.
|
|
|
|
See Also
|
|
--------
|
|
clustermap : Plot a matrix using hierachical clustering to arrange the
|
|
rows and columns.
|
|
|
|
Examples
|
|
--------
|
|
|
|
Plot a heatmap for a numpy array:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> import numpy as np; np.random.seed(0)
|
|
>>> import seaborn as sns; sns.set_theme()
|
|
>>> uniform_data = np.random.rand(10, 12)
|
|
>>> ax = sns.heatmap(uniform_data)
|
|
|
|
Change the limits of the colormap:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> ax = sns.heatmap(uniform_data, vmin=0, vmax=1)
|
|
|
|
Plot a heatmap for data centered on 0 with a diverging colormap:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> normal_data = np.random.randn(10, 12)
|
|
>>> ax = sns.heatmap(normal_data, center=0)
|
|
|
|
Plot a dataframe with meaningful row and column labels:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> flights = sns.load_dataset("flights")
|
|
>>> flights = flights.pivot("month", "year", "passengers")
|
|
>>> ax = sns.heatmap(flights)
|
|
|
|
Annotate each cell with the numeric value using integer formatting:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> ax = sns.heatmap(flights, annot=True, fmt="d")
|
|
|
|
Add lines between each cell:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> ax = sns.heatmap(flights, linewidths=.5)
|
|
|
|
Use a different colormap:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> ax = sns.heatmap(flights, cmap="YlGnBu")
|
|
|
|
Center the colormap at a specific value:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> ax = sns.heatmap(flights, center=flights.loc["Jan", 1955])
|
|
|
|
Plot every other column label and don't plot row labels:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> data = np.random.randn(50, 20)
|
|
>>> ax = sns.heatmap(data, xticklabels=2, yticklabels=False)
|
|
|
|
Don't draw a colorbar:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> ax = sns.heatmap(flights, cbar=False)
|
|
|
|
Use different axes for the colorbar:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> grid_kws = {"height_ratios": (.9, .05), "hspace": .3}
|
|
>>> f, (ax, cbar_ax) = plt.subplots(2, gridspec_kw=grid_kws)
|
|
>>> ax = sns.heatmap(flights, ax=ax,
|
|
... cbar_ax=cbar_ax,
|
|
... cbar_kws={"orientation": "horizontal"})
|
|
|
|
Use a mask to plot only part of a matrix
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> corr = np.corrcoef(np.random.randn(10, 200))
|
|
>>> mask = np.zeros_like(corr)
|
|
>>> mask[np.triu_indices_from(mask)] = True
|
|
>>> with sns.axes_style("white"):
|
|
... f, ax = plt.subplots(figsize=(7, 5))
|
|
... ax = sns.heatmap(corr, mask=mask, vmax=.3, square=True)
|
|
"""
|
|
...
|
|
|
|
class _DendrogramPlotter:
|
|
"""Object for drawing tree of similarities between data rows/columns"""
|
|
def __init__(self, data, linkage, metric, method, axis, label, rotate) -> None:
|
|
"""Plot a dendrogram of the relationships between the columns of data
|
|
|
|
Parameters
|
|
----------
|
|
data : pandas.DataFrame
|
|
Rectangular data
|
|
"""
|
|
...
|
|
|
|
@property
|
|
def calculated_linkage(self):
|
|
...
|
|
|
|
def calculate_dendrogram(self): # -> dict[str, Unknown]:
|
|
"""Calculates a dendrogram based on the linkage matrix
|
|
|
|
Made a separate function, not a property because don't want to
|
|
recalculate the dendrogram every time it is accessed.
|
|
|
|
Returns
|
|
-------
|
|
dendrogram : dict
|
|
Dendrogram dictionary as returned by scipy.cluster.hierarchy
|
|
.dendrogram. The important key-value pairing is
|
|
"reordered_ind" which indicates the re-ordering of the matrix
|
|
"""
|
|
...
|
|
|
|
@property
|
|
def reordered_ind(self):
|
|
"""Indices of the matrix, reordered by the dendrogram"""
|
|
...
|
|
|
|
def plot(self, ax, tree_kws): # -> Self@_DendrogramPlotter:
|
|
"""Plots a dendrogram of the similarities between data on the axes
|
|
|
|
Parameters
|
|
----------
|
|
ax : matplotlib.axes.Axes
|
|
Axes object upon which the dendrogram is plotted
|
|
|
|
"""
|
|
...
|
|
|
|
|
|
|
|
@_deprecate_positional_args
|
|
def dendrogram(data, *, linkage=..., axis=..., label=..., metric=..., method=..., rotate=..., tree_kws=..., ax=...): # -> _DendrogramPlotter:
|
|
"""Draw a tree diagram of relationships within a matrix
|
|
|
|
Parameters
|
|
----------
|
|
data : pandas.DataFrame
|
|
Rectangular data
|
|
linkage : numpy.array, optional
|
|
Linkage matrix
|
|
axis : int, optional
|
|
Which axis to use to calculate linkage. 0 is rows, 1 is columns.
|
|
label : bool, optional
|
|
If True, label the dendrogram at leaves with column or row names
|
|
metric : str, optional
|
|
Distance metric. Anything valid for scipy.spatial.distance.pdist
|
|
method : str, optional
|
|
Linkage method to use. Anything valid for
|
|
scipy.cluster.hierarchy.linkage
|
|
rotate : bool, optional
|
|
When plotting the matrix, whether to rotate it 90 degrees
|
|
counter-clockwise, so the leaves face right
|
|
tree_kws : dict, optional
|
|
Keyword arguments for the ``matplotlib.collections.LineCollection``
|
|
that is used for plotting the lines of the dendrogram tree.
|
|
ax : matplotlib axis, optional
|
|
Axis to plot on, otherwise uses current axis
|
|
|
|
Returns
|
|
-------
|
|
dendrogramplotter : _DendrogramPlotter
|
|
A Dendrogram plotter object.
|
|
|
|
Notes
|
|
-----
|
|
Access the reordered dendrogram indices with
|
|
dendrogramplotter.reordered_ind
|
|
|
|
"""
|
|
...
|
|
|
|
class ClusterGrid(Grid):
|
|
def __init__(self, data, pivot_kws=..., z_score=..., standard_scale=..., figsize=..., row_colors=..., col_colors=..., mask=..., dendrogram_ratio=..., colors_ratio=..., cbar_pos=...) -> None:
|
|
"""Grid object for organizing clustered heatmap input on to axes"""
|
|
...
|
|
|
|
def format_data(self, data, pivot_kws, z_score=..., standard_scale=...):
|
|
"""Extract variables from data or use directly."""
|
|
...
|
|
|
|
@staticmethod
|
|
def z_score(data2d, axis=...):
|
|
"""Standarize the mean and variance of the data axis
|
|
|
|
Parameters
|
|
----------
|
|
data2d : pandas.DataFrame
|
|
Data to normalize
|
|
axis : int
|
|
Which axis to normalize across. If 0, normalize across rows, if 1,
|
|
normalize across columns.
|
|
|
|
Returns
|
|
-------
|
|
normalized : pandas.DataFrame
|
|
Noramlized data with a mean of 0 and variance of 1 across the
|
|
specified axis.
|
|
"""
|
|
...
|
|
|
|
@staticmethod
|
|
def standard_scale(data2d, axis=...):
|
|
"""Divide the data by the difference between the max and min
|
|
|
|
Parameters
|
|
----------
|
|
data2d : pandas.DataFrame
|
|
Data to normalize
|
|
axis : int
|
|
Which axis to normalize across. If 0, normalize across rows, if 1,
|
|
normalize across columns.
|
|
vmin : int
|
|
If 0, then subtract the minimum of the data before dividing by
|
|
the range.
|
|
|
|
Returns
|
|
-------
|
|
standardized : pandas.DataFrame
|
|
Noramlized data with a mean of 0 and variance of 1 across the
|
|
specified axis.
|
|
|
|
"""
|
|
...
|
|
|
|
def dim_ratios(self, colors, dendrogram_ratio, colors_ratio): # -> list[Unknown]:
|
|
"""Get the proportions of the figure taken up by each axes."""
|
|
...
|
|
|
|
@staticmethod
|
|
def color_list_to_matrix_and_cmap(colors, ind, axis=...): # -> tuple[ndarray[Any, dtype[Any]], Unknown]:
|
|
"""Turns a list of colors into a numpy matrix and matplotlib colormap
|
|
|
|
These arguments can now be plotted using heatmap(matrix, cmap)
|
|
and the provided colors will be plotted.
|
|
|
|
Parameters
|
|
----------
|
|
colors : list of matplotlib colors
|
|
Colors to label the rows or columns of a dataframe.
|
|
ind : list of ints
|
|
Ordering of the rows or columns, to reorder the original colors
|
|
by the clustered dendrogram order
|
|
axis : int
|
|
Which axis this is labeling
|
|
|
|
Returns
|
|
-------
|
|
matrix : numpy.array
|
|
A numpy array of integer values, where each corresponds to a color
|
|
from the originally provided list of colors
|
|
cmap : matplotlib.colors.ListedColormap
|
|
|
|
"""
|
|
...
|
|
|
|
def savefig(self, *args, **kwargs): # -> None:
|
|
...
|
|
|
|
def plot_dendrograms(self, row_cluster, col_cluster, metric, method, row_linkage, col_linkage, tree_kws): # -> None:
|
|
...
|
|
|
|
def plot_colors(self, xind, yind, **kws): # -> None:
|
|
"""Plots color labels between the dendrogram and the heatmap
|
|
|
|
Parameters
|
|
----------
|
|
heatmap_kws : dict
|
|
Keyword arguments heatmap
|
|
|
|
"""
|
|
...
|
|
|
|
def plot_matrix(self, colorbar_kws, xind, yind, **kws): # -> None:
|
|
...
|
|
|
|
def plot(self, metric, method, colorbar_kws, row_cluster, col_cluster, row_linkage, col_linkage, tree_kws, **kws): # -> Self@ClusterGrid:
|
|
...
|
|
|
|
|
|
|
|
@_deprecate_positional_args
|
|
def clustermap(data, *, pivot_kws=..., method=..., metric=..., z_score=..., standard_scale=..., figsize=..., cbar_kws=..., row_cluster=..., col_cluster=..., row_linkage=..., col_linkage=..., row_colors=..., col_colors=..., mask=..., dendrogram_ratio=..., colors_ratio=..., cbar_pos=..., tree_kws=..., **kwargs): # -> ClusterGrid:
|
|
"""
|
|
Plot a matrix dataset as a hierarchically-clustered heatmap.
|
|
|
|
Parameters
|
|
----------
|
|
data : 2D array-like
|
|
Rectangular data for clustering. Cannot contain NAs.
|
|
pivot_kws : dict, optional
|
|
If `data` is a tidy dataframe, can provide keyword arguments for
|
|
pivot to create a rectangular dataframe.
|
|
method : str, optional
|
|
Linkage method to use for calculating clusters. See
|
|
:func:`scipy.cluster.hierarchy.linkage` documentation for more
|
|
information.
|
|
metric : str, optional
|
|
Distance metric to use for the data. See
|
|
:func:`scipy.spatial.distance.pdist` documentation for more options.
|
|
To use different metrics (or methods) for rows and columns, you may
|
|
construct each linkage matrix yourself and provide them as
|
|
`{row,col}_linkage`.
|
|
z_score : int or None, optional
|
|
Either 0 (rows) or 1 (columns). Whether or not to calculate z-scores
|
|
for the rows or the columns. Z scores are: z = (x - mean)/std, so
|
|
values in each row (column) will get the mean of the row (column)
|
|
subtracted, then divided by the standard deviation of the row (column).
|
|
This ensures that each row (column) has mean of 0 and variance of 1.
|
|
standard_scale : int or None, optional
|
|
Either 0 (rows) or 1 (columns). Whether or not to standardize that
|
|
dimension, meaning for each row or column, subtract the minimum and
|
|
divide each by its maximum.
|
|
figsize : tuple of (width, height), optional
|
|
Overall size of the figure.
|
|
cbar_kws : dict, optional
|
|
Keyword arguments to pass to `cbar_kws` in :func:`heatmap`, e.g. to
|
|
add a label to the colorbar.
|
|
{row,col}_cluster : bool, optional
|
|
If ``True``, cluster the {rows, columns}.
|
|
{row,col}_linkage : :class:`numpy.ndarray`, optional
|
|
Precomputed linkage matrix for the rows or columns. See
|
|
:func:`scipy.cluster.hierarchy.linkage` for specific formats.
|
|
{row,col}_colors : list-like or pandas DataFrame/Series, optional
|
|
List of colors to label for either the rows or columns. Useful to evaluate
|
|
whether samples within a group are clustered together. Can use nested lists or
|
|
DataFrame for multiple color levels of labeling. If given as a
|
|
:class:`pandas.DataFrame` or :class:`pandas.Series`, labels for the colors are
|
|
extracted from the DataFrames column names or from the name of the Series.
|
|
DataFrame/Series colors are also matched to the data by their index, ensuring
|
|
colors are drawn in the correct order.
|
|
mask : bool array or DataFrame, optional
|
|
If passed, data will not be shown in cells where `mask` is True.
|
|
Cells with missing values are automatically masked. Only used for
|
|
visualizing, not for calculating.
|
|
{dendrogram,colors}_ratio : float, or pair of floats, optional
|
|
Proportion of the figure size devoted to the two marginal elements. If
|
|
a pair is given, they correspond to (row, col) ratios.
|
|
cbar_pos : tuple of (left, bottom, width, height), optional
|
|
Position of the colorbar axes in the figure. Setting to ``None`` will
|
|
disable the colorbar.
|
|
tree_kws : dict, optional
|
|
Parameters for the :class:`matplotlib.collections.LineCollection`
|
|
that is used to plot the lines of the dendrogram tree.
|
|
kwargs : other keyword arguments
|
|
All other keyword arguments are passed to :func:`heatmap`.
|
|
|
|
Returns
|
|
-------
|
|
:class:`ClusterGrid`
|
|
A :class:`ClusterGrid` instance.
|
|
|
|
See Also
|
|
--------
|
|
heatmap : Plot rectangular data as a color-encoded matrix.
|
|
|
|
Notes
|
|
-----
|
|
The returned object has a ``savefig`` method that should be used if you
|
|
want to save the figure object without clipping the dendrograms.
|
|
|
|
To access the reordered row indices, use:
|
|
``clustergrid.dendrogram_row.reordered_ind``
|
|
|
|
Column indices, use:
|
|
``clustergrid.dendrogram_col.reordered_ind``
|
|
|
|
Examples
|
|
--------
|
|
|
|
Plot a clustered heatmap:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> import seaborn as sns; sns.set_theme(color_codes=True)
|
|
>>> iris = sns.load_dataset("iris")
|
|
>>> species = iris.pop("species")
|
|
>>> g = sns.clustermap(iris)
|
|
|
|
Change the size and layout of the figure:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> g = sns.clustermap(iris,
|
|
... figsize=(7, 5),
|
|
... row_cluster=False,
|
|
... dendrogram_ratio=(.1, .2),
|
|
... cbar_pos=(0, .2, .03, .4))
|
|
|
|
Add colored labels to identify observations:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> lut = dict(zip(species.unique(), "rbg"))
|
|
>>> row_colors = species.map(lut)
|
|
>>> g = sns.clustermap(iris, row_colors=row_colors)
|
|
|
|
Use a different colormap and adjust the limits of the color range:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> g = sns.clustermap(iris, cmap="mako", vmin=0, vmax=10)
|
|
|
|
Use a different similarity metric:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> g = sns.clustermap(iris, metric="correlation")
|
|
|
|
Use a different clustering method:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> g = sns.clustermap(iris, method="single")
|
|
|
|
Standardize the data within the columns:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> g = sns.clustermap(iris, standard_scale=1)
|
|
|
|
Normalize the data within the rows:
|
|
|
|
.. plot::
|
|
:context: close-figs
|
|
|
|
>>> g = sns.clustermap(iris, z_score=0, cmap="vlag")
|
|
"""
|
|
...
|
|
|