# Source code for pycat.toolbox.data_viz_tools
"""
Data Visualization and Plotting Tools for PyCAT

This module contains classes and functions for visualizing data using various types of plots.
The PlottingWidget class provides a GUI for selecting and visualizing data from different
DataFrames using various types of plots. The class uses PyQt5 for the GUI and pandas, seaborn,
and matplotlib for data manipulation and plotting.

Author
------
    Christian Neureuter, GitHub: https://github.com/cneureuter

Date
----
    4-20-2024
"""
# Third party imports
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from PyQt5.QtWidgets import QWidget, QVBoxLayout, QComboBox, QRadioButton, QButtonGroup, QGroupBox, QHBoxLayout, QLabel, QCheckBox, QScrollArea, QLineEdit, QPushButton
from PyQt5.QtCore import Qt
class PlottingWidget(QWidget):
    """
    A widget for selecting and visualizing data from different DataFrames using various types of plots.

    Attributes
    ----------
    central_manager : CentralManager
        An instance of CentralManager to manage and provide access to data resources.
    data_class_instance : DataClass
        The active data class, re-read from ``central_manager`` on every UI refresh.
    dataframes : dict
        Mapping of name -> pandas DataFrame, as returned by
        ``data_class_instance.get_dataframes()``.
    df_combo : QComboBox
        Dropdown menu to select from available DataFrames.
    line_radio, violin_radio, hist_radio : QRadioButton
        Radio buttons to select the type of plot to display.
    line_options, violin_options, hist_options : QGroupBox
        Widgets containing specific options for each type of plot.
    line_x_combo, line_y_combo, hist_data_combo : QComboBox
        Column selectors for the line/scatter plot and the histogram.
    checkboxes : list of QCheckBox
        One checkbox per column, used to pick the violin-plot columns.
    plot_button : QPushButton
        Button to trigger the plotting of selected data.
    """

    def __init__(self, central_manager):
        """
        Initializes the PlottingWidget with a reference to a CentralManager instance.

        Parameters
        ----------
        central_manager : CentralManager
            An instance of CentralManager which manages the active data classes and dataframes.
        """
        super().__init__()
        self.central_manager = central_manager
        self.data_class_instance = self.central_manager.active_data_class
        self.dataframes = self.data_class_instance.get_dataframes()

        layout = QVBoxLayout()

        # Dropdown to select DataFrame
        self.df_combo = QComboBox()
        self.df_combo.addItems(list(self.dataframes))
        self.df_combo.currentIndexChanged.connect(self.on_dataframe_changed)
        layout.addWidget(self.df_combo)

        # Radio buttons for plot type
        self.line_radio = QRadioButton("Scatter/Line Plot")
        self.violin_radio = QRadioButton("Violin Plot")
        self.hist_radio = QRadioButton("Histogram Plot")
        radio_layout = QHBoxLayout()
        for radio in (self.line_radio, self.violin_radio, self.hist_radio):
            radio.clicked.connect(self.update_ui)
            radio_layout.addWidget(radio)
        layout.addLayout(radio_layout)

        # Options for each plot type (these read self.df_combo / self.dataframes,
        # so they must be built after the dropdown above)
        self.line_options = self.create_line_options()
        self.violin_options = self.create_violin_options()
        self.hist_options = self.create_hist_options()
        layout.addWidget(self.line_options)
        layout.addWidget(self.violin_options)
        layout.addWidget(self.hist_options)

        # Plot button
        self.plot_button = QPushButton("Plot")
        self.plot_button.clicked.connect(self.plot_data)
        layout.addWidget(self.plot_button)

        self.setLayout(layout)
        self.update_ui()

    def update_ui(self):
        """
        Updates the user interface elements based on the current state of the data class instance and selected options.

        Re-reads the active data class from the central manager, synchronizes the
        DataFrame dropdown, shows only the option panel of the checked plot type,
        and refreshes the column selectors.
        """
        # Update the active data class instance
        self.data_class_instance = self.central_manager.active_data_class
        # Update the available DataFrames
        self.update_dataframes()
        # Show or hide plot options based on the selected plot type
        self.line_options.setVisible(self.line_radio.isChecked())
        self.violin_options.setVisible(self.violin_radio.isChecked())
        self.hist_options.setVisible(self.hist_radio.isChecked())
        # Update the plotting options dropdown
        self.update_plot_options_dropdowns()

    def on_dataframe_changed(self, index):
        """
        Handles the event when a new DataFrame is selected in the dropdown menu.

        Parameters
        ----------
        index : int
            The index of the newly selected DataFrame in the dropdown menu (unused;
            the current selection is read from the dropdown itself).
        """
        self.update_dataframes()
        self.update_plot_options_dropdowns()

    def update_dataframes(self):
        """
        Refreshes the DataFrame mapping and the selection dropdown to match the
        DataFrames currently available in the data class instance.
        """
        # Re-read the mapping so that later lookups by name (column refresh, plotting)
        # use the same DataFrames the dropdown is being synchronized against.
        self.dataframes = self.data_class_instance.get_dataframes()
        available_names = list(self.dataframes)
        available_lookup = set(available_names)

        # Disconnect the signal to prevent triggering on programmatically setting items
        self.df_combo.currentIndexChanged.disconnect(self.on_dataframe_changed)
        try:
            # Walk backwards so a removal never shifts an index still to be visited
            for position in reversed(range(self.df_combo.count())):
                if self.df_combo.itemText(position) not in available_lookup:
                    self.df_combo.removeItem(position)
            # Append new names in the mapping's own order (not arbitrary set order)
            for name in available_names:
                if self.df_combo.findText(name) < 0:
                    self.df_combo.addItem(name)
        finally:
            # Always reconnect, even if updating the dropdown failed
            self.df_combo.currentIndexChanged.connect(self.on_dataframe_changed)

    def _current_columns(self):
        """
        Returns the column labels of the DataFrame currently selected in the dropdown.

        Returns
        -------
        list
            The column labels, or an empty list when nothing is selected or the
            selected name is not present in ``self.dataframes``.
        """
        selected_df = self.dataframes.get(self.df_combo.currentText())
        if selected_df is None:
            return []
        return selected_df.columns.tolist()

    def update_plot_options_dropdowns(self):
        """
        Updates dropdown menus and checkboxes to reflect the columns available in the currently selected DataFrame.

        Selections made by the user (chosen columns, checked boxes) are kept when
        the same column name still exists after the refresh.
        """
        columns = self._current_columns()

        # Repopulate each column selector, restoring the previous choice if possible
        for combo in (self.line_x_combo, self.line_y_combo, self.hist_data_combo):
            previous_choice = combo.currentText()
            combo.clear()
            combo.addItems(columns)
            restored_index = combo.findText(previous_choice)
            if restored_index >= 0:
                combo.setCurrentIndex(restored_index)

        # Rebuild the violin-plot checkboxes, carrying over the checked state
        previously_checked = {box.text() for box in self.checkboxes if box.isChecked()}
        for box in self.checkboxes:
            self.checkbox_layout.removeWidget(box)
            box.deleteLater()
        self.checkboxes.clear()
        for column in columns:
            box = QCheckBox(column)
            box.setChecked(column in previously_checked)
            self.checkboxes.append(box)
            self.checkbox_layout.addWidget(box)

    def create_line_options(self):
        """
        Creates and returns a widget containing options for configuring line or scatter plots.

        Returns
        -------
        QGroupBox
            A widget containing UI components for setting options specific to line or scatter plots.
        """
        group = QGroupBox()
        layout = QVBoxLayout()
        columns = self._current_columns()

        # X and Y data selectors
        self.line_x_combo = QComboBox()
        self.line_y_combo = QComboBox()
        self.line_x_combo.addItems(columns)
        self.line_y_combo.addItems(columns)
        xy_layout = QHBoxLayout()
        xy_layout.addWidget(QLabel("X values:"))
        xy_layout.addWidget(self.line_x_combo)
        xy_layout.addWidget(QLabel("Y values:"))
        xy_layout.addWidget(self.line_y_combo)
        layout.addLayout(xy_layout)

        # Linestyle and marker style
        self.linestyle_combo = QComboBox()
        self.marker_combo = QComboBox()
        self.linestyle_combo.addItems(["-", "--", "-.", ":", "None"])
        self.marker_combo.addItems(["o", "s", "v", "x"])
        lm_layout = QHBoxLayout()
        lm_layout.addWidget(QLabel("Linestyle:"))
        lm_layout.addWidget(self.linestyle_combo)
        lm_layout.addWidget(QLabel("Marker Style:"))
        lm_layout.addWidget(self.marker_combo)
        layout.addLayout(lm_layout)

        # X-scale and Y-scale
        self.x_scale_combo = QComboBox()
        self.y_scale_combo = QComboBox()
        self.x_scale_combo.addItems(["linear", "log"])
        self.y_scale_combo.addItems(["linear", "log"])
        scale_layout = QHBoxLayout()
        scale_layout.addWidget(QLabel("X-Scale:"))
        scale_layout.addWidget(self.x_scale_combo)
        scale_layout.addWidget(QLabel("Y-Scale:"))
        scale_layout.addWidget(self.y_scale_combo)
        layout.addLayout(scale_layout)

        # X-limit and Y-limit (free text, parsed when plotting)
        self.x_limit = QLineEdit()
        self.y_limit = QLineEdit()
        limit_layout = QHBoxLayout()
        limit_layout.addWidget(QLabel("X-Limit:"))
        limit_layout.addWidget(self.x_limit)
        limit_layout.addWidget(QLabel("Y-Limit:"))
        limit_layout.addWidget(self.y_limit)
        layout.addLayout(limit_layout)

        group.setLayout(layout)
        return group

    def create_violin_options(self):
        """
        Creates and returns a widget containing options for configuring violin plots.

        Returns
        -------
        QGroupBox
            A widget containing UI components for setting options specific to violin plots.
        """
        group = QGroupBox()
        layout = QVBoxLayout()

        # One checkbox per column, laid out horizontally
        checkbox_widget = QWidget()
        self.checkbox_layout = QHBoxLayout()
        checkbox_widget.setLayout(self.checkbox_layout)
        self.checkboxes = []
        for column in self._current_columns():
            checkbox = QCheckBox(column)
            self.checkboxes.append(checkbox)
            self.checkbox_layout.addWidget(checkbox)

        # Wrap the checkboxes inside a horizontally scrolling area
        scroll = QScrollArea()
        scroll.setWidgetResizable(True)
        scroll.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        scroll.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        scroll.setWidget(checkbox_widget)
        layout.addWidget(QLabel("Data columns:"))
        layout.addWidget(scroll)

        # Side by side combo boxes for orientation and inner plot layout
        combo_layout = QHBoxLayout()
        self.orientation_combo = QComboBox()
        self.orientation_combo.addItems(["v", "h"])
        combo_layout.addWidget(QLabel("Orientation:"))
        combo_layout.addWidget(self.orientation_combo)
        self.inner_combo = QComboBox()
        self.inner_combo.addItems(["box", "quart", "point", "stick"])
        combo_layout.addWidget(QLabel("Inner Layout:"))
        combo_layout.addWidget(self.inner_combo)
        layout.addLayout(combo_layout)

        group.setLayout(layout)
        return group

    def create_hist_options(self):
        """
        Creates and returns a widget containing options for configuring histogram plots.

        Returns
        -------
        QGroupBox
            A widget containing UI components for setting options specific to histogram plots.
        """
        group = QGroupBox()
        layout = QVBoxLayout()

        # Dropdown for the histogram data column
        self.hist_data_combo = QComboBox()
        self.hist_data_combo.addItems(self._current_columns())

        # Input fields for number of bins and bin width
        self.hist_bins_input = QLineEdit()
        self.hist_bin_width_input = QLineEdit()

        # Radio buttons for 'cumulative' distribution function
        self.cumulative_true_radio = QRadioButton("True")
        self.cumulative_false_radio = QRadioButton("False")
        self.cumulative_false_radio.setChecked(True)  # default to False
        cumulative_layout = QHBoxLayout()
        cumulative_layout.addWidget(QLabel("Cumulative:"))
        cumulative_layout.addWidget(self.cumulative_true_radio)
        cumulative_layout.addWidget(self.cumulative_false_radio)
        # Group the radio buttons so that they're mutually exclusive
        self.cumulative_group = QButtonGroup()
        self.cumulative_group.addButton(self.cumulative_true_radio)
        self.cumulative_group.addButton(self.cumulative_false_radio)

        # Radio buttons for 'kde'
        self.kde_true_radio = QRadioButton("True")
        self.kde_false_radio = QRadioButton("False")
        self.kde_false_radio.setChecked(True)  # default to False
        kde_layout = QHBoxLayout()
        kde_layout.addWidget(QLabel("KDE:"))
        kde_layout.addWidget(self.kde_true_radio)
        kde_layout.addWidget(self.kde_false_radio)
        # Group the radio buttons so that they're mutually exclusive
        self.kde_group = QButtonGroup()
        self.kde_group.addButton(self.kde_true_radio)
        self.kde_group.addButton(self.kde_false_radio)

        # Add widgets to layout
        layout.addWidget(QLabel("Data column:"))
        layout.addWidget(self.hist_data_combo)
        layout.addWidget(QLabel("Number of bins:"))
        layout.addWidget(self.hist_bins_input)
        layout.addWidget(QLabel("Bin width:"))
        layout.addWidget(self.hist_bin_width_input)
        layout.addLayout(cumulative_layout)
        layout.addLayout(kde_layout)

        group.setLayout(layout)
        return group

    @staticmethod
    def _parse_axis_limit(text, label):
        """
        Parses the text of an axis-limit field.

        Accepts either two numbers ("min, max" or "min max") or a single number.
        A single number is interpreted as the upper bound only.
        NOTE(review): the intended meaning of a single value is an assumption -- confirm.

        Parameters
        ----------
        text : str
            Raw text of the limit field.
        label : str
            Field name used in error messages.

        Returns
        -------
        tuple or None
            ``(lower, upper)`` where ``lower`` may be None, or None for blank input.

        Raises
        ------
        ValueError
            If the text is not one or two numbers.
        """
        tokens = text.replace(",", " ").split()
        if not tokens:
            return None
        try:
            values = [float(token) for token in tokens]
        except ValueError:
            raise ValueError(
                f"{label} must be a number or a 'min, max' pair, got {text!r}."
            ) from None
        if len(values) == 1:
            return None, values[0]
        if len(values) == 2:
            return values[0], values[1]
        raise ValueError(f"{label} accepts at most two numbers, got {text!r}.")

    @staticmethod
    def _parse_positive(text, label, cast):
        """
        Parses an optional, strictly positive numeric field.

        Parameters
        ----------
        text : str
            Raw text of the field.
        label : str
            Field name used in error messages.
        cast : callable
            ``int`` or ``float``; converts the text to a number.

        Returns
        -------
        int, float or None
            The parsed value, or None for blank input.

        Raises
        ------
        ValueError
            If the text is not a finite number greater than zero.
        """
        stripped = text.strip()
        if not stripped:
            return None
        try:
            value = cast(stripped)
        except ValueError:
            raise ValueError(f"{label} must be a number, got {text!r}.") from None
        # The chained comparison also rejects NaN and infinity
        if not 0 < value < float("inf"):
            raise ValueError(f"{label} must be greater than zero, got {text!r}.")
        return value

    def _plot_line(self, df):
        """Draws the line/scatter plot for ``df`` from the line-option widgets."""
        x_col = self.line_x_combo.currentText()
        y_col = self.line_y_combo.currentText()
        # Validate the limits first so bad input cannot leave a half-configured figure
        x_bounds = self._parse_axis_limit(self.x_limit.text(), "X-Limit")
        y_bounds = self._parse_axis_limit(self.y_limit.text(), "Y-Limit")

        plt.plot(
            df[x_col],
            df[y_col],
            linestyle=self.linestyle_combo.currentText(),
            marker=self.marker_combo.currentText(),
        )
        plt.xscale(self.x_scale_combo.currentText())
        plt.yscale(self.y_scale_combo.currentText())
        # A bound of None leaves that side of the axis unchanged
        if x_bounds is not None:
            plt.xlim(left=x_bounds[0], right=x_bounds[1])
        if y_bounds is not None:
            plt.ylim(bottom=y_bounds[0], top=y_bounds[1])
        plt.xlabel(x_col)
        plt.ylabel(y_col)

    def _plot_violin(self, df):
        """
        Draws the violin plot for the checked columns of ``df``.

        Returns
        -------
        bool
            False when no column is checked (nothing was drawn), True otherwise.
        """
        selected_columns = [box.text() for box in self.checkboxes if box.isChecked()]
        if not selected_columns:
            return False

        orientation = self.orientation_combo.currentText()
        inner = self.inner_combo.currentText()
        if orientation == "v":
            sns.violinplot(data=df[selected_columns], cut=0, inner=inner, orient=orientation)
            plt.xlabel("Data")
        else:
            # Long form: one row per (column name, value) pair for the horizontal layout
            melted_df = pd.melt(df, value_vars=selected_columns)
            sns.violinplot(data=melted_df, y="variable", x="value", cut=0, inner=inner, orient=orientation)
            plt.ylabel("Data")
        return True

    def _plot_histogram(self, df):
        """Draws the histogram for ``df`` from the histogram-option widgets."""
        data_col = self.hist_data_combo.currentText()
        cumulative = self.cumulative_true_radio.isChecked()
        kde = self.kde_true_radio.isChecked()

        # Bin width takes precedence; the bin-count field is only read when width is blank
        bin_options = {}
        bin_width = self._parse_positive(self.hist_bin_width_input.text(), "Bin width", float)
        if bin_width is not None:
            # Let seaborn build bins of exactly this width
            bin_options["binwidth"] = bin_width
        else:
            bin_count = self._parse_positive(self.hist_bins_input.text(), "Number of bins", int)
            if bin_count is not None:
                bin_options["bins"] = bin_count

        sns.histplot(
            df[data_col],
            kde=kde,
            cumulative=cumulative,
            stat="density" if kde else "count",
            **bin_options,
        )
        plt.xlabel(data_col)
        plt.ylabel("Density" if kde else "Count")

    def plot_data(self):
        """
        Generates and displays the plot based on the selected DataFrame, plot type, and associated options.

        Does nothing when no DataFrame is available, no plot type is selected, or
        (for violin plots) no column is checked.

        Raises
        ------
        ValueError
            If an axis limit, bin width, or bin count field contains invalid text.
        """
        df = self.dataframes.get(self.df_combo.currentText())
        if df is None:
            return

        if self.line_radio.isChecked():
            self._plot_line(df)
        elif self.violin_radio.isChecked():
            if not self._plot_violin(df):
                return
        elif self.hist_radio.isChecked():
            self._plot_histogram(df)
        else:
            # No plot type chosen yet: avoid popping up an empty figure
            return

        plt.show()