Files
OqeeAdWatch/visualizer/plotter.py
2025-12-23 10:33:01 +01:00

629 lines
21 KiB
Python

import matplotlib.pyplot as plt
from matplotlib import font_manager as font_manager
from pathlib import Path
from typing import Dict, List, Callable
FPATH = "libs/LibertinusSerif-Regular.otf"
prop = font_manager.FontProperties(fname=FPATH, size=14)
# Register the font file so Matplotlib can find it and use it by default.
try:
font_manager.fontManager.addfont(FPATH)
font_name = font_manager.FontProperties(fname=FPATH).get_name()
if font_name:
plt.rcParams["font.family"] = font_name
plt.rcParams["font.size"] = prop.get_size()
except (
Exception
): # pylint: disable=broad-exception-caught # pragma: no cover - optional font may be missing
font_name = None
# Renamed _format_duration and _human_ts to be accessible
from visualizer.utils import format_duration, human_ts, CHANNELS_DATA
def plot_hourly_profile(
channel_id: str,
profile: Dict,
stats: Dict | None = None,
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {},
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
) -> None:
"""Plot the average ad activity per hour of day."""
if not profile or not profile.get("days"):
print("No data available or not enough distinct days for the hourly plot.")
return
hours = list(range(24))
avg_duration_minutes = [
(profile["durations"][hour] / profile["days"]) / 60 for hour in hours
]
avg_counts = [profile["counts"][hour] / profile["days"] for hour in hours]
fig, ax_left = plt.subplots(figsize=(14, 5))
ax_left.bar(hours, avg_duration_minutes, color="tab:blue", alpha=0.7)
ax_left.set_xlabel("Hour of day", fontproperties=prop)
ax_left.set_ylabel(
"Avg ad duration per day (min)", color="tab:blue", fontproperties=prop
)
ax_left.set_xticks(hours)
ax_left.set_xticklabels([str(h) for h in hours], fontproperties=prop)
ax_left.set_xlim(-0.5, 23.5)
ax_right = ax_left.twinx()
ax_right.plot(hours, avg_counts, color="tab:orange", marker="o")
ax_right.set_ylabel("Avg number of breaks", color="tab:orange", fontproperties=prop)
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
for t in ax_left.get_yticklabels():
t.set_fontproperties(prop)
for t in ax_right.get_yticklabels():
t.set_fontproperties(prop)
fig.suptitle(
(
"Average ad activity for channel "
f"{channel_name} ({channel_id}) across {profile['days']} day(s)"
),
fontproperties=prop,
)
if stats:
overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
fig.text(
0.73,
0.5,
overview_text,
transform=fig.transFigure,
fontproperties=prop,
fontsize=12,
verticalalignment="center",
horizontalalignment="left",
bbox={"boxstyle": "round,pad=0.5", "facecolor": "wheat", "alpha": 0.8},
)
fig.tight_layout(rect=[0, 0, 0.72 if stats else 1, 1])
if not save:
plt.show()
if save:
filename = output_dir / f"hourly_profile_{channel_id}.png"
fig.savefig(filename)
print(f"Hourly profile saved to {filename}")
plt.close(fig)
def plot_heatmap(
channel_id: str,
heatmap_data: Dict,
stats: Dict | None = None,
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {},
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
) -> None:
"""Plot a heatmap of ad minute coverage by minute of hour and hour of day."""
if not heatmap_data or not heatmap_data.get("days"):
print("No data available or not enough distinct days for the heatmap plot.")
return
days = heatmap_data.get("days", 0)
normalized = [
[min(value / (60 * days), 1.0) for value in row] for row in heatmap_data["grid"]
]
fig, ax = plt.subplots(figsize=(14, 5))
im = ax.imshow(
normalized,
origin="lower",
aspect="auto",
cmap="Reds",
extent=[0, 24, 0, 60],
vmin=0,
vmax=1,
)
ax.set_xlabel("Hour of day", fontproperties=prop)
ax.set_ylabel("Minute within hour", fontproperties=prop)
ax.set_xticks(range(0, 25, 2))
ax.set_xticklabels([str(x) for x in range(0, 25, 2)], fontproperties=prop)
ax.set_yticks(range(0, 61, 10))
ax.set_yticklabels([str(y) for y in range(0, 61, 10)], fontproperties=prop)
cbar = fig.colorbar(im, ax=ax)
cbar.set_label("Share of minute spent in ads per day", fontproperties=prop)
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
fig.suptitle(
(
"Ad minute coverage for channel "
f"{channel_name} ({channel_id}) across {days} day(s)"
),
fontproperties=prop,
)
if stats:
overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
fig.text(
0.73,
0.5,
overview_text,
transform=fig.transFigure,
fontproperties=prop,
fontsize=12,
verticalalignment="center",
horizontalalignment="left",
bbox={"boxstyle": "round,pad=0.5", "facecolor": "wheat", "alpha": 0.8},
)
fig.tight_layout(rect=[0, 0, 0.72 if stats else 1, 1])
if not save:
plt.show()
if save:
filename = output_dir / f"heatmap_{channel_id}.png"
fig.savefig(filename)
print(f"Heatmap saved to {filename}")
plt.close(fig)
def plot_combined(
channel_id: str,
profile: Dict,
heatmap_data: Dict,
stats: Dict | None = None,
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {},
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
) -> None:
"""Plot both hourly profile and heatmap in a single figure with the overview text box."""
if not profile or not profile.get("days"):
print("No data available for the hourly plot.")
return
if not heatmap_data or not heatmap_data.get("days"):
print("No data available for the heatmap plot.")
return
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
break
fig, (ax_hourly, ax_heatmap) = plt.subplots(2, 1, figsize=(14, 10))
# --- Hourly profile (top) ---
hours = list(range(24))
avg_duration_minutes = [
(profile["durations"][hour] / profile["days"]) / 60 for hour in hours
]
avg_counts = [profile["counts"][hour] / profile["days"] for hour in hours]
ax_hourly.bar(hours, avg_duration_minutes, color="tab:blue", alpha=0.7)
ax_hourly.set_xlabel("Hour of day", fontproperties=prop)
ax_hourly.set_ylabel(
"Avg ad duration per day (min)", color="tab:blue", fontproperties=prop
)
ax_hourly.set_xticks(hours)
ax_hourly.set_xticklabels([str(h) for h in hours], fontproperties=prop)
ax_hourly.set_xlim(-0.5, 23.5)
ax_hourly.set_title("Average ad activity by hour", fontproperties=prop)
ax_hourly_right = ax_hourly.twinx()
ax_hourly_right.plot(hours, avg_counts, color="tab:orange", marker="o")
ax_hourly_right.set_ylabel(
"Avg number of breaks", color="tab:orange", fontproperties=prop
)
for t in ax_hourly.get_yticklabels():
t.set_fontproperties(prop)
for t in ax_hourly_right.get_yticklabels():
t.set_fontproperties(prop)
# --- Heatmap (bottom) ---
days = heatmap_data.get("days", 0)
normalized = [
[min(value / (60 * days), 1.0) for value in row] for row in heatmap_data["grid"]
]
im = ax_heatmap.imshow(
normalized,
origin="lower",
aspect="auto",
cmap="Reds",
extent=[0, 24, 0, 60],
vmin=0,
vmax=1,
)
ax_heatmap.set_xlabel("Hour of day", fontproperties=prop)
ax_heatmap.set_ylabel("Minute within hour", fontproperties=prop)
ax_heatmap.set_xticks(range(0, 25, 2))
ax_heatmap.set_xticklabels([str(x) for x in range(0, 25, 2)], fontproperties=prop)
ax_heatmap.set_yticks(range(0, 61, 10))
ax_heatmap.set_yticklabels([str(y) for y in range(0, 61, 10)], fontproperties=prop)
ax_heatmap.set_title("Ad minute coverage heatmap", fontproperties=prop)
cbar = fig.colorbar(im, ax=ax_heatmap)
cbar.set_label("Share of minute spent in ads per day", fontproperties=prop)
fig.suptitle(
f"Ad analysis for {channel_name} ({channel_id}) across {profile['days']} day(s)",
fontproperties=prop,
fontsize=16,
)
if stats:
overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
fig.text(
0.73,
0.5,
overview_text,
transform=fig.transFigure,
fontproperties=prop,
fontsize=12,
verticalalignment="center",
horizontalalignment="left",
bbox={"boxstyle": "round,pad=0.5", "facecolor": "wheat", "alpha": 0.8},
)
fig.tight_layout(rect=[0, 0, 0.72 if stats else 1, 0.96])
if not save:
plt.show()
if save:
filename = output_dir / f"{channel_id}_combined.png"
fig.savefig(filename, dpi=300)
print(f"Combined plot saved to {filename}")
plt.close(fig)
def plot_weekday_overview(
all_channels_data: List[Dict],
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {}
) -> None:
"""
Plot a weekday overview for all channels.
Each channel gets:
- A bar showing number of ads per weekday
- A horizontal heatmap strip showing ad coverage by weekday x hour
"""
if not all_channels_data:
print("No data available for weekday overview.")
return
weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
num_channels = len(all_channels_data)
fig, (ax_bars, ax_heatmap) = plt.subplots(
1, 2, figsize=(18, max(8, num_channels * 0.5))
)
channel_names = []
weekday_counts_all = []
heatmap_plot_data = []
for data in all_channels_data:
channel_id = data["channel_id"]
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
break
channel_names.append(f"{channel_name}")
weekday_profile = data.get("weekday_profile", {})
weekday_heatmap = data.get("weekday_heatmap", {})
counts = weekday_profile.get("counts", [0] * 7)
days_seen = weekday_profile.get("days_seen", [1] * 7)
avg_counts = [c / max(d, 1) for c, d in zip(counts, days_seen)]
weekday_counts_all.append(avg_counts)
grid = weekday_heatmap.get("grid", [[0] * 24 for _ in range(7)])
hm_days_seen = weekday_heatmap.get("days_seen", [1] * 7)
normalized_row = []
for weekday in range(7):
for hour in range(24):
val = (
grid[weekday][hour] / max(hm_days_seen[weekday], 1) / 3600
)
normalized_row.append(min(val, 1.0))
heatmap_plot_data.append(normalized_row)
x = range(num_channels)
bar_width = 0.12
colors = plt.cm.tab10(range(7))
for i, weekday in enumerate(weekday_names):
offsets = [xi + (i - 3) * bar_width for xi in x]
values = [weekday_counts_all[ch][i] for ch in range(num_channels)]
ax_bars.barh(
offsets, values, height=bar_width, label=weekday, color=colors[i], alpha=0.8
)
ax_bars.set_yticks(list(x))
ax_bars.set_yticklabels(channel_names, fontproperties=prop)
ax_bars.set_xlabel("Avg number of ad breaks per day", fontproperties=prop)
ax_bars.set_title("Ad breaks by day of week", fontproperties=prop)
ax_bars.legend(title="Day", loc="lower right", fontsize=9)
ax_bars.invert_yaxis()
im = ax_heatmap.imshow(
heatmap_plot_data,
aspect="auto",
cmap="Reds",
vmin=0,
vmax=0.5,
)
ax_heatmap.set_xticks([i * 24 + 12 for i in range(7)])
ax_heatmap.set_xticklabels(weekday_names, fontproperties=prop)
for i in range(1, 7):
ax_heatmap.axvline(x=i * 24 - 0.5, color="white", linewidth=1)
ax_heatmap.set_yticks(list(range(num_channels)))
ax_heatmap.set_yticklabels(channel_names, fontproperties=prop)
ax_heatmap.set_xlabel("Day of week (each day spans 24 hours)", fontproperties=prop)
ax_heatmap.set_title("Ad coverage heatmap by weekday & hour", fontproperties=prop)
cbar = fig.colorbar(im, ax=ax_heatmap, shrink=0.8)
cbar.set_label("Fraction of hour in ads (avg per day)", fontproperties=prop)
fig.suptitle(
"Weekly ad patterns across all channels", fontproperties=prop, fontsize=16
)
fig.tight_layout(rect=[0, 0, 1, 0.96])
if not save:
plt.show()
if save:
filename = output_dir / "weekday_overview_all_channels.png"
fig.savefig(filename, dpi=300)
print(f"Weekday overview saved to {filename}")
plt.close(fig)
def plot_weekday_channel(
channel_id: str,
weekday_profile: Dict,
weekday_hour_counts: Dict,
stats: Dict | None = None,
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {},
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
) -> None:
"""
Plot a weekday overview for a single channel.
Shows:
- Bar chart of ad breaks per weekday
- Heatmap of ad break counts by weekday x hour (7 rows x 24 columns)
- Stats text box on the right
"""
if not weekday_profile or not weekday_hour_counts:
print(f"No weekday data available for channel {channel_id}.")
return
weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
break
fig, (ax_bars, ax_heatmap) = plt.subplots(2, 1, figsize=(14, 8))
# --- Top plot: Bar chart for weekday counts ---
counts = weekday_profile.get("counts", [0] * 7)
days_seen = weekday_profile.get("days_seen", [1] * 7)
avg_counts = [c / max(d, 1) for c, d in zip(counts, days_seen)]
durations = weekday_profile.get("durations", [0] * 7)
avg_duration_minutes = [d / max(ds, 1) / 60 for d, ds in zip(durations, days_seen)]
x = range(7)
bar_width = 0.35
bars1 = ax_bars.bar(
[i - bar_width / 2 for i in x],
avg_counts,
bar_width,
label="Avg breaks",
color="tab:blue",
alpha=0.7,
)
ax_bars.set_ylabel("Avg number of ad breaks", color="tab:blue", fontproperties=prop)
ax_bars.set_xticks(list(x))
ax_bars.set_xticklabels(weekday_names, fontproperties=prop)
ax_bars.set_xlabel("Day of week", fontproperties=prop)
ax_bars.set_title("Ad breaks by day of week (average per day)", fontproperties=prop)
ax_bars_right = ax_bars.twinx()
bars2 = ax_bars_right.bar(
[i + bar_width / 2 for i in x],
avg_duration_minutes,
bar_width,
label="Avg duration (min)",
color="tab:orange",
alpha=0.7,
)
ax_bars_right.set_ylabel(
"Avg ad duration (min)", color="tab:orange", fontproperties=prop
)
ax_bars.legend(
[bars1, bars2], ["Avg breaks", "Avg duration (min)"], loc="upper right"
)
for t in ax_bars.get_yticklabels():
t.set_fontproperties(prop)
for t in ax_bars_right.get_yticklabels():
t.set_fontproperties(prop)
grid = weekday_hour_counts.get("grid", [[0] * 24 for _ in range(7)])
im = ax_heatmap.imshow(
grid,
aspect="auto",
cmap="Reds",
origin="upper",
)
ax_heatmap.set_xticks(range(0, 24, 2))
ax_heatmap.set_xticklabels([str(h) for h in range(0, 24, 2)], fontproperties=prop)
ax_heatmap.set_yticks(range(7))
ax_heatmap.set_yticklabels(weekday_names, fontproperties=prop)
ax_heatmap.set_xlabel("Hour of day", fontproperties=prop)
ax_heatmap.set_ylabel("Day of week", fontproperties=prop)
ax_heatmap.set_title("Total ad breaks by weekday & hour", fontproperties=prop)
cbar = fig.colorbar(im, ax=ax_heatmap, shrink=0.8)
cbar.set_label("Number of ad breaks", fontproperties=prop)
fig.suptitle(
f"Weekly ad patterns for {channel_name} ({channel_id})",
fontproperties=prop,
fontsize=16,
)
if stats:
overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
fig.text(
0.73,
0.5,
overview_text,
transform=fig.transFigure,
fontproperties=prop,
fontsize=12,
verticalalignment="center",
horizontalalignment="left",
bbox={"boxstyle": "round,pad=0.5", "facecolor": "wheat", "alpha": 0.8},
)
fig.tight_layout(rect=[0, 0, 0.72 if stats else 1, 0.96])
if not save:
plt.show()
if save:
filename = output_dir / f"{channel_id}_weekday.png"
fig.savefig(filename, dpi=300)
print(f"Weekday overview saved to {filename}")
plt.close(fig)
def plot_channel_rankings(
all_stats: List[Dict],
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {}
) -> None:
"""
Plot rankings of all channels based on:
- Total number of ads
- Total ad duration
- Longest single ad break
"""
if not all_stats:
print("No data available for channel rankings.")
return
channels_data_for_plot = []
for data in all_stats:
channel_id = data["channel_id"]
stats = data["stats"]
if not stats:
continue
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
break
max_break_duration = stats["max_break"][0] if stats.get("max_break") else 0
channels_data_for_plot.append(
{
"channel_id": channel_id,
"channel_name": channel_name,
"total_ads": stats.get("count", 0),
"total_duration": stats.get("total_duration", 0),
"longest_break": max_break_duration,
}
)
if not channels_data_for_plot:
print("No channel data for rankings.")
return
fig, axes = plt.subplots(1, 3, figsize=(18, max(8, len(channels_data_for_plot) * 0.4)))
rankings = [
("total_ads", "Total Number of Ads", "Number of ad breaks", "tab:blue"),
("total_duration", "Total Ad Duration", "Duration", "tab:green"),
("longest_break", "Longest Single Ad Break", "Duration", "tab:red"),
]
for ax, (metric, title, xlabel, color) in zip(axes, rankings):
sorted_data = sorted(channels_data_for_plot, key=lambda x, m=metric: x[m], reverse=True)
names = [d["channel_name"] for d in sorted_data]
values = [d[metric] for d in sorted_data]
if metric in ("total_duration", "longest_break"):
display_values = values
labels = [format_duration(int(v)) for v in values]
else:
display_values = values
labels = [str(v) for v in values]
y_pos = range(len(names))
bars = ax.barh(y_pos, display_values, color=color, alpha=0.7)
ax.set_yticks(list(y_pos))
ax.set_yticklabels(names, fontproperties=prop)
ax.set_xlabel(xlabel, fontproperties=prop)
ax.set_title(title, fontproperties=prop, fontsize=14)
ax.invert_yaxis()
for bar_rect, label in zip(bars, labels):
width = bar_rect.get_width()
ax.text(
width + max(display_values) * 0.01,
bar_rect.get_y() + bar_rect.get_height() / 2,
label,
va="center",
ha="left",
fontproperties=prop,
fontsize=10,
)
ax.set_xlim(0, max(display_values) * 1.25)
for t in ax.get_yticklabels():
t.set_fontproperties(prop)
for t in ax.get_xticklabels():
t.set_fontproperties(prop)
fig.suptitle("Channel Rankings by Ad Metrics", fontproperties=prop, fontsize=18)
fig.tight_layout(rect=[0, 0, 1, 0.96])
if not save:
plt.show()
if save:
filename = output_dir / "channel_rankings.png"
fig.savefig(filename, dpi=300)
print(f"Channel rankings saved to {filename}")
plt.close(fig)