From d5434b52e2833f92d645cc3d565e7d6c9ccf882a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=88=9A=28noham=29=C2=B2?= <100566912+NohamR@users.noreply.github.com> Date: Mon, 22 Dec 2025 15:54:49 +0100 Subject: [PATCH] Lint --- utils/visualizer.py | 208 +++++++++++++++++++++++++++++--------------- 1 file changed, 136 insertions(+), 72 deletions(-) diff --git a/utils/visualizer.py b/utils/visualizer.py index 90d99ec..8589240 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -7,29 +7,35 @@ from collections import defaultdict from datetime import datetime, timedelta import sqlite3 import statistics -from typing import Iterable, Sequence import sys +from typing import Iterable, Sequence from pathlib import Path import matplotlib.pyplot as plt -from matplotlib import font_manager as fm +from matplotlib import font_manager as font_manager FPATH = "libs/LibertinusSerif-Regular.otf" -prop = fm.FontProperties(fname=FPATH, size=14) +prop = font_manager.FontProperties(fname=FPATH, size=14) # Register the font file so Matplotlib can find it and use it by default. try: - fm.fontManager.addfont(FPATH) - font_name = fm.FontProperties(fname=FPATH).get_name() + font_manager.fontManager.addfont(FPATH) + font_name = font_manager.FontProperties(fname=FPATH).get_name() if font_name: plt.rcParams["font.family"] = font_name plt.rcParams["font.size"] = prop.get_size() -except Exception: # pylint: disable=broad-exception-caught # pragma: no cover - optional font may be missing +except ( + Exception +): # pylint: disable=broad-exception-caught # pragma: no cover - optional font may be missing font_name = None # Allow running as a script from anywhere sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) -from utils.scrap import DB_PATH, get_connection, fetch_service_plan # pylint: disable=wrong-import-position +from utils.scrap import ( + DB_PATH, + get_connection, + fetch_service_plan, +) # pylint: disable=wrong-import-position Row = Sequence @@ -61,13 +67,11 @@ def _merge_overlapping_breaks(rows: list[Row]) -> list[Row]: merged[-1] = (prev_row[0], prev_row[1], new_end, prev_row[3]) # Filter out breaks longer than MAX_BREAK_DURATION (likely errors) - filtered = [ - row for row in merged - if (row[2] - row[1]) <= MAX_BREAK_DURATION - ] + filtered = [row for row in merged if (row[2] - row[1]) <= MAX_BREAK_DURATION] return filtered + def _format_duration(seconds: int) -> str: minutes, secs = divmod(seconds, 60) hours, minutes = divmod(minutes, 60) @@ -359,7 +363,9 @@ def _build_overview_text(channel_id: str, stats: dict) -> str: return "\n".join(lines) -def _plot_hourly_profile(channel_id: str, profile: dict, stats: dict | None = None, save=False) -> None: +def _plot_hourly_profile( + channel_id: str, profile: dict, stats: dict | None = None, save=False +) -> None: if not profile: print("No data available for the hourly plot.") return @@ -376,7 +382,9 @@ def _plot_hourly_profile(channel_id: str, profile: dict, stats: dict | None = No fig, ax_left = plt.subplots(figsize=(14, 5)) ax_left.bar(hours, avg_duration_minutes, color="tab:blue", alpha=0.7) ax_left.set_xlabel("Hour of day", fontproperties=prop) - ax_left.set_ylabel("Avg ad duration per day (min)", color="tab:blue", fontproperties=prop) + ax_left.set_ylabel( + "Avg ad duration per day (min)", color="tab:blue", fontproperties=prop + ) ax_left.set_xticks(hours) ax_left.set_xticklabels([str(h) for h in hours], fontproperties=prop) ax_left.set_xlim(-0.5, 23.5) @@ -407,13 +415,15 @@ def _plot_hourly_profile(channel_id: str, profile: dict, stats: dict | None = No if stats: overview_text = _build_overview_text(channel_id, stats) fig.text( - 0.73, 0.5, overview_text, + 0.73, + 0.5, + overview_text, transform=fig.transFigure, fontproperties=prop, fontsize=12, verticalalignment="center", horizontalalignment="left", - bbox=dict(boxstyle="round,pad=0.5", facecolor="wheat", alpha=0.8), + bbox={"boxstyle": "round,pad=0.5", "facecolor": "wheat", "alpha": 0.8}, ) fig.tight_layout(rect=[0, 0, 0.72 if stats else 1, 1]) @@ -425,7 +435,9 @@ def _plot_hourly_profile(channel_id: str, profile: dict, stats: dict | None = No print(f"Hourly profile saved to {filename}") -def _plot_heatmap(channel_id: str, heatmap: dict, stats: dict | None = None, save=False) -> None: +def _plot_heatmap( + channel_id: str, heatmap: dict, stats: dict | None = None, save=False +) -> None: if not heatmap: print("No data available for the heatmap plot.") return @@ -435,8 +447,7 @@ def _plot_heatmap(channel_id: str, heatmap: dict, stats: dict | None = None, sav return normalized = [ - [min(value / (60 * days), 1.0) for value in row] - for row in heatmap["grid"] + [min(value / (60 * days), 1.0) for value in row] for row in heatmap["grid"] ] fig, ax = plt.subplots(figsize=(14, 5)) @@ -476,13 +487,15 @@ def _plot_heatmap(channel_id: str, heatmap: dict, stats: dict | None = None, sav if stats: overview_text = _build_overview_text(channel_id, stats) fig.text( - 0.73, 0.5, overview_text, + 0.73, + 0.5, + overview_text, transform=fig.transFigure, fontproperties=prop, fontsize=12, verticalalignment="center", horizontalalignment="left", - bbox=dict(boxstyle="round,pad=0.5", facecolor="wheat", alpha=0.8), + bbox={"boxstyle": "round,pad=0.5", "facecolor": "wheat", "alpha": 0.8}, ) fig.tight_layout(rect=[0, 0, 0.72 if stats else 1, 1]) @@ -494,7 +507,9 @@ def _plot_heatmap(channel_id: str, heatmap: dict, stats: dict | None = None, sav print(f"Heatmap saved to {filename}") -def _plot_combined(channel_id: str, profile: dict, heatmap: dict, stats: dict | None = None, save=False) -> None: +def _plot_combined( + channel_id: str, profile: dict, heatmap: dict, stats: dict | None = None, save=False +) -> None: """Plot both hourly profile and heatmap in a single figure with the overview text box.""" if not profile or not profile.get("days"): print("No data available for the hourly plot.") @@ -521,7 +536,9 @@ def _plot_combined(channel_id: str, profile: dict, heatmap: dict, stats: dict | ax_hourly.bar(hours, avg_duration_minutes, color="tab:blue", alpha=0.7) ax_hourly.set_xlabel("Hour of day", fontproperties=prop) - ax_hourly.set_ylabel("Avg ad duration per day (min)", color="tab:blue", fontproperties=prop) + ax_hourly.set_ylabel( + "Avg ad duration per day (min)", color="tab:blue", fontproperties=prop + ) ax_hourly.set_xticks(hours) ax_hourly.set_xticklabels([str(h) for h in hours], fontproperties=prop) ax_hourly.set_xlim(-0.5, 23.5) @@ -529,7 +546,9 @@ def _plot_combined(channel_id: str, profile: dict, heatmap: dict, stats: dict | ax_hourly_right = ax_hourly.twinx() ax_hourly_right.plot(hours, avg_counts, color="tab:orange", marker="o") - ax_hourly_right.set_ylabel("Avg number of breaks", color="tab:orange", fontproperties=prop) + ax_hourly_right.set_ylabel( + "Avg number of breaks", color="tab:orange", fontproperties=prop + ) for t in ax_hourly.get_yticklabels(): t.set_fontproperties(prop) @@ -539,8 +558,7 @@ def _plot_combined(channel_id: str, profile: dict, heatmap: dict, stats: dict | # --- Heatmap (bottom) --- days = heatmap.get("days", 0) normalized = [ - [min(value / (60 * days), 1.0) for value in row] - for row in heatmap["grid"] + [min(value / (60 * days), 1.0) for value in row] for row in heatmap["grid"] ] im = ax_heatmap.imshow( @@ -574,13 +592,15 @@ def _plot_combined(channel_id: str, profile: dict, heatmap: dict, stats: dict | if stats: overview_text = _build_overview_text(channel_id, stats) fig.text( - 0.73, 0.5, overview_text, + 0.73, + 0.5, + overview_text, transform=fig.transFigure, fontproperties=prop, fontsize=12, verticalalignment="center", horizontalalignment="left", - bbox=dict(boxstyle="round,pad=0.5", facecolor="wheat", alpha=0.8), + bbox={"boxstyle": "round,pad=0.5", "facecolor": "wheat", "alpha": 0.8}, ) fig.tight_layout(rect=[0, 0, 0.72 if stats else 1, 0.96]) @@ -595,7 +615,7 @@ def _plot_combined(channel_id: str, profile: dict, heatmap: dict, stats: dict | def _plot_weekday_overview(all_channels_data: list[dict], save=False) -> None: """ Plot a weekday overview for all channels. - + Each channel gets: - A bar showing number of ads per weekday - A horizontal heatmap strip showing ad coverage by weekday x hour @@ -608,7 +628,9 @@ def _plot_weekday_overview(all_channels_data: list[dict], save=False) -> None: num_channels = len(all_channels_data) # Create figure with 2 subplots side by side - fig, (ax_bars, ax_heatmap) = plt.subplots(1, 2, figsize=(18, max(8, num_channels * 0.5))) + fig, (ax_bars, ax_heatmap) = plt.subplots( + 1, 2, figsize=(18, max(8, num_channels * 0.5)) + ) # Prepare data for plotting channel_names = [] @@ -640,19 +662,23 @@ def _plot_weekday_overview(all_channels_data: list[dict], save=False) -> None: normalized_row = [] for weekday in range(7): for hour in range(24): - val = grid[weekday][hour] / max(hm_days_seen[weekday], 1) / 3600 # Fraction of hour + val = ( + grid[weekday][hour] / max(hm_days_seen[weekday], 1) / 3600 + ) # Fraction of hour normalized_row.append(min(val, 1.0)) heatmap_data.append(normalized_row) # --- Left plot: Grouped bar chart for weekday counts --- x = range(num_channels) bar_width = 0.12 - colors = plt.cm.tab10(range(7)) + colors = plt.cm.tab10(range(7)) # pylint: disable=no-member for i, weekday in enumerate(weekday_names): offsets = [xi + (i - 3) * bar_width for xi in x] values = [weekday_counts_all[ch][i] for ch in range(num_channels)] - ax_bars.barh(offsets, values, height=bar_width, label=weekday, color=colors[i], alpha=0.8) + ax_bars.barh( + offsets, values, height=bar_width, label=weekday, color=colors[i], alpha=0.8 + ) ax_bars.set_yticks(list(x)) ax_bars.set_yticklabels(channel_names, fontproperties=prop) @@ -687,7 +713,9 @@ def _plot_weekday_overview(all_channels_data: list[dict], save=False) -> None: cbar = fig.colorbar(im, ax=ax_heatmap, shrink=0.8) cbar.set_label("Fraction of hour in ads (avg per day)", fontproperties=prop) - fig.suptitle("Weekly ad patterns across all channels", fontproperties=prop, fontsize=16) + fig.suptitle( + "Weekly ad patterns across all channels", fontproperties=prop, fontsize=16 + ) fig.tight_layout(rect=[0, 0, 1, 0.96]) plt.show() @@ -697,10 +725,16 @@ def _plot_weekday_overview(all_channels_data: list[dict], save=False) -> None: print(f"Weekday overview saved to {filename}") -def _plot_weekday_channel(channel_id: str, weekday_profile: dict, weekday_hour_counts: dict, stats: dict | None = None, save=False) -> None: +def _plot_weekday_channel( + channel_id: str, + weekday_profile: dict, + weekday_hour_counts: dict, + stats: dict | None = None, + save=False, +) -> None: """ Plot a weekday overview for a single channel. - + Shows: - Bar chart of ad breaks per weekday - Heatmap of ad break counts by weekday x hour (7 rows x 24 columns) @@ -732,7 +766,14 @@ def _plot_weekday_channel(channel_id: str, weekday_profile: dict, weekday_hour_c x = range(7) bar_width = 0.35 - bars1 = ax_bars.bar([i - bar_width/2 for i in x], avg_counts, bar_width, label="Avg breaks", color="tab:blue", alpha=0.7) + bars1 = ax_bars.bar( + [i - bar_width / 2 for i in x], + avg_counts, + bar_width, + label="Avg breaks", + color="tab:blue", + alpha=0.7, + ) ax_bars.set_ylabel("Avg number of ad breaks", color="tab:blue", fontproperties=prop) ax_bars.set_xticks(list(x)) ax_bars.set_xticklabels(weekday_names, fontproperties=prop) @@ -740,11 +781,22 @@ def _plot_weekday_channel(channel_id: str, weekday_profile: dict, weekday_hour_c ax_bars.set_title("Ad breaks by day of week (average per day)", fontproperties=prop) ax_bars_right = ax_bars.twinx() - bars2 = ax_bars_right.bar([i + bar_width/2 for i in x], avg_duration_minutes, bar_width, label="Avg duration (min)", color="tab:orange", alpha=0.7) - ax_bars_right.set_ylabel("Avg ad duration (min)", color="tab:orange", fontproperties=prop) + bars2 = ax_bars_right.bar( + [i + bar_width / 2 for i in x], + avg_duration_minutes, + bar_width, + label="Avg duration (min)", + color="tab:orange", + alpha=0.7, + ) + ax_bars_right.set_ylabel( + "Avg ad duration (min)", color="tab:orange", fontproperties=prop + ) # Combined legend - ax_bars.legend([bars1, bars2], ["Avg breaks", "Avg duration (min)"], loc="upper right") + ax_bars.legend( + [bars1, bars2], ["Avg breaks", "Avg duration (min)"], loc="upper right" + ) for t in ax_bars.get_yticklabels(): t.set_fontproperties(prop) @@ -783,13 +835,15 @@ def _plot_weekday_channel(channel_id: str, weekday_profile: dict, weekday_hour_c if stats: overview_text = _build_overview_text(channel_id, stats) fig.text( - 0.73, 0.5, overview_text, + 0.73, + 0.5, + overview_text, transform=fig.transFigure, fontproperties=prop, fontsize=12, verticalalignment="center", horizontalalignment="left", - bbox=dict(boxstyle="round,pad=0.5", facecolor="wheat", alpha=0.8), + bbox={"boxstyle": "round,pad=0.5", "facecolor": "wheat", "alpha": 0.8}, ) fig.tight_layout(rect=[0, 0, 0.72 if stats else 1, 0.96]) @@ -805,7 +859,9 @@ def list_channels() -> list[str]: """List all channel IDs present in the database.""" conn = get_connection(DB_PATH) try: - cursor = conn.execute("SELECT DISTINCT channel_id FROM ads ORDER BY channel_id ASC") + cursor = conn.execute( + "SELECT DISTINCT channel_id FROM ads ORDER BY channel_id ASC" + ) return [row[0] for row in cursor.fetchall()] finally: conn.close() @@ -838,13 +894,15 @@ def _plot_channel_rankings(all_stats: list[dict], save=False) -> None: max_break_duration = stats["max_break"][0] if stats.get("max_break") else 0 - channels_data.append({ - "channel_id": channel_id, - "channel_name": channel_name, - "total_ads": stats.get("count", 0), - "total_duration": stats.get("total_duration", 0), - "longest_break": max_break_duration, - }) + channels_data.append( + { + "channel_id": channel_id, + "channel_name": channel_name, + "total_ads": stats.get("count", 0), + "total_duration": stats.get("total_duration", 0), + "longest_break": max_break_duration, + } + ) if not channels_data: print("No channel data for rankings.") @@ -861,7 +919,7 @@ def _plot_channel_rankings(all_stats: list[dict], save=False) -> None: for ax, (metric, title, xlabel, color) in zip(axes, rankings): # Sort by the metric (descending) - sorted_data = sorted(channels_data, key=lambda x: x[metric], reverse=True) + sorted_data = sorted(channels_data, key=lambda x, m=metric: x[m], reverse=True) names = [d["channel_name"] for d in sorted_data] values = [d[metric] for d in sorted_data] @@ -885,11 +943,11 @@ def _plot_channel_rankings(all_stats: list[dict], save=False) -> None: ax.invert_yaxis() # Highest at top # Add value labels on bars - for i, (bar, label) in enumerate(zip(bars, labels)): - width = bar.get_width() + for bar_rect, label in zip(bars, labels): + width = bar_rect.get_width() ax.text( width + max(display_values) * 0.01, - bar.get_y() + bar.get_height() / 2, + bar_rect.get_y() + bar_rect.get_height() / 2, label, va="center", ha="left", @@ -924,12 +982,12 @@ def process_all_channels(start_date, end_date) -> None: for file in output_dir.glob("*.png"): file.unlink() channel_ids = list_channels() - + # Collect data for all channels (for the weekday overview plot) all_channels_data = [] # Collect stats for all channels (for the rankings plot) all_stats = [] - + for channel_id in channel_ids: print(f"Processing channel {channel_id}...") rows = _load_rows(channel_id, start_date, end_date) @@ -939,30 +997,36 @@ def process_all_channels(start_date, end_date) -> None: hourly_profile = _compute_hourly_profile(rows) heatmap = _compute_heatmap(rows) _plot_combined(channel_id, hourly_profile, heatmap, stats=stats, save=True) - + # Compute weekday data for the overview plot weekday_profile = _compute_weekday_profile(rows) weekday_heatmap = _compute_weekday_hour_heatmap(rows) weekday_hour_counts = _compute_weekday_hour_counts(rows) - + # Generate individual weekday overview for this channel - _plot_weekday_channel(channel_id, weekday_profile, weekday_hour_counts, stats=stats, save=True) - - all_channels_data.append({ - "channel_id": channel_id, - "weekday_profile": weekday_profile, - "weekday_heatmap": weekday_heatmap, - }) - + _plot_weekday_channel( + channel_id, weekday_profile, weekday_hour_counts, stats=stats, save=True + ) + + all_channels_data.append( + { + "channel_id": channel_id, + "weekday_profile": weekday_profile, + "weekday_heatmap": weekday_heatmap, + } + ) + # Collect stats for rankings - all_stats.append({ - "channel_id": channel_id, - "stats": stats, - }) - + all_stats.append( + { + "channel_id": channel_id, + "stats": stats, + } + ) + # Generate the weekday overview plot for all channels _plot_weekday_overview(all_channels_data, save=True) - + # Generate the channel rankings plot _plot_channel_rankings(all_stats, save=True) @@ -1011,4 +1075,4 @@ def main() -> None: if __name__ == "__main__": CHANNELS_DATA = fetch_service_plan() - main() \ No newline at end of file + main()