Lint + update

This commit is contained in:
√(noham)²
2025-12-23 10:46:32 +01:00
parent 9f71bc6073
commit 2534210c91
7 changed files with 151 additions and 91 deletions

View File

@@ -36,20 +36,21 @@ The primary key `(channel_id, start_ts, end_ts)` prevents duplicates when the AP
### Visualizing collected ads
The helper `visualizer.py` script analyzes and visualizes ad data from the database:
The `visualizer/main.py` script analyzes and visualizes ad data from the database:
```bash
# Process all channels (default)
uv run python utils/visualizer.py
uv run ./visualizer/main.py
# Process a specific channel
uv run python utils/visualizer.py <channel-id>
uv run ./visualizer/main.py <channel-id>
# Filter by date range
uv run python utils/visualizer.py --start-date 2025-11-28 --end-date 2025-12-21
uv run ./visualizer/main.py --start-date 2025-11-28 --end-date 2025-12-21
```
# Single channel with date filter
uv run python utils/visualizer.py <channel-id> --start-date 2025-11-28
uv run ./visualizer/main.py <channel-id> --start-date 2025-11-28
```
**Single channel mode** displays:
@@ -57,7 +58,7 @@ uv run python utils/visualizer.py <channel-id> --start-date 2025-11-28
- A 24h profile (bars = average ad minutes per day, line = average break count)
- A minute-vs-hour heatmap showing ad coverage
**All channels mode** generates additional visualizations saved to `visualizer/`:
**All channels mode** generates additional visualizations saved to `visualizer_output/`:
- Combined hourly profile and heatmap for each channel
- Weekday analysis per channel (ad breaks by day of week, weekday×hour heatmap)
- Weekly ad patterns overview across all channels

View File

@@ -1,15 +1,18 @@
"""Data loading utilities for the ad visualizer."""
import sqlite3
from typing import Sequence, List, Optional
from pathlib import Path
import sys
from utils.scrap import DB_PATH, get_connection
# Allow running as a script from anywhere
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
from utils.scrap import DB_PATH, get_connection
Row = Sequence
def load_ads_data(
channel_id: str, start_date: Optional[str] = None, end_date: Optional[str] = None
) -> List[Row]:
@@ -50,4 +53,4 @@ def list_channels() -> List[str]:
)
return [row[0] for row in cursor.fetchall()]
finally:
conn.close()
conn.close()

View File

@@ -27,6 +27,7 @@ from visualizer.plotter import (
from visualizer.text_output import print_stats, build_overview_text
from visualizer.utils import CHANNELS_DATA
def process_all_channels(start_date, end_date) -> None:
"""Process all channels in the database and generate visualizations."""
output_dir = Path("visualizer_output")
@@ -35,8 +36,8 @@ def process_all_channels(start_date, end_date) -> None:
file.unlink()
channel_ids = list_channels()
all_channels_plot_data = [] # Data for combined weekday plots
all_channels_ranking_data = [] # Data for channel rankings
all_channels_plot_data = [] # Data for combined weekday plots
all_channels_ranking_data = [] # Data for channel rankings
for channel_id in channel_ids:
print(f"Processing channel {channel_id}...")
@@ -46,14 +47,30 @@ def process_all_channels(start_date, end_date) -> None:
hourly_profile = compute_hourly_profile(rows)
heatmap = compute_heatmap(rows)
plot_combined(channel_id, hourly_profile, heatmap, stats=stats, save=True, output_dir=output_dir, channels_data=CHANNELS_DATA, build_overview_text_func=build_overview_text)
plot_combined(
channel_id,
hourly_profile,
heatmap,
stats=stats,
save=True,
output_dir=output_dir,
channels_data=CHANNELS_DATA,
build_overview_text_func=build_overview_text,
)
weekday_profile = compute_weekday_profile(rows)
weekday_heatmap = compute_weekday_hour_heatmap(rows)
weekday_hour_counts = compute_weekday_hour_counts(rows)
plot_weekday_channel(
channel_id, weekday_profile, weekday_hour_counts, stats=stats, save=True, output_dir=output_dir, channels_data=CHANNELS_DATA, build_overview_text_func=build_overview_text
channel_id,
weekday_profile,
weekday_hour_counts,
stats=stats,
save=True,
output_dir=output_dir,
channels_data=CHANNELS_DATA,
build_overview_text_func=build_overview_text,
)
all_channels_plot_data.append(
@@ -71,8 +88,18 @@ def process_all_channels(start_date, end_date) -> None:
}
)
plot_weekday_overview(all_channels_plot_data, save=True, output_dir=output_dir, channels_data=CHANNELS_DATA)
plot_channel_rankings(all_channels_ranking_data, save=True, output_dir=output_dir, channels_data=CHANNELS_DATA)
plot_weekday_overview(
all_channels_plot_data,
save=True,
output_dir=output_dir,
channels_data=CHANNELS_DATA,
)
plot_channel_rankings(
all_channels_ranking_data,
save=True,
output_dir=output_dir,
channels_data=CHANNELS_DATA,
)
def main() -> None:
@@ -110,10 +137,24 @@ def main() -> None:
if not args.no_plot:
hourly_profile = compute_hourly_profile(rows)
plot_hourly_profile(args.channel_id, hourly_profile, stats=stats, output_dir=Path("visualizer_output"), channels_data=CHANNELS_DATA, build_overview_text_func=build_overview_text)
plot_hourly_profile(
args.channel_id,
hourly_profile,
stats=stats,
output_dir=Path("visualizer_output"),
channels_data=CHANNELS_DATA,
build_overview_text_func=build_overview_text,
)
heatmap = compute_heatmap(rows)
plot_heatmap(args.channel_id, heatmap, stats=stats, output_dir=Path("visualizer_output"), channels_data=CHANNELS_DATA, build_overview_text_func=build_overview_text)
plot_heatmap(
args.channel_id,
heatmap,
stats=stats,
output_dir=Path("visualizer_output"),
channels_data=CHANNELS_DATA,
build_overview_text_func=build_overview_text,
)
if __name__ == "__main__":
main()
main()

View File

@@ -1,7 +1,11 @@
import matplotlib.pyplot as plt
from matplotlib import font_manager as font_manager
"""Plotting utilities for the ad visualizer."""
from pathlib import Path
from typing import Dict, List, Callable
from typing import Dict, List, Callable, Optional
import matplotlib.pyplot as plt
from matplotlib import font_manager
from .utils import format_duration, get_channel_name
FPATH = "libs/LibertinusSerif-Regular.otf"
prop = font_manager.FontProperties(fname=FPATH, size=14)
@@ -13,13 +17,9 @@ try:
if font_name:
plt.rcParams["font.family"] = font_name
plt.rcParams["font.size"] = prop.get_size()
except (
Exception
): # pylint: disable=broad-exception-caught # pragma: no cover - optional font may be missing
except (OSError, ValueError):
font_name = None
# Renamed _format_duration and _human_ts to be accessible
from visualizer.utils import format_duration, human_ts, CHANNELS_DATA
def plot_hourly_profile(
channel_id: str,
@@ -27,10 +27,12 @@ def plot_hourly_profile(
stats: Dict | None = None,
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {},
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
channels_data: Optional[Dict] = None,
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: "",
) -> None:
"""Plot the average ad activity per hour of day."""
if channels_data is None:
channels_data = {}
if not profile or not profile.get("days"):
print("No data available or not enough distinct days for the hourly plot.")
return
@@ -55,10 +57,7 @@ def plot_hourly_profile(
ax_right.plot(hours, avg_counts, color="tab:orange", marker="o")
ax_right.set_ylabel("Avg number of breaks", color="tab:orange", fontproperties=prop)
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
channel_name = get_channel_name(channel_id, channels_data)
for t in ax_left.get_yticklabels():
t.set_fontproperties(prop)
@@ -74,7 +73,9 @@ def plot_hourly_profile(
)
if stats:
overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
overview_text = build_overview_text_func(
channel_id, stats, channels_data=channels_data
)
fig.text(
0.73,
0.5,
@@ -104,10 +105,12 @@ def plot_heatmap(
stats: Dict | None = None,
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {},
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
channels_data: Optional[Dict] = None,
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: "",
) -> None:
"""Plot a heatmap of ad minute coverage by minute of hour and hour of day."""
if channels_data is None:
channels_data = {}
if not heatmap_data or not heatmap_data.get("days"):
print("No data available or not enough distinct days for the heatmap plot.")
return
@@ -137,10 +140,7 @@ def plot_heatmap(
cbar = fig.colorbar(im, ax=ax)
cbar.set_label("Share of minute spent in ads per day", fontproperties=prop)
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
channel_name = get_channel_name(channel_id, channels_data)
fig.suptitle(
(
@@ -151,7 +151,9 @@ def plot_heatmap(
)
if stats:
overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
overview_text = build_overview_text_func(
channel_id, stats, channels_data=channels_data
)
fig.text(
0.73,
0.5,
@@ -182,10 +184,12 @@ def plot_combined(
stats: Dict | None = None,
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {},
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
channels_data: Optional[Dict] = None,
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: "",
) -> None:
"""Plot both hourly profile and heatmap in a single figure with the overview text box."""
if channels_data is None:
channels_data = {}
if not profile or not profile.get("days"):
print("No data available for the hourly plot.")
return
@@ -193,11 +197,7 @@ def plot_combined(
print("No data available for the heatmap plot.")
return
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
break
channel_name = get_channel_name(channel_id, channels_data)
fig, (ax_hourly, ax_heatmap) = plt.subplots(2, 1, figsize=(14, 10))
@@ -262,7 +262,9 @@ def plot_combined(
)
if stats:
overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
overview_text = build_overview_text_func(
channel_id, stats, channels_data=channels_data
)
fig.text(
0.73,
0.5,
@@ -290,7 +292,7 @@ def plot_weekday_overview(
all_channels_data: List[Dict],
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {}
channels_data: Optional[Dict] = None,
) -> None:
"""
Plot a weekday overview for all channels.
@@ -298,6 +300,8 @@ def plot_weekday_overview(
- A bar showing number of ads per weekday
- A horizontal heatmap strip showing ad coverage by weekday x hour
"""
if channels_data is None:
channels_data = {}
if not all_channels_data:
print("No data available for weekday overview.")
return
@@ -315,11 +319,7 @@ def plot_weekday_overview(
for data in all_channels_data:
channel_id = data["channel_id"]
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
break
channel_name = get_channel_name(channel_id, channels_data)
channel_names.append(f"{channel_name}")
weekday_profile = data.get("weekday_profile", {})
@@ -335,15 +335,13 @@ def plot_weekday_overview(
normalized_row = []
for weekday in range(7):
for hour in range(24):
val = (
grid[weekday][hour] / max(hm_days_seen[weekday], 1) / 3600
)
val = grid[weekday][hour] / max(hm_days_seen[weekday], 1) / 3600
normalized_row.append(min(val, 1.0))
heatmap_plot_data.append(normalized_row)
x = range(num_channels)
bar_width = 0.12
colors = plt.cm.tab10(range(7))
colors = plt.get_cmap("tab10").colors[:7]
for i, weekday in enumerate(weekday_names):
offsets = [xi + (i - 3) * bar_width for xi in x]
@@ -401,27 +399,24 @@ def plot_weekday_channel(
stats: Dict | None = None,
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {},
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
channels_data: Optional[Dict] = None,
build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: "",
) -> None:
"""
Plot a weekday overview for a single channel.
Shows:
- Bar chart of ad breaks per weekday
- Heatmap of ad break counts by weekday x hour (7 rows x 24 columns)
- Stats text box on the right
"""
if channels_data is None:
channels_data = {}
if not weekday_profile or not weekday_hour_counts:
print(f"No weekday data available for channel {channel_id}.")
return
weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
break
channel_name = get_channel_name(channel_id, channels_data)
fig, (ax_bars, ax_heatmap) = plt.subplots(2, 1, figsize=(14, 8))
@@ -499,7 +494,9 @@ def plot_weekday_channel(
)
if stats:
overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
overview_text = build_overview_text_func(
channel_id, stats, channels_data=channels_data
)
fig.text(
0.73,
0.5,
@@ -527,7 +524,7 @@ def plot_channel_rankings(
all_stats: List[Dict],
save: bool = False,
output_dir: Path = Path("."),
channels_data: Dict = {}
channels_data: Optional[Dict] = None,
) -> None:
"""
Plot rankings of all channels based on:
@@ -535,6 +532,8 @@ def plot_channel_rankings(
- Total ad duration
- Longest single ad break
"""
if channels_data is None:
channels_data = {}
if not all_stats:
print("No data available for channel rankings.")
return
@@ -546,11 +545,7 @@ def plot_channel_rankings(
if not stats:
continue
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
break
channel_name = get_channel_name(channel_id, channels_data)
max_break_duration = stats["max_break"][0] if stats.get("max_break") else 0
@@ -568,7 +563,9 @@ def plot_channel_rankings(
print("No channel data for rankings.")
return
fig, axes = plt.subplots(1, 3, figsize=(18, max(8, len(channels_data_for_plot) * 0.4)))
fig, axes = plt.subplots(
1, 3, figsize=(18, max(8, len(channels_data_for_plot) * 0.4))
)
rankings = [
("total_ads", "Total Number of Ads", "Number of ad breaks", "tab:blue"),
@@ -577,7 +574,9 @@ def plot_channel_rankings(
]
for ax, (metric, title, xlabel, color) in zip(axes, rankings):
sorted_data = sorted(channels_data_for_plot, key=lambda x, m=metric: x[m], reverse=True)
sorted_data = sorted(
channels_data_for_plot, key=lambda x, m=metric: x[m], reverse=True
)
names = [d["channel_name"] for d in sorted_data]
values = [d[metric] for d in sorted_data]
@@ -626,4 +625,4 @@ def plot_channel_rankings(
filename = output_dir / "channel_rankings.png"
fig.savefig(filename, dpi=300)
print(f"Channel rankings saved to {filename}")
plt.close(fig)
plt.close(fig)

View File

@@ -1,3 +1,5 @@
"""Statistics computation utilities for the visualizer."""
from collections import defaultdict
from datetime import datetime, timedelta
import statistics
@@ -215,4 +217,4 @@ def compute_weekday_hour_heatmap(rows: Iterable[Row]) -> Dict:
return {
"grid": heatmap,
"days_seen": [len(s) for s in weekday_days_seen],
}
}

View File

@@ -1,6 +1,8 @@
from datetime import datetime
"""Text output utilities for the visualizer."""
from typing import Dict
from visualizer.utils import format_duration, human_ts, CHANNELS_DATA
from .utils import format_duration, human_ts, CHANNELS_DATA, get_channel_name
def print_stats(channel_id: str, stats: Dict) -> None:
"""Print formatted ad break statistics to the console."""
@@ -36,18 +38,15 @@ def print_stats(channel_id: str, stats: Dict) -> None:
)
def build_overview_text(channel_id: str, stats: Dict, channels_data: Dict = CHANNELS_DATA) -> str:
def build_overview_text(
channel_id: str, stats: Dict, channels_data: Dict = CHANNELS_DATA
) -> str:
"""Build a multi-line string with channel overview stats."""
if not stats:
return ""
max_break_duration, max_break_row = stats["max_break"]
channel_name = channel_id
for ch_id, channel_info in (channels_data or {}).items():
if ch_id == channel_id:
channel_name = channel_info["name"]
break
channel_name = get_channel_name(channel_id, channels_data)
lines = [
f"Channel: {channel_name} ({channel_id})",
@@ -60,4 +59,4 @@ def build_overview_text(channel_id: str, stats: Dict, channels_data: Dict = CHAN
f"Longest break: {format_duration(max_break_duration)}",
f" ({human_ts(max_break_row[1])}{human_ts(max_break_row[2])})",
]
return "\n".join(lines)
return "\n".join(lines)

View File

@@ -1,16 +1,19 @@
"""Utility functions for the visualizer."""
from datetime import datetime
import sys
from pathlib import Path
from typing import Dict
import sys
from utils.scrap import fetch_service_plan
# Allow running as a script from anywhere
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from utils.scrap import fetch_service_plan
# Load CHANNELS_DATA once when this module is imported
CHANNELS_DATA: Dict = fetch_service_plan()
def format_duration(seconds: int) -> str:
"""Format a duration in seconds into a human-readable string (e.g., '1h 2m 3s')."""
minutes, secs = divmod(seconds, 60)
@@ -24,4 +27,16 @@ def format_duration(seconds: int) -> str:
def human_ts(ts_value: int) -> str:
    """Render a Unix timestamp as ``DD/MM/YYYY at HH:MM:SS``.

    The timestamp is interpreted by ``datetime.fromtimestamp`` with no
    explicit timezone argument, so the result is expressed in the local
    timezone of the machine running the code.
    """
    moment = datetime.fromtimestamp(ts_value)
    return moment.strftime("%d/%m/%Y at %H:%M:%S")
def get_channel_name(channel_id: str, channels_data: Dict = None) -> str:
"""Get the channel name from channel_id, or return channel_id if not found."""
if channels_data is None:
channels_data = CHANNELS_DATA
channel_name = channel_id
for ch_id, channel_info in channels_data.items():
if ch_id == channel_id:
channel_name = channel_info["name"]
break
return channel_name