Mirror of https://github.com/NohamR/OqeeAdWatch.git (synced 2026-01-10 00:08:17 +00:00)
Lint + update
This commit changes README.md (13 changed lines) and the visualizer modules shown below.

README.md
@@ -36,20 +36,21 @@ The primary key `(channel_id, start_ts, end_ts)` prevents duplicates when the AP
 
 ### Visualizing collected ads
 
-The helper `visualizer.py` script analyzes and visualizes ad data from the database:
+The `visualizer/main.py` script analyzes and visualizes ad data from the database:
 
 ```bash
 # Process all channels (default)
-uv run python utils/visualizer.py
+uv run ./visualizer/main.py
 
 # Process a specific channel
-uv run python utils/visualizer.py <channel-id>
+uv run ./visualizer/main.py <channel-id>
 
 # Filter by date range
-uv run python utils/visualizer.py --start-date 2025-11-28 --end-date 2025-12-21
+uv run ./visualizer/main.py --start-date 2025-11-28 --end-date 2025-12-21
+```
 
 # Single channel with date filter
-uv run python utils/visualizer.py <channel-id> --start-date 2025-11-28
+uv run ./visualizer/main.py <channel-id> --start-date 2025-11-28
 ```
 
 **Single channel mode** displays:
@@ -57,7 +58,7 @@ uv run python utils/visualizer.py <channel-id> --start-date 2025-11-28
 - A 24h profile (bars = average ad minutes per day, line = average break count)
 - A minute-vs-hour heatmap showing ad coverage
 
-**All channels mode** generates additional visualizations saved to `visualizer/`:
+**All channels mode** generates additional visualizations saved to `visualizer_output/`:
 - Combined hourly profile and heatmap for each channel
 - Weekday analysis per channel (ad breaks by day of week, weekday×hour heatmap)
 - Weekly ad patterns overview across all channels
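The usage shown above implies a small CLI: an optional positional channel id, date-range flags, and (from `args.no_plot` later in this diff) a switch to skip plotting. A minimal sketch of such a parser follows; the real one lives in `visualizer/main.py` and may differ in names and help text.

```python
# Hypothetical sketch of the CLI surface described in the README above.
# Argument names are inferred from the README examples and from
# `args.channel_id` / `args.no_plot` used later in this diff.
import argparse


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="Analyze and visualize collected ad data")
    parser.add_argument("channel_id", nargs="?", default=None,
                        help="process a single channel; omit to process all channels")
    parser.add_argument("--start-date", help="lower bound, YYYY-MM-DD")
    parser.add_argument("--end-date", help="upper bound, YYYY-MM-DD")
    parser.add_argument("--no-plot", action="store_true",
                        help="compute and print stats without drawing figures")
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args(["some-channel-id", "--start-date", "2025-11-28"])
    print(args.channel_id, args.start_date, args.end_date, args.no_plot)
```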
(visualizer data-loading module; file path not shown in this view)
@@ -1,15 +1,18 @@
+"""Data loading utilities for the ad visualizer."""
+
 import sqlite3
 from typing import Sequence, List, Optional
 from pathlib import Path
 import sys
 
+from utils.scrap import DB_PATH, get_connection
+
 # Allow running as a script from anywhere
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
 
-from utils.scrap import DB_PATH, get_connection
 
 Row = Sequence
 
 
 def load_ads_data(
     channel_id: str, start_date: Optional[str] = None, end_date: Optional[str] = None
 ) -> List[Row]:
@@ -50,4 +53,4 @@ def list_channels() -> List[str]:
         )
         return [row[0] for row in cursor.fetchall()]
     finally:
-        conn.close()
\ No newline at end of file
+        conn.close()
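One side effect of the import reshuffle above: `from utils.scrap import DB_PATH, get_connection` now executes before the `sys.path.insert(...)` call that makes the repository root importable, so running the file directly from outside the repository root can fail with `ModuleNotFoundError`. A common lint-friendly way to keep the path hack working is to mark the late import explicitly; a sketch of that layout (the `# noqa: E402` marker is the flake8/ruff convention for an intentionally late import, not something this commit uses):

```python
import sys
from pathlib import Path

# Make the repository root importable before pulling in first-party modules.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))

from utils.scrap import DB_PATH, get_connection  # noqa: E402
```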
visualizer/main.py
@@ -27,6 +27,7 @@ from visualizer.plotter import (
 from visualizer.text_output import print_stats, build_overview_text
 from visualizer.utils import CHANNELS_DATA
 
+
 def process_all_channels(start_date, end_date) -> None:
     """Process all channels in the database and generate visualizations."""
     output_dir = Path("visualizer_output")
@@ -35,8 +36,8 @@ def process_all_channels(start_date, end_date) -> None:
             file.unlink()
     channel_ids = list_channels()
 
-    all_channels_plot_data = [] # Data for combined weekday plots
-    all_channels_ranking_data = [] # Data for channel rankings
+    all_channels_plot_data = []  # Data for combined weekday plots
+    all_channels_ranking_data = []  # Data for channel rankings
 
     for channel_id in channel_ids:
         print(f"Processing channel {channel_id}...")
@@ -46,14 +47,30 @@ def process_all_channels(start_date, end_date) -> None:
 
         hourly_profile = compute_hourly_profile(rows)
         heatmap = compute_heatmap(rows)
-        plot_combined(channel_id, hourly_profile, heatmap, stats=stats, save=True, output_dir=output_dir, channels_data=CHANNELS_DATA, build_overview_text_func=build_overview_text)
+        plot_combined(
+            channel_id,
+            hourly_profile,
+            heatmap,
+            stats=stats,
+            save=True,
+            output_dir=output_dir,
+            channels_data=CHANNELS_DATA,
+            build_overview_text_func=build_overview_text,
+        )
 
         weekday_profile = compute_weekday_profile(rows)
         weekday_heatmap = compute_weekday_hour_heatmap(rows)
         weekday_hour_counts = compute_weekday_hour_counts(rows)
 
         plot_weekday_channel(
-            channel_id, weekday_profile, weekday_hour_counts, stats=stats, save=True, output_dir=output_dir, channels_data=CHANNELS_DATA, build_overview_text_func=build_overview_text
+            channel_id,
+            weekday_profile,
+            weekday_hour_counts,
+            stats=stats,
+            save=True,
+            output_dir=output_dir,
+            channels_data=CHANNELS_DATA,
+            build_overview_text_func=build_overview_text,
         )
 
         all_channels_plot_data.append(
@@ -71,8 +88,18 @@ def process_all_channels(start_date, end_date) -> None:
             }
         )
 
-    plot_weekday_overview(all_channels_plot_data, save=True, output_dir=output_dir, channels_data=CHANNELS_DATA)
-    plot_channel_rankings(all_channels_ranking_data, save=True, output_dir=output_dir, channels_data=CHANNELS_DATA)
+    plot_weekday_overview(
+        all_channels_plot_data,
+        save=True,
+        output_dir=output_dir,
+        channels_data=CHANNELS_DATA,
+    )
+    plot_channel_rankings(
+        all_channels_ranking_data,
+        save=True,
+        output_dir=output_dir,
+        channels_data=CHANNELS_DATA,
+    )
 
 
 def main() -> None:
@@ -110,10 +137,24 @@ def main() -> None:
 
     if not args.no_plot:
         hourly_profile = compute_hourly_profile(rows)
-        plot_hourly_profile(args.channel_id, hourly_profile, stats=stats, output_dir=Path("visualizer_output"), channels_data=CHANNELS_DATA, build_overview_text_func=build_overview_text)
+        plot_hourly_profile(
+            args.channel_id,
+            hourly_profile,
+            stats=stats,
+            output_dir=Path("visualizer_output"),
+            channels_data=CHANNELS_DATA,
+            build_overview_text_func=build_overview_text,
+        )
         heatmap = compute_heatmap(rows)
-        plot_heatmap(args.channel_id, heatmap, stats=stats, output_dir=Path("visualizer_output"), channels_data=CHANNELS_DATA, build_overview_text_func=build_overview_text)
+        plot_heatmap(
+            args.channel_id,
+            heatmap,
+            stats=stats,
+            output_dir=Path("visualizer_output"),
+            channels_data=CHANNELS_DATA,
+            build_overview_text_func=build_overview_text,
+        )
 
 
 if __name__ == "__main__":
     main()
visualizer/plotter.py
@@ -1,7 +1,11 @@
-import matplotlib.pyplot as plt
-from matplotlib import font_manager as font_manager
+"""Plotting utilities for the ad visualizer."""
+
 from pathlib import Path
-from typing import Dict, List, Callable
+from typing import Dict, List, Callable, Optional
+import matplotlib.pyplot as plt
+from matplotlib import font_manager
+
+from .utils import format_duration, get_channel_name
 
 FPATH = "libs/LibertinusSerif-Regular.otf"
 prop = font_manager.FontProperties(fname=FPATH, size=14)
@@ -13,13 +17,9 @@ try:
     if font_name:
         plt.rcParams["font.family"] = font_name
         plt.rcParams["font.size"] = prop.get_size()
-except (
-    Exception
-):  # pylint: disable=broad-exception-caught  # pragma: no cover - optional font may be missing
+except (OSError, ValueError):
     font_name = None
 
-# Renamed _format_duration and _human_ts to be accessible
-from visualizer.utils import format_duration, human_ts, CHANNELS_DATA
 
 def plot_hourly_profile(
     channel_id: str,
@@ -27,10 +27,12 @@ def plot_hourly_profile(
     stats: Dict | None = None,
     save: bool = False,
     output_dir: Path = Path("."),
-    channels_data: Dict = {},
-    build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
+    channels_data: Optional[Dict] = None,
+    build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: "",
 ) -> None:
     """Plot the average ad activity per hour of day."""
+    if channels_data is None:
+        channels_data = {}
     if not profile or not profile.get("days"):
         print("No data available or not enough distinct days for the hourly plot.")
         return
@@ -55,10 +57,7 @@ def plot_hourly_profile(
     ax_right.plot(hours, avg_counts, color="tab:orange", marker="o")
     ax_right.set_ylabel("Avg number of breaks", color="tab:orange", fontproperties=prop)
 
-    channel_name = channel_id
-    for ch_id, channel_info in (channels_data or {}).items():
-        if ch_id == channel_id:
-            channel_name = channel_info["name"]
+    channel_name = get_channel_name(channel_id, channels_data)
 
     for t in ax_left.get_yticklabels():
         t.set_fontproperties(prop)
@@ -74,7 +73,9 @@ def plot_hourly_profile(
     )
 
     if stats:
-        overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
+        overview_text = build_overview_text_func(
+            channel_id, stats, channels_data=channels_data
+        )
         fig.text(
             0.73,
             0.5,
@@ -104,10 +105,12 @@ def plot_heatmap(
     stats: Dict | None = None,
     save: bool = False,
     output_dir: Path = Path("."),
-    channels_data: Dict = {},
-    build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
+    channels_data: Optional[Dict] = None,
+    build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: "",
 ) -> None:
     """Plot a heatmap of ad minute coverage by minute of hour and hour of day."""
+    if channels_data is None:
+        channels_data = {}
     if not heatmap_data or not heatmap_data.get("days"):
         print("No data available or not enough distinct days for the heatmap plot.")
         return
@@ -137,10 +140,7 @@ def plot_heatmap(
     cbar = fig.colorbar(im, ax=ax)
     cbar.set_label("Share of minute spent in ads per day", fontproperties=prop)
 
-    channel_name = channel_id
-    for ch_id, channel_info in (channels_data or {}).items():
-        if ch_id == channel_id:
-            channel_name = channel_info["name"]
+    channel_name = get_channel_name(channel_id, channels_data)
 
     fig.suptitle(
         (
@@ -151,7 +151,9 @@ def plot_heatmap(
     )
 
     if stats:
-        overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
+        overview_text = build_overview_text_func(
+            channel_id, stats, channels_data=channels_data
+        )
         fig.text(
             0.73,
             0.5,
@@ -182,10 +184,12 @@ def plot_combined(
     stats: Dict | None = None,
     save: bool = False,
     output_dir: Path = Path("."),
-    channels_data: Dict = {},
-    build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
+    channels_data: Optional[Dict] = None,
+    build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: "",
 ) -> None:
     """Plot both hourly profile and heatmap in a single figure with the overview text box."""
+    if channels_data is None:
+        channels_data = {}
     if not profile or not profile.get("days"):
         print("No data available for the hourly plot.")
         return
@@ -193,11 +197,7 @@ def plot_combined(
         print("No data available for the heatmap plot.")
         return
 
-    channel_name = channel_id
-    for ch_id, channel_info in (channels_data or {}).items():
-        if ch_id == channel_id:
-            channel_name = channel_info["name"]
-            break
+    channel_name = get_channel_name(channel_id, channels_data)
 
     fig, (ax_hourly, ax_heatmap) = plt.subplots(2, 1, figsize=(14, 10))
 
@@ -262,7 +262,9 @@ def plot_combined(
     )
 
     if stats:
-        overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
+        overview_text = build_overview_text_func(
+            channel_id, stats, channels_data=channels_data
+        )
         fig.text(
             0.73,
             0.5,
@@ -290,7 +292,7 @@ def plot_weekday_overview(
     all_channels_data: List[Dict],
     save: bool = False,
     output_dir: Path = Path("."),
-    channels_data: Dict = {}
+    channels_data: Optional[Dict] = None,
 ) -> None:
     """
     Plot a weekday overview for all channels.
@@ -298,6 +300,8 @@ def plot_weekday_overview(
     - A bar showing number of ads per weekday
     - A horizontal heatmap strip showing ad coverage by weekday x hour
     """
+    if channels_data is None:
+        channels_data = {}
     if not all_channels_data:
         print("No data available for weekday overview.")
         return
@@ -315,11 +319,7 @@ def plot_weekday_overview(
 
     for data in all_channels_data:
         channel_id = data["channel_id"]
-        channel_name = channel_id
-        for ch_id, channel_info in (channels_data or {}).items():
-            if ch_id == channel_id:
-                channel_name = channel_info["name"]
-                break
+        channel_name = get_channel_name(channel_id, channels_data)
         channel_names.append(f"{channel_name}")
 
         weekday_profile = data.get("weekday_profile", {})
@@ -335,15 +335,13 @@ def plot_weekday_overview(
         normalized_row = []
         for weekday in range(7):
             for hour in range(24):
-                val = (
-                    grid[weekday][hour] / max(hm_days_seen[weekday], 1) / 3600
-                )
+                val = grid[weekday][hour] / max(hm_days_seen[weekday], 1) / 3600
                 normalized_row.append(min(val, 1.0))
         heatmap_plot_data.append(normalized_row)
 
     x = range(num_channels)
     bar_width = 0.12
-    colors = plt.cm.tab10(range(7))
+    colors = plt.get_cmap("tab10").colors[:7]
 
     for i, weekday in enumerate(weekday_names):
         offsets = [xi + (i - 3) * bar_width for xi in x]
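The `colors` change in the last hunk swaps `plt.cm.tab10(range(7))` (sampling the colormap into an RGBA array) for `plt.get_cmap("tab10").colors[:7]` (slicing the colormap's listed colors). Both yield seven distinct bar colors; a quick sketch to confirm, assuming Matplotlib is installed:

```python
import matplotlib.pyplot as plt

sampled = plt.cm.tab10(range(7))            # (7, 4) array of RGBA values
listed = plt.get_cmap("tab10").colors[:7]   # first 7 of the colormap's 10 listed RGB tuples
print(len(sampled), len(listed))            # 7 7
```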
@@ -401,27 +399,24 @@ def plot_weekday_channel(
     stats: Dict | None = None,
     save: bool = False,
     output_dir: Path = Path("."),
-    channels_data: Dict = {},
-    build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: ""
+    channels_data: Optional[Dict] = None,
+    build_overview_text_func: Callable[[str, Dict], str] = lambda x, y: "",
 ) -> None:
     """
     Plot a weekday overview for a single channel.
-    Shows:
     - Bar chart of ad breaks per weekday
     - Heatmap of ad break counts by weekday x hour (7 rows x 24 columns)
     - Stats text box on the right
     """
+    if channels_data is None:
+        channels_data = {}
     if not weekday_profile or not weekday_hour_counts:
         print(f"No weekday data available for channel {channel_id}.")
         return
 
     weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
 
-    channel_name = channel_id
-    for ch_id, channel_info in (channels_data or {}).items():
-        if ch_id == channel_id:
-            channel_name = channel_info["name"]
-            break
+    channel_name = get_channel_name(channel_id, channels_data)
 
     fig, (ax_bars, ax_heatmap) = plt.subplots(2, 1, figsize=(14, 8))
 
@@ -499,7 +494,9 @@ def plot_weekday_channel(
     )
 
     if stats:
-        overview_text = build_overview_text_func(channel_id, stats, channels_data=channels_data)
+        overview_text = build_overview_text_func(
+            channel_id, stats, channels_data=channels_data
+        )
         fig.text(
             0.73,
             0.5,
@@ -527,7 +524,7 @@ def plot_channel_rankings(
     all_stats: List[Dict],
     save: bool = False,
     output_dir: Path = Path("."),
-    channels_data: Dict = {}
+    channels_data: Optional[Dict] = None,
 ) -> None:
     """
     Plot rankings of all channels based on:
@@ -535,6 +532,8 @@ def plot_channel_rankings(
     - Total ad duration
     - Longest single ad break
     """
+    if channels_data is None:
+        channels_data = {}
     if not all_stats:
         print("No data available for channel rankings.")
         return
@@ -546,11 +545,7 @@ def plot_channel_rankings(
         if not stats:
             continue
 
-        channel_name = channel_id
-        for ch_id, channel_info in (channels_data or {}).items():
-            if ch_id == channel_id:
-                channel_name = channel_info["name"]
-                break
+        channel_name = get_channel_name(channel_id, channels_data)
 
         max_break_duration = stats["max_break"][0] if stats.get("max_break") else 0
 
@@ -568,7 +563,9 @@ def plot_channel_rankings(
         print("No channel data for rankings.")
         return
 
-    fig, axes = plt.subplots(1, 3, figsize=(18, max(8, len(channels_data_for_plot) * 0.4)))
+    fig, axes = plt.subplots(
+        1, 3, figsize=(18, max(8, len(channels_data_for_plot) * 0.4))
+    )
 
     rankings = [
         ("total_ads", "Total Number of Ads", "Number of ad breaks", "tab:blue"),
@@ -577,7 +574,9 @@ def plot_channel_rankings(
     ]
 
     for ax, (metric, title, xlabel, color) in zip(axes, rankings):
-        sorted_data = sorted(channels_data_for_plot, key=lambda x, m=metric: x[m], reverse=True)
+        sorted_data = sorted(
+            channels_data_for_plot, key=lambda x, m=metric: x[m], reverse=True
+        )
 
         names = [d["channel_name"] for d in sorted_data]
         values = [d[metric] for d in sorted_data]
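The change repeated throughout these hunks — `channels_data: Dict = {}` becoming `Optional[Dict] = None` plus an explicit `if channels_data is None:` guard — removes mutable default arguments. A default `{}` is created once when the function is defined and shared by every call, so mutations leak across calls. A standalone illustration of the difference (not code from this repository):

```python
def risky(data: dict = {}) -> dict:
    # The default dict is created once and reused by every call.
    data.setdefault("calls", 0)
    data["calls"] += 1
    return data


def safe(data: dict | None = None) -> dict:
    # A fresh dict per call, matching the guard added in this commit.
    if data is None:
        data = {}
    data.setdefault("calls", 0)
    data["calls"] += 1
    return data


print(risky(), risky())  # {'calls': 2} {'calls': 2} -- shared state
print(safe(), safe())    # {'calls': 1} {'calls': 1} -- independent state
```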
@@ -626,4 +625,4 @@ def plot_channel_rankings(
    filename = output_dir / "channel_rankings.png"
    fig.savefig(filename, dpi=300)
    print(f"Channel rankings saved to {filename}")
-    plt.close(fig)
\ No newline at end of file
+    plt.close(fig)
(visualizer statistics module; file path not shown in this view)
@@ -1,3 +1,5 @@
+"""Statistics computation utilities for the visualizer."""
+
 from collections import defaultdict
 from datetime import datetime, timedelta
 import statistics
@@ -215,4 +217,4 @@ def compute_weekday_hour_heatmap(rows: Iterable[Row]) -> Dict:
     return {
         "grid": heatmap,
         "days_seen": [len(s) for s in weekday_days_seen],
-    }
\ No newline at end of file
+    }
visualizer/text_output.py
@@ -1,6 +1,8 @@
-from datetime import datetime
+"""Text output utilities for the visualizer."""
+
 from typing import Dict
-from visualizer.utils import format_duration, human_ts, CHANNELS_DATA
+
+from .utils import format_duration, human_ts, CHANNELS_DATA, get_channel_name
 
 
 def print_stats(channel_id: str, stats: Dict) -> None:
     """Print formatted ad break statistics to the console."""
@@ -36,18 +38,15 @@ def print_stats(channel_id: str, stats: Dict) -> None:
     )
 
 
-def build_overview_text(channel_id: str, stats: Dict, channels_data: Dict = CHANNELS_DATA) -> str:
+def build_overview_text(
+    channel_id: str, stats: Dict, channels_data: Dict = CHANNELS_DATA
+) -> str:
     """Build a multi-line string with channel overview stats."""
     if not stats:
         return ""
 
     max_break_duration, max_break_row = stats["max_break"]
 
-    channel_name = channel_id
-    for ch_id, channel_info in (channels_data or {}).items():
-        if ch_id == channel_id:
-            channel_name = channel_info["name"]
-            break
+    channel_name = get_channel_name(channel_id, channels_data)
 
     lines = [
         f"Channel: {channel_name} ({channel_id})",
@@ -60,4 +59,4 @@ def build_overview_text(channel_id: str, stats: Dict, channels_data: Dict = CHAN
         f"Longest break: {format_duration(max_break_duration)}",
         f"  ({human_ts(max_break_row[1])} → {human_ts(max_break_row[2])})",
     ]
-    return "\n".join(lines)
\ No newline at end of file
+    return "\n".join(lines)
visualizer/utils.py
@@ -1,16 +1,19 @@
+"""Utility functions for the visualizer."""
+
 from datetime import datetime
-import sys
 from pathlib import Path
 from typing import Dict
+import sys
+
+from utils.scrap import fetch_service_plan
 
 # Allow running as a script from anywhere
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 
-from utils.scrap import fetch_service_plan
-
 # Load CHANNELS_DATA once when this module is imported
 CHANNELS_DATA: Dict = fetch_service_plan()
 
 
 def format_duration(seconds: int) -> str:
     """Format a duration in seconds into a human-readable string (e.g., '1h 2m 3s')."""
     minutes, secs = divmod(seconds, 60)
@@ -24,4 +27,16 @@ def format_duration(seconds: int) -> str:
 
 def human_ts(ts_value: int) -> str:
     """Convert a Unix timestamp to a human-readable date and time string."""
     return datetime.fromtimestamp(ts_value).strftime("%d/%m/%Y at %H:%M:%S")
+
+
+def get_channel_name(channel_id: str, channels_data: Dict = None) -> str:
+    """Get the channel name from channel_id, or return channel_id if not found."""
+    if channels_data is None:
+        channels_data = CHANNELS_DATA
+    channel_name = channel_id
+    for ch_id, channel_info in channels_data.items():
+        if ch_id == channel_id:
+            channel_name = channel_info["name"]
+            break
+    return channel_name
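The new `get_channel_name` helper above centralizes the name lookup that the plotter and text-output functions previously inlined. Since `channels_data` maps channel ids to info dicts, the loop is equivalent to a plain dictionary lookup; a more compact sketch of the same behaviour (stand-in data, not the committed code):

```python
from typing import Dict, Optional

# Stand-in for the module-level CHANNELS_DATA normally loaded via fetch_service_plan().
CHANNELS_DATA: Dict = {"some-channel-id": {"name": "Example Channel"}}


def get_channel_name(channel_id: str, channels_data: Optional[Dict] = None) -> str:
    """Return the channel's display name, falling back to the raw id."""
    data = CHANNELS_DATA if channels_data is None else channels_data
    info = data.get(channel_id)
    return info["name"] if info else channel_id


print(get_channel_name("some-channel-id"))  # Example Channel
print(get_channel_name("unknown-id"))       # unknown-id
```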