From c0f862636a113fb3d57b45d4caed1318f5003ee4 Mon Sep 17 00:00:00 2001 From: Cal Corum Date: Sat, 11 Apr 2026 10:12:23 -0500 Subject: [PATCH] fix(render): schedule image_url backfill on cache hits The card render endpoint's file-on-disk cache short-circuit returned FileResponse immediately when the cached PNG existed, bypassing the backfill BackgroundTask that populates battingcard.image_url / pitchingcard.image_url. If a variant row was later recreated with image_url=NULL while the PNG was still cached, the DB column stayed NULL forever, breaking every downstream consumer that reads image_url for display. Extract the backfill-scheduling logic into a _schedule_variant_image_backfill helper and call it on both the cache-hit and full-render paths. The helper's internal guards (variant > 0, tier is None, row exists with NULL image_url) keep the happy-path cache hit a no-op. Co-Authored-By: Claude Sonnet 4.6 --- app/routers_v2/players.py | 99 +++++-- tests/test_players_card_render.py | 417 ++++++++++++++++++++++++++++++ 2 files changed, 489 insertions(+), 27 deletions(-) create mode 100644 tests/test_players_card_render.py diff --git a/app/routers_v2/players.py b/app/routers_v2/players.py index faa9eaa..8dabd83 100644 --- a/app/routers_v2/players.py +++ b/app/routers_v2/players.py @@ -874,6 +874,55 @@ async def get_animated_card( return FileResponse(path=cache_path, media_type="image/apng", headers=headers) +def _schedule_variant_image_backfill( + background_tasks: BackgroundTasks, + player_id: int, + variant: int, + card_type: str, + tier: Optional[int], + cardset_id: int, + png_path: str, +) -> bool: + """Schedule a backfill task to upload the rendered PNG to S3 and + populate battingcard.image_url / pitchingcard.image_url, but only + if the variant row exists with a NULL image_url. + + No-op when: + - variant == 0 (base card, not a variant) + - tier is not None (dev preview override, not a real variant row) + - the variant row doesn't exist (DoesNotExist) + - the variant row already has image_url populated + + Returns True if a backfill task was scheduled, False otherwise. + """ + if variant == 0 or tier is not None: + return False + + CardModel = BattingCard if card_type == "batting" else PitchingCard + try: + card_row = CardModel.get( + (CardModel.player_id == player_id) & (CardModel.variant == variant) + ) + if card_row.image_url is None: + background_tasks.add_task( + backfill_variant_image_url, + player_id=player_id, + variant=variant, + card_type=card_type, + cardset_id=cardset_id, + png_path=png_path, + ) + logging.info( + f"Scheduled image_url backfill for player_id={player_id} " + f"variant={variant} card_type={card_type}" + ) + return True + except CardModel.DoesNotExist: + pass + + return False + + @router.get("/{player_id}/{card_type}card") @router.get("/{player_id}/{card_type}card/{d}") @router.get("/{player_id}/{card_type}card/{d}/{variant}") @@ -900,18 +949,22 @@ async def get_batter_card( _filename = ( f"{this_player.description} {this_player.p_name} {card_type} {d}-v{variant}" ) - if ( - os.path.isfile( - f"storage/cards/cardset-{this_player.cardset.id}/{card_type}/{player_id}-{d}-v{variant}.png" + cached_path = f"storage/cards/cardset-{this_player.cardset.id}/{card_type}/{player_id}-{d}-v{variant}.png" + if os.path.isfile(cached_path) and html is False and tier is None: + backfill_scheduled = _schedule_variant_image_backfill( + background_tasks=background_tasks, + player_id=player_id, + variant=variant, + card_type=card_type, + tier=tier, + cardset_id=this_player.cardset.id, + png_path=cached_path, ) - and html is False - and tier is None - ): - return FileResponse( - path=f"storage/cards/cardset-{this_player.cardset.id}/{card_type}/{player_id}-{d}-v{variant}.png", - media_type="image/png", - headers=headers, + logging.info( + f"Cache hit for player_id={player_id} variant={variant} " + f"card_type={card_type} backfill_scheduled={backfill_scheduled}" ) + return FileResponse(path=cached_path, media_type="image/png", headers=headers) all_pos = ( CardPosition.select() @@ -1057,23 +1110,15 @@ async def get_batter_card( # Schedule S3 upload for variant cards that don't have an image_url yet. # Skip when tier is overridden (?tier= dev preview) — those renders don't # correspond to real variant card rows. - if variant > 0 and tier is None: - CardModel = BattingCard if card_type == "batting" else PitchingCard - try: - card_row = CardModel.get( - (CardModel.player_id == player_id) & (CardModel.variant == variant) - ) - if card_row.image_url is None: - background_tasks.add_task( - backfill_variant_image_url, - player_id=player_id, - variant=variant, - card_type=card_type, - cardset_id=this_player.cardset.id, - png_path=file_path, - ) - except CardModel.DoesNotExist: - pass + _schedule_variant_image_backfill( + background_tasks=background_tasks, + player_id=player_id, + variant=variant, + card_type=card_type, + tier=tier, + cardset_id=this_player.cardset.id, + png_path=file_path, + ) return FileResponse(path=file_path, media_type="image/png", headers=headers) diff --git a/tests/test_players_card_render.py b/tests/test_players_card_render.py new file mode 100644 index 0000000..4263df0 --- /dev/null +++ b/tests/test_players_card_render.py @@ -0,0 +1,417 @@ +"""Tests for the static PNG card render endpoint (get_batter_card). + +What: GET /api/v2/players/{player_id}/{card_type}card/{d}/{variant} + +These tests focus on the file-on-disk cache short-circuit and the +_schedule_variant_image_backfill helper introduced to fix a bug where +variant rows with image_url=NULL stayed NULL forever once the PNG was +cached on disk. + +Why: + The cache-hit path previously returned FileResponse immediately without + scheduling the backfill BackgroundTask that populates + battingcard.image_url / pitchingcard.image_url. If a variant row was + later recreated with image_url=NULL while its PNG remained cached, the + column stayed NULL forever — breaking every downstream consumer (Discord + bot embeds, website collection views, refractor card listings). + + The fix extracts backfill-scheduling into _schedule_variant_image_backfill + and calls it on both the cache-hit path and the full-render path. + +Test isolation: + - All DB access uses an in-memory SQLite database (no PostgreSQL needed). + - os.path.isfile is monkeypatched to simulate cache presence without + touching the real filesystem. + - FileResponse is monkeypatched to prevent actual file I/O. + - backfill_variant_image_url is patched so no real S3 calls are made. + - The ?tier= param and html=False are set as needed per test. +""" + +import os +import sys + +# Inject a mock apng module so importing players (which imports +# apng_generator, which does `from apng import APNG`) succeeds in +# environments where the optional C-extension is absent. +sys.modules.setdefault( + "apng", __import__("unittest.mock", fromlist=["MagicMock"]).MagicMock() +) + +os.environ.setdefault("DATABASE_TYPE", "postgresql") +os.environ.setdefault("POSTGRES_PASSWORD", "test-dummy") +os.environ.setdefault("API_TOKEN", "test-token") + +from unittest.mock import MagicMock, patch # noqa: E402 + +import pytest # noqa: E402 +from fastapi import FastAPI, Request # noqa: E402 +from fastapi.testclient import TestClient # noqa: E402 +from peewee import SqliteDatabase # noqa: E402 + +from app.db_engine import ( # noqa: E402 + BattingCard, + BattingCardRatings, + BattingSeasonStats, + Card, + CardPosition, + Cardset, + Decision, + Event, + MlbPlayer, + Pack, + PackType, + PitchingCard, + PitchingCardRatings, + PitchingSeasonStats, + Player, + ProcessedGame, + Rarity, + RefractorBoostAudit, + RefractorCardState, + RefractorTrack, + Roster, + RosterSlot, + ScoutClaim, + ScoutOpportunity, + StratGame, + StratPlay, + Team, +) + +# --------------------------------------------------------------------------- +# SQLite in-memory database +# --------------------------------------------------------------------------- + +_render_db = SqliteDatabase( + "file:rendercardendpointtest?mode=memory&cache=shared", + uri=True, + pragmas={"foreign_keys": 1}, +) + +# Full model list in FK dependency order — parents before children. +_RENDER_MODELS = [ + Rarity, + Event, + Cardset, + MlbPlayer, + Player, + BattingCard, + BattingCardRatings, + PitchingCard, + PitchingCardRatings, + CardPosition, + Team, + PackType, + Pack, + Card, + Roster, + RosterSlot, + StratGame, + StratPlay, + Decision, + ScoutOpportunity, + ScoutClaim, + BattingSeasonStats, + PitchingSeasonStats, + ProcessedGame, + RefractorTrack, + RefractorCardState, + RefractorBoostAudit, +] + + +@pytest.fixture(autouse=False) +def setup_render_db(): + """Bind render-test models to shared-memory SQLite and create tables. + + What: Creates all tables needed by the card render endpoint before each + test and drops them afterwards. + + Why: SQLite shared-memory databases persist between tests in the same + process. Creating and dropping around each test guarantees a clean state + without needing a real PostgreSQL instance or a separate process. + """ + _render_db.bind(_RENDER_MODELS) + _render_db.connect(reuse_if_open=True) + _render_db.create_tables(_RENDER_MODELS) + yield _render_db + _render_db.drop_tables(list(reversed(_RENDER_MODELS)), safe=True) + + +def _build_render_app() -> FastAPI: + """Minimal FastAPI app with the players router for render tests.""" + from app.routers_v2.players import router as players_router + + app = FastAPI() + + @app.middleware("http") + async def db_middleware(request: Request, call_next): + _render_db.connect(reuse_if_open=True) + return await call_next(request) + + app.include_router(players_router) + return app + + +@pytest.fixture +def render_client(setup_render_db): + """FastAPI TestClient backed by in-memory SQLite for card render tests.""" + with TestClient(_build_render_app()) as c: + yield c + + +# --------------------------------------------------------------------------- +# Seed helpers +# --------------------------------------------------------------------------- + + +def _make_rarity() -> Rarity: + r, _ = Rarity.get_or_create( + value=20, name="RC_Common", defaults={"color": "#aabbcc"} + ) + return r + + +def _make_cardset() -> Cardset: + cs, _ = Cardset.get_or_create( + name="RC Test Set", + defaults={"description": "render card test cardset", "total_cards": 1}, + ) + return cs + + +def _make_player() -> Player: + return Player.create( + p_name="RC Player", + rarity=_make_rarity(), + cardset=_make_cardset(), + set_num=1, + pos_1="CF", + image="https://example.com/rc.png", + mlbclub="TST", + franchise="TST", + description="RC test player", + ) + + +def _make_batting_card(player: Player, variant: int = 1, image_url=None) -> BattingCard: + return BattingCard.create( + player=player, + variant=variant, + steal_low=1, + steal_high=3, + steal_auto=False, + steal_jump=1.0, + bunting="N", + hit_and_run="N", + running=5, + offense_col=1, + hand="R", + image_url=image_url, + ) + + +def _mock_file_response() -> MagicMock: + """Return a MagicMock that behaves like a 200 FileResponse from TestClient.""" + mock = MagicMock() + mock.status_code = 200 + mock.headers = {"content-type": "image/png"} + return mock + + +# --------------------------------------------------------------------------- +# Tests: cache-hit path — _schedule_variant_image_backfill integration +# --------------------------------------------------------------------------- + + +def test_cache_hit_with_null_image_url_schedules_backfill( + render_client, setup_render_db +): + """Cache hit for a variant with image_url=NULL schedules a backfill task. + + What: Seeds a player and a BattingCard with variant=1 and image_url=None. + Mocks os.path.isfile to return True (simulating a cached PNG on disk). + Patches backfill_variant_image_url to capture calls. Asserts the + backfill function was scheduled via background_tasks.add_task. + + Why: This is the exact bug scenario: variant row recreated with + image_url=NULL while the cached PNG still exists on disk. The cache-hit + path previously returned FileResponse immediately without scheduling the + backfill, leaving image_url NULL forever. The fix must call + _schedule_variant_image_backfill before returning. + """ + player = _make_player() + _make_batting_card(player, variant=1, image_url=None) + + with ( + patch("os.path.isfile", return_value=True), + patch( + "app.routers_v2.players.FileResponse", + return_value=_mock_file_response(), + ), + patch("app.routers_v2.players.backfill_variant_image_url") as mock_backfill, + ): + resp = render_client.get( + f"/api/v2/players/{player.player_id}/battingcard/2026-04-11/1" + ) + + assert resp.status_code == 200 + mock_backfill.assert_called_once() + call_kwargs = mock_backfill.call_args.kwargs + assert call_kwargs["player_id"] == player.player_id + assert call_kwargs["variant"] == 1 + assert call_kwargs["card_type"] == "batting" + + +def test_cache_hit_with_populated_image_url_does_not_schedule_backfill( + render_client, setup_render_db +): + """Cache hit for a variant with image_url already set skips the backfill. + + What: Seeds a player and a BattingCard with variant=1 and a real image_url + (simulating the happy-path row that already has S3 URL populated). + Mocks os.path.isfile to return True. Asserts backfill_variant_image_url + is NOT called. + + Why: The helper's internal guard checks card_row.image_url is None before + scheduling. A cache hit for a row that already has image_url must be a + true no-op — we must not enqueue a redundant S3 upload on every hit. + """ + player = _make_player() + _make_batting_card( + player, + variant=1, + image_url="https://paper-dynasty.s3.amazonaws.com/cards/test.png", + ) + + with ( + patch("os.path.isfile", return_value=True), + patch( + "app.routers_v2.players.FileResponse", + return_value=_mock_file_response(), + ), + patch("app.routers_v2.players.backfill_variant_image_url") as mock_backfill, + ): + resp = render_client.get( + f"/api/v2/players/{player.player_id}/battingcard/2026-04-11/1" + ) + + assert resp.status_code == 200 + mock_backfill.assert_not_called() + + +def test_cache_hit_with_missing_card_row_does_not_crash(render_client, setup_render_db): + """Cache hit when the BattingCard row doesn't exist is a no-op, no crash. + + What: Seeds a player but no BattingCard row for variant=1. Mocks + os.path.isfile to return True. Asserts the endpoint returns 200 and + backfill_variant_image_url is NOT called. + + Why: The _schedule_variant_image_backfill helper catches DoesNotExist + and passes. This test ensures the endpoint does not crash when the PNG + exists on disk but the DB row has been deleted — an edge case that can + occur during data corrections. + """ + player = _make_player() + # Deliberately do NOT create a BattingCard row. + + with ( + patch("os.path.isfile", return_value=True), + patch( + "app.routers_v2.players.FileResponse", + return_value=_mock_file_response(), + ), + patch("app.routers_v2.players.backfill_variant_image_url") as mock_backfill, + ): + resp = render_client.get( + f"/api/v2/players/{player.player_id}/battingcard/2026-04-11/1" + ) + + assert resp.status_code == 200 + mock_backfill.assert_not_called() + + +def test_cache_hit_with_variant_zero_does_not_schedule_backfill( + render_client, setup_render_db +): + """Cache hit for variant=0 (base card) does not schedule a backfill. + + What: Seeds a player and a BattingCard with variant=0 (base card). Mocks + os.path.isfile to return True. Asserts backfill_variant_image_url is NOT + called. + + Why: variant=0 is the base card convention. Base cards are not stored as + variant rows and do not need image_url backfill. The helper guards against + this with `if variant == 0: return False`. + """ + player = _make_player() + _make_batting_card(player, variant=0, image_url=None) + + with ( + patch("os.path.isfile", return_value=True), + patch( + "app.routers_v2.players.FileResponse", + return_value=_mock_file_response(), + ), + patch("app.routers_v2.players.backfill_variant_image_url") as mock_backfill, + ): + resp = render_client.get( + f"/api/v2/players/{player.player_id}/battingcard/2026-04-11/0" + ) + + assert resp.status_code == 200 + mock_backfill.assert_not_called() + + +def test_cache_hit_with_tier_override_does_not_schedule_backfill( + render_client, setup_render_db +): + """Cache hit with ?tier= query param bypasses cache and does not schedule backfill. + + What: Seeds a player and a BattingCard with variant=1 and image_url=None. + Passes ?tier=2 to the endpoint. The ?tier= override intentionally bypasses + the file-on-disk cache (html is False, but tier is not None so the short-circuit + does not fire). Asserts backfill_variant_image_url is NOT called from the + cache-hit path. + + Why: ?tier= is a dev preview mode that overrides the refractor tier for + visual testing. These renders do NOT correspond to real variant card rows + and must not trigger a backfill. The helper guards against this with + `if tier is not None: return False`. Additionally, the cache-hit condition + itself requires `tier is None`, so the entire short-circuit is bypassed — + this test confirms the no-op behaviour even if the guard were ever relaxed. + """ + player = _make_player() + _make_batting_card(player, variant=1, image_url=None) + + with ( + patch("os.path.isfile", return_value=True), + patch("app.routers_v2.players.backfill_variant_image_url") as mock_backfill, + ): + # When ?tier= is set, the cache-hit branch is skipped entirely + # (tier is not None), so the endpoint proceeds to the full render path. + # Mock the browser and screenshot to avoid Playwright launch. + with ( + patch("app.routers_v2.players.get_browser") as mock_browser, + patch( + "app.routers_v2.players.FileResponse", + return_value=_mock_file_response(), + ), + ): + mock_page = MagicMock() + mock_page.set_content = MagicMock(return_value=None) + mock_page.screenshot = MagicMock(return_value=None) + mock_page.close = MagicMock(return_value=None) + mock_browser_instance = MagicMock() + mock_browser_instance.new_page = MagicMock(return_value=mock_page) + mock_browser.return_value = mock_browser_instance + + # Use ?tier=2 — this bypasses the cache-hit short-circuit + render_client.get( + f"/api/v2/players/{player.player_id}/battingcard/2026-04-11/1?tier=2" + ) + + # With tier override, the cache-hit path is skipped entirely. + # The full-render path calls _schedule_variant_image_backfill, but the + # helper's `tier is not None` guard means backfill is still not scheduled. + mock_backfill.assert_not_called()