Facility: 002119
American Storage
- Facility ID: 002119
- Name: American Storage
- URL: https://www.storeitquick.com/missouri/facility/storage-sikeston-mo-104-ingram
- Address: 106 N Ingram Rd, Sikeston, MO 63801, USA
- Platform: custom_facility_002119
- Parser File: src/parsers/custom/facility_002119_parser.py
- Last Scraped: 2026-03-27 13:39:38.461586
- Created: 2026-03-23 02:35:08.816820
- Updated: 2026-03-27 13:39:38.461586
- Parser Status: ⚠ Needs Fix
- Status Reason: Parser returned 0 units
- Last Healing Attempt: Not attempted
Parser Source (src/parsers/custom/facility_002119_parser.py)
"""Parser for American Storage (Store It Quick) — CubbyStorage API-backed site.
NOTE: This parser makes HTTP requests at parse time to call the Cubby Storage API.
This violates the pipeline architecture (parse should only read saved HTML).
The API call should be moved to the fetch stage so the API response is embedded
in the snapshot.
"""
from __future__ import annotations
import json
import logging
from urllib.error import URLError
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
from src.parsers.base import BaseParser, ParseResult, UnitResult
logger = logging.getLogger(__name__)
_API_BASE = "https://api.cubbystorage.com/sf/facilities"
_API_TIMEOUT = 10 # seconds — hard cap for parse-time HTTP calls
class Facility002119Parser(BaseParser):
"""Extract storage units from American Storage via the CubbyStorage API.
This site uses ``<cubby-facility>`` web components that render inside
shadow DOM. The unit data is *not* present in the page source captured
by Selenium; it is fetched at runtime from the CubbyStorage REST API.
The parser therefore:
1. Extracts the API key (``data-api-key``) from the CubbyStorage
``<script>`` tag in the HTML.
2. Extracts the facility slug from the *first* ``<cubby-facility>``
element (``facility`` attribute).
3. Calls ``GET /sf/facilities/{slug}`` with a Bearer token.
4. Maps the JSON pricing groups to ``UnitResult`` objects.
"""
platform = "custom_facility_002119"
# Fallback values if the HTML does not contain the expected tags.
_DEFAULT_API_KEY = "58cb6a7d-79e9-4334-b4b0-0a417554af5d"
_DEFAULT_SLUG = "storage-sikeston-mo-104-ingram"
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
def parse(self, html: str, url: str = "") -> ParseResult:
soup = BeautifulSoup(html, "lxml")
result = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)
api_key = self._extract_api_key(soup)
slug = self._extract_facility_slug(soup)
# TODO: Move this HTTP call to the fetch stage so parse() only
# reads local data.
data = self._fetch_facility(slug, api_key)
if data is None:
result.warnings.append(
f"CubbyStorage API call failed for slug={slug!r}. "
"This parser makes HTTP requests at parse time which "
"should be moved to the fetch stage."
)
return result
for collection in data.get("collections", []):
for group in collection.get("pricingGroups", []):
unit = self._pricing_group_to_unit(group)
if unit is not None:
result.units.append(unit)
if not result.units:
result.warnings.append("No units found in CubbyStorage API response")
return result
# ------------------------------------------------------------------
# Helpers — HTML extraction
# ------------------------------------------------------------------
def _extract_api_key(self, soup: BeautifulSoup) -> str:
tag = soup.find("script", attrs={"data-api-key": True})
if tag:
return tag["data-api-key"]
return self._DEFAULT_API_KEY
def _extract_facility_slug(self, soup: BeautifulSoup) -> str:
tag = soup.find("cubby-facility", attrs={"facility": True})
if tag:
return tag["facility"]
return self._DEFAULT_SLUG
# ------------------------------------------------------------------
# Helpers — API call
# ------------------------------------------------------------------
def _fetch_facility(self, slug: str, api_key: str) -> dict | None:
"""Call the CubbyStorage REST API and return parsed JSON, or None.
# TODO: Move this HTTP call to the fetch stage. The Cubby API call
# at parse time violates the pipeline architecture (Fetch -> Parse
# should be a clean boundary where Parse only reads local data).
"""
endpoint = f"{_API_BASE}/{slug}"
req = Request(
endpoint,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
},
)
try:
with urlopen(req, timeout=_API_TIMEOUT) as resp:
return json.loads(resp.read().decode())
except (URLError, json.JSONDecodeError, OSError) as exc:
logger.warning("CubbyStorage API request failed: %s — %s", endpoint, exc)
return None
except Exception:
logger.exception("Unexpected error calling CubbyStorage API: %s", endpoint)
return None
# ------------------------------------------------------------------
# Helpers — mapping
# ------------------------------------------------------------------
def _pricing_group_to_unit(self, group: dict) -> UnitResult | None:
name = group.get("name", "")
width = group.get("width")
depth = group.get("depth")
occupied = group.get("occupied", False)
# Prices are in cents; convert to dollars.
raw_price = group.get("strikethroughPrice") or group.get("price")
sale_raw = group.get("price") if group.get("strikethroughPrice") else None
if not name and width is None:
return None
size_text = name or f"{int(width)} x {int(depth)}"
unit = UnitResult()
unit.size = size_text
# Use normalize_size for consistent width/length/sqft.
w, ln, sq = self.normalize_size(size_text)
if w is None and width is not None and depth is not None:
w, ln, sq = float(width), float(depth), float(width) * float(depth)
if w is not None:
unit.metadata = {"width": w, "length": ln, "sqft": sq}
if raw_price is not None:
unit.price = raw_price / 100.0
if sale_raw is not None:
unit.sale_price = sale_raw / 100.0
# Build promotion text from discount programs.
discounts = group.get("discountPrograms") or []
if discounts:
promo_parts = [d.get("name", "") for d in discounts if d.get("name")]
if promo_parts:
unit.promotion = "; ".join(promo_parts)
# Features as description.
features = group.get("features") or []
if features:
unit.description = ", ".join(features)
# Scarcity / availability.
if occupied:
unit.scarcity = "Occupied"
return unit
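The TODO in the parser calls for moving the CubbyStorage request into the fetch stage so the snapshot already contains the API response. A minimal sketch of that refactor, assuming a hypothetical marker id (`cubby-embedded-api`) and hypothetical helper names; the fetch stage would call `embed_api_response` after downloading the page and the API payload, and `parse()` would call `read_embedded_api` instead of hitting the network:

```python
from __future__ import annotations

import json
import re

_EMBED_ID = "cubby-embedded-api"  # hypothetical marker id for the embedded payload


def embed_api_response(html: str, api_json: dict) -> str:
    """Fetch-stage side: splice the CubbyStorage API response into the
    snapshot HTML so the parse stage needs no network access."""
    payload = json.dumps(api_json)
    script = f'<script type="application/json" id="{_EMBED_ID}">{payload}</script>'
    if "</body>" in html:
        # Insert just before the closing body tag to keep the page valid.
        return html.replace("</body>", script + "</body>", 1)
    return html + script


def read_embedded_api(html: str) -> dict | None:
    """Parse-stage side: recover the embedded JSON, or None if absent."""
    m = re.search(
        rf'<script type="application/json" id="{_EMBED_ID}">(.*?)</script>',
        html,
        re.DOTALL,
    )
    if not m:
        return None
    try:
        return json.loads(m.group(1))
    except json.JSONDecodeError:
        return None
```

With this split, a failed API call surfaces at fetch time (where retries belong), and `parse()` stays a pure function of the saved snapshot. Note the simple regex round-trip assumes the payload contains no literal `</script>` sequence.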
Scrape Runs (3)
Run #1522 Details
- Status: exported
- Parser Used: Facility002119Parser
- Platform Detected: table_layout
- Units Found: 0
- Stage Reached: exported
- Timestamp: 2026-03-27 13:39:32.639519
Timing
| Stage | Duration |
|---|---|
| Fetch | 4417ms |
| Detect | 66ms |
| Parse | 418ms |
| Export | 16ms |
Snapshot: 002119_20260327T133937Z.html
No units found in this run.
All Failures for this Facility (3)
- Stage: parse · Type: _WarningAsException · Source: scraper · Code: no_units_extracted · Severity: warning
  Run #N/A | 2026-03-27 13:39:38.442748
  No units extracted for 002119
  Stack trace: src.reporting.failure_reporter._WarningAsException: No units extracted for 002119
- Stage: parse · Type: _WarningAsException · Source: scraper · Code: no_units_extracted · Severity: warning
  Run #N/A | 2026-03-27 13:39:37.595260
  No units extracted for 002119
  Stack trace: src.reporting.failure_reporter._WarningAsException: No units extracted for 002119
- Stage: parse · Type: _WarningAsException · Source: scraper · Code: no_units_extracted · Severity: warning
  Run #N/A | 2026-03-23 02:39:05.094740
  No units extracted for 002119
  Stack trace: src.reporting.failure_reporter._WarningAsException: No units extracted for 002119
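Because every failure above is `no_units_extracted`, the pricing-group mapping is the first thing worth exercising offline. A standalone sketch of the price semantics the parser assumes (amounts in cents, with `strikethroughPrice` acting as the regular price and `price` as the sale price when both are present); field names are assumptions taken from the parser source, and `map_pricing_group` is a hypothetical helper, not part of the codebase:

```python
from __future__ import annotations


def map_pricing_group(group: dict) -> dict | None:
    """Reduced version of the parser's pricing-group mapping, for offline checks.

    Assumed CubbyStorage payload fields: name, width, depth, price,
    strikethroughPrice. Prices are integers in cents.
    """
    name = group.get("name", "")
    width, depth = group.get("width"), group.get("depth")
    # Skip groups with neither a name nor both dimensions.
    if not name and (width is None or depth is None):
        return None
    # strikethroughPrice, when present, is the regular (pre-discount) price.
    regular = group.get("strikethroughPrice") or group.get("price")
    sale = group.get("price") if group.get("strikethroughPrice") else None
    return {
        "size": name or f"{int(width)} x {int(depth)}",
        "price": regular / 100.0 if regular is not None else None,
        "sale_price": sale / 100.0 if sale is not None else None,
    }
```

Feeding a captured API response through a helper like this separates "the API returned nothing" from "the mapping dropped everything", which the current warning-only failures do not distinguish.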