Facility: 002783
The Attic Self Storage
- Facility ID
- 002783
- Name
- The Attic Self Storage
- URL
- https://www.theatticselfstoragetx.com/
- Address
- 10949 Walnut Hill Ln, Dallas, TX 75238, USA, Dallas, Texas 75238
- Platform
- custom_facility_002783
- Parser File
- src/parsers/custom/facility_002783_parser.py
- Last Scraped
- 2026-03-27 13:39:57.039437
- Created
- 2026-03-23 02:35:08.816820
- Updated
- 2026-03-27 13:39:57.071255
- Parser Status
- ✓ Working
- Status Reason
- N/A
- Last Healing Attempt
- Not attempted
Parser Source (src/parsers/custom/facility_002783_parser.py)
"""Parser for The Attic Self Storage (StorEdge multi-location homepage)."""
from __future__ import annotations
import json
import re
from bs4 import BeautifulSoup
from src.parsers.base import BaseParser, ParseResult, UnitResult
class Facility002783Parser(BaseParser):
    """Extract facility data from The Attic Self Storage StorEdge homepage.

    This site is a StorEdge platform multi-location homepage. Individual unit
    pricing is loaded dynamically via API on per-facility pages. The homepage
    Apollo state contains facility-level data including minimumUnitPrice,
    display name, address, and phone number.

    Because this is a multi-location page, we filter to only the target
    facility (matched by address or name) instead of returning all locations.
    """

    platform = "custom_facility_002783"

    # Target facility details for matching (lowercase fragments; compared
    # against lowercased Apollo-state values).
    _TARGET_ADDRESS_FRAGMENT = "10949 walnut hill"
    _TARGET_NAME_FRAGMENT = "walnut hill"

    def parse(self, html: str, url: str = "") -> ParseResult:
        """Parse the homepage HTML into a ParseResult.

        Returns at most one facility-level UnitResult (carrying the matched
        facility's minimumUnitPrice); every failure mode appends a warning
        and returns an empty-unit result rather than raising.
        """
        soup = BeautifulSoup(html, "lxml")
        result = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)

        apollo_data = self._extract_apollo_state(soup)
        if not apollo_data:
            result.warnings.append("No __APOLLO_STATE__ found in page")
            return result

        facilities = self._extract_facilities(apollo_data)
        if not facilities:
            result.warnings.append("No facility objects found in Apollo state")
            return result

        # Filter to the target facility; emitting all locations would mix
        # other facilities' prices into this facility's record.
        matched = self._match_target_facility(facilities)
        if matched is None:
            result.warnings.append(
                f"Multi-location page with {len(facilities)} facilities but "
                f"could not identify target facility "
                f"(looking for '{self._TARGET_ADDRESS_FRAGMENT}'). "
                f"Available: {', '.join(f.get('displayName', '?') for f in facilities)}. "
                f"Returning 0 units to avoid data contamination."
            )
            return result

        if len(facilities) > 1:
            result.warnings.append(
                f"Multi-location page with {len(facilities)} facilities; "
                f"filtered to: {matched.get('displayName', 'unknown')}"
            )

        # Build a single facility-level "unit" from the matched facility only.
        unit = UnitResult()
        name = matched.get("displayName", "")
        address_data = matched.get("_address", {})
        full_address = address_data.get("fullAddress", "")
        phone = matched.get("phone", "")
        min_price = matched.get("minimumUnitPrice")

        unit.description = f"{name} - {full_address}".strip(" -")
        if phone:
            unit.description += f" ({phone})"
        if min_price is not None:
            unit.price = float(min_price)
        unit.size = "Facility"
        unit.metadata = {
            "facility_name": name,
            "address": full_address,
            "phone": phone,
            "storedge_facility_id": matched.get("id", ""),
            "min_price": min_price,
            "note": "Min unit price from StorEdge homepage; "
            "individual unit data requires per-facility page scrape",
        }

        # Only emit the unit when it actually carries a price.
        if unit.price is not None:
            result.units.append(unit)

        if not result.units:
            result.warnings.append(
                f"Matched facility '{name}' but it has no pricing data"
            )
        return result

    def _match_target_facility(self, facilities: list[dict]) -> dict | None:
        """Find the facility matching our target address or name.

        Matching order: address fragment (most reliable), then name
        fragment, then the unambiguous single-facility fallback.
        Returns None when no confident match exists.
        """
        # Try address match first (most reliable).
        for fac in facilities:
            addr = fac.get("_address", {}).get("fullAddress", "").lower()
            if self._TARGET_ADDRESS_FRAGMENT in addr:
                return fac
        # Try name match.
        for fac in facilities:
            name = fac.get("displayName", "").lower()
            if self._TARGET_NAME_FRAGMENT in name:
                return fac
        # Single facility -- no ambiguity.
        if len(facilities) == 1:
            return facilities[0]
        return None

    def _extract_apollo_state(self, soup: BeautifulSoup) -> dict | None:
        """Parse the window.__APOLLO_STATE__ JSON from a script tag.

        Uses json.JSONDecoder.raw_decode to find the end of the JSON
        object. A naive brace-depth scan (the previous approach) miscounts
        whenever a '{' or '}' appears inside a JSON string value, yielding
        a truncated, unparseable slice; raw_decode respects string
        boundaries and escapes.
        """
        decoder = json.JSONDecoder()
        for script in soup.find_all("script"):
            text = script.get_text()
            if "__APOLLO_STATE__" not in text:
                continue
            # \s* consumes whitespace after '=' so raw_decode starts
            # exactly at the opening brace.
            match = re.search(r"__APOLLO_STATE__\s*=\s*", text)
            if not match:
                continue
            try:
                data, _end = decoder.raw_decode(text, match.end())
            except json.JSONDecodeError:
                continue
            # Apollo state is always an object; skip anything else to
            # preserve the old dict-only contract.
            if isinstance(data, dict):
                return data
        return None

    def _extract_facilities(self, apollo_data: dict) -> list[dict]:
        """Extract facility objects from Apollo state, resolving address references.

        Each returned dict is a shallow copy of the Apollo "Facility:*"
        entry with an added "_address" key holding the dereferenced
        address record (empty dict when unresolvable).
        """
        facilities = []
        for key, value in apollo_data.items():
            if (
                isinstance(value, dict)
                and value.get("__typename") == "Facility"
                and key.startswith("Facility:")
            ):
                fac = dict(value)
                # Resolve the normalized address reference back through
                # the Apollo cache by its id.
                addr_ref = value.get("address", {})
                if isinstance(addr_ref, dict) and addr_ref.get("id"):
                    addr_data = apollo_data.get(addr_ref["id"], {})
                    fac["_address"] = addr_data
                else:
                    fac["_address"] = {}
                facilities.append(fac)
        # Sort by display name for consistent output.
        facilities.sort(key=lambda f: f.get("displayName", ""))
        return facilities
Scrape Runs (3)
Run #1028 Details
- Status
- exported
- Parser Used
- Facility002783Parser
- Platform Detected
- storageunitsoftware
- Units Found
- 1
- Stage Reached
- exported
- Timestamp
- 2026-03-23 02:39:33.796693
Timing
| Stage | Duration |
|---|---|
| Fetch | 3789ms |
| Detect | 40ms |
| Parse | 19ms |
| Export | 6ms |
Snapshot: 002783_20260323T023937Z.html · Show Snapshot · Open in New Tab
Parsed Units (1)
Facility
$85.00/mo