Facility: 002783
The Attic Self Storage
- Facility ID
- 002783
- Name
- The Attic Self Storage
- URL
- https://www.theatticselfstoragetx.com/
- Address
- 10949 Walnut Hill Ln, Dallas, TX 75238, USA, Dallas, Texas 75238
- Platform
- custom_facility_002783
- Parser File
- src/parsers/custom/facility_002783_parser.py
- Last Scraped
- 2026-03-27 13:39:57.039437
- Created
- 2026-03-23 02:35:08.816820
- Updated
- 2026-03-27 13:39:57.071255
- Parser Status
- ✓ Working
- Status Reason
- N/A
- Last Healing Attempt
- Not attempted
Parser Source (src/parsers/custom/facility_002783_parser.py)
"""Parser for The Attic Self Storage (StorEdge multi-location homepage)."""
from __future__ import annotations
import json
import re
from bs4 import BeautifulSoup
from src.parsers.base import BaseParser, ParseResult, UnitResult
class Facility002783Parser(BaseParser):
    """Extract facility data from The Attic Self Storage StorEdge homepage.

    This site is a StorEdge platform multi-location homepage. Individual unit
    pricing is loaded dynamically via API on per-facility pages. The homepage
    Apollo state contains facility-level data including minimumUnitPrice,
    display name, address, and phone number.

    Because this is a multi-location page, we filter to only the target
    facility (matched by address or name) instead of returning all locations.
    """

    platform = "custom_facility_002783"

    # Target facility details for matching (lowercase fragments; compared
    # against lowercased Apollo-state values).
    _TARGET_ADDRESS_FRAGMENT = "10949 walnut hill"
    _TARGET_NAME_FRAGMENT = "walnut hill"

    def parse(self, html: str, url: str = "") -> ParseResult:
        """Parse the homepage HTML into a ParseResult.

        Returns at most one facility-level UnitResult (carrying the matched
        facility's minimumUnitPrice); every failure mode appends a warning
        and returns an empty-unit result rather than raising.
        """
        soup = BeautifulSoup(html, "lxml")
        result = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)

        apollo_data = self._extract_apollo_state(soup)
        if not apollo_data:
            result.warnings.append("No __APOLLO_STATE__ found in page")
            return result

        facilities = self._extract_facilities(apollo_data)
        if not facilities:
            result.warnings.append("No facility objects found in Apollo state")
            return result

        # Filter to the target facility; emitting all locations would mix
        # other facilities' prices into this facility's record.
        matched = self._match_target_facility(facilities)
        if matched is None:
            result.warnings.append(
                f"Multi-location page with {len(facilities)} facilities but "
                f"could not identify target facility "
                f"(looking for '{self._TARGET_ADDRESS_FRAGMENT}'). "
                f"Available: {', '.join(f.get('displayName', '?') for f in facilities)}. "
                f"Returning 0 units to avoid data contamination."
            )
            return result

        if len(facilities) > 1:
            result.warnings.append(
                f"Multi-location page with {len(facilities)} facilities; "
                f"filtered to: {matched.get('displayName', 'unknown')}"
            )

        # Build a single facility-level "unit" from the matched facility only.
        unit = UnitResult()
        name = matched.get("displayName", "")
        address_data = matched.get("_address", {})
        full_address = address_data.get("fullAddress", "")
        phone = matched.get("phone", "")
        min_price = matched.get("minimumUnitPrice")

        unit.description = f"{name} - {full_address}".strip(" -")
        if phone:
            unit.description += f" ({phone})"
        if min_price is not None:
            unit.price = float(min_price)
        unit.size = "Facility"
        unit.metadata = {
            "facility_name": name,
            "address": full_address,
            "phone": phone,
            "storedge_facility_id": matched.get("id", ""),
            "min_price": min_price,
            "note": "Min unit price from StorEdge homepage; "
            "individual unit data requires per-facility page scrape",
        }

        # Only emit the unit when it actually carries a price.
        if unit.price is not None:
            result.units.append(unit)

        if not result.units:
            result.warnings.append(
                f"Matched facility '{name}' but it has no pricing data"
            )
        return result

    def _match_target_facility(self, facilities: list[dict]) -> dict | None:
        """Find the facility matching our target address or name.

        Matching order: address fragment (most reliable), then name
        fragment, then the unambiguous single-facility fallback.
        Returns None when no confident match exists.
        """
        # Try address match first (most reliable).
        for fac in facilities:
            addr = fac.get("_address", {}).get("fullAddress", "").lower()
            if self._TARGET_ADDRESS_FRAGMENT in addr:
                return fac
        # Try name match.
        for fac in facilities:
            name = fac.get("displayName", "").lower()
            if self._TARGET_NAME_FRAGMENT in name:
                return fac
        # Single facility -- no ambiguity.
        if len(facilities) == 1:
            return facilities[0]
        return None

    def _extract_apollo_state(self, soup: BeautifulSoup) -> dict | None:
        """Parse the window.__APOLLO_STATE__ JSON from a script tag.

        Uses json.JSONDecoder.raw_decode to find the end of the JSON
        object. A naive brace-depth scan (the previous approach) miscounts
        whenever a '{' or '}' appears inside a JSON string value, yielding
        a truncated, unparseable slice; raw_decode respects string
        boundaries and escapes.
        """
        decoder = json.JSONDecoder()
        for script in soup.find_all("script"):
            text = script.get_text()
            if "__APOLLO_STATE__" not in text:
                continue
            # \s* consumes whitespace after '=' so raw_decode starts
            # exactly at the opening brace.
            match = re.search(r"__APOLLO_STATE__\s*=\s*", text)
            if not match:
                continue
            try:
                data, _end = decoder.raw_decode(text, match.end())
            except json.JSONDecodeError:
                continue
            # Apollo state is always an object; skip anything else to
            # preserve the old dict-only contract.
            if isinstance(data, dict):
                return data
        return None

    def _extract_facilities(self, apollo_data: dict) -> list[dict]:
        """Extract facility objects from Apollo state, resolving address references.

        Each returned dict is a shallow copy of the Apollo "Facility:*"
        entry with an added "_address" key holding the dereferenced
        address record (empty dict when unresolvable).
        """
        facilities = []
        for key, value in apollo_data.items():
            if (
                isinstance(value, dict)
                and value.get("__typename") == "Facility"
                and key.startswith("Facility:")
            ):
                fac = dict(value)
                # Resolve the normalized address reference back through
                # the Apollo cache by its id.
                addr_ref = value.get("address", {})
                if isinstance(addr_ref, dict) and addr_ref.get("id"):
                    addr_data = apollo_data.get(addr_ref["id"], {})
                    fac["_address"] = addr_data
                else:
                    fac["_address"] = {}
                facilities.append(fac)
        # Sort by display name for consistent output.
        facilities.sort(key=lambda f: f.get("displayName", ""))
        return facilities
Scrape Runs (3)
Run #1028 Details
- Status
- exported
- Parser Used
- Facility002783Parser
- Platform Detected
- storageunitsoftware
- Units Found
- 1
- Stage Reached
- exported
- Timestamp
- 2026-03-23 02:39:33.796693
Timing
| Stage | Duration |
|---|---|
| Fetch | 3789ms |
| Detect | 40ms |
| Parse | 19ms |
| Export | 6ms |
Snapshot: 002783_20260323T023937Z.html · Show Snapshot · Open in New Tab
Parsed Units (1)
Facility
$85.00/mo