Facility: 086362

Western Storage WY

Stale Data Warning: This facility has not been successfully scraped in 26 days (threshold: 3 days). Data may be outdated.
Facility Information active
Facility ID
086362
Name
Western Storage WY
URL
https://www.westernstoragewy.com/1545-n-main-st-buffalo-wy-82834
Address
N/A
Platform
custom_facility_086362
Parser File
src/parsers/custom/facility_086362_parser.py
Last Scraped
2026-03-27 14:04:09.813701
Created
2026-03-06 23:45:35.865957
Updated
2026-03-27 14:04:09.838960
Parser & Healing Diagnosis working
Parser Status
✓ Working
Status Reason
N/A
Last Healing Attempt
Not attempted
Parser Source (src/parsers/custom/facility_086362_parser.py)
"""Parser for Western Storage & RV (Buffalo, WY).

This is a StorEdge-powered SPA site.  Unit data is not rendered in the DOM
as plain HTML — instead it is bootstrapped as ``window.__APOLLO_STATE__``
JSON embedded in a ``<script>`` tag.  Each entry keyed ``UnitGroup:<uuid>``
contains all the unit fields we need: size, price, type, and availability.
"""

from __future__ import annotations

import json
import re

from bs4 import BeautifulSoup

from src.parsers.base import BaseParser, ParseResult, UnitResult

# Literal JavaScript assignment text that precedes the Apollo state JSON inside
# the ``<script>`` tag.  This is a plain string, not a regex; note the exact
# spacing it encodes (no space before ``=``, one space after).
_APOLLO_PREFIX = "window.__APOLLO_STATE__= "


class Facility086362Parser(BaseParser):
    """Extract storage units from Western Storage & RV (westernstoragewy.com).

    Unit data lives in the ``window.__APOLLO_STATE__`` JSON object embedded in
    a ``<script>`` tag (no ``type`` attribute).  Keys with the prefix
    ``UnitGroup:`` each represent one unit group with fields:

    - ``size``  — formatted size string, e.g. ``"10'x20'"``
    - ``price`` — monthly price as a number
    - ``type``  — category label, e.g. ``"Self Storage"``, ``"RV Parking"``
    - ``availableUnitsCount`` — integer; 0 means sold out / waitlist only
    - ``amenities`` — list of amenity objects (may be empty)
    - ``discountedPrice`` — sale price if present, else ``null``

    Any of these fields may be missing or ``null`` in the payload; the parser
    treats such values as "unknown" rather than raising.
    """

    platform = "custom_facility_086362"

    # Whitespace-tolerant form of ``_APOLLO_PREFIX``: the variable name is taken
    # from the module constant, but any spacing around ``=`` is accepted so a
    # cosmetic change in the site's markup does not break detection.
    _APOLLO_ASSIGNMENT_RE = re.compile(
        re.escape(_APOLLO_PREFIX.partition("=")[0].strip()) + r"\s*=\s*"
    )

    # Maps unit type labels (lower-cased) to amenity tags we store in metadata.
    # Keys are matched as substrings and tags of *all* matching keys are merged,
    # so overlapping keys ("rv parking" / "covered rv") do not shadow each other.
    _TYPE_AMENITY_MAP: dict[str, list[str]] = {
        "rv parking": ["vehicle_parking", "rv"],
        "insulated shop": ["climate_control", "insulated"],
        "covered rv": ["vehicle_parking", "rv", "covered"],
        "boat storage": ["vehicle_parking", "boat"],
    }

    # (keywords, tag): an amenity label containing any keyword yields the tag.
    _LABEL_AMENITY_RULES: tuple[tuple[tuple[str, ...], str], ...] = (
        (("climate", "heat", "cool"), "climate_control"),
        (("drive",), "drive_up"),
        (("indoor", "interior"), "interior"),
    )

    # Highest available-unit count that is still reported as "only_N_left".
    _LOW_STOCK_THRESHOLD = 3

    def parse(self, html: str, url: str = "") -> ParseResult:
        """Parse a facility page and return every unit group found.

        Args:
            html: Raw HTML of the facility page.
            url: Page URL, copied onto each extracted unit.

        Returns:
            A ``ParseResult`` whose ``units`` holds one entry per usable
            ``UnitGroup:`` record.  Problems (no Apollo script, bad JSON, no
            unit groups) are reported through ``warnings`` instead of raising.
        """
        soup = BeautifulSoup(html, "lxml")
        result = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)

        apollo_data = self._extract_apollo_state(soup, result)
        if apollo_data is None:
            return result

        # Only dict-valued entries can be unit records; anything else under a
        # ``UnitGroup:`` key is malformed and is skipped.
        unit_groups = [
            value
            for key, value in apollo_data.items()
            if key.startswith("UnitGroup:") and isinstance(value, dict)
        ]
        if not unit_groups:
            result.warnings.append("No UnitGroup entries found in __APOLLO_STATE__")
            return result

        for group in unit_groups:
            unit = self._parse_unit_group(group, url)
            if unit is not None:
                result.units.append(unit)

        if not result.units:
            result.warnings.append("No units extracted from UnitGroup data")

        return result

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _extract_apollo_state(self, soup: BeautifulSoup, result: ParseResult) -> dict | None:
        """Find and parse the ``window.__APOLLO_STATE__`` script block.

        Args:
            soup: Parsed page.
            result: Receives a warning describing why extraction failed.

        Returns:
            The decoded Apollo state as a dict, or ``None`` when no matching
            script exists, its JSON is invalid, or it is not a JSON object.
        """
        for script in soup.find_all("script"):
            text = (script.string or "").strip()
            assignment = self._APOLLO_ASSIGNMENT_RE.match(text)
            if assignment is None:
                continue

            # raw_decode stops at the end of the first JSON value, so a trailing
            # ``;`` or further JS statements after the object are ignored.
            json_str = text[assignment.end():]
            try:
                data, _ = json.JSONDecoder().raw_decode(json_str)
            except (json.JSONDecodeError, ValueError) as exc:
                result.warnings.append(f"Failed to parse __APOLLO_STATE__ JSON: {exc}")
                return None

            if not isinstance(data, dict):
                # Callers iterate ``.items()``; a list/scalar payload is unusable.
                result.warnings.append(
                    f"__APOLLO_STATE__ is not a JSON object (got {type(data).__name__})"
                )
                return None
            return data

        result.warnings.append("No window.__APOLLO_STATE__ script found on page")
        return None

    def _parse_unit_group(self, group: dict, url: str) -> UnitResult | None:
        """Convert one UnitGroup dict into a UnitResult.

        Args:
            group: One ``UnitGroup:`` record from the Apollo state.
            url: Page URL to store on the unit.

        Returns:
            The populated ``UnitResult``, or ``None`` when the record has
            neither a size nor a price and is therefore not useful.
        """
        # ``dict.get(key, default)`` does not apply the default to an explicit
        # JSON ``null``, so normalise None to an empty string by hand.
        size_field = group.get("size")
        raw_size = "" if size_field is None else str(size_field)
        type_field = group.get("type")
        unit_type = "" if type_field is None else str(type_field)
        price_val = group.get("price")
        discounted = group.get("discountedPrice")

        # Require at least a size or a price to be useful
        if not raw_size and price_val is None:
            return None

        unit = UnitResult()
        unit.url = url

        # --- Size ---
        # Stored as "10'x20'" — normalize the quote characters first
        normalized_raw = re.sub(r"['\u2019\u2018]", "", raw_size).strip()
        unit.size = raw_size  # keep human-readable original
        w, ln, sq = self.normalize_size(normalized_raw)
        meta: dict = {}
        if w is not None:
            meta["width"] = w
            meta["length"] = ln
            meta["sqft"] = sq

        # --- Price ---
        if price_val is not None:
            unit.price = self.normalize_price(str(price_val))

        # --- Sale / discounted price ---
        if discounted is not None:
            unit.sale_price = self.normalize_price(str(discounted))

        # --- Description (unit type) ---
        if unit_type:
            unit.description = unit_type

        # --- Scarcity / availability ---
        scarcity = self._scarcity_label(group.get("availableUnitsCount"))
        if scarcity is not None:
            unit.scarcity = scarcity

        # --- Amenities ---
        amenities = self._collect_amenities(group, unit_type)
        if amenities:
            meta["amenities"] = amenities

        if meta:
            unit.metadata = meta

        return unit

    @classmethod
    def _scarcity_label(cls, available: object) -> str | None:
        """Translate an available-unit count into a scarcity tag.

        Returns ``"sold_out"`` for a count of zero or less,
        ``"only_<n>_left"`` for counts up to ``_LOW_STOCK_THRESHOLD``, and
        ``None`` for larger counts.  A missing, ``null`` or non-numeric count
        is unknown availability and also yields ``None`` — it must not be
        reported as sold out.
        """
        if isinstance(available, float) and available.is_integer():
            available = int(available)
        # bool is an int subclass; True/False is not a meaningful unit count.
        if isinstance(available, bool) or not isinstance(available, int):
            return None
        if available <= 0:
            return "sold_out"
        if available <= cls._LOW_STOCK_THRESHOLD:
            return f"only_{available}_left"
        return None

    def _collect_amenities(self, group: dict, unit_type: str) -> list[str]:
        """Build the de-duplicated amenity tag list for one unit group.

        Tags come from two places: the unit type label (via
        ``_TYPE_AMENITY_MAP``) and the labels of the record's ``amenities``
        entries (via ``_LABEL_AMENITY_RULES``).  First-seen order is kept.
        """
        amenities: list[str] = []

        # --- Amenities derived from unit type ---
        type_lower = unit_type.lower()
        for type_key, tags in self._TYPE_AMENITY_MAP.items():
            if type_key not in type_lower:
                continue
            for tag in tags:
                if tag not in amenities:
                    amenities.append(tag)

        # --- Amenities listed in the Apollo data ---
        listed = group.get("amenities")
        if not isinstance(listed, list):
            listed = []
        for amenity in listed:
            if not isinstance(amenity, dict):
                # e.g. a bare id string; nothing to read a label from.
                continue
            label = str(amenity.get("name") or amenity.get("label") or "").lower()
            for keywords, tag in self._LABEL_AMENITY_RULES:
                if tag not in amenities and any(word in label for word in keywords):
                    amenities.append(tag)

        return amenities

Scrape Runs (7)

Run #2124 Details

Status
exported
Parser Used
Facility086362Parser
Platform Detected
storageunitsoftware
Units Found
5
Stage Reached
exported
Timestamp
2026-03-27 14:04:04.522727
Timing
Stage Duration
Fetch: 5194ms
Detect: 27ms
Parse: 13ms
Export: 17ms

Snapshot: 086362_20260327T140409Z.html · Show Snapshot · Open in New Tab

Parsed Units (5)

10'x20'

$80.00/mo

5'x5'

$30.00/mo
only_2_left

5'x5'

$30.00/mo
sold_out

24'x50'

$1000.00/mo

10'x10'

$60.00/mo
only_1_left

All Failures for this Facility (1)

fetch DatatypeMismatch unknown unknown permanent Run #18 | 2026-03-07 01:42:29.652155

column "success" is of type boolean but expression is of type integer LINE 3: ... VALUES ('086362', 18, '086362_20260307T014229Z.html', 0) ^ HINT: You will need to rewrite or cast the expression.

Stack trace
Traceback (most recent call last):
  File "/app/src/pipeline.py", line 329, in _process_facility
    manifest_id = storage.insert_snapshot_manifest(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/src/db/pg_backend.py", line 615, in insert_snapshot_manifest
    row = self._execute_returning(
          ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/src/db/pg_backend.py", line 54, in _execute_returning
    cur.execute(sql, params)
  File "/app/.venv/lib/python3.11/site-packages/psycopg2/extras.py", line 236, in execute
    return super().execute(query, vars)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
psycopg2.errors.DatatypeMismatch: column "success" is of type boolean but expression is of type integer
LINE 3: ...    VALUES ('086362', 18, '086362_20260307T014229Z.html', 0)
                                                                     ^
HINT:  You will need to rewrite or cast the expression.

← Back to dashboard