Facility: 080338
Stor-It Lake Lowell
- Facility ID
- 080338
- Name
- Stor-It Lake Lowell
- URL
- https://www.stor-it.com/location/USA/ID/Nampa/stor-it-lake-lowell/
- Address
- N/A
- Platform
- custom_facility_080338
- Parser File
- src/parsers/custom/facility_080338_parser.py
- Last Scraped
- 2026-03-23 03:16:35.027164
- Created
- 2026-03-06 23:45:35.865957
- Updated
- 2026-03-23 03:16:35.027164
- Parser Status
- ⚠ Needs Fix
- Status Reason
- Parser returned 0 units
- Last Healing Attempt
- Not attempted
Parser Source (src/parsers/custom/facility_080338_parser.py)
"""Parser for Stor-It Self Storage Lake Lowell (Candee WordPress plugin).
The facility page loads unit listings via a WordPress AJAX call using the
Candee self-storage plugin. The page HTML contains an inline ``<script>``
tag that encodes the AJAX shortcode parameters in base64 and posts them to
``/wp-admin/admin-ajax.php``.
The parser:
1. Tries to extract unit rows directly from the supplied HTML (works when
the AJAX response HTML is passed in directly).
2. Falls back to locating the shortcode parameters from the page HTML, making
the AJAX POST itself, then parsing the response.
Each unit row is a ``<div class="unitsList lineItem unitMasterData …">``
element. All dimensions and pricing are stored in the ``data-unit-info``
JSON attribute:
{
"width": 10, "length": 10, "sqft": 100,
"price": "103.00",
"rental_name": "10 x 10 Self Storage",
"unit_type": "Self Storage",
"total_available": "5",
"discounted_price": null,
...
}
NOTE: This parser makes HTTP requests at parse time (AJAX POST calls to the
Candee WordPress endpoint). This violates the pipeline architecture (parse
should only read saved HTML). The AJAX calls should be moved to the fetch
stage so the AJAX response HTML is captured in the snapshot.
Facility URL: https://www.stor-it.com/location/USA/ID/Nampa/stor-it-lake-lowell/
AJAX endpoint: https://www.stor-it.com/wp-admin/admin-ajax.php?lang=en
"""
from __future__ import annotations
import json
import logging
import re
import urllib.parse
import urllib.request
from urllib.error import URLError
from bs4 import BeautifulSoup
from src.parsers.base import BaseParser, ParseResult, UnitResult
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
# Matches the ``atob('<payload>')`` call in the page's inline setup script.
# Group 1 captures the base64 payload; only the single-quoted form is matched.
# According to the original author's note, the payload decodes to:
# {"shortcode": "[candee_units theme=\"units_theme_5\" prop_id=\"17225\" ...]", "lang": "en"}
_SHORTCODE_B64_RE = re.compile(r"atob\('([A-Za-z0-9+/=]+)'\)")
# Matches an ``ajaxurl = "<url>"`` assignment (single or double quotes) in an
# inline script. Group 1 captures the WP AJAX endpoint URL exactly as written
# in the page, i.e. still containing any JavaScript ``\/`` escapes.
_AJAXURL_RE = re.compile(r'ajaxurl\s*=\s*["\']([^"\']+)["\']')
def _extract_candee_params(
    page_html: str,
) -> tuple[str | None, str | None, str | None]:
    """Locate the Candee AJAX parameters embedded in the facility page HTML.

    Scans inline ``<script>`` tags for the one that mentions
    ``candee_ajax_load_template`` and pulls out the pieces needed to replay
    the AJAX call.

    Args:
        page_html: Full HTML of the facility page.

    Returns:
        A ``(ajax_url, shortcode, current_url)`` tuple. Each element is
        ``None`` (or empty) when it could not be found. ``ajax_url`` is only
        searched for in the remaining scripts when a shortcode was found.
    """
    # Local import: ``base64`` is not part of this module's import block.
    import base64

    soup = BeautifulSoup(page_html, "lxml")
    script_texts = [script.string or "" for script in soup.find_all("script")]

    ajax_url: str | None = None
    shortcode: str | None = None
    current_url: str | None = None

    for text in script_texts:
        if "candee_ajax_load_template" not in text:
            continue

        m_ajax = _AJAXURL_RE.search(text)
        if m_ajax:
            # The URL is embedded as a JS string, so slashes may be escaped.
            ajax_url = m_ajax.group(1).replace("\\/", "/")

        m_b64 = _SHORTCODE_B64_RE.search(text)
        if m_b64:
            try:
                # binascii.Error, UnicodeDecodeError and JSONDecodeError are
                # all ValueError subclasses, so one narrow handler suffices.
                params = json.loads(
                    base64.b64decode(m_b64.group(1)).decode("utf-8")
                )
            except ValueError as exc:
                logger.debug("Could not decode Candee shortcode payload: %s", exc)
                continue
            if not isinstance(params, dict):
                logger.debug("Candee shortcode payload is not a JSON object")
                continue
            shortcode = params.get("shortcode", "")

        m_url = re.search(r"current_url\s*:\s*'([^']+)'", text)
        if m_url:
            current_url = m_url.group(1)

        if shortcode:
            break

    # ``ajaxurl`` is often defined in a different inline script than the
    # template call, so fall back to searching every script.
    if shortcode and not ajax_url:
        for text in script_texts:
            m_any = _AJAXURL_RE.search(text)
            if m_any:
                ajax_url = m_any.group(1).replace("\\/", "/")
                break

    return ajax_url, shortcode, current_url


def _fetch_unit_html(page_html: str) -> str | None:
    """Extract AJAX parameters from page HTML and POST to the Candee endpoint.

    Args:
        page_html: Full HTML of the facility page.

    Returns:
        The response body of the AJAX call decoded as UTF-8 (expected to be
        the HTML fragment containing the unit listing rows), or ``None`` if
        the parameters cannot be found, the endpoint is not an HTTP(S) URL,
        or the request fails. This function never raises.
    """
    # TODO: Move this HTTP call to the fetch stage. AJAX POST calls at
    # parse time violate the pipeline architecture (Fetch -> Parse should
    # be a clean boundary where Parse only reads local data).
    ajax_url, shortcode, current_url = _extract_candee_params(page_html)

    if not shortcode:
        logger.debug("Could not find Candee shortcode parameters in page HTML")
        return None
    if not ajax_url:
        logger.debug("Could not find ajaxurl in page HTML")
        return None

    origin = "https://www.stor-it.com"
    try:
        # Resolve relative / protocol-relative endpoints against the page URL;
        # an absolute ``ajax_url`` is returned unchanged by ``urljoin``.
        endpoint = urllib.parse.urljoin(current_url or origin, ajax_url)
        scheme = urllib.parse.urlsplit(endpoint).scheme
    except ValueError as exc:
        logger.warning("Candee AJAX endpoint is malformed: %s", exc)
        return None
    # The URL comes from scraped HTML: never let urlopen follow file:, ftp:, etc.
    if scheme not in ("http", "https"):
        logger.warning("Refusing non-HTTP Candee AJAX endpoint: %s", endpoint)
        return None

    # POST to the AJAX endpoint with jQuery-serialised nested params
    post_params = {
        "action": "candee_ajax_load_template",
        "query_vars[shortcode]": shortcode,
        "query_vars[lang]": "en",
        "current_url": current_url or "",
        "ajax_data[theme]": "templates/ajax_load/ajax_load_theme_2.php",
    }
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "X-Requested-With": "XMLHttpRequest",
        "Origin": origin,
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept": "*/*",
    }
    if current_url:
        # Only send a Referer when we actually know the page URL; an empty
        # Referer header is worse than none.
        headers["Referer"] = current_url

    req = urllib.request.Request(
        endpoint,
        data=urllib.parse.urlencode(post_params).encode("utf-8"),
        headers=headers,
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            return resp.read().decode("utf-8")
    except (URLError, OSError, TimeoutError) as exc:
        logger.warning("Candee AJAX request failed: %s", exc)
        return None
    except Exception as exc:
        # Deliberate best-effort boundary: a failed fetch must not abort parsing.
        logger.warning("Unexpected error in Candee AJAX request: %s", exc)
        return None
# Button captions that appear in ``data-features`` but are not unit features.
_ACTION_WORDS = frozenset({"Rent Now", "Move In", "Select"})


def _to_float(value: object) -> float | None:
    """Convert *value* to ``float`` after dropping thousands separators.

    Returns ``None`` when *value* is ``None`` or not parseable as a number.
    """
    if value is None:
        return None
    try:
        return float(str(value).replace(",", ""))
    except (TypeError, ValueError):
        return None


def _format_dimension(value: object) -> str | None:
    """Render a positive, finite dimension without a trailing ``.0``.

    ``10`` and ``"10.0"`` become ``"10"``; ``7.5`` stays ``"7.5"``. Returns
    ``None`` for missing, non-numeric, non-positive, NaN or infinite values.
    """
    number = _to_float(value)
    # The chained comparison is False for NaN, so NaN is rejected as well.
    if number is None or not 0 < number < float("inf"):
        return None
    return f"{number:.2f}".rstrip("0").rstrip(".")


def _clean_text(value: object) -> str:
    """Return *value* stripped when it is a string, otherwise ``""``."""
    return value.strip() if isinstance(value, str) else ""


def _parse_unit_rows(html: str, url: str) -> list[UnitResult]:
    """Extract ``UnitResult`` objects from Candee unit listing HTML.

    Each unit row is a ``<div class="unitsList lineItem unitMasterData …">``
    element whose ``data-unit-info`` attribute contains the unit details as
    JSON. Rows without that attribute, or whose JSON is invalid or not an
    object, are skipped. Malformed individual fields (non-numeric dimensions,
    ``null`` names) degrade to fallbacks instead of aborting the whole parse.

    Args:
        html: HTML containing zero or more unit rows.
        url: URL stored on every returned unit.

    Returns:
        One ``UnitResult`` per usable row, in document order (possibly empty).
    """
    soup = BeautifulSoup(html, "lxml")
    units: list[UnitResult] = []
    rows = soup.find_all(
        "div",
        class_=lambda c: c and "unitsList" in c and "lineItem" in c,
    )
    for row in rows:
        info_str = row.get("data-unit-info", "")
        if not info_str:
            continue
        try:
            info = json.loads(info_str)
        except (json.JSONDecodeError, TypeError):
            logger.debug("Failed to parse data-unit-info JSON: %s", info_str[:100])
            continue
        if not isinstance(info, dict):
            # Valid JSON but not an object (e.g. a list): nothing to read.
            logger.debug("data-unit-info is not a JSON object: %s", info_str[:100])
            continue

        width = info.get("width")
        length = info.get("length")
        sqft = info.get("sqft")
        unit_type = info.get("unit_type", "")

        # Price: prefer the JSON field; fall back to the data-price attribute
        price = _to_float(info.get("price"))
        if price is None:
            price = BaseParser.normalize_price(row.get("data-price", ""))

        # Sale / discounted price
        sale_price = _to_float(info.get("discounted_price"))

        # Build size string from dimensions when available. Values are
        # validated first so a stray "7.5" or "n/a" cannot raise, and
        # fractional dimensions are kept rather than truncated.
        width_text = _format_dimension(width)
        length_text = _format_dimension(length)
        sqft_text = _format_dimension(sqft)
        if width_text and length_text:
            size_str = f"{width_text}' x {length_text}'"
        elif sqft_text:
            size_str = f"{sqft_text} sqft"
        else:
            # Fall back to data-size attribute (square footage)
            size_str = row.get("data-size", "")

        # Scarcity from total_available
        total_avail = info.get("total_available")
        scarcity: str | None = None
        if total_avail is not None:
            try:
                avail_int = int(total_avail)
                if avail_int == 0:
                    scarcity = "Sold out"
                elif avail_int <= 3:
                    scarcity = f"Only {avail_int} left"
            except (TypeError, ValueError):
                pass

        # Features from data-features attribute (a JSON list), minus the
        # action-button captions that share the same attribute.
        features: list[str] = []
        feat_str = row.get("data-features", "[]")
        try:
            feat_raw = json.loads(feat_str)
        except (json.JSONDecodeError, TypeError):
            feat_raw = None
        if isinstance(feat_raw, list):
            features = [
                f
                for f in feat_raw
                if not (isinstance(f, str) and f in _ACTION_WORDS)
            ]

        # Prefer the rental name; fall back to the unit type. Both may be
        # JSON null, so they are normalised to strings before use.
        description = _clean_text(info.get("rental_name")) or _clean_text(unit_type)

        unit = UnitResult(
            size=size_str or None,
            description=description,
            price=price,
            sale_price=sale_price,
            scarcity=scarcity,
            url=url,
            metadata={
                "width": width,
                "length": length,
                "sqft": sqft,
                "unit_type": unit_type,
                "features": features,
                "rental_id": info.get("rental_id"),
                "property_id": info.get("property_id"),
                "total_available": total_avail,
            },
        )
        units.append(unit)
    return units
class Facility080338Parser(BaseParser):
    """Parser for the Stor-It Self Storage Lake Lowell facility (Candee plugin).

    Unit listings on this site are loaded asynchronously through a WordPress
    AJAX call, so the supplied HTML can arrive in one of two shapes:

    * **AJAX response HTML** that already holds the unit rows
      (``<div class="unitsList lineItem unitMasterData …">``); these are
      parsed directly.
    * **Full facility page HTML** without unit rows; the shortcode parameters
      are read from the page and the AJAX POST is issued from here.

    Facility URL:
    https://www.stor-it.com/location/USA/ID/Nampa/stor-it-lake-lowell/
    """

    platform = "custom_facility_080338"

    def parse(self, html: str, url: str = "") -> ParseResult:
        """Return the units found in *html*, fetching them via AJAX if needed.

        Args:
            html: Either the AJAX response fragment or the full facility page.
            url: URL attached to every extracted unit.

        Returns:
            A ``ParseResult`` whose ``units`` are set when rows were found;
            otherwise a warning describing which step came up empty is
            appended to ``warnings``.
        """
        outcome = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)

        # First attempt: the HTML may already be the AJAX response.
        direct_units = _parse_unit_rows(html, url)
        if direct_units:
            outcome.units = direct_units
            return outcome

        # Second attempt: treat the HTML as the full page and fetch the rows.
        # TODO: Move this HTTP call to the fetch stage so parse() only
        # reads local data. The AJAX call below should be performed during
        # fetching and the response HTML embedded in the snapshot.
        logger.debug("No unit rows found in HTML; attempting Candee AJAX fetch")
        fragment = _fetch_unit_html(html)
        if not fragment:
            outcome.warnings.append(
                "No unit rows found in HTML and Candee AJAX fetch failed. "
                "This parser makes HTTP requests at parse time which "
                "should be moved to the fetch stage. "
                "Ensure the snapshot is either the full facility page or the AJAX response HTML."
            )
            return outcome

        fetched_units = _parse_unit_rows(fragment, url)
        if not fetched_units:
            outcome.warnings.append(
                "Candee AJAX fetch succeeded but no unit rows were found in the response."
            )
            return outcome

        outcome.units = fetched_units
        return outcome
Scrape Runs (4)
Run #505 Details
- Status
- exported
- Parser Used
- Facility080338Parser
- Platform Detected
- table_layout
- Units Found
- 0
- Stage Reached
- exported
- Timestamp
- 2026-03-14 16:52:00.504844
Timing
| Stage | Duration |
|---|---|
| Fetch | 11867ms |
| Detect | 158ms |
| Parse | 198ms |
| Export | 11ms |
Snapshot: 080338_20260314T165212Z.html · Show Snapshot · Open in New Tab
No units found in this run.
All Failures for this Facility (4)
parse
_WarningAsException
scraper
no_units_extracted
warning
Run #N/A | 2026-03-23 03:16:35.023179
No units extracted for 080338
Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 080338
parse
_WarningAsException
scraper
no_units_extracted
warning
Run #N/A | 2026-03-21 19:09:22.936612
No units extracted for 080338
Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 080338
parse
_WarningAsException
scraper
no_units_extracted
warning
Run #N/A | 2026-03-14 16:52:12.774608
No units extracted for 080338
Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 080338
parse
_WarningAsException
scraper
no_units_extracted
warning
Run #N/A | 2026-03-14 01:03:26.475779
No units extracted for 080338
Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 080338