Facility: 004990

Al's Mini Storage - Colstrip

Stale Data Warning: This facility has not been successfully scraped in 30 days (threshold: 3 days). Data may be outdated.
Facility Information active
Facility ID
004990
Name
Al's Mini Storage - Colstrip
URL
https://www.alsministorage.com/our-facilities/colstrip/
Address
N/A
Platform
ccstorage
Parser File
src/parsers/ccstorage.py
Last Scraped
2026-03-23 03:20:57.981067
Created
2026-03-06 23:45:35.865957
Updated
2026-03-23 03:20:57.981067
Parser & Healing Diagnosis needs_fix
Parser Status
⚠ Needs Fix
Status Reason
Parser returned 0 units
Last Healing Attempt
Not attempted
Parser Source (src/parsers/ccstorage.py)
"""Parser for CubeSmart / CCStorage facility pages."""

from __future__ import annotations

from bs4 import BeautifulSoup, Tag

from src.parsers.base import BaseParser, ParseResult, UnitResult


class CCStorageParser(BaseParser):
    """Extract storage units from CubeSmart/CCStorage HTML pages.

    Supports the modern Tailwind/Turbo-based layout where unit cards are
    identified by ``data-test-type`` attributes starting with ``sut_``,
    as well as legacy ``cc-`` prefixed CSS class selectors.

    Selector strategies are tried in a fixed order and the first one that
    matches at least one element is used; later strategies are not consulted.
    """

    platform = "ccstorage"

    # CSS selectors for unit containers, tried in order (first match wins):
    #   1. modern layout: data-test-type="sut_*"
    #   2. legacy cc-unit-card / cc-unit-row containers
    #   3. data-ccs attribute containers
    #   4. generic unit-card / unit-row / size-row class names
    _UNIT_SELECTORS = (
        "[data-test-type^='sut_']",
        ".cc-unit-card, .cc-unit-row",
        "[data-ccs]",
        ".unit-card, .unit-row, .size-row",
    )

    # (metadata key, keywords) pairs. A key is set to True when any of its
    # keywords occurs as a substring of the element text or its CSS classes.
    _AMENITY_KEYWORDS = (
        ("climateControlled", ["climate", "temperature", "heated", "cooled"]),
        ("driveUpAccess", ["drive-up", "drive up", "driveup", "drive_up"]),
        ("elevatorAccess", ["elevator"]),
        ("groundFloor", ["ground floor", "ground-floor", "1st floor", "first floor"]),
        ("indoor", ["indoor", "interior"]),
    )

    def parse(self, html: str, url: str = "") -> ParseResult:
        """Parse a facility page and return every unit found on it.

        Args:
            html: Raw HTML of the facility page.
            url: Accepted as part of the parser signature; not used here.

        Returns:
            A ``ParseResult`` whose ``units`` holds one entry per matched unit
            element. When no selector strategy matches, ``units`` is left
            untouched and a message is appended to ``warnings``.
        """
        soup = BeautifulSoup(html, "lxml")
        result = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)

        unit_elements = []
        for selector in self._UNIT_SELECTORS:
            unit_elements = soup.select(selector)
            if unit_elements:
                break

        if not unit_elements:
            result.warnings.append("No unit elements found with CCStorage selectors")
            return result

        for el in unit_elements:
            unit = self._parse_unit_element(el)
            if unit is not None:
                result.units.append(unit)

        return result

    def _parse_unit_element(self, el: Tag) -> UnitResult | None:
        """Build a ``UnitResult`` from a single unit container element.

        The description is the element's full text; size, pricing, amenity
        flags and availability are then filled in from the same element.

        NOTE(review): this currently always returns a unit, even when neither
        a size nor a price could be extracted; the ``None`` in the return
        type is never produced here.
        """
        unit = UnitResult()

        # Raw description: all text of the element, whitespace-joined.
        unit.description = el.get_text(separator=" ", strip=True)

        self._extract_size(el, unit)
        self._extract_pricing(el, unit)

        # --- Amenities ---
        text_lower = (unit.description or "").lower()
        classes_str = " ".join(el.get("class", []))

        # Reuse the metadata dict that size extraction may have populated so
        # dimensions and amenity flags end up in the same mapping.
        meta = unit.metadata or {}
        for key, keywords in self._AMENITY_KEYWORDS:
            if _has_any(text_lower, classes_str, keywords):
                meta[key] = True
        if meta:
            unit.metadata = meta

        self._extract_availability(el, unit)

        return unit

    def _extract_size(self, el: Tag, unit: UnitResult) -> None:
        """Populate ``unit.size`` and width/length/sqft metadata from *el*.

        ``self.normalize_size`` is used as a helper that returns a
        ``(width, length, sqft)`` triple whose first item is ``None`` when the
        text is not a recognisable size (helper not visible in this file;
        presumably provided by ``BaseParser``).
        """
        # Modern layout: first <p> has label (e.g. "10X20"), second <p>
        # has readable dimensions (e.g. "10 x 20").
        paragraphs = el.select("p")
        # enumerate() gives the position directly; list.index() would rescan
        # the list and compare tags by content rather than identity.
        for idx, p in enumerate(paragraphs):
            text = p.get_text(strip=True)
            w, ln, sq = self.normalize_size(text)
            if w is None:
                continue
            meta = unit.metadata or {}
            meta["width"] = w
            meta["length"] = ln
            meta["sqft"] = sq
            unit.metadata = meta
            # Prefer the paragraph just before the dimensions (the label)
            # as the display size; fall back to the dimension text itself.
            if idx > 0:
                unit.size = paragraphs[idx - 1].get_text(strip=True)
            else:
                unit.size = text
            break

        # Legacy layout: elements with size/dimension class names. Only
        # consulted when the paragraph scan found no dimensions.
        if not (unit.metadata and "width" in unit.metadata):
            size_el = (
                el.select_one("[class*='size']")
                or el.select_one("[class*='dimension']")
                or el.select_one(".cc-unit-size")
            )
            if size_el:
                size_text = size_el.get_text(strip=True)
                w, ln, sq = self.normalize_size(size_text)
                if w is not None:
                    meta = unit.metadata or {}
                    meta["width"] = w
                    meta["length"] = ln
                    meta["sqft"] = sq
                    unit.metadata = meta
                # The raw text is kept as the size even when it could not be
                # normalised into dimensions.
                unit.size = size_text

    def _extract_pricing(self, el: Tag, unit: UnitResult) -> None:
        """Populate ``unit.price``, ``unit.sale_price`` and ``unit.promotion``.

        Modern pages expose prices as ``<dl>`` blocks (``<dt>`` label,
        ``<dd>`` value). When at least one ``<dl>`` exists, only those are
        used and the class-based legacy selectors are skipped entirely.
        """
        # Modern layout: <dl> elements with <dt> label and <dd> price
        dl_elements = el.select("dl")
        if dl_elements:
            for dl in dl_elements:
                dt = dl.select_one("dt")
                dd = dl.select_one("dd")
                if not dt or not dd:
                    continue
                label = dt.get_text(strip=True).lower()
                raw_value = dd.get_text(strip=True)
                price = self.normalize_price(raw_value)
                if "card" in label:
                    unit.price = price
                elif "cash" in label:
                    unit.sale_price = price
                elif "street" in label or "regular" in label:
                    unit.price = price
                elif "web" in label or "online" in label:
                    unit.sale_price = price
                elif "promo" in label or "special" in label:
                    if price is not None:
                        # Numeric promotions keep their normalised form.
                        unit.promotion = str(price)
                    else:
                        # Textual promotions (no parseable price) used to be
                        # dropped; keep the raw text, matching what the
                        # legacy branch below stores.
                        unit.promotion = raw_value or None
                elif price is not None and unit.sale_price is None:
                    # Unlabelled/unknown label: treat the first parseable
                    # value as the sale price.
                    unit.sale_price = price
            return

        # Legacy layout: elements with rate/price class names
        street_el = el.select_one("[class*='street-rate']") or el.select_one("[class*='regular-rate']")
        if street_el:
            unit.price = self.normalize_price(street_el.get_text(strip=True))

        web_el = el.select_one("[class*='web-rate']") or el.select_one("[class*='online-rate']")
        if web_el:
            unit.sale_price = self.normalize_price(web_el.get_text(strip=True))

        promo_el = el.select_one("[class*='promo']") or el.select_one("[class*='special']")
        if promo_el:
            promo_text = promo_el.get_text(strip=True)
            unit.promotion = promo_text if promo_text else None

        # Fallback: look for any price-like text
        if unit.price is None and unit.sale_price is None:
            price_el = el.select_one("[class*='price']") or el.select_one("[class*='rate']")
            if price_el:
                unit.sale_price = self.normalize_price(price_el.get_text(strip=True))

    def _extract_availability(self, el: Tag, unit: UnitResult) -> None:
        """Set ``unit.scarcity`` from availability wording inside *el*.

        Recognised phrases map to ``"Unavailable"``, ``"Waitlist"`` or
        ``"Available"``. If none is present and ``scarcity`` is still
        ``None``, the text of the first element whose class contains
        ``avail`` is stored verbatim.
        """
        # Modern layout: "No Units Available" text or "Rent Now" / "Join waiting list".
        # Lower-case once instead of once per comparison.
        text_lower = el.get_text(separator=" ", strip=True).lower()
        if "no units available" in text_lower:
            unit.scarcity = "Unavailable"
        elif "join waiting list" in text_lower:
            unit.scarcity = "Waitlist"
        elif "rent now" in text_lower:
            unit.scarcity = "Available"

        # Legacy layout: element with avail class
        if unit.scarcity is None:
            avail_el = el.select_one("[class*='avail']")
            if avail_el:
                unit.scarcity = avail_el.get_text(strip=True)


def _has_any(text: str, classes: str, keywords: list[str]) -> bool:
    """Check if any keyword appears in the text or CSS classes."""
    combined = f"{text} {classes}".lower()
    return any(kw in combined for kw in keywords)

Scrape Runs (5)

Run #1493 Details

Status
exported
Parser Used
CCStorageParser
Platform Detected
ccstorage
Units Found
0
Stage Reached
exported
Timestamp
2026-03-23 03:20:53.001805
Timing
Stage Duration
Fetch 4865ms
Detect 35ms
Parse 49ms
Export 3ms

Snapshot: 004990_20260323T032057Z.html · Show Snapshot · Open in New Tab

No units found in this run.

All Failures for this Facility (5)

parse _WarningAsException scraper no_units_extracted warning Run #N/A | 2026-03-23 03:20:57.975168

No units extracted for 004990

Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 004990
parse _WarningAsException scraper no_units_extracted warning Run #N/A | 2026-03-21 19:14:14.624746

No units extracted for 004990

Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 004990
parse _WarningAsException scraper no_units_extracted warning Run #N/A | 2026-03-14 16:55:59.642665

No units extracted for 004990

Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 004990
parse _WarningAsException scraper no_units_extracted warning Run #N/A | 2026-03-14 04:59:46.368847

No units extracted for 004990

Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 004990
parse _WarningAsException scraper no_units_extracted warning Run #N/A | 2026-03-14 01:01:26.657769

No units extracted for 004990

Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 004990

← Back to dashboard