Facility: 005030

Premiere Self-Storage

Stale Data Warning: This facility has not been successfully scraped in 26 days (threshold: 3 days). Data may be outdated.
Facility Information (active)
Facility ID
005030
Name
Premiere Self-Storage
URL
http://www.premiereselfstorage.com/
Address
120 Shrewsbury St, Boylston, MA 01505, USA, Boylston, Massachusetts 01505
Platform
custom_facility_005030
Parser File
src/parsers/custom/facility_005030_parser.py
Last Scraped
2026-03-27 13:40:25.977266
Created
2026-03-23 02:35:08.816820
Updated
2026-03-27 13:40:25.977266
Parser & Healing Diagnosis (needs_fix)
Parser Status
⚠ Needs Fix
Status Reason
Parser returned 0 units
Last Healing Attempt
Not attempted
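Since the status reason is "Parser returned 0 units", a quick first check is whether the saved snapshot still contains the `wixui-section` / `wixui-rich-text` markup the parser selects on. A minimal stdlib-only sketch (the helper and sample fragment are illustrative, not part of the project):

```python
from html.parser import HTMLParser


class ClassCounter(HTMLParser):
    """Count start tags carrying specific CSS classes (e.g. Wix markers)."""

    def __init__(self, wanted: set[str]) -> None:
        super().__init__()
        self.wanted = wanted
        self.counts = {name: 0 for name in wanted}

    def handle_starttag(self, tag, attrs):
        # attrs is a list of (name, value) pairs; value may be None.
        classes = (dict(attrs).get("class") or "").split()
        for name in self.wanted:
            if name in classes:
                self.counts[name] += 1


def count_wix_markers(html: str) -> dict[str, int]:
    """Count the selectors Facility005030Parser depends on."""
    counter = ClassCounter({"wixui-section", "wixui-rich-text"})
    counter.feed(html)
    return counter.counts


# A fragment shaped like the markup the parser expects.
sample = (
    '<section class="wixui-section">'
    '<div class="wixui-rich-text"><h2>5x10</h2></div>'
    '</section>'
)
counts = count_wix_markers(sample)
print(counts["wixui-section"], counts["wixui-rich-text"])  # 1 1
```

If both counts come back zero for the real snapshot, the Wix page structure has changed and the selectors (not the size regexes) are the likely failure point.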
Parser Source (src/parsers/custom/facility_005030_parser.py)
"""Parser for Premiere Self-Storage (Wix site, unit-types page)."""

from __future__ import annotations

import re

from bs4 import BeautifulSoup, Tag

from src.parsers.base import BaseParser, ParseResult, UnitResult

# Categories recognised as unit-type headers on the page.
_CATEGORY_NAMES = {"Climate Controlled Units", "Drive-Up Units", "Inside Units"}

_SIZE_RE = re.compile(r"^(\d+)\s*[xX]\s*(\d+)$")
_SQFT_RE = re.compile(r"^(\d+)\s*sf$", re.IGNORECASE)


class Facility005030Parser(BaseParser):
    """Extract storage unit sizes from Premiere Self-Storage.

    The site is built on Wix.  The unit-types page lists three categories
    (Climate Controlled, Drive-Up, Inside) each followed by a list of
    dimensions (``5x10``, ``10x20``, ...) rendered as ``<h2>`` elements
    inside ``wixui-rich-text`` divs.  No prices are published on the site.
    """

    platform = "custom_facility_005030"

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _rich_text_divs(section: Tag) -> list[Tag]:
        """Return top-level ``wixui-rich-text`` divs inside *section*."""
        return section.find_all("div", class_="wixui-rich-text", recursive=True)

    @staticmethod
    def _h2_texts(div: Tag) -> list[str]:
        """Return stripped text of each ``<h2>`` inside *div*."""
        return [h2.get_text(strip=True) for h2 in div.find_all("h2")]

    # ------------------------------------------------------------------
    # main parse
    # ------------------------------------------------------------------

    def parse(self, html: str, url: str = "") -> ParseResult:
        soup = BeautifulSoup(html, "lxml")
        result = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)

        for tag in soup.find_all(["script", "style"]):
            tag.decompose()

        sections = soup.find_all("section", class_="wixui-section")
        if not sections:
            result.warnings.append("No wixui-section elements found")
            return result

        seen: set[tuple[str, str]] = set()

        for section in sections:
            rt_divs = self._rich_text_divs(section)
            if len(rt_divs) < 3:
                continue

            # Identify the category name from any rich-text div whose
            # text matches a known category (the last match wins).
            category = ""
            size_texts: list[str] = []
            sqft_texts: list[str] = []

            for div in rt_divs:
                text = div.get_text(strip=True)

                # Category header
                if text in _CATEGORY_NAMES:
                    category = text
                    continue

                # "UNIT SIZES" label — skip
                if text.upper() == "UNIT SIZES":
                    continue

                h2_items = self._h2_texts(div)
                if not h2_items:
                    continue

                # Determine whether these are dimensions or sqft values
                if _SIZE_RE.match(h2_items[0]):
                    size_texts = h2_items
                elif _SQFT_RE.match(h2_items[0]):
                    sqft_texts = h2_items

            if not size_texts:
                continue

            for idx, raw_size in enumerate(size_texts):
                m = _SIZE_RE.match(raw_size)
                if not m:
                    continue

                width = int(m.group(1))
                length = int(m.group(2))
                sqft = width * length

                # Use the paired sqft value if available (sanity check)
                if idx < len(sqft_texts):
                    sm = _SQFT_RE.match(sqft_texts[idx])
                    if sm:
                        sqft = int(sm.group(1))

                size_label = f"{width}x{length}"
                key = (category, size_label)
                if key in seen:
                    continue
                seen.add(key)

                unit = UnitResult()
                unit.size = size_label
                w, ln, sq = self.normalize_size(size_label)
                unit.metadata = {
                    "width": w if w is not None else width,
                    "length": ln if ln is not None else length,
                    "sqft": sq if sq is not None else sqft,
                    "category": category,
                }
                unit.description = f"{category} - {size_label}" if category else size_label
                result.units.append(unit)

        if not result.units:
            result.warnings.append("No units found on page")

        return result
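The dimension/square-footage pairing above can be exercised in isolation with the two regexes, which helps separate "selectors broke" from "regexes broke" when debugging a zero-unit run. A stdlib-only sketch (the `pair_sizes` helper is illustrative, not part of the parser):

```python
import re

# Same patterns as the parser's module-level regexes.
SIZE_RE = re.compile(r"^(\d+)\s*[xX]\s*(\d+)$")
SQFT_RE = re.compile(r"^(\d+)\s*sf$", re.IGNORECASE)


def pair_sizes(size_texts: list[str], sqft_texts: list[str]) -> list[tuple[str, int]]:
    """Return (label, sqft) pairs, preferring a published sf value."""
    units = []
    for idx, raw in enumerate(size_texts):
        m = SIZE_RE.match(raw)
        if not m:
            continue  # not a WxL dimension string
        width, length = int(m.group(1)), int(m.group(2))
        sqft = width * length  # computed fallback
        if idx < len(sqft_texts):
            sm = SQFT_RE.match(sqft_texts[idx])
            if sm:
                sqft = int(sm.group(1))  # published value wins
        units.append((f"{width}x{length}", sqft))
    return units


print(pair_sizes(["5x10", "10 X 20"], ["50 sf"]))
# [('5x10', 50), ('10x20', 200)]
```

Note that the second size has no paired sf label, so the helper falls back to width × length, mirroring the parser's sanity-check behaviour.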

Scrape Runs (3)

Run #1544 Details

Status
exported
Parser Used
Facility005030Parser
Platform Detected
table_layout
Units Found
0
Stage Reached
exported
Timestamp
2026-03-27 13:40:23.696045
Timing
Stage Duration
Fetch 2091 ms
Detect 16 ms
Parse 8 ms
Export 14 ms

Snapshot: 005030_20260327T134025Z.html

No units found in this run.

All Failures for this Facility (3)

parse · _WarningAsException · scraper · no_units_extracted · warning · Run #N/A | 2026-03-27 13:40:25.958086

No units extracted for 005030

Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 005030
parse · _WarningAsException · scraper · no_units_extracted · warning · Run #N/A | 2026-03-27 13:40:25.864982

No units extracted for 005030

Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 005030
parse · _WarningAsException · scraper · no_units_extracted · warning · Run #N/A | 2026-03-23 02:40:08.810460

No units extracted for 005030

Stack trace
src.reporting.failure_reporter._WarningAsException: No units extracted for 005030