Facility: 000982
Baugus Storage
- Facility ID
- 000982
- Name
- Baugus Storage
- URL
- https://www.squareup.com/store/baugus-storage/
- Address
- 10115 Tn-57, Counce, TN 38326, USA, Counce, Tennessee 38326
- Platform
- custom_facility_000982
- Parser File
- src/parsers/custom/facility_000982_parser.py
- Last Scraped
- 2026-03-27 13:39:15.508364
- Created
- 2026-03-23 02:35:08.816820
- Updated
- 2026-03-27 13:39:15.542340
- Parser Status
- ✓ Working
- Status Reason
- N/A
- Last Healing Attempt
- Not attempted
Parser Source (src/parsers/custom/facility_000982_parser.py)
"""Parser for Baugus Storage (Square Online store).
This facility uses Square Online to list storage units. The actual product
catalog (with prices) is loaded dynamically via Square's API after page load,
so the HTML snapshot only contains descriptive text about the two categories:
- Boat Storage: 12ft x 30ft slips (134 units)
- Mini Storage: multiple sizes, no specifics given
The parser extracts what is available from the page's embedded JSON
(window.__BOOTSTRAP_STATE__) and the rendered text. Prices are NOT available
in the static HTML; they require Square's catalog API.
"""
from __future__ import annotations
import json
import re
from bs4 import BeautifulSoup
from src.parsers.base import BaseParser, ParseResult, UnitResult
class Facility000982Parser(BaseParser):
    """Extract storage units from Baugus Storage (Square Online)."""

    # Platform identifier used by the scraper to route pages to this parser.
    platform = "custom_facility_000982"

    # Matches dimensions like "12ft wide and 30ft deep" or "12ft x 30ft".
    # Group 1 = width in feet, group 2 = length/depth in feet.
    _DIM_RE = re.compile(
        r"(\d+)\s*(?:ft|')\s*(?:wide|[xX×])\s*(?:and\s*)?(\d+)\s*(?:ft|')\s*(?:deep)?",
    )
def parse(self, html: str, url: str = "") -> ParseResult:
soup = BeautifulSoup(html, "lxml")
result = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)
# Strategy 1: Extract product data from Square's featured-products
# blocks if they were rendered by Selenium.
units = self._try_rendered_products(soup)
# Strategy 2: Fall back to extracting descriptive info from the
# __BOOTSTRAP_STATE__ JSON embedded in the page.
if not units:
units = self._try_bootstrap_state(html)
# Strategy 3: Fall back to plain text extraction.
if not units:
units = self._try_text_extraction(soup)
result.units = units
if not result.units:
result.warnings.append(
"No unit pricing found. This Square Online store loads products "
"via API; the HTML snapshot may not contain catalog data."
)
return result
def _try_rendered_products(self, soup: BeautifulSoup) -> list[UnitResult]:
"""Try to extract units from rendered Square product cards."""
units: list[UnitResult] = []
# Square Online renders product cards with price and title.
# Look for common Square product card patterns.
for card in soup.select(
"[data-testid*='product'], "
".product-card, "
".featured-product, "
"[class*='ProductCard'], "
"[class*='item-card']"
):
title_el = card.select_one(
"[class*='title'], [class*='name'], h2, h3, h4"
)
price_el = card.select_one(
"[class*='price'], [class*='amount'], [class*='cost']"
)
if not title_el and not price_el:
continue
unit = UnitResult()
if title_el:
unit.description = title_el.get_text(strip=True)[:200]
# Try to extract size from the title
size_match = re.search(
r"(\d+)\s*['\u2032xX×]\s*(\d+)", title_el.get_text()
)
if size_match:
size_str = f"{size_match.group(1)}x{size_match.group(2)}"
unit.size = size_str
w, ln, sq = self.normalize_size(size_str)
if w is not None:
unit.metadata = {"width": w, "length": ln, "sqft": sq}
if price_el:
price_text = price_el.get_text(strip=True)
unit.price = self.normalize_price(price_text)
if unit.size or unit.price:
units.append(unit)
return units
def _try_bootstrap_state(self, html: str) -> list[UnitResult]:
"""Extract unit info from the __BOOTSTRAP_STATE__ JSON."""
units: list[UnitResult] = []
m = re.search(
r"window\.__BOOTSTRAP_STATE__\s*=\s*(\{.*?\});", html, re.DOTALL
)
if not m:
return units
try:
json.loads(m.group(1)) # validate JSON is parseable
except (json.JSONDecodeError, ValueError):
return units
# Collect all text fragments from the "insert" fields in the JSON.
inserts = re.findall(r'"insert":"([^"]+)"', m.group(1))
text_blocks: list[str] = []
for ins in inserts:
decoded = ins.replace("\\n", "\n").replace("\\u00a0", " ").strip()
if decoded:
text_blocks.append(decoded)
full_text = "\n".join(text_blocks)
# Parse boat storage section.
boat_section = re.search(
r"Boat Storage\n(.*?)(?:Mini Storage|About|\Z)",
full_text,
re.DOTALL,
)
if boat_section:
section_text = boat_section.group(1)
dim_match = self._DIM_RE.search(section_text)
if dim_match:
w_val = int(dim_match.group(1))
l_val = int(dim_match.group(2))
unit = UnitResult()
unit.size = f"{w_val}x{l_val}"
unit.description = "Boat Storage Slip"
w, ln, sq = self.normalize_size(unit.size)
if w is not None:
unit.metadata = {"width": w, "length": ln, "sqft": sq}
# Extract unit count if mentioned
count_match = re.search(r"(\d+)\s*Units?", section_text, re.I)
if count_match:
unit.metadata = unit.metadata or {}
unit.metadata["total_units"] = int(count_match.group(1))
units.append(unit)
# Parse mini storage section -- sizes not specified on this page.
mini_section = re.search(
r"Mini Storage\n(.*?)(?:About|\Z)",
full_text,
re.DOTALL,
)
if mini_section:
section_text = mini_section.group(1)
if "sizes available" in section_text.lower():
unit = UnitResult()
unit.description = "Mini Storage (multiple sizes, see store)"
# No specific size or price available
units.append(unit)
return units
def _try_text_extraction(self, soup: BeautifulSoup) -> list[UnitResult]:
"""Last resort: extract from visible page text."""
units: list[UnitResult] = []
for tag in soup.find_all(["script", "style"]):
tag.decompose()
body_text = soup.get_text(separator="\n")
# Look for dimension patterns
for dim_match in self._DIM_RE.finditer(body_text):
w_val = int(dim_match.group(1))
l_val = int(dim_match.group(2))
if w_val < 3 or l_val < 3:
continue
unit = UnitResult()
unit.size = f"{w_val}x{l_val}"
w, ln, sq = self.normalize_size(unit.size)
if w is not None:
unit.metadata = {"width": w, "length": ln, "sqft": sq}
# Check for a price nearby
start = max(0, dim_match.start() - 200)
end = min(len(body_text), dim_match.end() + 200)
context = body_text[start:end]
price_match = re.search(r"\$(\d[\d,.]*)", context)
if price_match:
unit.price = self.normalize_price(price_match.group(1))
unit.description = context.strip()[:200]
units.append(unit)
return units
Scrape Runs (3)
Run #1511 Details
- Status
- exported
- Parser Used
- Facility000982Parser
- Platform Detected
- table_layout
- Units Found
- 2
- Stage Reached
- exported
- Timestamp
- 2026-03-27 13:39:11.218336
Timing
| Stage | Duration |
|---|---|
| Fetch | 4106ms |
| Detect | 78ms |
| Parse | 36ms |
| Export | 25ms |
Snapshot: 000982_20260327T133915Z.html · Show Snapshot · Open in New Tab
Parsed Units (2)
12x30
No price
Unknown Size
No price