Facility: 000982

Baugus Storage

Stale Data Warning: This facility has not been successfully scraped in 26 days (threshold: 3 days). Data may be outdated.
Facility Information active
Facility ID
000982
Name
Baugus Storage
URL
https://www.squareup.com/store/baugus-storage/
Address
10115 Tn-57, Counce, TN 38326, USA, Counce, Tennessee 38326
Platform
custom_facility_000982
Parser File
src/parsers/custom/facility_000982_parser.py
Last Scraped
2026-03-27 13:39:15.508364
Created
2026-03-23 02:35:08.816820
Updated
2026-03-27 13:39:15.542340
Parser & Healing Diagnosis working
Parser Status
✓ Working
Status Reason
N/A
Last Healing Attempt
Not attempted
Parser Source (src/parsers/custom/facility_000982_parser.py)
"""Parser for Baugus Storage (Square Online store).

This facility uses Square Online to list storage units. The actual product
catalog (with prices) is loaded dynamically via Square's API after page load,
so the HTML snapshot only contains descriptive text about the two categories:

- Boat Storage: 12ft x 30ft slips (134 units)
- Mini Storage: multiple sizes, no specifics given

The parser extracts what is available from the page's embedded JSON
(window.__BOOTSTRAP_STATE__) and the rendered text. Prices are NOT available
in the static HTML; they require Square's catalog API.
"""

from __future__ import annotations

import json
import re

from bs4 import BeautifulSoup

from src.parsers.base import BaseParser, ParseResult, UnitResult


class Facility000982Parser(BaseParser):
    """Extract storage units from Baugus Storage (Square Online).

    Three strategies are tried in order, stopping at the first that yields
    any units:

    1. Rendered Square product cards (only present if the snapshot was taken
       after client-side rendering, e.g. via Selenium).
    2. The ``window.__BOOTSTRAP_STATE__`` JSON embedded in the static HTML,
       which carries the descriptive text for the two categories.
    3. Plain visible-text extraction of dimension patterns as a last resort.

    Prices are generally unavailable in the static HTML because Square loads
    the catalog via its API after page load; a warning is attached to the
    result when no units are found at all.
    """

    platform = "custom_facility_000982"

    # Matches dimensions like "12ft wide and 30ft deep" or "12ft x 30ft".
    # Both groups capture the numeric feet values; the unit marker may be
    # "ft" or a prime/apostrophe.
    _DIM_RE = re.compile(
        r"(\d+)\s*(?:ft|')\s*(?:wide|[xX×])\s*(?:and\s*)?(\d+)\s*(?:ft|')\s*(?:deep)?",
    )

    def parse(self, html: str, url: str = "") -> ParseResult:
        """Parse the facility page HTML into a ParseResult.

        Args:
            html: Raw HTML of the facility page (static or rendered).
            url: Page URL; accepted for interface compatibility, unused here.

        Returns:
            ParseResult with any extracted units; a warning is appended when
            no units could be found (expected for static Square snapshots).
        """
        soup = BeautifulSoup(html, "lxml")
        result = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)

        # Strategy 1: rendered Square product cards (Selenium snapshots).
        units = self._try_rendered_products(soup)

        # Strategy 2: descriptive info from the embedded __BOOTSTRAP_STATE__.
        if not units:
            units = self._try_bootstrap_state(html)

        # Strategy 3: plain visible-text extraction.
        if not units:
            units = self._try_text_extraction(soup)

        result.units = units

        if not result.units:
            result.warnings.append(
                "No unit pricing found. This Square Online store loads products "
                "via API; the HTML snapshot may not contain catalog data."
            )

        return result

    def _try_rendered_products(self, soup: BeautifulSoup) -> list[UnitResult]:
        """Extract units from rendered Square product cards, if present.

        Matches several common Square Online card class patterns; each card
        contributes a unit only if it yields at least a size or a price.
        """
        units: list[UnitResult] = []

        for card in soup.select(
            "[data-testid*='product'], "
            ".product-card, "
            ".featured-product, "
            "[class*='ProductCard'], "
            "[class*='item-card']"
        ):
            title_el = card.select_one(
                "[class*='title'], [class*='name'], h2, h3, h4"
            )
            price_el = card.select_one(
                "[class*='price'], [class*='amount'], [class*='cost']"
            )
            # A card with neither a title-ish nor a price-ish element is
            # almost certainly a false positive of the broad selectors.
            if not title_el and not price_el:
                continue

            unit = UnitResult()
            if title_el:
                unit.description = title_el.get_text(strip=True)[:200]
                # Try to extract a "WxL" size from the (unstripped) title;
                # accepts x, X, × or a prime mark as the separator.
                size_match = re.search(
                    r"(\d+)\s*['\u2032xX×]\s*(\d+)", title_el.get_text()
                )
                if size_match:
                    size_str = f"{size_match.group(1)}x{size_match.group(2)}"
                    unit.size = size_str
                    w, ln, sq = self.normalize_size(size_str)
                    if w is not None:
                        unit.metadata = {"width": w, "length": ln, "sqft": sq}
            if price_el:
                price_text = price_el.get_text(strip=True)
                unit.price = self.normalize_price(price_text)

            # Require at least one useful field before keeping the unit.
            if unit.size or unit.price:
                units.append(unit)

        return units

    def _try_bootstrap_state(self, html: str) -> list[UnitResult]:
        """Extract unit info from the ``__BOOTSTRAP_STATE__`` JSON blob.

        The blob's rich-text "insert" fragments are concatenated and the
        known "Boat Storage" / "Mini Storage" sections are parsed out.
        """
        units: list[UnitResult] = []

        # NOTE(review): the non-greedy capture stops at the first "};" in the
        # page; if the bootstrap JSON itself contains that sequence the
        # capture is truncated and the json.loads gate below rejects it —
        # confirm against a real snapshot.
        m = re.search(
            r"window\.__BOOTSTRAP_STATE__\s*=\s*(\{.*?\});", html, re.DOTALL
        )
        if not m:
            return units
        blob = m.group(1)

        try:
            json.loads(blob)  # gate: only proceed on well-formed JSON
        except (json.JSONDecodeError, ValueError):
            return units

        # Collect the rich-text fragments ("insert" fields) rather than
        # walking the full JSON tree — the page text is all we need.
        inserts = re.findall(r'"insert":"([^"]+)"', blob)
        text_blocks: list[str] = []
        for ins in inserts:
            # Undo the JSON escapes that matter for our section regexes.
            decoded = ins.replace("\\n", "\n").replace("\\u00a0", " ").strip()
            if decoded:
                text_blocks.append(decoded)

        full_text = "\n".join(text_blocks)

        # Boat storage: a single slip size plus an optional unit count.
        boat_section = re.search(
            r"Boat Storage\n(.*?)(?:Mini Storage|About|\Z)",
            full_text,
            re.DOTALL,
        )
        if boat_section:
            section_text = boat_section.group(1)
            dim_match = self._DIM_RE.search(section_text)
            if dim_match:
                w_val = int(dim_match.group(1))
                l_val = int(dim_match.group(2))
                unit = UnitResult()
                unit.size = f"{w_val}x{l_val}"
                unit.description = "Boat Storage Slip"
                w, ln, sq = self.normalize_size(unit.size)
                if w is not None:
                    unit.metadata = {"width": w, "length": ln, "sqft": sq}
                # Record the advertised unit count (e.g. "134 Units") if given.
                count_match = re.search(r"(\d+)\s*Units?", section_text, re.I)
                if count_match:
                    unit.metadata = unit.metadata or {}
                    unit.metadata["total_units"] = int(count_match.group(1))
                units.append(unit)

        # Mini storage: the page gives no specific sizes, so emit a single
        # descriptive placeholder unit when the section exists.
        mini_section = re.search(
            r"Mini Storage\n(.*?)(?:About|\Z)",
            full_text,
            re.DOTALL,
        )
        if mini_section:
            section_text = mini_section.group(1)
            if "sizes available" in section_text.lower():
                unit = UnitResult()
                unit.description = "Mini Storage (multiple sizes, see store)"
                # No specific size or price available in the static HTML.
                units.append(unit)

        return units

    def _try_text_extraction(self, soup: BeautifulSoup) -> list[UnitResult]:
        """Last resort: extract dimension patterns from visible page text.

        Deduplicates on (size, price) so a dimension string repeated in the
        page copy (headers, footers, descriptions) yields only one unit.
        """
        units: list[UnitResult] = []
        seen: set[tuple[str, object]] = set()

        # Drop script/style content so their text does not pollute matches.
        for tag in soup.find_all(["script", "style"]):
            tag.decompose()

        body_text = soup.get_text(separator="\n")

        for dim_match in self._DIM_RE.finditer(body_text):
            w_val = int(dim_match.group(1))
            l_val = int(dim_match.group(2))
            # Dimensions under 3ft are implausible for storage; skip noise.
            if w_val < 3 or l_val < 3:
                continue
            unit = UnitResult()
            unit.size = f"{w_val}x{l_val}"
            w, ln, sq = self.normalize_size(unit.size)
            if w is not None:
                unit.metadata = {"width": w, "length": ln, "sqft": sq}

            # Look for a dollar amount within ~200 chars of the dimension.
            start = max(0, dim_match.start() - 200)
            end = min(len(body_text), dim_match.end() + 200)
            context = body_text[start:end]
            price_match = re.search(r"\$(\d[\d,.]*)", context)
            if price_match:
                unit.price = self.normalize_price(price_match.group(1))

            # Fix: the same dimension often appears several times in page
            # copy; without this guard each occurrence became its own unit.
            key = (unit.size, unit.price)
            if key in seen:
                continue
            seen.add(key)

            unit.description = context.strip()[:200]
            units.append(unit)

        return units

Scrape Runs (3)

Run #1016 Details

Status
exported
Parser Used
Facility000982Parser
Platform Detected
table_layout
Units Found
2
Stage Reached
exported
Timestamp
2026-03-23 02:38:27.041703
Timing
Stage Duration
Fetch: 6588ms
Detect: 123ms
Parse: 68ms
Export: 7ms

Snapshot: 000982_20260323T023833Z.html · Show Snapshot · Open in New Tab

Parsed Units (2)

12x30

No price

Unknown Size

No price

← Back to dashboard