Facility 043209 - Facility Scrapers

Stale Data Warning: This facility has not been successfully scraped in 26 days (threshold: 3 days). Data may be outdated.

Facility Information (status: active)

Facility ID: 043209
Name: Lily Flagg Mini Storage
URL: https://www.lilyflaggministorage.com/8402-a-whitesburg-drive-huntsville-al-35802

Address: 8402 Whitesburg Dr Unit A, Huntsville, AL 35802, USA, Huntsville, Alabama 35802
Platform: custom_facility_043209
Parser File: src/parsers/custom/facility_043209_parser.py

Last Scraped: 2026-03-27 13:41:32.732037
Created: 2026-03-23 02:35:08.816820
Updated: 2026-03-27 13:41:32.758696

Parser & Healing Diagnosis (status: working)

Parser Status: ✓ Working
Status Reason: N/A

Last Healing Attempt: Not attempted

Parser Source (src/parsers/custom/facility_043209_parser.py)

"""Parser for Lily Flagg Mini Storage (JSON-LD + inline JSON with full unit data)."""

from __future__ import annotations

import json
import re

from bs4 import BeautifulSoup

from src.parsers.base import BaseParser, ParseResult, UnitResult


class Facility043209Parser(BaseParser):
    """Extract storage units from Lily Flagg Mini Storage.

    The page embeds structured unit data in two forms, tried in this order:

    1. Inline script JSON with ``UnitGroup`` objects (id, name, type, price,
       area, availability).
    2. JSON-LD ``Product`` entries whose description looks like
       ``'5x10x10 - $64.00 - 16681'``.

    If neither yields a unit, visible ``WxL`` text is scanned as a last
    resort (sizes only, no prices).
    """

    platform = "custom_facility_043209"

    # One dimension: integer or decimal, optionally followed by a foot mark.
    _DIM = r"(\d+(?:\.\d+)?)\s*['\u2032]?"
    # Separator between dimensions: "x", "X" or the multiplication sign,
    # with optional surrounding whitespace.
    _SEP = r"\s*[xX\u00d7]\s*"

    # Leading "W x L" of a unit-group name; a trailing height is ignored.
    _NAME_SIZE_RE = re.compile(r"\s*" + _DIM + _SEP + _DIM)

    # JSON-LD description: "WxL[xH] - $PRICE - ID".
    # Groups: 1=width, 2=length, 3=price, 4=id.
    _JSONLD_DESC_RE = re.compile(
        r"\s*" + _DIM + _SEP + _DIM
        + r"(?:" + _SEP + r"\d+(?:\.\d+)?\s*['\u2032]?)?"
        + r"\s*-\s*\$(\d+\.?\d*)\s*-\s*(\d+)"
    )

    # Serialized UnitGroup object inside an inline script.
    # Groups: 1=id, 2=name, 3=type, 4=price, 5=area, 6=available count.
    # Price/area may be decimal and the amenities array may be non-empty;
    # requiring an integer price and "[]" would silently drop such units.
    _UNIT_GROUP_RE = re.compile(
        r'"id":"([^"]+)","name":"([^"]+)","type":"([^"]+)",'
        r'"price":(\d+(?:\.\d+)?),'
        r'"__typename":"UnitGroup","amenities":\[[^\]]*\],'
        r'"area":(\d+(?:\.\d+)?),'
        r'"avail\w*":(\d+)'
    )

    # Loose "W x L" matcher for the visible-text fallback.
    _TEXT_SIZE_RE = re.compile(r"(\d+)['\u2032]?\s*x\s*(\d+)['\u2032]?")

    def parse(self, html: str, url: str = "") -> ParseResult:
        """Parse *html* and return the units found.

        Args:
            html: Raw page markup.
            url: Accepted for interface compatibility; not used here.

        Returns:
            A ``ParseResult`` whose ``units`` come from the first strategy
            that produced any. ``"No units found"`` is appended to
            ``warnings`` when every strategy comes up empty.
        """
        soup = BeautifulSoup(html, "html.parser")
        result = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)

        # Dedup keys shared by all strategies (unit ids, descriptions, sizes).
        seen: set[str] = set()

        # Strategy 1: UnitGroup data from inline scripts (most reliable).
        self._extract_unit_groups(soup, result, seen)

        # Strategy 2: JSON-LD Product entries.
        if not result.units:
            self._extract_jsonld(soup, result, seen)

        # Strategy 3: visible size text. This must run last because it
        # strips <script>/<style> tags from the soup.
        if not result.units:
            self._extract_from_text(soup, result, seen)

        if not result.units:
            result.warnings.append("No units found")

        return result

    @staticmethod
    def _format_dim(value: float) -> str:
        """Render a dimension without a trailing ``.0`` (10.0 -> '10', 7.5 -> '7.5')."""
        return str(int(value)) if value.is_integer() else f"{value:g}"

    @classmethod
    def _apply_size(cls, unit: UnitResult, width: float, length: float) -> None:
        """Set ``unit.size`` and ``unit.metadata`` when both dimensions are positive."""
        if width > 0 and length > 0:
            unit.size = f"{cls._format_dim(width)}x{cls._format_dim(length)}"
            unit.metadata = {"width": width, "length": length, "sqft": width * length}

    @staticmethod
    def _is_product(node: object) -> bool:
        """Return True for a JSON-LD node whose ``@type`` is or includes 'Product'."""
        if not isinstance(node, dict):
            return False
        node_type = node.get("@type")
        if isinstance(node_type, list):
            return "Product" in node_type
        return node_type == "Product"

    @classmethod
    def _iter_products(cls, data: object) -> list[dict]:
        """Collect Product nodes from a decoded JSON-LD payload.

        Accepts a single object or a top-level array. For an object that
        carries ``@graph``, only the graph members are inspected; otherwise
        the object itself is. Non-dict entries are skipped instead of
        raising.
        """
        products: list[dict] = []
        roots = data if isinstance(data, list) else [data]
        for root in roots:
            if not isinstance(root, dict):
                continue
            if "@graph" in root:
                graph = root["@graph"]
                if isinstance(graph, dict):
                    graph = [graph]
                elif not isinstance(graph, list):
                    graph = []
                products.extend(node for node in graph if cls._is_product(node))
            elif cls._is_product(root):
                products.append(root)
        return products

    def _extract_unit_groups(
        self, soup: BeautifulSoup, result: ParseResult, seen: set[str]
    ) -> None:
        """Append units found in inline-script ``UnitGroup`` JSON objects.

        Each unit id is recorded in *seen* so repeated objects are added
        once. The size is parsed from the start of the group name; a unit
        whose name has no leading ``WxL`` keeps the default size.
        """
        for script in soup.find_all("script"):
            payload = script.string
            if not payload or "UnitGroup" not in payload:
                continue

            for m in self._UNIT_GROUP_RE.finditer(payload):
                uid, name, unit_type = m.group(1), m.group(2), m.group(3)
                if uid in seen:
                    continue
                seen.add(uid)

                unit = UnitResult()
                unit.price = float(m.group(4))
                unit.description = unit_type

                # Name format: "WxLxH - $PRICE - ID"; only W and L are used.
                size_match = self._NAME_SIZE_RE.match(name)
                if size_match:
                    self._apply_size(
                        unit, float(size_match.group(1)), float(size_match.group(2))
                    )

                available = int(m.group(6))
                if available > 0:
                    unit.scarcity = f"{available} available"
                else:
                    unit.scarcity = "unavailable"

                if unit.size or unit.price:
                    result.units.append(unit)

    def _extract_jsonld(
        self, soup: BeautifulSoup, result: ParseResult, seen: set[str]
    ) -> None:
        """Append units found in JSON-LD ``Product`` entries.

        Scripts that do not decode as JSON are skipped. A product is used
        only when its description is a string of the form
        ``"WxL[xH] - $PRICE - ID"``; the full description is the dedup key.
        """
        for script in soup.find_all("script", type="application/ld+json"):
            try:
                data = json.loads(script.string or "")
            except (json.JSONDecodeError, TypeError):
                continue

            for product in self._iter_products(data):
                desc = product.get("description")
                # A null / non-string description cannot be matched.
                if not isinstance(desc, str):
                    continue
                m = self._JSONLD_DESC_RE.match(desc)
                if not m:
                    continue
                if desc in seen:
                    continue
                seen.add(desc)

                unit = UnitResult()
                unit.price = float(m.group(3))
                unit.description = product.get("category") or ""
                self._apply_size(unit, float(m.group(1)), float(m.group(2)))
                if unit.size or unit.price:
                    result.units.append(unit)

    def _extract_from_text(
        self, soup: BeautifulSoup, result: ParseResult, seen: set[str]
    ) -> None:
        """Fallback: append size-only units found in the visible page text.

        Note: this removes every ``<script>`` and ``<style>`` tag from
        *soup* in place before reading the text. Matches with either
        dimension below 3 are ignored, and each ``WxL`` is added once.
        """
        for tag in soup.find_all(["script", "style"]):
            tag.decompose()
        text = soup.get_text(separator="\n")

        for m in self._TEXT_SIZE_RE.finditer(text):
            w, ln = float(m.group(1)), float(m.group(2))
            # Tiny numbers are unlikely to be unit dimensions.
            if w < 3 or ln < 3:
                continue
            key = f"{int(w)}x{int(ln)}"
            if key in seen:
                continue
            seen.add(key)

            unit = UnitResult()
            unit.size = key
            unit.metadata = {"width": w, "length": ln, "sqft": w * ln}
            result.units.append(unit)

Stage	Duration
Fetch	3009ms
Detect	20ms
Parse	9ms
Export	19ms

Facility: 043209

Scrape Runs (3)

Run #1574 Details

Parsed Units (11)

5x10

10x10

Unknown Size

10x10

Unknown Size

10x20

5x10

10x10

6x10

Unknown Size

5x10

HTML Snapshot — Run #1574