Facility: 004394

White Oak Storage

Stale Data Warning: This facility has not been successfully scraped in 26 days (threshold: 3 days). Data may be outdated.
Facility Information active
Facility ID
004394
Name
White Oak Storage
URL
https://www.uhaul.com/Locations/Self-Storage-near-White-Oak-TX-75693/1030645/
Address
116 N White Oak Rd, White Oak, TX 75693, USA, White Oak, Texas 75693
Platform
custom_facility_004394
Parser File
src/parsers/custom/facility_004394_parser.py
Last Scraped
2026-03-27 13:59:01.192326
Created
2026-03-14 16:21:53.706708
Updated
2026-03-27 13:59:01.222488
Parser & Healing Diagnosis working
Parser Status
✓ Working
Status Reason
N/A
Last Healing Attempt
Not attempted
Parser Source (src/parsers/custom/facility_004394_parser.py)
"""Parser for White Oak Storage."""

from __future__ import annotations

import re

from bs4 import BeautifulSoup

from src.parsers.base import BaseParser, ParseResult, UnitResult


class Facility004394Parser(BaseParser):
    """Extract storage units from the White Oak Storage facility page.

    The page is reduced to plain text (scripts and styles removed) and scanned
    with regular expressions in up to three passes. Each later pass runs only
    when the earlier ones produced no units:

    1. a size followed by a price (``10x10 ... $50``);
    2. a price followed by a size (``$50 ... 10x10``);
    3. sizes on their own, with no price attached.
    """

    platform = "custom_facility_004394"

    # One "W x L" dimension pair. Each number may be followed by a foot mark:
    # ASCII apostrophe, typographic apostrophe (U+2019) or prime (U+2032).
    # NOTE(review): U+2019 was added on the assumption that the duplicated
    # ASCII apostrophe in the earlier pattern was a lost curly quote - confirm.
    # The fragment has no capturing groups, so it is safe to embed in
    # lookaheads without shifting group numbers.
    _SIZE_PATTERN = r"\d+\s*['\u2019\u2032]?\s*[xX\u00d7]\s*\d+\s*['\u2019\u2032]?"

    # Dollar amount; trailing sentence punctuation is trimmed after matching.
    _PRICE_PATTERN = r"\$(\d[\d,.]*)"

    # Size, then up to 120 non-"$" characters, then a price. The gap refuses
    # to step over another size so that an unpriced unit cannot take the price
    # belonging to the next unit on the page.
    _UNIT_RE = re.compile(
        rf"({_SIZE_PATTERN})"
        rf"(?:(?!{_SIZE_PATTERN})[^$]){{0,120}}"
        rf"{_PRICE_PATTERN}",
        re.DOTALL,
    )

    # Price, then up to 120 characters, then a size. The gap is lazy: a greedy
    # gap would swallow the leading digits of the size ("$50 10x10" -> "0x10")
    # and would pair the price with the farthest size instead of the nearest.
    _PRICE_SIZE_RE = re.compile(
        rf"{_PRICE_PATTERN}"
        r".{0,120}?"
        rf"({_SIZE_PATTERN})",
        re.DOTALL,
    )

    _SIZE_ONLY_RE = re.compile(rf"({_SIZE_PATTERN})")

    def parse(self, html: str, url: str = "") -> ParseResult:
        """Parse *html* and return the units found on the page.

        Args:
            html: Raw page markup.
            url: Accepted for interface compatibility; not used by this parser.

        Returns:
            A ``ParseResult`` whose ``units`` holds one entry per distinct
            match. When nothing matches, ``units`` is left empty and a warning
            is appended to ``warnings``.
        """
        soup = BeautifulSoup(html, "lxml")
        result = ParseResult(platform=self.platform, parser_name=self.__class__.__name__)

        # Script and style bodies are not visible text and may contain
        # digit/"x" sequences that look like sizes.
        for tag in soup.find_all(["script", "style"]):
            tag.decompose()

        body_text = soup.get_text(separator="\n")

        # Pass 1: size-then-price.
        for unit in self._priced_units(self._UNIT_RE, body_text, size_group=1, price_group=2):
            result.units.append(unit)

        # Pass 2: price-then-size, only when pass 1 found nothing.
        if not result.units:
            for unit in self._priced_units(
                self._PRICE_SIZE_RE, body_text, size_group=2, price_group=1
            ):
                result.units.append(unit)

        # Pass 3: sizes without prices, only when both priced passes failed.
        if not result.units:
            for unit in self._size_only_units(body_text):
                result.units.append(unit)

        if not result.units:
            result.warnings.append("No units found via regex")

        return result

    def _priced_units(self, pattern, text, *, size_group, price_group):
        """Return one unit per distinct (size, price) match of *pattern* in *text*.

        ``size_group`` and ``price_group`` are the capture-group numbers that
        hold the size and the price in *pattern*.
        """
        units = []
        seen: set[tuple[str, str]] = set()
        for m in pattern.finditer(text):
            size_text = m.group(size_group).strip()
            # The price pattern allows "." and "," anywhere after the first
            # digit, so a sentence-ending "$45." is captured as "45."; drop
            # that trailing punctuation before de-duplicating and normalizing.
            price_text = m.group(price_group).rstrip(".,")
            key = (size_text, price_text)
            if key in seen:
                continue
            seen.add(key)

            unit = UnitResult()
            unit.size = size_text
            w, ln, sq = self.normalize_size(size_text)
            if w is not None:
                unit.metadata = {"width": w, "length": ln, "sqft": sq}
            unit.price = self.normalize_price(price_text)
            # Keep the matched span (capped at 200 chars) as context.
            unit.description = m.group(0).strip()[:200]
            units.append(unit)
        return units

    def _size_only_units(self, text):
        """Return one price-less unit per distinct, plausible size in *text*."""
        units = []
        seen_sizes: set[str] = set()
        for m in self._SIZE_ONLY_RE.finditer(text):
            size_text = m.group(1).strip()
            if size_text in seen_sizes:
                continue
            w, ln, sq = self.normalize_size(size_text)
            # Skip sizes that could not be normalized or that have a side
            # under 3 (presumably not real storage units - confirm threshold).
            # The None check on the length avoids a TypeError on comparison.
            if w is None or ln is None or w < 3 or ln < 3:
                continue
            seen_sizes.add(size_text)
            unit = UnitResult()
            unit.size = size_text
            unit.metadata = {"width": w, "length": ln, "sqft": sq}
            units.append(unit)
        return units

Scrape Runs (6)

Run #1254 Details

Status
exported
Parser Used
Facility004394Parser
Platform Detected
table_layout
Units Found
1
Stage Reached
exported
Timestamp
2026-03-23 02:59:52.609715
Timing
Stage Duration
Fetch: 4656 ms
Detect: 69 ms
Parse: 39 ms
Export: 34 ms

Snapshot: 004394_20260323T025957Z.html · Show Snapshot · Open in New Tab

Parsed Units (1)

20x20

No price

All Failures for this Facility (1)

fetch TimeoutException website timeout transient Run #1982 | 2026-03-27 13:58:50.418831

Message: timeout: Timed out receiving message from renderer: -0.002 (Session info: chrome=146.0.7680.164) Stacktrace: #0 0x5587f282087e <unknown> #1 0x5587f21de1d2 <unknown> #2 0x5587f21c91fc <unknown> #3 0x5587f21c8fe9 <unknown> #4 0x5587f21c75e6 <unknown> #5 0x5587f21c7a96 <unknown> #6 0x5587f21d60f7 <unknown> #7 0x5587f21eb77d <unknown> #8 0x5587f21f102b <unknown> #9 0x5587f21c80a1 <unknown> #10 0x5587f21eb5bf <unknown> #11 0x5587f226d86a <unknown> #12 0x5587f224da03 <unknown> #13 0x5587f221e5d5 <unknown> #14 0x5587f221f1c1 <unknown> #15 0x5587f27e4ec0 <unknown> #16 0x5587f27e8188 <unknown> #17 0x5587f27e7c3a <unknown> #18 0x5587f27e85f5 <unknown> #19 0x5587f27d48ab <unknown> #20 0x5587f27e8957 <unknown> #21 0x5587f27bc7e6 <unknown> #22 0x5587f280d855 <unknown> #23 0x5587f280da4c <unknown> #24 0x5587f281f33a <unknown> #25 0x7fccf159f1f5 <unknown>

Stack trace
Traceback (most recent call last):
  File "/app/src/pipeline.py", line 361, in _process_facility
    fetch_result = fetch_page(driver, url, snapshot_mgr, facility_id, **fetch_kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/app/src/fetcher/fetcher.py", line 125, in fetch_page
    driver.get(url)
  File "/app/.venv/lib/python3.11/site-packages/selenium/webdriver/remote/webdriver.py", line 466, in get
    self.execute(Command.GET, {"url": url})
  File "/app/.venv/lib/python3.11/site-packages/selenium/webdriver/remote/webdriver.py", line 446, in execute
    self.error_handler.check_response(response)
  File "/app/.venv/lib/python3.11/site-packages/selenium/webdriver/remote/errorhandler.py", line 232, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: timeout: Timed out receiving message from renderer: -0.002
  (Session info: chrome=146.0.7680.164)
Stacktrace:
#0 0x5587f282087e <unknown>
#1 0x5587f21de1d2 <unknown>
#2 0x5587f21c91fc <unknown>
#3 0x5587f21c8fe9 <unknown>
#4 0x5587f21c75e6 <unknown>
#5 0x5587f21c7a96 <unknown>
#6 0x5587f21d60f7 <unknown>
#7 0x5587f21eb77d <unknown>
#8 0x5587f21f102b <unknown>
#9 0x5587f21c80a1 <unknown>
#10 0x5587f21eb5bf <unknown>
#11 0x5587f226d86a <unknown>
#12 0x5587f224da03 <unknown>
#13 0x5587f221e5d5 <unknown>
#14 0x5587f221f1c1 <unknown>
#15 0x5587f27e4ec0 <unknown>
#16 0x5587f27e8188 <unknown>
#17 0x5587f27e7c3a <unknown>
#18 0x5587f27e85f5 <unknown>
#19 0x5587f27d48ab <unknown>
#20 0x5587f27e8957 <unknown>
#21 0x5587f27bc7e6 <unknown>
#22 0x5587f280d855 <unknown>
#23 0x5587f280da4c <unknown>
#24 0x5587f281f33a <unknown>
#25 0x7fccf159f1f5 <unknown>

← Back to dashboard