Manually refresh specific event pages to update event data
This skill inherits all available tools. When active, it can use any tool Claude has access to.
<essential_principles>
Re-scrape specific event page URLs to update event data in the database. Use this when event details have changed (date, time, venue, etc.) or to verify event information.
- Pages are tracked in the `scraped_pages` table.
- Some pages contain multiple events (e.g., weekly schedules). When updating:
  - All events with a matching `source_url` are updated.
</essential_principles>

<intake>
Examples:
- Single URL: https://hvmag.com/events/jazz-night-jan-20
- Multiple URLs: https://example.com/event/123 https://example.com/event/456

Provide the URL(s):
</intake>
<process>

## Step 1: Parse URLs

Extract all URLs from user input.
```python
import re

url_pattern = r'https?://[^\s<>"\']+'
urls = re.findall(url_pattern, user_input)

if not urls:
    print("ERROR: No valid URLs found in input")
    # STOP HERE
```
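As a quick illustration, using the intake examples as a stand-in for the user's message (`user_input` below is just sample data, not something the skill defines):

```python
# Illustrative check of the URL pattern against a sample request.
import re

url_pattern = r'https?://[^\s<>"\']+'
user_input = "Refresh https://example.com/event/123 and https://example.com/event/456"
urls = re.findall(url_pattern, user_input)
print(urls)
# ['https://example.com/event/123', 'https://example.com/event/456']
```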
## Step 2: Check Existing Records

Look up each URL in the `scraped_pages` table to see whether it has been scraped before.

```python
from pathlib import Path

from scripts.url_utils import normalize_url
from schemas.sqlite_storage import SqliteStorage

db_path = Path.home() / ".config" / "local-media-tools" / "data" / "events.db"
storage = SqliteStorage(db_path)

# Normalize URLs: normalized form is the lookup key, original form is kept for scraping
url_map = {normalize_url(url): url for url in urls}

# Check which URLs exist in scraped_pages; keep each record for reuse in Step 5
page_records = {}
for normalized_url, original_url in url_map.items():
    # Find the source_name for this URL (query scraped_pages)
    page_record = None
    with storage._connection() as conn:
        row = conn.execute(
            "SELECT source_name, url, scraped_at FROM scraped_pages WHERE url = ?",
            (normalized_url,),
        ).fetchone()
        if row:
            page_record = dict(row)
    page_records[normalized_url] = page_record

    if page_record:
        print(f"ℹ Found: {original_url}")
        print(f"  Source: {page_record['source_name']}")
        print(f"  Last scraped: {page_record['scraped_at']}")
    else:
        print(f"⚠ Not found in database: {original_url}")
        print("  Will be scraped as new URL")
```
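The lookup key is whatever `normalize_url` returns, while the user's original string is kept for scraping and display; `scraped_pages.url` stores the normalized form (see `save_scraped_page` in Step 5), so lookups must use the same normalization as saves. The real rules live in `scripts.url_utils`; purely as a mental model, a minimal sketch of the usual idea (an assumption, not the project's implementation) might look like:

```python
from urllib.parse import urlsplit, urlunsplit

def normalize_url_sketch(url: str) -> str:
    """Hypothetical stand-in for scripts.url_utils.normalize_url.

    The project's real function may differ; this only illustrates the usual idea:
    lowercase the scheme and host, drop the fragment, strip a trailing slash.
    """
    parts = urlsplit(url.strip())
    path = parts.path.rstrip("/") or "/"
    return urlunsplit((parts.scheme.lower(), parts.netloc.lower(), path, parts.query, ""))
```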
## Step 3: Scrape Pages

Scrape each URL; a failure on one URL should not stop the others.

```python
from scripts.scrape_firecrawl import FirecrawlClient, FirecrawlError

client = FirecrawlClient()
scraped_pages = []

for normalized_url, original_url in url_map.items():
    try:
        page = client.scrape_url(original_url)
        scraped_pages.append({
            "normalized_url": normalized_url,
            "original_url": original_url,
            "markdown": page.get("markdown", ""),
            "title": page.get("title", ""),
        })
        print(f"✓ Scraped: {original_url}")
    except FirecrawlError as e:
        print(f"✗ Failed to scrape {original_url}: {e}")
```
## Step 4: Extract Events

For each scraped page, analyze the markdown and extract events.

For each page:
- Create `Event` objects with `source_url` set to the original URL

```python
from schemas.event import Event, Venue, EventSource
event = Event(
    title=extracted_title,
    venue=Venue(name=venue_name, address=venue_address),
    event_date=parsed_date,
    start_time=parsed_time,
    source=EventSource.WEB_AGGREGATOR,
    source_url=page["original_url"],
    description=description,
    price=price,
    ticket_url=ticket_url,
    confidence=0.9,  # Higher confidence for manual refresh
    needs_review=False,  # User explicitly requested this update
)
```
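Step 5 below reads from an `events_by_url` mapping that this skill never builds explicitly. A minimal sketch of the assumed shape, collecting the `Event` objects constructed per page as shown above:

```python
from schemas.event import Event

# Assumed shape (not defined elsewhere in this skill):
# original page URL -> list of Event objects extracted from that page's markdown.
events_by_url: dict[str, list[Event]] = {}

for page in scraped_pages:
    page_events: list[Event] = []
    # ...build one Event per extracted listing, as in the snippet above,
    #    and append it to page_events...
    events_by_url[page["original_url"]] = page_events
```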
## Step 5: Save Events and Update Records

For each page, update or create events, then update the `scraped_pages` record.
```python
from schemas.event import EventCollection

for page in scraped_pages:
    # events_by_url comes from Step 4: original URL -> list of extracted Event objects
    events_from_page = events_by_url.get(page["original_url"], [])

    # 1. Save/update events
    if events_from_page:
        collection = EventCollection(events=events_from_page)
        result = storage.save(collection)
        print(f"  → {len(events_from_page)} events: {result.saved} new, {result.updated} updated")
    else:
        print("  → No events extracted from page")

    # 2. Update scraped_pages record
    # Determine source_name from the record found in Step 2, or infer it for new URLs
    page_record = page_records.get(page["normalized_url"])
    if page_record:
        source_name = page_record["source_name"]
    else:
        # For new URLs, try to infer source from config or use the domain
        from urllib.parse import urlparse
        source_name = urlparse(page["original_url"]).netloc

    storage.save_scraped_page(
        source_name=source_name,
        url=page["normalized_url"],
        events_count=len(events_from_page),
    )

print("\n✓ Update complete")
```
## Step 6: Summary

Display a summary of what was updated:

| URL | Events Found | New | Updated |
|---|---|---|---|
| hvmag.com/events/jazz | 1 | 0 | 1 |
| example.com/schedule | 5 | 2 | 3 |
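If it is easier to print this table than to assemble it by hand, a small sketch is shown below; `summary_rows` is a hypothetical accumulator (not defined by this skill) that would be appended to while saving in Step 5.

```python
# summary_rows is a hypothetical accumulator, e.g. appended to in Step 5:
#   summary_rows.append({"url": page["original_url"],
#                        "found": len(events_from_page),
#                        "new": result.saved,
#                        "updated": result.updated})
print("| URL | Events Found | New | Updated |")
print("|---|---|---|---|")
for row in summary_rows:
    print(f"| {row['url']} | {row['found']} | {row['new']} | {row['updated']} |")
```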
</process>

<success_criteria>