💻 fetch_a16z_posts.py
python · 70 lines · ⬇️ Download
import json, urllib.request, urllib.parse, base64, time
# Algolia application id and the index that is queried for posts.
APP_ID = "FLQ5EG9WBF"
INDEX = "v2_production_searchable_posts"
# Endpoint that hands out the API key sent with the search requests.
key_endpoint = "https://ohhu1ogx2a.execute-api.us-west-1.amazonaws.com/prod/api/generate-key"

# Fetch the search API key. This step is best-effort: any failure is reported
# and API_KEY falls back to an empty string so the script keeps running.
req = urllib.request.Request(key_endpoint)
try:
    # The context manager closes the HTTP response even when reading or
    # JSON parsing fails, so the connection is never leaked.
    with urllib.request.urlopen(req, timeout=10) as resp:
        key_data = json.loads(resp.read())
    API_KEY = key_data.get('key', '')
    # Only a prefix of the key is echoed to the console.
    print(f"Got API key: {API_KEY[:20]}...")
except Exception as e:
    # Deliberately broad: network errors, bad JSON and unexpected payload
    # shapes all take the same logged fallback.
    print(f"Key fetch error: {e}")
    API_KEY = ""
# Lower bound for the date filter: the Unix timestamp 365 days before now.
one_year_ago = int(time.time()) - 365 * 24 * 3600
all_posts = []
page = 0
# The query endpoint is identical for every page, so it is built once, and
# from APP_ID rather than a second hard-coded copy of the application id.
url = f"https://{APP_ID}-dsn.algolia.net/1/indexes/{INDEX}/query"

# Walk the result pages until the last page reported by the response is
# reached, or until any request/parsing error occurs.
while True:
    payload = json.dumps({
        "query": "",
        "page": page,
        "hitsPerPage": 100,
        "filters": f"date_timestamp > {one_year_ago}",
    }).encode()
    req = urllib.request.Request(url, data=payload, method="POST", headers={
        "Content-Type": "application/json",
        "X-Algolia-Application-Id": APP_ID,
        "X-Algolia-API-Key": API_KEY,
    })
    try:
        # Close each response as soon as its body has been read and parsed.
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        hits = data.get('hits', [])
        total_pages = data.get('nbPages', 0)
        total_hits = data.get('nbHits', 0)
        print(f"Page {page}: {len(hits)} hits, total: {total_hits}, pages: {total_pages}")
        for hit in hits:
            # Keep only the fields of interest; the URL path comes from 'uri',
            # falling back to 'slug' when 'uri' is absent.
            post = {
                'title': hit.get('title', ''),
                'url': 'https://a16zcrypto.com' + hit.get('uri', hit.get('slug', '')),
                'date': hit.get('date', ''),
                'tags': hit.get('tags', []),
                'focus_areas': hit.get('focus_areas', []),
                'type': hit.get('type', ''),
            }
            all_posts.append(post)
        # Pages are zero-based, so the last one is total_pages - 1. A missing
        # or zero 'nbPages' also ends the loop here.
        if page >= total_pages - 1:
            break
        page += 1
    except Exception as e:
        # Any failure stops pagination; posts collected so far are kept.
        print(f"Error on page {page}: {e}")
        break
# Report how many posts were gathered, then write them to disk as
# two-space-indented JSON.
print(f"\nTotal posts collected: {len(all_posts)}")
with open('a16z_posts.json', 'w') as f:
    # Serialize to a string first and write it in a single call.
    f.write(json.dumps(all_posts, indent=2))
print("Saved to a16z_posts.json")