API Examples
This guide provides practical examples of common use cases for the ActiCrawl API. Each example includes a complete code snippet in Python or JavaScript.
Basic Web Scraping
Extract Article Content
Scrape a blog post or news article and get clean, readable content:
import requests

api_key = 'YOUR_API_KEY'

response = requests.post(
    'https://api.acticrawl.com/v1/scrape',
    headers={'X-API-Key': api_key},
    json={
        'url': 'https://techcrunch.com/2024/01/15/openai-gpt-5/',
        'formats': ['markdown'],
        'options': {
            'onlyMainContent': True
        }
    }
)

article = response.json()
print(article['data']['markdown'])
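If you want to keep the cleaned article for later processing, a minimal follow-up sketch (reusing the article variable above and assuming the same data.metadata.title field used in later examples) could write it to disk:

data = article['data']
title = data.get('metadata', {}).get('title', 'Untitled article')

# Write the article as a Markdown file with the title as a heading
with open('article.md', 'w', encoding='utf-8') as f:
    f.write(f"# {title}\n\n{data['markdown']}")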
Generate Screenshots
Capture a screenshot of any webpage:
const axios = require('axios');
const fs = require('fs');

async function captureScreenshot(url) {
  const response = await axios.post('https://api.acticrawl.com/v1/scrape', {
    url: url,
    formats: ['screenshot'],
    options: {
      waitFor: 3000 // Wait 3 seconds for page to load
    }
  }, {
    headers: { 'X-API-Key': 'YOUR_API_KEY' }
  });

  // Save screenshot to file
  const buffer = Buffer.from(response.data.data.screenshot, 'base64');
  fs.writeFileSync('screenshot.png', buffer);
}

captureScreenshot('https://stripe.com');
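The same call works from Python. Here is a minimal sketch using the endpoint, options, and response fields shown above; the screenshot comes back base64-encoded under data.screenshot:

import base64
import requests

response = requests.post(
    'https://api.acticrawl.com/v1/scrape',
    headers={'X-API-Key': 'YOUR_API_KEY'},
    json={
        'url': 'https://stripe.com',
        'formats': ['screenshot'],
        'options': {'waitFor': 3000}  # Wait 3 seconds for page to load
    }
)

# Decode the base64 image and save it to disk
screenshot_b64 = response.json()['data']['screenshot']
with open('screenshot.png', 'wb') as f:
    f.write(base64.b64decode(screenshot_b64))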
E-commerce Data Extraction
Scrape Product Information
Extract structured product data from e-commerce sites:
import requests
import json

def scrape_product(product_url):
    response = requests.post(
        'https://api.acticrawl.com/v1/scrape',
        headers={'X-API-Key': 'YOUR_API_KEY'},
        json={
            'url': product_url,
            'formats': ['json', 'screenshot'],
            'options': {
                'waitFor': 5000,  # Wait for dynamic content
                'onlyIncludeTags': [
                    'h1',              # Product title
                    '.price',          # Price
                    '.description',    # Description
                    '.product-image',  # Images
                    '.reviews'         # Reviews
                ]
            }
        }
    )
    return response.json()
# Example usage
product = scrape_product('https://amazon.com/dp/B08N5WRWNW')
print(f"Title: {product['data']['metadata']['title']}")
print(f"Price: {product['data']['json']['price']}")
Monitor Price Changes
Track product prices over time:
const cron = require('node-cron');
const axios = require('axios');
const fs = require('fs');

async function checkPrice(url) {
  const response = await axios.post('https://api.acticrawl.com/v1/scrape', {
    url: url,
    formats: ['json'],
    options: {
      onlyIncludeTags: ['.price', '.product-title']
    }
  }, {
    headers: { 'X-API-Key': 'YOUR_API_KEY' }
  });

  const data = response.data.data;
  const price = data.json.price;
  const timestamp = new Date().toISOString();

  // Log price to CSV
  fs.appendFileSync('prices.csv', `${timestamp},${price}\n`);

  return price;
}

// Check price every hour
cron.schedule('0 * * * *', () => {
  checkPrice('https://example.com/product');
});
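Once prices.csv has a few entries (one timestamp,price row per check, as written by the script above), you can flag drops with a short script. This is a sketch under that CSV assumption; the 5% threshold is arbitrary and assumes the stored price is a plain number:

import csv

with open('prices.csv', newline='') as f:
    rows = list(csv.reader(f))  # each row: [timestamp, price]

if len(rows) >= 2:
    previous = float(rows[-2][1])
    current = float(rows[-1][1])
    if current < previous * 0.95:  # more than a 5% drop
        print(f"Price drop detected: {previous} -> {current}")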
News and Content Aggregation
Scrape Multiple News Articles
Collect articles from a news website:
import requests
from datetime import datetime

def scrape_news_site(base_url, max_articles=10):
    # First, get the homepage to find article links
    response = requests.post(
        'https://api.acticrawl.com/v1/crawl',
        headers={'X-API-Key': 'YOUR_API_KEY'},
        json={
            'url': base_url,
            'match': f'{base_url}/article/**',
            'selector': 'article a[href]',
            'limit': max_articles,
            'depth': 1,
            'formats': ['markdown'],
            'options': {
                'onlyMainContent': True
            }
        }
    )

    articles = response.json()['data']

    # Process articles
    for article in articles:
        print(f"Title: {article['metadata']['title']}")
        print(f"URL: {article['url']}")
        print(f"Published: {article['metadata']['publishedTime']}")
        print(f"Content: {article['markdown'][:200]}...")
        print("-" * 80)

    return articles
# Example usage
news = scrape_news_site('https://news.ycombinator.com', max_articles=5)
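To feed the collected articles into an aggregation pipeline, you can persist them as JSON. A minimal sketch reusing the fields returned by the crawl above:

import json

with open('articles.json', 'w', encoding='utf-8') as f:
    json.dump([{
        'title': a['metadata']['title'],
        'url': a['url'],
        'published': a['metadata'].get('publishedTime'),
        'content': a['markdown']
    } for a in news], f, indent=2)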
Create RSS Feed from Any Website
Convert any website into an RSS feed:
const axios = require('axios');
const RSS = require('rss');

async function websiteToRSS(siteUrl) {
  // Crawl website for recent posts
  const response = await axios.post('https://api.acticrawl.com/v1/crawl', {
    url: siteUrl,
    match: `${siteUrl}/blog/**`,
    limit: 20,
    depth: 1,
    formats: ['markdown'],
    options: {
      onlyMainContent: true
    }
  }, {
    headers: { 'X-API-Key': 'YOUR_API_KEY' }
  });

  // Create RSS feed
  const feed = new RSS({
    title: `${siteUrl} RSS Feed`,
    description: `Latest posts from ${siteUrl}`,
    feed_url: `${siteUrl}/rss`,
    site_url: siteUrl,
  });

  // Add items to feed
  response.data.data.forEach(article => {
    feed.item({
      title: article.metadata.title,
      description: article.markdown.substring(0, 300) + '...',
      url: article.url,
      date: article.metadata.publishedTime || new Date()
    });
  });

  return feed.xml();
}
Research and Data Collection
Academic Paper Collection
Collect research papers and their metadata:
import requests
import pandas as pd

def collect_research_papers(search_url, query):
    response = requests.post(
        'https://api.acticrawl.com/v1/search',
        headers={'X-API-Key': 'YOUR_API_KEY'},
        json={
            'url': search_url,
            'query': query,
            'limit': 50,
            'depth': 2
        }
    )

    papers = response.json()['data']['results']

    # Convert to DataFrame for analysis
    df = pd.DataFrame([{
        'title': p['title'],
        'url': p['url'],
        'excerpt': p['excerpt'],
        'relevance': p['relevance_score']
    } for p in papers])

    # Save to CSV
    df.to_csv(f'research_{query}.csv', index=False)

    return df

# Example usage
papers = collect_research_papers(
    'https://arxiv.org',
    'machine learning transformers'
)
Website Change Detection
Monitor websites for changes:
const axios = require('axios');
const crypto = require('crypto');
const fs = require('fs');
const nodemailer = require('nodemailer');

async function checkForChanges(url, selector) {
  // Scrape current content
  const response = await axios.post('https://api.acticrawl.com/v1/scrape', {
    url: url,
    formats: ['html'],
    options: {
      onlyIncludeTags: [selector]
    }
  }, {
    headers: { 'X-API-Key': 'YOUR_API_KEY' }
  });

  const content = response.data.data.html;
  const hash = crypto.createHash('md5').update(content).digest('hex');

  // Compare with stored hash
  const hashFile = 'content-hashes.json';
  let hashes = {};
  if (fs.existsSync(hashFile)) {
    hashes = JSON.parse(fs.readFileSync(hashFile));
  }

  if (hashes[url] && hashes[url] !== hash) {
    // Content changed!
    await sendNotification(url, content);
  }

  // Update hash
  hashes[url] = hash;
  fs.writeFileSync(hashFile, JSON.stringify(hashes));
}

async function sendNotification(url, newContent) {
  // Send email notification
  const transporter = nodemailer.createTransport({
    // Configure your email service
  });

  await transporter.sendMail({
    to: 'your-email@example.com',
    subject: `Website Changed: ${url}`,
    html: `The website has been updated:<br><br>${newContent}`
  });
}
SEO and Marketing
Competitor Analysis
Analyze competitor websites:
import requests
from collections import Counter

def analyze_competitor(competitor_url):
    # Get site structure
    map_response = requests.post(
        'https://api.acticrawl.com/v1/map',
        headers={'X-API-Key': 'YOUR_API_KEY'},
        json={
            'url': competitor_url,
            'limit': 100,
            'depth': 3
        }
    )

    site_structure = map_response.json()['data']

    # Analyze a sample of pages
    sample_urls = site_structure['urls'][:10]
    keywords = []

    for url in sample_urls:
        page_response = requests.post(
            'https://api.acticrawl.com/v1/scrape',
            headers={'X-API-Key': 'YOUR_API_KEY'},
            json={
                'url': url,
                'formats': ['json'],
                'options': {
                    'onlyMainContent': True
                }
            }
        )

        metadata = page_response.json()['data']['metadata']
        if metadata.get('keywords'):
            keywords.extend(metadata['keywords'])

    # Analyze keywords
    keyword_freq = Counter(keywords)

    return {
        'total_pages': len(site_structure['urls']),
        'site_structure': site_structure['structure'],
        'top_keywords': keyword_freq.most_common(20)
    }
# Example usage
analysis = analyze_competitor('https://competitor.com')
print(f"Total pages: {analysis['total_pages']}")
print(f"Top keywords: {analysis['top_keywords']}")
Generate Content Ideas
Find trending topics in your industry:
const axios = require('axios');

async function findTrendingTopics(industrySites) {
  const allArticles = [];

  // Scrape recent articles from multiple sites
  for (const site of industrySites) {
    const response = await axios.post('https://api.acticrawl.com/v1/crawl', {
      url: site,
      match: `${site}/**`,
      selector: 'article a, .post a',
      limit: 20,
      depth: 1,
      formats: ['markdown'],
      options: {
        onlyMainContent: true
      }
    }, {
      headers: { 'X-API-Key': 'YOUR_API_KEY' }
    });

    allArticles.push(...response.data.data);
  }

  // Extract topics and keywords
  const topics = {};
  allArticles.forEach(article => {
    const keywords = article.metadata.keywords || [];
    keywords.forEach(keyword => {
      topics[keyword] = (topics[keyword] || 0) + 1;
    });
  });

  // Sort by frequency
  const trending = Object.entries(topics)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10);

  return trending;
}

// Example usage
const sites = [
  'https://techcrunch.com',
  'https://theverge.com',
  'https://wired.com'
];

findTrendingTopics(sites).then(trends => {
  console.log('Trending topics:');
  trends.forEach(([topic, count]) => {
    console.log(`- ${topic}: mentioned ${count} times`);
  });
});
Error Handling and Best Practices
Robust Error Handling
Always implement proper error handling:
import requests
import time
from requests.exceptions import RequestException

def scrape_with_retry(url, max_retries=3):
    for attempt in range(max_retries):
        try:
            response = requests.post(
                'https://api.acticrawl.com/v1/scrape',
                headers={'X-API-Key': 'YOUR_API_KEY'},
                json={'url': url, 'formats': ['markdown']},
                timeout=30
            )

            if response.status_code == 200:
                return response.json()
            elif response.status_code == 429:
                # Rate limited - wait and retry
                retry_after = int(response.headers.get('Retry-After', 60))
                print(f"Rate limited. Waiting {retry_after} seconds...")
                time.sleep(retry_after)
            else:
                print(f"Error {response.status_code}: {response.text}")
        except RequestException as e:
            print(f"Request failed: {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff

    return None
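Example usage, following the same pattern as the other Python snippets in this guide:

result = scrape_with_retry('https://example.com/article')
if result is None:
    print("Scrape failed after all retries")
else:
    print(result['data']['markdown'][:200])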
Batch Processing
Process multiple URLs efficiently:
const axios = require('axios');

async function batchScrape(urls, batchSize = 5) {
  const results = [];

  // Process URLs in batches
  for (let i = 0; i < urls.length; i += batchSize) {
    const batch = urls.slice(i, i + batchSize);

    const promises = batch.map(url =>
      axios.post('https://api.acticrawl.com/v1/scrape', {
        url: url,
        formats: ['markdown', 'metadata']
      }, {
        headers: { 'X-API-Key': 'YOUR_API_KEY' }
      }).catch(err => ({ error: err.message, url }))
    );

    const batchResults = await Promise.all(promises);
    results.push(...batchResults);

    // Respect rate limits
    if (i + batchSize < urls.length) {
      await new Promise(resolve => setTimeout(resolve, 1000));
    }
  }

  return results;
}
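The same batching idea in Python, sketched with concurrent.futures; the batch size and the one-second pause between batches mirror the JavaScript example and should be tuned to your plan's rate limits:

import time
import requests
from concurrent.futures import ThreadPoolExecutor

def scrape_one(url):
    try:
        response = requests.post(
            'https://api.acticrawl.com/v1/scrape',
            headers={'X-API-Key': 'YOUR_API_KEY'},
            json={'url': url, 'formats': ['markdown']},
            timeout=30
        )
        return response.json()
    except requests.RequestException as e:
        return {'error': str(e), 'url': url}

def batch_scrape(urls, batch_size=5):
    results = []
    for i in range(0, len(urls), batch_size):
        batch = urls[i:i + batch_size]
        # Scrape the current batch concurrently
        with ThreadPoolExecutor(max_workers=batch_size) as pool:
            results.extend(pool.map(scrape_one, batch))
        # Respect rate limits between batches
        if i + batch_size < len(urls):
            time.sleep(1)
    return results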
Next Steps
These examples demonstrate just a few of the many possibilities with the ActiCrawl API. For more advanced use cases:
- Explore our API Reference for complete endpoint documentation
- Check out our SDKs and Libraries for your programming language
- Join our Developer Community for support and discussions
- Contact our Enterprise Sales team for custom solutions
Happy scraping! 🚀