Python Library Usage¶
ScoutML provides a comprehensive Python API that mirrors all CLI functionality with a clean, intuitive interface.
Basic Usage¶
Import and Setup¶
import scoutml
# The library automatically uses your configured API key
# from environment variables or config files
Simple Search¶
# Search for papers
results = scoutml.search("transformer models", limit=10)
# Access results
for paper in results['papers']:
    print(f"{paper['title']} - {paper['citations']} citations")
Core Functions¶
Search Functions¶
# Semantic search with filters
results = scoutml.search(
    "vision transformers",
    limit=20,
    year_min=2021,
    min_citations=50,
    sota_only=True,
    domain="computer vision"
)
# Search by method
bert_papers = scoutml.method_search(
    "BERT",
    sort_by="citations",  # or "year", "novelty"
    limit=15
)
# Search by dataset
imagenet_papers = scoutml.dataset_search(
    "ImageNet",
    include_benchmarks=True,
    year_min=2020
)
Paper Analysis¶
# Get detailed paper information
paper = scoutml.get_paper("2103.00020", include_similar=True)
print(paper['paper']['title'])
print(paper['paper']['abstract'])
# Compare multiple papers
comparison = scoutml.compare_papers("1810.04805", "2005.14165", "1910.10683")
print(comparison['analysis']['summary'])
# Find similar papers
similar = scoutml.find_similar_papers(
    paper_id="1810.04805",
    limit=10,
    threshold=0.8
)
# Or find papers similar to your abstract
similar = scoutml.find_similar_papers(
    abstract_text="We propose a new method for self-supervised learning...",
    limit=5
)
Research Synthesis¶
# Generate literature review
review = scoutml.generate_review(
    "federated learning",
    year_min=2020,
    min_citations=20,
    limit=50
)
print(review['review']['executive_summary'])
print(f"Analyzed {len(review['review']['papers'])} papers")
Insights and Analytics¶
# Get reproducible papers
reproducible = scoutml.get_reproducible_papers(
    domain="nlp",
    year_min=2022,
    limit=20
)
# Analyze compute trends
compute_analysis = scoutml.analyze_compute_trends(
    method="large language model",
    year_min=2020
)
# Analyze funding patterns
funding = scoutml.analyze_funding(
    institution="MIT",
    source="NSF",
    limit=30
)
AI Agents¶
# Get implementation guide
guide = scoutml.get_implementation_guide(
    "2010.11929",
    framework="pytorch",  # or "tensorflow", "jax", "other"
    level="intermediate"  # or "beginner", "advanced"
)
# Get research critique
critique = scoutml.critique_paper(
    "1810.04805",
    aspects=["methodology", "experiments", "reproducibility"]
)
# Solve limitations
solutions = scoutml.solve_limitations(
    "2103.00020",
    focus="computational",
    tradeoffs=["speed", "memory"]
)
# Design experiments
experiment = scoutml.design_experiment(
    "2010.11929",
    "ViT performs well on small datasets with proper augmentation",
    gpu_hours=100,
    datasets=["CIFAR-10", "CIFAR-100"]
)
Advanced Usage¶
Custom Client Configuration¶
from scoutml import Config, ScoutMLClient
# Create custom configuration
config = Config()
config.api_key = "your-api-key"
config.base_url = "https://custom.api.url"
config.default_timeout = 60 # seconds
config.max_retries = 5
# Create client with custom config
client = ScoutMLClient(config)
# Use the client
results = client.semantic_search("quantum computing", limit=10)
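To keep credentials out of source code (see Best Practices below), the key can be loaded from an environment variable you control. A minimal sketch; the variable name SCOUTML_API_KEY is our own choice here, not one the library mandates:
import os
from scoutml import Config, ScoutMLClient

config = Config()
# Variable name chosen by us for this example, not required by the library
config.api_key = os.environ["SCOUTML_API_KEY"]
client = ScoutMLClient(config)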
Context Manager¶
# Automatic session management
with scoutml.Scout() as scout:
    # All client methods are available on the session
    papers = scout.semantic_search("bert variants", limit=5)
    comparison = scout.compare_papers(["1810.04805", "1907.11692"])
Batch Operations¶
# Get multiple papers efficiently
client = scoutml.get_client()
paper_ids = ["2103.00020", "2010.11929", "1810.04805", "1706.03762"]
papers = client.batch_get_papers(paper_ids, show_progress=True)
for paper in papers:
    if "error" not in paper:
        print(f"✓ {paper['title']}")
    else:
        print(f"✗ {paper['arxiv_id']}: {paper['error']}")
Error Handling¶
from scoutml import (
    ScoutMLError,
    AuthenticationError,
    NotFoundError,
    RateLimitError,
    ServerError
)
try:
    paper = scoutml.get_paper("invalid-id")
except NotFoundError:
    print("Paper not found")
except AuthenticationError:
    print("Check your API key")
except RateLimitError as e:
    print(f"Rate limited: {e}")
except ServerError:
    print("Server error, try again later")
except ScoutMLError as e:
    print(f"General error: {e}")
Working with Results¶
Parsing Search Results¶
results = scoutml.search("reinforcement learning", limit=50)
# Extract specific fields
papers_data = [
    {
        "title": p["title"],
        "year": p["year"],
        "citations": p["citations"],
        "url": f"https://arxiv.org/abs/{p['arxiv_id']}"
    }
    for p in results["papers"]
]
# Filter results
high_impact = [p for p in results["papers"] if p["citations"] > 100]
recent = [p for p in results["papers"] if p["year"] >= 2023]
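The same list-of-dicts shape also supports ranking. For example, continuing from the recent list above, the ten most cited recent papers:
# Rank the recent papers by citation count
top_recent = sorted(recent, key=lambda p: p["citations"], reverse=True)[:10]
for p in top_recent:
    print(f"{p['citations']:>6}  {p['title']}")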
Exporting Data¶
import json
import csv
# Export to JSON
results = scoutml.search("transformer", limit=100)
with open("papers.json", "w") as f:
json.dump(results, f, indent=2)
# Export to CSV
with open("papers.csv", "w", newline="") as f:
writer = csv.DictWriter(f, fieldnames=["arxiv_id", "title", "year", "citations"])
writer.writeheader()
writer.writerows(results["papers"])
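If you need citations rather than raw data, the same fields can be turned into minimal BibTeX stubs. A sketch using only the fields shown above; real entries would also need author information from the full paper records:
def to_bibtex(paper):
    """Build a minimal BibTeX stub from a search-result dict."""
    return (
        f"@article{{{paper['arxiv_id']},\n"
        f"  title = {{{paper['title']}}},\n"
        f"  year = {{{paper['year']}}},\n"
        f"  eprint = {{{paper['arxiv_id']}}},\n"
        "  archivePrefix = {arXiv}\n"
        "}"
    )

with open("papers.bib", "w") as f:
    f.write("\n\n".join(to_bibtex(p) for p in results["papers"]))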
Integration Examples¶
Pandas Integration¶
import pandas as pd
import scoutml
# Search and convert to DataFrame
results = scoutml.search("neural architecture search", limit=100)
df = pd.DataFrame(results['papers'])
# Analysis
print(df.groupby('year')['citations'].mean())
print(df.nlargest(10, 'citations')[['title', 'citations']])
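Because df is an ordinary DataFrame, the usual plotting stack applies. For instance, charting mean citations per publication year with matplotlib:
import matplotlib.pyplot as plt

# Mean citations per publication year
df.groupby('year')['citations'].mean().plot(kind='bar')
plt.xlabel('Year')
plt.ylabel('Mean citations')
plt.title('Neural architecture search: citations by year')
plt.tight_layout()
plt.show()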
Jupyter Notebook Usage¶
# Display results nicely in notebooks
from IPython.display import display, HTML
import scoutml
results = scoutml.search("attention mechanisms", limit=5)
# Create HTML table
html = "<table><tr><th>Title</th><th>Year</th><th>Citations</th></tr>"
for paper in results['papers']:
    html += f"<tr><td>{paper['title']}</td><td>{paper['year']}</td><td>{paper['citations']}</td></tr>"
html += "</table>"
display(HTML(html))
Async Usage (Future)¶
# Note: async support is planned for a future version.
# This is how it is expected to work:
import asyncio
import scoutml

async def search_multiple_topics(topics):
    tasks = [scoutml.search_async(topic, limit=10) for topic in topics]
    results = await asyncio.gather(*tasks)
    return results
# Run async searches
topics = ["bert", "gpt", "t5", "roberta"]
results = asyncio.run(search_multiple_topics(topics))
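Until async support lands, you can get similar concurrency today with a thread pool around the synchronous API. A sketch using only the documented scoutml.search:
from concurrent.futures import ThreadPoolExecutor
import scoutml

def search_many(topics, limit=10, max_workers=4):
    """Run several searches concurrently using the sync API."""
    # Keep max_workers modest to stay within your rate limits
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        return list(pool.map(lambda t: scoutml.search(t, limit=limit), topics))

results = search_many(["bert", "gpt", "t5", "roberta"])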
Best Practices¶
- Rate Limiting: The client handles rate limiting automatically, but be mindful of your API quota
- Error Handling: Always wrap API calls in try-except blocks for production code
- Caching: Consider caching the results of expensive or repeated queries (see the sketch after this list)
- Batch Operations: Use batch methods when fetching multiple papers
- Configuration: Store API keys securely using environment variables
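For the caching point above, an in-process cache is often enough. A minimal sketch with functools.lru_cache; note that cached arguments must be hashable, so pass scalars rather than lists:
from functools import lru_cache
import scoutml

@lru_cache(maxsize=128)
def cached_search(query, limit=10):
    """Memoize identical searches for the life of the process."""
    return scoutml.search(query, limit=limit)

# First call hits the API; the repeat call is served from the cache
papers = cached_search("graph neural networks")
papers_again = cached_search("graph neural networks")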
Complete Example¶
import json
import scoutml
from datetime import datetime

def research_topic(topic, output_file):
    """Complete research workflow for a topic."""
    print(f"Researching: {topic}")
    # 1. Initial search
    results = scoutml.search(topic, limit=50, year_min=2020)
    print(f"Found {len(results['papers'])} recent papers")
    # 2. Find the most cited paper
    top_paper = max(results['papers'], key=lambda p: p['citations'])
    paper_id = top_paper['arxiv_id']
    # 3. Get detailed analysis
    details = scoutml.get_paper(paper_id)
    critique = scoutml.critique_paper(paper_id)
    # 4. Find similar work
    similar = scoutml.find_similar_papers(paper_id=paper_id, limit=10)
    # 5. Generate review
    review = scoutml.generate_review(topic, year_min=2020, limit=30)
    # 6. Save results
    report = {
        "topic": topic,
        "date": datetime.now().isoformat(),
        "top_paper": details['paper'],
        "critique": critique['critique'],
        "similar_papers": similar['papers'],
        "review": review['review']['executive_summary']
    }
    with open(output_file, 'w') as f:
        json.dump(report, f, indent=2)
    print(f"Research complete! Report saved to {output_file}")

# Run the workflow
research_topic("self-supervised learning", "ssl_research.json")
Next Steps¶
- Explore the API Reference for detailed documentation
- See examples/python_usage.py for more examples
- Check the CLI documentation: all CLI features are available in Python