Python Library Usage

ScoutML provides a comprehensive Python API that mirrors all CLI functionality with a clean, intuitive interface.

Basic Usage

Import and Setup

import scoutml

# The library automatically uses your configured API key
# from environment variables or config files
# Search for papers
results = scoutml.search("transformer models", limit=10)

# Access results
for paper in results['papers']:
    print(f"{paper['title']} - {paper['citations']} citations")

Core Functions

Search Functions

# Semantic search with filters
results = scoutml.search(
    "vision transformers",
    limit=20,
    year_min=2021,
    min_citations=50,
    sota_only=True,
    domain="computer vision"
)

# Search by method
bert_papers = scoutml.method_search(
    "BERT",
    sort_by="citations",  # or "year", "novelty"
    limit=15
)

# Search by dataset
imagenet_papers = scoutml.dataset_search(
    "ImageNet",
    include_benchmarks=True,
    year_min=2020
)

Paper Analysis

# Get detailed paper information
paper = scoutml.get_paper("2103.00020", include_similar=True)
print(paper['paper']['title'])
print(paper['paper']['abstract'])

# Compare multiple papers
comparison = scoutml.compare_papers("1810.04805", "2005.14165", "1910.10683")
print(comparison['analysis']['summary'])

# Find similar papers
similar = scoutml.find_similar_papers(
    paper_id="1810.04805",
    limit=10,
    threshold=0.8
)

# Or find papers similar to your abstract
similar = scoutml.find_similar_papers(
    abstract_text="We propose a new method for self-supervised learning...",
    limit=5
)

Research Synthesis

# Generate literature review
review = scoutml.generate_review(
    "federated learning",
    year_min=2020,
    min_citations=20,
    limit=50
)

print(review['review']['executive_summary'])
print(f"Analyzed {len(review['review']['papers'])} papers")

Insights and Analytics

# Get reproducible papers
reproducible = scoutml.get_reproducible_papers(
    domain="nlp",
    year_min=2022,
    limit=20
)

# Analyze compute trends
compute_analysis = scoutml.analyze_compute_trends(
    method="large language model",
    year_min=2020
)

# Analyze funding patterns
funding = scoutml.analyze_funding(
    institution="MIT",
    source="NSF",
    limit=30
)

AI Agents

# Get implementation guide
guide = scoutml.get_implementation_guide(
    "2010.11929",
    framework="pytorch",  # or "tensorflow", "jax", "other"
    level="intermediate"  # or "beginner", "advanced"
)

# Get research critique
critique = scoutml.critique_paper(
    "1810.04805",
    aspects=["methodology", "experiments", "reproducibility"]
)

# Solve limitations
solutions = scoutml.solve_limitations(
    "2103.00020",
    focus="computational",
    tradeoffs=["speed", "memory"]
)

# Design experiments
experiment = scoutml.design_experiment(
    "2010.11929",
    "ViT performs well on small datasets with proper augmentation",
    gpu_hours=100,
    datasets=["CIFAR-10", "CIFAR-100"]
)

Advanced Usage

Custom Client Configuration

from scoutml import Config, ScoutMLClient

# Create custom configuration
config = Config()
config.api_key = "your-api-key"
config.base_url = "https://custom.api.url"
config.default_timeout = 60  # seconds
config.max_retries = 5

# Create client with custom config
client = ScoutMLClient(config)

# Use the client
results = client.semantic_search("quantum computing", limit=10)

Context Manager

# Automatic session management
with scoutml.Scout() as scout:
    # All client methods available
    papers = scout.semantic_search("bert variants", limit=5)
    comparison = scout.compare_papers(["1810.04805", "1907.11692"])

Batch Operations

# Get multiple papers efficiently
client = scoutml.get_client()

paper_ids = ["2103.00020", "2010.11929", "1810.04805", "1706.03762"]
papers = client.batch_get_papers(paper_ids, show_progress=True)

for paper in papers:
    if "error" not in paper:
        print(f"✓ {paper['title']}")
    else:
        print(f"✗ {paper['arxiv_id']}: {paper['error']}")

Error Handling

from scoutml import (
    ScoutMLError,
    AuthenticationError,
    NotFoundError,
    RateLimitError,
    ServerError
)

try:
    paper = scoutml.get_paper("invalid-id")
except NotFoundError:
    print("Paper not found")
except AuthenticationError:
    print("Check your API key")
except RateLimitError as e:
    print(f"Rate limited: {e}")
except ServerError:
    print("Server error, try again later")
except ScoutMLError as e:
    print(f"General error: {e}")

Working with Results

Parsing Search Results

results = scoutml.search("reinforcement learning", limit=50)

# Extract specific fields
papers_data = [
    {
        "title": p["title"],
        "year": p["year"],
        "citations": p["citations"],
        "url": f"https://arxiv.org/abs/{p['arxiv_id']}"
    }
    for p in results["papers"]
]

# Filter results
high_impact = [p for p in results["papers"] if p["citations"] > 100]
recent = [p for p in results["papers"] if p["year"] >= 2023]

Exporting Data

import json
import csv

# Export to JSON
results = scoutml.search("transformer", limit=100)
with open("papers.json", "w") as f:
    json.dump(results, f, indent=2)

# Export to CSV
with open("papers.csv", "w", newline="") as f:
    # extrasaction="ignore" skips any fields on each paper dict not listed in fieldnames
    writer = csv.DictWriter(
        f,
        fieldnames=["arxiv_id", "title", "year", "citations"],
        extrasaction="ignore",
    )
    writer.writeheader()
    writer.writerows(results["papers"])

Integration Examples

Pandas Integration

import pandas as pd
import scoutml

# Search and convert to DataFrame
results = scoutml.search("neural architecture search", limit=100)
df = pd.DataFrame(results['papers'])

# Analysis
print(df.groupby('year')['citations'].mean())
print(df.nlargest(10, 'citations')[['title', 'citations']])

Jupyter Notebook Usage

# Display results nicely in notebooks
from IPython.display import display, HTML
import scoutml

results = scoutml.search("attention mechanisms", limit=5)

# Create HTML table
html = "<table><tr><th>Title</th><th>Year</th><th>Citations</th></tr>"
for paper in results['papers']:
    html += f"<tr><td>{paper['title']}</td><td>{paper['year']}</td><td>{paper['citations']}</td></tr>"
html += "</table>"

display(HTML(html))

Async Usage (Future)

# Note: Async support planned for future versions
# The planned interface is expected to look like this:

import asyncio
import scoutml

async def search_multiple_topics(topics):
    tasks = [scoutml.search_async(topic, limit=10) for topic in topics]
    results = await asyncio.gather(*tasks)
    return results

# Run async searches
topics = ["bert", "gpt", "t5", "roberta"]
results = asyncio.run(search_multiple_topics(topics))

Best Practices

  1. Rate Limiting: The client handles rate limiting automatically, but be mindful of your API quota
  2. Error Handling: Always wrap API calls in try-except blocks for production code
  3. Caching: Consider caching results for expensive operations, as in the sketch below
  4. Batch Operations: Use batch methods when fetching multiple papers
  5. Configuration: Store API keys securely using environment variables rather than hard-coding them (also shown in the sketch below)
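
A minimal sketch of the caching and configuration practices above: the environment variable name SCOUTML_API_KEY is a placeholder (use whatever name your setup defines), and the caching helper is illustrative rather than part of the library.

import os
import functools

from scoutml import Config, ScoutMLClient

# Read the key from the environment instead of hard-coding it.
# "SCOUTML_API_KEY" is a placeholder name, not a documented variable.
config = Config()
config.api_key = os.environ["SCOUTML_API_KEY"]
client = ScoutMLClient(config)

# Cache repeated queries in-process so identical searches don't spend
# extra API quota. lru_cache requires hashable arguments, so only
# simple string/int values are passed here.
@functools.lru_cache(maxsize=128)
def cached_search(query, limit=10):
    return client.semantic_search(query, limit=limit)

first = cached_search("contrastive learning")   # hits the API
second = cached_search("contrastive learning")  # served from the in-process cache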

Complete Example

import json
from datetime import datetime

import scoutml

def research_topic(topic, output_file):
    """Complete research workflow for a topic."""

    print(f"Researching: {topic}")

    # 1. Initial search
    results = scoutml.search(topic, limit=50, year_min=2020)
    print(f"Found {len(results['papers'])} recent papers")

    # 2. Find most cited paper
    top_paper = max(results['papers'], key=lambda p: p['citations'])
    paper_id = top_paper['arxiv_id']

    # 3. Get detailed analysis
    details = scoutml.get_paper(paper_id)
    critique = scoutml.critique_paper(paper_id)

    # 4. Find similar work
    similar = scoutml.find_similar_papers(paper_id=paper_id, limit=10)

    # 5. Generate review
    review = scoutml.generate_review(topic, year_min=2020, limit=30)

    # 6. Save results
    report = {
        "topic": topic,
        "date": datetime.now().isoformat(),
        "top_paper": details['paper'],
        "critique": critique['critique'],
        "similar_papers": similar['papers'],
        "review": review['review']['executive_summary']
    }

    with open(output_file, 'w') as f:
        json.dump(report, f, indent=2)

    print(f"Research complete! Report saved to {output_file}")

# Run research
research_topic("self-supervised learning", "ssl_research.json")

Next Steps