Research & Analysis
AI agents that crawl, summarize, and analyze data over time need to track what they've already read, what patterns they've found, and what conclusions they've drawn, building knowledge incrementally instead of starting from zero on every run.
The Challenge
Research agents face unique requirements:
- Knowledge accumulation - Build understanding over time, not just one-shot analysis
- Source tracking - Remember where information came from
- Pattern recognition - Connect insights across multiple data points
- Avoid duplication - Don't re-analyze what's already been processed
- Incremental learning - Update conclusions as new data arrives
How Memtrace Helps
Memtrace enables systematic knowledge building:
- Source attribution - Tag memories with sources and URLs
- Time-series analysis - Track how data changes over time
- Pattern storage - Store insights and patterns as they emerge
- Deduplication - Avoid re-processing the same sources
- Decision logging - Record conclusions with supporting evidence
Example: Market Research Agent
An agent that monitors competitor pricing daily, remembering trends and flagging anomalies.
Setup
from datetime import datetime  # used for date tags in the collection helpers below

from memtrace import Memtrace, RegisterAgentRequest, CreateSessionRequest

memtrace = Memtrace("http://localhost:9100", "mtk_...")

# Register research agent
researcher = memtrace.register_agent(RegisterAgentRequest(
    name="market-researcher",
    description="Monitors competitor pricing and market trends",
))

# Create long-running research session
session = memtrace.create_session(CreateSessionRequest(
    agent_id=researcher.id,
    metadata={
        "project": "competitor_pricing_analysis",
        "market": "saas_timeseries_db",
    },
))
Daily Data Collection
def collect_pricing_data():
    """Collect pricing data from competitors"""
    # Check if already collected today
    today = datetime.now().strftime("%Y-%m-%d")
    already_collected = memtrace.search_memories({
        "agent_id": researcher.id,
        "session_id": session.id,
        "tags": ["pricing_data", f"date_{today}"],
    })
    if already_collected.memories:
        print("Already collected pricing data today")
        return

    # Collect data
    competitor_a_price = scrape_competitor_pricing("competitor_a")
    competitor_b_price = scrape_competitor_pricing("competitor_b")

    # Store data points
    memtrace.remember(
        researcher.id,
        f"Competitor A pricing: Basic ${competitor_a_price['basic']}, Pro ${competitor_a_price['pro']}",
        session_id=session.id,
        tags=["pricing_data", "competitor_a", f"date_{today}"],
        metadata={
            "competitor": "competitor_a",
            "date": today,
            "prices": competitor_a_price,
        },
        importance=0.7,
    )
    memtrace.remember(
        researcher.id,
        f"Competitor B pricing: Starter ${competitor_b_price['starter']}, Business ${competitor_b_price['business']}",
        session_id=session.id,
        tags=["pricing_data", "competitor_b", f"date_{today}"],
        metadata={
            "competitor": "competitor_b",
            "date": today,
            "prices": competitor_b_price,
        },
        importance=0.7,
    )
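The `scrape_competitor_pricing` helper is assumed rather than defined above. A minimal placeholder, with hardcoded plan prices purely so the example runs end to end (a real implementation would fetch and parse each competitor's pricing page):

def scrape_competitor_pricing(competitor: str) -> dict:
    """Placeholder: return a dict of plan prices for one competitor."""
    # Hardcoded sample values; a real agent would scrape live pricing pages.
    # The plan names match the keys read by collect_pricing_data above.
    sample_prices = {
        "competitor_a": {"basic": 29, "pro": 99},
        "competitor_b": {"starter": 25, "business": 119},
    }
    return sample_prices[competitor]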
Pattern Detection
def analyze_pricing_trends():
    """Analyze pricing data for patterns"""
    # Get last 30 days of pricing data
    pricing_data = memtrace.search_memories({
        "agent_id": researcher.id,
        "session_id": session.id,
        "tags": ["pricing_data"],
        "since": "30d",
    })

    # Analyze for changes
    for competitor in ["competitor_a", "competitor_b"]:
        competitor_data = [m for m in pricing_data.memories if competitor in m.tags]
        # Sort by collection date so the last two entries are the most recent
        competitor_data.sort(key=lambda m: m.metadata.get("date", ""))

        # Check for price changes
        prices = [m.metadata.get("prices") for m in competitor_data]
        if len(prices) >= 2 and prices[-1] != prices[-2]:
            memtrace.remember(
                researcher.id,
                f"{competitor} changed pricing from {prices[-2]} to {prices[-1]}",
                session_id=session.id,
                tags=["price_change", competitor, "alert"],
                importance=1.0,
            )
            memtrace.decide(
                researcher.id,
                f"Flag {competitor} price change for review",
                "Price changes may indicate market shift or competitive response",
            )
Trend Analysis
def analyze_long_term_trends():
    """Analyze trends over 90 days"""
    # Get 90 days of data
    data = memtrace.search_memories({
        "agent_id": researcher.id,
        "session_id": session.id,
        "tags": ["pricing_data"],
        "since": "90d",
    })

    # Calculate averages
    competitor_a_avg = calculate_average_price(data, "competitor_a")
    competitor_b_avg = calculate_average_price(data, "competitor_b")

    # Store insight
    memtrace.remember(
        researcher.id,
        f"90-day pricing analysis: Competitor A avg ${competitor_a_avg}, Competitor B avg ${competitor_b_avg}",
        session_id=session.id,
        tags=["analysis", "trend", "90_day"],
        importance=0.9,
    )

    # Check for patterns
    if competitor_a_avg < competitor_b_avg * 0.7:
        memtrace.decide(
            researcher.id,
            "Competitor A pursuing aggressive low-price strategy",
            "A consistently priced more than 30% below B over 90 days, likely targeting market share",
        )
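The `calculate_average_price` helper is also left undefined; a minimal sketch, assuming the `prices` metadata shape stored by collect_pricing_data and averaging across all plans and days in the search window:

def calculate_average_price(data, competitor: str) -> float:
    """Average every stored plan price for one competitor over the search window."""
    prices = []
    for memory in data.memories:
        if competitor in memory.tags:
            # `prices` is the dict written to metadata during daily collection
            prices.extend((memory.metadata.get("prices") or {}).values())
    return round(sum(prices) / len(prices), 2) if prices else 0.0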
Example: Content Research Agent
An agent that researches topics by reading articles and building a knowledge base.
Setup
content_researcher = memtrace.register_agent(RegisterAgentRequest(
    name="content-researcher",
    description="Researches topics by reading web sources",
))

session = memtrace.create_session(CreateSessionRequest(
    agent_id=content_researcher.id,
    metadata={"topic": "golang_concurrency"},
))
Source Collection
def research_topic(url: str):
    """Research a specific URL"""
    # Check if already researched
    already_read = memtrace.search_memories({
        "agent_id": content_researcher.id,
        "session_id": session.id,
        "content_contains": url,
        "tags": ["source"],
    })
    if already_read.memories:
        print(f"Already researched {url}")
        return

    # Scrape and analyze
    content = scrape_url(url)
    key_points = extract_key_points(content)

    # Store source
    memtrace.remember(
        content_researcher.id,
        f"Source: {url}",
        session_id=session.id,
        tags=["source", "web"],
        metadata={"url": url, "scraped_at": datetime.now().isoformat()},
        importance=0.6,
    )

    # Store key findings
    for point in key_points:
        memtrace.remember(
            content_researcher.id,
            point,
            session_id=session.id,
            tags=["finding", "golang", "concurrency"],
            metadata={"source": url},
            importance=0.8,
        )
Knowledge Synthesis
def synthesize_findings():
    """Synthesize findings across all sources"""
    # Get all findings
    findings = memtrace.search_memories({
        "agent_id": content_researcher.id,
        "session_id": session.id,
        "tags": ["finding"],
    })

    # Group by theme
    themes = {}
    for finding in findings.memories:
        # Use LLM to categorize finding
        theme = categorize_finding(finding.content)
        if theme not in themes:
            themes[theme] = []
        themes[theme].append(finding)

    # Store synthesis
    for theme, theme_findings in themes.items():
        memtrace.remember(
            content_researcher.id,
            f"Theme '{theme}' covered by {len(theme_findings)} sources",
            session_id=session.id,
            tags=["synthesis", theme],
            importance=0.9,
        )

    # Draw conclusions
    memtrace.decide(
        content_researcher.id,
        f"Research complete: identified {len(themes)} key themes from {len(findings.memories)} findings",
        f"Sufficient coverage across {len(get_unique_sources(findings))} sources",
    )
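The `categorize_finding` helper is not defined above; in practice it would most likely prompt an LLM. A keyword-based stand-in, purely illustrative and tied to the golang_concurrency topic used in this example:

def categorize_finding(content: str) -> str:
    """Illustrative stand-in: bucket a finding into a coarse theme by keyword."""
    # A real agent would typically ask an LLM to classify the text.
    lowered = content.lower()
    if "channel" in lowered:
        return "channels"
    if "goroutine" in lowered:
        return "goroutines"
    if "mutex" in lowered or "lock" in lowered:
        return "synchronization"
    return "general"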
Example: Technical Documentation Crawler
An agent that crawls documentation sites and builds a searchable knowledge base.
Setup
doc_crawler = memtrace.register_agent(RegisterAgentRequest(
    name="doc-crawler",
    description="Crawls and indexes technical documentation",
))

session = memtrace.create_session(CreateSessionRequest(
    agent_id=doc_crawler.id,
    metadata={"target": "golang_docs"},
))
Crawling
def crawl_documentation(base_url: str):
    """Crawl documentation site"""
    pages_to_crawl = [base_url]
    crawled = set()

    while pages_to_crawl:
        url = pages_to_crawl.pop(0)
        if url in crawled:
            continue

        # Check if already crawled (persistent memory)
        already_crawled = memtrace.search_memories({
            "agent_id": doc_crawler.id,
            "content_contains": url,
            "tags": ["crawled"],
        })
        if already_crawled.memories:
            print(f"Skipping {url} - already crawled")
            crawled.add(url)
            continue

        # Crawl page
        content = fetch_page(url)
        links = extract_links(content)

        # Store page
        memtrace.remember(
            doc_crawler.id,
            f"Crawled {url}",
            session_id=session.id,
            tags=["crawled", "documentation"],
            metadata={
                "url": url,
                "word_count": len(content.split()),
                "links_found": len(links),
            },
            importance=0.5,
        )

        # Extract and store key information
        sections = extract_sections(content)
        for section in sections:
            memtrace.remember(
                doc_crawler.id,
                f"Section '{section['title']}': {section['summary']}",
                session_id=session.id,
                tags=["content", section['category']],
                metadata={"source": url, "section": section['title']},
                importance=0.8,
            )

        # Add new links to queue
        pages_to_crawl.extend([l for l in links if l not in crawled])
        crawled.add(url)
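`fetch_page` and `extract_links` are assumed helpers, not part of Memtrace. One possible sketch using requests and BeautifulSoup (a production crawler would also respect robots.txt and rate limits):

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

def fetch_page(url: str) -> str:
    """Download a page and return its HTML."""
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.text

def extract_links(html: str, base_url: str = "") -> list[str]:
    """Return absolute URLs for every anchor found in the page."""
    soup = BeautifulSoup(html, "html.parser")
    return [urljoin(base_url, a["href"]) for a in soup.find_all("a", href=True)]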
Progress Tracking
def get_crawl_progress():
    """Check crawl progress"""
    crawled = memtrace.search_memories({
        "agent_id": doc_crawler.id,
        "session_id": session.id,
        "tags": ["crawled"],
    })
    content_extracted = memtrace.search_memories({
        "agent_id": doc_crawler.id,
        "session_id": session.id,
        "tags": ["content"],
    })
    return {
        "pages_crawled": len(crawled.memories),
        "sections_extracted": len(content_extracted.memories),
    }
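A periodic status report might then be as simple as:

progress = get_crawl_progress()
print(f"Crawled {progress['pages_crawled']} pages, extracted {progress['sections_extracted']} sections")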
Example: Data Analyst Agent
An agent that processes CSV files, finds patterns, and generates insights.
Setup
import pandas as pd  # used by analyze_dataset below

analyst = memtrace.register_agent(RegisterAgentRequest(
    name="data-analyst",
    description="Analyzes datasets and generates insights",
))

session = memtrace.create_session(CreateSessionRequest(
    agent_id=analyst.id,
    metadata={"dataset": "sales_2025"},
))
Data Processing
def analyze_dataset(file_path: str):
    """Analyze a dataset file"""
    # Check if already analyzed
    already_analyzed = memtrace.search_memories({
        "agent_id": analyst.id,
        "content_contains": file_path,
        "tags": ["analyzed"],
    })
    if already_analyzed.memories:
        print(f"Dataset {file_path} already analyzed")
        return

    # Load and analyze
    df = pd.read_csv(file_path)

    # Store metadata
    memtrace.remember(
        analyst.id,
        f"Analyzed {file_path}: {len(df)} rows, {len(df.columns)} columns",
        session_id=session.id,
        tags=["analyzed", "metadata"],
        metadata={
            "file": file_path,
            "rows": len(df),
            "columns": list(df.columns),
        },
        importance=0.7,
    )

    # Find patterns
    for column in df.select_dtypes(include='number').columns:
        stats = df[column].describe()
        memtrace.remember(
            analyst.id,
            f"Column '{column}': mean={stats['mean']:.2f}, std={stats['std']:.2f}",
            session_id=session.id,
            tags=["statistics", column],
            metadata={"file": file_path, "column": column},
            importance=0.6,
        )

        # Flag anomalies
        if stats['std'] > stats['mean'] * 2:
            memtrace.remember(
                analyst.id,
                f"High variance detected in '{column}' - potential anomaly",
                session_id=session.id,
                tags=["anomaly", column],
                importance=0.9,
            )
Cross-Dataset Insights
def find_cross_dataset_patterns():
    """Find patterns across multiple analyzed datasets"""
    all_stats = memtrace.search_memories({
        "agent_id": analyst.id,
        "session_id": session.id,
        "tags": ["statistics"],
    })

    # Correlate patterns
    # ... analysis logic ...

    memtrace.decide(
        analyst.id,
        "Found correlation between sales and marketing spend across datasets",
        "3 datasets show consistent 2.5x ROI on marketing, recommend increased budget",
    )
Best Practices
Source Attribution
Always tag memories with source information:
metadata = {
    "source": "https://example.com/article",
    "scraped_at": "2026-02-13T10:00:00Z",
}
Deduplication
Check before re-processing:
already_processed = memtrace.search_memories({
    "content_contains": url,
    "tags": ["processed"],
})
Importance Scoring
Prioritize findings over routine logs:
# Routine data collection
importance=0.5
# Key finding
importance=0.9
# Critical insight
importance=1.0
Temporal Queries
Use time windows for trend analysis:
# Recent data
since="7d"
# Long-term trends
since="90d"
Pattern Recording
Store patterns as decisions:
memtrace.decide(
    agent_id,
    "Pattern identified: prices drop every Tuesday",
    "Observed consistently over 8 weeks across 3 competitors",
)
Benefits
- No duplicate work - Agent remembers what it already analyzed
- Knowledge accumulation - Builds understanding over time
- Pattern recognition - Connects insights across data points
- Source tracking - Always know where information came from
- Incremental learning - Updates conclusions as new data arrives
- Audit trail - Full history of research process and decisions