Content Intelligence
Parent: Shared Infrastructure OVERVIEW
Status: Active
Scope: Ingest → Analyze → Extract Patterns → Learn
Overview
Content Intelligence is the perception layer: it ingests content from multiple sources, analyzes it to extract patterns, and continuously learns what works.
┌─────────────────────────────────────────────────────────────────────────┐
│ CONTENT INTELLIGENCE PIPELINE │
├─────────────────────────────────────────────────────────────────────────┤
│ │
│ SOURCES INGEST RAW CONTENT │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │Competitors│ │ Scrapers │ │ Text │ │
│ │ RSS │──────────────▶│ APIs │────────────▶│ Media │ │
│ │ Social │ │ Webhooks │ │ Links │ │
│ └──────────┘ └──────────┘ └──────────┘ │
│ │ │
│ ▼ │
│ ANALYSIS │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ NLP Pipeline │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ Keyword │ │ Entity │ │Sentiment │ │ Topic │ │ │
│ │ │Extraction│ │Recognition│ │ Analysis │ │ Modeling │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ PATTERNS │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ Platform Patterns (what works where) │ │
│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
│ │ │ Twitter │ │ YouTube │ │LinkedIn │ │ Blog │ │ │
│ │ │hooks, │ │titles, │ │format, │ │SEO, │ │ │
│ │ │length │ │thumbnails│ │timing │ │structure│ │ │
│ │ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ FEEDBACK LOOP │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ Performance Data → Pattern Scoring → Weight Adjustment │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────┘
Schema
Content Sources
Where content comes from for analysis.
CREATE TABLE content_sources (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  -- Source identification
  name text NOT NULL,
  source_type text NOT NULL, -- 'competitor', 'rss', 'api', 'scraper', 'manual', 'social'
  url text,
  -- Configuration
  config jsonb DEFAULT '{}', -- API keys, selectors, auth
  -- Categorization
  vertical_id uuid REFERENCES publisher_verticals(id),
  tags text[],
  -- Scheduling
  fetch_frequency interval DEFAULT '1 hour',
  last_fetched_at timestamptz,
  next_fetch_at timestamptz,
  -- Health
  status text DEFAULT 'active', -- 'active', 'paused', 'error', 'disabled'
  error_count integer DEFAULT 0,
  last_error text,
  -- Stats
  total_items_fetched integer DEFAULT 0,
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now()
);

CREATE INDEX idx_content_sources_type ON content_sources(source_type);
CREATE INDEX idx_content_sources_vertical ON content_sources(vertical_id);
CREATE INDEX idx_content_sources_next_fetch ON content_sources(next_fetch_at)
  WHERE status = 'active';
Raw Content
Ingested content before analysis.
CREATE TABLE raw_content (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  -- Source reference
  source_id uuid REFERENCES content_sources(id),
  external_id text, -- ID from source system
  -- Content
  title text,
  content_text text,
  content_html text,
  summary text,
  -- Media
  featured_image_url text,
  media_urls text[],
  -- Metadata
  author text,
  published_at timestamptz,
  url text,
  -- Categorization
  vertical_id uuid REFERENCES publisher_verticals(id),
  content_type text, -- 'article', 'video', 'social_post', 'podcast'
  -- Processing status
  processing_status text DEFAULT 'pending', -- 'pending', 'processing', 'analyzed', 'failed'
  analyzed_at timestamptz,
  -- Deduplication
  content_hash text, -- Hash for dedup
  -- Metrics (if available from source)
  source_metrics jsonb DEFAULT '{}', -- {views, likes, shares, comments}
  created_at timestamptz DEFAULT now(),
  UNIQUE(source_id, external_id)
);

CREATE INDEX idx_raw_content_source ON raw_content(source_id);
CREATE INDEX idx_raw_content_status ON raw_content(processing_status);
CREATE INDEX idx_raw_content_vertical ON raw_content(vertical_id);
CREATE INDEX idx_raw_content_published ON raw_content(published_at DESC);
CREATE INDEX idx_raw_content_hash ON raw_content(content_hash);
Content Analysis
Extracted insights from content.
CREATE TABLE content_analysis (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  -- Content reference
  content_id uuid REFERENCES raw_content(id) UNIQUE NOT NULL,
  -- Keywords & Entities
  keywords text[], -- Extracted keywords
  keyword_scores jsonb DEFAULT '{}', -- {keyword: relevance_score}
  entities jsonb DEFAULT '[]', -- [{name, type, salience}]
  -- Topics & Categories
  topics text[], -- High-level topics
  categories text[], -- Content categories
  -- Sentiment & Tone
  sentiment text, -- 'positive', 'negative', 'neutral', 'mixed'
  sentiment_score numeric, -- -1 to 1
  tone text[], -- ['informative', 'casual', 'urgent']
  -- Structure Analysis
  word_count integer,
  reading_time_minutes integer,
  heading_count integer,
  paragraph_count integer,
  -- SEO Analysis
  title_length integer,
  meta_description_length integer,
  keyword_density jsonb DEFAULT '{}',
  -- Content Quality Signals
  quality_score numeric, -- 0-100 composite score
  originality_score numeric, -- 0-100 uniqueness
  readability_score numeric, -- Flesch Reading Ease or similar
  -- Embeddings (for semantic search)
  embedding vector(1536), -- OpenAI ada-002 embedding
  -- AI Model Used
  model_used text,
  analysis_cost numeric(10,6),
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now()
);

CREATE INDEX idx_content_analysis_content ON content_analysis(content_id);
CREATE INDEX idx_content_analysis_keywords ON content_analysis USING GIN(keywords);
CREATE INDEX idx_content_analysis_topics ON content_analysis USING GIN(topics);
CREATE INDEX idx_content_analysis_sentiment ON content_analysis(sentiment);
CREATE INDEX idx_content_analysis_quality ON content_analysis(quality_score DESC);
CREATE INDEX idx_content_analysis_embedding ON content_analysis
  USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
Platforms
Platform definitions and characteristics.
CREATE TABLE platforms (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  -- Platform identity
  name text UNIQUE NOT NULL, -- 'twitter', 'youtube', 'linkedin', 'wordpress'
  display_name text NOT NULL, -- 'Twitter/X', 'YouTube', 'LinkedIn'
  platform_type text NOT NULL, -- 'social', 'blog', 'video', 'email', 'podcast'
  -- Constraints
  max_title_length integer,
  max_content_length integer,
  max_hashtags integer,
  max_media_items integer,
  -- Optimal Content Specs
  optimal_title_length integer,
  optimal_content_length integer,
  optimal_posting_times jsonb DEFAULT '[]', -- [{day, hour, timezone}]
  -- Content Format
  supports_markdown boolean DEFAULT false,
  supports_html boolean DEFAULT false,
  supports_images boolean DEFAULT true,
  supports_video boolean DEFAULT false,
  supports_links boolean DEFAULT true,
  -- API Configuration
  api_config jsonb DEFAULT '{}',
  rate_limits jsonb DEFAULT '{}',
  -- Status
  status text DEFAULT 'active',
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now()
);
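A sample seed row makes the limit and optimal-spec columns concrete. A minimal sketch using the same supabase-js client as the pipelines below; the values are illustrative placeholders, not platform recommendations.
// Illustrative seed row; tune limits and optimal specs per platform
await supabase.from('platforms').insert({
  name: 'twitter',
  display_name: 'Twitter/X',
  platform_type: 'social',
  max_content_length: 280,
  max_hashtags: 5,
  optimal_content_length: 240,
  optimal_posting_times: [{ day: 2, hour: 9, timezone: 'America/New_York' }],
  supports_video: true
})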
Platform Patterns
What works on each platform, learned from performance data.
CREATE TABLE platform_patterns (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  -- Platform reference
  platform_id uuid REFERENCES platforms(id) NOT NULL,
  -- Pattern identification
  pattern_type text NOT NULL, -- 'hook', 'structure', 'timing', 'hashtags', 'cta', 'media'
  pattern_name text NOT NULL,
  -- Pattern definition
  pattern_description text,
  pattern_example text,
  pattern_template text, -- Template format if applicable
  -- Performance data
  sample_size integer DEFAULT 0,
  avg_engagement_rate numeric,
  avg_reach numeric,
  avg_clicks numeric,
  -- Confidence
  confidence_score numeric, -- 0-1 based on sample size and consistency
  -- Categorization
  vertical_id uuid REFERENCES publisher_verticals(id), -- null = all verticals
  content_type text, -- null = all types
  -- Learning
  last_calculated_at timestamptz,
  data_points jsonb DEFAULT '[]', -- Historical performance samples
  -- Status
  status text DEFAULT 'active', -- 'active', 'testing', 'deprecated'
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now(),
  -- NULLS NOT DISTINCT (Postgres 15+) so upserts match rows where vertical_id IS NULL
  UNIQUE NULLS NOT DISTINCT (platform_id, pattern_type, pattern_name, vertical_id)
);

CREATE INDEX idx_platform_patterns_platform ON platform_patterns(platform_id);
CREATE INDEX idx_platform_patterns_type ON platform_patterns(pattern_type);
CREATE INDEX idx_platform_patterns_vertical ON platform_patterns(vertical_id);
CREATE INDEX idx_platform_patterns_confidence ON platform_patterns(confidence_score DESC);
Content Platform Scores
How well content fits each platform.
CREATE TABLE content_platform_scores (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  -- References
  content_id uuid REFERENCES raw_content(id) NOT NULL,
  platform_id uuid REFERENCES platforms(id) NOT NULL,
  -- Scores
  fit_score numeric NOT NULL, -- 0-100: how well content fits platform
  predicted_engagement numeric, -- Predicted engagement rate
  predicted_reach numeric, -- Predicted reach
  -- Breakdown
  score_factors jsonb DEFAULT '{}', -- {length: 80, tone: 90, hooks: 75}
  -- Recommendations
  suggested_modifications text[], -- What to change for better fit
  suggested_posting_time timestamptz,
  -- Actual performance (filled after posting)
  actual_engagement numeric,
  actual_reach numeric,
  created_at timestamptz DEFAULT now(),
  UNIQUE(content_id, platform_id)
);

CREATE INDEX idx_content_platform_scores_content ON content_platform_scores(content_id);
CREATE INDEX idx_content_platform_scores_platform ON content_platform_scores(platform_id);
CREATE INDEX idx_content_platform_scores_fit ON content_platform_scores(fit_score DESC);
Pattern Feedback
Performance data that feeds back into pattern learning.
CREATE TABLE pattern_feedback (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  -- References
  pattern_id uuid REFERENCES platform_patterns(id),
  content_id uuid REFERENCES raw_content(id),
  distribution_id uuid REFERENCES distributions(id),
  -- Pattern application
  pattern_applied boolean NOT NULL, -- Was this pattern used?
  pattern_strength numeric, -- 0-1 how strongly pattern was applied
  -- Performance
  impressions integer,
  engagements integer,
  clicks integer,
  shares integer,
  -- Calculated
  engagement_rate numeric,
  click_through_rate numeric,
  -- Comparison
  vs_average_engagement numeric, -- % above/below platform average
  vs_average_reach numeric,
  -- Time context
  posted_at timestamptz,
  day_of_week integer,
  hour_of_day integer,
  created_at timestamptz DEFAULT now()
);

CREATE INDEX idx_pattern_feedback_pattern ON pattern_feedback(pattern_id);
CREATE INDEX idx_pattern_feedback_content ON pattern_feedback(content_id);
CREATE INDEX idx_pattern_feedback_posted ON pattern_feedback(posted_at DESC);
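Closing the loop from the diagram (Performance Data → Pattern Scoring → Weight Adjustment) means rolling pattern_feedback rows back into each pattern's averages and confidence score. A minimal sketch of that rollup, assuming the schemas above and the same 50-sample confidence ceiling used in pattern learning below:
// Roll pattern_feedback up into platform_patterns (sketch; assumes a shared supabase client)
async function recalculatePatternScores(patternId) {
  const { data: feedback } = await supabase
    .from('pattern_feedback')
    .select('engagement_rate, impressions')
    .eq('pattern_id', patternId)
    .eq('pattern_applied', true)

  if (!feedback?.length) return // no applications of this pattern yet

  const avgEngagement =
    feedback.reduce((sum, f) => sum + (f.engagement_rate || 0), 0) / feedback.length
  const avgReach =
    feedback.reduce((sum, f) => sum + (f.impressions || 0), 0) / feedback.length

  await supabase
    .from('platform_patterns')
    .update({
      sample_size: feedback.length,
      avg_engagement_rate: avgEngagement,
      avg_reach: avgReach,
      confidence_score: Math.min(1, feedback.length / 50), // same ceiling as hook learning
      last_calculated_at: new Date().toISOString()
    })
    .eq('id', patternId)
}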
Content Ingestion
Fetch Content from Sources
// Cron: Every 15 minutes
async function fetchAllSources() {
  const { data: sources } = await supabase
    .from('content_sources')
    .select('*')
    .eq('status', 'active')
    .lt('next_fetch_at', new Date().toISOString())
    .order('next_fetch_at')
    .limit(10)

  for (const source of sources ?? []) {
    try {
      await fetchSource(source)
      // Success: reset the error counter and schedule the next fetch
      await supabase
        .from('content_sources')
        .update({
          last_fetched_at: new Date().toISOString(),
          next_fetch_at: new Date(Date.now() + parseDuration(source.fetch_frequency)).toISOString(),
          error_count: 0
        })
        .eq('id', source.id)
    } catch (error) {
      // Failure: record it, and park the source after five consecutive errors
      await supabase
        .from('content_sources')
        .update({
          error_count: source.error_count + 1,
          last_error: error.message,
          status: source.error_count + 1 >= 5 ? 'error' : 'active'
        })
        .eq('id', source.id)
    }
  }
}
async function fetchSource(source) {
  let items = []
  switch (source.source_type) {
    case 'rss':
      items = await fetchRSS(source.url)
      break
    case 'api':
      items = await fetchAPI(source.url, source.config)
      break
    case 'scraper':
      items = await runScraper(source.url, source.config)
      break
    case 'social':
      items = await fetchSocialMedia(source.config)
      break
  }

  // Deduplicate and store
  let inserted = 0
  for (const item of items) {
    // Fetchers return the body as `content`, so hash title + content
    const contentHash = hashContent(`${item.title ?? ''}${item.content ?? ''}`)

    // Check for duplicate (maybeSingle returns null, not an error, on no match)
    const { data: existing } = await supabase
      .from('raw_content')
      .select('id')
      .eq('content_hash', contentHash)
      .maybeSingle()
    if (existing) continue

    await supabase
      .from('raw_content')
      .insert({
        source_id: source.id,
        external_id: item.id,
        title: item.title,
        content_text: item.content,
        content_html: item.html,
        author: item.author,
        published_at: item.published_at,
        url: item.url,
        featured_image_url: item.image,
        vertical_id: source.vertical_id,
        content_type: item.type || 'article',
        content_hash: contentHash,
        source_metrics: item.metrics || {}
      })
    inserted++
  }

  // Update the counter once per run; incrementing inside the loop with the
  // stale source.total_items_fetched would overwrite rather than accumulate
  if (inserted > 0) {
    await supabase
      .from('content_sources')
      .update({ total_items_fetched: source.total_items_fetched + inserted })
      .eq('id', source.id)
  }
}
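The code above assumes two small helpers: parseDuration, which converts the Postgres interval stored in fetch_frequency into milliseconds, and hashContent, which produces the dedup hash. Minimal sketches, handling only the two common interval text forms:
import { createHash } from 'crypto'

// Convert an interval string into milliseconds. Handles 'HH:MM:SS'
// (Postgres' default text output) and the 'N unit' form ('15 minutes', '1 hour');
// compound intervals ('1 day 02:00:00') would need a fuller parser.
function parseDuration(interval) {
  const s = String(interval).trim()
  const hms = /^(\d+):(\d{2}):(\d{2})$/.exec(s)
  if (hms) return (Number(hms[1]) * 3600 + Number(hms[2]) * 60 + Number(hms[3])) * 1000
  const nu = /^(\d+)\s*(second|minute|hour|day)s?$/.exec(s)
  if (nu) {
    const unitMs = { second: 1000, minute: 60000, hour: 3600000, day: 86400000 }
    return Number(nu[1]) * unitMs[nu[2]]
  }
  return 3600000 // fall back to 1 hour
}

// Stable hash for raw_content.content_hash deduplication
function hashContent(text) {
  return createHash('sha256').update(text).digest('hex')
}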
RSS Fetcher
import Parser from 'rss-parser'

async function fetchRSS(url: string) {
  const parser = new Parser()
  const feed = await parser.parseURL(url)

  return feed.items.map(item => ({
    id: item.guid || item.link,
    title: item.title,
    content: item.contentSnippet || item.content,
    html: item.content,
    author: item.creator || item.author,
    published_at: item.pubDate ? new Date(item.pubDate).toISOString() : null,
    url: item.link,
    image: item.enclosure?.url || extractImage(item.content),
    type: 'article'
  }))
}
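extractImage is not part of rss-parser; a minimal sketch that pulls the first image URL out of the item HTML (a regex is adequate for feed snippets; use a real HTML parser for full pages):
// First <img src="..."> in an HTML fragment, or null
function extractImage(html) {
  if (!html) return null
  const match = /<img[^>]+src=["']([^"']+)["']/i.exec(html)
  return match ? match[1] : null
}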
Social Media Fetcher
async function fetchSocialMedia(config) {
  const items = []
  switch (config.platform) {
    case 'twitter':
      items.push(...await fetchTwitter(config))
      break
    case 'linkedin':
      items.push(...await fetchLinkedIn(config))
      break
    case 'youtube':
      items.push(...await fetchYouTube(config))
      break
  }
  return items
}

async function fetchTwitter(config) {
  // Use Twitter API v2
  const response = await fetch(
    `https://api.twitter.com/2/users/${config.user_id}/tweets?` +
    `max_results=100&tweet.fields=created_at,public_metrics`,
    {
      headers: {
        'Authorization': `Bearer ${config.bearer_token}`
      }
    }
  )
  const data = await response.json()

  // data.data is absent when the account has no recent tweets or the call failed
  return (data.data ?? []).map(tweet => ({
    id: tweet.id,
    title: null,
    content: tweet.text,
    author: config.username,
    published_at: tweet.created_at,
    url: `https://twitter.com/${config.username}/status/${tweet.id}`,
    type: 'social_post',
    metrics: {
      likes: tweet.public_metrics?.like_count,
      retweets: tweet.public_metrics?.retweet_count,
      replies: tweet.public_metrics?.reply_count,
      impressions: tweet.public_metrics?.impression_count
    }
  }))
}
Content Analysis
Analysis Pipeline
// Process pending content
async function analyzeContent() {
  const { data: pending } = await supabase
    .from('raw_content')
    .select('*')
    .eq('processing_status', 'pending')
    .limit(20)

  for (const content of pending ?? []) {
    try {
      // Mark as processing
      await supabase
        .from('raw_content')
        .update({ processing_status: 'processing' })
        .eq('id', content.id)

      // Run analysis
      const analysis = await runAnalysis(content)

      // Store results
      await supabase
        .from('content_analysis')
        .insert({
          content_id: content.id,
          ...analysis
        })

      // Mark complete
      await supabase
        .from('raw_content')
        .update({
          processing_status: 'analyzed',
          analyzed_at: new Date().toISOString()
        })
        .eq('id', content.id)

      // Score for each platform
      await scoreForPlatforms(content.id, analysis)
    } catch (error) {
      await supabase
        .from('raw_content')
        .update({ processing_status: 'failed' })
        .eq('id', content.id)
    }
  }
}
async function runAnalysis(content) {
  const text = content.content_text || content.title || ''

  // Use AI for comprehensive analysis
  const prompt = `
Analyze this content and extract:
1. Keywords (5-10 most relevant)
2. Named entities with types (person, org, location, product)
3. Main topics (2-3)
4. Sentiment (positive/negative/neutral/mixed) with score (-1 to 1)
5. Tone descriptors (2-3 words like "informative", "casual", "urgent")
6. Quality indicators

Content:
${text.slice(0, 4000)}

Respond in JSON format.
`

  const aiResponse = await callAI(prompt, 'gpt-4o-mini')
  const parsed = JSON.parse(aiResponse)

  // Calculate additional metrics
  const wordCount = text.split(/\s+/).length
  const readingTime = Math.ceil(wordCount / 200)

  // Generate embedding
  const embedding = await generateEmbedding(text)

  return {
    keywords: parsed.keywords,
    keyword_scores: parsed.keyword_scores || {},
    entities: parsed.entities,
    topics: parsed.topics,
    categories: parsed.categories || [],
    sentiment: parsed.sentiment,
    sentiment_score: parsed.sentiment_score,
    tone: parsed.tone,
    word_count: wordCount,
    reading_time_minutes: readingTime,
    heading_count: (text.match(/^#+\s/gm) || []).length,
    paragraph_count: text.split(/\n\n+/).length,
    quality_score: calculateQualityScore(parsed, wordCount),
    readability_score: calculateReadability(text),
    embedding: embedding,
    model_used: 'gpt-4o-mini',
    analysis_cost: 0.00015 // Approximate
  }
}
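callAI and generateEmbedding are assumed wrappers around the model provider. Minimal sketches against OpenAI's REST API (the OPENAI_API_KEY environment variable is an assumption; any provider with chat and embedding endpoints works):
async function callAI(prompt, model = 'gpt-4o-mini') {
  const res = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model,
      messages: [{ role: 'user', content: prompt }],
      response_format: { type: 'json_object' } // keeps the reply parseable
    })
  })
  const data = await res.json()
  return data.choices[0].message.content
}

async function generateEmbedding(text) {
  const res = await fetch('https://api.openai.com/v1/embeddings', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'text-embedding-ada-002', // 1536 dims, matches vector(1536)
      input: text.slice(0, 8000)
    })
  })
  const data = await res.json()
  return data.data[0].embedding
}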
Quality Score Calculation
function calculateQualityScore(analysis, wordCount) {
  let score = 50 // Base score

  // Word count (optimal: 1000-2000)
  if (wordCount >= 1000 && wordCount <= 2000) {
    score += 15
  } else if (wordCount >= 500 && wordCount <= 3000) {
    score += 10
  } else if (wordCount < 300) {
    score -= 10
  }

  // Keyword richness
  if (analysis.keywords?.length >= 5) score += 10

  // Entity richness
  if (analysis.entities?.length >= 3) score += 10

  // Sentiment clarity
  if (analysis.sentiment !== 'mixed') score += 5

  // Topic focus
  if (analysis.topics?.length >= 2 && analysis.topics?.length <= 3) score += 10

  return Math.min(100, Math.max(0, score))
}
function calculateReadability(text) {
  // Simplified Flesch Reading Ease (0-100, higher = easier to read)
  const sentences = text.split(/[.!?]+/).filter(s => s.trim()).length || 1
  const words = text.split(/\s+/).filter(Boolean).length || 1
  const syllables = countSyllables(text)
  const score = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
  return Math.min(100, Math.max(0, score))
}
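countSyllables is assumed above; a rough vowel-group heuristic is enough for a composite score (it is not linguistically exact):
// Approximate English syllables: one per vowel group per word,
// discounting a trailing silent 'e'. Heuristic only.
function countSyllables(text) {
  return text
    .toLowerCase()
    .split(/\s+/)
    .reduce((total, word) => {
      const cleaned = word.replace(/[^a-z]/g, '').replace(/e$/, '')
      if (!cleaned) return total
      const groups = cleaned.match(/[aeiouy]+/g)
      return total + (groups ? groups.length : 1)
    }, 0)
}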
Platform Scoring
Score Content for Each Platform
async function scoreForPlatforms(contentId, analysis) {
  const { data: platforms } = await supabase
    .from('platforms')
    .select('*')
    .eq('status', 'active')

  for (const platform of platforms ?? []) {
    const score = calculatePlatformFit(analysis, platform)

    await supabase
      .from('content_platform_scores')
      .upsert({
        content_id: contentId,
        platform_id: platform.id,
        fit_score: score.total,
        predicted_engagement: score.predicted_engagement,
        score_factors: score.factors,
        suggested_modifications: score.suggestions
      }, {
        onConflict: 'content_id,platform_id'
      })
  }
}
function calculatePlatformFit(analysis, platform) {
  const factors = {}
  const suggestions = []

  // Length fit
  const optimalLength = platform.optimal_content_length || 1000
  const lengthRatio = analysis.word_count / optimalLength
  if (lengthRatio >= 0.7 && lengthRatio <= 1.3) {
    factors.length = 90
  } else if (lengthRatio >= 0.5 && lengthRatio <= 1.5) {
    factors.length = 70
  } else {
    factors.length = 50
    suggestions.push(
      lengthRatio < 0.5
        ? `Expand content (currently ${analysis.word_count} words, optimal ${optimalLength})`
        : `Condense content (currently ${analysis.word_count} words, optimal ${optimalLength})`
    )
  }

  // Tone fit (platform-specific)
  const platformTones = {
    twitter: ['casual', 'witty', 'provocative'],
    linkedin: ['professional', 'informative', 'thought-leadership'],
    youtube: ['engaging', 'educational', 'entertaining'],
    wordpress: ['informative', 'detailed', 'authoritative']
  }
  const matchingTones = analysis.tone?.filter(t =>
    platformTones[platform.name]?.includes(t)
  ).length || 0
  factors.tone = Math.min(100, 50 + matchingTones * 25)

  // Readability fit
  const readabilityTargets = {
    twitter: 80, // Very easy
    linkedin: 60, // Standard
    youtube: 70, // Easy
    wordpress: 50 // Can be more complex
  }
  const readabilityDiff = Math.abs(
    analysis.readability_score - (readabilityTargets[platform.name] || 60)
  )
  factors.readability = Math.max(50, 100 - readabilityDiff)

  // Weighted total
  const weights = { length: 0.3, tone: 0.4, readability: 0.3 }
  const total = Object.entries(factors).reduce((sum, [key, value]) =>
    sum + value * (weights[key] || 0.33), 0
  )

  // Predict engagement based on score
  const baseEngagement = getBaseEngagement(platform.name)
  const predicted_engagement = baseEngagement * (total / 100)

  return {
    total: Math.round(total),
    factors,
    suggestions,
    predicted_engagement
  }
}
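getBaseEngagement is assumed above. In production it would be derived from rolling averages over distributions; a static lookup keeps the sketch runnable (the rates below are placeholders, not benchmarks):
// Baseline engagement rate per platform; placeholder values.
// In production, derive these from distributions averages.
function getBaseEngagement(platformName) {
  const baselines = {
    twitter: 0.015,
    linkedin: 0.02,
    youtube: 0.04,
    wordpress: 0.01
  }
  return baselines[platformName] ?? 0.01
}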
Pattern Learning
Extract Patterns from Performance
// Cron: Weekly
async function updatePlatformPatterns() {
  const { data: platforms } = await supabase
    .from('platforms')
    .select('id, name')
    .eq('status', 'active')

  for (const platform of platforms ?? []) {
    await extractHookPatterns(platform)
    await extractTimingPatterns(platform)
    await extractStructurePatterns(platform)
    await extractHashtagPatterns(platform)
  }
}
async function extractHookPatterns(platform) {
  // Get top performing content
  const { data: topContent } = await supabase
    .from('distributions')
    .select(`
      id,
      content:raw_content(title, content_text),
      engagement_rate,
      impressions
    `)
    .eq('platform_id', platform.id)
    .gte('impressions', 1000)
    .order('engagement_rate', { ascending: false })
    .limit(100)

  if (!topContent?.length) return // nothing to learn from yet

  // Extract first lines (hooks)
  const hooks = topContent.map(d => ({
    hook: extractFirstLine(d.content.content_text),
    engagement: d.engagement_rate,
    impressions: d.impressions
  }))

  // Analyze hook patterns with AI
  const prompt = `
Analyze these high-performing content hooks and identify patterns:
${JSON.stringify(hooks.slice(0, 20))}

Identify 5-7 hook patterns that appear in successful content.
For each pattern, provide:
1. Pattern name (e.g., "Question Hook", "Statistic Lead")
2. Description
3. Template example
4. Average engagement rate

Respond in JSON format: {"patterns": [{"name", "description", "template", "avg_engagement"}]}.
`

  const patterns = await callAI(prompt, 'gpt-4o')
  const parsed = JSON.parse(patterns)

  for (const pattern of parsed.patterns ?? []) {
    await supabase
      .from('platform_patterns')
      .upsert({
        platform_id: platform.id,
        pattern_type: 'hook',
        pattern_name: pattern.name,
        pattern_description: pattern.description,
        pattern_template: pattern.template,
        sample_size: hooks.length,
        avg_engagement_rate: pattern.avg_engagement,
        confidence_score: Math.min(1, hooks.length / 50),
        last_calculated_at: new Date().toISOString()
      }, {
        onConflict: 'platform_id,pattern_type,pattern_name,vertical_id'
      })
  }
}
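extractFirstLine above is an assumed helper; it treats the first non-empty line of the post as the hook:
// The hook is the first non-empty line of the content
function extractFirstLine(text) {
  if (!text) return ''
  const line = text.split('\n').find(l => l.trim())
  return line ? line.trim() : ''
}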
async function extractTimingPatterns(platform) {
  // Analyze performance by day/hour
  const { data: byTiming } = await supabase.rpc('get_performance_by_timing', {
    p_platform_id: platform.id
  })
  if (!byTiming?.length) return // nothing to learn from yet

  // Find best performing times
  const sorted = byTiming.sort((a, b) => b.avg_engagement - a.avg_engagement)
  const topTimes = sorted.slice(0, 5)

  await supabase
    .from('platform_patterns')
    .upsert({
      platform_id: platform.id,
      pattern_type: 'timing',
      pattern_name: 'optimal_posting_times',
      pattern_description: 'Best times to post based on engagement data',
      pattern_template: JSON.stringify(topTimes),
      sample_size: byTiming.reduce((sum, t) => sum + t.sample_count, 0),
      avg_engagement_rate: topTimes[0]?.avg_engagement,
      confidence_score: Math.min(1, byTiming.length / 100),
      last_calculated_at: new Date().toISOString()
    }, {
      onConflict: 'platform_id,pattern_type,pattern_name,vertical_id'
    })
}
// Supabase function
/*
CREATE OR REPLACE FUNCTION get_performance_by_timing(p_platform_id uuid)
RETURNS TABLE (
  day_of_week integer,
  hour_of_day integer,
  avg_engagement numeric,
  sample_count integer
)
LANGUAGE sql STABLE
AS $$
  SELECT
    EXTRACT(DOW FROM published_at)::integer as day_of_week,
    EXTRACT(HOUR FROM published_at)::integer as hour_of_day,
    AVG(engagement_rate) as avg_engagement,
    COUNT(*)::integer as sample_count
  FROM distributions
  WHERE platform_id = p_platform_id
    AND published_at > now() - interval '90 days'
    AND engagement_rate IS NOT NULL
  GROUP BY 1, 2
  HAVING COUNT(*) >= 5
  ORDER BY 3 DESC;
$$;
*/
Semantic Search
Find Similar Content
async function findSimilarContent(query: string, limit = 10) {
  // Generate query embedding
  const queryEmbedding = await generateEmbedding(query)

  // Search by vector similarity
  const { data: similar } = await supabase.rpc('match_content_by_embedding', {
    query_embedding: queryEmbedding,
    match_threshold: 0.7,
    match_count: limit
  })
  return similar
}
async function findContentForTopic(topic: string, verticalId?: string) {
  // !inner makes the joined raw_content row filterable from the parent query
  let query = supabase
    .from('content_analysis')
    .select(`
      content_id,
      content:raw_content!inner(title, url, published_at, vertical_id),
      topics,
      quality_score,
      sentiment
    `)
    .contains('topics', [topic])
    .order('quality_score', { ascending: false })
    .limit(20)

  // Optional vertical filter, applied via the joined raw_content row
  if (verticalId) query = query.eq('content.vertical_id', verticalId)

  const { data: content } = await query
  return content
}
// Supabase function
/*
CREATE OR REPLACE FUNCTION match_content_by_embedding(
  query_embedding vector(1536),
  match_threshold float DEFAULT 0.7,
  match_count int DEFAULT 10
)
RETURNS TABLE (
  content_id uuid,
  title text,
  url text,
  similarity float
)
LANGUAGE sql STABLE
AS $$
  SELECT
    ca.content_id,
    rc.title,
    rc.url,
    1 - (ca.embedding <=> query_embedding) as similarity
  FROM content_analysis ca
  JOIN raw_content rc ON ca.content_id = rc.id
  WHERE ca.embedding IS NOT NULL
    AND 1 - (ca.embedding <=> query_embedding) > match_threshold
  ORDER BY ca.embedding <=> query_embedding
  LIMIT match_count;
$$;
*/
Queries
Top Performing Content by Platform
SELECT
  p.name as platform,
  rc.title,
  d.engagement_rate,
  d.impressions,
  ca.quality_score,
  ca.sentiment
FROM distributions d
JOIN platforms p ON d.platform_id = p.id
JOIN raw_content rc ON d.content_id = rc.id
LEFT JOIN content_analysis ca ON rc.id = ca.content_id
WHERE d.published_at > now() - interval '30 days'
ORDER BY d.engagement_rate DESC
LIMIT 50;
Pattern Performance
SELECT
  p.name as platform,
  pp.pattern_type,
  pp.pattern_name,
  pp.sample_size,
  pp.avg_engagement_rate,
  pp.confidence_score,
  pp.last_calculated_at
FROM platform_patterns pp
JOIN platforms p ON pp.platform_id = p.id
WHERE pp.status = 'active'
ORDER BY p.name, pp.pattern_type, pp.avg_engagement_rate DESC;
Content Quality Distribution
SELECT
  CASE
    WHEN quality_score >= 80 THEN 'Excellent'
    WHEN quality_score >= 60 THEN 'Good'
    WHEN quality_score >= 40 THEN 'Average'
    ELSE 'Needs Improvement'
  END as quality_tier,
  COUNT(*) as content_count,
  ROUND(AVG(quality_score), 1) as avg_score
FROM content_analysis
WHERE created_at > now() - interval '30 days'
GROUP BY 1
ORDER BY avg_score DESC;
Source Health Report
SELECT
  name,
  source_type,
  status,
  total_items_fetched,
  last_fetched_at,
  error_count,
  last_error
FROM content_sources
ORDER BY
  CASE status
    WHEN 'error' THEN 1
    WHEN 'active' THEN 2
    ELSE 3
  END,
  last_fetched_at DESC;
Related Documentation
| Document | Purpose |
|---|---|
| audience-intelligence.md | User profiles, segmentation |
| generation-engine.md | Content generation |
| distribution-system.md | Publishing content |
| SCHEMA.md | Full table definitions |