Content Intelligence

Parent: Shared Infrastructure OVERVIEW
Status: Active
Scope: Ingest → Analyze → Extract Patterns → Learn

Overview

Content Intelligence is the perception layer: it ingests content from multiple sources, analyzes it to extract patterns, and continuously learns what works.
┌─────────────────────────────────────────────────────────────────────────┐
│                   CONTENT INTELLIGENCE PIPELINE                          │
├─────────────────────────────────────────────────────────────────────────┤
│                                                                          │
│   SOURCES                     INGEST                   RAW CONTENT       │
│   ┌──────────┐               ┌──────────┐             ┌──────────┐      │
│   │Competitors│              │  Scrapers │             │  Text    │      │
│   │   RSS    │──────────────▶│  APIs     │────────────▶│  Media   │      │
│   │  Social  │              │  Webhooks │             │  Links   │      │
│   └──────────┘               └──────────┘             └──────────┘      │
│                                                             │            │
│                                                             ▼            │
│   ANALYSIS                                                               │
│   ┌─────────────────────────────────────────────────────────────────┐   │
│   │  NLP Pipeline                                                    │   │
│   │  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────┐        │   │
│   │  │ Keyword  │  │ Entity   │  │Sentiment │  │ Topic    │        │   │
│   │  │Extraction│  │Recognition│ │ Analysis │  │ Modeling │        │   │
│   │  └──────────┘  └──────────┘  └──────────┘  └──────────┘        │   │
│   └─────────────────────────────────────────────────────────────────┘   │
│                                    │                                     │
│                                    ▼                                     │
│   PATTERNS                                                               │
│   ┌─────────────────────────────────────────────────────────────────┐   │
│   │  Platform Patterns (what works where)                            │   │
│   │  ┌─────────┐  ┌─────────┐  ┌─────────┐  ┌─────────┐            │   │
│   │  │ Twitter │  │ YouTube │  │LinkedIn │  │  Blog   │            │   │
│   │  │hooks,   │  │titles,  │  │format,  │  │SEO,     │            │   │
│   │  │length   │  │thumbnails│ │timing   │  │structure│            │   │
│   │  └─────────┘  └─────────┘  └─────────┘  └─────────┘            │   │
│   └─────────────────────────────────────────────────────────────────┘   │
│                                    │                                     │
│                                    ▼                                     │
│   FEEDBACK LOOP                                                          │
│   ┌─────────────────────────────────────────────────────────────────┐   │
│   │  Performance Data → Pattern Scoring → Weight Adjustment          │   │
│   └─────────────────────────────────────────────────────────────────┘   │
│                                                                          │
└─────────────────────────────────────────────────────────────────────────┘
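
These stages run as scheduled jobs rather than one synchronous pipeline. A minimal sketch of the wiring using node-cron (an assumption; any scheduler, including Supabase scheduled functions, works the same way; the 5-minute analysis cadence is illustrative, the other two come from the cron comments later in this document):
import cron from 'node-cron'

// Ingest: poll sources that are due (see fetchAllSources below)
cron.schedule('*/15 * * * *', () => { fetchAllSources() })

// Analyze: drain the pending queue (cadence assumed here)
cron.schedule('*/5 * * * *', () => { analyzeContent() })

// Learn: recompute platform patterns weekly (see updatePlatformPatterns below)
cron.schedule('0 3 * * 1', () => { updatePlatformPatterns() })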

Schema

Content Sources

Where content comes from for analysis.
CREATE TABLE content_sources (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- Source identification
  name text NOT NULL,
  source_type text NOT NULL,              -- 'competitor', 'rss', 'api', 'scraper', 'manual', 'social'
  url text,
  
  -- Configuration
  config jsonb DEFAULT '{}',              -- API keys, selectors, auth
  
  -- Categorization
  vertical_id uuid REFERENCES publisher_verticals(id),
  tags text[],
  
  -- Scheduling
  fetch_frequency interval DEFAULT '1 hour',
  last_fetched_at timestamptz,
  next_fetch_at timestamptz,
  
  -- Health
  status text DEFAULT 'active',           -- 'active', 'paused', 'error', 'disabled'
  error_count integer DEFAULT 0,
  last_error text,
  
  -- Stats
  total_items_fetched integer DEFAULT 0,
  
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now()
);

CREATE INDEX idx_content_sources_type ON content_sources(source_type);
CREATE INDEX idx_content_sources_vertical ON content_sources(vertical_id);
CREATE INDEX idx_content_sources_next_fetch ON content_sources(next_fetch_at) 
  WHERE status = 'active';

Raw Content

Ingested content before analysis.
CREATE TABLE raw_content (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- Source reference
  source_id uuid REFERENCES content_sources(id),
  external_id text,                       -- ID from source system
  
  -- Content
  title text,
  content_text text,
  content_html text,
  summary text,
  
  -- Media
  featured_image_url text,
  media_urls text[],
  
  -- Metadata
  author text,
  published_at timestamptz,
  url text,
  
  -- Categorization
  vertical_id uuid REFERENCES publisher_verticals(id),
  content_type text,                      -- 'article', 'video', 'social_post', 'podcast'
  
  -- Processing status
  processing_status text DEFAULT 'pending', -- 'pending', 'processing', 'analyzed', 'failed'
  analyzed_at timestamptz,
  
  -- Deduplication
  content_hash text,                      -- Hash for dedup
  
  -- Metrics (if available from source)
  source_metrics jsonb DEFAULT '{}',      -- {views, likes, shares, comments}
  
  created_at timestamptz DEFAULT now(),
  
  UNIQUE(source_id, external_id)
);

CREATE INDEX idx_raw_content_source ON raw_content(source_id);
CREATE INDEX idx_raw_content_status ON raw_content(processing_status);
CREATE INDEX idx_raw_content_vertical ON raw_content(vertical_id);
CREATE INDEX idx_raw_content_published ON raw_content(published_at DESC);
CREATE INDEX idx_raw_content_hash ON raw_content(content_hash);

Content Analysis

Extracted insights from content.
CREATE TABLE content_analysis (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- Content reference
  content_id uuid REFERENCES raw_content(id) UNIQUE NOT NULL,
  
  -- Keywords & Entities
  keywords text[],                        -- Extracted keywords
  keyword_scores jsonb DEFAULT '{}',      -- {keyword: relevance_score}
  entities jsonb DEFAULT '[]',            -- [{name, type, salience}]
  
  -- Topics & Categories
  topics text[],                          -- High-level topics
  categories text[],                      -- Content categories
  
  -- Sentiment & Tone
  sentiment text,                         -- 'positive', 'negative', 'neutral', 'mixed'
  sentiment_score numeric,                -- -1 to 1
  tone text[],                            -- ['informative', 'casual', 'urgent']
  
  -- Structure Analysis
  word_count integer,
  reading_time_minutes integer,
  heading_count integer,
  paragraph_count integer,
  
  -- SEO Analysis
  title_length integer,
  meta_description_length integer,
  keyword_density jsonb DEFAULT '{}',
  
  -- Content Quality Signals
  quality_score numeric,                  -- 0-100 composite score
  originality_score numeric,              -- 0-100 uniqueness
  readability_score numeric,              -- Flesch-Kincaid or similar
  
  -- Embeddings (for semantic search)
  embedding vector(1536),                 -- OpenAI ada-002 embedding
  
  -- AI Model Used
  model_used text,
  analysis_cost numeric(10,6),
  
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now()
);

CREATE INDEX idx_content_analysis_content ON content_analysis(content_id);
CREATE INDEX idx_content_analysis_keywords ON content_analysis USING GIN(keywords);
CREATE INDEX idx_content_analysis_topics ON content_analysis USING GIN(topics);
CREATE INDEX idx_content_analysis_sentiment ON content_analysis(sentiment);
CREATE INDEX idx_content_analysis_quality ON content_analysis(quality_score DESC);
CREATE INDEX idx_content_analysis_embedding ON content_analysis 
  USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);

Platforms

Platform definitions and characteristics.
CREATE TABLE platforms (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- Platform identity
  name text UNIQUE NOT NULL,              -- 'twitter', 'youtube', 'linkedin', 'wordpress'
  display_name text NOT NULL,             -- 'Twitter/X', 'YouTube', 'LinkedIn'
  platform_type text NOT NULL,            -- 'social', 'blog', 'video', 'email', 'podcast'
  
  -- Constraints
  max_title_length integer,
  max_content_length integer,
  max_hashtags integer,
  max_media_items integer,
  
  -- Optimal Content Specs
  optimal_title_length integer,
  optimal_content_length integer,
  optimal_posting_times jsonb DEFAULT '[]', -- [{day, hour, timezone}]
  
  -- Content Format
  supports_markdown boolean DEFAULT false,
  supports_html boolean DEFAULT false,
  supports_images boolean DEFAULT true,
  supports_video boolean DEFAULT false,
  supports_links boolean DEFAULT true,
  
  -- API Configuration
  api_config jsonb DEFAULT '{}',
  rate_limits jsonb DEFAULT '{}',
  
  -- Status
  status text DEFAULT 'active',
  
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now()
);

Platform Patterns

What works on each platform, learned from performance data.
CREATE TABLE platform_patterns (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- Platform reference
  platform_id uuid REFERENCES platforms(id) NOT NULL,
  
  -- Pattern identification
  pattern_type text NOT NULL,             -- 'hook', 'structure', 'timing', 'hashtags', 'cta', 'media'
  pattern_name text NOT NULL,
  
  -- Pattern definition
  pattern_description text,
  pattern_example text,
  pattern_template text,                  -- Template format if applicable
  
  -- Performance data
  sample_size integer DEFAULT 0,
  avg_engagement_rate numeric,
  avg_reach numeric,
  avg_clicks numeric,
  
  -- Confidence
  confidence_score numeric,               -- 0-1 based on sample size and consistency
  
  -- Categorization
  vertical_id uuid REFERENCES publisher_verticals(id), -- null = all verticals
  content_type text,                      -- null = all types
  
  -- Learning
  last_calculated_at timestamptz,
  data_points jsonb DEFAULT '[]',         -- Historical performance samples
  
  -- Status
  status text DEFAULT 'active',           -- 'active', 'testing', 'deprecated'
  
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now(),
  
  UNIQUE(platform_id, pattern_type, pattern_name, vertical_id)
);

CREATE INDEX idx_platform_patterns_platform ON platform_patterns(platform_id);
CREATE INDEX idx_platform_patterns_type ON platform_patterns(pattern_type);
CREATE INDEX idx_platform_patterns_vertical ON platform_patterns(vertical_id);
CREATE INDEX idx_platform_patterns_confidence ON platform_patterns(confidence_score DESC);

Content Platform Scores

How well content fits each platform.
CREATE TABLE content_platform_scores (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- References
  content_id uuid REFERENCES raw_content(id) NOT NULL,
  platform_id uuid REFERENCES platforms(id) NOT NULL,
  
  -- Scores
  fit_score numeric NOT NULL,             -- 0-100: how well content fits platform
  predicted_engagement numeric,           -- Predicted engagement rate
  predicted_reach numeric,                -- Predicted reach
  
  -- Breakdown
  score_factors jsonb DEFAULT '{}',       -- {length: 80, tone: 90, hooks: 75}
  
  -- Recommendations
  suggested_modifications text[],         -- What to change for better fit
  suggested_posting_time timestamptz,
  
  -- Actual performance (filled after posting)
  actual_engagement numeric,
  actual_reach numeric,
  
  created_at timestamptz DEFAULT now(),
  
  UNIQUE(content_id, platform_id)
);

CREATE INDEX idx_content_platform_scores_content ON content_platform_scores(content_id);
CREATE INDEX idx_content_platform_scores_platform ON content_platform_scores(platform_id);
CREATE INDEX idx_content_platform_scores_fit ON content_platform_scores(fit_score DESC);

Pattern Feedback

Performance data that feeds back into pattern learning.
CREATE TABLE pattern_feedback (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- References
  pattern_id uuid REFERENCES platform_patterns(id),
  content_id uuid REFERENCES raw_content(id),
  distribution_id uuid REFERENCES distributions(id),
  
  -- Pattern application
  pattern_applied boolean NOT NULL,       -- Was this pattern used?
  pattern_strength numeric,               -- 0-1 how strongly pattern was applied
  
  -- Performance
  impressions integer,
  engagements integer,
  clicks integer,
  shares integer,
  
  -- Calculated
  engagement_rate numeric,
  click_through_rate numeric,
  
  -- Comparison
  vs_average_engagement numeric,          -- % above/below platform average
  vs_average_reach numeric,
  
  -- Time context
  posted_at timestamptz,
  day_of_week integer,
  hour_of_day integer,
  
  created_at timestamptz DEFAULT now()
);

CREATE INDEX idx_pattern_feedback_pattern ON pattern_feedback(pattern_id);
CREATE INDEX idx_pattern_feedback_content ON pattern_feedback(content_id);
CREATE INDEX idx_pattern_feedback_posted ON pattern_feedback(posted_at DESC);

Content Ingestion

Fetch Content from Sources

// Cron: Every 15 minutes
async function fetchAllSources() {
  const { data: sources } = await supabase
    .from('content_sources')
    .select('*')
    .eq('status', 'active')
    .lt('next_fetch_at', new Date().toISOString())
    .order('next_fetch_at')
    .limit(10)
  
  for (const source of sources ?? []) {
    try {
      await fetchSource(source)
      
      // Update fetch time
      await supabase
        .from('content_sources')
        .update({
          last_fetched_at: new Date().toISOString(),
          next_fetch_at: new Date(Date.now() + parseDuration(source.fetch_frequency)).toISOString(),
          error_count: 0
        })
        .eq('id', source.id)
      
    } catch (error) {
      const errorCount = source.error_count + 1
      await supabase
        .from('content_sources')
        .update({
          error_count: errorCount,
          last_error: error.message,
          status: errorCount >= 5 ? 'error' : 'active' // flag after 5 consecutive failures
        })
        .eq('id', source.id)
    }
  }
}
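
The parseDuration helper above converts the stored fetch_frequency interval into milliseconds; it is not defined in this document. A minimal sketch, assuming the interval arrives as text in "N unit" form (Postgres can also emit forms like "01:00:00", so a production version needs more cases):
// Converts simple interval strings like '1 hour', '15 mins', '2 days' to ms.
// Anything unrecognized falls back to one hour.
function parseDuration(interval: string): number {
  const match = /^(\d+)\s*(second|minute|min|hour|day|week)s?$/i.exec(interval.trim())
  if (!match) return 60 * 60 * 1000
  const unitMs: Record<string, number> = {
    second: 1000,
    min: 60 * 1000,
    minute: 60 * 1000,
    hour: 60 * 60 * 1000,
    day: 24 * 60 * 60 * 1000,
    week: 7 * 24 * 60 * 60 * 1000
  }
  return parseInt(match[1], 10) * unitMs[match[2].toLowerCase()]
}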

async function fetchSource(source) {
  let items = []
  
  switch (source.source_type) {
    case 'rss':
      items = await fetchRSS(source.url)
      break
    case 'api':
      items = await fetchAPI(source.url, source.config)
      break
    case 'scraper':
      items = await runScraper(source.url, source.config)
      break
    case 'social':
      items = await fetchSocialMedia(source.config)
      break
  }
  
  // Deduplicate and store
  let inserted = 0
  for (const item of items) {
    const contentHash = hashContent((item.title || '') + (item.content || ''))
    
    // Check for duplicate
    const { data: existing } = await supabase
      .from('raw_content')
      .select('id')
      .eq('content_hash', contentHash)
      .maybeSingle()
    
    if (existing) continue
    
    await supabase
      .from('raw_content')
      .insert({
        source_id: source.id,
        external_id: item.id,
        title: item.title,
        content_text: item.content,
        content_html: item.html,
        author: item.author,
        published_at: item.published_at,
        url: item.url,
        featured_image_url: item.image,
        vertical_id: source.vertical_id,
        content_type: item.type || 'article',
        content_hash: contentHash,
        source_metrics: item.metrics || {}
      })
    
    inserted++
  }
  
  // Update the counter once after the loop; updating per item with the
  // stale source.total_items_fetched value never advances it past +1
  if (inserted > 0) {
    await supabase
      .from('content_sources')
      .update({
        total_items_fetched: source.total_items_fetched + inserted
      })
      .eq('id', source.id)
  }
}
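
hashContent is also assumed. One possible version using Node's crypto module, normalizing whitespace and case so trivial edits do not defeat deduplication:
import { createHash } from 'crypto'

// Stable fingerprint for dedup
function hashContent(text: string): string {
  const normalized = text.replace(/\s+/g, ' ').trim().toLowerCase()
  return createHash('sha256').update(normalized).digest('hex')
}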

RSS Fetcher

import Parser from 'rss-parser'

async function fetchRSS(url: string) {
  const parser = new Parser()
  const feed = await parser.parseURL(url)
  
  return feed.items.map(item => ({
    id: item.guid || item.link,
    title: item.title,
    content: item.contentSnippet || item.content,
    html: item.content,
    author: item.creator || item.author,
    published_at: item.pubDate ? new Date(item.pubDate).toISOString() : null,
    url: item.link,
    image: item.enclosure?.url || extractImage(item.content),
    type: 'article'
  }))
}
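
extractImage is assumed above. A minimal version that pulls the first <img src> out of the item HTML (a regex sketch, not robust against unusual markup):
// First <img src="..."> in an HTML fragment, if any
function extractImage(html?: string): string | null {
  if (!html) return null
  const match = /<img[^>]+src=["']([^"']+)["']/i.exec(html)
  return match ? match[1] : null
}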

Social Media Fetcher

async function fetchSocialMedia(config) {
  const items = []
  
  switch (config.platform) {
    case 'twitter':
      items.push(...await fetchTwitter(config))
      break
    case 'linkedin':
      items.push(...await fetchLinkedIn(config))
      break
    case 'youtube':
      items.push(...await fetchYouTube(config))
      break
  }
  
  return items
}

async function fetchTwitter(config) {
  // Use Twitter API v2
  const response = await fetch(
    `https://api.twitter.com/2/users/${config.user_id}/tweets?` +
    `max_results=100&tweet.fields=created_at,public_metrics`,
    {
      headers: {
        'Authorization': `Bearer ${config.bearer_token}`
      }
    }
  )
  
  const data = await response.json()
  
  return (data.data ?? []).map(tweet => ({
    id: tweet.id,
    title: null,
    content: tweet.text,
    author: config.username,
    published_at: tweet.created_at,
    url: `https://twitter.com/${config.username}/status/${tweet.id}`,
    type: 'social_post',
    metrics: {
      likes: tweet.public_metrics?.like_count,
      retweets: tweet.public_metrics?.retweet_count,
      replies: tweet.public_metrics?.reply_count,
      impressions: tweet.public_metrics?.impression_count
    }
  }))
}

Content Analysis

Analysis Pipeline

// Process pending content
async function analyzeContent() {
  const { data: pending } = await supabase
    .from('raw_content')
    .select('*')
    .eq('processing_status', 'pending')
    .limit(20)
  
  for (const content of pending ?? []) {
    try {
      // Mark as processing
      await supabase
        .from('raw_content')
        .update({ processing_status: 'processing' })
        .eq('id', content.id)
      
      // Run analysis
      const analysis = await runAnalysis(content)
      
      // Store results
      await supabase
        .from('content_analysis')
        .insert({
          content_id: content.id,
          ...analysis
        })
      
      // Mark complete
      await supabase
        .from('raw_content')
        .update({
          processing_status: 'analyzed',
          analyzed_at: new Date().toISOString()
        })
        .eq('id', content.id)
      
      // Score for each platform
      await scoreForPlatforms(content.id, analysis)
      
    } catch (error) {
      await supabase
        .from('raw_content')
        .update({
          processing_status: 'failed'
        })
        .eq('id', content.id)
    }
  }
}

async function runAnalysis(content) {
  const text = content.content_text || content.title || ''
  
  // Use AI for comprehensive analysis
  const prompt = `
Analyze this content and extract:
1. Keywords (5-10 most relevant)
2. Named entities with types (person, org, location, product)
3. Main topics (2-3)
4. Sentiment (positive/negative/neutral/mixed) with score (-1 to 1)
5. Tone descriptors (2-3 words like "informative", "casual", "urgent")
6. Quality indicators

Content:
${text.slice(0, 4000)}

Respond in JSON format.
`
  
  const aiResponse = await callAI(prompt, 'gpt-4o-mini')
  const parsed = JSON.parse(aiResponse)
  
  // Calculate additional metrics
  const wordCount = text.split(/\s+/).length
  const readingTime = Math.ceil(wordCount / 200)
  
  // Generate embedding
  const embedding = await generateEmbedding(text)
  
  return {
    keywords: parsed.keywords,
    keyword_scores: parsed.keyword_scores || {},
    entities: parsed.entities,
    topics: parsed.topics,
    categories: parsed.categories || [],
    sentiment: parsed.sentiment,
    sentiment_score: parsed.sentiment_score,
    tone: parsed.tone,
    word_count: wordCount,
    reading_time_minutes: readingTime,
    heading_count: (text.match(/^#+\s/gm) || []).length,
    paragraph_count: text.split(/\n\n+/).length,
    quality_score: calculateQualityScore(parsed, wordCount),
    readability_score: calculateReadability(text),
    embedding: embedding,
    model_used: 'gpt-4o-mini',
    analysis_cost: 0.00015 // Approximate
  }
}
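
callAI and generateEmbedding are assumed helpers. A sketch of thin wrappers over OpenAI's chat-completions and embeddings endpoints, matching the gpt-4o-mini model and the 1536-dimension ada-002 column used above (requires OPENAI_API_KEY; error handling omitted):
async function callAI(prompt: string, model: string): Promise<string> {
  const res = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model,
      messages: [{ role: 'user', content: prompt }],
      response_format: { type: 'json_object' } // callers parse the reply as JSON
    })
  })
  const json = await res.json()
  return json.choices[0].message.content
}

async function generateEmbedding(text: string): Promise<number[]> {
  const res = await fetch('https://api.openai.com/v1/embeddings', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'text-embedding-ada-002', // 1536 dims, matches vector(1536)
      input: text.slice(0, 8000)       // rough guard for the model's token limit
    })
  })
  const json = await res.json()
  return json.data[0].embedding
}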

Quality Score Calculation

function calculateQualityScore(analysis, wordCount) {
  let score = 50 // Base score
  
  // Word count (optimal: 1000-2000)
  if (wordCount >= 1000 && wordCount <= 2000) {
    score += 15
  } else if (wordCount >= 500 && wordCount <= 3000) {
    score += 10
  } else if (wordCount < 300) {
    score -= 10
  }
  
  // Keyword richness
  if (analysis.keywords?.length >= 5) score += 10
  
  // Entity richness
  if (analysis.entities?.length >= 3) score += 10
  
  // Sentiment clarity
  if (analysis.sentiment !== 'mixed') score += 5
  
  // Topic focus
  if (analysis.topics?.length >= 2 && analysis.topics?.length <= 3) score += 10
  
  return Math.min(100, Math.max(0, score))
}

function calculateReadability(text) {
  // Simplified Flesch Reading Ease; clamp denominators so empty or
  // single-sentence input cannot divide by zero
  const sentences = Math.max(1, text.split(/[.!?]+/).filter(Boolean).length)
  const words = Math.max(1, text.split(/\s+/).filter(Boolean).length)
  const syllables = countSyllables(text)
  
  const score = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
  return Math.min(100, Math.max(0, score))
}
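
countSyllables is assumed. A common vowel-group heuristic; English syllable counting is inherently approximate, which is acceptable for a 0-100 readability signal:
// Rough syllable count: vowel groups per word, minimum one per word
function countSyllables(text: string): number {
  return text
    .toLowerCase()
    .split(/\s+/)
    .filter(Boolean)
    .reduce((sum, word) => {
      const groups = word.replace(/[^a-z]/g, '').match(/[aeiouy]+/g)
      return sum + Math.max(1, groups ? groups.length : 1)
    }, 0)
}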

Platform Scoring

Score Content for Each Platform

async function scoreForPlatforms(contentId, analysis) {
  const { data: platforms } = await supabase
    .from('platforms')
    .select('*')
    .eq('status', 'active')
  
  for (const platform of platforms) {
    const score = calculatePlatformFit(analysis, platform)
    
    await supabase
      .from('content_platform_scores')
      .upsert({
        content_id: contentId,
        platform_id: platform.id,
        fit_score: score.total,
        predicted_engagement: score.predicted_engagement,
        score_factors: score.factors,
        suggested_modifications: score.suggestions
      }, {
        onConflict: 'content_id,platform_id'
      })
  }
}

function calculatePlatformFit(analysis, platform) {
  const factors = {}
  const suggestions = []
  
  // Length fit
  const optimalLength = platform.optimal_content_length || 1000
  const lengthRatio = analysis.word_count / optimalLength
  if (lengthRatio >= 0.7 && lengthRatio <= 1.3) {
    factors.length = 90
  } else if (lengthRatio >= 0.5 && lengthRatio <= 1.5) {
    factors.length = 70
  } else {
    factors.length = 50
    suggestions.push(
      lengthRatio < 0.5 
        ? `Expand content (currently ${analysis.word_count} words, optimal ${optimalLength})`
        : `Condense content (currently ${analysis.word_count} words, optimal ${optimalLength})`
    )
  }
  
  // Tone fit (platform-specific)
  const platformTones = {
    twitter: ['casual', 'witty', 'provocative'],
    linkedin: ['professional', 'informative', 'thought-leadership'],
    youtube: ['engaging', 'educational', 'entertaining'],
    wordpress: ['informative', 'detailed', 'authoritative']
  }
  
  const matchingTones = analysis.tone?.filter(t => 
    platformTones[platform.name]?.includes(t)
  ).length || 0
  factors.tone = Math.min(100, 50 + matchingTones * 25)
  
  // Readability fit
  const readabilityTargets = {
    twitter: 80,    // Very easy
    linkedin: 60,   // Standard
    youtube: 70,    // Easy
    wordpress: 50   // Can be more complex
  }
  
  const readabilityDiff = Math.abs(
    analysis.readability_score - (readabilityTargets[platform.name] || 60)
  )
  factors.readability = Math.max(50, 100 - readabilityDiff)
  
  // Calculate total
  const weights = { length: 0.3, tone: 0.4, readability: 0.3 }
  const total = Object.entries(factors).reduce((sum, [key, value]) => 
    sum + value * (weights[key] || 0.33), 0
  )
  
  // Predict engagement based on score
  const baseEngagement = getBaseEngagement(platform.name)
  const predicted_engagement = baseEngagement * (total / 100)
  
  return {
    total: Math.round(total),
    factors,
    suggestions,
    predicted_engagement
  }
}
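
getBaseEngagement is assumed. A placeholder lookup with illustrative baselines; real values should be measured per-platform averages, e.g. derived from pattern_feedback once enough data exists:
// Illustrative baseline engagement rates, NOT measured values
function getBaseEngagement(platformName: string): number {
  const baselines: Record<string, number> = {
    twitter: 0.02,
    linkedin: 0.03,
    youtube: 0.04,
    wordpress: 0.01
  }
  return baselines[platformName] ?? 0.02
}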

Pattern Learning

Extract Patterns from Performance

// Cron: Weekly
async function updatePlatformPatterns() {
  const { data: platforms } = await supabase
    .from('platforms')
    .select('id, name')
    .eq('status', 'active')
  
  for (const platform of platforms) {
    await extractHookPatterns(platform)
    await extractTimingPatterns(platform)
    await extractStructurePatterns(platform)
    await extractHashtagPatterns(platform)
  }
}

async function extractHookPatterns(platform) {
  // Get top performing content
  const { data: topContent } = await supabase
    .from('distributions')
    .select(`
      id,
      content:raw_content(title, content_text),
      engagement_rate,
      impressions
    `)
    .eq('platform_id', platform.id)
    .gte('impressions', 1000)
    .order('engagement_rate', { ascending: false })
    .limit(100)
  
  // Extract first lines (hooks)
  const hooks = (topContent ?? []).map(d => ({
    hook: extractFirstLine(d.content?.content_text ?? ''),
    engagement: d.engagement_rate,
    impressions: d.impressions
  }))
  
  // Analyze hook patterns with AI
  const prompt = `
Analyze these high-performing content hooks and identify patterns:
${JSON.stringify(hooks.slice(0, 20))}

Identify 5-7 hook patterns that appear in successful content.
For each pattern, provide:
1. Pattern name (e.g., "Question Hook", "Statistic Lead")
2. Description
3. Template example
4. Average engagement rate

Respond in JSON format.
`
  
  const patterns = await callAI(prompt, 'gpt-4o')
  const parsed = JSON.parse(patterns)
  
  for (const pattern of parsed.patterns ?? []) {
    await supabase
      .from('platform_patterns')
      .upsert({
        platform_id: platform.id,
        pattern_type: 'hook',
        pattern_name: pattern.name,
        pattern_description: pattern.description,
        pattern_template: pattern.template,
        sample_size: hooks.length,
        avg_engagement_rate: pattern.avg_engagement,
        confidence_score: Math.min(1, hooks.length / 50),
        last_calculated_at: new Date().toISOString()
      }, {
        onConflict: 'platform_id,pattern_type,pattern_name,vertical_id'
      })
  }
}
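
extractFirstLine is assumed; a trivial version:
// First non-empty line of a piece of content, treated as its hook
function extractFirstLine(text: string): string {
  return text.split('\n').map(l => l.trim()).find(Boolean) ?? ''
}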

async function extractTimingPatterns(platform) {
  // Analyze performance by day/hour
  const { data: byTiming } = await supabase.rpc('get_performance_by_timing', {
    p_platform_id: platform.id
  })
  
  // Find best performing times
  const sorted = (byTiming ?? []).sort((a, b) => b.avg_engagement - a.avg_engagement)
  const topTimes = sorted.slice(0, 5)
  
  await supabase
    .from('platform_patterns')
    .upsert({
      platform_id: platform.id,
      pattern_type: 'timing',
      pattern_name: 'optimal_posting_times',
      pattern_description: 'Best times to post based on engagement data',
      pattern_template: JSON.stringify(topTimes),
      sample_size: byTiming.reduce((sum, t) => sum + t.sample_count, 0),
      avg_engagement_rate: topTimes[0]?.avg_engagement,
      confidence_score: Math.min(1, byTiming.length / 100),
      last_calculated_at: new Date().toISOString()
    }, {
      onConflict: 'platform_id,pattern_type,pattern_name,vertical_id'
    })
}

// Supabase function
/*
CREATE OR REPLACE FUNCTION get_performance_by_timing(p_platform_id uuid)
RETURNS TABLE (
  day_of_week integer,
  hour_of_day integer,
  avg_engagement numeric,
  sample_count integer
)
LANGUAGE sql STABLE
AS $$
  SELECT 
    EXTRACT(DOW FROM published_at)::integer as day_of_week,
    EXTRACT(HOUR FROM published_at)::integer as hour_of_day,
    AVG(engagement_rate) as avg_engagement,
    COUNT(*)::integer as sample_count
  FROM distributions
  WHERE platform_id = p_platform_id
    AND published_at > now() - interval '90 days'
    AND engagement_rate IS NOT NULL
  GROUP BY 1, 2
  HAVING COUNT(*) >= 5
  ORDER BY 3 DESC;
$$;
*/

Find Similar Content

async function findSimilarContent(query: string, limit = 10) {
  // Generate query embedding
  const queryEmbedding = await generateEmbedding(query)
  
  // Search by vector similarity
  const { data: similar } = await supabase.rpc('match_content_by_embedding', {
    query_embedding: queryEmbedding,
    match_threshold: 0.7,
    match_count: limit
  })
  
  return similar
}
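
A hypothetical call site, surfacing related coverage before drafting a new piece (the query string is illustrative):
const related = await findSimilarContent('creator monetization trends', 10)
for (const match of related ?? []) {
  console.log(`${match.similarity.toFixed(2)}  ${match.title}  ${match.url}`)
}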

async function findContentForTopic(topic: string, verticalId?: string) {
  let query = supabase
    .from('content_analysis')
    .select(`
      content_id,
      content:raw_content!inner(title, url, published_at, vertical_id),
      topics,
      quality_score,
      sentiment
    `)
    .contains('topics', [topic])
  
  // Optionally narrow to a vertical; !inner makes the embedded filter
  // exclude rows whose joined raw_content does not match
  if (verticalId) {
    query = query.eq('content.vertical_id', verticalId)
  }
  
  const { data: content } = await query
    .order('quality_score', { ascending: false })
    .limit(20)
  
  return content
}

// Supabase function
/*
CREATE OR REPLACE FUNCTION match_content_by_embedding(
  query_embedding vector(1536),
  match_threshold float DEFAULT 0.7,
  match_count int DEFAULT 10
)
RETURNS TABLE (
  content_id uuid,
  title text,
  url text,
  similarity float
)
LANGUAGE sql STABLE
AS $$
  SELECT 
    ca.content_id,
    rc.title,
    rc.url,
    1 - (ca.embedding <=> query_embedding) as similarity
  FROM content_analysis ca
  JOIN raw_content rc ON ca.content_id = rc.id
  WHERE ca.embedding IS NOT NULL
    AND 1 - (ca.embedding <=> query_embedding) > match_threshold
  ORDER BY ca.embedding <=> query_embedding
  LIMIT match_count;
$$;
*/

Queries

Top Performing Content by Platform

SELECT 
  p.name as platform,
  rc.title,
  d.engagement_rate,
  d.impressions,
  ca.quality_score,
  ca.sentiment
FROM distributions d
JOIN platforms p ON d.platform_id = p.id
JOIN raw_content rc ON d.content_id = rc.id
LEFT JOIN content_analysis ca ON rc.id = ca.content_id
WHERE d.published_at > now() - interval '30 days'
ORDER BY d.engagement_rate DESC
LIMIT 50;

Pattern Performance

SELECT 
  p.name as platform,
  pp.pattern_type,
  pp.pattern_name,
  pp.sample_size,
  pp.avg_engagement_rate,
  pp.confidence_score,
  pp.last_calculated_at
FROM platform_patterns pp
JOIN platforms p ON pp.platform_id = p.id
WHERE pp.status = 'active'
ORDER BY p.name, pp.pattern_type, pp.avg_engagement_rate DESC;

Content Quality Distribution

SELECT 
  CASE 
    WHEN quality_score >= 80 THEN 'Excellent'
    WHEN quality_score >= 60 THEN 'Good'
    WHEN quality_score >= 40 THEN 'Average'
    ELSE 'Needs Improvement'
  END as quality_tier,
  COUNT(*) as content_count,
  ROUND(AVG(quality_score), 1) as avg_score
FROM content_analysis
WHERE created_at > now() - interval '30 days'
GROUP BY 1
ORDER BY avg_score DESC;

Source Health Report

SELECT 
  name,
  source_type,
  status,
  total_items_fetched,
  last_fetched_at,
  error_count,
  last_error
FROM content_sources
ORDER BY 
  CASE status 
    WHEN 'error' THEN 1 
    WHEN 'active' THEN 2 
    ELSE 3 
  END,
  last_fetched_at DESC;

Related Documents

Document                    Purpose
audience-intelligence.md    User profiles, segmentation
generation-engine.md        Content generation
distribution-system.md      Publishing content
SCHEMA.md                   Full table definitions