MrDuhaz
/
feeds-duhaz-fr-v4


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
							
import { serve } from "https://deno.land/std@0.168.0/http/server.ts"
import { createClient } from 'https://esm.sh/@supabase/supabase-js@2'

interface RSSItem {
  title: string;
  description: string;
  link: string;
  pubDate: string;
  guid?: string;
  content?: string;
  image?: string;
}

interface RSSFeed {
  title: string;
  description: string;
  items: RSSItem[];
}

const corsHeaders = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
}

// Helper function to extract text content from XML tags
function extractTextContent(xml: string, tagName: string): string {
  const regex = new RegExp(`<${tagName}[^>]*>([\\s\\S]*?)<\\/${tagName}>`, 'i');
  const match = xml.match(regex);
  if (match && match[1]) {
    // Remove CDATA wrapper if present and clean HTML tags
    let content = match[1].replace(/<!\[CDATA\[(.*?)\]\]>/s, '$1');
    content = content.replace(/<[^>]*>/g, '').trim();
    return content;
  }
  return '';
}

// Helper function to extract attribute from XML tags
function extractAttribute(xml: string, tagName: string, attributeName: string): string {
  const regex = new RegExp(`<${tagName}[^>]*${attributeName}=["']([^"']*)["'][^>]*>`, 'i');
  const match = xml.match(regex);
  return match ? match[1] : '';
}

// Helper function to extract YouTube video ID from URL
function getYouTubeVideoId(url: string): string | null {
  if (!url) return null;
  
  const patterns = [
    /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})/,
    /youtube\.com\/.*[?&]v=([a-zA-Z0-9_-]{11})/
  ];
  
  for (const pattern of patterns) {
    const match = url.match(pattern);
    if (match) return match[1];
  }
  
  return null;
}

// Helper function to generate YouTube thumbnail URL
function generateYouTubeThumbnail(videoUrl: string): string | null {
  const videoId = getYouTubeVideoId(videoUrl);
  return videoId ? `https://img.youtube.com/vi/${videoId}/hqdefault.jpg` : null;
}

serve(async (req) => {
  if (req.method === 'OPTIONS') {
    return new Response('ok', { headers: corsHeaders })
  }

  try {
    const supabaseClient = createClient(
      Deno.env.get('SUPABASE_URL') ?? '',
      Deno.env.get('SUPABASE_SERVICE_ROLE_KEY') ?? ''
    )

    const { feedId, feedUrl } = await req.json()

    console.log(`Fetching RSS for feed: ${feedId}, URL: ${feedUrl}`)

    // Fetch RSS content
    const response = await fetch(feedUrl)
    if (!response.ok) {
      throw new Error(`Failed to fetch RSS: ${response.statusText}`)
    }

    const rssText = await response.text()
    console.log(`RSS content length: ${rssText.length}`)
    
    // Parse RSS using regex patterns
    const items: RSSItem[] = [];
    
    // Split by item or entry tags
    const itemRegex = /<(item|entry)[^>]*>([\s\S]*?)<\/\1>/gi;
    let itemMatch;
    
    while ((itemMatch = itemRegex.exec(rssText)) !== null) {
      const itemXml = itemMatch[0];
      
      const title = extractTextContent(itemXml, 'title');
      const description = extractTextContent(itemXml, 'description') || 
                         extractTextContent(itemXml, 'summary') ||
                         extractTextContent(itemXml, 'content');
      
      let link = extractTextContent(itemXml, 'link');
      if (!link) {
        // Try to get link from href attribute
        link = extractAttribute(itemXml, 'link', 'href');
      }
      
      const pubDate = extractTextContent(itemXml, 'pubDate') || 
                     extractTextContent(itemXml, 'published') ||
                     extractTextContent(itemXml, 'updated');
      
      const guid = extractTextContent(itemXml, 'guid') || 
                  extractTextContent(itemXml, 'id') || 
                  link;
      
      // Try to extract image from enclosure or content
      let image = extractAttribute(itemXml, 'enclosure', 'url');
      if (!image && description) {
        const imgMatch = description.match(/src=["']([^"']*\.(jpg|jpeg|png|gif|webp))[^"']*/i);
        if (imgMatch) {
          image = imgMatch[1];
        }
      }
      
      // Generate YouTube thumbnail if this is a YouTube link and no image found
      if (!image && link) {
        const youtubeImage = generateYouTubeThumbnail(link);
        if (youtubeImage) {
          image = youtubeImage;
        }
      }

      if (title && guid) {
        items.push({
          title,
          description: description || '',
          link: link || '',
          pubDate: pubDate || new Date().toISOString(),
          guid,
          image: image || '',
          content: description || ''
        });
      }
    }

    console.log(`Parsed ${items.length} items from RSS feed`)

    // Save articles to database
    const articlesToInsert = items.map(item => {
      // Calculate read time (rough estimate: 200 words per minute)
      const wordCount = (item.description || '').split(' ').length
      const readTime = Math.max(1, Math.ceil(wordCount / 200))

      // Parse and validate date
      let publishedAt: string;
      try {
        if (item.pubDate) {
          publishedAt = new Date(item.pubDate).toISOString();
        } else {
          publishedAt = new Date().toISOString();
        }
      } catch (error) {
        console.log(`Invalid date format: ${item.pubDate}, using current date`);
        publishedAt = new Date().toISOString();
      }

      return {
        feed_id: feedId,
        title: item.title,
        description: item.description,
        content: item.content || item.description,
        url: item.link || null,
        image_url: item.image || null,
        published_at: publishedAt,
        guid: item.guid,
        read_time: readTime
      }
    })

    console.log(`Preparing to insert ${articlesToInsert.length} articles`)

    // Insert articles (on conflict do nothing to avoid duplicates)
    const { error: insertError } = await supabaseClient
      .from('articles')
      .upsert(articlesToInsert, { 
        onConflict: 'feed_id,guid',
        ignoreDuplicates: true 
      })

    if (insertError) {
      console.error('Error inserting articles:', insertError)
      throw insertError
    }

    // Get current article count for this feed
    const { count: currentArticleCount, error: countError } = await supabaseClient
      .from('articles')
      .select('*', { count: 'exact', head: true })
      .eq('feed_id', feedId)

    if (countError) {
      console.error('Error counting articles:', countError)
    }

    // Update feed's last_fetched_at and article_count
    const { error: updateError } = await supabaseClient
      .from('feeds')
      .update({ 
        last_fetched_at: new Date().toISOString(),
        status: 'active',
        article_count: currentArticleCount || 0
      })
      .eq('id', feedId)

    if (updateError) {
      console.error('Error updating feed:', updateError)
      throw updateError
    }

    console.log(`Successfully processed ${articlesToInsert.length} articles for feed ${feedId}`)

    return new Response(
      JSON.stringify({ 
        success: true, 
        articlesProcessed: articlesToInsert.length 
      }),
      { 
        headers: { ...corsHeaders, 'Content-Type': 'application/json' } 
      }
    )

  } catch (error) {
    console.error('Error in fetch-rss function:', error)
    return new Response(
      JSON.stringify({ 
        error: error.message,
        success: false 
      }),
      { 
        status: 500,
        headers: { ...corsHeaders, 'Content-Type': 'application/json' }
      }
    )
  }
})