| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251 |
- import { serve } from "https://deno.land/std@0.168.0/http/server.ts"
- import { createClient } from 'https://esm.sh/@supabase/supabase-js@2'
/** One entry parsed out of an RSS `<item>` or Atom `<entry>` element. */
interface RSSItem {
  /** Plain-text title of the entry. */
  title: string;
  /** Plain-text summary of the entry. */
  description: string;
  /** URL the entry points at. */
  link: string;
  /** Publication date exactly as it will be handed to `new Date(...)`. */
  pubDate: string;
  /** Identifier used to de-duplicate entries within a feed. */
  guid?: string;
  /** Body text of the entry. */
  content?: string;
  /** URL of an image associated with the entry. */
  image?: string;
}
/** A parsed feed: channel-level metadata plus the entries it contains. */
interface RSSFeed {
  /** Title of the feed. */
  title: string;
  /** Description of the feed. */
  description: string;
  /** Entries contained in the feed. */
  items: RSSItem[];
}
/**
 * CORS headers attached to every response of this function, including the
 * reply to the `OPTIONS` preflight request.
 */
const corsHeaders = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
};
/**
 * Returns the plain text inside the first `<tagName>…</tagName>` element of `xml`.
 *
 * CDATA wrappers are removed and any markup left in the content is stripped.
 * XML entities (for example `&amp;`) are NOT decoded; they are returned as written.
 *
 * @param xml - XML fragment to search.
 * @param tagName - Element name to look for; matched literally and case-insensitively.
 * @returns The trimmed text content, or `''` when the element is absent or empty.
 */
function extractTextContent(xml: string, tagName: string): string {
  // Escape regex metacharacters so the tag name is always matched literally.
  const name = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // - `(?:\s[^>]*)?` requires the name to end right there, so searching for
  //   `content` no longer latches onto `<contentType>`.
  // - `(?<!/)` rejects self-closing tags (`<link href="…"/>`), which have no
  //   text content and would otherwise swallow everything up to an unrelated
  //   closing tag.
  const elementPattern = new RegExp(
    `<${name}(?:\\s[^>]*)?(?<!/)>([\\s\\S]*?)</${name}\\s*>`,
    'i',
  );

  const inner = elementPattern.exec(xml)?.[1];
  if (!inner) return '';

  return inner
    // Unwrap every CDATA section, not just the first one.
    .replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1')
    // Drop any remaining markup.
    .replace(/<[^>]*>/g, '')
    .trim();
}
/**
 * Returns the value of `attributeName` on the first `<tagName …>` element in
 * `xml` that carries that attribute.
 *
 * Tag and attribute names are matched literally and case-insensitively. The
 * value must be quoted; the five predefined XML entities and numeric character
 * references in it are decoded, so `&amp;` inside a URL comes back as `&`.
 *
 * @param xml - XML fragment to search.
 * @param tagName - Element name, including any namespace prefix.
 * @param attributeName - Attribute whose value is wanted.
 * @returns The decoded attribute value, or `''` when nothing matches.
 */
function extractAttribute(xml: string, tagName: string, attributeName: string): string {
  const escapeForRegex = (text: string): string => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const tag = escapeForRegex(tagName);
  const attr = escapeForRegex(attributeName);

  // - `(?=[\s/>])` ends the tag name, so `link` does not match `<linkset>`.
  // - `\s` before the attribute name stops `url` from matching `dataurl=`.
  // - `(["'])…\1` pairs the closing quote with the opening one, so a value
  //   may contain the other quote character.
  const pattern = new RegExp(
    `<${tag}(?=[\\s/>])[^>]*?\\s${attr}\\s*=\\s*(["'])([\\s\\S]*?)\\1[^>]*>`,
    'i',
  );

  const raw = pattern.exec(xml)?.[2];
  if (!raw) return '';

  const namedEntities: Record<string, string> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
  };

  // Single pass, so an already-decoded `&` is never decoded a second time.
  return raw.replace(
    /&(amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);/g,
    (whole: string, ref: string): string => {
      if (ref[0] !== '#') return namedEntities[ref] ?? whole;
      const codePoint = ref[1] === 'x' ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
      // Leave out-of-range references untouched rather than throwing.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    },
  );
}
/**
 * Extracts the 11-character YouTube video ID from a video URL.
 *
 * Recognised shapes:
 * - `youtube.com/watch?v=ID` (also when `v` is not the first query parameter)
 * - `youtu.be/ID`
 * - `youtube.com/embed/ID` and `youtube-nocookie.com/embed/ID`
 * - `youtube.com/shorts/ID`, `youtube.com/live/ID`, `youtube.com/v/ID`
 *
 * @param url - URL to inspect.
 * @returns The video ID, or `null` when `url` is empty or not a recognised YouTube URL.
 */
function getYouTubeVideoId(url: string): string | null {
  if (!url) return null;

  // Tried in order; the first pattern that matches wins.
  const patterns = [
    /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube(?:-nocookie)?\.com\/embed\/|youtube\.com\/(?:shorts|live|v)\/)([a-zA-Z0-9_-]{11})/,
    /youtube\.com\/.*[?&]v=([a-zA-Z0-9_-]{11})/,
  ];

  for (const pattern of patterns) {
    const id = pattern.exec(url)?.[1];
    if (id) return id;
  }

  return null;
}
/**
 * Builds the `hqdefault.jpg` thumbnail URL for a YouTube video.
 *
 * @param videoUrl - URL of the video.
 * @returns The thumbnail URL, or `null` when no video ID can be extracted from `videoUrl`.
 */
function generateYouTubeThumbnail(videoUrl: string): string | null {
  const id = getYouTubeVideoId(videoUrl);
  if (!id) {
    return null;
  }
  return `https://img.youtube.com/vi/${id}/hqdefault.jpg`;
}
/** Builds a JSON response that carries the CORS headers. */
function jsonResponse(body: unknown, status = 200): Response {
  return new Response(JSON.stringify(body), {
    status,
    headers: { ...corsHeaders, 'Content-Type': 'application/json' },
  });
}

/** Extracts a human-readable message from anything that can be thrown. */
function describeError(error: unknown): string {
  if (typeof error === 'object' && error !== null && 'message' in error) {
    const message = (error as { message: unknown }).message;
    if (typeof message === 'string' && message) return message;
  }
  return 'Unknown error';
}

/** Picks the best image URL for one feed entry, or `''` when none is found. */
function findItemImage(itemXml: string, link: string): string {
  const enclosureUrl = extractAttribute(itemXml, 'enclosure', 'url');
  if (enclosureUrl) {
    // Enclosures are also used for audio/video (podcasts); only accept one
    // whose declared type is an image, or that declares no type at all.
    const enclosureType = extractAttribute(itemXml, 'enclosure', 'type').trim();
    if (!enclosureType || /^image\//i.test(enclosureType)) return enclosureUrl;
  }

  const thumbnailUrl = extractAttribute(itemXml, 'media:thumbnail', 'url');
  if (thumbnailUrl) return thumbnailUrl;

  // Search the raw XML: the extracted description has had its tags stripped,
  // so an <img> inside CDATA/HTML content is no longer visible there.
  const inlineImage = itemXml.match(
    /src=["']([^"']*\.(?:jpg|jpeg|png|gif|webp))[^"']*/i,
  )?.[1];
  if (inlineImage) return inlineImage;

  // Last resort: derive a thumbnail when the entry links to a YouTube video.
  return (link && generateYouTubeThumbnail(link)) || '';
}

/** Parses every RSS `<item>` / Atom `<entry>` in `rssText` that has a title and an identifier. */
function parseFeedItems(rssText: string): RSSItem[] {
  const items: RSSItem[] = [];
  // `(?=[\s>])` keeps `<items>`-style tags from being treated as an entry.
  const itemPattern = /<(item|entry)(?=[\s>])[^>]*>[\s\S]*?<\/\1>/gi;

  for (const match of rssText.matchAll(itemPattern)) {
    const itemXml = match[0];

    const title = extractTextContent(itemXml, 'title');
    const description =
      extractTextContent(itemXml, 'description') ||
      extractTextContent(itemXml, 'summary') ||
      extractTextContent(itemXml, 'content');

    // RSS puts the URL in the element text, Atom in the href attribute.
    const link =
      extractTextContent(itemXml, 'link') || extractAttribute(itemXml, 'link', 'href');

    const pubDate =
      extractTextContent(itemXml, 'pubDate') ||
      extractTextContent(itemXml, 'published') ||
      extractTextContent(itemXml, 'updated');

    const guid =
      extractTextContent(itemXml, 'guid') || extractTextContent(itemXml, 'id') || link;

    // Entries without a title or any identifier cannot be stored or de-duplicated.
    if (!title || !guid) continue;

    items.push({
      title,
      description,
      link,
      pubDate: pubDate || new Date().toISOString(),
      guid,
      image: findItemImage(itemXml, link),
      content: description,
    });
  }

  return items;
}

/** Converts a parsed entry into a row for the `articles` table. */
function toArticleRow(item: RSSItem, feedId: string | number) {
  // Rough read-time estimate at 200 words per minute, never below one minute.
  const wordCount = (item.description || '').split(/\s+/).filter(Boolean).length;
  const readTime = Math.max(1, Math.ceil(wordCount / 200));

  // Fall back to "now" when the feed supplies no date or an unparseable one.
  let publishedAt = new Date().toISOString();
  if (item.pubDate) {
    const parsed = new Date(item.pubDate);
    if (Number.isNaN(parsed.getTime())) {
      console.log(`Invalid date format: ${item.pubDate}, using current date`);
    } else {
      publishedAt = parsed.toISOString();
    }
  }

  return {
    feed_id: feedId,
    title: item.title,
    description: item.description,
    content: item.content || item.description,
    url: item.link || null,
    image_url: item.image || null,
    published_at: publishedAt,
    guid: item.guid,
    read_time: readTime,
  };
}

/**
 * HTTP entry point: downloads the feed at `feedUrl`, parses its entries and
 * stores them as articles of feed `feedId`.
 *
 * Expects a JSON body `{ feedId, feedUrl }`. Responds with
 * `{ success: true, articlesProcessed }` on success, with status 400 and
 * `{ error, success: false }` when the request is malformed, and with
 * status 500 and `{ error, success: false }` on any other failure.
 */
serve(async (req: Request): Promise<Response> => {
  // CORS preflight.
  if (req.method === 'OPTIONS') {
    return new Response('ok', { headers: corsHeaders });
  }

  try {
    let payload: unknown;
    try {
      payload = await req.json();
    } catch {
      return jsonResponse({ error: 'Request body must be valid JSON', success: false }, 400);
    }

    const { feedId, feedUrl } = (payload ?? {}) as { feedId?: unknown; feedUrl?: unknown };

    if ((typeof feedId !== 'string' && typeof feedId !== 'number') || feedId === '') {
      return jsonResponse({ error: 'feedId is required', success: false }, 400);
    }
    if (typeof feedUrl !== 'string') {
      return jsonResponse({ error: 'feedUrl is required', success: false }, 400);
    }

    // SECURITY: feedUrl is caller-supplied. Only the scheme is checked here;
    // requests to internal or private addresses are NOT blocked.
    let target: URL;
    try {
      target = new URL(feedUrl);
    } catch {
      return jsonResponse({ error: 'feedUrl is not a valid URL', success: false }, 400);
    }
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      return jsonResponse({ error: 'feedUrl must use http or https', success: false }, 400);
    }

    const supabaseClient = createClient(
      Deno.env.get('SUPABASE_URL') ?? '',
      Deno.env.get('SUPABASE_SERVICE_ROLE_KEY') ?? '',
    );

    console.log(`Fetching RSS for feed: ${feedId}, URL: ${feedUrl}`);

    // Fetch RSS content.
    const response = await fetch(feedUrl);
    if (!response.ok) {
      // statusText can be empty, so the numeric status is included as well.
      throw new Error(`Failed to fetch RSS: ${response.status} ${response.statusText}`.trim());
    }

    const rssText = await response.text();
    console.log(`RSS content length: ${rssText.length}`);

    const items = parseFeedItems(rssText);
    console.log(`Parsed ${items.length} items from RSS feed`);

    const articlesToInsert = items.map((item) => toArticleRow(item, feedId));
    console.log(`Preparing to insert ${articlesToInsert.length} articles`);

    // Insert articles; rows that already exist for (feed_id, guid) are left untouched.
    if (articlesToInsert.length > 0) {
      const { error: insertError } = await supabaseClient
        .from('articles')
        .upsert(articlesToInsert, {
          onConflict: 'feed_id,guid',
          ignoreDuplicates: true,
        });

      if (insertError) {
        console.error('Error inserting articles:', insertError);
        throw insertError;
      }
    }

    // Get current article count for this feed.
    const { count: currentArticleCount, error: countError } = await supabaseClient
      .from('articles')
      .select('*', { count: 'exact', head: true })
      .eq('feed_id', feedId);

    const feedUpdate: Record<string, unknown> = {
      last_fetched_at: new Date().toISOString(),
      status: 'active',
    };
    if (countError) {
      // Keep the stored count instead of overwriting it with 0.
      console.error('Error counting articles:', countError);
    } else {
      feedUpdate.article_count = currentArticleCount ?? 0;
    }

    // Update the feed's last_fetched_at, status and (when known) article_count.
    const { error: updateError } = await supabaseClient
      .from('feeds')
      .update(feedUpdate)
      .eq('id', feedId);

    if (updateError) {
      console.error('Error updating feed:', updateError);
      throw updateError;
    }

    console.log(`Successfully processed ${articlesToInsert.length} articles for feed ${feedId}`);

    return jsonResponse({
      success: true,
      articlesProcessed: articlesToInsert.length,
    });
  } catch (error: unknown) {
    console.error('Error in fetch-rss function:', error);
    return jsonResponse({ error: describeError(error), success: false }, 500);
  }
});
|