// index.ts
  1. import { serve } from "https://deno.land/std@0.168.0/http/server.ts"
  2. import { createClient } from 'https://esm.sh/@supabase/supabase-js@2'
  3. interface RSSItem {
  4. title: string;
  5. description: string;
  6. link: string;
  7. pubDate: string;
  8. guid?: string;
  9. content?: string;
  10. image?: string;
  11. }
  12. interface RSSFeed {
  13. title: string;
  14. description: string;
  15. items: RSSItem[];
  16. }
  17. const corsHeaders = {
  18. 'Access-Control-Allow-Origin': '*',
  19. 'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
  20. }
  21. // Helper function to extract text content from XML tags
  22. function extractTextContent(xml: string, tagName: string): string {
  23. const regex = new RegExp(`<${tagName}[^>]*>([\\s\\S]*?)<\\/${tagName}>`, 'i');
  24. const match = xml.match(regex);
  25. if (match && match[1]) {
  26. // Remove CDATA wrapper if present and clean HTML tags
  27. let content = match[1].replace(/<!\[CDATA\[(.*?)\]\]>/s, '$1');
  28. content = content.replace(/<[^>]*>/g, '').trim();
  29. return content;
  30. }
  31. return '';
  32. }
  33. // Helper function to extract attribute from XML tags
  34. function extractAttribute(xml: string, tagName: string, attributeName: string): string {
  35. const regex = new RegExp(`<${tagName}[^>]*${attributeName}=["']([^"']*)["'][^>]*>`, 'i');
  36. const match = xml.match(regex);
  37. return match ? match[1] : '';
  38. }
  39. serve(async (req) => {
  40. if (req.method === 'OPTIONS') {
  41. return new Response('ok', { headers: corsHeaders })
  42. }
  43. try {
  44. const supabaseClient = createClient(
  45. Deno.env.get('SUPABASE_URL') ?? '',
  46. Deno.env.get('SUPABASE_SERVICE_ROLE_KEY') ?? ''
  47. )
  48. const { feedId, feedUrl } = await req.json()
  49. console.log(`Fetching RSS for feed: ${feedId}, URL: ${feedUrl}`)
  50. // Fetch RSS content
  51. const response = await fetch(feedUrl)
  52. if (!response.ok) {
  53. throw new Error(`Failed to fetch RSS: ${response.statusText}`)
  54. }
  55. const rssText = await response.text()
  56. console.log(`RSS content length: ${rssText.length}`)
  57. // Parse RSS using regex patterns
  58. const items: RSSItem[] = [];
  59. // Split by item or entry tags
  60. const itemRegex = /<(item|entry)[^>]*>([\s\S]*?)<\/\1>/gi;
  61. let itemMatch;
  62. while ((itemMatch = itemRegex.exec(rssText)) !== null) {
  63. const itemXml = itemMatch[0];
  64. const title = extractTextContent(itemXml, 'title');
  65. const description = extractTextContent(itemXml, 'description') ||
  66. extractTextContent(itemXml, 'summary') ||
  67. extractTextContent(itemXml, 'content');
  68. let link = extractTextContent(itemXml, 'link');
  69. if (!link) {
  70. // Try to get link from href attribute
  71. link = extractAttribute(itemXml, 'link', 'href');
  72. }
  73. const pubDate = extractTextContent(itemXml, 'pubDate') ||
  74. extractTextContent(itemXml, 'published') ||
  75. extractTextContent(itemXml, 'updated');
  76. const guid = extractTextContent(itemXml, 'guid') ||
  77. extractTextContent(itemXml, 'id') ||
  78. link;
  79. // Try to extract image from enclosure or content
  80. let image = extractAttribute(itemXml, 'enclosure', 'url');
  81. if (!image && description) {
  82. const imgMatch = description.match(/src=["']([^"']*\.(jpg|jpeg|png|gif|webp))[^"']*/i);
  83. if (imgMatch) {
  84. image = imgMatch[1];
  85. }
  86. }
  87. if (title && guid) {
  88. items.push({
  89. title,
  90. description: description || '',
  91. link: link || '',
  92. pubDate: pubDate || new Date().toISOString(),
  93. guid,
  94. image: image || '',
  95. content: description || ''
  96. });
  97. }
  98. }
  99. console.log(`Parsed ${items.length} items from RSS feed`)
  100. // Save articles to database
  101. const articlesToInsert = items.map(item => {
  102. // Calculate read time (rough estimate: 200 words per minute)
  103. const wordCount = (item.description || '').split(' ').length
  104. const readTime = Math.max(1, Math.ceil(wordCount / 200))
  105. // Parse and validate date
  106. let publishedAt: string;
  107. try {
  108. if (item.pubDate) {
  109. publishedAt = new Date(item.pubDate).toISOString();
  110. } else {
  111. publishedAt = new Date().toISOString();
  112. }
  113. } catch (error) {
  114. console.log(`Invalid date format: ${item.pubDate}, using current date`);
  115. publishedAt = new Date().toISOString();
  116. }
  117. return {
  118. feed_id: feedId,
  119. title: item.title,
  120. description: item.description,
  121. content: item.content || item.description,
  122. url: item.link || null,
  123. image_url: item.image || null,
  124. published_at: publishedAt,
  125. guid: item.guid,
  126. read_time: readTime
  127. }
  128. })
  129. console.log(`Preparing to insert ${articlesToInsert.length} articles`)
  130. // Insert articles (on conflict do nothing to avoid duplicates)
  131. const { error: insertError } = await supabaseClient
  132. .from('articles')
  133. .upsert(articlesToInsert, {
  134. onConflict: 'feed_id,guid',
  135. ignoreDuplicates: true
  136. })
  137. if (insertError) {
  138. console.error('Error inserting articles:', insertError)
  139. throw insertError
  140. }
  141. // Get current article count for this feed
  142. const { count: currentArticleCount, error: countError } = await supabaseClient
  143. .from('articles')
  144. .select('*', { count: 'exact', head: true })
  145. .eq('feed_id', feedId)
  146. if (countError) {
  147. console.error('Error counting articles:', countError)
  148. }
  149. // Update feed's last_fetched_at and article_count
  150. const { error: updateError } = await supabaseClient
  151. .from('feeds')
  152. .update({
  153. last_fetched_at: new Date().toISOString(),
  154. status: 'active',
  155. article_count: currentArticleCount || 0
  156. })
  157. .eq('id', feedId)
  158. if (updateError) {
  159. console.error('Error updating feed:', updateError)
  160. throw updateError
  161. }
  162. console.log(`Successfully processed ${articlesToInsert.length} articles for feed ${feedId}`)
  163. return new Response(
  164. JSON.stringify({
  165. success: true,
  166. articlesProcessed: articlesToInsert.length
  167. }),
  168. {
  169. headers: { ...corsHeaders, 'Content-Type': 'application/json' }
  170. }
  171. )
  172. } catch (error) {
  173. console.error('Error in fetch-rss function:', error)
  174. return new Response(
  175. JSON.stringify({
  176. error: error.message,
  177. success: false
  178. }),
  179. {
  180. status: 500,
  181. headers: { ...corsHeaders, 'Content-Type': 'application/json' }
  182. }
  183. )
  184. }
  185. })