index.ts 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. import { serve } from "https://deno.land/std@0.168.0/http/server.ts";
  2. import { isValidExternalUrl, verifyAuth } from '../_shared/security.ts';
  /**
   * CORS headers attached to every response this function produces,
   * including the reply to the browser's preflight (OPTIONS) request.
   * Any origin is allowed; the listed request headers may be sent by callers.
   */
  const corsHeaders = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': [
      'authorization',
      'x-client-info',
      'apikey',
      'content-type',
    ].join(', '),
  };
  /** One feed candidate discovered for a website. */
  interface RSSFeed {
    /** Absolute URL of the feed document. */
    url: string;
    /** Human-readable label; 'RSS Feed' is used when the page does not provide one. */
    title: string;
    /** MIME type advertised for the feed (e.g. 'application/rss+xml'). */
    type: string;
  }
  /** Time budget (ms) for downloading the page, headers and body included. */
  const PAGE_FETCH_TIMEOUT_MS = 30000;
  /** Time budget (ms) for each probe of a well-known feed path. */
  const PROBE_TIMEOUT_MS = 5000;
  /** Maximum number of body bytes accepted from the fetched page (5MB). */
  const MAX_PAGE_BYTES = 5 * 1024 * 1024;
  /** Maximum number of redirect hops followed for one outbound request. */
  const MAX_REDIRECTS = 5;
  /** HTTP statuses that are treated as redirects when a Location header is present. */
  const REDIRECT_STATUSES = [301, 302, 303, 307, 308];
  /** MIME types (matched as substrings, case-insensitively) that mark a <link> as a feed. */
  const FEED_MIME_TYPES = ['application/rss+xml', 'application/atom+xml', 'application/xml'];
  /** Well-known feed locations probed when the page advertises no feed. */
  const COMMON_FEED_PATHS = ['/feed', '/rss', '/feed.xml', '/rss.xml', '/atom.xml', '/index.xml'];

  /**
   * Builds a case-insensitive pattern that captures the value of one HTML
   * attribute, whether it is double-quoted, single-quoted or unquoted.
   * The leading `\s` keeps `type` from matching inside e.g. `data-type`.
   */
  function buildAttributePattern(name: string): RegExp {
    return new RegExp(`\\s${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`, 'i');
  }

  /** Attribute patterns compiled once instead of on every <link> tag. */
  const ATTRIBUTE_PATTERNS = {
    rel: buildAttributePattern('rel'),
    type: buildAttributePattern('type'),
    href: buildAttributePattern('href'),
    title: buildAttributePattern('title'),
  };

  /** Serializes `body` as JSON and attaches the CORS and content-type headers. */
  function jsonResponse(body: Record<string, unknown>, status: number): Response {
    return new Response(JSON.stringify(body), {
      status,
      headers: { ...corsHeaders, 'Content-Type': 'application/json' },
    });
  }

  /** Returns the non-empty value of `name` inside a single HTML tag, or undefined. */
  function readAttribute(tag: string, name: keyof typeof ATTRIBUTE_PATTERNS): string | undefined {
    const match = ATTRIBUTE_PATTERNS[name].exec(tag);
    const value = match?.[1] ?? match?.[2] ?? match?.[3];
    return value ? value : undefined;
  }

  /**
   * Fetches `startUrl`, following redirects by hand so that every hop is
   * re-checked with `isValidExternalUrl`. Letting `fetch` follow redirects
   * automatically would allow an allowed host to bounce the request to an
   * address the SSRF check never saw. Intended for body-less GET/HEAD requests.
   *
   * @returns the first non-redirect response and the URL that produced it.
   * @throws Error when a redirect target is rejected or the hop limit is exceeded.
   */
  async function fetchWithSafeRedirects(
    startUrl: string,
    init: RequestInit,
    maxRedirects: number = MAX_REDIRECTS,
  ): Promise<{ response: Response; finalUrl: string }> {
    let currentUrl = startUrl;
    for (let hop = 0; hop <= maxRedirects; hop++) {
      const response = await fetch(currentUrl, { ...init, redirect: 'manual' });
      const location = response.headers.get('location');
      if (!REDIRECT_STATUSES.includes(response.status) || !location) {
        return { response, finalUrl: currentUrl };
      }
      // Release the connection of the intermediate response before moving on.
      await response.body?.cancel();
      const nextUrl = new URL(location, currentUrl).toString();
      const validation = isValidExternalUrl(nextUrl);
      if (!validation.valid) {
        throw new Error(`Redirect target blocked: ${validation.error}`);
      }
      currentUrl = nextUrl;
    }
    throw new Error('Too many redirects');
  }

  /**
   * Reads the response body as UTF-8 text while enforcing a byte limit during
   * the download, so an oversized page is rejected before it is fully buffered.
   *
   * @throws Error('Website content too large') when the limit is exceeded.
   */
  async function readTextWithLimit(response: Response, maxBytes: number): Promise<string> {
    const declaredLength = Number(response.headers.get('content-length'));
    if (Number.isFinite(declaredLength) && declaredLength > maxBytes) {
      await response.body?.cancel();
      throw new Error('Website content too large');
    }
    if (!response.body) return '';

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let received = 0;
    let text = '';
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      received += value.byteLength;
      if (received > maxBytes) {
        await reader.cancel();
        throw new Error('Website content too large');
      }
      text += decoder.decode(value, { stream: true });
    }
    // Flush any bytes of a multi-byte character still buffered in the decoder.
    return text + decoder.decode();
  }

  /** Returns the trimmed <title> text, or `fallback` when it is missing or blank. */
  function extractSiteName(html: string, fallback: string): string {
    const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
    const name = titleMatch ? titleMatch[1].trim() : '';
    return name || fallback;
  }

  /**
   * Collects feeds advertised through `<link rel="alternate" type="..." href="...">`
   * tags. Attribute order does not matter, hrefs are resolved against `baseUrl`,
   * and links whose URL is malformed or fails `isValidExternalUrl` are skipped.
   */
  function extractFeedLinks(html: string, baseUrl: string): RSSFeed[] {
    const feeds: RSSFeed[] = [];
    const linkTags = html.match(/<link\b[^>]*>/gi) ?? [];
    for (const tag of linkTags) {
      const rel = readAttribute(tag, 'rel');
      const type = readAttribute(tag, 'type');
      const href = readAttribute(tag, 'href');
      if (!rel || !type || !href) continue;
      // rel is a space-separated token list, e.g. "alternate" or "alternate feed".
      if (!rel.toLowerCase().split(/\s+/).includes('alternate')) continue;
      const normalizedType = type.toLowerCase();
      if (!FEED_MIME_TYPES.some((mime) => normalizedType.includes(mime))) continue;

      let feedUrl: string;
      try {
        // Attribute values are HTML-escaped; "&amp;" must become "&" in the URL.
        feedUrl = new URL(href.replace(/&amp;/gi, '&'), baseUrl).toString();
      } catch {
        continue; // a malformed href must not fail the whole request
      }
      if (!isValidExternalUrl(feedUrl).valid) continue;

      feeds.push({
        url: feedUrl,
        title: readAttribute(tag, 'title') ?? 'RSS Feed',
        type,
      });
    }
    return feeds;
  }

  /**
   * Probes the well-known feed paths on `origin` with HEAD requests, all at
   * once, and returns an entry for every path that answers with a 2xx status.
   * Results keep the order of COMMON_FEED_PATHS; individual failures are
   * logged and ignored because this is a best-effort fallback.
   */
  async function probeCommonFeedPaths(origin: string): Promise<RSSFeed[]> {
    const probes = COMMON_FEED_PATHS.map(async (path): Promise<RSSFeed | undefined> => {
      const testUrl = `${origin}${path}`;
      if (!isValidExternalUrl(testUrl).valid) return undefined;

      const testController = new AbortController();
      const testTimeoutId = setTimeout(() => testController.abort(), PROBE_TIMEOUT_MS);
      try {
        const { response } = await fetchWithSafeRedirects(testUrl, {
          method: 'HEAD',
          signal: testController.signal,
        });
        await response.body?.cancel();
        if (!response.ok) return undefined;
        return { url: testUrl, title: 'RSS Feed', type: 'application/rss+xml' };
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.log(`Failed to check ${testUrl}:`, reason);
        return undefined;
      } finally {
        // Always clear the timer, also when the request itself failed.
        clearTimeout(testTimeoutId);
      }
    });
    const results = await Promise.all(probes);
    return results.filter((feed): feed is RSSFeed => feed !== undefined);
  }

  /**
   * HTTP entry point: detects the RSS/Atom feed of a website.
   *
   * Expects an authenticated POST whose JSON body is `{ "url": string }`.
   * Responses are JSON:
   *  - 401 when `verifyAuth` rejects the request,
   *  - 400 when the body is not JSON, has no string `url`, or the URL fails
   *    `isValidExternalUrl`,
   *  - 200 with `{ success: true, rssUrl, siteName, feeds? }` when a feed is
   *    found (`feeds` is only present when more than one was found),
   *  - 200 with `{ success: false, error, siteName }` when none is found,
   *  - 500 with a generic error for any other failure.
   */
  serve(async (req: Request): Promise<Response> => {
    // Handle CORS preflight requests
    if (req.method === 'OPTIONS') {
      return new Response(null, { headers: corsHeaders });
    }

    try {
      // Authentication: Require authenticated user
      const auth = await verifyAuth(req);
      if (!auth) {
        console.log('Unauthorized access attempt to fetch-website-rss');
        return jsonResponse({ success: false, error: 'Unauthorized' }, 401);
      }

      // A malformed or non-object body is a client error, not a server error.
      let payload: unknown;
      try {
        payload = await req.json();
      } catch {
        payload = undefined;
      }
      const url =
        typeof payload === 'object' && payload !== null
          ? (payload as { url?: unknown }).url
          : undefined;
      if (typeof url !== 'string' || !url) {
        return jsonResponse({ success: false, error: 'Valid URL is required' }, 400);
      }

      // SSRF Protection: Validate URL
      const urlValidation = isValidExternalUrl(url);
      if (!urlValidation.valid) {
        console.log(`SSRF blocked in fetch-website-rss: ${url} - ${urlValidation.error}`);
        return jsonResponse({ success: false, error: urlValidation.error }, 400);
      }

      console.log('Fetching website RSS for URL:', url);

      // The timer stays armed until the body has been read, so a server that
      // sends headers quickly but stalls on the body cannot hang the function.
      const controller = new AbortController();
      const timeoutId = setTimeout(() => controller.abort(), PAGE_FETCH_TIMEOUT_MS);
      let html: string;
      let pageUrl: string;
      try {
        const { response, finalUrl } = await fetchWithSafeRedirects(url, {
          signal: controller.signal,
          headers: {
            'User-Agent': 'Mozilla/5.0 (compatible; RSS Feed Detector/1.0)',
          },
        });
        if (!response.ok) {
          await response.body?.cancel();
          throw new Error(`Failed to fetch website: ${response.status}`);
        }
        html = await readTextWithLimit(response, MAX_PAGE_BYTES);
        pageUrl = finalUrl;
      } finally {
        clearTimeout(timeoutId);
      }

      // Relative links and fallback paths are resolved against the URL that
      // actually served the page, which may differ from the requested one.
      const page = new URL(pageUrl);
      const siteName = extractSiteName(html, page.hostname);

      let feeds = extractFeedLinks(html, pageUrl);

      // Fallback: Check common RSS paths if no feeds found
      if (feeds.length === 0) {
        feeds = await probeCommonFeedPaths(page.origin);
      }

      console.log('Found feeds:', feeds.length);

      if (feeds.length === 0) {
        return jsonResponse(
          {
            success: false,
            error: 'No RSS feeds found on this website',
            siteName,
          },
          200,
        );
      }

      // Return the first feed by default, or all feeds if multiple
      return jsonResponse(
        {
          success: true,
          rssUrl: feeds[0].url,
          siteName,
          feeds: feeds.length > 1 ? feeds : undefined,
        },
        200,
      );
    } catch (error) {
      console.error('Error fetching website RSS:', error);
      return jsonResponse({ success: false, error: 'Failed to detect RSS feed' }, 500);
    }
  });