/* RESPONSIBLE TEAM: team-knowledge-foundations */

import { type Block } from '@intercom/interblocks.ts';
import { htmlToTextContent } from 'embercom/lib/html-unescape';

/**
 * Structural types describing article content blocks.
 *
 * NOTE(review): nothing in the visible part of this file references these
 * declarations — confirm whether they are still needed.
 */

/** Shape shared by every block: a `type` tag plus any number of extra properties. */
interface BaseContentBlock {
  type: string;
  [key: string]: unknown;
}

/** Image block; `url` is optional in this shape. */
interface ImageBlock extends BaseContentBlock {
  type: 'image';
  url?: string;
}

/** A single table cell, which carries its own list of blocks. */
type TableCell = { content: ContentBlock[] };

/** A single table row, made up of cells. */
type TableRow = { cells: TableCell[] };

/** Table block: rows contain cells, and cells contain further blocks. */
interface TableBlock extends BaseContentBlock {
  type: 'table';
  rows: TableRow[];
}

/** Any block whose `content` is either a list of child blocks or a plain object. */
interface NestedContentBlock extends BaseContentBlock {
  content: Array<ContentBlock> | Record<string, unknown>;
}

/** Union of every block shape declared above. */
type ContentBlock = BaseContentBlock | ImageBlock | TableBlock | NestedContentBlock;

/**
 * Sanitizes article content by dropping unusable image blocks and stripping HTML from
 * paragraph text.
 *
 * What happens to each entry of `blocks`:
 * - Image blocks (`type === 'image'`) are removed when `url` is missing, empty, not a
 *   string, or starts with `blob:`. Blob URLs are temporary, in-browser URLs used for
 *   images that are still being uploaded.
 * - Paragraph blocks (`type === 'paragraph'`) with a truthy `text` get that text replaced
 *   by `htmlToTextContent(text)`. Other block types keep their `text` as-is.
 * - Table blocks (`type === 'table'`) have every `rows[].cells[].content` sanitized
 *   recursively. Missing or null rows, cells and content are skipped.
 * - Any block whose `content` is an array has that array sanitized recursively.
 *   Non-array `content` (e.g. a plain object) is left untouched.
 * - Entries that are not objects (null, undefined, primitives) are kept untouched
 *   instead of raising a TypeError.
 *
 * Mutation: the returned top-level array is a new array, but the surviving blocks are
 * the same object references as in the input, and their `text` / `content` properties
 * are overwritten in place.
 *
 * @param blocks - Array of content blocks to sanitize
 * @returns The blocks that survived filtering; a non-array argument is returned unchanged
 */
export function sanitizeArticleContent(blocks: Block[]): Block[] {
  // Recursion base case: the function is also called on nested `content` values, which
  // are not guaranteed to be arrays at runtime. Anything that is not an array is handed
  // back unchanged.
  if (!Array.isArray(blocks)) {
    return blocks;
  }

  return blocks.filter((block) => {
    // Defensive: block data is loosely typed at runtime. Null and primitive entries
    // cannot be inspected with property access or the `in` operator, so keep them as-is.
    if (block === null || typeof block !== 'object') {
      return true;
    }

    // Image blocks: keep only those with a non-empty string URL that is not a blob URL.
    if (block.type === 'image') {
      let { url } = block as { type: 'image'; url?: unknown };
      if (typeof url !== 'string' || url === '' || url.startsWith('blob:')) {
        return false;
      }
    }

    // Paragraph blocks: strip HTML from the text, in place.
    if (block.type === 'paragraph' && block.text) {
      let textBlock = block as { type: 'paragraph'; text: string };
      textBlock.text = htmlToTextContent(textBlock.text);
    }

    // Table blocks: walk rows -> cells and sanitize each cell's content, in place.
    if (block.type === 'table') {
      let tableBlock = block as {
        type: 'table';
        rows?: ({ cells?: ({ content?: Block[] } | null)[] } | null)[];
      };
      for (let row of tableBlock.rows ?? []) {
        for (let cell of row?.cells ?? []) {
          if (cell?.content) {
            cell.content = sanitizeArticleContent(cell.content);
          }
        }
      }
    }

    // Any other nested content: recurse only when it is an array of child blocks.
    if ('content' in block && block.content) {
      let nestedBlock = block as { content: Block[] | Record<string, unknown> };
      if (Array.isArray(nestedBlock.content)) {
        nestedBlock.content = sanitizeArticleContent(nestedBlock.content);
      }
    }

    return true;
  });
}
