chore(ci): blog sync

coderPerseus · coderPerseus · commit b6ea903533c1 · 2024-12-15T15:57:24.000Z
diff --git a/data/blog/post-34.mdx b/data/blog/post-34.mdx
@@ -0,0 +1,201 @@
+---
+title: 使用 turndown 踩坑记录
+date: 2024-12-15T15:56:59Z
+slug: post-34
+author: chaseFunny：https://github.com/chaseFunny
+tags: ["踩坑记录","前端开发"]
+---
+
+最近开发中，遇到一个需求，需要自定义处理 bytemd 的粘贴逻辑，也就是：
+
+在 React 项目中使用了 bytemd 编辑器 (https://github.com/pd4d10/bytemd) ，我也没有做过类似的需求，最开始的实现思路就是：
+
+1）找到 bytemd 在哪里处理粘贴逻辑
+
+2）阻止默认的粘贴逻辑
+
+3）拿到粘贴的内容
+
+4）返回预期的内容
+
+
+
+## 寻找 bytemd 的自定义粘贴逻辑入口
+
+通过查阅源码得知：插件也就是一个函数，用于扩展 Bytemd 编辑器和查看器的功能，返回指定的对象类型，返回对象类型已经定义好了，是 BytemdPlugin 。它包含五个属性：
+
+- remark：自定义 Markdown 解析
+- rehype：HTML 解析
+- actions：注册操作，也就是定义我们编辑框上面哪些小图标的
+- editorEffect ：编辑器副作用
+- viewerEffect：查看器副作用
+
+现在我们需要的是 editorEffect ，它是接受一个函数，函数中会给我们 `ctx` 也就是编辑器上下文，在这里我们可以通过 `cxt.editor.on('paste',fn(cm,e){e.preventDefault();})` 来实现禁止默认粘贴逻辑，当我们这样写后，并且把这个插件注册到编辑器，我们粘贴就失效了，我们现在已经找到了粘贴逻辑的地方，下面我们来实现自定义粘贴的逻辑
+
+## 自定义粘贴逻辑
+
+我们需要先拿到用户粘贴的内容，也很简单：
+
+```ts
+ editor.on('paste', async (cm: any, e: ClipboardEvent) => {
+   const clipboardData = e.clipboardData;
+   const text = clipboardData.getData('text/plain'); // 文本内容
+   const html = clipboardData.getData('text/html'); // 原始 html 内容
+ }
+```
+
+现在我们需要对 html 内容进行解析，转为 md 格式，然后返回
+
+通过调研，发现好用的 turndown 库：用 JavaScript 编写的 HTML 到 Markdown 转换器，下面我们就使用 turndown 来实现 html => markdown 。直接看代码：
+
+```ts
+// ...
+const TurndownService = require('turndown')
+const turndownService = new TurndownService()
+const mdText = turndownService.turndown(html)
+// ...
+```
+
+但是这样写，会发现有很多问题：
+
+1. 代码块没有检测出来，没有高亮
+1. 行内代码块没有展示
+1. 代码中会有额外的转义斜杠
+1. 编程语言未识别
+1. 表格和删除线也不生效
+
+等等问题，所以我们需要进行额外的配置，才能正确的把 HTML 转为 markdown 格式
+
+下面经过我不断测试，最终得到的代码为：
+
+```ts
+import turndownService from 'turndown';
+import { gfm, strikethrough, tables } from 'turndown-plugin-gfm';
+
+/**
+ * 配置并返回 turndown 实例
+ */
+export function configureTurndown() {
+  const turndownServiceObj = new turndownService({
+    codeBlockStyle: 'fenced',
+  });
+
+  turndownServiceObj.use(gfm);
+  turndownServiceObj.use([tables, strikethrough]);
+
+  // 添加自定义规则
+  addCustomRules(turndownServiceObj);
+
+  return turndownServiceObj;
+}
+
+/**
+ * 为 turndown 添加自定义规则
+ */
+function addCustomRules(turndownServiceObj: turndownService) {
+  // 删除线
+  turndownServiceObj.addRule('strikethrough', {
+    filter: ['del', 's', 'strike'] as string[],
+    replacement: (content) => `~~${content}~~`,
+  });
+
+  // 代码块
+  turndownServiceObj.addRule('pre', {
+    filter: ['pre'],
+    replacement: (content, node: any) => {
+      const code = node.querySelector('code');
+      let language = '';
+
+      if (node.getAttribute('lang')) {
+        language = node.getAttribute('lang');
+      } else if (code?.className) {
+        const langMatch = code.className.match(/language-(\S+)/);
+        language = langMatch?.[1] || '';
+      } else if (node.className) {
+        const mdFencesMatch = node.className.match(/md-fences|language-(\S+)/);
+        language = mdFencesMatch?.[1] || '';
+      }
+
+      let codeContent = code ? code.textContent.trim() : content.trim();
+      codeContent = codeContent.replace(/\\([^\\])/g, '$1');
+      language = language.toLowerCase().replace(/[^a-z0-9+#]+/g, '');
+
+      return `\`\`\`${language}\n${codeContent}\n\`\`\`\n`;
+    },
+  });
+
+  // 行内代码
+  turndownServiceObj.addRule('inlineCode', {
+    filter: (node) => node.nodeName === 'CODE' && node.parentNode?.nodeName !== 'PRE',
+    replacement: (content) => `\`${content}\``,
+  });
+
+  // 表格
+  turndownServiceObj.addRule('table', {
+    filter: 'table',
+    replacement: function (content, node) {
+      const table = node as HTMLTableElement;
+      const rows = Array.from(table.rows);
+
+      const headers = Array.from(rows[0]?.cells || [])
+        .map((cell) => cell.textContent?.trim() || '')
+        .join(' | ');
+
+      const separator = Array.from(rows[0]?.cells || [])
+        .map(() => '---')
+        .join(' | ');
+
+      const data = rows
+        .slice(1)
+        .map((row) =>
+          Array.from(row.cells)
+            .map((cell) => cell.textContent?.trim() || '')
+            .join(' | '),
+        )
+        .join('\n');
+
+      return `\n| ${headers} |\n| ${separator} |\n${data ? `| ${data} |` : ''}\n\n`;
+    },
+  });
+}
+
+/**
+ * 处理粘贴的内容
+ */
+export async function handlePastedContent(html: string, text: string) {
+  const turndownServiceObj = configureTurndown();
+  const parser = new DOMParser();
+  const doc = parser.parseFromString(html, 'text/html');
+  const images: HTMLImageElement[] = Array.from(doc.getElementsByTagName('img'));
+
+  // 转为 markdown 文本
+  const mdContent = turndownServiceObj.turndown(html);
+
+  // 如果没有图片，直接返回处理后的文本
+  if (images.length === 0) {
+    return mdContent || text;
+  }
+
+  // 处理图片上传
+  return await processImages(images, mdContent);
+}
+
+/**
+ * 处理图片上传
+ * @param images 图片元素数组
+ * @param mdContent markdown 文本
+ * @returns 处理图片上传后的 markdown 文本
+ */
+async function processImages(images: HTMLImageElement[], mdContent: string) {
+  // 自定义图片上传的逻辑，通过上传后的图地址替换源地址，得到新的内容 processedText
+  return processedText;
+}
+
+```
+
+这样得到的结果就是正常的了！以上的过程得到了 AI 的极大帮助，帮我定位问题，分析问题，找到解决思路等等
+
+
+
+---
+此文自动发布于：<a href="https://github.com/coderPerseus/blog/issues/34" target="_blank">github issues</a>