Java中如何将Markdown转成Word?

1、Maven依赖

<!-- Used to convert Markdown to HTML -->
<dependency>
    <groupId>com.vladsch.flexmark</groupId>
    <artifactId>flexmark-all</artifactId>
    <version>0.64.8</version>
</dependency>
<!-- Used to convert HTML to Word -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>5.3.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>5.3.0</version>
</dependency>
<!-- NOTE(review): this artifact is pinned to 4.1.2 while poi and poi-ooxml above are 5.3.0.
     Mixing POI versions on one classpath is likely to cause conflicts; confirm whether this
     dependency is needed at all, since the utility class in this article only imports
     org.apache.poi.poifs.filesystem classes. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.2</version>
</dependency>

2、工具类

package com.pq.chat.ai.core.utils;

import cn.hutool.core.util.IdUtil;
import com.vladsch.flexmark.ext.tables.TablesExtension;
import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.data.MutableDataSet;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;

/**
 * Utility methods for turning Markdown into a Word-openable {@code .doc} file.
 *
 * <p>The conversion is done in two steps: Markdown is rendered to HTML with flexmark
 * ({@link #markdownToHtml(String)}), and the HTML is then stored inside a POIFS (OLE2)
 * container ({@link #htmlToWord(String)}).
 *
 * @author THF
 * @date 2024/9/13 14:15
 */
@Slf4j
public class WordUtil {

    /**
     * Renders Markdown text as an HTML fragment.
     *
     * <p>The tables extension is enabled on both the parser and the renderer.
     *
     * @param markdown the Markdown source text
     * @return the rendered HTML fragment
     */
    public static String markdownToHtml(String markdown) {
        // Parser/renderer configuration: enable the tables extension.
        MutableDataSet options = new MutableDataSet();
        options.set(Parser.EXTENSIONS, List.of(TablesExtension.create()));
        // Build the Markdown parser and the HTML renderer from the same options.
        Parser parser = Parser.builder(options).build();
        HtmlRenderer renderer = HtmlRenderer.builder(options).build();
        // Parse the Markdown and render the resulting document as HTML.
        return renderer.render(parser.parse(markdown));
    }

    /**
     * Wraps an HTML fragment in a complete XHTML page and writes it to a new {@code .doc} file.
     *
     * <p>The file is created under {@code FileProperties.BASE_PATH} with a random UUID-based
     * name. Its content is a POIFS filesystem whose single {@code "WordDocument"} entry holds
     * the UTF-8 encoded HTML page.
     *
     * @param html the HTML fragment to place inside {@code <body>}
     * @return the absolute path of the generated file
     * @throws RuntimeException wrapping any exception raised while creating the directory
     *                          or writing the file
     */
    public static String htmlToWord(String html) {
        try {
            String page = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">" +
                    "<html xmlns=\"http://www.w3.org/1999/xhtml\">" +
                    "<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>" +
                    "</head><body>" +
                    // Caller-supplied content
                    html +
                    "</body></html>";
            byte[] content = page.getBytes(StandardCharsets.UTF_8);

            // Random file name under the configured base directory.
            String fileName = IdUtil.fastSimpleUUID() + ".doc";
            Path targetPath = Paths.get(FileProperties.BASE_PATH, fileName);
            // getParent() is null when the base path is empty (file relative to the working
            // directory); there is nothing to create in that case.
            Path parentDir = targetPath.getParent();
            if (parentDir != null) {
                Files.createDirectories(parentDir);
            }

            // try-with-resources guarantees that the output file, the input stream and the
            // POIFS filesystem are all released even when writing fails part-way.
            try (FileOutputStream outputStream = new FileOutputStream(targetPath.toFile());
                 ByteArrayInputStream contentStream = new ByteArrayInputStream(content);
                 POIFSFileSystem poifsFileSystem = new POIFSFileSystem()) {
                // The output is a .doc-style container; per the original author's note it has
                // to be changed to docx manually if that format is required.
                DirectoryEntry root = poifsFileSystem.getRoot();
                root.createDocument("WordDocument", contentStream);
                poifsFileSystem.writeFilesystem(outputStream);
            }
            return targetPath.toFile().getAbsolutePath();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

3、使用

// Convert the Markdown text to HTML (markdown is assumed to be defined by the caller — confirm its type)
String html = WordUtil.markdownToHtml(markdown.toString());
// Convert the HTML to a Word file; the return value is the path of the generated file
String filePath = WordUtil.htmlToWord(html);
最后修改:2024 年 09 月 14 日
如果觉得我的文章对你有用,请随意赞赏