前言
源码:weloe/FileConversion (github.com)
- pdf 转 word
- pdf 转 图片
- word 转 图片
- word 转 html
- word 转 pdf
实现方法
主要使用了 pdfbox(Apache PDFBox | A Java PDF Library)以及 spire.doc(Free Spire.Doc for Java | 100% 免费 Java Word 组件 (e-iceblue.cn))两个工具包
<!-- Additional Maven repository hosted by e-iceblue; presumably where the e-iceblue (Spire.Doc) dependency below is resolved from. -->
<repositories>
<repository>
<id>com.e-iceblue</id>
<url>http://repo.e-iceblue.cn/repository/maven-public/</url>
</repository>
</repositories>
<!-- Compile sources and generate bytecode for Java 8. -->
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<!-- Apache PDFBox: used by the PDF converters to load, render and extract text from PDF files. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.4</version>
</dependency>
<!-- JUnit 4, available on the test classpath only. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
<!-- Free Spire.Doc: used by the Word converters to load Word documents and save them as HTML, PDF or images. -->
<dependency>
<groupId>e-iceblue</groupId>
<artifactId>spire.doc.free</artifactId>
<version>3.9.0</version>
</dependency>
</dependencies>
策略接口
/**
 * Strategy interface for one file-format conversion.
 *
 * <p>Each implementation handles exactly one conversion, selected by a
 * method-name string, and reports the path of the file it produced.
 */
public interface FileConversion {

    /**
     * Tells whether this strategy handles the requested conversion.
     *
     * @param s the conversion method name, e.g. {@code "pdf2word"}
     * @return {@code true} if this implementation performs that conversion
     */
    boolean isSupport(String s);

    /**
     * Converts the source file and returns the path of the generated file.
     *
     * @param pathName       path of the file to convert
     * @param dirAndFileName output path without extension; the implementation
     *                       appends the extension of its target format
     * @return path of the generated file
     * @throws Exception if the conversion fails
     */
    String convert(String pathName, String dirAndFileName) throws Exception;
}
PDF转图片实现
public class PDF2Image implements FileConversion{
private String suffix = ".jpg";
public static final int DEFAULT_DPI = 150;
@Override
public boolean isSupport(String s {
return "pdf2image".equals(s;
}
@Override
public String convert(String pathName,String dirAndFileName throws Exception {
String outPath = dirAndFileName + suffix;
if(Files.exists(Paths.get(outPath{
throw new RuntimeException(outPath+" 文件已存在";
}
pdf2multiImage(pathName,outPath,DEFAULT_DPI;
return outPath;
}
/**
* pdf转图片
* 多页PDF会每页转换为一张图片,下面会有多页组合成一页的方法
*
* @param pdfFile pdf文件路径
* @param outPath 图片输出路径
* @param dpi 相当于图片的分辨率,值越大越清晰,但是转换时间变长
*/
public void pdf2multiImage(String pdfFile, String outPath, int dpi {
if (dpi <= 0 {
// 如果没有设置DPI,默认设置为150
dpi = DEFAULT_DPI;
}
try (PDDocument pdf = PDDocument.load(new FileInputStream(pdfFile {
int actSize = pdf.getNumberOfPages(;
List<BufferedImage> picList = new ArrayList<>(;
for (int i = 0; i < actSize; i++ {
BufferedImage image = new PDFRenderer(pdf.renderImageWithDPI(i, dpi, ImageType.RGB;
picList.add(image;
}
// 组合图片
ImageUtil.yPic(picList, outPath;
} catch (IOException e {
e.printStackTrace(;
}
}
}
PDF转word实现
public class PDF2Word implements FileConversion {
private String suffix = ".doc";
@Override
public boolean isSupport(String s {
return "pdf2word".equals(s;
}
/**
*
* @param pathName
* @throws IOException
*/
@Override
public String convert(String pathName,String dirAndFileName throws Exception {
String outPath = dirAndFileName + suffix;
if(Files.exists(Paths.get(outPath{
throw new RuntimeException(outPath+" 文件已存在";
}
pdf2word(pathName, outPath;
return outPath;
}
private void pdf2word(String pathName, String outPath throws IOException {
PDDocument doc = PDDocument.load(new File(pathName;
int pagenumber = doc.getNumberOfPages(;
// 创建文件
createFile(Paths.get(outPath;
FileOutputStream fos = new FileOutputStream(outPath;
Writer writer = new OutputStreamWriter(fos, "UTF-8";
PDFTextStripper stripper = new PDFTextStripper(;
stripper.setSortByPosition(true;//排序
stripper.setStartPage(1;//设置转换的开始页
stripper.setEndPage(pagenumber;//设置转换的结束页
stripper.writeText(doc, writer;
writer.close(;
doc.close(;
}
}
word转html
public class Word2HTML implements FileConversion{
private String suffix = ".html";
@Override
public boolean isSupport(String s {
return "word2html".equals(s;
}
@Override
public String convert(String pathName, String dirAndFileName {
String outPath = dirAndFileName + suffix;
if(Files.exists(Paths.get(outPath{
throw new RuntimeException(outPath+" 文件已存在";
}
Document doc = new Document(;
doc.loadFromFile(pathName;
doc.saveToFile(outPath, FileFormat.Html;
doc.dispose(;
return outPath;
}
}
word转图片
public class Word2Image implements FileConversion{
private String suffix = ".jpg";
@Override
public boolean isSupport(String s {
return "word2image".equals(s;
}
@Override
public String convert(String pathName, String dirAndFileName throws Exception {
String outPath = dirAndFileName + suffix;
if(Files.exists(Paths.get(outPath{
throw new RuntimeException(outPath+" 文件已存在";
}
Document doc = new Document(;
//加载文件
doc.loadFromFile(pathName;
//上传文档页数,也是最后要生成的图片数
Integer pageCount = doc.getPageCount(;
// 参数第一个和第三个都写死 第二个参数就是生成图片数
BufferedImage[] image = doc.saveToImages(0, pageCount, ImageType.Bitmap;
// 组合图片
List<BufferedImage> imageList = Arrays.asList(image;
ImageUtil.yPic(imageList, outPath;
return outPath;
}
}
word转pdf
public class Word2PDF implements FileConversion{
private String suffix = ".pdf";
@Override
public boolean isSupport(String s {
return "word2pdf".equals(s;
}
@Override
public String convert(String pathName, String dirAndFileName throws Exception {
String outPath = dirAndFileName + suffix;
if(Files.exists(Paths.get(outPath{
throw new RuntimeException(outPath+" 文件已存在";
}
//加载word
Document document = new Document(;
document.loadFromFile(pathName, FileFormat.Docx;
//保存结果文件
document.saveToFile(outPath, FileFormat.PDF;
document.close(;
return outPath;
}
}
使用
输入转换方法,文件路径,输出路径(输出路径如果输入'null'则为文件同目录下同名不同后缀文件)
- pdf2word
- pdf2image
- word2html
- word2image
- word2pdf
例如输入:
pdf2word D:\test\testFile.pdf null
控制台输出:
转换方法: pdf2word 文件: D:\test\testFile.pdf
转换成功!文件路径: D:\test\testFile.doc