admin管理员组文章数量:1605173
Java实现pdf转word 完美转换,附代码,不限页数
可以直接下载可执行jar包
点击下载
文中用到的aspose-pdf jar包下载链接
aspose-pdf.jar
maven配置
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache/POM/4.0.0"
xmlns:xsi="http://www.w3/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache/POM/4.0.0 http://maven.apache/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>pdf2word</artifactId>
<version>1</version>
<properties>
<mavenpiler.source>8</mavenpiler.source>
<mavenpiler.target>8</mavenpiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.javassist</groupId>
<artifactId>javassist</artifactId>
<version>3.20.0-GA</version>
</dependency>
<!-- poi-ooxml是poi的升级版本-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-pdf</artifactId>
<version>22.1</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<appendAssemblyId>false</appendAssemblyId>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>demo.PDFHelper3</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>assembly</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<distributionManagement>
<repository>
<id>localRepository</id>
<url>file:C:/Users/shao/.m2/repository</url>
</repository>
</distributionManagement>
</project>
源代码
package demo;
import com.aspose.pdf.Document;
import com.aspose.pdf.SaveFormat;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Scanner;
public class PDFHelper3 {
public static void main(String[] args) throws IOException {
System.out.println("请输入要转换的pdf文件路径和文件名(例如C:\\A.pdf)," +
"根据文件大小需要一点的时间,请耐心等候!可以直接拖pdf文文件进来。");
Scanner sc = new Scanner(System.in);
String path = sc.nextLine();
path = path.replace("\\\\","\\\\\\\\");
path = path.replace("\"","");
pdf2doc(path);
System.out.println("请在源文件处查看docx文件");
}
//移除文字水印
public static boolean removeWatermark(File file) {
try {
XWPFDocument doc = new XWPFDocument(new FileInputStream(file));
// 段落
List<XWPFParagraph> paragraphs = doc.getParagraphs();
for (XWPFParagraph paragraph : paragraphs) {
String text=paragraph.getText();
if("Evaluation Only. Created with Aspose.PDF. Copyright 2002-2021 Aspose Pty Ltd.".equals(text)){
List<XWPFRun> runs = paragraph.getRuns();
runs.forEach(e-> e.setText("",0));
}
}
FileOutputStream outStream = new FileOutputStream(file);
doc.write(outStream);
outStream.close();
} catch (IOException e) {
e.printStackTrace();
}
return true;
}
public static void pdf2doc(String pdfPath) {
long old = System.currentTimeMillis();
try {
//新建一个pdf文档
String wordPath=pdfPath.substring(0,pdfPath.lastIndexOf("."))+".docx";
File file = new File(wordPath);
FileOutputStream os = new FileOutputStream(file);
//Address是将要被转化的word文档
Document doc = new Document(pdfPath);
//全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF, EPUB, XPS, SWF 相互转换
doc.save(os, SaveFormat.DocX);
os.close();
//去除水印
removeWatermark(new File(wordPath));
//转化用时
long now = System.currentTimeMillis();
System.out.println("Pdf 转 Word 共耗时:" + ((now - old) / 1000.0) + "秒");
} catch (Exception e) {
System.out.println("Pdf 转 Word 失败...");
e.printStackTrace();
}
}
}
版权声明:本文标题:Java实现pdf转word 完美转换,附代码,不限页数 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://www.elefans.com/dianzi/1728479069a1159930.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论