Java 之 Word 转 TXT：解决 TXT 乱码

Shaka 5月前 ⋅ 126 阅读

 

/**
 * Converts the Word documents found directly inside {@code srcPath} into
 * plain-text files written to {@code targetPath}.
 *
 * <p>Files ending in {@code .docx}/{@code .docm} are read with
 * {@code XWPFWordExtractor}; files ending in {@code .doc} are read with
 * {@code WordExtractor}. Any other entry is left untouched. The extension
 * match is case-insensitive. Sub-directories are not descended into.
 *
 * <p>Counters: {@code allFile} is reset and then incremented for every
 * existing entry (hidden ones included); {@code successCopy} is reset and then
 * incremented for every non-hidden entry that was processed without an
 * error. A failure on one file is reported on stderr and does not stop the
 * remaining files from being processed.
 *
 * @param srcPath    directory whose entries are scanned; if it cannot be
 *                   listed the method returns after resetting the counters
 * @param targetPath directory that receives the generated {@code .txt} files
 */
public void execute(String srcPath, String targetPath) {
    allFile = 0;
    successCopy = 0;
    File[] files = new File(srcPath).listFiles();
    if (files == null) {
        // listFiles() returns null when srcPath is not a listable directory.
        return;
    }
    for (File srcFile : files) {
        if (!srcFile.exists()) {
            continue;
        }
        allFile++;
        if (srcFile.isHidden()) {
            continue;
        }
        try {
            convertToText(srcFile, targetPath);
            // Count only after the file went through without an exception,
            // so a failed conversion is never reported as a success.
            successCopy++;
        } catch (IOException | XmlException | OpenXML4JException e) {
            // Keep going: one unreadable document must not abort the batch.
            System.err.println("Failed to convert " + srcFile.getAbsolutePath());
            e.printStackTrace();
        }
    }
}

/**
 * Converts a single file to text if it has a supported Word extension;
 * does nothing for directories and for any other extension.
 */
private void convertToText(File srcFile, String targetPath)
        throws IOException, XmlException, OpenXML4JException {
    if (!srcFile.isFile()) {
        return;
    }
    String srcFileName = srcFile.getName();
    int dot = srcFileName.lastIndexOf('.');
    if (dot < 0) {
        // No extension at all, so it cannot be a supported Word document.
        return;
    }
    String extension = srcFileName.substring(dot);

    String text;
    if (".docx".equalsIgnoreCase(extension) || ".docm".equalsIgnoreCase(extension)) {
        text = readOoxmlText(srcFile);
    } else if (".doc".equalsIgnoreCase(extension)) {
        text = readBinaryDocText(srcFile);
    } else {
        return;
    }

    // Swap only the trailing extension; String.replace would also rewrite
    // earlier occurrences of the same text inside the file name.
    String targetName = srcFileName.substring(0, dot) + ".txt";
    // File(parent, child) inserts the platform separator itself.
    writeText(new File(targetPath, targetName), text);
}

/** Extracts the text of a {@code .docx}/{@code .docm} file. */
private String readOoxmlText(File srcFile)
        throws IOException, XmlException, OpenXML4JException {
    OPCPackage opcPackage = POIXMLDocument.openPackage(srcFile.getAbsolutePath());
    try {
        POIXMLTextExtractor docx = new XWPFWordExtractor(opcPackage);
        return docx.getText();
    } finally {
        // revert() releases the package without saving anything back to the
        // source document, which is what a read-only extraction needs.
        opcPackage.revert();
    }
}

/** Extracts the text of a legacy binary {@code .doc} file. */
private String readBinaryDocText(File srcFile) throws IOException {
    try (InputStream is = new FileInputStream(srcFile.getAbsolutePath())) {
        WordExtractor doc = new WordExtractor(is);
        return doc.getText();
    }
}

/**
 * Writes {@code text} followed by a line separator to {@code target}.
 *
 * <p>The output is always encoded as GBK, the charset the original
 * {@code .doc} branch used; presumably it was chosen so the result opens
 * without garbling in Chinese-locale Windows editors (TODO confirm).
 * Using it for every input keeps the output encoding independent of the
 * platform default charset.
 */
private void writeText(File target, String text) throws IOException {
    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(target), "GBK"))) {
        bw.write(text);
        bw.newLine();
    }
}

注意:本文归作者所有,未经作者允许,不得转载

全部评论: 0

    我有话说: