本文展示如何使用 Spire.Doc for Java 提取 Word 文档中的文本和图片。
提取文本
import com.spire.doc.Document;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Extracts the text of a Word document with Spire.Doc for Java and appends it
 * to a plain-text file.
 */
public class ExtractText {

    /**
     * Loads the sample document, reads its text and writes it to
     * {@code extractedtext.txt} in the working directory.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the output file cannot be opened, written or closed
     */
    public static void main(String[] args) throws IOException {
        // Load the Word document.
        Document document = new Document();
        document.loadFromFile("c:\\users\\administrator\\desktop\\sample.docx");

        // Get the document's text as a single String.
        String text = document.getText();

        // Write the String to a .txt file.
        writeStringToTxt(text, "extractedtext.txt");
    }

    /**
     * Appends {@code content} to the file named {@code txtFileName}, creating
     * the file if it does not exist.
     *
     * <p>The file is opened in append mode, so running the program repeatedly
     * accumulates text instead of overwriting it. {@link FileWriter} encodes
     * with the platform default charset.
     *
     * @param content     the text to append
     * @param txtFileName path of the target text file
     * @throws IOException if the file cannot be opened, written or closed
     */
    public static void writeStringToTxt(String content, String txtFileName) throws IOException {
        // try-with-resources flushes and closes the writer on every path and
        // lets write failures reach the caller instead of being only printed.
        try (FileWriter writer = new FileWriter(txtFileName, true)) {
            writer.write(content);
        }
    }
}
提取图片
import com.spire.doc.document;
import com.spire.doc.documents.documentobjecttype;
import com.spire.doc.fields.docpicture;
import com.spire.doc.interfaces.icompositeobject;
import com.spire.doc.interfaces.idocumentobject;
import javax.imageio.imageio;
import java.awt.image.bufferedimage;
import java.io.file;
import java.io.ioexception;
import java.util.arraylist;
import java.util.linkedlist;
import java.util.list;
import java.util.queue;
public class extractimages {
public static void main(string[] args) throws ioexception {
//加载word文档
document document = new document();
document.loadfromfile("c:\\users\\administrator\\desktop\\sample.docx");
//创建queue对象
queue nodes = new linkedlist();
nodes.add(document);
//创建list对象
list images = new arraylist();
//遍历文档中的子对象
while (nodes.size() > 0) {
icompositeobject node = nodes.poll();
for (int i = 0; i < node.getchildobjects().getcount(); i ) {
idocumentobject child = node.getchildobjects().get(i);
if (child instanceof icompositeobject) {
nodes.add((icompositeobject) child);
//获取图片并添加到list
if (child.getdocumentobjecttype() == documentobjecttype.picture) {
docpicture picture = (docpicture) child;
images.add(picture.getimage());
}
}
}
}
//将图片保存为png格式文件
for (int i = 0; i < images.size(); i ) {
file file = new file(string.format("output/图片-%d.png", i));
imageio.write(images.get(i), "png", file);
}
}
}