javapoiword
⑴ java POI 如何操作word 格式
1、環境支持
1.1 添加poi支持:包下載地址http://www.apache.org/dyn/closer.cgi/poi/release/
1.2 POI對Excel文件的讀取操作比較方便,POI還提供對Word的DOC格式文件的讀取。但在它的發行版本中沒有發布對Word支持的模塊,需要另外下載一個POI的擴展的Jar包。下載地址為http://www.ibiblio.org/maven2/org/textmining/tm-extractors/0.4/ 下載extractors-0.4_zip這個文件
package com.ray.poi.util;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.textmining.text.extraction.WordExtractor;
/**
 * Helpers for extracting text from, and writing text into, Word {@code .doc} files.
 *
 * @author wangzonghao
 */
public class POIWordUtil {
    /**
     * Extracts the text of a DOC file.
     *
     * @param doc path of the DOC file to read
     * @return the text produced by {@code WordExtractor.extractText}
     * @throws Exception if the file cannot be opened or the extraction fails
     */
    public static String readDoc(String doc) throws Exception {
        // Open an input stream on the DOC file
        FileInputStream in = new FileInputStream(new File(doc));
        try {
            // Create the extractor and pull the text out of the stream
            WordExtractor extractor = new WordExtractor();
            return extractor.extractText(in);
        } finally {
            // Release the file handle even when extraction throws
            in.close();
        }
    }

    /**
     * Stores {@code content} as the {@code "WordDocument"} entry of a new POIFS
     * container and writes that container to {@code path}.
     *
     * @param path    destination file path
     * @param content text to store
     * @return {@code true} when the file was written, {@code false} when an
     *         {@link IOException} occurred (its stack trace is printed)
     */
    public static boolean writeDoc(String path, String content) {
        try {
            // NOTE(review): getBytes() uses the platform default charset, which may be
            // why the resulting file prompts for an encoding when opened - confirm.
            byte[] bytes = content.getBytes();
            ByteArrayInputStream source = new ByteArrayInputStream(bytes);
            POIFSFileSystem fs = new POIFSFileSystem();
            DirectoryEntry directory = fs.getRoot();
            directory.createDocument("WordDocument", source);
            FileOutputStream ostream = new FileOutputStream(path);
            try {
                fs.writeFilesystem(ostream);
            } finally {
                // Always close the output file, even if writing fails
                ostream.close();
            }
            // Report success; the original never set the flag and always returned false
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }
}
測試
package com.ray.poi.util;
import junit.framework.TestCase;
/**
 * JUnit 3 tests for {@code POIWordUtil}.
 *
 * Exceptions are allowed to propagate so that a failed read or write fails the
 * test instead of only printing a stack trace.
 */
public class POIUtilTest extends TestCase {
    /** Reads the demo document and prints the extracted text. */
    public void testReadDoc() throws Exception {
        String text = POIWordUtil.readDoc("E:/work_space/poi/com/ray/poi/util/demo.doc");
        assertNotNull(text);
        System.out.println(text);
    }

    /** Reads the demo document and writes the extracted text to a new file. */
    public void testWriteDoc() throws Exception {
        String wr = POIWordUtil.readDoc("E:/work_space/poi/com/ray/poi/util/demo.doc");
        POIWordUtil.writeDoc("c:\\demo.doc", wr);
    }
}
⑵ JAVA使用POI讀寫word 亂碼
寫
/**
 * Builds a sample document with three formatted paragraphs using the XWPF API
 * and saves it as {@code simple.docx} in the working directory.
 *
 * @param args unused
 * @throws Exception if the document cannot be written
 */
public static void main(String args[])
        throws Exception
{
    XWPFDocument doc = new XWPFDocument();

    // Paragraph 1: centred, boxed with double borders
    XWPFParagraph p1 = doc.createParagraph();
    p1.setAlignment(ParagraphAlignment.CENTER);
    p1.setBorderBottom(Borders.DOUBLE);
    p1.setBorderTop(Borders.DOUBLE);
    p1.setBorderRight(Borders.DOUBLE);
    p1.setBorderLeft(Borders.DOUBLE);
    p1.setBorderBetween(Borders.SINGLE);
    p1.setVerticalAlignment(TextAlignment.TOP);
    XWPFRun r1 = p1.createRun();
    r1.setBold(true);
    r1.setText("The quick brown fox");
    r1.setFontFamily("Courier");
    r1.setUnderline(UnderlinePatterns.DOT_DOT_DASH);
    r1.setTextPosition(100);

    // Paragraph 2: right-aligned, two struck-through runs
    XWPFParagraph p2 = doc.createParagraph();
    p2.setAlignment(ParagraphAlignment.RIGHT);
    p2.setBorderBottom(Borders.DOUBLE);
    p2.setBorderTop(Borders.DOUBLE);
    p2.setBorderRight(Borders.DOUBLE);
    p2.setBorderLeft(Borders.DOUBLE);
    p2.setBorderBetween(Borders.SINGLE);
    XWPFRun r2 = p2.createRun();
    r2.setText("jumped over the lazy dog");
    r2.setStrike(true);
    r2.setFontSize(20);
    XWPFRun r3 = p2.createRun();
    r3.setText("and went away");
    r3.setStrike(true);
    r3.setFontSize(20);
    r3.setSubscript(VerticalAlign.SUPERSCRIPT);

    // Paragraph 3: justified text containing page, line and carriage-return breaks
    XWPFParagraph p3 = doc.createParagraph();
    p3.setWordWrap(true);
    p3.setPageBreak(true);
    p3.setAlignment(ParagraphAlignment.BOTH);
    p3.setSpacingLineRule(LineSpacingRule.EXACT);
    p3.setIndentationFirstLine(600);
    XWPFRun r4 = p3.createRun();
    r4.setTextPosition(20);
    r4.setText("To be, or not to be: that is the question: Whether 'tis nobler in the mind to suffer The slings and arrows of outrageous fortune, Or to take arms against a sea of troubles, And by opposing end them? To die: to sleep; ");
    r4.addBreak(BreakType.PAGE);
    r4.setText("No more; and by a sleep to say we end The heart-ache and the thousand natural shocks That flesh is heir to, 'tis a consummation Devoutly to be wish'd. To die, to sleep; To sleep: perchance to dream: ay, there's the rub; .......");
    r4.setItalic(true);
    XWPFRun r5 = p3.createRun();
    r5.setTextPosition(-10);
    r5.setText("For in that sleep of death what dreams may come");
    r5.addCarriageReturn();
    r5.setText("When we have shuffled off this mortal coil,Must give us pause: there's the respectThat makes calamity of so long life;");
    r5.addBreak();
    r5.setText("For who would bear the whips and scorns of time,The oppressor's wrong, the proud man's contumely,");
    r5.addBreak(BreakClear.ALL);
    r5.setText("The pangs of despised love, the law's delay,The insolence of office and the spurns.......");

    FileOutputStream out = new FileOutputStream("simple.docx");
    try {
        doc.write(out);
    } finally {
        // Close the file even when writing the document fails
        out.close();
    }
}
⑶ java 中用poi讀取word和用docx4j讀取word
不知道你是具體讀取Word裡面的什麼元素,下面以讀取文字和圖片為例吧,兩個代碼示例,你參考看看:
1. 讀取文本
import com.spire.doc.Document;
import java.io.FileWriter;
import java.io.IOException;
public class ExtractText {
public static void main(String[] args) throws IOException {
//載入Word文檔
Document document = new Document();
document.loadFromFile("C:\Users\Administrator\Desktop\sample.docx");
//獲取文檔中的文本保存為String
String text=document.getText();
//將String寫入Txt文件
writeStringToTxt(text,"ExtractedText.txt");
}
public static void writeStringToTxt(String content, String txtFileName) throws IOException {
FileWriter fWriter= new FileWriter(txtFileName,true);
try {
fWriter.write(content);
}catch(IOException ex){
ex.printStackTrace();
}finally{
try{
fWriter.flush();
fWriter.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}}
2. 讀取圖片
import com.spire.doc.Document;
import com.spire.doc.documents.DocumentObjectType;
import com.spire.doc.fields.DocPicture;
import com.spire.doc.interfaces.ICompositeObject;
import com.spire.doc.interfaces.IDocumentObject;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
public class ExtractImages {
public static void main(String[] args) throws IOException {
//載入Word文檔
Document document = new Document();
document.loadFromFile("C:\Users\Administrator\Desktop\sample.docx");
//創建Queue對象
Queue nodes = new LinkedList();
nodes.add(document);
//創建List對象
List images = new ArrayList();
//遍歷文檔中的子對象
while (nodes.size() > 0) {
ICompositeObject node = nodes.poll();
for (int i = 0; i < node.getChildObjects().getCount(); i++) {
IDocumentObject child = node.getChildObjects().get(i);
if (child instanceof ICompositeObject) {
nodes.add((ICompositeObject) child);
//獲取圖片並添加到List
if (child.getDocumentObjectType() == DocumentObjectType.Picture) {
DocPicture picture = (DocPicture) child;
images.add(picture.getImage());
}
}
}
}
//將圖片保存為PNG格式文件
for (int i = 0; i < images.size(); i++) {
File file = new File(String.format("output/圖片-%d.png", i));
ImageIO.write(images.get(i), "PNG", file);
}
}
}
注意這裡使用的jar包是spire.doc.jar,需要在java程序中先導入jar文件。
⑷ Java 利用poi 可以直接讀取word中的表格保持樣式生成新的word么
1. 讀取 word 2003 及 word 2007 需要的 jar 包
讀取 2003 版本(.doc)的 word 文件相對來說比較簡單,只需要 poi-3.5-beta6-20090622.jar 和 poi-scratchpad-3.5-beta6-20090622.jar 兩個 jar 包即可,而 2007 版本(.docx)就麻煩多,我說的這個麻煩不是我們寫代碼的時候麻煩,是要導入的 jar 包比較的多,有如下 7 個之多:
1. openxml4j-bin-beta.jar
2. poi-3.5-beta6-20090622.jar
3. poi-ooxml-3.5-beta6-20090622.jar
4. dom4j-1.6.1.jar
5. geronimo-stax-api_1.0_spec-1.0.jar
6. ooxml-schemas-1.0.jar
7. xmlbeans-2.3.0.jar
其中 4-7 是 poi-ooxml-3.5-beta6-20090622.jar 所依賴的 jar 包(在 poi-bin-3.5-beta6-20090622.tar.gz 中的 ooxml-lib 目錄下可以找到)。
2.換行符號
硬換行:文件中因按下鍵盤「Enter」鍵而產生的換行。
軟換行:文件中一行的字元數容量有限,當字元數量超過一定值時,會自動切到下行顯示。
對程序來說,硬換行才是可以識別的、確定的換行,軟換行與字體大小、縮進有關。
3.讀取的注意事項
值得注意的是:POI 在讀取時不會讀取 word 文件中的圖片信息;還有就是對於 2007 版的 word(.docx),如果 word 文件中有表格,所有表格中的數據都會在讀取出來的字元串的最後。
4.讀取word文本內容代碼
1
import
java.io.File;
2
import
java.io.FileInputStream;
3
import
java.io.InputStream;
4
5
import
org.apache.poi.POIXMLDocument;
6
import
org.apache.poi.POIXMLTextExtractor;
7
import
org.apache.poi.hwpf.extractor.WordExtractor;
8
import
org.apache.poi.openxml4j.opc.OPCPackage;
9
import
org.apache.poi.xwpf.extractor.XWPFWordExtractor;
10
11
public
class
Test
{
12
public
static
void
main(String[]
args)
{
13
try
{
14
InputStream
is
=
new
FileInputStream(new
File("2003.doc"));
15
WordExtractor
ex
=
new
WordExtractor(is);
16
String
text2003
=
ex.getText();
17
System.out.println(text2003);
18
19
OPCPackage
opcPackage
=
POIXMLDocument.openPackage("2007.docx");
20
POIXMLTextExtractor
extractor
=
new
XWPFWordExtractor(opcPackage);
21
String
text2007
=
extractor.getText();
22
System.out.println(text2007);
23
24
}
catch
(Exception
e)
{
25
e.printStackTrace();
26
}
27
}
28
}
⑸ 如何使用JAVA,POI讀寫word文檔
/**
 * Demonstrates what the HWPF {@code WordExtractor} can read from a {@code .doc} file.
 */
public class HwpfTest {
    /**
     * Opens {@code D:\test.doc} and prints its text, header, footer, metadata,
     * paragraphs and summary information.
     *
     * @throws Exception if the file cannot be opened or read
     */
    @SuppressWarnings("deprecation")
    @Test
    public void testReadByExtractor() throws Exception {
        InputStream is = new FileInputStream("D:\\test.doc");
        try {
            WordExtractor extractor = new WordExtractor(is);
            // Print all text of the Word document
            System.out.println(extractor.getText());
            System.out.println(extractor.getTextFromPieces());
            // Print the header text
            System.out.println("頁眉:" + extractor.getHeaderText());
            // Print the footer text
            System.out.println("頁腳:" + extractor.getFooterText());
            // Print the document metadata (author, modification time, ...)
            System.out.println(extractor.getMetadataTextExtractor().getText());
            // Print the text of each paragraph
            String paraTexts[] = extractor.getParagraphText();
            for (int i = 0; i < paraTexts.length; i++) {
                System.out.println("Paragraph " + (i + 1) + " : " + paraTexts[i]);
            }
            // Print the summary information
            this.printInfo(extractor.getSummaryInformation());
            // Print the document summary information
            this.printInfo(extractor.getDocSummaryInformation());
        } finally {
            // Close the stream even when one of the reads above throws
            this.closeStream(is);
        }
    }

    /**
     * Prints selected fields of a SummaryInformation.
     *
     * @param info summary information to print
     */
    private void printInfo(SummaryInformation info) {
        // Author
        System.out.println(info.getAuthor());
        // Character count
        System.out.println(info.getCharCount());
        // Page count
        System.out.println(info.getPageCount());
        // Title
        System.out.println(info.getTitle());
        // Subject
        System.out.println(info.getSubject());
    }

    /**
     * Prints selected fields of a DocumentSummaryInformation.
     *
     * @param info document summary information to print
     */
    private void printInfo(DocumentSummaryInformation info) {
        // Category
        System.out.println(info.getCategory());
        // Company
        System.out.println(info.getCompany());
    }

    /**
     * Closes the input stream, printing (not rethrowing) any IOException.
     *
     * @param is stream to close; may be {@code null}
     */
    private void closeStream(InputStream is) {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
⑹ Java poi操作word, 插入文字會多出一個換行
目測應該是多添加了段落,試著獲取原有單元格中的段落然後添加文本
⑺ 怎麼使用JAVA,POI讀寫word文檔
如何使用JAVA、POI讀寫word文檔??
能不能將一個word的內容完全讀過來,放到一個新生成的word文件中去,要求能將word中的表格、圖片等保留,格式不變。最好能給個例子?網上多是很早以前的解決方法(如下),只能讀文本內容,且新生成的word文件打開時總是要提示選擇編碼,不太好用,希望能有新的解決方案!
poi操作word
1.1 添加poi支持:包下載地址http://www.apache.org/dyn/closer.cgi/poi/release/
1.2 POI對Excel文件的讀取操作比較方便,POI還提供對Word的DOC格式文件的讀取。但在它的發行版本中沒有發布對Word支持的模塊,需要另外下載一個POI的擴展的Jar包。下載地址為http://www.ibiblio.org/maven2/org/textmining/tm-extractors/0.4/ 下載extractors-0.4_zip這個文件
2、提取Doc文件內容
/**
 * Extracts the text of a DOC file.
 *
 * @param doc path of the DOC file to read
 * @return the text produced by {@code WordExtractor.extractText}
 * @throws Exception if the file cannot be opened or the extraction fails
 */
public static String readDoc(String doc) throws Exception {
    // Open an input stream on the DOC file
    FileInputStream in = new FileInputStream(new File(doc));
    try {
        // Create the extractor and pull the text out of the stream
        WordExtractor extractor = new WordExtractor();
        return extractor.extractText(in);
    } finally {
        // Release the file handle even when extraction throws
        in.close();
    }
}
/**
 * Prints the text extracted from {@code c:/test.doc}; any failure is reported
 * as a stack trace instead of being propagated.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        System.out.println(WordReader.readDoc("c:/test.doc"));
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}
3、寫入Doc文檔
import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class WordWriter {
/**
 * Stores {@code content} as the {@code "WordDocument"} entry of a new POIFS
 * container and writes that container to {@code path}.
 *
 * @param path    destination file path
 * @param content text to store
 * @return {@code true} when the file was written, {@code false} when an
 *         {@link IOException} occurred (its stack trace is printed)
 */
public static boolean writeDoc(String path, String content) {
    try {
        // NOTE(review): getBytes() uses the platform default charset, which may be
        // why the resulting file prompts for an encoding when opened - confirm.
        byte[] bytes = content.getBytes();
        ByteArrayInputStream source = new ByteArrayInputStream(bytes);
        POIFSFileSystem fs = new POIFSFileSystem();
        DirectoryEntry directory = fs.getRoot();
        directory.createDocument("WordDocument", source);
        FileOutputStream ostream = new FileOutputStream(path);
        try {
            fs.writeFilesystem(ostream);
        } finally {
            // Always close the output file, even if writing fails
            ostream.close();
        }
        // Report success; the original never set the flag and always returned false
        return true;
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
public static void main(String[] args) throws Exception{
String wr=WordReader.readDoc("D:\\test.doc");
boolean b = writeDoc("D:\\result.doc",wr);
⑻ java poi 怎麼控制輸出word每行的內容
你好,試試以下代碼行不行。
package com.sample;
import java.awt.Color;
import java.io.FileOutputStream;
import java.io.IOException;
import com.lowagie.text.Cell;
import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.Element;
import com.lowagie.text.Font;
import com.lowagie.text.FontFactory;
import com.lowagie.text.Image;
import com.lowagie.text.PageSize;
import com.lowagie.text.Paragraph;
import com.lowagie.text.Phrase;
import com.lowagie.text.Table;
import com.lowagie.text.pdf.BaseFont;
import com.lowagie.text.rtf.RtfWriter2;
/**
 * Demo that builds a document through {@code RtfWriter2}: a title, a body
 * paragraph, an underlined paragraph, a three-column table and an image.
 *
 * @author wangyanjun
 * @email [email protected]
 * @createDate Jun 12, 2008
 */
public class CreateWordDemo {
/**
 * Creates the demo document and writes it to the given file.
 *
 * @param file path of the output file
 * @throws DocumentException declared for the document/table operations used below
 * @throws IOException declared for the file, font and image access used below
 */
public void createDocContext(String file) throws DocumentException,
IOException {
// Set the page size (A4)
Document document = new Document(PageSize.A4);
// Create a writer bound to the document; the writer sends the document to the file on disk
RtfWriter2.getInstance(document, new FileOutputStream(file));
document.open();
// Set up a Chinese base font
BaseFont bfChinese = BaseFont.createFont("STSongStd-Light",
"UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
// Font style for the title
Font titleFont = new Font(bfChinese, 12, Font.BOLD);
// Font style for the body text
Font contextFont = new Font(bfChinese, 10, Font.NORMAL);
Paragraph title = new Paragraph("標題");
// Centre the title
title.setAlignment(Element.ALIGN_CENTER);
// NOTE(review): the font is set after the text was passed to the constructor;
// confirm that it is applied to the already-added text.
title.setFont(titleFont);
document.add(title);
String contextString = "iText是一個能夠快速產生PDF文件的java類庫。"
+ " \n"// line break
+ "iText的java類對於那些要產生包含文本,"
+ "表格,圖形的只讀文檔是很有用的。它的類庫尤其與java Servlet有很好的給合。"
+ "使用iText與PDF能夠使你正確的控制Servlet的輸出。";
Paragraph context = new Paragraph(contextString);
// Left-align the body text
context.setAlignment(Element.ALIGN_LEFT);
context.setFont(contextFont);
// Spacing before this paragraph (after the title)
context.setSpacingBefore(5);
// Indentation of the first line
context.setFirstLineIndent(20);
document.add(context);
// FontFactory combined with Font and Color can produce many different font styles
/**
* Font.UNDERLINE is underline, Font.BOLD is bold
*/
Paragraph underline = new Paragraph("下劃線的實現", FontFactory.getFont(
FontFactory.HELVETICA_BOLDOBLIQUE, 18, Font.UNDERLINE,
new Color(0, 0, 255)));
document.add(underline);
// Set up the table (3 columns)
Table aTable = new Table(3);
int width[] = {25,25,50};
aTable.setWidths(width);// relative width of each column
aTable.setWidth(90); // 90% of the page width
aTable.setAlignment(Element.ALIGN_CENTER);// centred
// NOTE(review): second setAlignment call on the same table; presumably it
// replaces the value set on the previous line - confirm the intent.
aTable.setAlignment(Element.ALIGN_MIDDLE);// vertically centred (per the original comment)
aTable.setAutoFillEmptyCells(true); // fill empty cells automatically
aTable.setBorderWidth(1); // border width
aTable.setBorderColor(new Color(0, 125, 255)); // border colour
aTable.setPadding(2);// cell padding
aTable.setSpacing(3);// spacing between cells
aTable.setBorder(2);// border
// Set up the table header
/**
* cell.setHeader(true) marks the cell as header information;
* cell.setColspan(3) makes the cell span 3 columns;
* once all header cells have been added, endHeaders() must be called,
* otherwise the header is not repeated when the table continues on a new page
*/
Cell haderCell = new Cell("表格表頭");
haderCell.setHeader(true);
haderCell.setColspan(3);
aTable.addCell(haderCell);
aTable.endHeaders();
Font fontChinese = new Font(bfChinese, 12, Font.NORMAL, Color.GREEN);
Cell cell = new Cell(new Phrase("這是一個測試的 3*3 Table 數據", fontChinese ));
cell.setVerticalAlignment(Element.ALIGN_TOP);
cell.setBorderColor(new Color(255, 0, 0));
cell.setRowspan(2);
aTable.addCell(cell);
aTable.addCell(new Cell("#1"));
aTable.addCell(new Cell("#2"));
aTable.addCell(new Cell("#3"));
aTable.addCell(new Cell("#4"));
Cell cell3 = new Cell(new Phrase("一行三列數據", fontChinese ));
cell3.setColspan(3);
cell3.setVerticalAlignment(Element.ALIGN_CENTER);
aTable.addCell(cell3);
document.add(aTable);
document.add(new Paragraph("\n"));
// Add an image
Image img=Image.getInstance("d:\\img01800.jpg");
img.setAbsolutePosition(0, 0);
img.setAlignment(Image.RIGHT);// position of the image
// NOTE(review): three scaling calls in a row; presumably only the last one
// determines the final size - confirm.
img.scaleAbsolute(12,35);// set the display size directly
img.scalePercent(50);// display at 50% of the original size
img.scalePercent(25, 12);// separate scaling percentages for the two dimensions
img.setRotation(30);// rotate the image by an angle
document.add(img);
document.close();
}
/**
 * Runs the demo, writing to c:/demo1.doc; failures are printed as stack traces.
 *
 * @param args unused
 */
public static void main(String[] args) {
CreateWordDemo word = new CreateWordDemo();
String file = "c:/demo1.doc";
try {
word.createDocContext(file);
} catch (DocumentException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
⑼ 怎麼用java poi生成word表格
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class XwpfTUtil {
/*String filePath = "/sta.docx";
InputStream is;
XWPFDocument doc;
Map<String, Object> params = new HashMap<String, Object>();
{
params.put("${name}", "xxx");
params.put("${sex}", "男");
params.put("${political}", "共青團員");
params.put("${place}", "sssss");
params.put("${classes}", "3102");
params.put("${id}", "213123123");
params.put("${qq}", "213123");
params.put("${tel}", "312313213");
params.put("${oldJob}", "sadasd");
params.put("${swap}", "是");
params.put("${first}", "asdasd");
params.put("${second}", "綜合事務部");
params.put("${award}", "asda");
params.put("${achievement}", "完成科協網站的開發");
params.put("${advice}", "沒有建議");