10万行数据,80列,总共耗时12s。
经测试:入数据库140秒内【cpu 消耗 0.5核,内存消耗:150M内】
jdk8
1,添加poi mavne依赖:
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.17</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.17</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.17</version> </dependency>
2,创建目录 directory
并且在目录directory
新建一个接口 BigReadExcelProcessData.java
再实现该接口,新建 BigReadExcelProcessimpl.java
。
① :接口BigReadExcelProcessData.java
内容:
package com.murdock.examples.kuxingseng.directory;import java.util.Map;public interface BigReadExcelProcessData { void processData(Map map);}
②:实现类BigReadExcelProcessimpl.java
内容:
package com.murdock.examples.kuxingseng.directory.imp;import com.murdock.examples.kuxingseng.directory.BigReadExcelProcessData;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import java.util.Map;public class BigReadExcelProcessimpl implements BigReadExcelProcessData { private final static Logger logger = LoggerFactory.getLogger(BigReadExcelProcessimpl.class); @Override public void processData(Map map) { logger.info("map:" + map); }}
3,封装excel 工具类,新建BigReadExcelUtils.java。
package com.murdock.examples.kuxingseng.utils;import com.murdock.examples.kuxingseng.directory.BigReadExcelProcessData;import org.apache.commons.lang.StringUtils;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.xssf.eventusermodel.XSSFReader;import org.apache.poi.xssf.model.SharedStringsTable;import org.apache.poi.xssf.usermodel.XSSFRichTextString;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import org.xml.sax.*;import org.xml.sax.helpers.DefaultHandler;import org.xml.sax.helpers.XMLReaderFactory;import java.io.InputStream;import java.util.*;import java.util.regex.Pattern;public class BigReadExcelUtils { private final static Logger log = LoggerFactory.getLogger(BigReadExcelUtils.class); private final int startRow; private final int endRow; private int currentRow = 0; private final String filename;//文件的绝对路径 private BigReadExcelProcessData processData; /** * 构造方法 */ public BigReadExcelUtils(String pathFileName, BigReadExcelProcessData process) throws Exception { if (StringUtils.isEmpty(pathFileName)) throw new Exception("file is null"); processData = process; this.filename = pathFileName; this.startRow = 0; this.endRow = 100_0000;//设置默认最大解析100W行数据 processSheet(); } /** * 指定获取第一个sheet */ private void processSheet() throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> it = r.getSheetsData(); while (it.hasNext()) { InputStream sheet1 = it.next(); InputSource sheetSource = new InputSource(sheet1); parser.parse(sheetSource); sheet1.close(); } } /** * 加载sax 解析器 */ private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { String saxParser = "com.sun.org.apache.xerces.internal.parsers.SAXParser"; XMLReader parser = XMLReaderFactory.createXMLReader(saxParser); ContentHandler handler = new PagingHandler(sst); parser.setContentHandler(handler); return parser; } private class PagingHandler extends DefaultHandler { private SharedStringsTable sst; private String lastContents; private boolean nextIsString; private String index = null; private Object NULL = null; private Map<String, String> map = new HashMap<>(100);//初始化内存空间,性能优化,10W数据能减少30秒左右(受列影响) private Pattern pattern = Pattern.compile("^A[0-9]+$"); private PagingHandler(SharedStringsTable table) { this.sst = table; } /** * 获取key 值 */ @Override public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { if (name.equals("c")) { index = attributes.getValue("r"); //判断是否是新的一行 if (pattern.matcher(index).find()) { if (map != NULL && isAccess() && !map.isEmpty()) { processData.processData(map); map.clear();//使用完数据,清理数据 } currentRow++; } if (isAccess()) { String cellType = attributes.getValue("t"); if (cellType != null && cellType.equals("s")) { nextIsString = true; } else { nextIsString = false; } } } lastContents = ""; } /** * 获取value */ @Override public void endElement(String uri, String localName, String name) throws SAXException { if (isAccess()) { if (nextIsString) { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); nextIsString = false; } if (name.equals("v")) { map.put(index, lastContents); } } } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (isAccess()) { lastContents += new String(ch, start, length); } } @Override public void endDocument() throws SAXException { if (map != null && isAccess() && !map.isEmpty()) { processData.processData(map); map.clear(); } } } private boolean isAccess() { if (currentRow >= startRow && startRow <= endRow) { return true; } return false; }}
以上代码可以直接使用亲测试通过,处理60M Excel占用内存260M 左右。
4,测试:
package com.murdock.examples.kuxingseng.directory.imp;import com.murdock.examples.kuxingseng.directory.BigReadExcelProcessData;import com.murdock.examples.kuxingseng.utils.BigReadExcelUtils;import org.junit.Test;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import static org.junit.Assert.*;public class BigReadExcelProcessimplTest { private final static Logger logger = LoggerFactory.getLogger(BigReadExcelProcessimplTest.class); @Test public void processData() { String filePath = "D:\\tmp\\demo.xlsx"; try { BigReadExcelProcessData processData = new BigReadExcelProcessimpl(); new BigReadExcelUtils(filePath, processData); } catch (Exception e) { logger.error("process faile.", e); } }}
结果:
18:58:43.416 [main] INFO com.murdock.examples.kuxingseng.directory.imp.BigReadExcelProcessimpl - map:{M1=测试13, I1=测试9, E1=测试5, A1=测试1, N1=测试14, J1=测试10, F1=测试6, B1=测试2, O1=测试15, K1=测试11, G1=测试7, C1=测试3, L1=测试12, H1=测试8, D1=测试4}18:58:43.422 [main] INFO com.murdock.examples.kuxingseng.directory.imp.BigReadExcelProcessimpl - map:{M2=测试13, I2=测试9, E2=测试5, A2=测试1, N2=测试14, J2=测试10, F2=测试6, B2=测试2, O2=测试15, K2=测试11, G2=测试7, C2=测试3, L2=测试12, H2=测试8, D2=测试4}18:58:43.424 [main] INFO com.murdock.examples.kuxingseng.directory.imp.BigReadExcelProcessimpl - map:{M3=测试13, I3=测试9, E3=测试5, A3=测试1, N3=测试14, J3=测试10, F3=测试6, B3=测试2, O3=测试15, K3=测试11, G3=测试7, C3=测试3, L3=测试12, H3=测试8, D3=测试4}