通过POI统一读取Excel文件(兼容97-2003和2007+两种格式)
1、引言
由于系统需要提供给用户导入Excel文件数据的功能,但Excel文件有97-2003和2007+两种格式,且要求给用户有较大的灵活性。导入Excel文件的处理无非就是读取Excel文件的内容,然后根据一定的业务规则进行校验,校验正确后处理写入系统。对Excel文件的读取可通过JXL或POI两个Jar来完成,决定使用POI来开发,但POI对两种格式的处理又有所不同,那么如何通过POI灵活读取Excel文件的内容呢?
2、分析
Excel文件的读取有以下读取情况
(1)读取整个工作表中的所有内容
(2)读取工作表中指定区域块的内容
(3)读取工作表中指定行列的内容
(4)读取工作表中指定单元格的内容
情况(2)中,当区域块的内容为整个工作表的内容时,即为情况(1),也就是说情况(1)为情况(2)的特例。
情况(3)中,当指定行列范围内容中的列范围连续时,即为情况(2),也就是说情况(2)为情况(3)的特例。
情况(4)中,当指定单元格内容中的行范围连续时,即为情况(3),也就是说情况(3)为情况(4)的特例。
从上述4种情况的分析可知,前3种情况均可视为情况(4)的特例,从而将工作表范围转化为指定单元格范围的处理。由于指定单元格范围可能存在上述的4种情况,因而提供灵活的并且能够覆盖这些情况的配置方式显得尤为关键。
行列范围参数中均采用“,”作为不连续值的分割符,采用“-”作为两个连续值的连接符,这样简化了用户的参数配置,同时也保留了配置的灵活性,例如:
(1)12- 表示查询范围为从第十二行(列)到EXCEL中有记录的最后一行(列);
(2)12-24 表示查询范围为从第十二行(列)到第二十四行(列);
(3)12-24,30 表示查询范围为从第十二行(列)到第二十四行(列)、第三十行(列)等;
3、解决过程
(1)POI处理
对Excel的读取,主要涉及工作薄、工作薄、行数据、单元格等的处理,POI对97-2003和2007+两个版本的处理采用不同的类,如下图所示。
其中:
a)Workbook、Sheet、Row、Cell等为接口;
b)HSSFWorkbook、HSSFSheet、HSSFRow、HSSFCell为97-2003版本对应的处理实现类;
c)XSSFWorkbook、XSSFSheet、XSSFRow、XSSFCell为2007+版本对应的处理实现类;
(2)针对POI接口统一Excel处理类PoiExcelHelper
import java.util.ArrayList;import org.apache.poi.ss.usermodel.Cell;import org.apache.poi.ss.usermodel.Row;import org.apache.poi.ss.usermodel.Sheet;/** * Excel统一POI处理类(针对2003以前和2007以后两种格式的兼容处理) * @authorchengesheng * @date2012-5-3 下午03:10:23 * @notePoiHelper */public abstract class PoiExcelHelper {public static final String SEPARATOR = ",";public static final String CONNECTOR = "-";/** 获取sheet列表,子类必须实现 */public abstract ArrayList<String> getSheetList(String filePath);/** 读取Excel文件数据 */public ArrayList<ArrayList<String>> readExcel(String filePath, int sheetIndex) {return readExcel(filePath, sheetIndex, "1-", "1-");}/** 读取Excel文件数据 */public ArrayList<ArrayList<String>> readExcel(String filePath, int sheetIndex, String rows) {return readExcel(filePath, sheetIndex, rows, "1-");}/** 读取Excel文件数据 */public ArrayList<ArrayList<String>> readExcel(String filePath, int sheetIndex, String[] columns) {return readExcel(filePath, sheetIndex, "1-", columns);}/** 读取Excel文件数据,子类必须实现 */public abstract ArrayList<ArrayList<String>> readExcel(String filePath, int sheetIndex, String rows, String columns);/** 读取Excel文件数据 */public ArrayList<ArrayList<String>> readExcel(String filePath, int sheetIndex, String rows, String[] columns) {int[] cols = getColumnNumber(columns);return readExcel(filePath, sheetIndex, rows, cols);}/** 读取Excel文件数据,子类必须实现 */public abstract ArrayList<ArrayList<String>> readExcel(String filePath, int sheetIndex, String rows, int[] cols);/** 读取Excel文件内容 */protected ArrayList<ArrayList<String>> readExcel(Sheet sheet, String rows, int[] cols) {ArrayList<ArrayList<String>> dataList = new ArrayList<ArrayList<String>> ();// 处理行信息,并逐行列块读取数据String[] rowList = rows.split(SEPARATOR);for (String rowStr : rowList) {if (rowStr.contains(CONNECTOR)) {String[] rowArr = rowStr.trim().split(CONNECTOR);int start = Integer.parseInt(rowArr[0]) - 1;int end;if (rowArr.length == 1) {end = sheet.getLastRowNum();} else {end = Integer.parseInt(rowArr[1].trim()) - 1;}dataList.addAll(getRowsValue(sheet, start, end, cols));} else {dataList.add(getRowValue(sheet, Integer.parseInt(rowStr) - 1, cols));}}return dataList;}/** 获取连续行、列数据 */protected ArrayList<ArrayList<String>> getRowsValue(Sheet sheet, int startRow, int endRow,int startCol, int endCol) {if (endRow < startRow || endCol < startCol) {return null;}ArrayList<ArrayList<String>> data = new ArrayList<ArrayList<String>>();for (int i = startRow; i <= endRow; i++) {data.add(getRowValue(sheet, i, startCol, endCol));}return data;}/** 获取连续行、不连续列数据 */private ArrayList<ArrayList<String>> getRowsValue(Sheet sheet, int startRow, int endRow, int[] cols) {if (endRow < startRow) {return null;}ArrayList<ArrayList<String>> data = new ArrayList<ArrayList<String>>();for (int i = startRow; i <= endRow; i++) {data.add(getRowValue(sheet, i, cols));}return data;}/** 获取行连续列数据 */private ArrayList<String> getRowValue(Sheet sheet, int rowIndex, int startCol, int endCol) {if(endCol < startCol) {return null;}Row row = sheet.getRow(rowIndex);ArrayList<String> rowData = new ArrayList<String>();for (int i = startCol; i <= endCol; i++) {rowData.add(getCellValue(row, i));}return rowData;}/** 获取行不连续列数据 */private ArrayList<String> getRowValue(Sheet sheet, int rowIndex, int[] cols) {Row row = sheet.getRow(rowIndex);ArrayList<String> rowData = new ArrayList<String>();for (int colIndex : cols) {rowData.add(getCellValue(row, colIndex));}return rowData;}/** * 获取单元格内容 * * @param row * @param column * a excel column string like 'A', 'C' or "AA". * @return */protected String getCellValue(Row row, String column) {return getCellValue(row,getColumnNumber(column));}/** * 获取单元格内容 * * @param row * @param col * a excel column index from 0 to 65535 * @return */private String getCellValue(Row row, int col) {if (row == null) {return "";}Cell cell = row.getCell(col);return getCellValue(cell);}/** * 获取单元格内容 * * @param cell * @return */private String getCellValue(Cell cell) {if (cell == null) {return "";}String value = cell.toString().trim();try {// This step is used to prevent Integer string being output with// '.0'.Float.parseFloat(value);value=value.replaceAll("\\.0$", "");value=value.replaceAll("\\.0+$", "");return value;} catch (NumberFormatException ex) {return value;}}/** * Change excel column letter to integer number * * @param columns * column letter of excel file, like A,B,AA,AB * @return */private int[] getColumnNumber(String[] columns) {int[] cols = new int[columns.length];for(int i=0; i<columns.length; i++) {cols[i] = getColumnNumber(columns[i]);}return cols;}/** * Change excel column letter to integer number * * @param column * column letter of excel file, like A,B,AA,AB * @return */private int getColumnNumber(String column) {int length = column.length();short result = 0;for (int i = 0; i < length; i++) {char letter = column.toUpperCase().charAt(i);int value = letter - 'A' + 1;result += value * Math.pow(26, length - i - 1);}return result - 1;}/** * Change excel column string to integer number array * * @param sheet * excel sheet * @param columns * column letter of excel file, like A,B,AA,AB * @return */protected int[] getColumnNumber(Sheet sheet, String columns) {// 拆分后的列为动态,采用List暂存ArrayList<Integer> result = new ArrayList<Integer> ();String[] colList = columns.split(SEPARATOR);for(String colStr : colList){if(colStr.contains(CONNECTOR)){String[] colArr = colStr.trim().split(CONNECTOR);int start = Integer.parseInt(colArr[0]) - 1;int end;if(colArr.length == 1){end = sheet.getRow(sheet.getFirstRowNum()).getLastCellNum() - 1;}else{end = Integer.parseInt(colArr[1].trim()) - 1;}for(int i=start; i<=end; i++) {result.add(i);}}else{result.add(Integer.parseInt(colStr) - 1);}}// 将List转换为数组int len = result.size();int[] cols = new int[len]; for(int i = 0; i<len; i++) {cols[i] = result.get(i).intValue();}return cols;}}
import java.io.FileInputStream;import java.util.ArrayList;import org.apache.poi.hssf.usermodel.HSSFSheet;import org.apache.poi.hssf.usermodel.HSSFWorkbook;/** * Excel 读取(97-2003格式) * @authorchengesheng * @date2012-4-27 下午03:39:01 * @notePoiExcel2k3Helper */public class PoiExcel2k3Helper extends PoiExcelHelper {/** 获取sheet列表 */public ArrayList<String> getSheetList(String filePath) {ArrayList<String> sheetList = new ArrayList<String>(0);try {HSSFWorkbook wb = new HSSFWorkbook(new FileInputStream(filePath));int i = 0;while (true) {try {String name = wb.getSheetName(i);sheetList.add(name);i++;} catch (Exception e) {break;}}} catch (Exception e) {e.printStackTrace();}return sheetList;}/** 读取Excel文件内容 */public ArrayList<ArrayList<String>> readExcel(String filePath, int sheetIndex, String rows, String columns) {ArrayList<ArrayList<String>> dataList = new ArrayList<ArrayList<String>> ();try {HSSFWorkbook wb = new HSSFWorkbook(new FileInputStream(filePath));HSSFSheet sheet = wb.getSheetAt(sheetIndex);dataList = readExcel(sheet, rows, getColumnNumber(sheet, columns));} catch (Exception e) {e.printStackTrace();}return dataList;}/** 读取Excel文件内容 */public ArrayList<ArrayList<String>> readExcel(String filePath, int sheetIndex, String rows, int[] cols) {ArrayList<ArrayList<String>> dataList = new ArrayList<ArrayList<String>> ();try {HSSFWorkbook wb = new HSSFWorkbook(new FileInputStream(filePath));HSSFSheet sheet = wb.getSheetAt(sheetIndex);dataList = readExcel(sheet, rows, cols);} catch (Exception e) {e.printStackTrace();}return dataList;}}
import java.io.FileInputStream;import java.util.ArrayList;import java.util.Iterator;import org.apache.poi.xssf.usermodel.XSSFSheet;import org.apache.poi.xssf.usermodel.XSSFWorkbook;/** * Excel 读取(2007+新格式) * @authorchengesheng * @date2012-4-27 下午03:39:01 * @notePoiExcel2k7Helper */public class PoiExcel2k7Helper extends PoiExcelHelper {/** 获取sheet列表 */public ArrayList<String> getSheetList(String filePath) {ArrayList<String> sheetList = new ArrayList<String>(0);try {XSSFWorkbook wb = new XSSFWorkbook(new FileInputStream(filePath));Iterator<XSSFSheet> iterator = wb.iterator();while (iterator.hasNext()) {sheetList.add(iterator.next().getSheetName());}} catch (Exception e) {e.printStackTrace();}return sheetList;}/** 读取Excel文件内容 */public ArrayList<ArrayList<String>> readExcel(String filePath, int sheetIndex, String rows, String columns) {ArrayList<ArrayList<String>> dataList = new ArrayList<ArrayList<String>> ();try {XSSFWorkbook wb = new XSSFWorkbook(new FileInputStream(filePath));XSSFSheet sheet = wb.getSheetAt(sheetIndex);dataList = readExcel(sheet, rows, getColumnNumber(sheet, columns));} catch (Exception e) {e.printStackTrace();}return dataList;}/** 读取Excel文件内容 */public ArrayList<ArrayList<String>> readExcel(String filePath, int sheetIndex, String rows, int[] cols) {ArrayList<ArrayList<String>> dataList = new ArrayList<ArrayList<String>> ();try {XSSFWorkbook wb = new XSSFWorkbook(new FileInputStream(filePath));XSSFSheet sheet = wb.getSheetAt(sheetIndex);dataList = readExcel(sheet, rows, cols);} catch (Exception e) {e.printStackTrace();}return dataList;}}
import java.util.ArrayList;/** * Excel统一POI处理测试类(针对2003以前和2007以后两种格式的兼容处理) * @authorchengesheng * @date2012-5-3 下午03:10:23 * @notePoiHelper */public abstract class PoiExcelTest {// *************************************************// ================以下为测试代码====================// *************************************************public static void main(String[] args){// 获取Excel文件的sheet列表testGetSheetList("c:/test.xlsx");// 获取Excel文件的第1个sheet的内容testReadExcel("c:/test.xls", 0);// 获取Excel文件的第2个sheet的第2、4-7行和第10行及以后的内容testReadExcel("c:/test.xlsx", 1, "2,4-7,10-");// 获取Excel文件的第3个sheet中a,b,g,h,i,j等列的所有内容testReadExcel("c:/test.xls", 2, new String[] {"a","b","g","h","i","j"});// 获取Excel文件的第4个sheet的第2、4-7行和第10行及以后,a,b,g,h,i,j等列的内容testReadExcel("c:/test.xlsx", 3, "2,4-7,10-", new String[] {"a","b","g","h","i","j"});}// 测试获取sheet列表private static void testGetSheetList(String filePath) {PoiExcelHelper helper = getPoiExcelHelper(filePath);// 获取Sheet列表ArrayList<String> sheets = helper.getSheetList(filePath);// 打印Excel的Sheet列表printList(filePath, sheets);}// 测试Excel读取private static void testReadExcel(String filePath, int sheetIndex) {PoiExcelHelper helper = getPoiExcelHelper(filePath);// 读取excel文件数据ArrayList<ArrayList<String>> dataList = helper.readExcel(filePath, sheetIndex);// 打印单元格数据printBody(dataList);}// 测试Excel读取private static void testReadExcel(String filePath, int sheetIndex, String rows) {PoiExcelHelper helper = getPoiExcelHelper(filePath);// 读取excel文件数据ArrayList<ArrayList<String>> dataList = helper.readExcel(filePath, sheetIndex, rows);// 打印单元格数据printBody(dataList);}// 测试Excel读取private static void testReadExcel(String filePath, int sheetIndex, String[] columns) {PoiExcelHelper helper = getPoiExcelHelper(filePath);// 读取excel文件数据ArrayList<ArrayList<String>> dataList = helper.readExcel(filePath, sheetIndex, columns);// 打印列标题printHeader(columns);// 打印单元格数据printBody(dataList);}// 测试Excel读取private static void testReadExcel(String filePath, int sheetIndex, String rows, String[] columns) {PoiExcelHelper helper = getPoiExcelHelper(filePath);// 读取excel文件数据ArrayList<ArrayList<String>> dataList = helper.readExcel(filePath, sheetIndex, rows, columns);// 打印列标题printHeader(columns);// 打印单元格数据printBody(dataList);}// 获取Excel处理类private static PoiExcelHelper getPoiExcelHelper(String filePath) {PoiExcelHelper helper;if(filePath.indexOf(".xlsx")!=-1) {helper = new PoiExcel2k7Helper();}else {helper = new PoiExcel2k3Helper();}return helper;}// 打印Excel的Sheet列表private static void printList(String filePath, ArrayList<String> sheets) {System.out.println();for(String sheet : sheets) {System.out.println(filePath + " ==> " + sheet);}}// 打印列标题private static void printHeader(String[] columns) {System.out.println();for(String column : columns) {System.out.print("\t\t" + column.toUpperCase());}}// 打印单元格数据private static void printBody(ArrayList<ArrayList<String>> dataList) {int index = 0;for(ArrayList<String> data : dataList) {index ++;System.out.println();System.out.print(index);for(String v : data) {System.out.print("\t\t" + v);}}}}