python 大文本文件 解析、入库并转换后写入新文件
#!/usr/bin/python2.7
# encoding: utf-8
"""Process a large text file, writing results to both a database and a text file."""

import re

from mylib.dbi import DataBaseInterface
from app_common import config


class Klass(object):
    """One parsed record holding an ``id`` and a ``name``."""

    # Template used by __str__; it is filled from the instance attributes.
    FMT = '%(id)d, %(name)s'

    def __init__(self, **kwg):
        """Store the ``id`` and ``name`` keyword arguments (None when absent)."""
        self.id = kwg.get('id')
        self.name = kwg.get('name')

    def __str__(self):
        """Format the object's attributes as a string according to FMT."""
        return self.FMT % self.__dict__

    def sqltuple(self):
        """Return the object's attributes as a tuple in the order (id, name)."""
        return (self.id, self.name)


# A record line is "<digits><TAB><name>" followed by an optional line
# terminator. The name is matched lazily so that a trailing CR/LF is not
# captured into it, and the terminator is optional so that a final line
# without a newline is still parsed.
patt = re.compile(r'^(?P<id>\d+)\t(?P<name>.*?)[\r\n]*$', re.U)


def process(line):
    """Parse one line in the expected format and build a Klass instance.

    Returns None when the line does not match ``patt``.
    """
    m = patt.match(line)
    if not m:
        return None
    fields = m.groupdict()
    # groupdict() yields strings, but Klass.FMT formats id with %d, which
    # rejects str; convert here so that str(instance) works.
    fields['id'] = int(fields['id'])
    return Klass(**fields)


# NOTE(review): insertsql, srcfilename and wrtfilename are not defined in
# this file as shown; they must be defined before this point.
dbi = DataBaseInterface(**config)
dbi.open()
try:
    # dbi.batch is a DataBaseInterface method: it runs bulk data operations
    # through dbi.conn.executemany, supports ``with`` for automatic
    # setup/teardown, and manages its buffer size automatically.
    buff = dbi.batch(insertsql)
    # Open both files inside the ``with`` so that each one is closed even
    # if a later open() or the processing loop fails.
    with open(srcfilename, 'r') as src, open(wrtfilename, 'w') as wrt, buff:
        for ln in src:
            instance = process(ln)
            if instance:
                wrt.write('%s\n' % instance)
                buff.append(instance.sqltuple())
finally:
    # Close the connection even when processing raised.
    dbi.close()