例子、超级简单读取csv
1.用python读取csv文件:
csv是逗号分隔符格式 一般我们用的execl生成的格式是xls和xlsx 直接重命名为csv的话会报错:
Error: line contains NULL byte
insun解决方案:出错原因是直接是把后缀为xls的execl文件重命名为csv的 正常的要是另存为csv文件 就不会报错了
譬如我们有这么个csv文件:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import csv
with open('egg.csv','rb') as f:
reader = csv.reader(f)
for row in reader:
print row
打印出来是这样的list
['a', '1', '1', '1']
['a', '2', '2', '2']
['b', '3', '3', '3']
['b', '4', '4', '4']
['b', '5', '5', '5']
['b', '6', '6', '6']
['c', '7', '7', '7']
['c', '8', '8', '8']
['c', '9', '9', '9']
['c', '10', '10', '10']
['d', '11', '11', '11']
['e', '12', '12', '12']
['e', '13', '13', '13']
['e', '14', '14', '14']
2.用python写入并生成csv
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import csv
with open('egg2.csv', 'wb') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=' ',quotechar='|', quoting=csv.QUOTE_MINIMAL)
spamwriter.writerow(['a', '1', '1', '2', '2'])
spamwriter.writerow(['b', '3', '3', '6', '4'])
spamwriter.writerow(['c', '7', '7', '10', '4'])
spamwriter.writerow(['d', '11','11','11', '1'])
spamwriter.writerow(['e', '12','12','14', '3'])
这样存进去的是存到一列了 跟我们原本意图存进5列不一样
使用python的csv生成excel所兼容的csv文件的话,主要就是创建writer时的参数时要有dialect=’excel’
代码修改为:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import csv
with open('egg2.csv', 'wb') as csvfile:
spamwriter = csv.writer(csvfile,dialect='excel')
spamwriter.writerow(['a', '1', '1', '2', '2'])
spamwriter.writerow(['b', '3', '3', '6', '4'])
spamwriter.writerow(['c', '7', '7', '10', '4'])
spamwriter.writerow(['d', '11','11','11', '1'])
spamwriter.writerow(['e', '12','12','14', '3'])
例子(生成excel报表)
这边的程序相对比较简单,就不像之前一样画流程图了。一下介绍一下我们需要的python 模块:
模块名 版本 其他
mysql-connector 2.1.3 MySQL官网的python链接模块
xlwt 1.0.0 生成excel模块
程序展示
主程序
if __name__ == '__main__':
info = {
'host' :'192.168.137.11',
'user' :'root',
'password':'root',
'database':'test'
}
conn = mysql.connector.connect(**info)
cursor = conn.cursor()
sql = get_sql()
cursor.execute(sql)
# 获得excel的title
title = get_title(cursor)
# 获得需要的数据
data = get_select_data(cursor)
# 获得每一列的最大长度
max_len = get_col_max_length(data, title)
work_book = xlwt.Workbook(encoding='utf-8')
# 创建一个excel模板
work_sheet = work_book.add_sheet('查询数据')
# 生成excel title
work_sheet = create_excel_title(work_sheet, title, title_style)
# 生成 excel 数据
work_sheet = create_excel_body(work_sheet, data)
# 设置每一列适当的长度
work_sheet = set_work_sheet_col_len(work_sheet, max_len)
# 保存 excel
work_book.save('data_{time}.xls'.format(time=time.time()))
获得的sql方法
def get_sql():
'''
创建需要的sql语句
'''
sql = '''
SELECT tmp.mobile_phone AS '电话号码',
tmp.name AS '其中一个姓名',
tmall_shop_info.name AS '品牌商名称',
store.store_name AS '店铺名称',
tmp.num AS '重复个数'
FROM (
SELECT mobile_phone,
name,
store_no,
tmall_shop_id,
COUNT(*) AS num
FROM store_guide
WHERE mobile_phone IS NOT NULL
GROUP BY mobile_phone
HAVING num > 1
) AS tmp
LEFT JOIN tmall_shop_info USING(tmall_shop_id)
LEFT JOIN store USING(store_no)
'''
return sql
如果需要生成其他sql能查询出的数据直接就替换掉上面的sql就好了。
其他方法
def get_title(cursor):
'''
通过游标获得excel文件title
'''
return cursor.column_names
def get_select_data(cursor):
'''
通过游标获得数据列表(list)
'''
return [row for row in cursor]
def create_excel_title(work_sheet, title, title_style=None):
'''
生产exceltitle
'''
if not title_style:
title_style = default_style
for col_index, col_name in enumerate(title):
work_sheet.write(0, col_index, col_name, title_style)
return work_sheet
def create_excel_body(work_sheet, body, body_style=None):
'''
生成excel body信息
'''
if not title_style:
body_style = default_style
for row_num, row_data in enumerate(data, 1):
for col_index, col_value in enumerate(row_data):
work_sheet.write(row_num, col_index, col_value)
return work_sheet
def get_col_max_length(data, title):
'''
获得数据每列最大值长度
'''
col_len = map(len, map(str, title))
func = lambda x, y: y if y>x else x
for row in data:
row_len = map(len, map(str, row))
col_len = map(func, col_len, row_len)
return col_len
def set_work_sheet_col_len(work_sheet, max_len):
'''
设置列长度
'''
for col, len in enumerate(max_len):
work_sheet.col(col).width = 256 * (len + 1)
return work_sheet
以上的get_col_max_length方法中使用了map函数来获得每一列的最大长度是多少,为了能在excel中能设置适当的宽度。
excel样式设置
# 默认样式
default_style = xlwt.easyxf('''
pattern: pattern solid;
borders: left 1, right 1, top 1, bottom 1;
align: horiz center''',
num_format_str='0,000.00')
# 标题栏样式
title_style = xlwt.easyxf('''
pattern: pattern solid, fore_colour yellow;
font: name Times New Roman, color-index black, bold on;
borders: left 1, right 1, top 1, bottom 1;
align: horiz center''',
num_format_str='0,000.00')
# 时间格式样式
time_style = xlwt.easyxf(num_format_str='YYYY-MM-DD h:mm:ss')