python 批量处理文件


背景

批量处理文件, 将每个文件中相同结构的内容整理到一个新文件中, 每个源文件的内容占新文件的一行.
读取是按行读取

代码展示

import os

def eachFile(filepath):
    """Return the full paths of all entries directly inside *filepath*.

    Args:
        filepath: Directory to list.

    Returns:
        list[str]: One joined path per directory entry, in os.listdir order.
    """
    # os.path.join(filepath, name) inserts the separator when needed; the
    # original pre-concatenated with '%s%s' and passed a single string to
    # os.path.join, silently producing broken paths whenever filepath did
    # not end with a trailing slash.
    return [os.path.join(filepath, name) for name in os.listdir(filepath)]
# Collect the useful lines of every file under E:\rssi into d.txt,
# one source file per output line.
file_path_list = eachFile("E:\\rssi\\")

# Open the output once instead of reopening it in append mode per file.
with open("d.txt", "a+", encoding="utf8") as out:
    for filename in file_path_list:
        with open(filename, "r", encoding="utf8") as f:
            lines = f.readlines()
        # Original condition (index > 2 and index < len(lines) - 1)
        # keeps indices 3 .. len-2, i.e. drop the first three lines and
        # the last line of each file.
        wanted = [line.strip() for line in lines[3:-1]]
        # " ".join(wanted) — the original wrapped the list in a redundant
        # generator expression.
        out.write(" ".join(wanted) + "\r\n")

批量处理html 页面然后解析

这个方法比起上面的就显得很笨拙了

from bs4 import BeautifulSoup
import DBChinaAdministration
# Parse each province's HTML page and bulk-insert the extracted
# (administration_code, name) pairs into the database.
db = DBChinaAdministration()

# One page per province. Iterating a name list replaces the original 31
# numbered html_fileN variables and 31 file_list.append calls — which
# were also never closed, leaking every file handle.
PROVINCE_PAGES = [
    "anhui.html", "beijing.html", "chongqing.html", "fujiang.html",
    "gansu.html", "guangdong.html", "guangxi.html", "guizhou.html",
    "hainan.html", "hebei.html", "heilongjiang.html", "henan.html",
    "hubei.html", "hunan.html", "jiangsu.html", "jiangxi.html",
    "jilin.html", "liaoning.html", "neimenggu.html", "ningxia.html",
    "qinghai.html", "sanxi.html", "shandong.html", "shanghai.html",
    "shanxishen.html", "sichuan.html", "tianjin.html", "xinjiang.html",
    "xizang.html", "yunnan.html", "zhejiang.html",
]

for page in PROVINCE_PAGES:
    # Context manager guarantees the handle is closed after reading.
    with open(page, "r", encoding="utf8") as html_file:
        html_page = html_file.read()

    soup = BeautifulSoup(html_page, "html.parser")
    anchors = soup.select('a')
    list_code = list()
    for index, anchor in enumerate(anchors):
        # Every second <a> tag carries the onclick payload we need.
        if index % 2 == 0:
            copycode = anchor.get("onclick")
            # Normalise "fn('...', 'code', 'name')" into bare
            # comma-separated fields by stripping parens and quotes.
            cleaned = copycode.replace("(", ",").replace(")", ",").replace("'", "")
            fields = cleaned.split(",")
            if len(fields):
                list_code.append({
                    "administration_code": fields[2],
                    "name": fields[3],
                })

    db.bulk_add(list_code)
    # print(list_code)

使用numpy 对 csv 文件进行操作

import csv
import numpy as np

import Operation

# Column-index -> field-name lookup used by get_data(); the 16 entries
# are placeholders to be filled in per deployment (empty names are
# skipped when building the output dicts).
data = {column: "" for column in range(1, 17)}


def get_data(csv_path='matchrule.csv'):
    """Read *csv_path*, split it into columns, and build one dict per column.

    The first CSV field of every row is dropped; each remaining column is
    turned into a dict whose keys come from the module-level ``data``
    index->name mapping (indices with an empty name are skipped) and whose
    values are post-processed by ``Operation.get_element``.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            ``'matchrule.csv'`` so existing ``get_data()`` calls keep working.

    Returns:
        list[dict]: One dict of processed elements per CSV column.
    """
    with open(csv_path, 'rt', encoding='utf8') as c:
        # Drop the first field of every row (row[1:]).
        rows = [row[1:] for row in csv.reader(c)]

    a = np.array(rows)

    # One entry per CSV column. Iterating a.T yields each column directly,
    # replacing the original O(rows * cols) loops that built explicit
    # row/col index lists for fancy indexing.
    # NOTE(review): assumes the CSV is rectangular, as the original fancy
    # indexing also required — confirm against the input file.
    k = list(a.T)

    # Convert every column to a dict keyed by the configured field names.
    instance = Operation()
    obj_data_list = list()
    for one in k:
        data_temp = dict()
        for index, element in enumerate(one):
            key = data.get(index, "")
            if key:  # indices without a configured name are ignored
                data_temp[key] = instance.get_element(key, element)
        obj_data_list.append(data_temp)

    return obj_data_list


if __name__ == '__main__':
    get_data()

转载自:https://blog.csdn.net/yangxiaodong88/article/details/79233203

You may also like...