分类： Python技术

Python技术

多线程与多进程

2021年4月1日
多线程与多进程
多线程与多进程已关闭评论

多线程与多进程

多线程与进程
进程（Process）

是计算机中的程序关于某数据集合上的一次运行活动，是系统进行资源分配
和调度的基本单位，是操作系统结构的基础。

线程（Thread）

有时被称为轻量级进程（Lightweight Process，LWP），是程序执行流的最
小单位。线程是进程中的一个实体，是被系统独立调度和分配的基本单位，
一个进程可以包含多个线程，但是线程不能包含多个进程。线程自己不拥有
系统资源，在单个程序中同时运行多个线程完成不同的工作，称为多线程。

线程与进程的区别

线程和进程的区别在于，子进程和父进程有不同的代码和数据空间，
而多个线程则共享数据空间，每个线程有自己的执行堆栈和程序计数器
作为其执行上下文。

单线程实战

from time import ctime,sleep

def talk():
    """Print the start time of the "talk" task, then block for 2 seconds."""
    print("start talk :%r" % ctime())
    sleep(2)


def write():
    """Print the start time of the "write" task, then block for 3 seconds."""
    print("start write:%r" % ctime())
    sleep(3)


if __name__ == '__main__':
    # Single-threaded baseline: the two tasks run back to back, so the total
    # run time is the sum of both sleeps.
    talk()
    write()
    print("All end !%r" % ctime())

多线程实战

# -*- coding: utf-8 -*-
from time import sleep,ctime
import threading

# 定义说和写
def talk(content, loop):
    """Print ``content`` with a timestamp ``loop`` times, sleeping 2 s after each print."""
    for _ in range(loop):
        print("start talk:%s %s" % (content, ctime()))
        sleep(2)


def write(content, loop):
    """Print ``content`` with a timestamp ``loop`` times, sleeping 3 s after each print."""
    for _ in range(loop):
        print("start write:%s %s" % (content, ctime()))
        sleep(3)


# One thread per task; they are only created here and started in the guard below.
threads = []

t1 = threading.Thread(target=talk, args=("hello", 2))
threads.append(t1)

t2 = threading.Thread(target=write, args=("人生苦短，我用python！", 2))
threads.append(t2)

if __name__ == '__main__':
    for t in threads:
        t.start()
    # Join in a second loop: joining inside the first loop would wait for each
    # thread before starting the next one and serialize the work.
    for t in threads:
        t.join()
    print("线程全部执行完成！%s" % ctime())

多进程实战

# -*- coding: utf-8 -*-
from time import ctime,sleep
# 导入multiprocessing进程模块
import multiprocessing

# 定义两个方法说和写
def talk(content, loop, interval=2):
    """Print ``content`` with the current time ``loop`` times.

    Args:
        content: Text to print.
        loop: Number of repetitions.
        interval: Seconds to sleep after each print (default 2).
    """
    for _ in range(loop):
        # ctime must be *called*; passing the function itself printed
        # "<built-in function ctime>" instead of a timestamp.
        print("Talk: %s %s" % (content, ctime()))
        sleep(interval)


def write(content, loop, interval=3):
    """Print ``content`` with the current time ``loop`` times.

    Args:
        content: Text to print.
        loop: Number of repetitions.
        interval: Seconds to sleep after each print (default 3).
    """
    for _ in range(loop):
        print("write: %s %s" % (content, ctime()))
        sleep(interval)


# One process per task; they are only created here and started in the guard below.
process = []
p1 = multiprocessing.Process(target=talk, args=("hello", 2))
process.append(p1)

p2 = multiprocessing.Process(target=write, args=("人生苦短，我用python", 2))
process.append(p2)

# The guard must compare against "__main__"; the original "__main" never
# matched, so the processes were never started.
if __name__ == "__main__":
    for p in process:
        p.start()
    for p in process:
        p.join()
    print("调用进程结束 %s" % ctime())

python图像exif信息复制

2021年4月1日
python图像exif信息复制
python图像exif信息复制已关闭评论

python图像exif信息复制

图像exif信息copy
以下的函数实现了把单独一张影像的exif信息复制给一个文件夹下的所有其他影像，包括焦距、相机型号、经纬度等exif信息。另外，分辨率、图像大小等参数不会改变。

代码如下：
#coding=utf-8
import os
import piexif
def getFileList(dir, Filelist, ext=None):
    """Recursively collect the files under ``dir`` into ``Filelist``.

    Args:
        dir: A file path, or the root directory to walk.
        Filelist: List that matching paths are appended to (mutated in place).
        ext: Extension to keep, with or without the leading dot
            (``'JPG'`` or ``'.jpg'``); compared case-insensitively.
            ``None`` keeps every file.

    Returns:
        The same ``Filelist`` object that was passed in.
    """
    if os.path.isfile(dir):
        if ext is None:
            Filelist.append(dir)
        else:
            # Compare the real extension instead of "the last three
            # characters", which could never match e.g. 'jpeg' or 'tiff'.
            wanted = ext.lstrip('.').lower()
            actual = os.path.splitext(dir)[1].lstrip('.').lower()
            if actual == wanted:
                Filelist.append(dir)
    elif os.path.isdir(dir):
        for entry in os.listdir(dir):
            getFileList(os.path.join(dir, entry), Filelist, ext)

    return Filelist

def main():
    """Copy the EXIF data of one reference JPEG onto every JPEG in a folder."""
    # Reference image whose EXIF data is copied.
    baseline_img = 'C:/Users/Administrator/Desktop/****.JPG'
    # Folder (searched recursively) holding the images to modify.
    org_img_folder = 'C:/Users/Administrator/****'
    imglist = getFileList(org_img_folder, [], 'JPG')
    print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n')

    for imgpath in imglist:
        # transplant(src, dst) writes the EXIF block of src into dst in place.
        piexif.transplant(baseline_img, imgpath)


# Guarded so that importing this module does not rewrite any image.
if __name__ == '__main__':
    main()

python获取指定时间段内特定规律的日期列表

2021年4月1日
python获取指定时间段内特定规律的日期列表
python获取指定时间段内特定规律的日期列表已关闭评论

python获取指定时间段内特定规律的日期列表

前言：写小程序时，遇到个需要自定义回归时间的场景，下面是实现的效果图。

于我需求而言，我只需要用到fixed_interval、weekly、per_month三个方法返回的回归间隔列表就行，最后的这个make_date_from_list方法，是为了能在这里展示效果

TIP:没有做异常处理，以及时间格式化之类的。这里只是抛砖引玉给个原型砖头出来。 %title插图%num

实现代码：

import datetime


class GetStrategyDedail:
    """Compute recurrence schedules, as lists of day offsets, inside a date range.

    ``fixed_interval`` returns offsets relative to ``begin_date``;
    ``weekly`` and ``per_month`` return offsets relative to ``today``.
    ``make_date_from_list`` turns any of them into ISO date strings.
    """

    # Evaluated once, when the class body runs; a long-lived process keeps
    # using that date.
    today = datetime.date.today()

    def __init__(self, begin_date, end_date):
        """Parse the range boundaries.

        Args:
            begin_date: Range start as a ``YYYY-MM-DD`` string.
            end_date: Range end as a ``YYYY-MM-DD`` string.

        Raises:
            ValueError: If either string does not match ``%Y-%m-%d``.
        """
        self.begin_date = datetime.datetime.strptime(begin_date, "%Y-%m-%d")
        self.end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d")

    def fixed_interval(self, num):
        """Return day offsets from ``begin_date`` at a fixed spacing.

        Args:
            num: Spacing in days (a positive integer).

        Returns:
            ``[0, num, 2*num, ...]`` limited to offsets that do not pass
            ``end_date``. The leading 0 (the begin date itself) is always
            present.
        """
        date_num = (self.end_date - self.begin_date).days
        return [0, *range(num, date_num + 1, num)]

    def weekly(self, i, weeks=1):
        """Return the offsets for "every ``weeks`` weeks on weekday ``i``".

        Side effect: ``self.begin_date`` is advanced to the first day on or
        after it that falls on weekday ``i``.

        Args:
            i: Weekday, 0 = Monday ... 6 = Sunday.
            weeks: Spacing in weeks (default 1).

        Returns:
            A list whose first element is the number of days from ``today``
            to the (advanced) begin date, followed by the result of
            ``fixed_interval(weeks * 7)``.

        Raises:
            ValueError: If ``i`` is not in 0..6 (the search below would
                otherwise never terminate).
        """
        if i not in range(7):
            raise ValueError("weekday must be in 0..6, got %r" % (i,))

        while self.begin_date.weekday() != i:
            self.begin_date += datetime.timedelta(days=1)

        num = (self.begin_date.date() - self.today).days
        fixed_list = self.fixed_interval(weeks * 7)
        fixed_list.insert(0, num)

        return fixed_list

    def per_month(self, date):
        """Return the offsets from ``today`` of day ``date`` in each month of the range.

        Every month from the month of ``begin_date`` to the month of
        ``end_date`` is visited, across year boundaries. A month contributes
        only if day ``date`` exists in it, does not lie after ``end_date``
        and lies after ``today``. The day in the first month is kept even if
        it precedes ``begin_date``.

        Args:
            date: Day of the month as an integer.

        Returns:
            List of positive day offsets relative to ``today``; may be empty.
        """
        last_day = self.end_date.date()
        year, month = self.begin_date.year, self.begin_date.month
        specific_list = []

        while (year, month) <= (last_day.year, last_day.month):
            try:
                candidate = datetime.date(year, month, date)
            except ValueError:
                # The day does not exist in this month (e.g. the 31st).
                candidate = None
            if candidate is not None and candidate <= last_day:
                num = (candidate - self.today).days
                if num > 0:
                    specific_list.append(num)
            year, month = (year + 1, 1) if month == 12 else (year, month + 1)

        return specific_list

    def make_date_from_list(self, num, interval=1, mode='day'):
        """Return the schedule as a list of ISO date strings.

        Args:
            num: Meaning depends on ``mode``: spacing in days (``'day'``),
                weekday 0-6 (``'week'``) or day of the month (``'month'``).
            interval: Spacing in weeks; only used when ``mode == 'week'``.
            mode: ``'day'``, ``'week'`` or ``'month'``.

        Returns:
            List of ``YYYY-MM-DD`` strings.

        Raises:
            ValueError: If ``mode`` is not one of the three supported values.
        """
        if mode == 'day':
            base = self.begin_date.date()
            offsets = self.fixed_interval(num)
        elif mode == 'week':
            details_list = self.weekly(num, interval)
            # The first element is the offset of the first matching weekday
            # from today; the rest are relative to that first date.
            base = self.today + datetime.timedelta(details_list[0])
            offsets = details_list[1:]
        elif mode == 'month':
            base = self.today
            offsets = self.per_month(num)
        else:
            raise ValueError("mode must be 'day', 'week' or 'month', got %r" % (mode,))

        return [(base + datetime.timedelta(offset)).isoformat() for offset in offsets]

python相机标定

2021年4月1日
python相机标定
python相机标定已关闭评论

python相机标定
采用的是张正友方法，参考以下文章：https://github.com/Nocami/PythonComputerVision-6-CameraCalibration
在该方法的基础上实现了批量的定标，便于软件设计。

# -*- coding: utf-8 -*-
from cv2 import cv2
import numpy as np
import glob
class Cphoto_pre_work:
    """Batch chessboard camera calibration and image undistortion with OpenCV.

    Typical use: ``get_grid`` (detect corners) -> ``get_parameter5``
    (calibrate and save the five results) -> ``last_photo`` (undistort).
    """

    def __init__(self):
        self.obj_points = []  # 3D board points, one array per accepted image
        self.img_points = []  # matching 2D corner detections
        self.size = []        # (width, height) of the last image read
        self.ret = []
        self.mtx = []
        self.dist = []
        self.rvecs = []
        self.tvecs = []
        # File-name stems used when saving/loading the calibration results.
        self.list1 = ['ret', 'mtx', 'dist', 'rvecs', 'tvecs']
        self.savesrc5 = []

    @staticmethod
    def _basename(path):
        """Return the last path component, accepting backslash or slash separators."""
        return path.replace("\\", "/").rsplit("/", 1)[-1]

    def get_grid(self, src, src_corner):
        """Detect the 6x4 inner-corner chessboard in every image matching ``src``.

        For each image where the board is found, the corners are refined to
        sub-pixel accuracy and stored, and a copy of the image with the
        corners drawn is written to ``src_corner`` + file name. Results
        accumulate across calls.

        Args:
            src: Glob pattern of the calibration images.
            src_corner: Output path prefix for the annotated images.

        Returns:
            True if at least one set of corners is stored, else False.
        """
        images = glob.glob(src)
        # Sub-pixel search stops after 30 iterations or when the update
        # falls below 0.001.
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

        # World coordinates are fixed to the board plane, so every Z is 0 and
        # only x and y are filled in.
        objp = np.zeros((4 * 6, 3), np.float32)
        objp[:, :2] = np.mgrid[0:6, 0:4].T.reshape(-1, 2)

        for fname in images:
            img = cv2.imread(fname)
            if img is None:
                # imread returns None for unreadable files; skip them
                # instead of crashing in cvtColor.
                continue
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            self.size = gray.shape[::-1]
            ret, corners = cv2.findChessboardCorners(gray, (6, 4), None)
            if not ret:
                continue

            self.obj_points.append(objp)
            corners2 = cv2.cornerSubPix(gray, corners, (5, 5), (-1, -1), criteria)
            # The original tested `if [corners2]:`, which is always true for
            # a one-element list; test the value itself.
            refined = corners if corners2 is None else corners2
            self.img_points.append(refined)

            cv2.drawChessboardCorners(img, (6, 4), refined, ret)
            cv2.imwrite(src_corner + self._basename(fname), img)

        return bool(self.img_points)

    def get_parameter5(self, savesrc):
        """Calibrate from the stored points and save the five results as ``.npy``.

        Args:
            savesrc: Path prefix; each result is saved to ``savesrc`` + name
                for the names in ``self.list1``.
        """
        self.savesrc5 = savesrc
        self.ret, self.mtx, self.dist, self.rvecs, self.tvecs = cv2.calibrateCamera(
            self.obj_points, self.img_points, self.size, None, None)
        results = (self.ret, self.mtx, self.dist, self.rvecs, self.tvecs)
        for stem, value in zip(self.list1, results):
            np.save(savesrc + stem, value)

    def last_photo(self, imgsrc, src_par5, last_src):
        """Undistort every image in a folder using saved calibration results.

        Args:
            imgsrc: Folder containing the original images.
            src_par5: Path prefix where ``get_parameter5`` saved its ``.npy`` files.
            last_src: Output path prefix for the undistorted, cropped images.
        """
        # A forward slash is accepted by glob on Windows as well.
        origin_images = glob.glob(imgsrc + '/*.[jp][pn]g')
        if not origin_images:
            return

        # Load the camera matrix and distortion coefficients once, not per image.
        mtx = np.load(src_par5 + self.list1[1] + '.npy')
        dist = np.load(src_par5 + self.list1[2] + '.npy')

        for tempfname in origin_images:
            tempimg = cv2.imread(tempfname)
            if tempimg is None:
                continue
            h, w = tempimg.shape[:2]
            # alpha=1 keeps all source pixels; the valid region comes back as roi.
            newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))
            print(newcameramtx)
            print("——————使用undistort函数——————-")
            temp_dst = cv2.undistort(tempimg, mtx, dist, None, newcameramtx)
            x, y, roi_w, roi_h = roi
            tempdst1 = temp_dst[y:y + roi_h, x:x + roi_w]
            cv2.imwrite(last_src + self._basename(tempfname), tempdst1)
            print("方法一:dst的大小为:", tempdst1.shape)

if __name__ == '__main__':
    temp = Cphoto_pre_work()
    srccc = r'E:\canshu' + '\\'
    # Raw strings: '\i' and '\*' are invalid escape sequences in a normal literal.
    if temp.get_grid(r'.\images4\*.jpg', './savecorner' + '/'):
        temp.get_parameter5(srccc)
        temp.last_photo(r'.\images4', srccc, './saveimg' + '/')
    else:
        # Without any detected corners calibrateCamera has nothing to work on.
        print('no chessboard corners found, calibration skipped')

numpy使用

2021年4月1日
numpy使用
numpy使用已关闭评论

numpy使用

import numpy as np

# --- creating arrays ---
arr1 = np.array([1, 2, 3, 4])
print(arr1)
arr2 = np.array([[1, 2, 3, 1], [4, 5, 6, 3], [4, 8, 9, 10]])
print(arr2)
np_linspace = np.linspace(0, 1, 12)
print(np_linspace)

# --- generating matrices ---
np_zeros = np.zeros((2, 3))
print(np_zeros)
np_eye = np.eye(3)
print(np_eye)
np_diag = np.diag([1, 2, 3, 4])
print(np_diag)

# --- array attributes ---
print(arr2.shape)
print(arr2.ndim)
print(arr2.size)
print(arr2.dtype)
print(arr2.itemsize)

# --- type conversion ---
# np.float was only an alias of the builtin float and was removed in
# NumPy 1.24; np.float64 is the NumPy scalar type.
print(np.float64(32))
print(np.int8(34))
print(bool(0))
print(np.float64(True))
print(np.float64(False))

# --- indexing ---
print(arr2[2][:2])
print(arr2[0, 0:3])

# --- reshaping ---
arr2.shape = 4, 3  # in-place reshape of the 12 elements from (3, 4) to (4, 3)
print(arr2)
print(arr1.reshape(2, 2))
arr_1 = np.arange(24).reshape(4, 6)
print(arr_1)
print(arr_1.ravel())
print(arr_1.flatten())     # row-major order
print(arr_1.flatten('F'))  # column-major order

# --- joining arrays ---
arr3 = np.array([5, 6, 7, 8])
print(np.hstack((arr1, arr3)))
print(np.vstack((arr1, arr3)))
print(np.concatenate((arr1, arr3), axis=0))

# --- splitting arrays ---
print(np.hsplit(arr3, 2))
print(np.vsplit(arr2, 2))
print(np.split(arr2, 2, axis=0))

# --- random numbers ---
print(np.random.random(100))
print(np.random.rand(10, 5))
print(np.random.randn(10, 5))
print(np.random.randint(2, 10, size=[2, 5]))

# --- matrices and matrix arithmetic ---
# np.mat was removed in NumPy 2.0; np.matrix accepts the same string form.
maty1 = np.matrix('1,2,3;4,5,6;7,8,9')
print(maty1)
maty2 = np.matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(maty2)
print(maty2 * 3)
print(maty2 + maty1)
print(maty2 * maty1)          # matrix product for np.matrix operands
print(np.dot(maty2, maty1))
print(maty1.T)
print(maty1.H)
try:
    # This matrix is singular (its determinant is 0), so the inverse either
    # raises or consists of numerically meaningless values.
    print(maty1.I)
except np.linalg.LinAlgError as exc:
    print(exc)

python群发邮件

2021年4月1日
python群发邮件
python群发邮件已关闭评论

python群发邮件

1. 前言
1.1 应朋友要求，写一个群发邮件的脚本，用来实现往每个人的邮箱里边发送自己的工资条

2. 数据格式，最后一列是邮箱地址

3. 脚本实现的功能
3.1 自定义邮件标题

3.2 记录发送成功或失败的个数，防止发送失败

4. 代码实现

# -*- coding: utf-8 -*-
# @Time : 2021/3/26 10:11
# @Author : liyf–95/02/02
# @File : send_email.py
# @Software: PyCharm

import xlrd
import time
import re
from email.mime.text import MIMEText
from smtplib import SMTP_SSL

from loguru import logger

# SMTP server of QQ Mail.
host_server = 'smtp.qq.com'
# Login account of the sender.
sender_qq = '123456@qq.com'
# Authorization code issued for third-party clients; this is not the QQ
# password. Placeholder value: do not commit a real code.
pwd = 'xxxxxxxxxxxxx'
# Sender address used for the envelope and the From header.
sender_qq_mail = '123456@qq.com'

# Read the clock once so that both stamps always refer to the same month.
_now = time.localtime()
# Current month as shown in the mail body, e.g. "2021-03".
batch = time.strftime("%Y-%m", _now)
# Current month as used in the log file names, e.g. "202103".
suffix = time.strftime("%Y%m", _now)

def get_success_error_counts():
    """Load this month's success log and error log.

    :return: tuple ``(success_lines, error_emails)``: the stripped lines of
        ``success_log_<suffix>.txt`` and the e-mail addresses extracted from
        ``error_log_<suffix>.txt``. A missing file yields an empty list.
    """
    success_email_list = []
    try:
        with open(f'success_log_{suffix}.txt', 'r', encoding='utf8') as f:
            success_email_list = [line.strip() for line in f]
    except FileNotFoundError:
        # Only a missing file means "nothing sent yet". Any other error
        # propagates, because treating an unreadable log as empty would
        # re-send every mail.
        logger.error(f'success_log_{suffix}.txt 文件不存在，初始化列表为0！')

    error_list = []
    # Each error line is the str() of a dict; pull out the 'email' value.
    email_pattern = re.compile(r"email': '(.*?)', '", re.S)
    try:
        with open(f'error_log_{suffix}.txt', 'r', encoding='utf8') as f:
            for line in f:
                match = email_pattern.search(line.strip())
                if match:
                    # Lines without an e-mail are skipped instead of
                    # aborting the rest of the file.
                    error_list.append(match.group(1))
    except FileNotFoundError:
        pass

    return success_email_list, error_list

def read_excel(subject):
    """Read the payroll sheet and mail every employee their own row.

    The first non-empty row is the header. For every following row the
    message body lists ``header: value`` pairs (the ``邮箱`` column is left
    out), the name is taken from the first column and the address from the
    last one. Addresses already present in this month's success log are
    skipped.

    :param subject: custom mail subject
    :return: None
    """
    # NOTE(review): xlrd 2.x dropped .xlsx support; confirm the installed
    # xlrd version can open this workbook.
    workbook = xlrd.open_workbook('工资条2.xlsx')
    worksheet = workbook.sheet_by_index(0)

    # Header row plus one row per employee; rows whose first cell is empty
    # are ignored.
    total_list = []
    for i in range(worksheet.nrows):
        data_list = worksheet.row_values(i)
        if data_list[0] != '':
            total_list.append(data_list)
    staff_count = len(total_list) - 1
    logger.info(f'数据读取完毕，共有 {staff_count} 位同事')
    logger.info('————————————————————')
    time.sleep(2)

    for k, v in enumerate(total_list[1:]):
        parts = []
        for title, cell in zip(total_list[0], v):
            if title == '邮箱':
                continue
            # Empty cells are shown as '无'.
            val = '无' if str(cell).strip() == '' else cell
            parts.append(f'# {title}：{val}\n')
        msg_content = ''.join(parts)
        name = v[0]
        email_addr = v[-1]

        # The log is re-read for every row so that addresses mailed earlier
        # in this same run are seen as well.
        success, error = get_success_error_counts()
        already_sent = {str(data).split('—->')[-1] for data in success}
        logger.info(f'正在向第 {k + 1}/{staff_count} 位同事 {name} 发送邮件，请稍等…')
        if email_addr in already_sent:
            logger.warning(f'{name} 同事：{email_addr} 已经发送过了，无需重复发送邮件！！')
            continue

        sent_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        email_content = f'尊敬的 {name} 同事，您好，您的 {batch} 月份工资单信息如下：\n{msg_content}# 发送时间：{sent_at}'
        send_email_to_member(email_addr, email_content, name, subject)
        logger.info(f'已发送至邮箱：{email_addr}，接收人：{name}')

    logger.info('————————————————————')
    logger.info('脚本运行结束！等待5分钟后自动关闭窗口（或者手动点击窗口右上角关闭）！！！')
    logger.info('运行结果：')
    logger.info(f'{staff_count} 位同事已全部发送完毕')
    success, error = get_success_error_counts()
    logger.info(f'Successd：{len(success)}，Failed：{len(error)}')
    # Keep the console window open for five minutes.
    time.sleep(300)

def send_email_to_member(email_addr, email_content, name, subject):
    """Send one plain-text mail and record the outcome in this month's logs.

    On success a line ending in ``—->`` followed by the address is appended
    to ``success_log_<suffix>.txt``. On any failure (connect, login or send)
    the error is logged and a dict describing it is appended to
    ``error_log_<suffix>.txt``.

    :param email_addr: recipient address
    :param email_content: body text of the mail
    :param name: recipient name (used in the logs only)
    :param subject: mail subject
    :return: None
    """
    message = MIMEText(email_content, "plain", 'utf-8')
    message["Subject"] = subject
    message["From"] = sender_qq_mail
    message["To"] = email_addr

    try:
        # Connecting and logging in happen inside the try so that a network
        # or authentication failure is recorded for this recipient instead
        # of aborting the whole run. The with-block always closes the
        # connection.
        with SMTP_SSL(host_server) as smtp:
            smtp.set_debuglevel(0)  # 1 enables protocol debugging, 0 disables it
            smtp.ehlo(host_server)
            smtp.login(sender_qq, pwd)
            smtp.sendmail(sender_qq_mail, email_addr, message.as_string())
    except Exception as e:
        logger.error(f'发送失败—>{name}\n原因：{e}')
        # Key order matters: the log reader extracts the address with a
        # pattern that expects another key right after 'email'.
        item = {
            'name': name,
            'email': email_addr,
            'error_reason': e,
            'date': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
        }
        with open(f'error_log_{suffix}.txt', 'a', encoding='utf8') as f:
            f.write(str(item))
            f.write('\n')
    else:
        sent_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        line = f'{name} 发送成功，时间: {sent_at}—->{email_addr}'
        with open(f'success_log_{suffix}.txt', 'a', encoding='utf8') as f:
            f.write(line)
            f.write('\n')
        # Short pause between two mails.
        time.sleep(0.5)

if __name__ == '__main__':
    # Ask for the subject once, then mail the whole sheet.
    subject = input('请输入自定义邮件标题：')
    logger.info(f'自定义邮件标题为：{subject}')
    read_excel(subject)
5. 逻辑梳理
5.1 该脚本使用qq邮箱发送邮件，其中获取授权码的方法可以点击这里参考博客

5.2 注意事项

5.2.1 excel名称必须为 `工资条2.xlsx`

5.2.2 `邮箱` 列必须为最后一列，`姓名` 列必须在第一列，因为代码中 `name=v[0], email_addr=v[-1]` 是固定的。可以自己做适当修改

5.2.3 表头名称可以随意改动，列数也可以随意增减，但要保证 `邮箱` 和 `姓名` 列存在

6. success_log_202103.txt 和 error_log_202103.txt 的作用
6.1 success_log_202103.txt

6.1.1 用来记录发送成功的数据

6.1.2 发送邮件之前，会先读取该txt文件，并判断要发送的email地址是否在txt里边，如果存在，则不发送，防止重复发送

6.2 error_log_202103.txt

6.2.1 一般情况下，没有这个文件，但是由于一些不可控因素，比如邮箱地址不存在或者断网等，会导致发送邮件失败

6.2.2 发送失败之后会把当前发送的数据记录下来，就会生成这个文件

6.2.3 如果该文件有数据，则首先检查是否是邮箱不正确导致的，如果不是，重新运行exe文件

7. 测试
7.1 使用 `pyinstaller -F send_email.py` 打包 py 文件为 exe 可执行文件

7.2 运行截图

%title插图%num

7.3 邮件内容

%title插图%num

Python系列11-Python文件操作

2021年4月1日
Python系列11-Python文件操作
Python系列11-Python文件操作已关闭评论

Python系列11-Python文件操作

文章目录
一.从文件中读取数据
1.1 读取整个文件
1.2 逐行读取
1.3 创建一个包含文件各行内容的列表
1.4 使用文件的内容
二. 写入文件
2.1 写入空文件
2.2 写入多行
2.3 附加到文件
三.存储数据
3.1 使用json.dump() 和json.load()
3.2 保存和读取用户生成的数据
3.3 重构
参考:
一.从文件中读取数据
文本文件可存储的数据量多得难以置信：天气数据、交通数据、社会经济数据、文学作品等。每当需要分析或修改存储在文件中的信息时，读取文件都很有用，对数据分析应用
程序来说尤其如此。例如，你可以编写一个这样的程序：读取一个文本文件的内容，重新设置这些数据的格式并将其写入文件，让浏览器能够显示这些内容。
要使用文本文件中的信息，首先需要将信息读取到内存中。为此，你可以一次性读取文件的全部内容，也可以以每次一行的方式逐步读取。

1.1 读取整个文件
要读取文件，需要一个包含几行文本的文件。
下面首先来创建一个文件，它包含关系型数据库的名称以及排名。

Oracle 1
MySQL 2
SQL Server 3
PostgreSQL 4

代码:
文件路径中的分隔符一定要用'/'，用'\'会报错（反斜杠会被当作转义字符）

# Read the whole file into one string; the with-block closes the file.
with open('E:/python/file_test/db.txt') as file_object:
    contents = file_object.read()
    print(contents)

测试记录:

Oracle 1
MySQL 2
SQL Server 3
PostgreSQL 4

1.2 逐行读取
读取文件时，常常需要检查其中的每一行：你可能要在文件中查找特定的信息，或者要以某种方式修改文件中的文本。

要以每次一行的方式检查文件，可对文件对象使用for 循环。

代码:

filename = 'E:/python/file_test/db.txt'

# Iterating over the file object yields one line at a time. Each line keeps
# its trailing newline, so print() produces an extra blank line.
with open(filename) as file_object:
    for line in file_object:
        print(line)

测试记录:

E:\python\learn_python1\venv\Scripts\python.exe E:/python/learn_python1/file_test1.py
Oracle 1

MySQL 2

SQL Server 3

PostgreSQL 4

Process finished with exit code 0

我们打印每一行时，发现空白行更多了
为何会出现这些空白行呢？因为在这个文件中，每行的末尾都有一个看不见的换行符，而print 语句也会加上一个换行符，因此每行末尾都有两个换行符：一个来自文件，另一
个来自print 语句。要消除这些多余的空白行，可在print 语句中使用rstrip().

代码:

filename = 'E:/python/file_test/db.txt'

with open(filename) as file_object:
    for line in file_object:
        # rstrip() drops the newline read from the file, so only the one
        # added by print() remains.
        print(line.rstrip())

测试记录:

E:\python\learn_python1\venv\Scripts\python.exe E:/python/learn_python1/file_test1.py
Oracle 1
MySQL 2
SQL Server 3
PostgreSQL 4

Process finished with exit code 0

1.3 创建一个包含文件各行内容的列表
使用关键字with 时，open() 返回的文件对象只在with 代码块内可用。如果要在with 代码块外访问文件的内容，可在with 代码块内将文件的各行存储在一个列表中，并在with 代码块外使用该列表：你可以立即处理文件的各个部分，也可推迟到程序后面再处理。

代码:

filename = 'E:/python/file_test/db.txt'

# Keep the lines in a list so they remain available after the file is closed.
with open(filename) as file_object:
    lines = file_object.readlines()

for line in lines:
    print(line.rstrip())

测试记录:

E:\python\learn_python1\venv\Scripts\python.exe E:/python/learn_python1/file_test1.py
Oracle 1
MySQL 2
SQL Server 3
PostgreSQL 4

Process finished with exit code 0

1.4 使用文件的内容
将文件读取到内存中后，就可以以任何方式使用这些数据了。下面以简单的方式使用圆周率的值。首先，我们将创建一个字符串，它包含文件中存储的所有数字，且没有任何空格。

文件信息:

3.1415926535
8979323846
2643383279

代码:

filename = 'E:/python/file_test/pi.txt'

with open(filename) as file_object:
    lines = file_object.readlines()

# Concatenate all lines after removing the whitespace on both ends of each.
pi_string = ''.join(line.strip() for line in lines)

print(pi_string)
print(len(pi_string))

测试记录:

E:\python\learn_python1\venv\Scripts\python.exe E:/python/learn_python1/file_test2.py
3.141592653589793238462643383279
32

Process finished with exit code 0

二. 写入文件
保存数据的最简单的方式之一是将其写入到文件中。通过将输出写入文件，即便关闭包含程序输出的终端窗口，这些输出也依然存在：你可以在程序结束运行后查看这些输出，可与别人分享输出文件，还可编写程序来将这些输出读取到内存中并进行处理。

2.1 写入空文件
要将文本写入文件，你在调用open() 时需要提供另一个实参，告诉Python你要写入打开的文件。为明白其中的工作原理，我们来将一条简单的消息存储到文件中，而不是将其打印到屏幕上。

代码:

filename = 'E:/python/file_test/programming.txt'

# Mode 'w' creates the file, or empties it if it already exists.
with open(filename, 'w') as file_object:
    file_object.write("I love programming.")

测试记录:

C:\>more E:\python\file_test\programming.txt
I love programming.

C:\>

2.2 写入多行
函数write() 不会在你写入的文本末尾添加换行符，因此如果你写入多行时没有指定换行符，文件看起来可能不是你希望的那样.

代码:

filename = 'E:/python/file_test/programming.txt'

# write() adds no newline of its own, so each line ends with an explicit "\n".
with open(filename, 'w') as file_object:
    file_object.write("I love programming.\n")
    # Second line, as shown in the recorded output of this example.
    file_object.write("I love creating new games.\n")

测试记录:

C:\>more E:\python\file_test\programming.txt
I love programming.
I love creating new games.

C:\>

2.3 附加到文件
如果你要给文件添加内容，而不是覆盖原有的内容，可以附加模式打开文件。你以附加模式打开文件时，Python不会在返回文件对象前清空文件，而你写入到文件的行都将添加到文件末尾。如果指定的文件不存在，Python将为你创建一个空文件。

代码:

filename = 'E:/python/file_test/programming.txt'

# Mode 'a' appends to the end of the file instead of overwriting it.
with open(filename, 'a') as file_object:
    file_object.write("I also love finding meaning in large databases.\n")
    file_object.write("I lowe creating apps that can run in a browser.\n")

测试记录:

C:\>more E:\python\file_test\programming.txt
I love programming.
I love creating new games.
I also love finding meaning in large databases.
I lowe creating apps that can run in a browser.

三.存储数据
很多程序都要求用户输入某种信息，如让用户存储游戏首选项或提供要可视化的数据。不管专注的是什么，程序都把用户提供的信息存储在列表和字典等数据结构中。用户关闭程序时，你几乎总是要保存他们提供的信息；一种简单的方式是使用模块json 来存储数据。

模块json让你能够将简单的Python数据结构转储到文件中，并在程序再次运行时加载该文件中的数据。你还可以使用json 在Python程序之间分享数据。更重要的是，JSON数据格式并非Python专用的，这让你能够将以JSON格式存储的数据与使用其他编程语言的人分享。这是一种轻便格式，很有用，也易于学习。

3.1 使用json.dump() 和json.load()
我们来编写一个存储一组数字的简短程序，再编写一个将这些数字读取到内存中的程序。第一个程序将使用json.dump() 来存储这组数字，而第二个程序将使用json.load() 。

函数json.dump() 接受两个实参：要存储的数据以及可用于存储数据的文件对象。下面演示了如何使用json.dump() 来存储数字列表。

代码:

import json

numbers = [2, 3, 5, 7, 11, 13]

filename = 'E:/python/file_test/numbers.json'
# json.dump(data, file) serializes the list into the open file.
with open(filename, 'w') as f_obj:
    json.dump(numbers, f_obj)

测试记录:

C:\>more E:\python\file_test\numbers.json
[2, 3, 5, 7, 11, 13]

C:\>

下面演示了如何使用json.load() 来读取数字列表。
代码:

import json

filename = 'E:/python/file_test/numbers.json'
# json.load(file) rebuilds the Python list stored in the file.
with open(filename) as f_obj:
    numbers = json.load(f_obj)

print(numbers)

测试记录:

E:\python\learn_python1\venv\Scripts\python.exe E:/python/learn_python1/venv/json_test1.py
[2, 3, 5, 7, 11, 13]

Process finished with exit code 0

3.2 保存和读取用户生成的数据
对于用户生成的数据，使用json 保存它们大有裨益，因为如果不以某种方式进行存储，等程序停止运行时用户的信息将丢失。下面来看一个这样的例子：用户首次运行程序时被提示输入自己的名字，这样再次运行程序时就记住他了。

我们先来存储用户的名字.

代码:

import json
# 如果以前存储了用户名，就加载它
# 否则，就用提示用户输入用户名并存储它

filename = 'E:/python/file_test/username.json'

try:
    with open(filename) as f_obj:
        username = json.load(f_obj)
except FileNotFoundError:
    # First run: nothing stored yet, so ask for the name and save it.
    username = input("What is your name?")
    with open(filename, 'w') as f_obj:
        json.dump(username, f_obj)
        print("We'll remember you when you come back, " + username + "!")
else:
    # The file existed and was loaded successfully.
    print("Welcome back, " + username + "!")

测试记录:

— 第一次
E:\python\learn_python1\venv\Scripts\python.exe E:/python/learn_python1/json_test2.py
What is your name? Oracle
We’ll remember you when you come back, Oracle!

Process finished with exit code 0

— 第二次
E:\python\learn_python1\venv\Scripts\python.exe E:/python/learn_python1/json_test2.py
Welcome back, Oracle!

Process finished with exit code 0

3.3 重构
你经常会遇到这样的情况：代码能够正确地运行，但可做进一步的改进——将代码划分为一系列完成具体工作的函数。这样的过程被称为重构。重构让代码更清晰、更易于理解、更容易扩展。

要重构json_test2.py，可将其大部分逻辑放到一个或多个函数中。json_test2.py的重点是问候用户，因此我们将其所有代码都放到一个名为greet_user() 的函数中，获取存量的用户名放在一个名为get_stored_username()的函数中，获取新的输入用户名的信息放在一个名为get_new_username()的函数中。

这样每个函数只做一件事情，greet_user() 没有过多的代码，直接调用其余的几个函数即可。

代码:

import json

def get_stored_username(filename='E:/python/file_test/username.json'):
    """Return the stored user name, or ``None`` if the file does not exist.

    Args:
        filename: JSON file holding the name; defaults to the tutorial's path.
    """
    try:
        with open(filename) as f_obj:
            return json.load(f_obj)
    except FileNotFoundError:
        return None

def get_new_username(filename='E:/python/file_test/username.json'):
    """Prompt for a user name, store it as JSON and return it.

    Args:
        filename: JSON file to write; defaults to the tutorial's path.
    """
    username = input("What is your name?")
    with open(filename, 'w') as f_obj:
        # The original passed the misspelled name `usernmae`, which raised
        # NameError on the first run.
        json.dump(username, f_obj)
    return username

def greet_user():
    """Greet the user by name, asking for the name on the first run."""
    username = get_stored_username()
    if username:
        print("Welcome back, " + username + "!")
    else:
        username = get_new_username()
        print("We'll remember you when you come back, " + username + "!")


# Guarded so that importing the module does not prompt for input.
if __name__ == '__main__':
    greet_user()

测试记录:

E:\python\learn_python1\venv\Scripts\python.exe E:/python/learn_python1/json_test2.py
Welcome back, Oracle!

Process finished with exit code 0

参考:
1.Python编程:从入门到实践

2021年4月1日
python实现读取pdf格式文档
已关闭评论

python实现读取pdf格式文档

文章目录
一、准备工作
二、部分变量的含义
三、PDFMiner类之间的关系
四、代码实现
python实现读取pdf格式文档
一、准备工作
安装对应的库
pip install pdfminer3k
pip install pdfminer.six

二、部分变量的含义
PDFDocument（pdf文档对象）
PDFPageInterpreter（解释器）
PDFParser（pdf文档分析器）
PDFResourceManager（资源管理器）
PDFPageAggregator（聚合器）
LAParams（参数分析器）

三、PDFMiner类之间的关系 %title插图%num

PDFMiner的相关文档(点击跳转)

四、代码实现
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# datetime:2021/3/17 12:12
# software: PyCharm
# version: python 3.9.2

def changePdfToText(filePath):
    """Extract the text of a PDF and append it to a same-named ``.txt`` file.

    Args:
        filePath: Path of the PDF file to read. The output file has the
            same path with the extension replaced by ``.txt`` and is opened
            in append mode.

    Raises:
        PDFTextExtractionNotAllowed: If the document forbids text extraction.
    """
    # The original listed these imports only as docstring text, so none of
    # the names existed at run time.
    import os.path

    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams
    from pdfminer.pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.pdfpage import PDFPage
    from pdfminer.pdfparser import PDFParser

    result = []  # text fragments in reading order
    # The with-block closes the PDF once parsing is finished.
    with open(filePath, 'rb') as file:
        praser = PDFParser(file)
        # The second argument is the password; empty for unprotected files.
        doc = PDFDocument(praser, '')
        # Link the parser to the document object.
        praser.set_document(doc)
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        # Resource manager, layout-aggregating device and page interpreter.
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        for page in PDFPage.create_pages(doc):
            interpreter.process_page(page)
            # Layout object of the page just processed.
            layout = device.get_result()
            for x in layout:
                # Only layout elements that expose get_text() carry text.
                if hasattr(x, "get_text"):
                    result.append(x.get_text())

    fileNames = os.path.splitext(filePath)
    with open(fileNames[0] + '.txt', 'a', encoding="utf-8") as f:
        f.write(''.join(result))

# 调用示例：

# path = u’E:\\1.pdf’
# changePdfToText(path)

pandas数值运算方法

2021年4月1日
pandas数值运算方法
pandas数值运算方法已关闭评论

pandas数值运算方法

1、通用函数：保留索引：
对Series或DataFrame对象使用numpy的通用函数时，返回的是保留索引的pandas对象

2、通用函数：索引对齐：
当两个Series或DataFrame对象进行二元计算时，pandas会在计算过程中自动对齐两个对象的索引。

1.Series索引对齐，结果输出的是，索引的并集，但是只有交集才有计算结果，其他返回为空，当然，也可以设置空值，

添加fill_value参数，进行填补空值。若两个都是空值，则不进行填充（必须使用通用函数，例如add、sub）

2.DataFrame索引对齐，结果同上，只不过是同时对齐列索引和行索引。就算是顺序不同，结果的索引都会自动排列的。

若想补充空值，同样的使用通用函数，添加fill_value参数即可

运算符通用函数
+ add()
– sub()、subtract()
* mul()、multiply()
/ div()
// floordiv()
% mod()
** pow()
以上通用函数，前面可以加一个r（例如radd()），表示顺序颠倒1+2变为2+1

3、通用函数：Series和DataFrame的计算

Series和DataFrame进行计算时，默认是从行上面减的（axis=1），若想在列上面减，可以使用通用函数，设置axis=0即可

它们进行计算时，与前面的一样，都会自动进行索引对齐。

python多进程

2021年4月1日
python多进程
python多进程已关闭评论

python多进程

1.使用进程完成多任务
1.1多进程示例程序
# from threading import Thread # 1 多线程方式1
import threading # 2 多线程方式2
import time
import multiprocessing # 多进程方式

def test1():
    """Print a marker line once per second, forever."""
    while True:
        print("1———-")
        time.sleep(1)


def test2():
    """Print a second marker line once per second, forever."""
    while True:
        print("2———-")
        time.sleep(1)


def main():
    """Run ``test1`` and ``test2`` concurrently in two child processes."""
    # Thread-based variant of the same demo, kept for comparison:
    # tp1 = threading.Thread(target=test1)
    # tp2 = threading.Thread(target=test2)
    # tp1.start()
    # tp2.start()

    tp1 = multiprocessing.Process(target=test1)
    tp2 = multiprocessing.Process(target=test2)
    tp1.start()
    tp2.start()


if __name__ == "__main__":
    main()

进程完成多任务所耗费的资源比较大

线程依赖于进程，没有进程就没有线程

2. 通过队列完成进程间的通信
示例程序

import multiprocessing

def download_from_web(q):
    """Producer: put the simulated download on the queue, then an end marker.

    Args:
        q: Queue-like object with a ``put`` method.
    """
    # Simulated download result.
    data = [11, 22, 33, 44]
    for temp in data:
        q.put(temp)
    # None marks the end of the stream. The consumer must not rely on
    # q.empty(), which is also true whenever the producer is merely slower.
    q.put(None)
    print("—download complete.—")


def analysis_data(q):
    """Consumer: collect items from the queue until the end marker arrives.

    Args:
        q: Queue-like object with a blocking ``get`` method.

    Returns:
        The list of received items (also printed).
    """
    # iter(callable, sentinel) calls q.get() until it returns None.
    waiting_analysis_data = list(iter(q.get, None))
    # Simulated processing.
    print(waiting_analysis_data)
    return waiting_analysis_data


def main():
    """Run the producer and the consumer in two processes sharing one queue."""
    # At most 3 items are buffered; put() blocks while the queue is full.
    q = multiprocessing.Queue(3)
    p1 = multiprocessing.Process(target=download_from_web, args=(q,))
    p2 = multiprocessing.Process(target=analysis_data, args=(q,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()


if __name__ == "__main__":
    main()

3. 进程池
示例程序

import os
import multiprocessing

def copy_file(file_name, old_folder_name, new_folder_name):
    """Simulate copying one file: only prints source, destination and file name."""
    print("=====>模拟COPY文件：从%s—>到%s 文件名是：%s" % (old_folder_name, new_folder_name, file_name))

def main():
    """Ask for a folder and dispatch one (simulated) copy task per file to a pool."""
    # 1. Name of the folder to copy.
    old_folder_name = input("请输入要COPY的文件夹的名字：")
    # 2. Create the target folder; an already existing one is reused.
    new_folder_name = old_folder_name + "[复件]"
    try:
        os.mkdir(new_folder_name)
    except FileExistsError:
        pass
    # 3. Names of all entries to copy.
    file_names = os.listdir(old_folder_name)
    # 4. Pool of five worker processes.
    po = multiprocessing.Pool(5)
    # 5. Queue one copy task per file.
    pending = [
        po.apply_async(copy_file, args=(file_name, old_folder_name, new_folder_name))
        for file_name in file_names
    ]
    po.close()
    po.join()
    # 6. Surface errors: get() re-raises an exception raised inside a worker,
    #    which apply_async would otherwise drop silently.
    for task in pending:
        task.get()


if __name__ == "__main__":
    main()

2024 年 10 月
一	二	三	四	五	六	日
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30	31