2021年4月25日 – 第 5 页

knn算法

原理：返回欧式距离最近的k个点中分类数目最多的那个label

#定义一个knn函数：
from collections import Counter

import numpy as np


class Knnclassify:
    """Minimal k-nearest-neighbours classifier based on Euclidean distance.

    ``fit`` only stores the training data; ``predict`` labels every query row
    with the most common label among its ``k`` closest training rows.
    """

    def __init__(self, k):
        """Remember the neighbour count ``k`` (must be positive)."""
        assert k > 0, 'error'
        self.k = k
        self._xtrain = None
        self._ytrain = None

    def fit(self, xdata, ydata):
        """Store the training rows ``xdata`` and their labels ``ydata``.

        Both must have the same number of rows, and at least ``k`` of them.
        Returns ``self`` so calls can be chained.
        """
        assert xdata.shape[0] == ydata.shape[0], 'error'
        assert self.k <= xdata.shape[0], 'error'

        self._xtrain = xdata
        self._ytrain = ydata

        return self

    def predict(self, target):
        """Return an array holding the predicted label of each row of ``target``.

        ``target`` must be 2-D with the same number of columns as the
        training data, and ``fit`` must have been called first.
        """
        assert self._xtrain is not None and self._ytrain is not None, 'error'
        assert target.shape[1] == self._xtrain.shape[1], 'error'

        return np.array([self._predict(row) for row in target])

    def _predict(self, x):
        """Return the majority label among the ``k`` training rows closest to ``x``."""
        assert x.shape[0] == self._xtrain.shape[1], 'error'
        # The training data lives on the instance; the original read the
        # undefined globals x_train / y_train / k / sqrt here.
        distances = np.sqrt(np.sum((self._xtrain - x) ** 2, axis=1))
        nearest = np.argsort(distances)[:self.k]
        votes = Counter(self._ytrain[j] for j in nearest)
        return votes.most_common(1)[0][0]

    def __repr__(self):
        return f'knn(k={self.k})'

调用sklearn中封装好的knn：

from sklearn.neighbors import KNeighborsClassifier
# Pass in k, the number of neighbours
knn_classifier = KNeighborsClassifier(n_neighbors=6)
# Fit the classifier on the training set
knn_classifier.fit(x_train,y_train)

# The query has to be passed as a matrix, hence the reshape to a single row
# NOTE(review): x is created empty here - presumably a placeholder for a real feature vector
x = np.array([])
knn_classifier.predict(x.reshape(1,-1))

python井字棋。分与人机和朋友下的那种

python井字棋快来看看孩子的头发
怎么用python做井字棋游戏，废话不多说上代码！
相关说明
怎么用python做井字棋游戏，废话不多说上代码！
觉得有帮助送我上去！！！！！！！！！！！！！！！！！！！

（这行不是代码：比较繁琐没简化。为什么？问就是头发没了）
import time
import random
def printf(a):
    """Print the board ``a``, one row per line."""
    for row in a:
        print(row)
def luozi(a, b, c):
    """Put piece ``c`` on board ``a`` at position ``b``.

    ``b`` holds the 1-based row and column (a two-character string such as
    ``'12'`` or a two-item list); both are converted to 0-based indexes here.
    """
    row = a[int(b[0]) - 1]
    col = int(b[1]) - 1
    # Replace the one-cell slice in place (same effect as delete + insert).
    row[col:col + 1] = [c]
def shuying(a, b):
    """Return 1 when piece ``b`` fills a whole row, column or diagonal of board ``a``, else 0."""
    # All three rows and columns are checked; the original range(0, 2)
    # never looked at the third row or the third column.
    for i in range(3):
        if a[i][0] == a[i][1] == a[i][2] == b:
            return 1
        if a[0][i] == a[1][i] == a[2][i] == b:
            return 1
    if a[0][0] == a[1][1] == a[2][2] == b or a[0][2] == a[1][1] == a[2][0] == b:
        return 1
    return 0
def duzi(a, b, c):
    """Defensive move: block a line where ``b`` already holds two cells.

    Scans the eight winning lines of board ``a`` (row 1, column 1, row 2,
    column 2, row 3, column 3, then both diagonals). On the first line that
    contains two ``b`` pieces and one empty cell (``'0'``), piece ``c`` is
    written into the empty cell.

    Returns 0 when a piece was placed and ``None`` when no line qualified.
    """
    lines = []
    # range(3) covers every row/column; the original range(0, 2) skipped the last ones.
    for i in range(3):
        lines.append([(i, 0), (i, 1), (i, 2)])  # row i
        lines.append([(0, i), (1, i), (2, i)])  # column i
    lines.append([(0, 0), (1, 1), (2, 2)])
    lines.append([(0, 2), (1, 1), (2, 0)])

    for line in lines:
        marks = [a[r][col] for r, col in line]
        if marks.count(b) == 2 and marks.count('0') == 1:
            r, col = line[marks.index('0')]
            a[r][col] = c
            return 0
    return None
def ailuozi(a, c):
    """Attacking move: complete a line where ``c`` already holds two cells.

    Scans the eight winning lines of board ``a`` (row 1, column 1, row 2,
    column 2, row 3, column 3, then both diagonals). On the first line that
    contains two ``c`` pieces and one empty cell (``'0'``), ``c`` is written
    into the empty cell.

    Returns 0 when a piece was placed and ``None`` when no line qualified, so
    callers can test ``ailuozi(a, c) == 0``.
    """
    lines = []
    # range(3) covers every row/column; the original range(0, 2) skipped the last ones.
    for i in range(3):
        lines.append([(i, 0), (i, 1), (i, 2)])  # row i
        lines.append([(0, i), (1, i), (2, i)])  # column i
    lines.append([(0, 0), (1, 1), (2, 2)])
    lines.append([(0, 2), (1, 1), (2, 0)])

    for line in lines:
        marks = [a[r][col] for r, col in line]
        if marks.count(c) == 2 and marks.count('0') == 1:
            r, col = line[marks.index('0')]
            a[r][col] = c
            return 0
    return None
def AI(a, b, c, d, e):
    """Play the computer's move on board ``a``.

    Parameters:
        a: the 3x3 board; empty cells hold the string ``'0'``.
        b: the human's piece.
        c: the computer's piece.
        d: who moved first - 1 for the human, 2 for the computer.
        e: the round number, used to pick the opening moves.

    Returns 0 after handling a move and ``None`` for a ``d``/``e`` combination
    that no branch covers.
    """
    corners = [[1, 1], [1, 3], [3, 1], [3, 3]]

    def place_random(cells):
        """Put ``c`` on a random empty cell out of ``cells``; report whether one existed."""
        free = [pos for pos in cells if a[pos[0] - 1][pos[1] - 1] == '0']
        if not free:
            return False
        luozi(a, random.choice(free), c)
        return True

    def attack_then_defend():
        """Win if possible, otherwise block, otherwise take any empty cell."""
        if ailuozi(a, c) == 0 or duzi(a, b, c) == 0:
            return
        # Neither a win nor a block was available: still make a move
        # instead of silently skipping the turn.
        place_random([[r, col] for r in (1, 2, 3) for col in (1, 2, 3)])

    if d == 1:
        human_has_centre = a[1][1] == b
        if e == 1:
            if human_has_centre:
                # The human opened in the centre: answer in a corner,
                # otherwise the computer loses.
                place_random(corners)
            else:
                # Take the centre. The original passed [1][1], which raises IndexError.
                luozi(a, [2, 2], c)
            return 0
        # Special case: human holds the centre plus one corner while the
        # computer holds the opposite corner - answer in another free corner.
        opposite_corners = (
            (a[0][0] == b and a[2][2] == c)
            or (a[0][2] == b and a[2][0] == c)
            or (a[2][0] == b and a[0][2] == c)
            or (a[2][2] == b and a[0][0] == c)
        )
        if human_has_centre and e == 2 and opposite_corners:
            if not place_random(corners):
                attack_then_defend()
            return 0
        # Every other situation: attack first, then defend.
        attack_then_defend()
        return 0

    if d == 2 and e == 2:
        human_on_edge = a[0][1] == b or a[1][0] == b or a[1][2] == b or a[2][1] == b
        human_on_corner = a[0][0] == b or a[0][2] == b or a[2][0] == b or a[2][2] == b
        if human_on_edge or human_on_corner:
            if not place_random(corners):
                attack_then_defend()
            return 0
    if d == 2 and e > 2:
        attack_then_defend()
        return 0
    return None
# Prompt shown before every human move: two digits, e.g. 11 = first row, first column.
_MOVE_PROMPT = "请落子（标准是数字，数字举例(11)就是在第一行，第一列落子）"


def _board_full(board):
    """Return True when no empty cell ('0') is left on the board."""
    return all('0' not in row for row in board)


while True:
    game = int(input("选择与谁下？1：与你的朋友。2：与AI下。3：退出游戏"))
    a = [['0', '0', '0'], ['0', '0', '0'], ['0', '0', '0']]  # note: empty cells hold the str '0'
    if game == 1:
        b1 = input("你用什么符号？")
        b2 = input("你对手用什么符号？")
        printf(a)
        while True:
            c = list(input(_MOVE_PROMPT))
            luozi(a, c, b1)
            printf(a)
            if shuying(a, b1) == 1:
                print(b1, "方胜利")
                break
            # The first player makes the 9th move, so the draw has to be
            # detected here too, before asking the opponent for a move.
            if _board_full(a):
                print('平局！')
                break
            c = list(input("(对手)" + _MOVE_PROMPT))
            luozi(a, c, b2)
            printf(a)
            if shuying(a, b2) == 1:
                print(b2, "方胜利")
                break
            if _board_full(a):
                print('平局！')
                break
        print("游戏结束：")
    if game == 2:
        p = int(input("谁先下？1：AI这么强当然是我先下拉。2：AI算什么让它先下。"))
        b2 = 'x'
        if p == 1:
            e = 1
            b1 = input("你用什么符号？AI用的是x符号")
            printf(a)
            while True:
                c = list(input(_MOVE_PROMPT))
                luozi(a, c, b1)
                printf(a)
                # Check the human's move before announcing the AI's turn.
                if shuying(a, b1) == 1:
                    print("怎么可能你尽然胜利了!")
                    break
                if _board_full(a):
                    print('平局')
                    break
                print("AI落子")
                time.sleep(1)  # short pause so the AI appears to "think"
                AI(a, b1, b2, p, e)
                e += 1
                printf(a)
                if shuying(a, b2) == 1:
                    print("AI胜利,AI表示你这个人类很谦虚")
                    break
                if _board_full(a):
                    print('平局')
                    break
        if p == 2:
            b1 = input("你用什么符号？AI用的是x符号")
            a[1][1] = b2  # the AI always opens in the centre
            printf(a)
            c = list(input(_MOVE_PROMPT))
            luozi(a, c, b1)
            printf(a)
            e = 2
            while True:
                AI(a, b1, b2, p, e)
                e += 1
                print("AI落子")
                time.sleep(1)
                printf(a)
                if shuying(a, b2) == 1:
                    print("AI胜利")
                    str1 = '哈哈哈，愚蠢的人类还敢小看我！'
                    for i in str1:
                        time.sleep(0.5)
                        print(i, end="")
                    break
                # The AI makes the 9th move in this mode: detect the draw
                # before asking the human to play on a full board.
                if _board_full(a):
                    print('平局')
                    break
                c = list(input(_MOVE_PROMPT))
                luozi(a, c, b1)
                printf(a)
                if shuying(a, b1) == 1:
                    print("怎么可能你尽然胜利了!")
                    break
                if _board_full(a):
                    print('平局')
                    break
    if game == 3:
        break

相关说明
前两轮就是下，第三轮就要用到堵子和进攻方法。（这里就体现了参数e的重要性了）
先进攻：判断那个方法（15种）可以赢。注意我进攻方法每种情况都return了，就是防止连续判断。

if ailuozi(a,c)==0:
return 0
else:
duzi(a,b,c)
return 0

time包就是更加真实吧
最后还可以加上：判断棋盘上该位置有没有棋子，没有才可以下（很简单的代码，加在每次落子前面），根据返回值来判断。

def panduan(a, c, piece=None):
    """Place a piece only if the target cell of board ``a`` is still empty.

    Parameters:
        a: the board; empty cells hold the string ``'0'``.
        c: two-item position (1-based row and column), as accepted by ``luozi``.
        piece: the piece to place. When omitted, the module-level name ``b``
            is used, which is what the original two-argument version did.

    Returns 0 (after printing a hint) when the cell is occupied, 1 after a
    successful placement.
    """
    if a[int(c[0]) - 1][int(c[1]) - 1] != '0':
        print("该位置已经有棋子了！请重新落子")
        return 0
    luozi(a, c, b if piece is None else piece)
    return 1

yolov5半自动化标注图片生成图片数据的xml

yolov5半自动化标注图片，生成图片数据的xml，标签格式为VOC格式

1.训练自己的yolov5 模型
2.执行 yolov5 的detect.py 文件，（里面的权重文件路径和jpg路径需要改为自己的路径）后面加 –save-txt ，命令如下：
sudo python detect.py –save-txt
3.txt文件转换xml. 转化的代码如下。

# 将 txt 标签文件转换为 xml 标签文件, 修改dict中的类，以及xml txt 和jpg 路径。

from xml.dom.minidom import Document
import os
import cv2

def _add_text_child(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(text))
    parent.appendChild(node)
    return node


def makexml(txtPath, xmlPath, picPath):
    """Convert YOLO txt label files into VOC-style xml annotation files.

    Parameters:
        txtPath: directory holding the txt label files (must end with a path
            separator, the file name is appended directly).
        xmlPath: directory that receives one xml file per txt file (same
            trailing-separator convention).
        picPath: directory holding the matching ``.jpg`` images, used to read
            the image size.

    Each txt line is read as ``class cx cy w h`` with the box values
    multiplied by the image width/height to obtain pixel corners.

    Raises:
        FileNotFoundError: when the image for a label file cannot be read.
        KeyError: when a class index is missing from the class table below.
    """
    # Class index (as written in the txt files) -> class name; edit for your own labels.
    # NOTE(review): the names carry leading/trailing spaces - confirm that is intended.
    class_names = {'0': " person",
                   '1': "dog ",
                   }
    for name in os.listdir(txtPath):
        stem, ext = os.path.splitext(name)
        if ext.lower() != ".txt":
            continue  # ignore anything that is not a label file

        img = cv2.imread(picPath + stem + ".jpg")
        if img is None:
            # cv2.imread returns None instead of raising when it cannot read the file
            raise FileNotFoundError("cannot read image " + picPath + stem + ".jpg")
        Pheight, Pwidth, Pdepth = img.shape

        with open(txtPath + name) as txtFile:
            rows = [line.strip().split(" ") for line in txtFile if line.strip()]

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        # Per-image header, written once per annotation file.
        _add_text_child(xmlBuilder, annotation, "folder", "VOC2007")
        # NOTE(review): the image is read as .jpg but recorded as .png - confirm.
        _add_text_child(xmlBuilder, annotation, "filename", stem + ".png")
        size = xmlBuilder.createElement("size")
        _add_text_child(xmlBuilder, size, "width", str(Pwidth))
        _add_text_child(xmlBuilder, size, "height", str(Pheight))
        _add_text_child(xmlBuilder, size, "depth", str(Pdepth))
        annotation.appendChild(size)

        # One <object> per label line.
        for oneline in rows:
            obj = xmlBuilder.createElement("object")
            _add_text_child(xmlBuilder, obj, "name", class_names[oneline[0]])
            _add_text_child(xmlBuilder, obj, "pose", "Unspecified")
            _add_text_child(xmlBuilder, obj, "truncated", "0")
            _add_text_child(xmlBuilder, obj, "difficult", "0")

            # Centre (shifted by +1) and half extent in pixels.
            centre_x = float(oneline[1]) * Pwidth + 1
            centre_y = float(oneline[2]) * Pheight + 1
            half_w = float(oneline[3]) * 0.5 * Pwidth
            half_h = float(oneline[4]) * 0.5 * Pheight

            bndbox = xmlBuilder.createElement("bndbox")
            _add_text_child(xmlBuilder, bndbox, "xmin", str(int(centre_x - half_w)))
            _add_text_child(xmlBuilder, bndbox, "ymin", str(int(centre_y - half_h)))
            _add_text_child(xmlBuilder, bndbox, "xmax", str(int(centre_x + half_w)))
            _add_text_child(xmlBuilder, bndbox, "ymax", str(int(centre_y + half_h)))
            obj.appendChild(bndbox)

            annotation.appendChild(obj)

        # Write with the same encoding the xml declaration announces.
        with open(xmlPath + stem + ".xml", 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')

makexml("/home/wenzen/project/V5/yolov5-fall/runs_/labels/",  # directory of the txt label files
        "/home/wenzen/project/V5/yolov5-fall/runs_/xml_02/",  # directory receiving the xml files
        "/home/wenzen/下载/fall/all/")  # directory of the source images

模型构造、参数访问初始化共享

4.1 模型构造
4.1.1 继承Module构造模型
4.1.2 Module子类
4.1.2.1 Sequential
4.1.2.2 ModuleList
4.1.2.3 ModuleDict
4.1.3 试着构建复杂模型
4.2 模型参数相关
4.2.1 访问参数
4.2.2 初始化模型参数
4.2.3 自定义初始化方法
4.2.4 共享参数
说明
4.1 模型构造
4.1.1 继承Module构造模型
Module类是nn模块里提供的一个模型构造类，是所有神经网络模块的基类，可以继承它来自定义模型。继承Module类构造多层感知机。定义的MLP类重载了Module类的__init__函数和forward函数。它们分别用于创建模型参数和定义前向计算。

%matplotlib inline
import torch
from IPython import display
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = “png”
import numpy as np
import random
import sys
sys.path.append(“..”)
from d2lzh_pytorch import *
import os
os.environ[“KMP_DUPLICATE_LIB_OK”]=”TRUE”

class MLP(nn.Module):
    """Multilayer perceptron: Linear(784, 256) -> ReLU -> Linear(256, 10)."""

    def __init__(self, **kwargs):
        # Run nn.Module's own constructor first; extra keyword arguments
        # given at construction time are forwarded to it.
        super().__init__(**kwargs)
        # Layers that own the model parameters: two fully connected layers.
        self.hidden = nn.Linear(784, 256)
        self.act = nn.ReLU()
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Forward pass: compute the model output for input ``x``."""
        hidden_out = self.hidden(x)
        activated = self.act(hidden_out)
        return self.output(activated)

以上的MLP类中无须定义反向传播函数。系统将通过自动求梯度而自动生成反向传播所需的backward函数。

我们可以实例化MLP类得到模型变量net。下面的代码初始化net并传入输入数据X做一次前向计算。其中，net(X)会调用MLP继承自Module类的__call__函数，这个函数将调用MLP类定义的forward函数来完成前向计算。

X = torch.rand(2, 784)
net = MLP()
print(net)
net(X)

MLP(
(hidden): Linear(in_features=784, out_features=256, bias=True)
(act): ReLU()
(output): Linear(in_features=256, out_features=10, bias=True)
)

tensor([[-0.0387, -0.0412, 0.2092, 0.1522, -0.1067, 0.1860, -0.0786, 0.0300,
-0.0347, 0.1138],
[ 0.0488, 0.0427, 0.2431, 0.1437, -0.1839, 0.1328, -0.1228, 0.0328,
-0.1240, 0.0826]], grad_fn=<AddmmBackward>)

注意，这里并没有将Module类命名为Layer（层）或者Model（模型）之类的名字，这是因为该类是一个可供自由组建的部件。它的子类既可以是一个层（如PyTorch提供的Linear类），又可以是一个模型（如这里定义的MLP类），或者是模型的一个部分。我们下面通过两个例子来展示它的灵活性。

4.1.2 Module子类
我们刚刚提到，Module类是一个通用的部件。事实上，PyTorch还实现了继承自Module的可以方便构建模型的类: 如Sequential、ModuleList和ModuleDict等等

4.1.2.1 Sequential
当模型的前向计算为简单串联各个层的计算时，Sequential类可以通过更加简单的方式定义模型。这正是Sequential类的目的：它可以接收一个子模块的有序字典（OrderedDict）或者一系列子模块作为参数来逐一添加Module的实例，而模型的前向计算就是将这些实例按添加的顺序逐一计算。

下面我们实现一个与Sequential类有相同功能的MySequential类

from collections import OrderedDict


class MySequential(nn.Module):
    """Re-implementation of ``Sequential``: runs its sub-modules in insertion order.

    Accepts either one ``OrderedDict`` of name -> module, or any number of
    modules, which are then named by their position ("0", "1", ...).
    """

    # OrderedDict is imported at module level: a name imported in the class
    # body is not visible inside methods, so the isinstance check in
    # __init__ raised NameError whenever exactly one argument was passed.

    def __init__(self, *args):
        super(MySequential, self).__init__()
        if len(args) == 1 and isinstance(args[0], OrderedDict):  # a single OrderedDict was passed
            for key, module in args[0].items():
                self.add_module(key, module)  # add_module stores the module in self._modules
        else:  # plain modules were passed
            for idx, module in enumerate(args):
                self.add_module(str(idx), module)

    def forward(self, input):
        """Feed ``input`` through every sub-module in the order they were added."""
        # self._modules is traversed in the order the members were added
        for module in self._modules.values():
            input = module(input)
        return input

#用MySequential类来实现前面描述的MLP类，并使用随机初始化的模型做一次前向计算。
net = MySequential(
nn.Linear(784, 256),
nn.ReLU(),
nn.Linear(256, 10)
)
print(net)
net(X)

MySequential(
(0): Linear(in_features=784, out_features=256, bias=True)
(1): ReLU()
(2): Linear(in_features=256, out_features=10, bias=True)
)

tensor([[ 0.2538, 0.0296, 0.2009, -0.2214, 0.1374, 0.1585, -0.1090, 0.2164,
-0.0628, 0.0303],
[ 0.1401, -0.1001, 0.1719, 0.0153, 0.0389, 0.1669, -0.1041, 0.2167,
0.0235, 0.1929]], grad_fn=<AddmmBackward>)

4.1.2.2 ModuleList
ModuleList接收一个子模块的列表作为输入，也可以类似List那样进行append和extend操作:

net = nn.ModuleList([nn.Linear(784, 256), nn.ReLU()])
net.append(nn.Linear(256, 10))
print(net[-1])
print(net)

Linear(in_features=256, out_features=10, bias=True)
ModuleList(
(0): Linear(in_features=784, out_features=256, bias=True)
(1): ReLU()
(2): Linear(in_features=256, out_features=10, bias=True)
)

4.1.2.3 ModuleDict
ModuleDict接收一个子模块的字典作为输入, 也可以类似字典那样进行添加访问操作:

和ModuleList一样，ModuleDict实例仅仅是存放了一些模块的字典，并没有定义forward函数需要自己定义。同样，ModuleDict也与Python的Dict有所不同，ModuleDict里的所有模块的参数会被自动添加到整个网络中。

net = nn.ModuleDict({
‘linear’: nn.Linear(784, 256),
‘act’: nn.ReLU(),
})
net[‘output’] = nn.Linear(256, 10) # 添加
print(net[‘linear’]) # 访问
print(net.output)
print(net)
# net(torch.zeros(1, 784)) # 会报NotImplementedError

Linear(in_features=784, out_features=256, bias=True)
Linear(in_features=256, out_features=10, bias=True)
ModuleDict(
(act): ReLU()
(linear): Linear(in_features=784, out_features=256, bias=True)
(output): Linear(in_features=256, out_features=10, bias=True)
)

4.1.3 试着构建复杂模型
上面介绍的这些类可以使模型构造更加简单，且不需要定义forward函数，但直接继承Module类可以极大地拓展模型构造的灵活性。下面我们构造一个稍微复杂点的网络FancyMLP。在这个网络中，我们通过get_constant函数创建训练中不被迭代的参数，即常数参数。在前向计算中，除了使用创建的常数参数外，我们还使用Tensor的函数和Python的控制流，并多次调用相同的层。

class FancyMLP(nn.Module):
    """Demo network mixing a constant weight, a reused layer and Python control flow."""

    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)

        # Constant (non-trainable) weight: a plain tensor, not an nn.Parameter
        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        """Return a scalar: the sum of the rescaled activations."""
        x = self.linear(x)
        # Use the constant weight together with nn.functional.relu and torch.mm
        x = nn.functional.relu(torch.mm(x, self.rand_weight.data) + 1)

        # Reuse the same fully connected layer, i.e. two layers sharing parameters
        x = self.linear(x)
        # Control flow: item() turns the norm into a Python scalar for the comparison
        while x.norm().item() > 1:
            x /= 2
        if x.norm().item() < 0.8:
            x *= 10
        # The return was missing, so net(X) evaluated to None; the transcript
        # printed with this example shows a scalar produced by a sum.
        return x.sum()

在这个FancyMLP模型中，我们使用了常数权重rand_weight（注意它不是可训练模型参数）、做了矩阵乘法操作（torch.mm）并重复使用了相同的Linear层。下面我们来测试该模型的前向计算

X = torch.rand(2, 20)
net = FancyMLP()
print(net)
net(X)

FancyMLP(
(linear): Linear(in_features=20, out_features=20, bias=True)
)

tensor(0.4051, grad_fn=<SumBackward0>)

FancyMLP和Sequential类都是Module类的子类，所以我们可以嵌套调用它们。

class NestMLP(nn.Module):
    """Module wrapping a small Sequential: Linear(40, 30) followed by ReLU."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        layers = [nn.Linear(40, 30), nn.ReLU()]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the wrapped Sequential."""
        out = self.net(x)
        return out

net = nn.Sequential(NestMLP(), nn.Linear(30, 20), FancyMLP())

X = torch.rand(2, 40)
print(net)
net(X)

Sequential(
(0): NestMLP(
(net): Sequential(
(0): Linear(in_features=40, out_features=30, bias=True)
(1): ReLU()
)
)
(1): Linear(in_features=30, out_features=20, bias=True)
(2): FancyMLP(
(linear): Linear(in_features=20, out_features=20, bias=True)
)
)

tensor(-0.2525, grad_fn=<SumBackward0>)

4.2 模型参数相关
from torch.nn import init
net = nn.Sequential(nn.Linear(4, 3), nn.ReLU(), nn.Linear(3, 1))
print(net)
X = torch.rand(2,4)
Y = net(X).sum()
print(Y)

Sequential(
(0): Linear(in_features=4, out_features=3, bias=True)
(1): ReLU()
(2): Linear(in_features=3, out_features=1, bias=True)
)
tensor(-0.2687, grad_fn=<SumBackward0>)

4.2.1 访问参数
对于Sequential实例中含模型参数的层，我们可以通过Module类的parameters()或者named_parameters方法来访问所有参数（以迭代器的形式返回），后者除了返回参数Tensor外还会返回其名字。下面，访问多层感知机net的所有参数：

print(type(net.named_parameters()))
for name, param in net.named_parameters():
print(name, param)

<class ‘generator’>
0.weight Parameter containing:
tensor([[-0.1270, -0.4299, 0.4027, 0.4511],
[-0.3526, 0.3099, 0.1313, 0.1578],
[ 0.4411, -0.2710, -0.3702, -0.0549]], requires_grad=True)
0.bias Parameter containing:
tensor([ 0.2990, -0.1793, -0.2047], requires_grad=True)
2.weight Parameter containing:
tensor([[ 0.0835, -0.5109, -0.5549]], requires_grad=True)
2.bias Parameter containing:
tensor([-0.1805], requires_grad=True)

返回的名字自动加上了层数的索引作为前缀。我们再来访问net中单层的参数。对于使用Sequential类构造的神经网络，我们可以通过方括号[]来访问网络的任一层。索引0表示隐藏层为Sequential实例最先添加的层。

因为这里是单层的所以没有了层数索引的前缀。另外返回的param的类型为torch.nn.parameter.Parameter，其实这是Tensor的子类，和Tensor不同的是如果一个Tensor是Parameter，那么它会自动被添加到模型的参数列表里

for name, param in net[0].named_parameters():
print(name, param.size(), type(param))

weight torch.Size([3, 4]) <class ‘torch.nn.parameter.Parameter’>
bias torch.Size([3]) <class ‘torch.nn.parameter.Parameter’>

#和Tensor不同的是如果一个Tensor是Parameter
#那么它会自动被添加到模型的参数列表里
class MyModel(nn.Module):
    """Shows that only an nn.Parameter attribute is registered as a model parameter."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Wrapped in nn.Parameter: shows up in named_parameters()
        self.weight1 = nn.Parameter(torch.rand(20, 20))
        # Plain tensor attribute: not part of the parameter list
        self.weight2 = torch.rand(20, 20)

    def forward(self, x):
        """No computation is defined for this demo."""
        return None

n = MyModel()
for name, param in n.named_parameters():
print(name)

weight1

上面的代码中weight1在参数列表中但是weight2却没在参数列表中。

因为Parameter是Tensor，即Tensor拥有的属性它都有，比如可以根据data来访问参数数值，用grad来访问参数梯度。

4.2.2 初始化模型参数
PyTorch中nn.Module的模块参数都采取了较为合理的初始化策略，但我们经常需要使用其他方法来初始化权重。PyTorch的init模块里提供了多种预设的初始化方法

for name, param in net.named_parameters():
if ‘weight’ in name:
init.normal_(param,mean=0,std=0.01)
print(name,param.data)

0.weight tensor([[ 0.0036, -0.0119, -0.0024, 0.0030],
[-0.0020, 0.0009, -0.0042, -0.0017],
[-0.0095, -0.0018, 0.0102, -0.0183]])
2.weight tensor([[ 0.0076, 0.0040, -0.0136]])

#使用常数来初始化权重参数。
for name, param in net.named_parameters():
if ‘bias’ in name:
init.constant_(param, val=0)
print(name, param.data)

0.bias tensor([0., 0., 0.])
2.bias tensor([0.])

4.2.3 自定义初始化方法
有时候我们需要的初始化方法并没有在init模块中提供。这时，可以实现一个初始化方法，从而能够像使用其他初始化方法那样使用它。在这之前我们先来看看PyTorch是怎么实现这些初始化方法的，例如torch.nn.init.normal_：

def normal_(tensor, mean=0, std=1):
    """Fill ``tensor`` in place with normal(mean, std) samples, without recording gradients."""
    with torch.no_grad():
        filled = tensor.normal_(mean, std)
    return filled

可以看到这就是一个inplace改变Tensor值的函数，而且这个过程是不记录梯度的。类似的我们来实现一个自定义的初始化方法。在下面的例子里，我们令权重有一半概率初始化为0，有另一半概率初始化为[−10,−5]和[5,10]两个区间里均匀分布的随机数。

def init_weight_(tensor):
    """Initialise ``tensor`` in place: uniform in [-10, 10], then zero every entry with |value| < 5."""
    with torch.no_grad():
        tensor.uniform_(-10, 10)
        keep_mask = (tensor.abs() >= 5).float()
        tensor *= keep_mask

for name, param in net.named_parameters():
if ‘weight’ in name:
init_weight_(param)
print(name, param.data)

0.weight tensor([[ 5.7987, 6.0930, 0.0000, -5.6870],
[-5.9683, -0.0000, -0.0000, 6.2262],
[-0.0000, -0.0000, -6.4857, 0.0000]])
2.weight tensor([[-0., 0., -0.]])

4.2.4 共享参数
Module类的forward函数里多次调用同一个层。此外，如果我们传入Sequential的模块是同一个Module实例的话参数也是共享的：

linear = nn.Linear(1, 1, bias=False)
net = nn.Sequential(linear, linear)
print(net)
for name, param in net.named_parameters():
init.constant_(param, val=3)
print(name, param.data)

Sequential(
(0): Linear(in_features=1, out_features=1, bias=False)
(1): Linear(in_features=1, out_features=1, bias=False)
)
0.weight tensor([[3.]])

#在内存中，这两个线性层其实一个对象:
print(id(net[0]) == id(net[1]))
print(id(net[0].weight) == id(net[1].weight))

True
True

因为模型参数里包含了梯度，所以在反向传播计算时，这些共享的参数的梯度是累加的:

x = torch.ones(1, 1)
print(net(x))
y = net(x).sum()
print(y)
y.backward()
print(net[0].weight.grad) # 单次梯度是3，两次所以就是6\

tensor([[9.]], grad_fn=<MmBackward>)
tensor(9., grad_fn=<SumBackward0>)
tensor([[6.]])

说明
本博客是对如何使用pytorch用于深度学习学习过程的记录和总结。
学习教程为：《动手学深度学习》和https://tangshusen.me/Dive-into-DL-PyTorch/#/
这里推荐这个网址，将动手学深度学习改为了Pytorch实现，很有意义！
代码是借鉴了学习教程并从自己写的Jupyter中导出的，复制进Jupyter可以运行

TP50、TP90、TP99的理解和使用

一 TP50、TP90、TP99 的概念
1.1 什么是 TP
TP 是 Top Percentile 的缩写，中文译作百分位。

1.2 什么是百分位
百分位是一个统计学的术语。

如果将一组数据从小到大排序，并计算相应的累计百分位，则某一百分位所对应数据的值就称为这一百分位的百分位数。可表示为：一组N个观测值按数值大小排列。如，处于P%位置的值称第P百分位数。

1.3 TP50、TP90、TP99 怎么理解
TP50、TP90、TP99 是工程性能指标，以网络请求耗时为例：

TP50：表示满足百分之五十的网络请求所需的最低耗时。
TP90：表示满足百分之九十的网络请求所需的最低耗时。
TP99：表示满足百分之九十九的网络请求所需的最低耗时。
二 TP50、TP90、TP99 的计算
2.1 简单举例
举个例子：有四次请求耗时分别为：

10ms，1000ms，100ms，2ms

那么我们可以这样计算TP99：4次请求中，99%的请求数为4*0.99，进位取整也就是4次，满足这全部4次请求的最低耗时为1000ms，也就是TP99的答案是1000ms。

2.2 python 实现
运用 numpy 包的 percentile 方法实现 TP50、TP90、TP99 的计算。

import numpy as np

data = []
# '文件路径' is a placeholder: replace it with the path of a file that holds
# one request latency per line.
with open('文件路径', 'r') as f:
    for line in f:
        line = line.strip()
        if line:  # skip blank lines instead of failing in float('')
            data.append(float(line))
# NOTE: np.percentile interpolates linearly by default, so on small samples
# the result can differ from the round-up rule used in the worked example.
print(np.percentile(data, 95))
文件路径：是绝对路径，如 /User/local/a.txt，其中 a.txt 是网络请求耗时数组。

可以将代码中的95修改成50、90、99，来实现 TP50、TP90、TP99 的计算。

opencv笔记（1）——图像基本处理

图像基本处理
文章目录
图像基本处理
一、数据读取——图像
二、数据读取——视频
三、常用函数
一、数据读取——图像
cv2.IMREAD_COLOR; #彩色图像
cv2.IMREAD_GRAYSCALE; #灰度图像

import cv2 #opencv读取的格式是BGR
import matplotlib.pyplot as plt
import numpy as np #基本数据计算工具包
%matplotlib inline #专有魔法指令，即时显示

img = cv2.imread('cat.jpg')  # read the image from the current directory
# On a Raspberry Pi the path has to be spelled out, e.g.
# img = cv2.imread('/home/pi/mu_code/opencv/alien.jpg')
# The module is named cv2 (lower case); "CV2" raised NameError.
img2 = cv2.imread('cat.jpg', cv2.IMREAD_GRAYSCALE)  # same file read as a grayscale image

img为三通道的像素值。
img.shape：(H, W, C)，即 高、宽、通道数

# Display the image; several windows can be created this way
cv2.imshow('image', img)
# Wait time in milliseconds; 0 means wait until any key is pressed
cv2.waitKey(0)
cv2.destroyAllWindows()  # was misspelled "destoryAllWindows" (AttributeError)

#函数封装
def cv_show(name, img):
    """Show ``img`` in a window titled ``name``, save it as mycat.png, and wait for a key."""
    cv2.imshow(name, img)
    cv2.imwrite('mycat.png', img)  # save a copy
    cv2.waitKey(0)
    cv2.destroyAllWindows()  # was misspelled "destoryAllWindows" (AttributeError)

二、数据读取——视频
vc = cv2.VideoCapture('test.mp4')  # open test.mp4

# Check that the video opened correctly
if vc.isOpened():
    is_open, frame = vc.read()  # renamed from "open" to stop shadowing the builtin
else:
    is_open = False

while is_open:
    ret, frame = vc.read()
    if frame is None:
        break
    if ret:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert to grayscale
        cv2.imshow('result', gray)
        if cv2.waitKey(10) & 0xFF == 27:  # 27 is the ESC key
            break

# Release once, after the loop, whichever way it ended.
vc.release()  # close the video
cv2.destroyAllWindows()

ret：bool值：True：读取到图片，
False：没有读取到图片
frame：一帧的图片

# 该程序打开摄像头，显示视频，按q拍摄一张照片并储存下来

import cv2

cap = cv2.VideoCapture(0) #持续打开摄像头

while(1):
# 获得图片
ret, frame = cap.read()
# 展示图片
cv2.imshow(“capture”, frame)
if cv2.waitKey(1) & 0xFF == ord(‘q’):
# 存储图片
cv2.imwrite(“camera.jpg”, frame)
break

cap.release()
cv2.destroyAllWindows()

截取部分图像数据

#截取部分图像数据
img = cv2.imread(‘cat.jpg’)
cat = img[0:200,0:200] #要显示的像素点区域
cv_show(‘cat’, cat) #窗口名，显示内容

颜色通道提取

#颜色通道提取
b, g, r = cv2.split(img) #切片
img = cv2.merge((b, g, r)) #组合

只保留R, G, B

# Keep only R (OpenCV stores channels as B, G, R = 0, 1, 2)
cur_img = img.copy()
cur_img[:, :, 0] = 0
cur_img[:, :, 1] = 0
cv_show('R', cur_img)
# Keep only G
cur_img = img.copy()
cur_img[:, :, 0] = 0
cur_img[:, :, 2] = 0
cv_show('G', cur_img)  # window title was a copy-pasted 'R'
# Keep only B
cur_img = img.copy()
cur_img[:, :, 1] = 0
cur_img[:, :, 2] = 0
cv_show('B', cur_img)  # window title was a copy-pasted 'R'

B, G, R 分别为0, 1, 2

边界填充

top_size, bottom_size, left_size, right_size = (50, 50, 50, 50)  # padding in pixels on each side
# borderType is a keyword argument; "borderType==..." was a comparison with an undefined name.
replicate = cv2.copyMakeBorder(img, top_size, bottom_size, left_size, right_size, borderType=cv2.BORDER_REPLICATE)
reflect = cv2.copyMakeBorder(img, top_size, bottom_size, left_size, right_size, cv2.BORDER_REFLECT)
reflect101 = cv2.copyMakeBorder(img, top_size, bottom_size, left_size, right_size, cv2.BORDER_REFLECT_101)
wrap = cv2.copyMakeBorder(img, top_size, bottom_size, left_size, right_size, cv2.BORDER_WRAP)
constant = cv2.copyMakeBorder(img, top_size, bottom_size, left_size, right_size, cv2.BORDER_CONSTANT, value=0)

import matplotlib.pyplot as plt

# 显示图像
plt.subplot(231), plt.imshow(img, ‘gray’), plt.title(‘ORIGINAL’) # 原图
plt.subplot(232), plt.imshow(replicate, ‘gray’), plt.title(‘REPLICATE’)
plt.subplot(233), plt.imshow(reflect, ‘gray’), plt.title(‘REFLECT’)
plt.subplot(234), plt.imshow(reflect101, ‘gray’), plt.title(‘REFLECT_101’)
plt.subplot(235), plt.imshow(wrap, ‘gray’), plt.title(‘WRAP’)
plt.subplot(236), plt.imshow(constant, ‘gray’), plt.title(‘CONSTANT’)

plt.show()

数值计算

img_cat = cv2.imread('cat.jpg')
img_dog = cv2.imread('dog.jpg')

img_cat2 = img_cat + 10  # add 10 to every value
img_cat[:5, :, 0]  # first 5 rows of channel 0

# Add the full images first, then slice: adding the whole image to a
# 5-row, single-channel slice does not broadcast.
(img_cat + img_cat2)[:5, :, 0]
# e.g. 142 + 152 = 294; 294 - 256 = 38, so the value is 38:
# results must stay within 0-255, anything larger wraps around (modulo 256)

cv2.add(img_cat, img_cat2)[:5, :, 0]
# (values above 255 are clipped to 255)

图像融合

img_cat.shape # 看img_cat的shape值(414,500,3)
img_dog = cv2.resize(img_dog, (500, 414))#原(429,499,3)
img_dog.shape # 现(414,500,3)
res = cv2.addWeighted(img_cat, 0.4, img_dog, 0.6, 0)

三、常用函数
cv2.imshow(name,img)

name：窗口名称；img ：窗口内容

cv2.waitKey(timeout)

显示图片时间timeout，单位为ms,0代表一直显示

cv2.destroyAllWindows()

关闭窗口

cv2.imread(filepath,flags)

读取图片，默认是三通道(BGR)的彩色图，flags=0读入灰度图

cv2.imwrite(filename,img)

保存图像，filename：文件名；img：内容

b,g,r=cv2.split(img)

颜色通道提取，opencv提取格式为BGR(注意不是RGB)

cv2.merge((b,g,r))

重新组合颜色通道

cv2.copyMakeBorder(img,top, bottom, left, right,borderType)

填充边界;img：内容；top, bottom, left, right：四区域的大小；borderType：填充种类

cv2.add(img1, img2)

img1和img2的rgb值相加，超过255取255

cv2.addWeighted(img1,α,img2,β,b)

为图片添加权重, 混合图像，img1=X1；img2=X2；αX1+βX2+b

cv2.resize(img, (width, height))

变换图像大小（注意img.shape的出来的是(height,width))

cv2.resize(img, (0, 0), fx=a, fy=b)

变换图像大小（(height,width)为(0,0)，a为x变为原来a倍，b为y变为原来b倍)

cv_show(‘name’, contents)

name：窗口名称；contents：内容

python中的with关键字原理详解

对于系统资源如文件、数据库连接、socket 而言，应用程序打开这些资源并执行完业务逻辑之后，必须做的一件事就是要关闭（释放）该资源。
在代码中经常会看见 with open(file) as f 对文件进行操作，其中 with 关键字到底有什么用处呢？让我们一起来看看如何正确关闭一个文件。

1.普通版
def fun1():
    """Naive version: open test.txt, write to it, then close it by hand.

    If ``write`` raises, ``close`` is never reached.
    """
    f = open("test.txt", "w")
    f.write("hello python")
    f.close()  # the call parentheses were missing, so close was never invoked

这样写有一个潜在的问题，如果在调用 write 的过程中出现了异常，进而导致后续代码无法继续执行，close 方法就无法被正常调用，资源会一直被该程序占用而得不到释放。那么该如何改进代码呢？

2.进阶版
def fun2():
    """Write to test.txt with try/except/finally so the file is always closed.

    Any exception raised while opening or writing is printed, not propagated.
    """
    f = None  # stays None when open() itself fails
    try:
        f = open("test.txt", "w")
        f.write("hello python")
    except Exception as e:
        print(e)
    finally:
        # Close only what was opened; otherwise a failed open() would turn
        # into an UnboundLocalError here.
        if f is not None:
            f.close()

改良版本的程序是对可能发生异常的代码处进行 try 捕获，使用 try/finally 语句，该语句表示如果在 try 代码块中程序出现了异常，后续代码就不再执行，而直接跳转到 except 代码块。而无论如何，finally 块的代码最终都会被执行。因此，只要把 close 放在 finally 代码中，文件就一定会关闭。

3.高级版
def fun3():
    """Write to test.txt inside a ``with`` block, which closes the file on exit."""
    with open("test.txt", "w") as handle:
        handle.write("hello python")

一种更加简洁、优雅的方式就是用with关键字。open方法的返回值赋值给变量f，当离开with代码块的时候，系统会自动调用 f.close() 方法，with 的作用和使用 try/finally 语句是一样的。那么它的实现原理是什么？在讲with的原理前要涉及到另外一个概念，就是上下文管理器（Context Manager）。
什么是上下文？
上下文在不同的地方表示不同的含义，要感性理解。在编程中 context 上下文其实说白了就是环境。
例如一个 APP 应用，在切换界面的时候，要保存你是在哪个屏幕跳过来的等等信息，以便你点击返回的时候能正确跳回，如果不存肯定就无法正确跳回了。
再比如线程、协程进行任务切换时，程序怎么能知道切换到另一个任务，是从头开始执行还是从中间呢？其上下文就起到作用，就是任务本身会对其环境进行保存，做到哪里了，做了多少，各种状态都会标识记录，从而形成了上下文环境，因此在切换时根据每个任务的上下文环境，继续执行，从而达到多任务。
上下文管理器
任何类实现了__enter__()和__exit__()方法的对象都可称之为上下文管理器。
上下文管理器对象可以使用with关键字。

4.用类还原with的实现原理
class Fun4:
    """Hand-written context manager that opens a file on entry and closes it on exit."""

    def __init__(self, file_name, mode):
        self.file_name, self.mode = file_name, mode

    def __enter__(self):
        # The object returned here is what ``with ... as f`` binds to f.
        handle = open(self.file_name, self.mode)
        self.f = handle
        return handle

    def __exit__(self, *args):
        # Runs when the with block is left; cleans up by closing the file.
        self.f.close()
with Fun4(“test.txt”, “w”) as f:
f.write(“hello python”)
"""
首先Fun4("test.txt", "w")初始化实例对象，
然后with会寻找类中是否有__enter__和__exit__，
如果有则调用__enter__函数，
__enter__()方法返回资源对象，这里就是你将要打开
的那个文件对象；最后离开with代码块时，__exit__()方法处理一些清除工作。
"""

5.使用contextmanager装饰器，实现with功能
from contextlib import contextmanager
"""
Python还提供了一个contextmanager的装饰器，更进一步简化
了上下文管理器的实现方式。通过yield将函数分割成两部分，yield之前的
语句在__enter__方法中执行，yield之后的语句在__exit__方法中执行。
紧跟在yield后面的值会绑定到with语句中as后面的变量上。
"""
@contextmanager
def fun5(path, mode):
    """Context manager that opens ``path`` in ``mode`` and yields the file object.

    The code before ``yield`` runs on entering the ``with`` block; the
    ``finally`` part runs on leaving it.
    """
    f = open(path, mode)
    try:
        yield f
    finally:
        # Without try/finally an exception raised inside the with block
        # skipped the close and left the file open.
        f.close()
with fun5(“test.txt”, “w”) as f:
f.write(“hello python”)

总结：
Python 提供了 with 语法用于简化资源操作的后续清除操作，是 try/finally 的替代方法，实现原理建立在上下文管理器之上。此外，Python 还提供了一个 contextmanager 装饰器，更进一步简化上下管理器的实现方式。

用大并发的手段批量爬取小图片

一起偷偷学Python
本专栏专为Python入门级选手打造，从Python基础，到Python爬虫，快乐学习不掉队。
看，未来
¥29.90
订阅专栏

网页分析
网址：百度图片：超前消费

因为我下一场直播（2021.4.24）的主题是超前消费，那就爬超前消费主题的图片吧。

看了一下，这个网页是下拉式的动态网页，根据我们前几篇的经验，直接找网络包吧，也不要花太多时间了。
不过在网络包这里栽了跟头，浪费了一些时间，因为：

网址的结构是这样的（仅放不同）

‘pn=30&rn=30&gsm=1e&1619227748027=’
‘pn=60&rn=30&gsm=3c&1619227748310=’
‘pn=90&rn=30&gsm=5a&1619227779562=’
‘pn=120&rn=30&gsm=78&1619227779728=’
1
2
3
4
gsm的规律好找，后面那个规律不好找啊！！！

（时间戳加上三个随机数）

关于线程与线程池的理论基础在这篇里面：精写15篇，学会Python爬虫 – （7）线程、互斥锁、线程池丨蓄力计划
需要多了解的小伙伴点击蓝字即可。

代码实现
import threadpool
import requests
import json
import random

data_list = []

user_agent_list = [
“Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36”,

“Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)”,

]

def get_url(url):
    """Fetch one result page and append its thumbnail URLs to ``data_list``.

    The response body is parsed as JSON; the ``thumbURL`` of every entry
    under ``data`` is collected. On a request or parse failure
    ``'failed' + url`` is printed and nothing is collected.
    """
    try:
        res = requests.get(
            url,
            headers={"User-Agent": random.choice(user_agent_list)},
            timeout=10,  # do not let a stalled request block a worker thread forever
        ).content
        data = json.loads(res)['data']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        print('failed' + url)
        return
    if not isinstance(data, list):
        print('failed' + url)
        return
    for d in data:
        # An entry without a thumbnail is skipped rather than aborting the whole page.
        thumb = d.get('thumbURL') if isinstance(d, dict) else None
        if thumb:
            data_list.append(thumb)

def get_pic(url):
    '''
    Download one picture and save it in the current working directory.

    This is the worker function handed to the thread pool.
    :param url: address of the picture
    :return: None
    '''
    # Raw bytes of the response body
    pic = requests.get(url, timeout=10).content

    # Derive the file name from the URL: drop the host prefix, commas and slashes
    name = url.replace('https://ss0.bdstatic.com/', '').replace(',', '').replace('/', '')

    # No directory is given, so the file lands in the directory the program
    # runs from; picture data has to be written in binary mode.
    with open(name, 'wb') as photo:
        photo.write(pic)
# 关闭文件

def Thread_Pool(outdata, datalist=None, Thread_num=5):
    '''
    Run a function over a list of arguments on a thread pool.
    :param outdata: the function each task executes
    :param datalist: list of arguments, one task is created per item
    :param Thread_num: number of worker threads
    :return: None
    '''
    pool = threadpool.ThreadPool(Thread_num)  # create Thread_num worker threads

    # One request per item of datalist, queued in order
    for req in threadpool.makeRequests(outdata, datalist):
        pool.putRequest(req)

    pool.wait()  # return only after every task has finished

def main():
    """Collect the thumbnail URLs first, then download every picture."""
    # Placeholder entry left by the author: put the captured request URLs here.
    url_list = ['太占地方啦，自己扒拉几个包吧']

    for worker, items in ((get_url, url_list), (get_pic, data_list)):
        Thread_Pool(worker, datalist=items)

main()

sklearn中的特征处理

特征提取
字典特征提取
from sklearn.feature_extraction import DictVectorizer
# 字典特征处理的 sklearn API
def dictvec():
    """Demonstrate DictVectorizer with sparse and dense output.

    Numeric values are kept as they are; string values are one-hot encoded.
    Prints the intermediate results and returns None.
    """
    samples = [{'city': '北京', 'temperatyre': 100},
               {'city': '上海', 'temperatyre': 60},
               {'city': '深圳', 'temperatyre': 20}]

    # sparse=True makes fit_transform return a sparse matrix; toarray()
    # converts it to the same dense array that sparse=False would give.
    # (Named "vectorizer" so the builtin dict is no longer shadowed.)
    vectorizer = DictVectorizer(sparse=True)
    # The input must be a dict or an iterable of dicts
    data = vectorizer.fit_transform(samples).toarray()
    # Map the array back to feature dicts
    print(vectorizer.inverse_transform(data))
    print(data)

    # Same data again with sparse=False
    dense_vectorizer = DictVectorizer(sparse=False)
    data2 = dense_vectorizer.fit_transform(vectorizer.inverse_transform(data))
    print(data2)
    print(dense_vectorizer.transform(samples))
    # data2 as noted by the original author:
    # [[0. 1. 0. 100.]
    #  [1. 0. 0. 60.]
    #  [0. 0. 1. 20.]]
    # the first three columns are the one-hot encoding of the city
    return None

处理字典数据特征：就是保留数值型数据，对字符串类型数据进行one- hot编码处理

文本特征提取
from sklearn.feature_extraction.text import CountVectorizer
import jieba
# 处理中文字符串的包将中文字符串按照词划分

def textdev():
    """Demonstrate CountVectorizer on two English sentences: print the vocabulary and the counts."""
    cv = CountVectorizer()
    data = [
        'I like Python and math',
        'I dislike C and C++'
    ]
    # fit_transform returns a sparse matrix; toarray() makes it dense
    data_res = cv.fit_transform(data).toarray()
    # get_feature_names() was removed from scikit-learn; prefer its
    # replacement and fall back only on old releases.
    if hasattr(cv, 'get_feature_names_out'):
        names = cv.get_feature_names_out()
    else:
        names = cv.get_feature_names()
    print(names)
    print(data_res)

处理英文字符特征，就是将多个字符串共有的词汇提取出来，然后统计每个字符串中词汇出现的次数

对于中文字符串的处理
from sklearn.feature_extraction.text import CountVectorizer
import jieba
def cut_words():
    """Segment three sample sentences with jieba and return them as space-joined strings."""
    sentences = ('今天是个好日子', '昨天是个好日子', '希望以后的每天都是好日子')
    # jieba.cut splits a sentence into words; join them with a single space
    a1, b1, c1 = (' '.join(jieba.cut(text)) for text in sentences)
    return a1, b1, c1
def deal_chinese(a1, b1, c1):
    """Count word occurrences in three pre-segmented (space-separated) Chinese strings."""
    # Once the text is split into space-separated words, CountVectorizer can handle it
    cv = CountVectorizer()
    data = [a1, b1, c1]
    data_array = cv.fit_transform(data).toarray()
    # get_feature_names() was removed from scikit-learn; prefer its
    # replacement and fall back only on old releases.
    if hasattr(cv, 'get_feature_names_out'):
        names = cv.get_feature_names_out()
    else:
        names = cv.get_feature_names()
    print(names)
    print(data_array)

对于中文的处理就是将一个句子变成一个个词语，然后计算词在每个字符串中出现的次数

tfidf方法统计词频
tf: term frequency 词频
idf: 逆文档频率，公式: log(总文档数/包含该词的文档数)
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import jieba
def cut_words():
    """Segment three sample sentences with jieba and return them as space-joined strings."""
    sentences = ('今天是个好日子', '昨天是个好日子', '希望以后的每天都是好日子')
    # jieba.cut splits a sentence into words; join them with a single space
    a1, b1, c1 = (' '.join(jieba.cut(text)) for text in sentences)
    return a1, b1, c1
def deal_chinese(a1, b1, c1):
    """Count word occurrences in three pre-segmented (space-separated) Chinese strings."""
    # Once the text is split into space-separated words, CountVectorizer can handle it
    cv = CountVectorizer()
    data = [a1, b1, c1]
    data_array = cv.fit_transform(data).toarray()
    # get_feature_names() was removed from scikit-learn; prefer its
    # replacement and fall back only on old releases.
    if hasattr(cv, 'get_feature_names_out'):
        names = cv.get_feature_names_out()
    else:
        names = cv.get_feature_names()
    print(names)
    print(data_array)
def frequency(a1, b1, c1):
    """Print the vocabulary and tf-idf matrix of three segmented sentences.

    Args:
        a1, b1, c1: Sentences whose words are already separated by spaces.
    """
    # tf-idf weighting plays down words that occur often but carry little
    # meaning, instead of reporting raw counts.
    tf = TfidfVectorizer()
    data = tf.fit_transform([a1, b1, c1]).toarray()
    # get_feature_names() was removed in scikit-learn 1.2. Prefer its
    # replacement and fall back only on releases that predate it.
    try:
        names = tf.get_feature_names_out()
    except AttributeError:
        names = tf.get_feature_names()
    print([str(name) for name in names])
    print(data)

tfidf 方法就是降低那些出现次数很多但是相对无关紧要的词语的权重，比如在一篇讲述机器学习的文章中弱化诸如“因为”、“我们”等相对无关紧要的词语，突出“算法”、“实验”等更能代表文章主题的高频词汇。

特征处理
数字特征处理
归一化处理
标准化处理
sklearn中的归一化处理API
from sklearn.preprocessing import MinMaxScaler
import numpy as np
# Sample matrix: three rows, three feature columns.
x = np.array([[1, 2, 3], [1, 2, 4], [2, 5, 6]])

# Column-wise min-max scaling: (x - min) / (max - min), then mapped onto
# feature_range (scikit-learn's default range is (0, 1); here it is (0, 2)).
travel = MinMaxScaler(feature_range=(0, 2))
data = travel.fit(x).transform(x)
print(data)

归一化处理时，我们默认各个特征对结果的影响相同，使得每个特征对最终结果不会造成更大的影响
然而，归一化处理的结果容易受到异常点较大的影响即鲁棒性较差

sklearn中的标准化处理API
from sklearn.preprocessing import StandardScaler
travel_standard = StandardScaler()
# Standardize each column as (x - mean) / sigma, so that very large or very
# small extreme values do not dominate the result.
data_standard = travel_standard.fit_transform(data)
print(data_standard)
# Per-column mean.
print(travel_standard.mean_)
# Per-column variance. StandardScaler has no `std_` attribute any more:
# `var_` is the variance and `scale_` is the standard deviation.
print(travel_standard.var_)

标准化处理避免了异常点的出现导致的较大数据偏差，适合于数据量较大时的特征处理

缺失值的处理
sklearn 处理API
# from sklearn.preprocessing import Imputer
# 这是 0.22 之前的用法（Imputer 在 0.20 中被弃用，0.22 中被移除）
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# from sklearn.impute import IterativeImputer
# IterativeImputer 仍是实验性功能，直接这样导入会报错，必须先导入下面的 enable_iterative_imputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
# Small matrix with one missing entry to impute.
data = [[1, 2], [np.nan, 3], [4, 5]]
im = IterativeImputer()
# Fit the imputer on the data and fill the missing value in one step.
filled = im.fit_transform(data)
print(filled)

IterativeImputer 可传参数 initial_strategy 指定初始填充方式，可选择平均值（mean）、中位数（median）、众数（most_frequent）等，默认使用平均值 mean；SimpleImputer 中对应的参数名是 strategy

pandas 处理缺失值
import pandas as pd
import numpy as np
from numpy import nan
# Sample table containing missing values.
data = [
    [1, 2, 3],
    [nan, nan, 2],
    [nan, nan, nan],
    [8, 8, nan],
]
df1 = pd.DataFrame(data)
# Fill every missing value with one constant. Passing inplace=True would
# overwrite the original frame instead of returning a new one.
df2 = df1.fillna(100)
# Fill per column by passing a dict keyed by column label.
df3 = df1.fillna({0: 10, 1: 20, 2: 40})

print(df1)
print('df2')
print(df2)
print('df3')
print(df3)

前端如何关闭已经发送的请求

前端如何关闭已经发送的请求
文章目录
前端如何关闭已经发送的请求
前言
一、相关介绍
二、解决方案
1.【axios】axios.CancelToken;
(1) 普通请求（未在axios基础上进行二次封装—统一封装）
(2) 根据业务需求提取所有的 api 进行封装:
注意：方案1是解决前端关闭请求比较常用的方法，以下为附带不同类型情况下，取消请求的方案
2.【原生】XMLHttpRequest.abort();
3.【jquery】ajax.abort();
4.微信小程序
5.【uni-app】 requestTask.abort();
总结
前言
项目中写支付模块，心跳模式请求接口，为了避免产生多个请求同时发送，且都处于请求状态，需要前端处理，在下次请求开始的时候主动关闭未完成的请求。

一、相关介绍
关于Axios

Axios 是一个基于 promise 的 HTTP 库，可以用在浏览器和 node.js 中。

Axios 使用 cancel token 取消请求

二、解决方案
1.【axios】axios.CancelToken;
(1) 普通请求（未在axios基础上进行二次封装—统一封装）
var CancelToken = axios.CancelToken;

// Receives the cancel function handed out by the CancelToken executor.
var cancel;

axios.get('/user/12345', {
  cancelToken: new CancelToken(function executor(c) {
    // The executor is called with a cancel function as its argument.
    cancel = c;
  })
});

// Cancel the request.
cancel();

(2) 根据业务需求提取所有的 api 进行封装:
util文件夹下的request

import axios from ‘axios’
import {
message,
Popconfirm
} from ‘antd’
import {
getToken
} from ‘./auth’

// create an axios instance
// create an axios instance
const service = axios.create({
  baseURL: process.env.VUE_APP_BASE_API, // url = base url + request url
  withCredentials: false, // send cookies when cross-domain requests
  timeout: 300100 // request timeout
})

// Request interceptor: attach the auth token and content-type header to
// every outgoing request.
service.interceptors.request.use(
  config => {
    config.headers['X-Token'] = getToken()
    // NOTE(review): the header name is 'contentType', not the standard
    // 'Content-Type' — confirm this is what the backend expects.
    config.headers['contentType'] = "application/json"

    return config
  },
  error => {
    return Promise.reject(error)
  }
)

// Response interceptor: unwrap the payload on success, redirect to the login
// page when the session is missing, and attach a readable message to HTTP
// errors before rejecting.
service.interceptors.response.use(
  response => {
    const res = response.data
    // Any business code other than 'success' is treated as an error.
    if (res.code !== 'success') {
      if (res.code === 'noLogin' || res.code === 'user-not-login') {
        window.location = '#/login'
      }

      return Promise.reject(res || 'error')
    } else {
      return res
    }
  },
  error => {
    // Only errors that carry an HTTP response get a status-based message.
    if (error && error.response) {
      console.log('全局的拦截')
      console.log(error,'看下error对象')
      switch (error.response.status) {
        case 400:
        case 401:
        case 403:
        case 405:
          error.message = '请检查您的网络'
          break

        case 404:
          error.message = `请求地址出错: ${error.response.config.url}`
          break

        case 408:
          error.message = '请求超时'
          break

        case 501:
        case 502:
        case 503:
        case 504:
        case 505:
          error.message = '服务器忙,请您稍后尝试'
          break

        default:
      }
    }
    return Promise.reject(error)
  }
)

export default service

api文件夹下的index.js

import request from ‘../utils/request’ // 配置过的Axios 对象

import axios from ‘axios’

/**
 * POST to the lateness-detail-size endpoint with a cancellable request.
 *
 * @param params Query parameters forwarded to the request.
 * @param that   Object that receives the cancel function on `that.source`
 *               (for example the calling component).
 * @returns The promise returned by the configured axios instance.
 */
export function getLatenessDetailSize(params, that) {
  return request({
    url: '/api/v1/behaviour/latenessDetailSize',
    method: 'post',
    params: params,
    // Key setting that makes the request cancellable.
    cancelToken: new axios.CancelToken(function executor(c) { // set the cancel token
      // Expose the cancel function to the caller. `that` may be the
      // component object passed in; this line throws if it is missing.
      // If no component can be passed, the cancel function can be hung
      // on window instead.
      that.source = c; // or: window.source = c
    })
  })
}

export xxx

组件页面中使用api,并在合适的时机取消请求

import { getLatenessDetail } from “../api”;

export default {
  data() {
    return {
      tableData: [],
      total: 0,
      page: 1,
      loadTable: false,
      params: {},
      // Receives the cancel function of the in-flight request, if any.
      source: null
    }
  },

  methods: {
    // Cancel the pending request when a cancel function has been stored.
    cancelQuest() {
      if (typeof this.source === 'function') {
        this.source('终止请求'); // cancel the request
      }
    },

    // Load one page of table data, cancelling any unfinished request first.
    getTableData(val) {
      this.cancelQuest() // called before the new request is sent
      this.page = val
      this.loadTable = true
      // NOTE(review): the imported name and argument list must match an
      // export of ../api — verify against that module.
      getLatenessDetail(this.params, (val - 1) * 10, this)
        .then(res => {
          this.loadTable = false
          this.tableData = res.data
        })
    }
  }
}

注意：方案1是解决前端关闭请求比较常用的方法，以下为附带不同类型情况下，取消请求的方案
2.【原生】XMLHttpRequest.abort();
如果该请求已被发出，XMLHttpRequest.abort() 方法将终止该请求。

let xhr = new XMLHttpRequest(),
    method = "GET",
    url = "https://xxx";
xhr.open(method, url, true);

xhr.send();

xhr.abort(); // abort the request

3.【jquery】ajax.abort();
// Holds the handle returned by $.ajax so a pending request can be aborted.
let jqueryAjax;
if (jqueryAjax) {
  jqueryAjax.abort(); // abort the request
}
jqueryAjax = $.ajax({
  type: "POST",
  url: url,
  dataType: "json",
  success: function (data) {
    // do thing…
  },
  error: function () {
  }
});

4.微信小程序
// Declare the handle first: reading a `let` binding before its declaration
// throws a ReferenceError.
let requestTask;
if (requestTask) {
  requestTask.abort(); // abort the request
}
requestTask = wx.request({
  url: 'xxx',
  data: {
    x: '',
    y: ''
  },
  header: {
    'content-type': 'application/json' // default value
  },
  success (res) {
    console.log(res.data)
  }
})

5.【uni-app】 requestTask.abort();
// Keep the task object returned by uni.request so the request can be aborted.
const requestTask = uni.request({
  url: 'xxx',
  data: {
    name: 'name',
    age: 18
  },
  success: function (res) {
    console.log(res.data);
  }
});

// Abort the request.
requestTask.abort();

总结
关于前端如何关闭已经发送请求的解决方案总结

2021 年 4 月
一	二	三	四	五	六	日
			1	2	3	4
5	6	7	8	9	10	11
12	13	14	15	16	17	18
19	20	21	22	23	24	25
26	27	28	29	30