将正则表达式转换成内部格式,提高执行效率

strr="PYTHON666Java"

pat=re.compile(r"Python",re.I) #模式修正符:忽略大小写

print(pat.search(strr))


```python
import re

#match函数和search函数

# match函数--匹配开头
# search函数--匹配任意位置

#这两个函数都是一次匹配,匹配到一次就不再往后继续匹配了

strr="javapythonjavahtmlpythonjs"

pat=re.compile(r"python")

print(pat.search(strr).group())
import re

#findall()   查找所有匹配的内容,装到列表中
#finditer()  查找所有匹配的内容,装到迭代器中

strr="hello--------hello-----------\
---------hello-----------------\
---------hello--hello----------------\
----------hello---------hello----hello----------"

pat=re.compile(r"hello")

#print(pat.findall(strr))
data=pat.finditer(strr)

list1=[]

for i in data:
	list1.append(i.group())

print(list1)
import re

#split()  按照能够匹配的子串将字符串分割后返回列表
#sub()    sub方法 用于替换

strr1="张三,,,李四,,,,,,,,,王五,,,,,,,,赵六"

pat1=re.compile(r",+")

result1=pat1.split(strr1)



strr2="hello 123,hello 456!"

pat2=re.compile(r"\d+")

result2=pat2.sub("666",strr2)



print(result2)
import re
import requests

headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Ap\
pleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Sa\
fari/537.36"
}

response=requests.get("http://changyongdianhuahaoma.51240.com/",headers=headers).text

pat1=r'<tr bgcolor="#EFF7F0">[\s\S]*?<td>(.*?)</td>[\s\S]*?<td>[\s\S]*?</td>[\s\S]*?</tr>'
pat2=r'<tr bgcolor="#EFF7F0">[\s\S]*?<td>[\s\S]*?</td>[\s\S]*?<td>(.*?)</td>[\s\S]*?</tr>'

pattern1=re.compile(pat1)
pattern2=re.compile(pat2)

data1=pattern1.findall(response)
data2=pattern2.findall(response)

resultlist=[]
for i in range(0,len(data1)):
	resultlist.append(data1[i]+data2[i])

print(resultlist)
import urllib.request
import re
# 全局取消证书验证
import ssl
ssl._create_default_https_context = ssl._create_unverified_context


headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/\
537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"}

page_num = int(input("请问要爬取第几页呢:"))
page=str((page_num-1)*20)

url="https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start="+page+"&limit=20"

req=urllib.request.Request(url,headers=headers)

data=urllib.request.urlopen(req).read().decode()

pat1=r'"rating":\["(.*?)","\d+"\]'
pat2=r'"title":"(.*?)"'

pattern1=re.compile(pat1,re.I)
pattern2=re.compile(pat2,re.I)

data1=pattern1.findall(data)
data2=pattern2.findall(data)

for x in range(len(data1)):
	print("排名:",x+1,"电影名:",data2[x],"豆瓣评分:",data1[x])