
以下代码现在可能已经没什么效果了,贴出来留作记录,方便以后查看。
效果图

keyword.txt 文件(每行一条,格式:目标域名|关键词)
text
www.tx7878.cn|绝对压力变送器
www.tx7878.cn|EJA变送器
www.tx7878.cn|静压式液位计
www.tx7878.cn|温度漂移
www.tx7878.cn|弹簧管压力表
www.tx7878.cn|压力控制
www.tx7878.cn|料位计
www.tx7878.cn|高温压力传感器
www.tx7878.cn|界位
www.tx7878.cn|磁致伸缩传感器
www.tx7878.cn|连杆浮球液位开关
www.tx7878.cn|磁浮子液位计
www.tx7878.cn|液位控制系统
www.tx7878.cn|差压传感器
www.tx7878.cn|射频导纳
www.tx7878.cn|料位仪
www.tx7878.cn|超声流量计
www.tx7878.cn|数字压力计
www.tx7878.cn|压力分布
Baidu点击快排
python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
import time,random,requests,json
from selenium.webdriver.common.keys import Keys
import os, subprocess,datetime
import multiprocessing
import undetected_chromedriver as uc
# 排除关键词列表
# exclude_keywords = []
# cmd = '"C:\Program Files\Google\Chrome\Application\chrome.exe" ' \
# '--remote-debugging-port=9222 ' \
# '--user-data-dir="C:\selenium\ChromeProfile"'
# subprocess.run(cmd)
# with open('./stealth.min.js') as f:
# js = f.read()
# Target sites to exclude.
# NOTE(review): nothing in the visible code reads this list - confirm before relying on it.
ExcludetargetURL = [
"https://www.baidu.com/"
]
class GoogleSearch:
    """Drive a Chrome session that searches Baidu and opens a target site's result.

    Despite its name the class talks to Baidu: it types a keyword into the
    search box, resolves each result's redirect link with ``requests`` and
    clicks the first result whose resolved URL contains the target host.
    Every click is appended to ``result.txt``.
    """

    # Each entry must be ONE complete User-Agent string. A missing comma
    # between two adjacent literals silently concatenates them.
    USER_AGENTS = (
        # 360 browser
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
        # Chrome
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        # Firefox
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0",
        # IE8
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; 4399Box.1357; 4399Box.1253; 4399Box.1357)",
        # Sogou
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
        # Recent Chrome builds
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.0.0 Safari/537.36",
    )

    # Window sizes (width, height) that random_screen() picks from.
    SCREEN_SIZES = (
        (1920, 1080),
        (1600, 900),
        (1440, 900),
        (1366, 768),
        (1280, 1024),
        (1280, 800),
        (1024, 768),
    )

    # Chrome command-line switches, in the original order with duplicates removed.
    CHROME_FLAGS = (
        "--disable-popup-blocking",
        '--ignore-ssl-errors=true',
        "--disable-extensions",
        "--profile-directory=Default",
        "--ignore-certificate-errors",
        "--disable-plugins-discovery",
        '--incognito',
        "--disable-infobars",
        "--no-default-browser-check",
        '--load-images=no',
        '--no-first-run',
        '--no-service-autorun',
        '--password-store=basic',
        '--no-sandbox',
        '--proxy-type=http',
        '--ssl-protocol=TLSv1',
    )

    # XPath of the result-title headings on a Baidu results page.
    RESULT_XPATH = '//h3[@class="c-title t t tts-title"]'

    # Seconds before a plain HTTP request made with ``requests`` is abandoned.
    HTTP_TIMEOUT = 10

    def __init__(self):
        """Start a headless undetected_chromedriver session and clear its cookies."""
        self.depth_deep = 15  # maximum number of "next page" clicks per keyword
        options = uc.ChromeOptions()
        options.add_argument(f'--user-agent={self.random_useragent()}')
        for flag in self.CHROME_FLAGS:
            options.add_argument(flag)
        # NOTE(review): `executable_path` is passed through unchanged from the
        # original; confirm the installed undetected_chromedriver accepts it.
        self.driver = uc.Chrome(headless=True, use_subprocess=False,
                                executable_path='./chromedriver.exe', options=options)
        self.driver.implicitly_wait(30)
        self.driver.delete_all_cookies()

    def random_useragent(self):
        """Return one randomly chosen entry of ``USER_AGENTS``."""
        return random.choice(self.USER_AGENTS)

    def random_scroll(self):
        """Scroll 1-3 times to a random vertical offset between 500 and 20000 px."""
        for _ in range(random.randint(1, 3)):
            self.driver.execute_script(f'window.scrollTo(0, {random.randint(500, 20000)})')
            time.sleep(0.4)
        return True

    def random_screen(self):
        """Return a random ``(width, height)`` pair from ``SCREEN_SIZES``."""
        return random.choice(self.SCREEN_SIZES)

    def set_screen(self):
        """Resize the browser window to 1920x1080."""
        self.driver.set_window_size(1920, 1080)

    def random_delay(self):
        """Sleep for a random 1-3 seconds; always returns True."""
        time.sleep(random.uniform(1, 3))
        return True

    def random_click(self, elements):
        """Click the link of one random element and close any tab it opened.

        Args:
            elements: result elements, each expected to contain an ``<a>``.
        """
        if len(elements) == 0:
            return
        chosen = elements[random.randint(0, len(elements) - 1)]
        try:
            link = chosen.find_element(By.TAG_NAME, 'a')
            # A JS click works even when the element is outside the viewport.
            self.driver.execute_script("arguments[0].click();", link)
            self.random_delay()
            handles = self.driver.window_handles
            if len(handles) > 1:
                self.driver.switch_to.window(handles[-1])
                self.random_delay()
                self.driver.close()
                self.driver.switch_to.window(handles[0])
                self.random_delay()
        except Exception as e:
            print(f"An error occurred: {e}")

    def _close_extra_windows(self):
        """Close every window except the first and switch back to the first."""
        while True:
            handles = self.driver.window_handles
            if len(handles) <= 1:
                break
            # A newly opened tab is normally the last handle in the list.
            self.driver.switch_to.window(handles[-1])
            self.driver.close()
        self.driver.switch_to.window(handles[0])

    def elementCLick(self, element, keyword, index):
        """Click a matched result, log the hit to ``result.txt`` and close new tabs.

        Args:
            element: the result element to click.
            keyword: the keyword that was searched (written to the log).
            index: rank value written to the log.

        All exceptions are caught and printed; nothing is raised to the caller.
        """
        self.random_delay()
        try:
            self.random_scroll()
            try:
                element.click()
            except Exception:
                href = element.find_element(By.TAG_NAME, 'a').get_attribute('href')
                # Pass the URL as an argument instead of splicing it into the
                # script text, so quotes in the URL cannot break the JavaScript.
                self.driver.execute_script("window.open(arguments[0]);", href)
            try:
                public_ip = self.get_public_ip()
            except Exception as e:
                # A failed IP lookup must not prevent logging or tab cleanup.
                print(f"An error occurred: {e}")
                public_ip = "unknown"
            timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            with open("result.txt", "a+", encoding="utf-8") as f:
                f.write(f"时间:{timestamp},ip:{public_ip} 关键词:{keyword} ,排名:{index} \n")
                print("写入成功")
            self._close_extra_windows()
        except Exception as e:
            print(f"An error occurred: {e}")

    def get_search_results(self, keyword):
        """Type ``keyword`` into the Baidu search box and return the result headings.

        The search is submitted either by clicking the button or by pressing
        Enter, chosen at random.

        Returns:
            The list of result heading elements, or ``[]`` on any error.
        """
        try:
            width, height = self.random_screen()
            self.driver.set_window_size(width, height)
            self.random_delay()
            search_box = self.driver.find_element(By.ID, "kw")
            search_box.clear()
            search_box.send_keys(keyword)
            self.random_delay()
            if random.randint(0, 1) == 0:
                self.driver.find_element(By.ID, "su").click()
                self.random_delay()
                print("鼠标点击按钮")
            else:
                search_box.send_keys(Keys.ENTER)
                print("按下了Enter")
                self.random_delay()
            print('打开百度成功', self.driver.title)
            self.random_scroll()
            return self.driver.find_elements(By.XPATH, self.RESULT_XPATH)
        except Exception as e:
            print("打开百度失败", e)
            return []

    def get_real_url(self, url):
        """Resolve a result's redirect link to the URL in its ``Location`` header.

        Returns:
            The ``Location`` header value, or ``''`` when the request fails or
            the response carries no such header.
        """
        try:
            response = requests.get(url, headers={'User-Agent': self.random_useragent()},
                                    allow_redirects=False, timeout=self.HTTP_TIMEOUT)
            return response.headers.get('location', '')
        except Exception as e:
            print("获取真实URL失败", e)
            return ''

    def get_public_ip(self):
        """Return the ``origin`` field reported by http://httpbin.org/ip.

        Raises:
            Whatever ``requests`` raises on network failure or timeout.
        """
        response = requests.get("http://httpbin.org/ip", timeout=self.HTTP_TIMEOUT)
        return response.json()['origin']

    def _find_target(self, elements, url_ip):
        """Return ``(index, element)`` of the first result resolving to ``url_ip``, else None."""
        for index, element in enumerate(elements):
            href = element.find_element(By.XPATH, './/a').get_attribute('href')
            real_url = self.get_real_url(href)
            if real_url and url_ip in real_url:
                print("存在真实url", real_url)
                return index, element
        return None

    def _go_to_next_page(self):
        """Click the last element with class ``n``; retry once after scrolling down."""
        try:
            self.driver.find_elements(By.CLASS_NAME, 'n')[-1].click()
        except Exception:
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            self.driver.find_elements(By.CLASS_NAME, 'n')[-1].click()

    def search_google(self, item, depth=1):
        """Search one keyword and click the target site's result if it is found.

        Args:
            item: two-element sequence ``(url_ip, keyword)``.
            depth: unused; kept so existing callers keep working.

        Returns:
            True when a matching result was clicked, False otherwise
            (including when an error was caught and printed).
        """
        try:
            url_ip, keyword = item
            self.driver.get('https://www.baidu.com')
            print(">>>>>>>>>>点击的关键词:", keyword, "--->目标地址:", url_ip, ">>>>>>>>>>>>>>>>")
            elements = self.get_search_results(keyword)
            hit = self._find_target(elements, url_ip)
            if hit is not None:
                index, element = hit
                self.elementCLick(element, keyword, index)
                return True
            for page in range(1, self.depth_deep + 1):
                print(f"============={page}==============")
                self._close_extra_windows()
                self._go_to_next_page()
                self.random_delay()
                elements = self.driver.find_elements(By.XPATH, self.RESULT_XPATH)
                self.random_click(elements)
                hit = self._find_target(elements, url_ip)
                if hit is not None:
                    index, element = hit
                    # Rank estimate: pages so far times results per page, plus offset.
                    self.elementCLick(element, keyword, page * len(elements) + index)
                    return True
        except Exception as e:
            print("搜索失败", e)
        return False

    @staticmethod
    def _parse_keyword_line(line):
        """Split a ``host|keyword`` line; return ``(host, keyword)`` or None if malformed."""
        host, sep, keyword = line.strip().partition("|")
        host, keyword = host.strip(), keyword.strip()
        if not sep or not host or not keyword:
            return None
        return host, keyword

    def read_from_file(self):
        """Run ``search_google`` for every valid line of ``keyword.txt``, then quit the driver.

        Blank or malformed lines are skipped. The driver is quit even when
        reading the file or a search raises.
        """
        try:
            with open('keyword.txt', 'r', encoding='utf-8') as file:
                for line in file:
                    item = self._parse_keyword_line(line)
                    if item is not None:
                        self.search_google(item)
        finally:
            self.driver.quit()
        print("搜索完成")

    def write_to_file(self, title):
        """Append ``title`` and a newline to ``result.txt``."""
        with open("result.txt", "a+", encoding="utf-8") as file:
            file.write(title + "\n")
# 随机切换ip
def random_ip(connection="宽带连接", username="21261637", password="147258"):
    """Hang up and redial a Windows dial-up connection with ``rasdial``.

    Per the original comment the intent is to switch to a different IP.

    Args:
        connection: name of the dial-up entry passed to ``rasdial``.
        username: account name used when redialling.
        password: password used when redialling.

    Returns:
        True when the redial command exited with status 0, False otherwise.

    NOTE(review): the default credentials are hard-coded in source, kept only
    so existing calls behave as before. Move them to configuration.
    """
    import subprocess

    try:
        # The hang-up may fail when the link is already down; that is not fatal.
        subprocess.run(["rasdial", connection, "/disconnect"], check=False)
        time.sleep(5)
        dialled = subprocess.run(["rasdial", connection, username, password], check=False)
        time.sleep(10)
    except OSError as e:
        # Raised e.g. when rasdial does not exist on this machine.
        print("切换ip失败", e)
        return False
    if dialled.returncode != 0:
        print("切换ip失败", dialled.returncode)
        return False
    return True
# ip = requests.get("http://httpbin.org/ip")
# data = ip.json()
# ip_list = data['origin'].split(", ")
# # 随机切换ip
# random.shuffle(ip_list)
# return ip_list[0]
if __name__ == "__main__":
    # Must run first when the script is frozen into a Windows executable.
    multiprocessing.freeze_support()
    # Fifteen passes over keyword.txt, each with a fresh browser session.
    for _ in range(15):
        # random_ip()
        google_search = GoogleSearch()
        google_search.read_from_file()
        time.sleep(10)
百度快排发包代码
python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
import time,random,requests,json
from selenium.webdriver.common.keys import Keys
import os, subprocess,datetime
import multiprocessing
import undetected_chromedriver as uc
# 排除关键词列表
# exclude_keywords = []
# cmd = '"C:\Program Files\Google\Chrome\Application\chrome.exe" ' \
# '--remote-debugging-port=9222 ' \
# '--user-data-dir="C:\selenium\ChromeProfile"'
# subprocess.run(cmd)
# with open('./stealth.min.js') as f:
# js = f.read()
# Target sites to exclude.
# NOTE(review): nothing in the visible code reads this list - confirm before relying on it.
ExcludetargetURL = [
"https://openinstall.io/"
]
class GoogleSearch:
    """Attach to a running Chrome and use it to search Baidu for a target site.

    Despite its name the class talks to Baidu. It connects to a Chrome
    instance listening on ``127.0.0.1:9222``, types a keyword into the search
    box, resolves each result's redirect link with ``requests`` and clicks the
    first result whose resolved URL contains the target host. Every click is
    appended to ``result.txt``.
    """

    # Each entry must be ONE complete User-Agent string. A missing comma
    # between two adjacent literals silently concatenates them.
    USER_AGENTS = (
        # 360 browser
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
        # Chrome
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        # Firefox
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0",
        # IE8
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; 4399Box.1357; 4399Box.1253; 4399Box.1357)",
        # Sogou
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
        # Recent Chrome builds
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.0.0 Safari/537.36",
    )

    # Window sizes (width, height) that random_screen() picks from.
    SCREEN_SIZES = (
        (1920, 1080),
        (1600, 900),
        (1440, 900),
        (1366, 768),
        (1280, 1024),
        (1280, 800),
        (1024, 768),
    )

    # Chrome command-line switches, in the original order with duplicates removed.
    CHROME_FLAGS = (
        "--disable-popup-blocking",
        '--ignore-ssl-errors=true',
        "--disable-extensions",
        "--profile-directory=Default",
        "--ignore-certificate-errors",
        "--disable-plugins-discovery",
        '--incognito',
        "--disable-infobars",
        "--no-default-browser-check",
        '--load-images=no',
        '--no-first-run',
        '--no-service-autorun',
        '--password-store=basic',
        '--no-sandbox',
        '--proxy-type=http',
        '--ssl-protocol=TLSv1',
    )

    # XPath of the result-title headings on a Baidu results page.
    RESULT_XPATH = '//h3[@class="c-title t t tts-title"]'

    # Seconds before a plain HTTP request made with ``requests`` is abandoned.
    HTTP_TIMEOUT = 10

    def __init__(self):
        """Connect Selenium to the Chrome instance at ``127.0.0.1:9222``."""
        self.depth_deep = 15  # maximum number of "next page" clicks per keyword
        options = webdriver.ChromeOptions()
        options.add_argument(f'--user-agent={self.random_useragent()}')
        for flag in self.CHROME_FLAGS:
            options.add_argument(flag)
        # NOTE(review): presumably launch switches have no effect on an
        # already-running browser attached via debuggerAddress - verify.
        options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
        # NOTE(review): `executable_path` is kept from the original; confirm
        # the installed Selenium version still accepts it.
        self.driver = webdriver.Chrome(executable_path='./chromedriver.exe', options=options)
        self.driver.implicitly_wait(30)

    def random_useragent(self):
        """Return one randomly chosen entry of ``USER_AGENTS``."""
        return random.choice(self.USER_AGENTS)

    def random_scroll(self):
        """Scroll 1-3 times to a random vertical offset between 500 and 20000 px."""
        for _ in range(random.randint(1, 3)):
            self.driver.execute_script(f'window.scrollTo(0, {random.randint(500, 20000)})')
            time.sleep(0.4)
        return True

    def random_screen(self):
        """Return a random ``(width, height)`` pair from ``SCREEN_SIZES``."""
        return random.choice(self.SCREEN_SIZES)

    def set_screen(self):
        """Resize the browser window to 1920x1080."""
        self.driver.set_window_size(1920, 1080)

    def random_delay(self):
        """Sleep for a random 2-5 seconds; always returns True."""
        time.sleep(random.uniform(2, 5))
        return True

    def random_click(self, elements):
        """Click the link of one random element and close any tab it opened.

        Args:
            elements: result elements, each expected to contain an ``<a>``.
        """
        if len(elements) == 0:
            return
        chosen = elements[random.randint(0, len(elements) - 1)]
        try:
            link = chosen.find_element(By.TAG_NAME, 'a')
            # A JS click works even when the element is outside the viewport.
            self.driver.execute_script("arguments[0].click();", link)
            self.random_delay()
            handles = self.driver.window_handles
            if len(handles) > 1:
                self.driver.switch_to.window(handles[-1])
                self.random_delay()
                self.driver.close()
                self.driver.switch_to.window(handles[0])
                self.random_delay()
        except Exception as e:
            print(f"An error occurred: {e}")

    def _close_extra_windows(self):
        """Close every window except the first and switch back to the first."""
        while True:
            handles = self.driver.window_handles
            if len(handles) <= 1:
                break
            # A newly opened tab is normally the last handle in the list.
            self.driver.switch_to.window(handles[-1])
            self.driver.close()
        self.driver.switch_to.window(handles[0])

    def elementCLick(self, element, keyword, index):
        """Click a matched result, log the hit to ``result.txt`` and close new tabs.

        Args:
            element: the result element to click.
            keyword: the keyword that was searched (written to the log).
            index: rank value written to the log.

        All exceptions are caught and printed; nothing is raised to the caller.
        """
        self.random_delay()
        try:
            self.random_scroll()
            try:
                element.click()
            except Exception:
                href = element.find_element(By.TAG_NAME, 'a').get_attribute('href')
                # Pass the URL as an argument instead of splicing it into the
                # script text, so quotes in the URL cannot break the JavaScript.
                self.driver.execute_script("window.open(arguments[0]);", href)
            try:
                public_ip = self.get_public_ip()
            except Exception as e:
                # A failed IP lookup must not prevent logging or tab cleanup.
                print(f"An error occurred: {e}")
                public_ip = "unknown"
            timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            with open("result.txt", "a+", encoding="utf-8") as f:
                f.write(f"时间:{timestamp},ip:{public_ip} 关键词:{keyword} ,排名:{index} \n")
                print("写入成功")
            self._close_extra_windows()
        except Exception as e:
            print(f"An error occurred: {e}")

    def get_search_results(self, keyword):
        """Type ``keyword`` into the Baidu search box and return the result headings.

        The search is submitted either by clicking the button or by pressing
        Enter, chosen at random.

        Returns:
            The list of result heading elements, or ``[]`` on any error.
        """
        try:
            width, height = self.random_screen()
            self.driver.set_window_size(width, height)
            self.random_delay()
            search_box = self.driver.find_element(By.ID, "kw")
            search_box.clear()
            search_box.send_keys(keyword)
            self.random_delay()
            if random.randint(0, 1) == 0:
                self.driver.find_element(By.ID, "su").click()
                self.random_delay()
            else:
                search_box.send_keys(Keys.ENTER)
                self.random_delay()
            return self.driver.find_elements(By.XPATH, self.RESULT_XPATH)
        except Exception as e:
            print("打开百度失败", e)
            return []

    def get_real_url(self, url):
        """Resolve a result's redirect link to the URL in its ``Location`` header.

        Returns:
            The ``Location`` header value, or ``''`` when the request fails or
            the response carries no such header.
        """
        try:
            response = requests.get(url, headers={'User-Agent': self.random_useragent()},
                                    allow_redirects=False, timeout=self.HTTP_TIMEOUT)
            return response.headers.get('location', '')
        except Exception as e:
            print("获取真实URL失败", e)
            return ''

    def get_public_ip(self):
        """Return the ``origin`` field reported by http://httpbin.org/ip.

        Raises:
            Whatever ``requests`` raises on network failure or timeout.
        """
        response = requests.get("http://httpbin.org/ip", timeout=self.HTTP_TIMEOUT)
        return response.json()['origin']

    def _find_target(self, elements, url_ip):
        """Return ``(index, element)`` of the first result resolving to ``url_ip``, else None."""
        for index, element in enumerate(elements):
            href = element.find_element(By.XPATH, './/a').get_attribute('href')
            real_url = self.get_real_url(href)
            if real_url and url_ip in real_url:
                print("存在真实url", real_url)
                return index, element
        return None

    def _go_to_next_page(self):
        """Click the last element with class ``n``; retry once after scrolling down."""
        try:
            self.driver.find_elements(By.CLASS_NAME, 'n')[-1].click()
        except Exception:
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            self.driver.find_elements(By.CLASS_NAME, 'n')[-1].click()

    def search_google(self, item, depth=1):
        """Search one keyword and click the target site's result if it is found.

        Args:
            item: two-element sequence ``(url_ip, keyword)``.
            depth: unused; kept so existing callers keep working.

        Returns:
            True when a matching result was clicked, False otherwise
            (including when an error was caught and printed).
        """
        try:
            url_ip, keyword = item
            self.driver.get('https://www.baidu.com')
            print(">>>>>>>>>>点击的关键词:", keyword, "--->目标地址:", url_ip, ">>>>>>>>>>>>>>>>")
            elements = self.get_search_results(keyword)
            hit = self._find_target(elements, url_ip)
            if hit is not None:
                index, element = hit
                self.elementCLick(element, keyword, index)
                return True
            for page in range(1, self.depth_deep + 1):
                print(f"暂时无数据包,正在加载第{page}页")
                self._close_extra_windows()
                self._go_to_next_page()
                self.random_delay()
                elements = self.driver.find_elements(By.XPATH, self.RESULT_XPATH)
                hit = self._find_target(elements, url_ip)
                if hit is not None:
                    index, element = hit
                    # Rank estimate: pages so far times results per page, plus offset.
                    self.elementCLick(element, keyword, page * len(elements) + index)
                    return True
        except Exception as e:
            print("搜索失败", e)
        return False

    @staticmethod
    def _parse_keyword_line(line):
        """Split a ``host|keyword`` line; return ``(host, keyword)`` or None if malformed."""
        host, sep, keyword = line.strip().partition("|")
        host, keyword = host.strip(), keyword.strip()
        if not sep or not host or not keyword:
            return None
        return host, keyword

    def read_from_file(self):
        """Run ``search_google`` for every valid line of ``keyword.txt``, then quit the driver.

        Blank or malformed lines are skipped. The driver is quit even when
        reading the file or a search raises.
        """
        try:
            with open('keyword.txt', 'r', encoding='utf-8') as file:
                for line in file:
                    item = self._parse_keyword_line(line)
                    if item is not None:
                        self.search_google(item)
        finally:
            self.driver.quit()
        print("搜索完成")

    def write_to_file(self, title):
        """Append ``title`` and a newline to ``result.txt``."""
        with open("result.txt", "a+", encoding="utf-8") as file:
            file.write(title + "\n")
# 随机切换ip
def random_ip(connection="宽带连接", username="21261637", password="147258"):
    """Hang up and redial a Windows dial-up connection with ``rasdial``.

    Per the original comment the intent is to switch to a different IP.

    Args:
        connection: name of the dial-up entry passed to ``rasdial``.
        username: account name used when redialling.
        password: password used when redialling.

    Returns:
        True when the redial command exited with status 0, False otherwise.

    NOTE(review): the default credentials are hard-coded in source, kept only
    so existing calls behave as before. Move them to configuration.
    """
    import subprocess

    try:
        # The hang-up may fail when the link is already down; that is not fatal.
        subprocess.run(["rasdial", connection, "/disconnect"], check=False)
        time.sleep(5)
        dialled = subprocess.run(["rasdial", connection, username, password], check=False)
        time.sleep(10)
    except OSError as e:
        # Raised e.g. when rasdial does not exist on this machine.
        print("切换ip失败", e)
        return False
    if dialled.returncode != 0:
        print("切换ip失败", dialled.returncode)
        return False
    return True
# ip = requests.get("http://httpbin.org/ip")
# data = ip.json()
# ip_list = data['origin'].split(", ")
# # 随机切换ip
# random.shuffle(ip_list)
# return ip_list[0]
if __name__ == "__main__":
    # Must run first when the script is frozen into a Windows executable.
    multiprocessing.freeze_support()
    # Fifteen passes over keyword.txt, each with a fresh driver connection.
    for _ in range(15):
        # random_ip()
        google_search = GoogleSearch()
        google_search.read_from_file()
        time.sleep(10)
搜狗快排点击
python
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
import time,random,requests,json5
from selenium.webdriver.common.keys import Keys
import os, subprocess,datetime
import multiprocessing,re,platform
# 根据平台选择WebDriver
# Bind WebDriver / ChromeOptions to the implementation used on this platform:
# plain Selenium everywhere except Windows, undetected_chromedriver on Windows.
if platform.system() != "Windows":
    from selenium import webdriver
    ChromeOptions = webdriver.ChromeOptions
    WebDriver = webdriver.Chrome
else:
    from undetected_chromedriver import ChromeOptions, Chrome as WebDriver
class GoogleSearch:
    """Drive a Chrome session that searches Sogou and opens a target site's result.

    Despite its name the class talks to Sogou: it types a keyword into the
    search box, resolves each result's redirect page with ``requests`` and
    clicks the first result whose resolved URL contains the target host.
    Every click is appended to ``result.txt``.
    """

    SEARCH_ENGINE_URL = 'https://www.sogou.com/?'
    NEXT_PAGE_ID = 'sogou_next'
    RESULT_CLASS_NAME = 'vrwrap'

    # Each entry must be ONE complete User-Agent string. A missing comma
    # between two adjacent literals silently concatenates them.
    USER_AGENTS = (
        # 360 browser
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
        # Chrome
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        # Firefox
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0",
        # IE8
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; 4399Box.1357; 4399Box.1253; 4399Box.1357)",
        # Sogou
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
        # Recent Chrome builds
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.0.0 Safari/537.36",
    )

    # Window sizes (width, height) that random_screen() picks from.
    SCREEN_SIZES = (
        (1920, 1080),
        (1600, 900),
        (1440, 900),
        (1366, 768),
        (1280, 1024),
        (1280, 800),
        (1024, 768),
    )

    # Chrome command-line switches, in the original order.
    CHROME_FLAGS = (
        "--disable-popup-blocking",
        '--ignore-ssl-errors=true',
        "--disable-extensions",
        "--profile-directory=Default",
        "--ignore-certificate-errors",
        "--disable-plugins-discovery",
        '--incognito',
        "--disable-infobars",
        "--no-default-browser-check",
        '--load-images=no',
        '--no-first-run',
        '--no-service-autorun',
        '--password-store=basic',
        '--no-sandbox',
    )

    # Extracts the target from the JavaScript redirect in a Sogou link page.
    _REDIRECT_RE = re.compile(r'window\.location\.replace\("(.*?)"\)')

    # Seconds before a plain HTTP request made with ``requests`` is abandoned.
    HTTP_TIMEOUT = 10

    def __init__(self):
        """Start the platform-specific Chrome driver and clear its cookies."""
        self.depth_deep = 10  # maximum number of "next page" clicks per keyword
        options = ChromeOptions()
        options.add_argument(f'--user-agent={self.random_useragent()}')
        for flag in self.CHROME_FLAGS:
            options.add_argument(flag)
        if platform.system() == "Windows":
            self.driver = WebDriver(options=options, headless=True, use_subprocess=False)
        else:
            # NOTE(review): a '.exe' driver path on a non-Windows platform looks
            # unintended, and newer Selenium may reject `executable_path` - verify.
            self.driver = WebDriver(executable_path='./chromedriver.exe', options=options)
        self.driver.implicitly_wait(30)
        self.driver.delete_all_cookies()

    def random_useragent(self):
        """Return one randomly chosen entry of ``USER_AGENTS``."""
        return random.choice(self.USER_AGENTS)

    def random_scroll(self):
        """Scroll 1-3 times to a random vertical offset between 500 and 20000 px."""
        for _ in range(random.randint(1, 3)):
            self.driver.execute_script(f'window.scrollTo(0, {random.randint(500, 20000)})')
            time.sleep(0.4)
        return True

    def random_screen(self):
        """Return a random ``(width, height)`` pair from ``SCREEN_SIZES``."""
        return random.choice(self.SCREEN_SIZES)

    def set_screen(self):
        """Resize the browser window to 1920x1080."""
        self.driver.set_window_size(1920, 1080)

    def random_delay(self):
        """Sleep for a random 1-3 seconds; always returns True."""
        time.sleep(random.uniform(1, 3))
        return True

    def random_click(self, elements):
        """Click the link of one random element and close any tab it opened.

        Args:
            elements: result elements, each expected to contain an ``<a>``.
        """
        if len(elements) == 0:
            return
        chosen = elements[random.randint(0, len(elements) - 1)]
        try:
            link = chosen.find_element(By.TAG_NAME, 'a')
            # A JS click works even when the element is outside the viewport.
            self.driver.execute_script("arguments[0].click();", link)
            self.random_delay()
            handles = self.driver.window_handles
            if len(handles) > 1:
                self.driver.switch_to.window(handles[-1])
                self.random_delay()
                self.driver.close()
                self.driver.switch_to.window(handles[0])
                self.random_delay()
        except Exception as e:
            print(f"An error occurred: {e}")

    def _close_extra_windows(self):
        """Close every window except the first and switch back to the first."""
        while True:
            handles = self.driver.window_handles
            if len(handles) <= 1:
                break
            # A newly opened tab is normally the last handle in the list.
            self.driver.switch_to.window(handles[-1])
            self.driver.close()
        self.driver.switch_to.window(handles[0])

    def elementCLick(self, element, keyword, index):
        """Click a matched result, dwell on it, log the hit and close new tabs.

        Args:
            element: the result element whose link is clicked.
            keyword: the keyword that was searched (written to the log).
            index: rank value written to the log.

        All exceptions are caught and printed; nothing is raised to the caller.
        """
        self.random_delay()
        try:
            self.random_scroll()
            try:
                element.find_element(By.TAG_NAME, 'a').click()
            except Exception:
                href = element.find_element(By.TAG_NAME, 'a').get_attribute('href')
                # Pass the URL as an argument instead of splicing it into the
                # script text, so quotes in the URL cannot break the JavaScript.
                self.driver.execute_script("window.open(arguments[0]);", href)
            # Sleep exactly the duration that is printed.
            times = random.randint(10, 24)
            print(f"停留{times}秒")
            time.sleep(times)
            self.random_scroll()
            try:
                public_ip = self.get_public_ip()
            except Exception as e:
                # A failed IP lookup must not prevent logging or tab cleanup.
                print(f"An error occurred: {e}")
                public_ip = "unknown"
            timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            with open("result.txt", "a+", encoding="utf-8") as f:
                f.write(f"时间:{timestamp},ip:{public_ip} 关键词:{keyword} ,排名:{index} \n")
                print("写入成功")
            self._close_extra_windows()
        except Exception as e:
            print(f"An error occurred: {e}")

    def get_search_results(self, url_ip, keyword):
        """Type ``keyword`` into the Sogou search box and return the result blocks.

        Args:
            url_ip: target host; not used by this method.
            keyword: text typed into the search box.

        Returns:
            The list of result elements, or ``[]`` on any error.
        """
        try:
            width, height = self.random_screen()
            self.driver.set_window_size(width, height)
            self.random_delay()
            search_box = self.driver.find_element(By.ID, "query")
            search_box.clear()
            search_box.send_keys(keyword)
            self.random_delay()
            if random.randint(0, 1) == 0:
                self.driver.find_element(By.ID, "stb").click()
                self.random_delay()
                print("鼠标点击按钮")
            else:
                search_box.send_keys(Keys.ENTER)
                print("按下了Enter")
                self.random_delay()
            print('打开搜狗成功', self.driver.title)
            self.random_scroll()
            return self.driver.find_elements(By.CLASS_NAME, self.RESULT_CLASS_NAME)
        except Exception as e:
            print("打开搜狗失败", e)
            return []

    def get_real_url(self, url):
        """Fetch a Sogou link page and return the URL its script redirects to.

        Returns:
            The URL inside ``window.location.replace("...")``, or ``''`` when
            the request fails or the page contains no such redirect.
        """
        try:
            response = requests.get(url, headers={'User-Agent': self.random_useragent()},
                                    allow_redirects=False, timeout=self.HTTP_TIMEOUT)
            match = self._REDIRECT_RE.search(response.text)
            if match is None:
                return ''
            real_url = match.group(1)
            print(f"解析真实Url:{real_url}")
            return real_url
        except Exception as e:
            print("获取真实URL失败", e)
            return ''

    def get_public_ip(self):
        """Return the ``origin`` field reported by http://httpbin.org/ip.

        Raises:
            Whatever ``requests`` raises on network failure or timeout.
        """
        response = requests.get("http://httpbin.org/ip", timeout=self.HTTP_TIMEOUT)
        return response.json()['origin']

    def _find_target(self, elements, url_ip):
        """Return ``(index, element)`` of the first result resolving to ``url_ip``, else None."""
        for index, element in enumerate(elements):
            href = element.find_element(By.XPATH, './/a').get_attribute('href')
            real_url = self.get_real_url(href)
            if real_url and url_ip in real_url:
                print("存在真实url", real_url)
                return index, element
        return None

    def _go_to_next_page(self):
        """Click the next-page control; retry once after scrolling to the bottom."""
        try:
            self.driver.find_elements(By.ID, self.NEXT_PAGE_ID)[-1].click()
        except Exception:
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # Retry the real next-page control first; class 'n' is the
            # original fallback and is kept as a last resort.
            controls = (self.driver.find_elements(By.ID, self.NEXT_PAGE_ID)
                        or self.driver.find_elements(By.CLASS_NAME, 'n'))
            controls[-1].click()

    def search_google(self, item, depth=1):
        """Search one keyword and click the target site's result if it is found.

        Args:
            item: two-element sequence ``(url_ip, keyword)``.
            depth: unused; kept so existing callers keep working.

        Returns:
            True when a matching result was clicked, False otherwise
            (including when an error was caught and printed).
        """
        try:
            url_ip, keyword = item
            self.driver.get(self.SEARCH_ENGINE_URL)
            print(">>>>>>>>>>点击的关键词:", keyword, "--->目标地址:", url_ip, ">>>>>>>>>>>>>>>>")
            elements = self.get_search_results(url_ip, keyword)
            hit = self._find_target(elements, url_ip)
            if hit is not None:
                index, element = hit
                self.elementCLick(element, keyword, index + 1)
                return True
            for page in range(1, self.depth_deep + 1):
                print(f"============={page}==============")
                self._close_extra_windows()
                self._go_to_next_page()
                self.random_delay()
                elements = self.driver.find_elements(By.CLASS_NAME, self.RESULT_CLASS_NAME)
                hit = self._find_target(elements, url_ip)
                if hit is not None:
                    index, element = hit
                    # Kept from the original; nothing in this file reads it.
                    self.depth = 20
                    # Rank estimate: pages so far times results per page, plus offset.
                    self.elementCLick(element, keyword, page * len(elements) + index)
                    return True
        except Exception as e:
            print("搜索失败", e)
        return False

    @staticmethod
    def _parse_keyword_line(line):
        """Split a ``host|keyword`` line; return ``(host, keyword)`` or None if malformed."""
        host, sep, keyword = line.strip().partition("|")
        host, keyword = host.strip(), keyword.strip()
        if not sep or not host or not keyword:
            return None
        return host, keyword

    def read_from_file(self):
        """Run ``search_google`` for every valid line of ``keyword.txt``, then quit the driver.

        Blank or malformed lines are skipped. The driver is quit even when
        reading the file or a search raises.
        """
        try:
            with open('keyword.txt', 'r', encoding='utf-8') as file:
                for line in file:
                    item = self._parse_keyword_line(line)
                    if item is not None:
                        self.search_google(item)
        finally:
            self.driver.quit()
        print("搜索完成")

    def write_to_file(self, title):
        """Append ``title`` and a newline to ``result.txt``."""
        with open("result.txt", "a+", encoding="utf-8") as file:
            file.write(title + "\n")
# 随机切换ip
def random_ip(connection="宽带连接", username="21261637", password="147258"):
    """Hang up and redial a Windows dial-up connection with ``rasdial``.

    Per the original comment the intent is to switch to a different IP.

    Args:
        connection: name of the dial-up entry passed to ``rasdial``.
        username: account name used when redialling.
        password: password used when redialling.

    Returns:
        True when the redial command exited with status 0, False otherwise.

    NOTE(review): the default credentials are hard-coded in source, kept only
    so existing calls behave as before. Move them to configuration.
    """
    import subprocess

    try:
        # The hang-up may fail when the link is already down; that is not fatal.
        subprocess.run(["rasdial", connection, "/disconnect"], check=False)
        time.sleep(5)
        dialled = subprocess.run(["rasdial", connection, username, password], check=False)
        time.sleep(10)
    except OSError as e:
        # Raised e.g. when rasdial does not exist on this machine.
        print("切换ip失败", e)
        return False
    if dialled.returncode != 0:
        print("切换ip失败", dialled.returncode)
        return False
    return True
if __name__ == "__main__":
    # Must run first when the script is frozen into a Windows executable.
    multiprocessing.freeze_support()
    # Fifteen passes over keyword.txt, each with a fresh browser session.
    for _ in range(15):
        google_search = GoogleSearch()
        google_search.read_from_file()
        time.sleep(10)