I'm a beginner with no formal training, please help.
I wrote a parser that walks through a list of sites from an Excel file and, where possible, extracts their social network links.
The problem is that I don't understand how to make it pull all the social networks in one go instead of making a separate visit to the page for each one. I tried carrying the collected data over so it wouldn't be gathered again, but then the search simply doesn't see it for, say, Discord. Please help. Other than that, the code below works; a sketch of what I'm trying to end up with is after it.
# Finds Telegram, Twitter, Discord and LinkedIn links on the listed sites
import datetime
import os
import pandas as pd
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from selenium import webdriver

# Headless Chrome with a random user agent
ua = UserAgent()
userAgent = ua.random
option = webdriver.ChromeOptions()
option.add_argument('headless')
option.add_argument(f'user-agent={userAgent}')
browser = webdriver.Chrome(executable_path='/Users/kainen/Downloads/chromedriver/chromedriver', options=option)
def telegram(tg_web):
    # Returns the first link on the page that contains 't.me', or '' if none is found
    tg_followers = ''
    try:
        if tg_web:
            browser.get(tg_web)
            soup = BeautifulSoup(browser.page_source, 'lxml')
            #time.sleep(2)
            scrapped_page = soup.findAll('a')
            #print(scrapped_page)
            data = []
            for All_tags in scrapped_page:
                All_link = All_tags.get('href')
                data.append(All_link)
            for link in data:
                # href can be None, so check it before the substring test
                if link and 't.me' in link:
                    tg_followers = link
                    break
    except Exception as e:
        tg_followers = ''
        print(f'Error occurred with link {tg_web}: {e}')
    return tg_followers
def twitter(tw_web):
    # Returns the first link on the page that contains 'twitter', or '' if none is found
    tw_followers = ''
    try:
        if tw_web:
            browser.get(tw_web)
            soup = BeautifulSoup(browser.page_source, 'lxml')
            scrapped_page = soup.findAll('a')
            #print(scrapped_page)
            data = []
            for All_tags in scrapped_page:
                All_link = All_tags.get('href')
                data.append(All_link)
            for link in data:
                # href can be None, so check it before the substring test
                if link and 'twitter' in link:
                    tw_followers = link
                    break
    except Exception as e:
        tw_followers = ''
        print(f'Error occurred with link {tw_web}: {e}')
    return tw_followers
def discord(disc_web):
    # Returns the first link on the page that contains 'discord', or '' if none is found
    disc_followers = ''
    try:
        if disc_web:
            browser.get(disc_web)
            soup = BeautifulSoup(browser.page_source, 'lxml')
            scrapped_page = soup.findAll('a')
            #print(scrapped_page)
            data = []
            for All_tags in scrapped_page:
                All_link = All_tags.get('href')
                data.append(All_link)
            for link in data:
                # href can be None, so check it before the substring test
                if link and 'discord' in link:
                    disc_followers = link
                    break
    except Exception as e:
        disc_followers = ''
        print(f'Error occurred with link {disc_web}: {e}')
    return disc_followers
def linked(linked_web):
    # Returns the first link on the page that contains 'linked', or '' if none is found
    linked_followers = ''
    try:
        if linked_web:
            browser.get(linked_web)
            soup = BeautifulSoup(browser.page_source, 'lxml')
            scrapped_page = soup.findAll('a')
            #print(scrapped_page)
            data = []
            for All_tags in scrapped_page:
                All_link = All_tags.get('href')
                data.append(All_link)
            for link in data:
                # href can be None, so check it before the substring test
                if link and 'linked' in link:
                    linked_followers = link
                    break
    except Exception as e:
        linked_followers = ''
        print(f'Error occurred with link {linked_web}: {e}')
    return linked_followers
def main():
    print(f'\nRescoring started at {datetime.datetime.now()}')
    writer = pd.ExcelWriter(f'{os.getcwd()}/Closed_lost_2020_followers.xlsx')
    df = pd.read_excel('/Users/kainen/Downloads/chromedriver/Result_25.xlsx', sheet_name=0)
    df['Telegram_followers'] = ''
    df['Twitter_followers'] = ''
    df['Discord_followers'] = ''
    df['linked_followers'] = ''
    for index, row in df.iterrows():
        try:
            # Each call below re-opens the same page from the 'telegram' column
            df.loc[index, 'Telegram_followers'] = telegram(row['telegram'])
            df.loc[index, 'Twitter_followers'] = twitter(row['telegram'])
            df.loc[index, 'Discord_followers'] = discord(row['telegram'])
            df.loc[index, 'linked_followers'] = linked(row['telegram'])
            print(f'Lead: {row["id"]}, tg: {df.loc[index, "Telegram_followers"]}, tw: {df.loc[index, "Twitter_followers"]}, disc: {df.loc[index, "Discord_followers"]}, linked: {df.loc[index, "linked_followers"]}')
        except Exception as e:
            print(f'Error occurred with lead {row["id"]}: {e}')
    df.to_excel(writer, sheet_name='Closed_lost_with_followers', index=False)
    writer.save()
    print(f'\nScript executed!\n')
    return
#schedule.every(30).minutes.do(main)

if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        print(f'\nError occurred: {e}')
    #access_token = get_access_token('amocrm_followers_scrapping.db')
    #print(access_token)
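
Roughly, what I'm after is to open every page once, collect its links once, and then pick all the networks out of that single list. Something like the sketch below, just to show the idea: it reuses the same browser and BeautifulSoup setup as above, extract_socials is a name I made up for illustration, and I haven't got this working yet:

def extract_socials(url):
    # extract_socials is a hypothetical helper: open the page once,
    # collect every href, then filter that one list per network
    socials = {'telegram': '', 'twitter': '', 'discord': '', 'linked': ''}
    keywords = {'telegram': 't.me', 'twitter': 'twitter', 'discord': 'discord', 'linked': 'linked'}
    if not url:
        return socials
    try:
        browser.get(url)
        soup = BeautifulSoup(browser.page_source, 'lxml')
        links = [a.get('href') for a in soup.findAll('a')]
        # One pass per network over the already collected links, no extra page loads
        for name, keyword in keywords.items():
            for link in links:
                if link and keyword in link:
                    socials[name] = link
                    break
    except Exception as e:
        print(f'Error occurred with link {url}: {e}')
    return socials

Then the four separate calls inside main() would become one:

            socials = extract_socials(row['telegram'])
            df.loc[index, 'Telegram_followers'] = socials['telegram']
            df.loc[index, 'Twitter_followers'] = socials['twitter']
            df.loc[index, 'Discord_followers'] = socials['discord']
            df.loc[index, 'linked_followers'] = socials['linked']

Is this the right direction, or is there a more correct way to do it?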