import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
import requests
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Column-oriented accumulator for the per-event fields: every key maps to its
# own list, and the scraping functions in this module append to those lists.
main_data = {
    column: []  # a fresh list per column, never a shared one
    for column in (
        # organization
        'Organization_name',
        'Organization_voen',
        'Organization_address',
        # event
        'Event_name',
        'Event_number',
        'Classification_code',
        'Suggested_price',
        'Event_start_date',
        'Submission_deadline',
        'Envelope_opening_date',
        # fees
        'Participation_fee',
        'Participation_description',
        'Usage_fee',
        'Usage_description',
        # contact person
        'Full_name',
        'Contact',
        'Position',
        'Phone_number',
    )
}
# Accumulator for the per-event item table: every key maps to its own list,
# which get_all_info_from_table extends with one inner list per event.
nested_data = {
    column: []  # a fresh list per column, never a shared one
    for column in ('Heading', 'Disclosure', 'Quantity', 'Measure_unit', 'Code')
}
def get_total_items_from_link(api_link='https://etender.gov.az/api/events?EventType=2&PageSize=1&PageNumber=1&EventStatus=1&Keyword=&buyerOrganizationName=&PrivateRfxId=&publishDateFrom=&publishDateTo=&AwardedparticipantName=&AwardedparticipantVoen=&DocumentViewType='):
    """Return the ``totalItems`` value reported by the events listing API.

    Args:
        api_link: URL to query with a GET request (20 second timeout). The
            default points at the events listing with ``PageSize=1``.

    Returns:
        The ``totalItems`` field of the JSON response, or ``-1`` when the
        request fails, the status code is not 200, or the body is not a JSON
        object containing ``totalItems``.
    """
    try:
        response = requests.get(api_link, timeout=20)
        if response.status_code != 200:
            return -1
        return response.json()['totalItems']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # RequestException covers timeouts as well as connection errors;
        # ValueError covers a non-JSON body; KeyError/TypeError cover a JSON
        # payload that does not have the expected shape.
        return -1
def get_total_items_from_id(id):
    """Return the ``totalItems`` value of an event's ``bomLines`` listing.

    Args:
        id: Event identifier interpolated into the ``bomLines`` API URL.

    Returns:
        The ``totalItems`` field of the JSON response, or ``-1`` when the
        request fails, the status code is not 200, or the body is not a JSON
        object containing ``totalItems``.
    """
    template = f'https://etender.gov.az/api/events/{id}/bomLines?PageSize=1&PageNumber=1'
    try:
        response = requests.get(template, timeout=20)
        if response.status_code != 200:
            return -1
        return response.json()['totalItems']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # RequestException covers timeouts as well as connection errors;
        # ValueError covers a non-JSON body; KeyError/TypeError cover a JSON
        # payload that does not have the expected shape.
        return -1
def get_all_events_ids(page_size):
    """Return the ``eventId`` of every item on the first page of the events API.

    Args:
        page_size: Value sent as ``PageSize``; only ``PageNumber=1`` is
            requested, so this bounds how many ids can come back.

    Returns:
        A list with the ``eventId`` of each entry in the response's ``items``,
        or an empty list when the request fails, the status code is not 200,
        or the body does not have the expected JSON shape.
    """
    template = f'https://etender.gov.az/api/events?EventType=2&PageSize={page_size}&PageNumber=1&EventStatus=1&Keyword=&buyerOrganizationName=&PrivateRfxId=&publishDateFrom=&publishDateTo=&AwardedparticipantName=&AwardedparticipantVoen=&DocumentViewType='
    try:
        response = requests.get(template, timeout=20)
        if response.status_code != 200:
            # An error page is not an events listing; do not try to parse it.
            return []
        return [item['eventId'] for item in response.json()['items']]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body, or a payload without items/eventId.
        return []
def get_info_from_link(ids):
    """Scrape text fields from each event's detail page with Chrome.

    For every id the detail page is opened, the function sleeps 6 seconds,
    and then each ``(target_list, css_selector)`` pair in ``selectors`` is
    resolved: the visible element's text is appended to ``target_list``, or
    the string ``"None"`` when the element does not become visible within
    10 seconds.

    Args:
        ids: Iterable of event identifiers used to build the detail-page URL.

    Returns:
        None. Results are appended to the lists named in ``selectors``.
    """
    driver = webdriver.Chrome()
    try:
        for event_id in ids:
            link = f'https://etender.gov.az/main/competition/detail/{event_id}'
            driver.get(link)
            time.sleep(6)
            # NOTE(review): the entries are elided in this view; the loop below
            # requires each one to be a (list_to_append_to, css_selector) pair.
            selectors = [
                ...
            ]
            for item_list, selector in selectors:
                try:
                    element = WebDriverWait(driver, 10).until(
                        EC.visibility_of_element_located((By.CSS_SELECTOR, selector))
                    )
                    item_list.append(str(element.text))
                except (TimeoutException, NoSuchElementException, StaleElementReferenceException):
                    # WebDriverWait.until reports an expired wait with
                    # TimeoutException, so it must be caught for the
                    # placeholder to be recorded at all.
                    item_list.append("None")
    finally:
        # Always close the browser, even when a page load or lookup raises.
        driver.quit()
    time.sleep(6)
def _fee_as_text(value):
    """Return ``str(value)``, or ``'None'`` when that string is empty."""
    text = str(value)
    return text if text else 'None'


def get_fees(ids: list):
    """Append each event's participation and view fee to ``main_data``.

    For every id the ``/info`` endpoint is queried (20 second timeout) and
    exactly one entry is appended to both ``main_data['Participation_fee']``
    and ``main_data['Usage_fee']``. The entry is the stringified
    ``participationFee`` / ``viewFee`` field, or ``'None'`` when the request
    fails, the status code is not 200, or the field is absent.

    Args:
        ids: Event identifiers to look up.

    Returns:
        None. Results are appended to ``main_data``.
    """
    for event_id in ids:
        template = f'https://etender.gov.az/api/events/{event_id}/info'
        participation_fee = usage_fee = 'None'
        try:
            response = requests.get(template, timeout=20)
            if response.status_code == 200:
                data = response.json()
                # Resolve both fields before appending anything so the two
                # columns can never end up with different lengths.
                participation_fee = _fee_as_text(data.get('participationFee'))
                usage_fee = _fee_as_text(data.get('viewFee'))
        except (requests.RequestException, ValueError, AttributeError):
            # Network failure, non-JSON body, or a JSON payload that is not
            # an object: record placeholders for this event.
            participation_fee = usage_fee = 'None'
        main_data['Participation_fee'].append(participation_fee)
        main_data['Usage_fee'].append(usage_fee)
def get_fees_description(ids: list):
    """Scrape fee description texts from each event's detail page with Chrome.

    After starting the browser the function sleeps 5 seconds. For every id
    the detail page is opened and each ``(target_list, css_selector)`` pair
    in ``selectors`` is resolved: the element's text is appended to
    ``target_list``, or the string ``"None"`` when the element is not present
    within 10 seconds.

    Args:
        ids: Event identifiers used to build the detail-page URL.

    Returns:
        None. Results are appended to the lists named in ``selectors``.
    """
    driver = webdriver.Chrome()
    try:
        time.sleep(5)
        for event_id in ids:
            link = f'https://etender.gov.az/main/competition/detail/{event_id}'
            driver.get(link)
            # NOTE(review): the entries are elided in this view; the loop below
            # requires each one to be a (list_to_append_to, css_selector) pair.
            selectors = [
                ...
            ]
            for item_list, selector in selectors:
                try:
                    element = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.CSS_SELECTOR, selector))
                    )
                    item_list.append(str(element.text))
                except (TimeoutException, NoSuchElementException):
                    # WebDriverWait.until reports an expired wait with
                    # TimeoutException, so it must be caught for the
                    # placeholder to be recorded at all.
                    item_list.append("None")
    finally:
        # Always close the browser, even when a page load or lookup raises.
        driver.quit()
    time.sleep(5)
def get_contact(ids):
    """Append the contact persons of each event to ``main_data``.

    For every id the ``/contact-persons`` endpoint is queried (20 second
    timeout). Each returned person contributes one entry to the
    ``Full_name``, ``Contact``, ``Position`` and ``Phone_number`` columns,
    with ``'None'`` standing in for missing or empty fields. When the request
    fails, the status code is not 200, or no persons are returned, a single
    all-``'None'`` entry is appended so the event is still represented.

    Args:
        ids: Iterable of event identifiers to look up.

    Returns:
        None. Results are appended to ``main_data``.
    """
    # (main_data column, key in the API payload)
    fields = (
        ('Full_name', 'fullName'),
        ('Contact', 'contact'),
        ('Position', 'position'),
        ('Phone_number', 'phoneNumber'),
    )
    for event_id in ids:
        template = f'https://etender.gov.az/api/events/{event_id}/contact-persons'
        persons = []
        try:
            response = requests.get(template, timeout=20)
            if response.status_code == 200:
                payload = response.json()
                if isinstance(payload, list):
                    persons = [entry for entry in payload if isinstance(entry, dict)]
        except (requests.RequestException, ValueError):
            # Network failure or non-JSON body: fall through to the placeholder.
            persons = []
        if not persons:
            # An empty dict yields 'None' for every field below.
            persons = [{}]
        # NOTE(review): an event with several contact persons adds several
        # entries here, while get_fees adds exactly one entry per event --
        # confirm how these columns are meant to line up downstream.
        for person in persons:
            for column, key in fields:
                main_data[column].append(person.get(key) or 'None')
def get_all_info_from_table(ids):
    """Append each event's ``bomLines`` table to ``nested_data``.

    For every id the number of lines is fetched with
    ``get_total_items_from_id`` and then the whole table is requested in one
    page. Each ``nested_data`` column receives one list per event, holding
    the stringified field of every line (``'None'`` for missing or falsy
    values). When the count lookup fails, the request fails, the status code
    is not 200, or the payload is malformed, each column receives
    ``['None']`` for that event instead.

    Args:
        ids: Iterable of event identifiers to look up.

    Returns:
        None. Results are appended to ``nested_data``.
    """
    # (nested_data column, key in each bomLines item)
    columns = (
        ('Heading', 'name'),
        ('Disclosure', 'description'),
        ('Quantity', 'quantity'),
        ('Measure_unit', 'unitOfMeasure'),
        ('Code', 'categoryCode'),
    )
    for event_id in ids:
        rows = {column: [] for column, _ in columns}
        fetched = False
        total_items = get_total_items_from_id(event_id)
        # -1 is the failure sentinel of get_total_items_from_id; do not send
        # it to the API as a page size.
        if total_items != -1:
            table_link = f'https://etender.gov.az/api/events/{event_id}/bomLines?PageSize={total_items}&PageNumber=1'
            try:
                response = requests.get(table_link, timeout=20)
                if response.status_code == 200:
                    for item in response.json()['items']:
                        for column, key in columns:
                            value = item.get(key)
                            rows[column].append(str(value) if value else 'None')
                    fetched = True
            except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError):
                # Network failure, non-JSON body, or unexpected payload shape.
                fetched = False
        if not fetched:
            # Discard any partial rows and record a single placeholder line.
            rows = {column: ['None'] for column, _ in columns}
        for column, _ in columns:
            nested_data[column].append(rows[column])
def fetch_data(ids):
    """Run all five collectors for *ids* concurrently and wait for them.

    Each collector is submitted to a thread pool with the same ``ids``
    argument. Calling ``result()`` on every finished future re-raises any
    exception a collector raised.

    Args:
        ids: Event identifiers handed unchanged to every collector.

    Returns:
        None.
    """
    collectors = (
        get_info_from_link,
        get_all_info_from_table,
        get_fees,
        get_fees_description,
        get_contact,
    )
    with ThreadPoolExecutor() as pool:
        pending = [pool.submit(collector, ids) for collector in collectors]
        for finished in as_completed(pending):
            finished.result()