#!/usr/bin/env bash
#
# Collects the magnet links of every topic in one rutracker.org forum category.
#
# Usage: catalog-magnet.sh <ID_CATEGORY>
#
# Category pages are cached under $DIR_TMP_CAT and the resulting list is
# written to $DIR_DWN/<ID_CATEGORY>.txt_1.
#
# NOTE(review): the locale below presumably matches the encoding of the
# fetched pages so that sed treats them as single-byte text - confirm.
export LC_ALL=ru_RU.CP1251
# Variables
################################################################################
TR_URL='https://rutracker.org/forum'
# ${1:-} yields an empty string (instead of an error under `set -u`) when the
# argument is missing; the check below then prints the usage text.
TR_CATEGORY="${1:-}"
DIR_DWN="$HOME/Torrents"
DIR_TMP='/tmp/rds'
DIR_TMP_CAT="$DIR_TMP/category_$TR_CATEGORY"
SC_UA='Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:44.0) Gecko/20100101 Firefox/44.0'
# BEGIN
################################################################################
if [[ -z "$TR_CATEGORY" ]]; then
  # A missing argument is a usage error: report it on stderr and exit with a
  # non-zero status so callers can detect the failure.
  printf '%s\n' \
    'Please, enter category ID.' \
    'Example: catalog-magnet.sh <ID_CATEGORY>' >&2
  exit 1
fi
# bash's echo does not expand "\n" without -e, so printf emits the blank line.
printf '%s\n\n' "Let's Go!"
# Check and create directories
################################################################################
# mkdir -p creates missing parents and succeeds when the directory already
# exists, so no separate existence test is needed. All paths are quoted because
# $HOME (and therefore $DIR_DWN) may contain spaces.
mkdir -p -- "$DIR_TMP" "$DIR_DWN"
if [[ -d "$DIR_TMP_CAT" ]]; then
  # remove old files
  # ${DIR_TMP_CAT:?} aborts the script if the variable is empty or unset;
  # without it the glob would expand to "/*".
  rm -rf -- "${DIR_TMP_CAT:?}"/*
else
  mkdir -p -- "$DIR_TMP_CAT"
fi
# Total pages
################################################################################
echo 'Get total pages in category...'
# Fetch the first page of the category; it carries the pager links.
category_page=$(curl "$TR_URL/viewforum.php?f=$TR_CATEGORY&start=0" \
  -A "$SC_UA" \
  --show-error \
  -L \
  -s
)
# find latest pager link
# <a class="pg" href="viewforum.php?f=###&start=###">###</a>
# The page is squeezed onto a single line so that the greedy leading ".*" of
# the sed expression selects the LAST numeric pager link. tr does the
# whitespace squeezing that the former unquoted `echo $category_page` did,
# but without glob-expanding words such as "*" found in the HTML.
total_pages=$(printf '%s' "$category_page" \
  | tr -s ' \t\n' '   ' \
  | sed -En 's/.*<a class=\"pg\" href=\".*\">([0-9]*)<\/a> .*/\1/p' \
  | head -1
)
# No numeric pager link was found (presumably a single-page category, or the
# download failed): fall back to one page so the later loops and progress
# output have a usable number.
total_pages="${total_pages:-1}"
# bash's echo does not expand "\n" without -e, so printf emits the blank line.
printf '%s\n\n' '...complete!'
sleep 1
# Category Page
################################################################################
echo 'Download category pages...'
# An empty $total_pages (no pager link found) means a single page.
last_page="${total_pages:-1}"
for ((page = 1; page <= last_page; page++)); do
  page_link=$(((page - 1) * 50)) # 50 items per page, ex. 0..50..100
  category_pages=$(curl "$TR_URL/viewforum.php?f=$TR_CATEGORY&start=$page_link" \
    -A "$SC_UA" \
    --show-error \
    -L \
    -s
  )
  # Cache the page; the torrent-ID step reads these files back.
  printf '%s\n' "$category_pages" > "$DIR_TMP_CAT/category_page_$page.html"
  # Values are passed as arguments, never spliced into the format string.
  printf '\rProgress : %d of %s' "$page" "$last_page"
done
# bash's echo does not expand "\n" without -e, so printf emits the line breaks.
printf '\n%s\n\n' '...complete!'
sleep 1
# Torrent ID
################################################################################
echo "Get torrent IDs..."
id_list="$DIR_TMP_CAT/ids_list_.txt_1"
# Start from an empty list so IDs from a previous run can never leak in.
: > "$id_list"
# An empty $total_pages (no pager link found) means a single page.
last_page="${total_pages:-1}"
for ((page = 1; page <= last_page; page++)); do
  page_file="$DIR_TMP_CAT/category_page_$page.html"
  # find torrent topic link
  # <a id="tt-###" href="viewtopic.php?t=###">
  # sed appends its matches directly: a page without any topic link adds
  # nothing, whereas `echo "$ids" >>` used to add an empty line that inflated
  # the ID count shown in the magnet step.
  sed -En 's/.*<a.*href=\"viewtopic\.php\?t=([0-9]*)\".*>.*/\1/p' \
    "$page_file" >> "$id_list"
done
# bash's echo does not expand "\n" without -e, so printf emits the blank line.
printf '%s\n\n' '...complete!'
sleep 1
# Magnet URL
################################################################################
echo 'Get magnet URLs...'
# Count only non-empty lines so the total matches the IDs processed below.
total_ids=$(grep -c . "$id_list")
i=1
magnet_list="$DIR_DWN/$TR_CATEGORY.txt_1"
# Always start with an existing, empty result file. The former test used the
# misspelled, still-unset $magnet_link, so the file was always removed and
# never recreated; the final line count then failed when no link was found.
: > "$magnet_list"
while IFS= read -r id || [[ -n "$id" ]]; do
  # Skip blank lines in the ID list.
  [[ -n "$id" ]] || continue
  torrent_page=$(curl "$TR_URL/viewtopic.php?t=$id" \
    -A "$SC_UA" \
    --show-error \
    -L \
    -s
  )
  # Squeeze the page onto one line once; every sed expression below starts
  # with a greedy ".*" and expects the whole document on a single line. tr
  # does what the former unquoted `echo $torrent_page` did, without
  # glob-expanding words such as "*" found in the HTML.
  page_line=$(printf '%s' "$torrent_page" | tr -s ' \t\n' '   ')
  # find magnet link on page
  # <a href="magnet:###">
  magnet_link=$(sed -En 's/.*<a.*href=\"(magnet:[^"]*)\".*>.*/\1/p' \
    <<< "$page_line"
  )
  # Topic title: the part of the description before the last "»" separator.
  # <meta name="description" content="<topic title> » <forum name> :: RuTracker.org">
  # NOTE(review): the non-ASCII characters in this pattern and in the
  # performer pattern below only match if this script is stored in the same
  # encoding as the fetched page - confirm.
  content_name=$(sed -En 's/.*<meta. *name=\"description\" *content=\"([^"]*)\».*>.*/\1/p' \
    <<< "$page_line"
  )
  # Book title: text of the 24px heading span.
  # <span style="font-size: 24px; line-height: normal;"><book title></span>
  book_title=$(sed -En 's/.*<span. *style=\"font-size\: *24px\; *line-height\: *normal\;\">*([^<]*)<\/span.*/\1/p' \
    <<< "$page_line"
  )
  # Value that follows the label "Исполнитель" (Russian for "performer").
  # <span class="post-b">Исполнитель</span>: <name><br>
  # NOTE(review): the variable is called author_surname but the pattern reads
  # the performer field - confirm which one is intended.
  author_surname=$(sed -En 's/.*<span. *class=\"post-b\">Исполнитель<\/span>: *([^<]*)<br.*/\1/p' \
    <<< "$page_line"
  )
  # TODO: fields not extracted yet: author_name, artist, cycle_series,
  # book_number, genre, description, post_img and dir_name. Draft for dir_name:
  #<li class="dir collapsed"><div><b>./ </b>
  # dir_name=$(echo $torrent_page \
  # | sed -En 's/.*<li. *class=\"dir collapsed\"*>*<div.*>*<b.*>\.\/([^"]*)*<.*/\1/p'
  # )
  if [[ -n "$magnet_link" ]]; then
    # Alternative record layout: magnet, topic URL, topic title.
    # printf '%s\t%s\t%s\n' "$magnet_link" "$TR_URL/viewtopic.php?t=$id" "$content_name" >> "$magnet_list"
    # printf writes real TAB separators; bash's echo without -e wrote the two
    # characters "\t" literally.
    printf '%s\t%s\t%s\t%s\n' \
      "$magnet_link" "$content_name" "$book_title" "$author_surname" \
      >> "$magnet_list"
  fi
  printf '\rProgress : %d of %s' "$i" "$total_ids"
  i=$((i + 1))
done < "$id_list"
# bash's echo does not expand "\n" without -e, so printf emits the line breaks.
printf '\n%s\n\n' '...complete!'
# FINISH
################################################################################
# Number of collected links = number of lines in the result file. A missing
# file (nothing was collected) counts as zero instead of making the count fail.
total_links=0
if [[ -f "$magnet_list" ]]; then
  # tr strips the padding that some wc implementations put around the number.
  total_links=$(wc -l < "$magnet_list" | tr -d ' ')
fi
# bash's echo does not expand "\n" without -e, so printf emits the blank line.
printf 'Total URLs : %s\n\n' "$total_links"
echo 'Enjoy...'
exit
root@s:~# python --version
Python 2.7.17
root@s:~# python3 --version
Python 3.6.9
#!/usr/bin/env python3
~# /srv/script.py
Traceback (most recent call last):
File "/srv/script.py", line 14, in <module>
f.write(newlines)
TypeError: write() argument must be str, not list
~# /srv/script.py
Traceback (most recent call last):
File "/srv/script.py", line 5, in <module>
with open(filename, 'r', encoding='utf8') as f:
TypeError: 'encoding' is an invalid keyword argument for this function
#!/usr/bin/env python
# coding: utf8
"""Replace the decimal comma with a dot in the 9th ';'-separated column of a
CSV file, rewriting the file in place."""
filename = '/srv/www/s.s/www/s-s1.csv'  # put the name of the file to convert here


def fix_decimal_line(line, column=8, sep=b';'):
    """Return `line` (bytes) with every ',' in field `column` replaced by '.'.

    `column` is zero-based. Lines that have fewer fields (for example an empty
    trailing line) are returned unchanged instead of raising IndexError.
    """
    fields = line.split(sep)
    if len(fields) > column:
        fields[column] = fields[column].replace(b',', b'.')
    return sep.join(fields)


def main():
    """Read `filename`, fix every line and write the result back."""
    # Binary mode behaves the same on Python 2 and 3 and works for any
    # ASCII-compatible encoding, so no `encoding=` argument (unsupported by
    # Python 2's open) is needed. The file must be opened for reading here:
    # mode 'w' would truncate it before anything could be read.
    with open(filename, 'rb') as f:
        lines = f.readlines()
    newlines = [fix_decimal_line(data) for data in lines]
    with open(filename, 'wb') as f:
        # writelines accepts the list; write() only accepts a single string.
        f.writelines(newlines)


if __name__ == '__main__':
    main()
~# /srv/script.py
Traceback (most recent call last):
File "/srv/script.py", line 14, in <module>
f.write(newlines)
TypeError: expected a string or other character buffer object
root@steinert:~# /srv/script.py
Traceback (most recent call last):
File "/srv/script.py", line 5, in <module>
with open(filename, 'r', encoding='utf8') as f:
TypeError: 'encoding' is an invalid keyword argument for this function
~# /srv/script.py
File "/srv/script.py", line 2
SyntaxError: Non-ASCII character '\xd1' in file /srv/script.py on line 2, but no encoding declared; see http://python.org/dev/peps/pep-0263/ for details
#!/usr/bin/env python3
"""Replace the decimal comma with a dot in the 9th ';'-separated column of
every row of a CSV file, rewriting the file in place."""
filename = '/srv/www/s.s/www/s-s1.csv'  # set this to the file to convert


def fix_decimal_column(content, column=8):
    """Return `content` (bytes) with ',' -> '.' in field `column` of each row.

    The content is processed row by row. Splitting the whole file on ';' at
    once would only fix the 9th field of the first row. Rows with fewer than
    `column` + 1 fields are kept unchanged.
    """
    fixed_rows = []
    # keepends=True preserves the original line terminators.
    for row in content.splitlines(True):
        cells = row.split(b';')
        if len(cells) > column:
            cells[column] = cells[column].replace(b',', b'.')
        fixed_rows.append(b';'.join(cells))
    return b''.join(fixed_rows)


def main():
    """Read `filename`, fix the column and write the result back."""
    # Binary mode keeps the result independent of the locale's default
    # encoding; ';' and ',' are single bytes in any ASCII-compatible encoding.
    with open(filename, 'rb') as f:
        data = f.read()
    data = fix_decimal_column(data)
    with open(filename, 'wb') as f:
        f.write(data)


if __name__ == '__main__':
    main()
Но на будущее обязательно изучу. Буду благодарен, если направите в нужное место: из языков я немного знаю C/C++. С чего начать и куда смотреть по теме парсеров?