Дёшево, быстро и сердито:
$ ./parse.sh http://habrahabr.ru/post/148795/ | column -t
Now_is: Wed_Aug_1_00:28:55_MSK_2012
The_url_is: http://habrahabr.ru/post/148795/
Max_"+"_is: 13 RxB
Max_"-"_is: 30 Gangsta
Total_comments: 721
Top_commentor_is: 49 opium
Total_questions: 342
Total_links: 57
Most_popular_site_is: 4 http://twower.livejournal.com
Total_commentators: 189
Total_"+": 495
Total_"-": 101
Total_Sign: 394
Собственно, сам код:
#!/bin/sh
# parse.sh - print comment statistics for a web page given by URL.
#
# Usage: ./parse.sh URL | column -t
#
# Downloads URL into a temporary file, keeps only the lines between the
# "comments_list" and "for_users_only_msg" markers, and prints one
# "Label: value" line per statistic.
#
# Requires GNU sed (uses \s, \+, \| and -i), wget, awk, sort, uniq, grep.
# Exit status: 2 when URL is missing, 1 when the temp file cannot be created
# or the download fails, otherwise the status of the last command.

if [ "$#" -lt 1 ]; then
  printf 'Usage: %s URL\n' "$0" >&2
  exit 2
fi

v_temp=$(mktemp) || exit 1
# Remove the temp file on every exit path; the signal trap turns an
# interrupt into a normal exit so the EXIT trap also fires in shells
# that do not run it on signals.
trap 'rm -f -- "$v_temp"' EXIT
trap 'exit 1' HUP INT TERM

# Lines carrying class="score" or class="username".
score_user_lines() {
  sed -n "/class=\"\(score\|username\)\"/p" "$v_temp"
}

# Inside each "message html_format" ... closing </div> range, put the first
# "<a href" of a line on its own line and print the lines that contain a link.
link_lines() {
  sed -n '/message html_format/,/^\s\+<\/div>/{s><a href>\n&>;/a href/p}' "$v_temp"
}

printf 'Now_is: '
# Collapse whitespace into underscores so `column -t` sees a single field.
date | sed 's/\s\+/_/g'
printf 'The_url_is: %s\n' "$1"

if ! wget -q --referer="http://www.google.com" \
  --user-agent="Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6" \
  -O "$v_temp" "$1"; then
  printf '%s: failed to download %s\n' "$0" "$1" >&2
  exit 1
fi

# Drop everything outside the comments_list .. for_users_only_msg range.
sed -i '/comments_list/,/for_users_only_msg/!d' "$v_temp"

# Join each pair of lines, strip tags, remove the leading "+" and discard
# entries that start with an en dash; the numerically largest one wins.
printf 'Max_"+"_is: '
score_user_lines \
  | sed 'N;s/<[^>]*>//g;s/\n//;s/^\s\++//;/^\s\+–/d' \
  | sort -n | tail -1

# Same as above, but keep the en-dash entries and discard the "+" ones.
printf 'Max_"-"_is: '
score_user_lines \
  | sed 'N;s/<[^>]*>//g;s/\n//;s/^\s\+–//;/^\s\++/d' \
  | sort -n | tail -1

printf 'Total_comments: '
sed -n '/class="score"/p' "$v_temp" | wc -l

# Most frequent line mentioning "username", with markup stripped.
printf 'Top_commentor_is: '
grep username "$v_temp" | sort | uniq -c | sort -n | tail -1 | sed 's/<[^>]*>//g;'

# Lines that still contain "?" once tags are removed.
printf 'Total_questions: '
sed -n 's)<[^>]*>))g;/\?/p' "$v_temp" | wc -l

printf 'Total_links: '
link_lines | grep "a href" | wc -l

# Count scheme://host prefixes of the links, ignoring hosts matching "habr".
# (The original read a file literally named "mktemp" here instead of the
# downloaded page, so this statistic was never computed.)
printf 'Most_popular_site_is: '
link_lines | grep -E -o 'https?://[^/"]*' | sort | uniq -c | sort -n | grep -v habr | tail -1

printf 'Total_commentators: '
sed -n '/class="username"/p' "$v_temp" | sort -u | wc -l

# Sum of the numbers that follow "uarr;" on the score lines.
printf 'Total_"+": '
sed -n '/class="score"/{s/.*uarr;\([0-9]\+\).*/\1/;p}' "$v_temp" | awk '{sum+=$1}END{print sum}'

# Sum of the numbers that follow "darr;" on the score lines.
printf 'Total_"-": '
sed -n '/class="score"/{s/.*darr;\([0-9]\+\).*/\1/;p}' "$v_temp" | awk '{sum+=$1}END{print sum}'

# Difference between the two sums above, computed in a single pass.
printf 'Total_Sign: '
sed -n '/class="score"/{s/.*uarr;\([0-9]\+\).*darr;\([0-9]\+\)/\1 \2/;p}' "$v_temp" | awk '{plus+=$1;minus+=$2}END{print plus-minus}'
Можно применять не только к вышеназванному топику. (: