Sometimes I want to follow changes to a website that does not provide an RSS feed. For such cases I have written a simple Bash script that I run periodically with cron.
Here it is:
#!/bin/bash
#
# followthatpage - watch web pages that offer no RSS feed and mail any changes.
#
# Meant to be run periodically (e.g. from cron). For every URL listed in
# PAGES_FILE the script stores a text dump of the page, keeps only the two
# most recent dumps, and mails the diff between them whenever they differ.
# The first time a URL is seen, the initial dump is mailed instead.
#
# Pages file format: whitespace-separated URLs; a token that starts with '#'
# begins a comment that runs to the end of the line.
#
# Requires: links, md5sum, diff, mail and a head that supports --bytes.

# -e is deliberately not set: diff exits 1 to report changes, and one failing
# page must not abort the check of the remaining pages. Failures that matter
# are checked explicitly below.
set -u
# An unmatched glob must expand to nothing so that "no snapshot yet" shows up
# as an empty array rather than as the literal pattern.
shopt -s nullglob

# Directory (relative to where the script is started) holding all state.
readonly WORK_DIR=followthatpage
# List of pages to check, looked up inside WORK_DIR.
readonly PAGES_FILE=pages-to-follow.txt
# Recipient of the notifications. Not named USER, which conventionally holds
# the login name; reassigning it can leak into every command started here.
readonly MAIL_TO=me@example.com
# Upper bound for the size of a change notification.
readonly MAX_MAIL_BYTES=100000
# Pause after each processed page.
readonly PAUSE_SECONDS=10

# Prints a diagnostic to stderr.
warn() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

# Prints a diagnostic to stderr and aborts the script.
die() {
  warn "$@"
  exit 1
}

#######################################
# Derives the per-page file name prefix: "rr-" plus the md5 of the URL
# followed by a newline.
# Arguments: $1 - URL
# Outputs:   the prefix on stdout
# Returns:   non-zero if no digest could be computed
#######################################
snapshot_prefix() {
  local digest
  digest=$(printf '%s\n' "$1" | md5sum | cut -d' ' -f1)
  [[ -n "$digest" ]] || return 1
  printf 'rr-%s\n' "$digest"
}

#######################################
# Dumps a page as text into a snapshot file.
# The dump goes to a temporary name first so that a failed download never
# replaces or creates a snapshot. This relies on links reporting a failed
# download through its exit status.
# Arguments: $1 - URL, $2 - destination file
# Returns:   0 on success, non-zero if the page could not be stored
#######################################
fetch_page() {
  local url=$1 dest=$2
  # The ".tmp" suffix keeps the file out of the "*.page" snapshot glob.
  local tmp="${dest}.tmp"

  if ! links -dump "$url" > "$tmp"; then
    rm -f -- "$tmp"
    warn "could not fetch ${url}"
    return 1
  fi
  mv -f -- "$tmp" "$dest"
}

#######################################
# Takes a new snapshot of one page and mails what changed since the last one.
# Globals:   MAIL_TO, MAX_MAIL_BYTES (read)
# Arguments: $1 - URL
# Outputs:   writes last-result-<prefix>.diff when the page changed
# Returns:   0 on success (changed or not), non-zero if the check failed
#######################################
check_page() {
  local url=$1
  local prefix snapshot changes
  local -a snapshots=()
  local -i known=0 total=0 diff_status=0

  prefix=$(snapshot_prefix "$url") || return 1

  # Snapshots present before this run decide whether the page is new.
  snapshots=("${prefix}"_*.page)
  known=${#snapshots[@]}

  snapshot="${prefix}_$(date +%F_%H-%M).page"
  fetch_page "$url" "$snapshot" || return 1

  if (( known == 0 )); then
    # First observation: there is nothing to compare against yet.
    mail -s "Observation started ${url}" "$MAIL_TO" < "$snapshot"
    return 0
  fi

  # The timestamp in the name makes glob order chronological; keep the two
  # newest snapshots and drop everything older.
  snapshots=("${prefix}"_*.page)
  total=${#snapshots[@]}
  if (( total > 2 )); then
    rm -f -- "${snapshots[@]:0:total-2}"
    snapshots=("${snapshots[@]:total-2}")
  fi

  # Two runs within the same minute write the same file name, which leaves a
  # single snapshot and nothing to compare.
  (( ${#snapshots[@]} == 2 )) || return 0

  changes=$(diff -- "${snapshots[0]}" "${snapshots[1]}") || diff_status=$?
  case "$diff_status" in
    0)
      ;;
    1)
      # printf with %s keeps backslashes in the page text intact.
      printf 'Changes in Page:\n%s\n\nDifferences are:\n- Begin diff output ------------\n\n%s\n\n- End of diff output ---------\n' \
        "$url" "$changes" \
        | head --bytes="$MAX_MAIL_BYTES" \
        | mail -s "Changes in ${url}" "$MAIL_TO"
      printf '%s\n' "$changes" > "last-result-${prefix}.diff"
      ;;
    *)
      warn "diff failed for ${url}"
      return 1
      ;;
  esac
}

#######################################
# Checks every page listed in PAGES_FILE.
# Globals: WORK_DIR, PAGES_FILE, PAUSE_SECONDS (read)
#######################################
main() {
  local -a fields=()
  local field url

  cd -- "$WORK_DIR" || die "cannot change to directory ${WORK_DIR}"
  [[ -r "$PAGES_FILE" ]] || die "cannot read ${PAGES_FILE}"

  # The list is read on fd 3 so that commands inside the loop cannot consume
  # it through their stdin. The extra test handles a last line that has no
  # trailing newline.
  while read -r -u 3 -a fields || (( ${#fields[@]} > 0 )); do
    (( ${#fields[@]} > 0 )) || continue
    for field in "${fields[@]}"; do
      # A '#' token comments out the rest of the line.
      [[ "$field" == "#"* ]] && break
      # Drop non-printable characters such as a CR from DOS line endings.
      url=$(printf '%s' "$field" | tr -dc '[:print:]')
      [[ -n "$url" ]] || continue
      check_page "$url" || warn "skipped ${url}"
      sleep "$PAUSE_SECONDS"
    done
  done 3< "$PAGES_FILE"
}

main "$@"