#!/bin/bash
###
# Angry Sheriff
# version 0.2
#
# Get foreclosure listings (Sheriff's Sales) from NJ.com web site
# (c) Murty Rompalli
#
# 10/31/2003
#
# Usage: angry-sheriff directory-name
#   directory-name that does not exist : download fresh listings into it
#   directory-name that already exists : reuse the listings stored in it
#
# Produces two files next to the directory:
#   directory-name.list    price / file / address for every kept listing
#   directory-name.filter  the .list entries left after the keyword filter
#
# Requires GNU sed/grep/csplit/xargs (the regular expressions use the GNU
# \+ \? \| extensions) and lynx for downloading.
###

prog='Angry Sheriff v0.2'

####### User configurable parameters

# Price must be below limit and above minprice
limit=110000
minprice=1000

# Number of lines ignored at the top / bottom of each listing file when
# looking for the property address.
topskip=7
bottomskip=10

# Create filter file by blocking listings containing any of the
# following keywords.
# Keywords are matched case-insensitively (grep -i) as basic regular
# expressions.
# Use _ instead of space.

excludewords="
ADDRESS_CANNOT_BE_DETERMINED
Trenton
Hightstown
Hamilton
neptune
manalapan
freehold
Ewing
Newark
Keansburg
Orange
Irvington
tinton_falls
asbury_park
atlantic_highlands
long_branch
red_bank
jersey_city
East_Windsor
"

####### END of User configurable parameters

# Set by main() from the command line argument.
dir=''
out=''
filter=''

# Scratch file used while downloading; created by getfiles().
tmp=''

### Remove the scratch file on every exit path
cleanup() {
  if [ -n "$tmp" ]; then
    rm -f -- "$tmp"
  fi
}
trap cleanup EXIT

### Print the usage message on stderr
usage() {
  {
    echo
    echo 'ERROR: Missing directory name'
    echo
    echo "Usage: $0 directory-name"
    echo 'Choose directory name that does not exist (To download fresh), or'
    echo 'choose existing directory name (To use downloaded listings)'
    echo
  } >&2
}

### Print one exclusion pattern per line ("_" becomes a space).
### Prints nothing at all when excludewords is empty.
exclude_patterns() {
  printf '%s\n' "$excludewords" \
    | awk '{ for (i = 1; i <= NF; i++) { w = $i; gsub(/_/, " ", w); print w } }'
}

### Get address
# $1 - one line of listing text (whitespace already collapsed by the caller)
# Prints "<street/town> <state> <zip>" when the text contains one of the
# markers below followed by a New Jersey state name; prints nothing otherwise.
getaddress() {
  # Markers recognised in front of the address (case-insensitive):
  #   com.+nly k.+ as?\ ?:?     e.g. "commonly known as:"
  #   mun.+ty\ ?:?              e.g. "municipality:"
  #   ad.+s\ ?:?                e.g. "address:"
  printf '%s\n' "$1" \
    | sed 's/\;//g' \
    | sed -n 's/^.\+\(com[a-zA-Z0-9]\+nly\ k[a-zA-Z0-9]\+ as\?\ \?:\?\|mun[a-zA-Z0-9]\+ty\ \?:\?\|ad[dreDRE0-9]\+s\ \?:\?\)\(.\+\)\(N\.\?\ \?J\.\?\|NEW\.\?\ \?JERSEY\.\?\)\(\ \?[0-9][0-9][0-9][0-9][0-9]\|\ \).\+$/\2\ \3\ \4/ip'
}

### Show info
# $1 - path of a listing file
# $2 - highest price found in that file
# Prints "\n<price>\t<file name>\t<address>" where <address> is the first
# address found in a sliding 4-line window, or ADDRESS CANNOT BE DETERMINED.
showinfo() {
  local file=$1
  local maxprice=$2
  local linecount first last block
  # Must be local and start empty: a value left over from a previous call
  # would otherwise suppress the "cannot be determined" message.
  local address=''
  local -a words=()

  printf '\n%s\t%s\t' "$maxprice" "${file##*/}"

  # Dont scan the first few lines and last few lines
  linecount=$(wc -l <"$file")
  linecount=$(( linecount - bottomskip ))

  first=$topskip
  while [ "$first" -le "$linecount" ]; do
    last=$(( first + 3 ))
    # Quotes are stripped because xargs would treat them as quoting;
    # "xargs echo" then joins the window into one space-separated line.
    block=$(sed -n "${first},${last}p" "$file" \
      | sed -e "s/'//g" -e 's/"//g' \
      | xargs echo)
    address=$(getaddress "$block")
    if [ -n "$address" ]; then
      # Collapse runs of whitespace without exposing the text to globbing.
      read -r -a words <<<"$address"
      printf '%s\n' "${words[*]}"
      return 0
    fi
    first=$(( first + 1 ))
  done

  echo 'ADDRESS CANNOT BE DETERMINED'
}

### Apply price rule
# Examines every $dir/page* file:
#   any price above $limit      -> file deleted, listed under "Deleted Files"
#   highest price < $minprice   -> file deleted, listed under "Rejected Files"
#   otherwise                   -> showinfo line printed and appended to $out
applypricerule() {
  local -a rejected=() deleted=() price_list=()
  local file name prices price overlimit maxprice

  {
    printf '\nPRICE\tFILE\t\tADDRESS\n'
    printf '\n-----\t----\t\t-------\n'
  } | tee -a "$out"

  # The loop itself must not be part of a pipeline: it would then run in a
  # subshell and the rejected/deleted lists would be lost. Only the
  # showinfo output is piped through tee.
  for file in "$dir"/page*; do
    [ -f "$file" ] || continue
    name=${file##*/}

    # Last dollar amount on each line, reduced to its integer part:
    # "$110,000.00" -> 110000
    prices=$(
      grep '\$[1-9][0-9\,][0-9\,][0-9\.\,]\+' "$file" \
        | sed 's/^.*\(\$[1-9][0-9\,][0-9\,][0-9\.\,]\+\).*$/\1/' \
        | sed -e 's/\$//g' -e 's/\,//g' \
        | awk -F. '{print $1}' \
        | sed -e "s/'//g" -e 's/"//g' \
        | xargs echo
    )
    read -r -a price_list <<<"$prices"

    # overlimit becomes 1 as soon as one price exceeds our set limit
    overlimit=0
    maxprice=0
    for price in "${price_list[@]}"; do
      # Prices come from downloaded text: ignore anything that is not a
      # plain integer, and compare with [ ] (no arithmetic evaluation).
      case $price in
        '' | *[!0-9]*) continue ;;
      esac
      if [ "$price" -gt "$limit" ]; then
        overlimit=1
        break
      fi
      if [ "$price" -gt "$maxprice" ]; then
        maxprice=$price
      fi
    done

    if [ "$overlimit" -eq 1 ]; then
      rm -f -- "$file"
      deleted+=("$name")
    elif [ "$maxprice" -lt "$minprice" ]; then
      rm -f -- "$file"
      rejected+=("$name")
    else
      showinfo "$file" "$maxprice" | tee -a "$out"
    fi
  done

  echo
  echo '===== Rejected Files ====='
  printf '%s\n' "${rejected[*]}"
  echo
  echo '===== Deleted Files ====='
  printf '%s\n' "${deleted[*]}"
  echo
}

### Get files
# Creates $dir and downloads up to 30 result pages into it, one file per
# listing (page<N>.<NN>). Stops at the first page without listings.
# Returns non-zero when lynx is missing or $dir / the scratch file cannot
# be created.
getfiles() {
  local page=1
  local count
  local max_pages=30
  local base_url='http://www.nj.com/classifieds/publicnotices/index.ssf/results.ata?timeframe=168&major=7&minor=2280%2C2043%2C658%2C1297&count=100&keyword=new+jersey&paper=&aff=njo&session=&page='

  # Check before creating the directory: an empty directory left behind
  # would be mistaken for downloaded listings on the next run.
  if ! command -v lynx >/dev/null 2>&1; then
    echo 'Error: lynx is required to download listings' >&2
    return 1
  fi

  echo
  echo "Creating directory $dir ..."
  echo
  if ! mkdir -- "$dir"; then
    echo "Error: cannot create $dir" >&2
    return 1
  fi

  if ! tmp=$(mktemp "${TMPDIR:-/tmp}/angrysheriff.XXXXXX"); then
    echo 'Error: cannot create temporary file' >&2
    return 1
  fi

  while [ "$page" -le "$max_pages" ]; do
    lynx -dump -verbose -nolist "${base_url}${page}" \
      | sed 's/\[INLINE\]//g' \
      | grep '[a-zA-Z0-9]' \
      | sed -n '/Click here to learn more about \"My List\"/,/More Search Results/p' \
      >"$tmp"

    count=$(grep -c 'Add To My List' "$tmp")
    echo "Processing page $page $count"
    [ "$count" -gt 0 ] || break

    # One output file per listing: $dir/page<N>.00, $dir/page<N>.01, ...
    csplit -s -f "$dir/page$page." "$tmp" '/Add To My List/' '{*}'
    page=$(( page + 1 ))
  done

  echo "Results saved in directory: $dir"
}

### Apply filter
# $1 - list file produced by applypricerule
# Prints (and stores in $filter) every listing line that matches none of
# the excludewords, whitespace-normalised and sorted by price.
getfilter() {
  local listfile=$1

  # Listing lines are the ones starting with the price. An empty pattern
  # file matches nothing, so with no excludewords every line is kept.
  grep '^[0-9]' "$listfile" \
    | awk '{ $1 = $1; print }' \
    | grep -v -i -f <(exclude_patterns) \
    | sort -n \
    | tee "$filter"
}

### Main program
# $1 - directory name (one trailing "/" is ignored)
# Returns 1 on usage or processing errors.
main() {
  dir=${1:-}
  dir=${dir%/}
  if [ -z "$dir" ]; then
    usage
    return 1
  fi
  out=$dir.list
  filter=$dir.filter

  if [ -f "$out" ]; then
    echo
    echo "Using existing file: $out"
    echo
    cat -- "$out"
  else
    {
      echo
      echo "  $prog"
      echo '  (c) Murty Rompalli'
      echo
      echo '  Download and process'
      echo "  Sheriff's sales in NJ"
      echo
    } | tee "$out"

    echo
    echo "Generating output file: $out ..."
    echo

    if [ -d "$dir" ]; then
      echo
      echo "Using existing files in $dir"
      echo
    else
      echo
      echo 'Downloading files. Please wait...'
      echo
      if ! getfiles; then
        # Do not leave a banner-only list behind: the next run would
        # treat it as a finished result.
        rm -f -- "$out"
        return 1
      fi
    fi

    if [ ! -d "$dir" ] || [ ! -r "$dir" ]; then
      echo "Error: cannot read directory: $dir" >&2
      rm -f -- "$out"
      return 1
    fi

    applypricerule
  fi

  if [ -f "$filter" ]; then
    echo
    echo "Using existing filter: $filter"
    echo
    cat -- "$filter"
  else
    echo
    echo "Generating filter file: $filter ..."
    echo
    if [ -f "$out" ] && [ -r "$out" ]; then
      getfilter "$out"
    else
      echo "Error: Unable to open file: $out" >&2
      echo "Error: Cannot create filter file: $filter" >&2
      return 1
    fi
  fi

  echo
  echo "Thank you for using $prog"
  echo
}

# Must stay the last command: its status is the exit status of the script.
main "$@"
# END