#!/bin/bash
# Descripción: Bajador de subtítulos de subdivx.com
# Licencia: GPL 
# Versión: 0.6.7
# Fecha: 25/03/10
# Autor: maurol
#set -x

# --- Site endpoints -------------------------------------------------------
# Base address of the subtitle site.
BURL='http://www.subdivx.com'
# Search URL template; the %N% placeholder is replaced by the query string
# further down in the script.
URL="${BURL}/index.php?accion=5&masdesc=&buscar=%N%&oxfecha=2"

# --- HTTP user agent ------------------------------------------------------
# Longer alternative kept for reference:
#UA="Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.10) Gecko/2009042523 Firefox/3.0.10"
UA='Mozilla/5.0 Firefox/3.0.10'

# --- External tools (absolute paths) --------------------------------------
WGET='/usr/bin/wget'
TCS='/usr/bin/tcs'
UNZIP='/usr/bin/unzip'
UNRAR='/usr/bin/unrar'
OCR='/usr/bin/gocr'
PNG2='/usr/bin/pngtopnm'

# Abort with a diagnostic on stderr unless $1 is an executable file.
#   $1 - path of the tool
#   $2 - trailing wording of the error message
# The path is quoted on purpose: an unquoted empty value would turn the test
# into `[ ! -x ]`, which is false, and the missing tool would go unnoticed.
_require_tool() {
  [ -x "$1" ] && return 0
  echo "Error: $1 $2" >&2
  exit 1
}

_require_tool "$WGET" "not found."
_require_tool "$TCS" "not installed."
_require_tool "$UNZIP" "not found."
_require_tool "$UNRAR" "not found."
# Only the disabled captcha code uses these two tools:
#_require_tool "$OCR" "not installed."
#_require_tool "$PNG2" "not installed."

# Positional arguments:
#   $1 - movie name, optionally ending in ".avi" (or -h/--help)
#   $2 - packer name used to pick a subtitle among the search results
F="$1"
case "$F" in
  -h|--help)
    echo "Usage: $0 [\"movie[.avi]\" [packer]]" && exit 1
    ;;
esac
P="$2" # Packer
# CD count; stays at 1 unless the auto-detection further down changes it.
C=1

# Auto mode: no movie name was given on the command line, so derive the name
# (and the number of CDs) from the *.avi files in the current directory.
if [ -z "$F" ]
then
  # Z marks auto mode. Later steps only guess the packer and rename the
  # extracted subtitle when it is set.
  Z=1
  F=*.avi
  echo $F # intentionally unquoted: prints the files matched by the glob
  C=0
  OLDN= # do not inherit a stray value from the environment
  for FF in $F # intentionally unquoted so the glob expands
  do
    # An unmatched glob stays literal ("*.avi"), so this test also covers the
    # "no .avi file here" case.
    if [ ! -f "$FF" ]
    then
      echo "Usage: $0 [\"movie[.avi]\" [packer]]" >&2
      exit 1
    fi
    # Disc marker such as "cd1", "CD 2" or "cd_3" (case-insensitive, last
    # occurrence in the name); empty when the name has none.
    CD=$(printf '%s\n' "$FF" | sed -n 's/.*\(cd[-_\.\ ]*[1-9]\).*/\1/Ip')
    echo "$CD"
    if [ -n "$CD" ]
    then
      # Title = everything before the last occurrence of the marker. The
      # marker is matched literally, not as a regular expression.
      N=${FF%"$CD"*}
      echo "$N"
      if [ -n "$OLDN" ] && [ "$OLDN" = "$N" ]
      then
        C=$((C + 1)) # another disc of the same title
      else
        # First disc seen, or a different title: since F ends up holding the
        # last title, the count must restart with it.
        C=1
      fi
      OLDN=$N
      F=$N
    fi
  done
fi

# Filter: remove a trailing ".avi" (any letter case) from each input line.
# Anchored at the end of the line so that names merely containing ".avi"
# (e.g. "my.aviation.film.avi") are not cut in the middle.
_strip_avi() {
  sed 's/\.avi$//i'
}

# Filter: turn a release-style file name into a lowercase search title.
# Drops everything from the first "[" or "(", everything from the first "-"
# (with the blanks before it), the first occurrence of each common release
# tag, trailing blanks/dots, and finally maps dots to spaces.
_search_title() {
  cut -f1 -d'[' \
    | cut -f1 -d'(' \
    | sed -e 's/[[:blank:]]*-.*//' \
          -e 's/\bdvdrip\b//i' \
          -e 's/\bdivx\b//i' \
          -e 's/\bxvid\b//i' \
          -e 's/\bac3\b//i' \
          -e 's/\bhdtv\b//i' \
          -e 's/\bproper\b//i' \
          -e 's/\blimited\b//i' \
          -e 's/\bdvdscr\b//i' \
          -e 's/\bint\b//i' \
          -e 's/\bts\b//i' \
          -e 's/\breadnfo\b//i' \
          -e 's/\b[br][dr]rip\b//i' \
          -e 's/[\ \.]*$//' \
    | tr '.A-Z' ' a-z'
}

# A CD count of 0 means no disc marker was seen: treat it as a single CD.
if [ "${C:-0}" -eq 0 ]
then
  C=1
fi
echo "$C"

# $F is left unquoted on purpose: in auto mode it may still hold the literal
# glob "*.avi", which has to expand to the real file name here.
F=$(echo $F | _strip_avi)
echo "$F"

# Unquoted so that runs of blanks collapse before the title is cleaned up.
N=$(echo $F | _search_title)

# No packer? try to autodetect (auto mode with a single CD only).
if [ -z "$P" ] && [ -n "$Z" ] && [ "$C" = 1 ]
then
  # First guess: the last word after the last "-" of the file name.
  P=$(echo $F | sed 's/.*-//;s/([^)]*)//;s/.*[ \,]//;s/\.avi$//i' | tr A-Z a-z)
  # Fallback: the last word of the cleaned-up title.
  [ -n "$P" ] || P=$(echo $N | sed 's/.*\ //' | sed 's/([^)]*)//')
fi
echo "$N"
echo "$P"

# Build the search request: blanks in the title become '+', then the title
# replaces the %N% placeholder of the URL template. The result is piped
# through tcs (presumably UTF-8 -> ISO-8859-1; confirm against the tcs manual).
N=$(echo $N | tr ' ' '+')
U=$(echo $URL | sed "s/%N%/$N/" | $TCS -futf -t8859-1)

# Fetch the result page, convert it back with tcs and keep it in /tmp/subget.$$.
$WGET --save-cookies /tmp/cookies.$$.txt -O - --quiet "$U" \
  | $TCS -f8859-1 -tutf >/tmp/subget.$$

if [ -z "$P" ]
then
  # No packer: keep the result lines that mention the wanted number of CDs...
  grep -i "cds:[^0-9]*$C" /tmp/subget.$$ >/tmp/subget.1.$$
  # ...and take the URL found on the first of them.
  U1=$(sed -n "1{s/.*http:\/*\([^\"]*\).*/\1/p}" /tmp/subget.1.$$)
else
  # We have a "producer" or "packer": join the page into a single line, cut it
  # at the first mention of the packer and keep the last URL before that point.
  # The second sed runs without -n, so the joined page is printed unchanged
  # when the packer is not mentioned at all.
  U1=$(sed ':a;N;s/\n//;ta' /tmp/subget.$$ \
    | sed "/\b$P\b/I{s/\b$P\b.*//I;s/.*http:\/*\([^\"]*\).*/\1/}")
fi

# Print $1 with the first occurrence of "+$2" removed. The text is matched
# literally, so regex or glob characters in $2 have no special meaning.
_strip_packer() {
  local query=$1
  local needle="+$2"
  printf '%s\n' "${query/"$needle"/}"
}

# The site's "not found" text ended up in U1: the search returned nothing.
if printf '%s\n' "$U1" | grep -q "No se encontr" # not found
then
  U1= # never hand the error page to the download step
  # Heuristic: the packer word may have been left in the query string.
  # If so, drop it and search again.
  if [ -n "$P" ]
  then
    NN=$(_strip_packer "$N" "$P")
    if [ "$N" != "$NN" ]
    then # try again, this time with the shortened query
      U=$(echo $URL | sed "s/%N%/$NN/" | $TCS -futf -t8859-1)
      $WGET --save-cookies /tmp/cookies.$$.txt -O - --quiet "$U" | $TCS -f8859-1 -tutf >/tmp/subget.2.$$
      # Same extraction as the first search (page joined into one line, cut at
      # the packer, last URL before it), but with -n/p so that a page without
      # the packer yields an empty U1 instead of the whole page.
      U1=$(sed ':a;N;s/\n//;ta' /tmp/subget.2.$$ \
        | sed -n "/\b$P\b/I{s/\b$P\b.*//I;s/.*http:\/*\([^\"]*\).*/\1/;p}")
    fi
  fi
  if [ -z "$U1" ]
  then
    echo "No subtitles found." >&2
    exit 1
  fi
fi
echo "$U1"
# referrer: U1 up to (not including) its first "-"
R1=$(printf '%s\n' "$U1" | sed 's/-.*//')
echo "$R1"

# Filter: read the output of "unrar x" and print, for every extracted *.srt
# file, its name without the ".srt" suffix.
_srt_from_unrar() {
  sed -n '/^Extracting .*srt/{s/Extracting  \(.*\)\.srt.*/\1/p}'
}

# Filter: read the output of "unzip" and print, for every extracted *.srt
# file, its name without the ".srt" suffix. unzip reports each member as
# "<action>ing: <name>" followed by padding blanks.
_srt_from_unzip() {
  sed -n 's/^ *[a-z]*ing: \(.*\)\.srt *$/\1/p'
}

# Remove the scratch files this script keeps under /tmp (all named after $$).
_cleanup() {
  rm -f /tmp/cookies.$$.txt /tmp/subget.$$ /tmp/subget.[1-4].$$
}
trap _cleanup EXIT

if [ -n "$U1" ] # we have a subtitle coincidence. Download it.
then
  # New simpler version
  if [ -n "$P" ]
  then
    # TODO: Unnecessary indirection? Change U1 previous regex(s) to directly match
    # the right 'bajar.php' URL.
    $WGET --save-cookies /tmp/cookies.$$.txt -O - --quiet "$U1" | $TCS -f8859-1 -tutf >/tmp/subget.3.$$
    # First line mentioning bajar.php: take its last href, minus leading slashes.
    U2=$(sed -n "/bajar.php/{s/.*href=\"\/*\([^\"]*\).*/\1/;p;q}" /tmp/subget.3.$$)
  else
    U2=$U1 # Already found direct Download URL (bajar.php)
  fi
  echo "$U2"
  #$WGET -O - --quiet --referer=$R1 "$U2" | $TCS -f8859-1 -tutf >/tmp/subget.4.$$
  # This is now the subtitle file
  $WGET --save-cookies /tmp/cookies.$$.txt -O - --quiet "$U2" >/tmp/subget.4.$$

  # (now unused) captcha! cracking code
  #CAPTCHA=`sed -n '/captcha\//s/[^\"]*\"\([^\"]*\)\".*/\1/p' /tmp/subget.4.$$`
  #BCAPTCHA=`basename $CAPTCHA .png`
  #$WGET -P /tmp -nH --quiet $BURL/$CAPTCHA
  #$PNG2 -alpha /tmp/$BCAPTCHA.png >/tmp/$BCAPTCHA.pgm
  #TEXT=`$OCR -C 0-9A-Z /tmp/$BCAPTCHA.pgm`
  #echo $TEXT
  #DESUB=`sed -n '/"desub"/s/.*value="\([^"]*\)".*/\1/p' /tmp/subget.4.$$`
  #U=`sed -n '/"u"/s/.*value="\([^"]*\)".*/\1/p' /tmp/subget.4.$$`
  #[ ${#TEXT} -eq 3 ] && $WGET -U "$UA" --load-cookies /tmp/cookies.$$.txt --referer=$R1 -O /tmp/subget.5.$$ --quiet "$BURL/bajar.php?captcha_user=$TEXT&idcaptcha=$BCAPTCHA&desub=$DESUB&u=$U" || echo "Failed: Retry!"
  #E=`file -b /tmp/subget.5.$$ | cut -f1 -d\  | tr A-Z a-z`

  # Archive type = first word of the description printed by file(1), lowercased.
  E=$(file -b /tmp/subget.4.$$ | cut -f1 -d' ' | tr A-Z a-z)
  #echo $E
  S=
  case "$E" in
    rar)
      S=$($UNRAR -o+ x /tmp/subget.4.$$ | _srt_from_unrar)
      ;;
    zip)
      # -o: overwrite without prompting, like -o+ does for unrar. The output
      # is captured so that S is set for the rename below.
      S=$($UNZIP -o /tmp/subget.4.$$ | _srt_from_unzip)
      ;;
    *)
      echo "Unknown file type: $E." >&2
      exit 1
      ;;
  esac
  echo "$S"
  # Auto mode only: give the subtitle the same base name as the movie.
  # Best effort: a failing mv (e.g. several subtitles extracted) stays silent.
  if [ -n "$Z" ] && [ -n "$S" ]
  then
    mv -- "$S.srt" "$F.srt" 2>/dev/null
  fi
fi

