#!/bin/bash
#
# The Qubes OS Project, http://www.qubes-os.org
#
# Copyright (C) 2013  Joanna Rutkowska <joanna@invisiblethingslab.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
# Requires:
# - poppler-utils (pdftocairo, pdfinfo)
# - ImageMagick (convert, identify)

# Scratch files used during the conversion. Start with empty names so the
# cleanup trap below is safe to run even if a mktemp call fails part-way.
INPUT_FILE=
TEMP_PNG_FILE=
TEMP_RGB_FILE=

# Remove the scratch files on every exit path, not only on normal completion.
trap 'rm -f -- "$INPUT_FILE" "$TEMP_PNG_FILE" "$TEMP_RGB_FILE"' EXIT

# Holds the untrusted document exactly as received on stdin.
INPUT_FILE=$(mktemp --tmpdir qpdf-conversion-XXXXXXXX) || exit 1
# Holds the PNG rendering of the page currently being processed.
TEMP_PNG_FILE=$(mktemp --tmpdir qpdf-conversion-XXXXXXXX.png) || exit 1
# Holds the raw RGB data for the current page. The ".pdf" suffix is not
# significant: the file is written through an explicit "rgb:" format prefix.
TEMP_RGB_FILE=$(mktemp --tmpdir qpdf-conversion-XXXXXXXX.pdf) || exit 1
# Value passed to ImageMagick's -depth option when producing the RGB data.
IMG_DEPTH=8

# Get the original (untrusted) PDF file...
# Give up if it cannot be stored completely: converting a truncated document
# would only produce misleading output.
cat > "$INPUT_FILE" || exit 1

# now, let's convert it into a simple representation,
# and send back to the client.

# Note that we might be compromised at this point (due to exploitation of PDF
# parsing code) and so what we're sending back might very well be something
# totally different than a decent simple representation -- the client should
# never trust what we're sending back, and should discard anything that doesn't
# look like the simple representation!

# Print the number of pages pdfinfo reports for the file named by $1.
#
# Arguments: $1 - path of the (untrusted) input file
# Outputs:   a single line of decimal digits on stdout
#
# When pdfinfo yields no usable count, 1 is printed instead: perhaps the input
# is not a PDF but only some JPG/PNG/etc, so one page is attempted anyway.
count_pages() {
    local pages
    # Keep only the first "Pages:" line and strip everything before the number.
    pages=$(pdfinfo "$1" | sed -n -e 's/^Pages:[^0-9]*//p' | head -n 1)
    # The value comes from parsing untrusted input; it is sent to the client and
    # used in integer comparisons, so accept nothing but plain decimal digits.
    if [[ ! $pages =~ ^[0-9]+$ ]]; then
        pages=1
    fi
    printf '%s\n' "$pages"
}

NO_PAGES=$(count_pages "$INPUT_FILE")
# First line of the reply: how many pages follow.
echo "$NO_PAGES"

# Report a failed conversion step on stderr and stop.
# Stopping keeps a stale bitmap left over from the previous page from being
# sent to the client as if it were the current page. The scratch files are not
# removed here; as noted at the cleanup step, this runs in a DispVM.
conversion_failed() {
    echo "page $PAGE: $1" >&2
    exit 1
}

# pdftocairo appends ".png" to the output root it is given. Passing the full
# path of the temporary PNG minus its extension makes it write exactly
# $TEMP_PNG_FILE, wherever mktemp created it (it honours $TMPDIR).
PNG_ROOT=${TEMP_PNG_FILE%.png}

cd /tmp || exit 1
PAGE=1
# Emit, for each page, one "<width> <height>" line followed by the raw RGB data.
while [ "$PAGE" -le "$NO_PAGES" ]; do
    # If pdftocairo fails, let's try ImageMagick's convert -- perhaps this is
    # just some image file?
    if ! pdftocairo "$INPUT_FILE" -png -f "$PAGE" -l "$PAGE" -singlefile "$PNG_ROOT" &&
       ! convert "$INPUT_FILE" "png:$TEMP_PNG_FILE"; then
        conversion_failed "could not render the input to PNG"
    fi

    # One identify call yields both dimensions, already in the output format.
    IMG_SIZE=$(identify -format '%w %h' "$TEMP_PNG_FILE") ||
        conversion_failed "could not read the PNG dimensions"

    # Produce the pixel data before printing the header, so that a header is
    # never sent without the data that belongs to it.
    convert "$TEMP_PNG_FILE" -depth "$IMG_DEPTH" "rgb:$TEMP_RGB_FILE" ||
        conversion_failed "could not convert the PNG to raw RGB"

    printf '%s\n' "$IMG_SIZE"
    cat "$TEMP_RGB_FILE"
    PAGE=$((PAGE + 1))
done

# Cleanup tmp files...
# Note: our DispVM might get destroyed before the command below
# completes, but that doesn't hurt us, because this is... well, a DispVM.
# The paths are quoted so that a temp directory containing whitespace cannot
# be word-split into unrelated arguments; "--" ends option parsing.
rm -f -- "$INPUT_FILE" "$TEMP_PNG_FILE" "$TEMP_RGB_FILE"
