#!/bin/bash
# Print a short usage message, including an example invocation, on stdout
# and terminate the script with exit status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 title" \
    "where title is the title of the page for which to find transclusions" \
    "" \
    "For example:" \
    "$0 'Πρότυπο:ξεν-'"
  exit 1
}
# The page title is mandatory: without a non-empty first argument, show the
# usage text (usage terminates the script).
[[ -n "$1" ]] || usage
# Title as passed to the API: every space is replaced by an underscore.
eititle=${1// /_}

# Working directory (relative to the current directory) for all output files.
tmp="./embed_tmp"

# Date stamp used as the suffix of the output file names.
# %d (zero-padded day) is used instead of %e: %e pads single-digit days with
# a space, which puts whitespace into every file name derived from it and
# breaks unquoted expansions and redirections that use those names.
today=$(date +"%B-%d-%Y")
ext="$today"

if ! mkdir -p -- "$tmp"; then
  echo "unable to create directory $tmp, bailing" >&2
  exit 1
fi

# Common prefix of the files written for this run
# ($titles.xml, $titles.xml.temp and $titles.txt).
titles="$tmp/titles.$ext"

# Continuation token for the API; empty means "start from the first batch".
eicontinue=""

# Remove output left over from an earlier run with the same date stamp, since
# the XML file is appended to rather than overwritten.
rm -f -- "$titles".*

# 1-based position of the first title in the batch about to be fetched.
count=1
# Download the list of pages that embed (transclude) $eititle from the
# el.wiktionary API, 500 entries per request. Each response is split so that
# every tag sits on its own line, saved to $titles.xml.temp and appended to
# $titles.xml. The loop ends when a response carries no eicontinue token; the
# script exits with status 1 if a request or the write of its output fails.
while true; do
  echo "getting embedded titles $count to $(( count + 499 ))"

  # Let curl build the query string (-G) and percent-encode the values that
  # come from outside (--data-urlencode), so a title or continuation token
  # containing characters that are special in a URL cannot corrupt the
  # request. HTTPS is used because Wikimedia sites answer plain-HTTP requests
  # with a redirect, which curl does not follow here.
  curl_args=(
    --retry 10 -H 'Expect:' -f -G
    -d 'action=query&list=embeddedin&eilimit=500&format=xml'
    --data-urlencode "eititle=$eititle"
  )
  # Next 500: once a continuation token is known, send it along.
  if [ -n "$eicontinue" ]; then
    curl_args+=(--data-urlencode "eicontinue=$eicontinue")
  fi

  curl "${curl_args[@]}" "https://el.wiktionary.org/w/api.php" \
    | sed -e 's/>/>\n/g;' > "$titles.xml.temp"
  # $? would only report the last pipeline stage (sed); PIPESTATUS holds the
  # status of every stage and must be saved before any other command runs.
  pipe_status=("${PIPESTATUS[@]}")
  if [ "${pipe_status[0]}" -ne 0 ]; then
    echo "Error ${pipe_status[0]} from curl, unable to get xml pages, bailing" >&2
    exit 1
  fi
  if [ "${pipe_status[1]}" -ne 0 ]; then
    echo "Error ${pipe_status[1]} writing $titles.xml.temp, bailing" >&2
    exit 1
  fi

  cat -- "$titles.xml.temp" >> "$titles.xml"

  # Get the continue parameter for the next request.
  # Format: <embeddedin eicontinue="500" />
  # The attribute is matched by name, so its position within the tag does
  # not matter; only the first occurrence in the response is used.
  eicontinue=$(sed -n 's/.*eicontinue="\([^"]*\)".*/\1/p' "$titles.xml.temp" | head -n 1)
  if [ -z "$eicontinue" ]; then
    break
  fi

  # Pause between requests.
  sleep 6
  count=$(( count + 500 ))
done
# Turn the accumulated XML into a list of wiki links, one per line, in
# $titles.txt.
# Input lines have the format: <ei pageid="1192" ns="0" title="θάλασσα" />
# Splitting such a line on double quotes makes the title the sixth field;
# it is wrapped in [[ ]] on output.
if ! awk -F'"' '/<ei page/ { print "[[" $6 "]]" }' "$titles.xml" > "$titles.txt"; then
  echo "unable to extract titles from $titles.xml, bailing" >&2
  exit 1
fi

# done!
echo "done!"
exit 0