Αυτό το πρόγραμμα είναι αντίγραφο αυτού που έγραψε ο Ariel Glenn για τον έλεγχο των μεταφράσεων (βρίσκεται εδώ).

Η διαφορά βρίσκεται στο ότι χρησιμοποιεί ως πηγή για να ελέγξει τις μεταφράσεις το αρχείο all-lemmas-sorted.txt που παράγεται από το πρόγραμμα για τους διαγλωσσικούς συνδέσμους (interwikis) (δείτε Χρήστης:AtouBot/generate-iw-actionlist.sh). Το αρχείο αυτό βρίσκεται στον υποκατάλογο iw_tmp του φακέλου στον οποίο έχουμε εγκαταστήσει το generate-iw-actionlist.sh και πρέπει να αντιγραφεί στο φάκελο pywikipedia μαζί με αυτό εδώ το script.

Μετά από συμβουλή του Ariel, έκανα και κάποιες αλλαγές στη δομή της μεταβλητής wordList (πίνακας τύπου hash).

#!/usr/bin/python
# -*- coding: utf-8  -*-

"""
This bot goes over multiple pages of the home wiki, checks all
references to {{τ}} and updates the noentry parameter, 
according to whether they have an entry on the remote wiki
or not.  

Details of operation:

  It does not bother {{τ}} in html comments. Note that we rely on the 
  fact that our html comments which hide empty translation
  templates don't encompass more than one line.

  If a line is only partially commented out, e.g.
     <!-- * {{en}} : {{τ|en|XXX}} 
  perhaps because someone commented out a block of several
  lines and the comment closure is somewhere else,
  this code will leave undisturbed translations with the value
  XXX but will modify any others.

  This will not touch {{τ}} when located in other regions
  outside of the translation section (Μεταφράσεις).

  It converts references of {{t}} to {{τ}}.

  It strips leading and trailing blanks from all parameter names
  and values.

  It may reorder the parameters in a template without making
  other changes.  This is normal.

  If a template parameter is passed with no value to {{τ}}, it will
  be removed by this code. 
 
  Text outside of the {{τ}} or {{t}} will not be touched.

Notes about running this script:

  Make sure that the list of all lemmas on all projects is
  located in the same directory and is called 

    all-lemmas-sorted.txt

This script understands various command-line arguments:

    -start:        used as -start:page_name, specifies that the robot should
                   go alphabetically through all pages on the home wiki,
                   starting at the named page.

    -file:         used as -file:file_name, read a list of pages to treat
                   from the named textfile. Page titles should be enclosed
                   in [[double-squared brackets]].

    -ref:          used as -ref:page_name, specifies that the robot should
                   touch all pages referring to the named page.

    -links:        used as -links:page_name, specifies that the robot should
                   touch all pages linked from the named page.

    -cat:          used as -cat:category_name, specifies that the robot should
                   touch all pages in the named category.

All other parameters will be regarded as a page title; in this case, the bot
will only touch a single page.
"""
import wikipedia, wiktionary, pagegenerators, catlib
import sys
import re

class TranslationBot:
  """Walks over the supplied pages, locates translation templates
  ({{τ}} / {{t}}) inside translation sections (Μεταφράσεις) and
  updates their noentry= / nowiki= parameters according to the lemma
  list loaded from all-lemmas-sorted.txt."""

  def __init__(self, generator, acceptall = False):
    """generator: iterable of wikipedia.Page objects to process.
    acceptall: when True, save every change without prompting."""
    self.generator = generator
    self.acceptall = acceptall
    # opening of a translation template: {{τ| or {{t|
    self.transStartTemplateCompiler = re.compile( u"{{(τ|t)\|")
    # a complete translation template, captured so re.split keeps it
    self.transTemplateCompiler = re.compile(u'({{(?:t|τ)\|[a-zA-Z\-]+\|[^\}]+}})')
    # one html comment; non-greedy so several comments on a line stay separate
    self.commentCompiler = re.compile( u"\<\!\-\-(.*?)\-\-\>" )
    self.setupLemmaList()

  def setupLemmaList(self):
    """Load all-lemmas-sorted.txt ("lemma||language" per line) into
    self.wordList: a dict mapping language code -> dict of lemma -> 1
    (lemmas are stored as the raw bytes read from the file)."""
    self.wordList = {}
    fileAllProjectLemmas = open('all-lemmas-sorted.txt', 'r')
    count = 0
    print("starting phase 1: preparing lemma list")
    line1 = fileAllProjectLemmas.readline()
    while (line1):
      # strip only the trailing newline; the fields keep their bytes
      line = line1.rstrip('\n')
      # robustness fix: skip blank or malformed lines instead of
      # crashing on the tuple unpack below
      if "||" in line:
        lemma, language = line.split("||", 1)
        if language not in self.wordList:
          self.wordList[language] = {}
        self.wordList[language][lemma] = 1
      count = count + 1
      if not ( count % 1000000 ):
        print("%s lemmas processed" % count)
      line1 = fileAllProjectLemmas.readline()
    print("done phase 1")
    fileAllProjectLemmas.close()

  def processSection(self, sectionText):
    """Return sectionText with every line that contains an uncommented
    translation template rewritten by processLine()."""
    sectionLines = re.split(u'\n', sectionText)
    newSectionLines = []
    for originalLine in sectionLines:
      # decide using a copy with the html comments removed ...
      newLine = self.commentCompiler.sub(u'', originalLine)
      result = self.transStartTemplateCompiler.search(newLine)
      if (result):
        # ... but process the full original line; processLine()
        # preserves the commented parts untouched
        newSectionLines.append(self.processLine(originalLine))
      else:
        newSectionLines.append(originalLine)
    return u'\n'.join(newSectionLines)

  def processLine(self, line):
    """Process one line: html comments are copied through verbatim,
    everything else goes through processChunk()."""
    textChunks = re.split(u'(<!--.*-->)', line)
    newChunks = []
    for chunk in textChunks:
      # bugfix: the comment opener is 4 characters, the old
      # chunk[0:3] test could never match, so commented-out
      # translations were being modified as well
      if chunk[0:4] == u'<!--':
        newChunks.append(chunk)
      else:
        newChunks.append(self.processChunk(chunk))
    return u''.join(newChunks)

  def processChunk(self, chunk):
    """Split a comment-free chunk into translation templates and the
    text in between; rewrite the templates, keep the text as is."""
    translations = re.split(self.transTemplateCompiler, chunk)
    newTranslations = []
    for translation in translations:
      if re.match(self.transTemplateCompiler, translation):
        # a translation template, not in-between text: process it
        newTranslations.append(self.processTranslation(translation))
      else:
        newTranslations.append(translation)
    return u''.join(newTranslations)

  def processTranslation(self, translation):
    """Rewrite one {{τ}}/{{t}} template.

    Expected form (parameters in any order, possibly with stray blanks
    that we want to throw away):
      {{τ|sq|word|iw=word|link=word|noentry=n|nowiki=n|tr=...}}

    Returns the normalised template text, or the input unchanged when
    it cannot be parsed safely (a warning is printed)."""
    # skip the leading {{τ| or {{t| (both are 4 characters) and the
    # trailing }}.  FIXME if there aren't two }} at the end we are hosed
    args = re.split(u'\|', translation[4:-2])

    if len(args) < 2:
      print("Something's broken: %s, skipping..." % translation)
      return translation

    # first positional argument is the language code
    langArg = self.unistrip(args[0])
    if not re.match(u'[a-zA-Z\-]+', langArg):
      return translation

    translArg = False
    trArg = False
    iwArg = False
    linkArg = False

    for arg in args[1:]:
      if u'=' in arg:
        # bugfix: split only on the first '=', so values that
        # themselves contain '=' no longer raise ValueError
        ( name, value ) = arg.split(u'=', 1)
        name = self.unistrip(name)
        value = self.unistrip(value)
        if name == u"iw":
          iwArg = value
        elif name == u"link":
          linkArg = value
        elif name == u"noentry":
          pass    # recomputed below from the lemma list
        elif name == u"nowiki":
          pass    # recomputed below from the lemma list
        elif name == u"tr":
          trArg = value
        else:
          # unknown named parameter: echo it and bail
          print("Something's broken: %s, skipping..." % translation)
          return translation
      else:
        if translArg:
          # second unnamed parameter but we already have a translation
          print("Something's broken: %s, skipping..." % translation)
          return translation
        else:
          translArg = self.unistrip(arg)

    nowikiArg = False
    noentryArg = False

    # no translation actually in the template: whine and give up
    if not translArg:
      print("Something's broken: %s, skipping..." % translation)
      return translation

    if langArg not in self.wordList:
      # no wiktionary exists for that language code
      nowikiArg = 1
    elif iwArg:
      # an explicit iw= target takes precedence over the translation
      tosearch = (u'%s' % iwArg).encode('utf-8')
      if tosearch not in self.wordList[langArg]:
        noentryArg = 1
    elif translArg == u'XXX' or translArg == u'ΧΧΧ':
      # not actually a translation... prolly a half-commented-out
      # translation or some such, skip
      return translation
    else:
      tosearch = (u'%s' % translArg).encode('utf-8')
      if tosearch not in self.wordList[langArg]:
        noentryArg = 1

    # reassemble the template in canonical parameter order,
    # with any extra spaces cleared out
    text = u"{{τ|%s|" % langArg
    text = text + u"%s" % translArg
    if nowikiArg:
      text = text + u'|nowiki=1'
    if noentryArg:
      text = text + u'|noentry=1'
    if iwArg:
      text = text + u"|iw=%s" % iwArg
    if linkArg:
      text = text + u"|link=%s" % linkArg
    if trArg:
      text = text + u"|tr=%s" % trArg
    text = text + u"}}"
    return text

  def unistrip(self, unicodeString):
    """Strip leading/trailing whitespace from a unicode string.
    Simplified from the old encode/str.strip/decode round-trip; as an
    improvement this also strips unicode whitespace (e.g. no-break
    spaces pasted by users)."""
    return unicodeString.strip()

  def run(self):
    """Main loop: fetch each page, rewrite its translation sections and
    (after confirmation, unless acceptall is set) save the result.
    Missing, redirect and protected pages are skipped."""
    startOfTranslationSection = u"μεταφράσεις}}="
    sectionMarker = u'={{'
    for page in self.generator:
      try:
        wikipedia.output(u'Έλεγχος σελίδας: %s' % page.title())
        originalPage = page.get()
        pageSections = re.split(sectionMarker, originalPage)
        newPageSections = []
        for pageSection in pageSections:
          # only translation sections are touched
          if pageSection.startswith(startOfTranslationSection):
            pageSection = self.processSection(pageSection)
          newPageSections.append(pageSection)
        newPage = sectionMarker.join(newPageSections)

        # upload the text if anything changed
        if (newPage == originalPage):
          wikipedia.output('No changes were necessary in %s' % page.title())
        else:
          wikipedia.output(u'ενημέρωση μεταφράσεων')
          wikipedia.setAction(u'ενημέρωση των μεταφράσεων (παραμέτρων του προτύπου τ)')
          wikipedia.output(u'>>> %s <<<' % page.title())
          wikipedia.showDiff(originalPage, newPage)

          # default answer, so acceptall never reads an unbound name
          choice = 'n'
          if not self.acceptall:
              choice = wikipedia.inputChoice(u'Θέλετε να δεχτείτε αυτές τις αλλαγές;',  ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
              if choice in ['a', 'A']:
                  self.acceptall = True
          if self.acceptall or choice in ['y', 'Y']:
              print("saving")
              page.put(newPage)

      except wikipedia.NoPage:
          print(u"Η σελίδα %s δεν υπάρχει;;!!" % page.aslink())
      except wikipedia.IsRedirectPage:
          # redirects carry no translation sections; skip silently
          pass
      except wikipedia.LockedPage:
          # cannot edit protected pages; skip silently
          pass

def main():
    """Build a page generator from the command-line arguments and run
    the TranslationBot over it.

    Recognised options: -start:, -ref:, -links:, -file:, -cat:.
    Any remaining arguments are joined into a single page title, which
    takes precedence over the generator options.
    """
    generator = None
    titleWords = []
    for argument in wikipedia.handleArgs():
        if not argument:
            continue
        if argument.startswith('-start:'):
            generator = pagegenerators.AllpagesPageGenerator(argument[7:])
        elif argument.startswith('-ref:'):
            target = wikipedia.Page(wikipedia.getSite(), argument[5:])
            generator = pagegenerators.ReferringPageGenerator(target)
        elif argument.startswith('-links:'):
            source = wikipedia.Page(wikipedia.getSite(), argument[7:])
            generator = pagegenerators.LinkedPageGenerator(source)
        elif argument.startswith('-file:'):
            generator = pagegenerators.TextfilePageGenerator(argument[6:])
        elif argument.startswith('-cat:'):
            category = catlib.Category(wikipedia.getSite(), argument[5:])
            generator = pagegenerators.CategorizedPageGenerator(category)
        else:
            titleWords.append(argument)

    if titleWords:
        # an explicit page title on the command line wins over any option
        singlePage = wikipedia.Page(wikipedia.getSite(), ' '.join(titleWords))
        generator = iter([singlePage])

    if generator:
        bot = TranslationBot(pagegenerators.PreloadingGenerator(generator))
        bot.run()
    else:
        wikipedia.showHelp('touch')

if __name__ == "__main__":
    try:
        main()
    finally:
        # always release the framework's site lock / throttle,
        # even when main() raises or is interrupted
        wikipedia.stopme()