#!/usr/bin/python3
#Δημιουργήθηκε από τον Xoristzatziki στο el.wiktionary.org
#2017
import os, time, re , sys
import importlib.util
from collections import OrderedDict
# NOTE: every pattern below is a raw string.  The values are identical to the
# former non-raw literals, but sequences such as "\s" or "\{" no longer
# trigger "invalid escape sequence" warnings on recent Python versions.

# Section heading line: a run of "=" characters, some text, the same run of
# "=" again, and optionally trailing (usually useless) text after it.
sectionsre = r"(?P<AGROUP>=+)\s*?(?P<BGROUP>.+)\s*?(?P=AGROUP)(?P<CGROUP>.*)$"
# Heading text that is a template: opens with {{ and closes with }}.
# Leading and trailing whitespace must already have been removed.
sectionTFULLre = r"(?P<TEMPLSTART>\{\{)(?P<TEMPLFULL>((?P<TEMPNAME>.*?)\|{1}(?P<PARAMS>.*))|(?P<TEMPNAMENOPARAMS>.*?))(?P<TEMPLEND>\}\})$"
# A {{μτφ-αρχή...}} opener followed, later on, by a {{μτφ-τέλος}} closer.
translationsre = r"(?P<STARTLINE>\{\{μτφ-αρχή.*\}\})\s*.*(?P<ENDLINE>\{\{μτφ-τέλος\}\})"
# One translation line: "*" (or "<!-- *"), then {{iso}}, a colon, and a
# {{τ|iso|...}} template that repeats the same iso code.
translinere = r'^([*]\s*?|<!--\s*?\*\s*?)\{\{(?P<LANGISO>.*?)\}\}\s*?:\s*?\{\{τ\|(?P=LANGISO)\|.*'
# The phrase "ανδρικό/αντρικό όνομα" (male name), optionally wrapped in
# apostrophes and with "όνομα" optionally wiki-linked.
foronomaAre = r"(?P<ELIPSIS1>'*)*[Αα]ν[δτ]ρικό\s(?P<ELIPSIS2>'*)*(?P<ELIPSIS3>\[\[)*όνομα(?(ELIPSIS3)\]\]|)(?P=ELIPSIS2)(?P=ELIPSIS1)"
# The phrase "γυναικείο [[όνομα]]" (female name).
# NOTE(review): unlike the male pattern there is no whitespace between the
# adjective and "[[όνομα]]" and the link is mandatory — confirm this is
# intended; the pattern is left unchanged here.
foronomaGre = r"(?P<ELIPSIS1>'*)[Γγ]υναικείο(?P<ELIPSIS2>'*)(\[\[)όνομα(\]\])(?P=ELIPSIS2)(?P=ELIPSIS1)"
def test():
    """Print the path of this module file (quick smoke check)."""
    modulepath = __file__
    print(modulepath)
def getSections(pagetitle, wikitext, languages, parts):
    '''Split wikitext into sections and return them in a dict keyed by number.

    Every line matching ``sectionsre`` starts a new section; every other
    line is appended (plus a newline) to the 'content' of the current one.
    Key 0 holds the text found before the first heading.  The text of the
    numbered sections can be rebuilt from 'originaltitleline' and 'content'.

    Keys of each section dict:
    depth: number of "=" used by the heading (1 for entry 0)
    originaltitleline: the exact heading line
    title: the heading template name (the page title for entry 0)
    langiso / langname: the most recent valid language heading seen so far.
        WARNING: the previous language is kept when a depth-2 heading does
        not name a valid language.
    ispartofspeech: whether the heading template name is in ``parts``
    content: the text below the heading line

    Entry 0 additionally carries:
    garbage: accumulated diagnostics / possible junk
    langsections: list of dicts, one per depth-2 heading treated as a
        language heading, with 'section', 'headword' and, when the language
        is one of ``languages``, 'iso'.
    '''
    splittedlines = wikitext.splitlines(False)
    sections = {}
    langsections = []
    xcounter = 0
    sections[0] = {'depth':1, 'originaltitleline':'', 'title': pagetitle, 'langiso':'', 'langname':'',
                   'langhaswiki' : False, 'ispartofspeech' : False,
                   'titlelang' : '', 'content':'', 'garbage':'', 'mingarbage' : ''}
    lastlang = '' # no language seen yet
    lastlangname = ''
    allgarbage = ''
    headword = ''
    linecounter = 0
    sectiontitle = ''
    for line in splittedlines:
        garbage = ''
        sectionsrematch = re.match(sectionsre,line)
        ispartofspeech = False
        titlelang = ''
        linecounter += 1
        if sectionsrematch:
            # A new section heading was found.
            sectiontitle = ''
            xcounter += 1
            agroup = sectionsrematch.groupdict()['AGROUP'] # the "=" run at the start and end
            bgroup = sectionsrematch.groupdict()['BGROUP'] # the text between the "=" runs
            cgroup = sectionsrematch.groupdict()['CGROUP'] # text after the closing "=" (extra "=" or other junk)
            depth = len(agroup) # number of "="
            sectiontemplate = bgroup.strip()
            garbage = cgroup
            titletemplate =''
            if depth == 2 and not ('Σημειώσεις' in sectiontemplate or '{{αναφορές}}' in sectiontemplate):# supposedly a language heading (the notes/references headings are excluded)
                langsections.append({'section':xcounter})
                if len(bgroup) > 6: # more than 6 characters, because it has to look like {{-XX-}}
                    possiblelang = bgroup[3:-3] # in theory, the iso code
                    if possiblelang in languages:
                        langsections[-1]['iso'] = possiblelang
                        if lastlang != '':
                            # Not the first language of the page: the previous
                            # section is expected to end with a ---- separator.
                            '''Αν δεν είναι η πρώτη υπάρχουσα γλώσσα στο λήμμα,
η προηγούμενη ενότητα πρέπει να τελειώνει σε ---'''
                            if not sections[xcounter-1]['content'].endswith('\n\n\n----\n\n'):
                                garbage += str(linecounter) + ':Πρόβλημα στην αλλαγή γλώσσας.'+ '\n'
                        lastlang = possiblelang
                        lastlangname = languages[possiblelang]['όνομα']
                        if possiblelang == 'el' or possiblelang == 'grc' or possiblelang == 'gkm':
                            headword = "'''{{PAGENAME}}'''"
                        else:
                            if languages[possiblelang]['έχει βικιλεξικό']:
                                headword = "{{τ|" + possiblelang + "|{{PAGENAME}}}}"
                            else:
                                headword = "'''{{PAGENAME}}'''"
                    else:
                        # Unknown language, or a wrong number of "=" for this heading.
                        garbage += str(linecounter) + ':Depth 2:'+ sectiontemplate + '\n'
                        headword = ''
                else:
                    # Handled the same way as a language error.
                    garbage += str(linecounter) + ':Depth 2:'+ sectiontemplate + '\n'
                    headword = ''
                langsections[-1]['headword'] = headword
            else:
                # Sub-section heading inside the last language/section found.
                '''Κεφαλίδα υποενότητας μέσα στην τελευταία γλώσσα ή ενότητα που βρήκαμε.'''
                if 'Σημειώσεις' in sectiontemplate:
                    titletemplate = 'Σημειώσεις'
                else:
                    thematch = re.match(sectionTFULLre, sectiontemplate)
                    if thematch:
                        if thematch.groupdict()['TEMPNAMENOPARAMS']:
                            # The heading is a template without parameters.
                            '''Αν βρήκε ενότητα η οποία έχει σαν τίτλο πρότυπο χωρίς παραμέτρους.'''
                            titletemplate = thematch.groupdict()['TEMPNAMENOPARAMS'].strip()
                            theparamsstring = ''
                        elif thematch.groupdict()['TEMPNAME']:
                            # The heading is a template with parameters; the
                            # first parameter is compared with the current language.
                            '''Αν βρήκε ενότητα η οποία έχει σαν τίτλο πρότυπο με παραμέτρους.
Υποθέτω ότι είτε η παράμετρος είναι αρ είτε κάποιο iso γλώσσας.
'''
                            titletemplate = thematch.groupdict()['TEMPNAME'].strip()
                            theparamsstring = thematch.groupdict()['PARAMS']
                            splitted = theparamsstring.split("|")
                            possiblelang = splitted[0]
                            if lastlang == possiblelang:
                                # Same language as the enclosing language section.
                                pass
                            else:
                                # Either a different language or some other kind of parameter (?).
                                # A different *known* language is reported as an error.
                                if possiblelang in languages:
                                    garbage += 'Διαφορά στη γλώσσα.' + '\n'
                                    garbage += str(linecounter) + ':Depth '+ str(depth) + ":" + sectiontemplate + '\n'
                        else:
                            # Empty braces or some other problem.
                            # NOTE(review): read here as the fall-through of the
                            # if/elif above (template with an empty name) — confirm
                            # against the intended nesting.
                            garbage += str(linecounter) + ':Depth '+ str(depth) + ":" + sectiontemplate + '\n'
                    else:
                        # Not a template, or there is junk after the template.
                        # TODO: check what else this can be
                        garbage += str(linecounter) + ':Depth '+ str(depth) + ":" + sectiontemplate + '\n'
                ispartofspeech = (titletemplate in parts)
                sectiontitle = titletemplate
            sections[xcounter] = {'depth':depth, 'originaltitleline':line, 'title':sectiontitle, 'langiso':lastlang, 'langname':lastlangname,
                                  'ispartofspeech' : ispartofspeech , 'content':''}
            if len(cgroup):
                # This value is replaced by allgarbage once the loop is over.
                sections[0]['garbage'] += cgroup + '\n'
        else:
            # Ordinary line: belongs to the section currently being read.
            sections[xcounter]['content'] = sections[xcounter]['content'] + line + '\n'
        allgarbage += garbage
    sections[0]['garbage'] = allgarbage
    sections[0]['langsections'] = langsections
    return sections
def fixedtabletext(thetablelines, sortedlanguages):
    """Return the sorted body of one translation table as text.

    The lines are classified by ``fixOneTable``.  The recognised
    translations are written in sorted order with a ``{{μτφ-μέση}}`` marker
    inserted just after the middle, followed by ``{{μτφ-τέλος}}``.  Lines
    that could not be placed (duplicates, unknown iso codes, lines without
    an iso code) are appended after a ``{{μτφ-κατάταξη}}`` marker.

    The opening ``{{μτφ-αρχή}}`` line is not produced here.

    thetablelines: the lines found between the table start and end markers.
    sortedlanguages: language table in sort order (see ``sortlanguages``).
    """
    fixedtable = fixOneTable(thetablelines, sortedlanguages)
    sortedlines = fixedtable['translationssorted']
    sortedlines.insert(len(sortedlines) // 2 + 1, '{{μτφ-μέση}}')

    pieces = [aline + '\n' for aline in sortedlines]
    pieces.append('{{μτφ-τέλος}}' + '\n')

    leftovers = []
    for key in ('duplicatetranslations', 'badisotranslations', 'noisotranslations'):
        group = fixedtable[key]
        # Duplicates and bad-iso entries are dicts keyed by iso code; the
        # text to keep is the value.  Iterating the dict itself would write
        # only the iso codes and lose the translation lines.
        leftovers.extend(group.values() if isinstance(group, dict) else group)
    if leftovers:
        pieces.append('{{μτφ-κατάταξη}}' + '\n')
        pieces.extend(aline + '\n' for aline in leftovers)
    return ''.join(pieces)
def _fixtranslationsection(content, sortedlanguages):
    """Rewrite the content of one Greek translations section.

    Returns a tuple ``(text, problems)``: the rewritten content and the
    problem tags (each starting with "|") found in its tables.
    """
    passthrough = ('{{μτφ-υποκ', '{{μτφ-θηλ', '{{μτφ-μεγεθ')
    leftovers = (
        ('duplicatetranslations', '|διπλή γλώσσα στις μεταφράσεις'),
        ('badisotranslations', '|λάθος iso στις μεταφράσεις'),
        ('noisotranslations', '|μετάφραση χωρίς iso'),
    )
    pieces = []
    problems = ''
    # Both start in a defined state so that text preceding the first
    # {{μτφ-αρχή}} cannot hit an unassigned variable.
    insidetable = False
    tablelines = []
    for line in content.splitlines(keepends=True):
        stripped = line.strip()
        if line.startswith('{{μτφ-αρχή'):
            insidetable = True
            tablelines = []
            pieces.append(line)
        elif stripped == '{{μτφ-τέλος}}':
            if not insidetable:
                # An end marker without an open table is not copied.
                continue
            insidetable = False
            if len(tablelines) == 1 and '{{βλ|' in tablelines[0]:
                # The table only points to another entry: keep it untouched.
                pieces.append(tablelines[0])
                pieces.append('{{μτφ-τέλος}}' + '\n')
                continue
            fixedtable = fixOneTable(tablelines, sortedlanguages)
            sortedlines = fixedtable['translationssorted']
            sortedlines.insert(len(sortedlines) // 2 + 1, '{{μτφ-μέση}}')
            pieces.extend(aline + '\n' for aline in sortedlines)
            pieces.append('{{μτφ-τέλος}}' + '\n')
            needsmarker = True
            for key, problem in leftovers:
                group = fixedtable[key]
                if not len(group):
                    continue
                problems += problem
                if needsmarker:
                    # One marker introduces all the lines that need sorting.
                    pieces.append('{{μτφ-κατάταξη}}' + '\n')
                    needsmarker = False
                # Dict groups are keyed by iso code; the line is the value.
                lines = group.values() if isinstance(group, dict) else group
                pieces.extend(aline + '\n' for aline in lines)
        elif stripped == '{{μτφ-μέση}}':
            # The middle marker is re-inserted when the table is rebuilt.
            pass
        elif '{{βλ|' not in line and line.startswith(passthrough):
            pieces.append(line)
        elif insidetable:
            tablelines.append(line)
        else:
            pieces.append(line)
    if insidetable:
        # The table was never closed: emit its lines unchanged rather than
        # silently dropping them.
        pieces.extend(tablelines)
    return ''.join(pieces), problems


def fixsections(pagetitle, wikitext, languages, parts):
    """Return a corrected copy of the wikitext of a page.

    The page is split with ``getSections`` and re-assembled section by
    section:

    * translation tables of the Greek ('el') language section are sorted;
    * "male name" / "female name" phrases are replaced with the
      ``{{ονομαΑ}}`` / ``{{ονομαΓ}}`` templates and the matching explicit
      category line is removed from the result;
    * every detected problem is collected and, if there is any, a single
      ``{{σελίδα με πρόβλημα|...}}`` template is appended to the text.

    pagetitle: title of the page.
    wikitext: full wikitext of the page.
    languages: dict iso -> {'όνομα': ..., 'έχει βικιλεξικό': ...}; it is
        passed to ``sortlanguages``, which adds a 'forsorting' key to it.
    parts: known part-of-speech template names.
    """
    thesections = getSections(pagetitle, wikitext, languages, parts)
    sortedlanguages = sortlanguages(languages)
    pieces = []
    previoussectionlangiso = None
    hasproblems = ''
    categoriestoremove = []
    uniquelangs = []

    langsections = thesections[0]['langsections']
    if langsections:
        if thesections[1]['depth'] != 2:
            hasproblems += '|ξεκινά χωρίς γλώσσα'
        for alang in langsections:
            if 'iso' not in alang:
                hasproblems += '|σφάλμα iso γλώσσας ' + str(alang['section'])
            elif alang['iso'] in uniquelangs:
                hasproblems += '|διπλή γλώσσα ' + str(alang['section'])
            else:
                uniquelangs.append(alang['iso'])
        if not hasproblems:
            # The order is only checked when the language headings are valid.
            uniquelangnames = [sortedlanguages[x]['forsorting'] for x in uniquelangs]
            if uniquelangnames != sorted(uniquelangnames):
                hasproblems += '|αταξινόμητες ενότητες γλώσσας '
    else:
        hasproblems += '|δεν έχει ενότητες γλώσσας'
    if thesections[0]['garbage']:
        hasproblems += '|έχει σφάλματα'

    # (pattern, opening of the replacement template, category name)
    namepatterns = (
        (foronomaAre, '{{ονομαΑ', 'Ανδρικά ονόματα'),
        (foronomaGre, '{{ονομαΓ', 'Γυναικεία ονόματα'),
    )
    for section in thesections:
        current = thesections[section]
        if current['depth'] == 1:
            # NOTE(review): depth-1 entries (including entry 0, the text
            # before the first heading) are not copied to the result —
            # confirm that dropping that text is intended.
            continue
        langiso = current['langiso']
        content = current['content']
        if previoussectionlangiso and previoussectionlangiso != langiso:
            # On a language change the previous section must end with ----
            if not thesections[section - 1]['content'].rstrip().endswith('----'):
                hasproblems += '|στην αλλαγή γλώσσας'
        previoussectionlangiso = langiso
        pieces.append(current['originaltitleline'] + '\n')

        if current['title'] == 'μεταφράσεις':
            if langiso == 'el':
                fixedtext, tableproblems = _fixtranslationsection(content, sortedlanguages)
                pieces.append(fixedtext)
                hasproblems += tableproblems
            else:
                hasproblems += '|μεταφράσεις σε λάθος γλώσσα'
                pieces.append(content)
            continue

        if '{{μτφ-αρχή' in content or '{{μτφ-τέλος' in content:
            hasproblems += '|πρόβλημα στη θέση μετάφρασης'
        for pattern, template, categoryname in namepatterns:
            if not re.search(pattern, content):
                continue
            if current['title'] != 'κύριο όνομα':
                hasproblems += '|κύριο όνομα σε λάθος κατηγορία'
            replacer = template
            if langiso != 'el':
                replacer += '|' + langiso
            replacer += '}}'
            # Substitute with the pattern that actually matched; the female
            # branch must not reuse the male pattern.
            pieces.append(re.sub(pattern, replacer, content))
            if langiso in languages:
                # Without a known language there is no category to remove.
                categoriestoremove.append('\n[[Κατηγορία:' + categoryname + ' ('
                                          + languages[langiso]['όνομα'] + ')]]\n')
            break
        else:
            pieces.append(content)

    newtext = ''.join(pieces)
    if hasproblems:
        newtext += '{{σελίδα με πρόβλημα' + hasproblems + '}}'
    for categorytoremove in categoriestoremove:
        newtext = newtext.replace(categorytoremove, '')
    return newtext
def checktranslations(thelineslist, languages):
    """Rebuild every translation table found in a list of lines.

    Only the tables are returned: text outside the tables, ``{{βλ|`` lines
    and the ``{{μτφ-υποκ``, ``{{μτφ-θηλ``, ``{{μτφ-κατάταξη`` and
    ``{{μτφ-μεγεθ`` marker lines are ignored.  Tables without any content
    line are skipped.  Progress information is printed to standard output.

    thelineslist: the lines to scan.
    languages: dict iso -> language data, passed to ``sortlanguages``.

    Returns the text of all rebuilt tables, each one followed (when needed)
    by a ``{{μτφ-κατάταξη}}`` marker and the lines that could not be placed.
    """
    sortedlanguages = sortlanguages(languages)
    print('start=====')
    tables = []
    thetablestarted = False
    thetablelines = None
    print('thelineslist', len(thelineslist))
    ignoredprefixes = ('{{μτφ-υποκ', '{{μτφ-θηλ', '{{μτφ-κατάταξη', '{{μτφ-μεγεθ')
    for line in thelineslist:
        stripped = line.strip()
        if line.startswith('{{μτφ-αρχή'):
            print('======table started')
            thetablestarted = True
            thetablelines = {
                'number': len(tables) + 1,
                # Whatever follows the template name: "}}" or "|...}}".
                'endtemplate': stripped[len('{{μτφ-αρχή'):],
                # Always present, so that empty tables can be detected later.
                'content': [],
            }
        elif stripped == '{{μτφ-τέλος}}':
            # Only close a table that was actually opened; an unmatched end
            # marker must not crash or store the previous table again.
            if thetablestarted:
                tables.append(thetablelines)
                print('=====table end')
            thetablestarted = False
        elif stripped == '{{μτφ-μέση}}' or '{{βλ|' in line or line.startswith(ignoredprefixes):
            continue
        elif thetablestarted:
            thetablelines['content'].append(line)

    pieces = []
    for atablecontent in tables:
        if not atablecontent['content']:
            continue
        print("atablecontent['content']", atablecontent['content'])
        fixedtable = fixOneTable(atablecontent['content'], sortedlanguages)
        sortedlines = fixedtable['translationssorted']
        sortedlines.insert(len(sortedlines) // 2 + 1, '{{μτφ-μέση}}')
        print("fixedtable['content']", sortedlines)
        pieces.append('{{μτφ-αρχή' + atablecontent['endtemplate'] + '\n')
        pieces.extend(aline + '\n' for aline in sortedlines)
        pieces.append('{{μτφ-τέλος}}' + '\n')
        leftovers = []
        for key in ('duplicatetranslations', 'badisotranslations', 'noisotranslations'):
            group = fixedtable[key]
            # Dict groups are keyed by iso code; the line itself is the value.
            leftovers.extend(group.values() if isinstance(group, dict) else group)
        if leftovers:
            pieces.append('{{μτφ-κατάταξη}}' + '\n')
            pieces.extend(aline + '\n' for aline in leftovers)
    return ''.join(pieces)
def getPartsFromString(thestring):
    '''Read the parts of speech from a string.

    Every line that starts with
    ``pos['NAME'] = { ['link'] = 'LINK', ['κατηγορία']`` contributes one
    entry.  Lines that do not match are ignored.

    Returns a dict mapping NAME to ``{'link': LINK}``.
    '''
    # Raw string: same pattern as before, without invalid escape sequences.
    partre = re.compile(r"pos\['(?P<PART>.*)']\s*=\s*\{\s*\['link']\s*=\s*'(?P<PARTLINK>.+)'\s*,\s*\['κατηγορία']")
    parts = {}
    for line in thestring.splitlines():
        thematch = partre.match(line)
        if thematch:
            parts[thematch.group('PART')] = { 'link' : thematch.group('PARTLINK')}
    return parts
def getLanguagesFromString(thestring):
    '''Read the languages from a string.

    Every line that starts with
    ``Languages['ISO'] = { name = 'NAME', cat = ..., wikiExists = FLAG }``
    contributes one entry.  Lines that do not match are ignored.

    Returns a dict mapping ISO to
    ``{'όνομα': NAME, 'έχει βικιλεξικό': bool}``, where the boolean is True
    only when FLAG (stripped) is exactly ``true``.
    '''
    # Raw string: same pattern as before, without invalid escape sequences.
    langre = re.compile(r"Languages\['(?P<LANGISO>.*)']\s*=\s*\{\s*name\s*=\s*'\s*(?P<LANGNAME>.+)'\s*,\s*cat\s*=.*wikiExists\s*=\s*(?P<HASWIKI>.*)\s*}")
    languages = {}
    for line in thestring.splitlines():
        thematch = langre.match(line)
        if thematch:
            languages[thematch.group('LANGISO')] = {
                'όνομα' : thematch.group('LANGNAME'),
                'έχει βικιλεξικό' : (thematch.group('HASWIKI').strip() == 'true'),
            }
    return languages
def sortlanguages(languages):
    """Return the languages as an OrderedDict ordered by their sort key.

    A 'forsorting' key (``sortkey`` of the language name) is stored in every
    language dict of ``languages``; the input dicts are modified in place
    and are the same objects as the values of the returned mapping.

    After the ordering has been fixed, the sort keys of some languages are
    overridden when those languages are present:

    * 'διεθ' gets the prefix '0';
    * 'el' gets the prefix '1', 'gkm' and 'grc' become '2' / '3' followed
      by the (already prefixed) key of 'el';
    * 'fro' becomes the key of 'fr' followed by its own key.

    Languages missing from the table are skipped instead of raising
    KeyError, so partial language tables can be used as well.
    """
    for langiso in languages:
        languages[langiso]['forsorting'] = sortkey(languages[langiso]['όνομα'])

    translationsdict = OrderedDict()
    for lang in sorted(languages, key=lambda x: languages[x]['forsorting']):
        translationsdict[lang] = languages[lang]

    # The overrides are applied after the ordering above, so they change the
    # stored keys but not the iteration order of the returned mapping.
    if 'el' in translationsdict:
        elkey = '1' + translationsdict['el']['forsorting']
        translationsdict['el']['forsorting'] = elkey
        for prefix, iso in (('2', 'gkm'), ('3', 'grc')):
            if iso in translationsdict:
                translationsdict[iso]['forsorting'] = prefix + elkey
    if 'διεθ' in translationsdict:
        translationsdict['διεθ']['forsorting'] = '0' + translationsdict['διεθ']['forsorting']
    if 'fro' in translationsdict and 'fr' in translationsdict:
        translationsdict['fro']['forsorting'] = (translationsdict['fr']['forsorting']
                                                 + translationsdict['fro']['forsorting'])
    return translationsdict
def fixOneTable(thelineslist, sortedlanguages):
    """Classify and sort the lines of one translation table.

    Blank lines and ``{{μτφ-μέση}}`` lines are ignored; every other line is
    stripped and matched against ``translinere``.

    thelineslist: the lines found inside the table.
    sortedlanguages: mapping of known iso codes; its iteration order is the
        order used for the sorted translations.

    Returns a dict with:
    translationssorted: list with the first line found for every known
        language, in the order of ``sortedlanguages``;
    duplicatetranslations: dict iso -> further line(s) of a language that
        already has a translation line;
    badisotranslations: dict iso -> line(s) whose iso code is unknown;
    noisotranslations: list of lines that are not translation lines.
    When several lines end up under the same key of one of the two dicts
    they are joined with a newline, so that no line is lost.
    """
    def remember(bucket, key, text):
        # Append to an existing entry instead of overwriting it.
        bucket[key] = bucket[key] + '\n' + text if key in bucket else text

    # Debug trace kept from the original implementation.
    print("len(sortedlanguages)",len(sortedlanguages))
    translations = {}
    duplicatetranslations = {}
    badisotranslations = {}
    noisotranslations = []
    for line in thelineslist:
        stripped = line.strip()
        if stripped == '' or stripped == '{{μτφ-μέση}}':
            continue
        thematch = re.match(translinere, stripped)
        if not thematch:
            noisotranslations.append(stripped)
            continue
        langiso = thematch.group('LANGISO')
        if langiso not in sortedlanguages:
            remember(badisotranslations, langiso, stripped)
        elif langiso in translations:
            # Key by the iso code of this line (the former code referenced
            # an undefined name here and raised NameError).
            remember(duplicatetranslations, langiso, stripped)
        else:
            translations[langiso] = stripped

    translationssorted = [translations[lang] for lang in sortedlanguages if lang in translations]
    return {
        'translationssorted': translationssorted,
        'duplicatetranslations': duplicatetranslations,
        'badisotranslations': badisotranslations,
        'noisotranslations': noisotranslations,
    }
def checklangsections(pagetitle, wikitext, sortedlanguages, parts):
    """Check the language headings of a page and return the problems found.

    The page is split with ``getSections``.  The result is a string made of
    problem tags, each starting with "|"; it is empty when nothing is wrong.
    The order of the language sections is only verified when no other
    problem has been found.
    """
    parsed = getSections(pagetitle, wikitext, sortedlanguages, parts)
    langheadings = parsed[0]['langsections']
    if not langheadings:
        return '|δεν έχει ενότητες γλώσσας'

    problems = ''
    if parsed[1]['depth'] != 2:
        problems += '|ξεκινά χωρίς γλώσσα'

    seen = []
    for heading in langheadings:
        if 'iso' not in heading:
            problems += '|σφάλμα iso γλώσσας ' + str(heading['section'])
        elif heading['iso'] in seen:
            problems += '|διπλή γλώσσα ' + str(heading['section'])
        else:
            seen.append(heading['iso'])

    if problems == '':
        keys = [sortedlanguages[iso]['forsorting'] for iso in seen]
        if keys != sorted(keys):
            problems += '|αταξινόμητες ενότητες γλώσσας '
    return problems
def sortkey(thestring):
    """Return the key used to sort a Greek string.

    The string is reduced to the 24 plain lowercase Greek letters: accents,
    breathings, diaeresis, iota subscript, length marks and letter case are
    removed, final sigma becomes a normal sigma, and every other character
    (spaces, digits, punctuation, non-Greek letters) is dropped.

    The folding is done through Unicode canonical decomposition instead of
    a hand-written character table, so precomposed letters such as U+1FD0
    to U+1FD3, which the table did not list, are handled as well.
    """
    # Local import: keeps this function self-contained.
    import unicodedata

    folded = []
    for char in unicodedata.normalize('NFD', thestring):
        if unicodedata.combining(char):
            # Combining accent, breathing, diaeresis, iota subscript, ...
            continue
        char = char.lower()
        if char == '\u03c2':
            # Final sigma sorts as a normal sigma.
            char = '\u03c3'
        # Keep only the basic letters alpha (U+03B1) .. omega (U+03C9).
        if len(char) == 1 and '\u03b1' <= char <= '\u03c9':
            folded.append(char)
    return ''.join(folded)