Module:grk-stems >> Module:grk-stems/data     <<   Module:grk-stems/τεκμηρίωση (documentation)

Δοκιμές, στο Module:grk-stems/trials >> Module:grk-stems/trials/data

Παραγωγή θεμάτων άτονων, τονισμένων στη λήγουσα, στην παραλήγουσα για την ελληνική γλώσσα. Καλείται

  • είτε με #invoke μέσα σε Πρότυπα κλίσεων
  • είτε από επιμέρους χρηστικά «προτυπάκια» από το Module:stems όπως:
    • Πρότυπο:word-2 λέξη μείον 2 τελευταία γράμματα οποιασδήποτε γλώσσας
  • χρησιμοποιείται (με require) και σε Modules κλίσεων.

-- 2020.03.21. Sarri.greek
--[=[
STEM PRODUCTION for Greek monotonic or polytonic scripts (with character conversions at [[Module:grk-stems/data]])
	Produce stems for inflection tables
	from lemma or a given λήμμα=xxxx
	Trials, at [[Module:grk-stems/trials]]

Authors:
	2020.03.Sarri.greek
	previous modules by Flyax, Xoristzatziki. 

STRUCTURE
	1) MAIN CODE
		1. has_accent ? (checking if true or false) - export.hasaccent
		2. accent_0 removeaccent
		3. accent_1 put accent on the ultima
		4. accent_2 put accent on the penultima without synizesis
!!NOT CHECKED		5. accent_2syn put accent on the penultima, asking editor for synizesis number 
!!NOT CHECKED		6. Convert perispomene to oxia
!!NOT CHECKED		7. Convert oxia to perispomene
	2) a general function
	3) export functions (used at declension Module)
	4) samples for direct invoke
	5) NOTES-TRIALS

Terms:
	stemL = the stem as found at lemma (minus letters from the end)
	stem = lemma minus letters from the end, as defined in the inflection modules
	word0 or stem0 = no accent - Function: removeaccent or accent0
	word1 or stem1 = accent on the ultima: the last syllable's vowel
	word2 or stem2 = accent on the penultima: the syllable before the last
		+Modern Greek: consideration for synizisis: when 2 vowels are taken as one

PROBLEMS
main function:
	For function accent_penultima, I get 'time allocated expired'
function accent_2
	[[σκίουρος]], stem σκίουρ It analyses it as: σκί-ο-υρ = accent_3 instead of σκί-ουρ = accent_2
Umlaut - διαλυτικά
	cf /data page Why word-1 does not see the diaeresis? ο Βάιος του Βαΐου, ο Μάιος του Μαΐου
]=]--

-- Table of functions returned by this module (see `return export` at the end of the file).
local export = {}

-- Title of this module; its '/data' subpage is loaded just below.
local module_path = 'Module:grk-stems'
-- Character-conversion tables. The functions below read its fields
-- accented_to_unaccented, unaccented_to_accented, digraphs, digraphs2,
-- perispomeni_to_oxeia and oxeia_to_perispomeni.
local m_data = require(module_path .."/data") --all character conversions


--------------------------------------------------------------------------
--                             MAIN CODES                               --
--------------------------------------------------------------------------


-- ===================== has_accent: does the string contain an accented character?
-- Previously Lf['έχει τόνο'] in Module:κλίση/el/ουσιαστικό.
-- Returns true as soon as one character of `anygreekstring` is a key of
-- m_data.accented_to_unaccented, and false when no character is.
function has_accent(anygreekstring)
	local accented = m_data.accented_to_unaccented
	-- examine the string one code point at a time
	for cp in mw.ustring.gcodepoint(anygreekstring) do
		local letter = mw.ustring.char(cp)
		if accented[letter] ~= nil then
			return true
		end
	end
	return false
end

-- Exported wrapper around has_accent.
-- The previous version ignored its argument and read an undefined global
-- `term`; the word to test is now taken from the argument itself.
-- @param frame  either a frame object (anything with a getParent method) or a
--               plain string. For a frame, the word is looked up as 'λήμμα' or
--               'lemma', first in frame.args (#invoke) and then in the parent
--               frame's args (template call).
-- @return true/false as returned by has_accent; when no word was supplied the
--         trimmed title of the current page is tested instead.
function export.hasaccent(frame)
	local term
	if type(frame) == 'table' and type(frame.getParent) == 'function' then
		local args = frame.args or {}
		term = args['λήμμα'] or args['lemma'] or ''
		if term == '' then
			local parent = frame:getParent()
			local parent_args = parent and parent.args or {}
			term = parent_args['λήμμα'] or parent_args['lemma'] or ''
		end
	else
		term = frame or ''
	end
	if term == '' then
		-- same fallback as the other exported functions: the trimmed page title
		term = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	return has_accent(term)
end

-- ===================== removeaccent or accent_0
-- Previously in Module:grc-nouns-decl.
-- Removes the accent from any word: every character that is a key of
-- m_data.accented_to_unaccented is replaced by the mapped value, every other
-- character is copied unchanged.
-- DO NOT add export.
-- @param word  the string to process
-- @return the converted string
function removeaccent(word)
	-- All working variables are local (they used to leak as globals), and the
	-- pieces are joined once at the end instead of re-concatenating per character.
	local pieces = {}
	for codepoint in mw.ustring.gcodepoint( word ) do
		local mychar = mw.ustring.char(codepoint)
		-- use the replacement when the table has one, otherwise keep the character
		pieces[#pieces + 1] = m_data.accented_to_unaccented[mychar] or mychar
	end
	return table.concat(pieces)
end

-- ===================== accent_ultima, place accent on the last vowel
-- Previously named neostonos in [[Module:grc-nouns-decl]] by Flyax.
-- Walks backwards from the last character of `word`. The first character that
-- is a key of m_data.unaccented_to_accented is replaced by its mapped value
-- and the scan stops.
-- The caller is expected to pass a word without accented vowels.
-- @param word  the (unaccented) word or stem
-- @return the word with that one character replaced, or the word unchanged
--         when no character has an entry in the table
function accent_1(word)
	-- loop index and temporaries are local; they used to leak as globals
	for stempoint = mw.ustring.len(word), 1, -1 do
		local newchar = m_data.unaccented_to_accented[mw.ustring.sub(word, stempoint, stempoint)]
		if newchar ~= nil then
			-- sub(word, 1, 0) is '' so position 1 needs no special case
			return mw.ustring.sub(word, 1, stempoint - 1) .. newchar .. mw.ustring.sub(word, stempoint + 1)
		end
	end
	return word
end

-- ===================== accent_2: accent on the second vowel from the end, no synizesis
-- Previously named tonospro in [[Module:grc-nouns-decl]] by Flyax.
-- Walks backwards through `stem`. The first character found in
-- m_data.unaccented_to_accented is only remembered; the second one is replaced
-- by its mapped (accented) value and the scan stops.
-- Every such character counts as a separate vowel, so digraphs are not
-- recognised (see the σκίουρος example under PROBLEMS in the file header).
-- @param stem  the (unaccented) stem
-- @return the stem with that character replaced, or the stem unchanged when it
--         contains fewer than two characters with an entry in the table
function accent_2(stem)
	-- loop index and temporaries are local; they used to leak as globals
	local last_vowel_seen = false
	for stempoint = mw.ustring.len(stem), 1, -1 do
		local newchar = m_data.unaccented_to_accented[mw.ustring.sub(stem, stempoint, stempoint)]
		if newchar ~= nil then
			if last_vowel_seen then
				-- sub(stem, 1, 0) is '' so position 1 needs no special case
				return mw.ustring.sub(stem, 1, stempoint - 1) .. newchar .. mw.ustring.sub(stem, stempoint + 1)
			end
			last_vowel_seen = true
		end
	end
	return stem
end

-- !!!!!!!!NOT CHECKED
-- ===================== accent to penultima + synizesis
-- Previously neostonos in [[Module:el-verb-conj]] by Flyax, 2013.
-- Moves the accent of `word` one vowel towards the beginning, optionally
-- skipping vowels that the editor declares to be in synizesis.
-- Steps:
--   1. If `word` contains a key of m_data.digraphs2 (accent + dialytika
--      combinations), its first occurrence is replaced by the mapped value and
--      that result is returned.
--   2. Otherwise the word is scanned from the end for the first character found
--      in m_data.accented_to_unaccented; that character loses its accent.
--   3. If `syniz` is a positive number, that many characters before the old
--      accent are skipped.
--      EXAMPLES: αιφνιδιάζω (syniz=nil), μεριάζω (syniz=1), αδειάζω (syniz=2)
--   4. If the character now at the scan position plus the unaccented vowel is a
--      value of m_data.digraphs, that character is skipped as well.
--   5. accent_1 puts the accent on the last vowel of what remains in front.
-- Fixes over the earlier version:
--   * a word without an accented character is returned unchanged instead of
--     looping forever;
--   * only ONE value is returned in step 1 (gsub also returns a count);
--   * an empty or non-numeric `syniz` is treated as "no synizesis" instead of
--     raising an arithmetic error;
--   * the synizesis count is clamped so the scan position never goes negative;
--   * working variables are local instead of global.
-- @param word   accented word
-- @param syniz  number (or numeric string) of vowels counted as one; optional
-- @return the word with the accent moved, or `word` itself when it has no accent
function accent_2syn(word, syniz)
	-- step 1: accent + dialytika combinations
	for diplotonismeno, neatimh in pairs(m_data.digraphs2) do
		if mw.ustring.find(word, diplotonismeno) ~= nil then
			-- the parentheses drop gsub's second result (the replacement count)
			return (mw.ustring.gsub(word, diplotonismeno, neatimh, 1))
		end
	end

	-- step 2: look for the accent, starting from the end
	local wordproduced = ""
	local stempoint = mw.ustring.len(word)
	local newchar = nil
	while stempoint > 0 and newchar == nil do
		local mychar = mw.ustring.sub(word, stempoint, stempoint)
		newchar = m_data.accented_to_unaccented[mychar]
		wordproduced = (newchar or mychar) .. wordproduced
		stempoint = stempoint - 1
	end
	if newchar == nil then
		-- nothing to move
		return word
	end

	-- step 3: synizesis, as declared by the editor
	local count = tonumber(syniz)
	if count ~= nil and count > 0 then
		count = math.min(math.floor(count), stempoint)
		wordproduced = mw.ustring.sub(word, stempoint + 1 - count, stempoint) .. wordproduced
		stempoint = stempoint - count
	end

	-- step 4: was the accent on the second letter of a digraph?
	local twoletters = mw.ustring.sub(word, stempoint, stempoint) .. newchar
	for _, v in pairs(m_data.digraphs) do
		if v == twoletters then
			wordproduced = mw.ustring.sub(word, stempoint, stempoint) .. wordproduced
			stempoint = stempoint - 1
			break -- one match is enough; never skip twice
		end
	end

	-- step 5: accent the last vowel of the remaining prefix
	return accent_1(mw.ustring.sub(word, 1, stempoint)) .. wordproduced
end

-- !!!!!!!!NOT CHECKED
-- ===================== Convert perispomene (circumflex) to oxia (= acute = tonos)
-- Previously in [[Module:grc-nouns-decl]] by Flyax.
-- Walks backwards through `stem`; the first character that is a key of
-- m_data.perispomeni_to_oxeia is replaced by its mapped value.
-- @param stem  the stem to convert
-- @return the converted stem, or the stem unchanged when no character has an
--         entry in the table
function export.PerispomeniToOxia(stem)
	-- loop index and temporaries are local; they used to leak as globals
	for stempoint = mw.ustring.len(stem), 1, -1 do
		local newchar = m_data.perispomeni_to_oxeia[mw.ustring.sub(stem, stempoint, stempoint)]
		if newchar ~= nil then
			-- sub(stem, 1, 0) is '' so position 1 needs no special case
			return mw.ustring.sub(stem, 1, stempoint - 1) .. newchar .. mw.ustring.sub(stem, stempoint + 1)
		end
	end
	return stem
end

-- !!!!!!!!NOT CHECKED
-- ===================== Convert oxia (acute) to perispomeni (circumflex)
-- Previously in [[Module:grc-nouns-decl]] by Flyax.
-- Walks backwards through `stem`; the first character that is a key of
-- m_data.oxeia_to_perispomeni is replaced by its mapped value.
-- @param stem  the stem to convert
-- @return the converted stem, or the stem unchanged when no character has an
--         entry in the table
function export.OxiaToPerispomeni(stem)
	-- loop index and temporaries are local; they used to leak as globals
	for stempoint = mw.ustring.len(stem), 1, -1 do
		local newchar = m_data.oxeia_to_perispomeni[mw.ustring.sub(stem, stempoint, stempoint)]
		if newchar ~= nil then
			-- sub(stem, 1, 0) is '' so position 1 needs no special case
			return mw.ustring.sub(stem, 1, stempoint - 1) .. newchar .. mw.ustring.sub(stem, stempoint + 1)
		end
	end
	return stem
end



--------------------------------------------------------------------------
--                 EXPORTS  for inflection Modules                      --
--                 plus i variants for direct invoke                     --
--------------------------------------------------------------------------
-- ΔΗΜΙΟΥΡΓΙΑ ΘΕΜΑΤΩΝ από το ΛΗΜΜΑ
-- PRODUCE STEMS from LEMMA

-- ============= wordi_1 =============== --
-- wordi = "word invoked": the lemma minus its last letter, for direct #invoke.
-- @param frame  the #invoke frame; the lemma is read from frame.args['lemma'],
--               then frame.args['λήμμα'] (a template would need
--               frame:getParent().args instead)
-- @return the lemma without its final character; when no non-empty lemma
--         argument is given, the trimmed title of the current page is used
function export.wordi_1(frame)
	-- everything is local; the earlier version leaked myarg, PAGENAME, lemma
	-- and lemma_target into the global table
	local args = frame.args
	local lemma = args['lemma'] or args['λήμμα'] or ''
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	return mw.ustring.sub(lemma, 1, -2)
end

-- ============= word_2 =============== --
-- The lemma minus its last 2 letters, for [[Template:word-2]] and for modules.
-- @param arg  either a frame object (anything with a getParent method), in
--             which case the lemma is read from the parent frame's 'λήμμα' or
--             'lemma' argument, or the lemma itself as a string
-- @return the lemma without its last two characters; when the lemma is empty
--         or missing, the trimmed title of the current page is used
function export.word_2(arg)
	-- everything is local; the earlier version leaked term, PAGENAME and lemma
	local lemma
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		local args = arg:getParent().args
		lemma = args['λήμμα'] or args['lemma'] or ''
	else
		-- a missing argument used to reach mw.ustring.sub as nil
		lemma = arg or ''
	end
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	return mw.ustring.sub(lemma, 1, -3)
end


-- ============= wordi_2 =============== --
-- The lemma minus its last 2 letters, for direct #invoke.
-- @param frame  the #invoke frame; the lemma is read from frame.args['lemma'],
--               then frame.args['λήμμα'] (a template would need
--               frame:getParent().args instead)
-- @return the lemma without its last two characters; when no non-empty lemma
--         argument is given, the trimmed title of the current page is used
function export.wordi_2(frame)
	-- everything is local; the earlier version leaked myarg, PAGENAME, lemma
	-- and lemma_target into the global table
	local args = frame.args
	local lemma = args['lemma'] or args['λήμμα'] or ''
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	return mw.ustring.sub(lemma, 1, -3)
end


-- ============= word_3 =============== --
-- The lemma minus its last 3 letters, for templates and modules
-- (as in καπετάν-ιος, καπεταν-αίοι).
-- @param arg  either a frame object (anything with a getParent method), in
--             which case the lemma is read from the parent frame's 'λήμμα' or
--             'lemma' argument, or the lemma itself as a string
-- @return the lemma without its last three characters; when the lemma is empty
--         or missing, the trimmed title of the current page is used
function export.word_3(arg)
	-- everything is local; the earlier version leaked term, PAGENAME and lemma
	local lemma
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		local args = arg:getParent().args
		lemma = args['λήμμα'] or args['lemma'] or ''
	else
		-- a missing argument used to reach mw.ustring.sub as nil
		lemma = arg or ''
	end
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	return mw.ustring.sub(lemma, 1, -4)
end


-- ============= wordi_3 =============== --
-- The lemma minus its last 3 letters, for direct #invoke
-- (as in καπετάν-ιος, καπεταν-αίοι).
-- @param frame  the #invoke frame; the lemma is read from frame.args['lemma'],
--               then frame.args['λήμμα'] (a template would need
--               frame:getParent().args instead)
-- @return the lemma without its last three characters; when no non-empty lemma
--         argument is given, the trimmed title of the current page is used
function export.wordi_3(frame)
	-- everything is local; the earlier version leaked myarg, PAGENAME, lemma
	-- and lemma_target into the global table
	local args = frame.args
	local lemma = args['lemma'] or args['λήμμα'] or ''
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	return mw.ustring.sub(lemma, 1, -4)
end

-- ΒΑΖΟΥΜΕ ΤΟΝΟΥΣ
-- PUT ACCENTS on them

-- ============= WORD 0 =============== --
-- Remove the accent from the lemma, for templates and modules.
-- @param arg  either a frame object (anything with a getParent method), in
--             which case the lemma is read from the parent frame's 'λήμμα' or
--             'lemma' argument, or the lemma itself as a string
-- @return removeaccent(lemma); when the lemma is empty or missing, the trimmed
--         title of the current page is used
function export.word0(arg)
	-- everything is local; the earlier version leaked term, PAGENAME and lemma
	local lemma
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		local args = arg:getParent().args
		lemma = args['λήμμα'] or args['lemma'] or ''
	else
		-- a missing argument used to reach removeaccent as nil
		lemma = arg or ''
	end
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	return removeaccent(lemma)
end


-- ============= 0i =============== --
-- Remove the accent from the lemma, for direct #invoke.
-- @param frame  the #invoke frame; the lemma is read from frame.args['lemma'],
--               then frame.args['λήμμα'] (a template would need
--               frame:getParent().args instead)
-- @return removeaccent(lemma); when no non-empty lemma argument is given, the
--         trimmed title of the current page is used
function export.word0i(frame)
	-- everything is local; the earlier version leaked myarg, PAGENAME, lemma
	-- and lemma_target into the global table
	local args = frame.args
	local lemma = args['lemma'] or args['λήμμα'] or ''
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	-- the former mw.ustring.sub(lemma,1,-1) selected the whole string and is dropped
	return removeaccent(lemma)
end

-- ============= WORD 1 =============== --
-- Accent on the ultima, for templates and modules.
-- @param arg  either a frame object (anything with a getParent method), in
--             which case the lemma is read from the parent frame's 'λήμμα' or
--             'lemma' argument, or the lemma itself as a string
-- @return accent_1(removeaccent(lemma)); when the lemma is empty or missing,
--         the trimmed title of the current page is used
function export.word1(arg)
	-- everything is local; the earlier version leaked term, PAGENAME and lemma
	local lemma
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		local args = arg:getParent().args
		lemma = args['λήμμα'] or args['lemma'] or ''
	else
		-- a missing argument used to reach removeaccent as nil
		lemma = arg or ''
	end
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	return accent_1(removeaccent(lemma))
end

-- ============= 1i =============== --
-- Accent on the ultima, for direct #invoke.
-- @param frame  the #invoke frame; the lemma is read from frame.args['lemma'],
--               then frame.args['λήμμα'] (a template would need
--               frame:getParent().args instead)
-- @return accent_1(removeaccent(lemma)); when no non-empty lemma argument is
--         given, the trimmed title of the current page is used
function export.word1i(frame)
	-- everything is local; the earlier version leaked myarg, PAGENAME, lemma
	-- and lemma_target into the global table
	local args = frame.args
	local lemma = args['lemma'] or args['λήμμα'] or ''
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	-- the former mw.ustring.sub(lemma,1,-1) selected the whole string and is dropped
	return accent_1(removeaccent(lemma))
end

-- ============= WORD 2 =============== --
-- Accent on the penultima (no synizesis), for templates and modules.
-- @param arg  either a frame object (anything with a getParent method), in
--             which case the lemma is read from the parent frame's 'λήμμα' or
--             'lemma' argument, or the lemma itself as a string
-- @return accent_2(removeaccent(lemma)); when the lemma is empty or missing,
--         the trimmed title of the current page is used
function export.word2(arg)
	-- everything is local; the earlier version leaked term, PAGENAME and lemma
	local lemma
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		local args = arg:getParent().args
		lemma = args['λήμμα'] or args['lemma'] or ''
	else
		-- a missing argument used to reach removeaccent as nil
		lemma = arg or ''
	end
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	return accent_2(removeaccent(lemma))
end

-- ============= 2i =============== --
-- Accent on the penultima (no synizesis), for direct #invoke.
-- @param frame  the #invoke frame; the lemma is read from frame.args['lemma'],
--               then frame.args['λήμμα'] (a template would need
--               frame:getParent().args instead)
-- @return accent_2(removeaccent(lemma)); when no non-empty lemma argument is
--         given, the trimmed title of the current page is used
function export.word2i(frame)
	-- everything is local; the earlier version leaked myarg, PAGENAME, lemma
	-- and lemma_target into the global table
	local args = frame.args
	local lemma = args['lemma'] or args['λήμμα'] or ''
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	-- the former mw.ustring.sub(lemma,1,-1) selected the whole string and is dropped
	return accent_2(removeaccent(lemma))
end

--------------------------------------------------------------------------
--                          NOT TESTED                             --
--------------------------------------------------------------------------

-- ============= WORD 2syn =============== --
-- Accent on the penultima plus synizesis, for templates.
-- export.synizesis returns the synizesis value supplied by the editor.
-- @param arg  either a frame object (anything with a getParent method), in
--             which case the parent frame's 'συνίζ' argument is returned
--             ('' when absent), or any other value, which is returned as is
-- @return the synizesis value
function export.synizesis(arg)
	-- The earlier version also read and wrote a global `syniz` as a side
	-- effect; a stale '' left there could later be passed on to accent_2syn.
	-- Nothing is stored globally any more.
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		return arg:getParent().args['συνίζ'] or ''
	end
	return arg
end
-- Accent on the penultima with synizesis, for templates and modules.
-- The earlier version passed an undefined global `synizesis` (always nil) to
-- accent_2syn, so the editor's synizesis value was never applied.
-- @param arg    either a frame object (anything with a getParent method), in
--               which case the lemma is read from the parent frame's 'λήμμα' or
--               'lemma' argument, or the lemma itself as a string
-- @param syniz  optional synizesis count for callers that pass the lemma as a
--               string; for a frame it defaults to the parent frame's 'συνίζ'
-- @return the first result of accent_2syn(lemma, count); when the lemma is
--         empty or missing, the trimmed title of the current page is used
function export.word2syn(arg, syniz)
	-- everything is local; the earlier version leaked term, PAGENAME and lemma
	local lemma
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		local args = arg:getParent().args
		lemma = args['λήμμα'] or args['lemma'] or ''
		if syniz == nil then
			syniz = args['συνίζ']
		end
	else
		lemma = arg or ''
	end
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	-- tonumber() turns nil, '' and non-numeric text into nil ("no synizesis");
	-- the parentheses keep only the first value returned by accent_2syn
	return (accent_2syn(lemma, tonumber(syniz)))
end

-- ============= 2syn_i =============== --
-- Accent on the penultima plus synizesis, for direct #invoke.
-- @param frame  the #invoke frame; the lemma is read from frame.args['lemma'],
--               then frame.args['λήμμα'], and the synizesis count from
--               frame.args['συνίζ'] (a template would need
--               frame:getParent().args instead)
-- @return the first result of accent_2syn(lemma, count); when no non-empty
--         lemma argument is given, the trimmed title of the current page is used
function export.word2syn_i(frame)
	-- everything is local; the earlier version leaked myarg, PAGENAME, lemma
	-- and lemma_target, and guarded the synizesis lookup with a global `syniz`
	local args = frame.args
	local lemma = args['lemma'] or args['λήμμα'] or ''
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match( "^%s*(.-)%s*$" )
	end
	-- tonumber() turns nil, '' and non-numeric text into nil ("no synizesis");
	-- the parentheses keep only the first value returned by accent_2syn
	return (accent_2syn(lemma, tonumber(args['συνίζ'])))
end


return export