-----------------------------------------------------------------------
--         FILE:  luaotfload-arabic.lua
--  DESCRIPTION:  part of luaotfload / arabic specific support
-----------------------------------------------------------------------

-- Announce this file to luaotfload. `assert` hands back `luaotfload_module` when
-- that global is set, and the table constructor that follows is passed to it as
-- its single call argument. Without luaotfload the assertion fails with the
-- message below, before anything else in this file runs.
assert(luaotfload_module, "This is a part of luaotfload and should not be loaded independently") { 
    name          = "luaotfload-arabic",
    version       = "3.29",       --TAGVERSION
    date          = "2024-12-03", --TAGDATE
    description   = "luaotfload submodule / features",
    license       = "GPL v2.0",
    author        = "Marcel Krüger",
    copyright     = "The LaTeX Project",
}

local unicode = require'luaotfload-unicode'
local ccc = unicode.ccc

local node_new = node.direct.new
local setlink = node.direct.setlink
local is_char = node.direct.is_char
local getnext = node.direct.getnext

-- Modifier combining marks (MCM): the code points the AMTRA reordering in this
-- file treats specially. `mcm[codepoint]` is `true` for members of the set and
-- `nil` for everything else.
local mcm = {}
for _, codepoint in ipairs {
    0x0654, -- ARABIC HAMZA ABOVE
    0x0655, -- ARABIC HAMZA BELOW
    0x0658, -- ARABIC MARK NOON GHUNNA
    0x06DC, -- ARABIC SMALL HIGH SEEN
    0x06E3, -- ARABIC SMALL LOW SEEN
    0x06E7, -- ARABIC SMALL HIGH YEH
    0x06E8, -- ARABIC SMALL HIGH NOON
    0x08CA, -- ARABIC SMALL HIGH FARSI YEH
    0x08CB, -- ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW
    0x08CD, -- ARABIC SMALL HIGH ZAH
    0x08CE, -- ARABIC LARGE ROUND DOT ABOVE
    0x08CF, -- ARABIC LARGE ROUND DOT BELOW
    0x08D3, -- ARABIC SMALL LOW WAW
    0x08F3, -- ARABIC SMALL HIGH WAW
} do
    mcm[codepoint] = true
end

-- Implement AMTRA from UTR #53.
-- This assumes that the text is already normalized according to NFD. For most
-- fonts, normalizing to NFC should be good enough.

-- Walk the run of nodes whose combining class is `class`, starting at `first`
-- (the caller has already looked up that `first` has this class).
-- Returns three values: the last node of the run, the node following the run
-- (nil at the end of the list) and a flag that is `true` when a character from
-- `mcm` was seen while scanning, `nil` otherwise.
-- NOTE(review): the flag is derived from the nodes *after* `first`, up to and
-- including the node that terminates the run; the character of `first` itself
-- is never tested. UTR #53 appears to key on runs that *begin* with an MCM, so
-- this deserves a second look. The behaviour is intentionally left as it was.
local function scan_ccc_run(first, f, class)
    local tail, after, found = nil, first, nil
    repeat
        tail = after
        after = getnext(tail)
        local char = is_char(after, f)
        if mcm[char] then found = true end
    until ccc[char] ~= class
    return tail, after, found
end

-- Take the run first..tail, which currently sits between `prev` and `after`,
-- and re-link it directly behind `base`.
-- Returns the node that precedes `after` once the links have been updated.
local function move_run_behind(base, prev, first, tail, after)
    setlink(prev, after)
    -- getnext(base) has to be read after the unlink above: when prev == base
    -- it now yields `after`, which is exactly what the run must point to.
    setlink(tail, getnext(base))
    setlink(base, first)
    if prev == base then
        -- The run already was directly behind base, so it ends up where it
        -- started and its tail is what precedes `after`.
        return tail
    end
    return prev
end

-- Reorder the combining marks in a node list.
--   head: first node of the list, in the representation used by node.direct
--   f:    font id; only nodes for which is_char(n, f) yields a character with
--         an entry in `ccc` are treated as marks
-- Every node without a `ccc` entry starts a new cluster and acts as its base.
-- Within a cluster, runs with ccc 33 are moved directly behind the base; runs
-- with ccc 220 or 230 are moved there too when the scan reports an MCM.
-- Returns `head`. The first node of the list is always a base and is never
-- relinked, so the head of the list does not change.
local function reorder_amtra(head, f)
    local n = head
    while n do
        -- `base` is the node marks get inserted behind, `prev` the node
        -- directly in front of the node currently being looked at.
        local base, prev = n, n
        n = getnext(n)
        while true do
            local char = is_char(n, f) -- is_char(nil, f) == is_char(0, f) == nil
            local this_ccc = ccc[char]
            if not this_ccc then break end -- ! This `break` is the hot path
            if this_ccc == 33 then
                local tail, after = scan_ccc_run(n, f, 33)
                prev = move_run_behind(base, prev, n, tail, after)
                n = after
            elseif this_ccc == 220 then
                local tail, after, found = scan_ccc_run(n, f, 220)
                if found then
                    prev = move_run_behind(base, prev, n, tail, after)
                    base = tail -- Because ccc230 should get inserted after this
                else
                    prev = tail
                end
                n = after
            elseif this_ccc == 230 then
                local tail, after, found = scan_ccc_run(n, f, 230)
                if found then
                    prev = move_run_behind(base, prev, n, tail, after)
                else
                    -- Skip the run and keep going. This used to assign the
                    -- (undefined here) ccc220 variables, which set `n` to nil
                    -- and silently ended processing of the whole list.
                    prev = tail
                end
                n = after
            else
                prev, n = n, getnext(n)
            end
        end
    end
    return head
end

-- The amtra processor has to run after `normalize`, ideally right behind it.
-- Remember the position of the (last) registered processor named 'normalize';
-- the value stays 0 when there is none, which places us at the start of the
-- list so that normalize can still insert itself in front of us later.
local normalize_index = 0
do
    local node_processors = fonts.constructors.features.otf.processors.node
    for position, processor in ipairs(node_processors) do
        if processor.name == 'normalize' then
            normalize_index = position
        end
    end
end
-- Register the `amtra` feature with the OpenType feature handler. It defaults
-- to 'auto' and installs `reorder_amtra` as a node processor placed directly
-- behind the position recorded in `normalize_index`.
fonts.constructors.features.otf.register {
    name = 'amtra',
    default = 'auto',
    description = 'Apply Unicode Arabic Mark Rendering',
    initializers = {
        -- Resolve the 'auto' setting: the feature is switched on exactly when
        -- the font's script property is 'arab'. Any other value is left as
        -- given. (The comparison used to read the undefined global `values`,
        -- so 'auto' was never resolved.)
        node = function(fonttable, value, features)
            if value == 'auto' then
                features.amtra = fonttable.properties.script == 'arab'
            end
        end,
    },
    processors = {
        position = normalize_index + 1,
        -- Only head and font id are forwarded; further arguments passed by
        -- the caller are dropped.
        node = function(head, f)
            return reorder_amtra(head, f)
        end,
    },
}
