-- Module table; the public functions are attached to it further down.
local p = {}
-- Cache frequently used globals as upvalues for faster access.
local type = type
local ipairs = ipairs
local concat = table.concat
local u_find = mw.ustring.find
-- Names of the templates treated as ruby markup. The list order matters: it
-- is the order used to break ties when occurrences are counted.
local ruby_templates = { 'Photrans', 'Photrans2', 'Photransa', 'Ruby' }
-- Reverse lookup: template name -> its position in `ruby_templates`.
local ruby_template_indexes = {}
for position = 1, #ruby_templates do
    ruby_template_indexes[ruby_templates[position]] = position
end
---Return `s` with its first byte uppercased; the remainder is left untouched.
---@param s string
---@return string
local function ucfirst(s)
    local head = s:sub(1, 1)
    local tail = s:sub(2)
    return head:upper()..tail
end
-- Character-class pattern ("[...]") that matches a single kanji-like
-- character. It is assembled once at load time from the code point ranges
-- listed below.
local kanji_like_pattern
do
    local to_char = mw.ustring.char
    -- Each entry is either an inclusive { first, last } code point range or a
    -- single code point.
    local code_point_ranges = {
        { 0x2E80, 0x2EFF }, -- CJK Radicals Supplement
        { 0x3005, 0x3007 }, -- "々", "〆", "〇"
        { 0x31C0, 0x31EF }, -- CJK Strokes
        { 0x3400, 0x4DBF }, -- CJK Unified Ideographs Extension A
        { 0x4E00, 0x9FFF }, -- CJK Unified Ideographs
        { 0xF900, 0xFAFF }, -- CJK Compatibility Ideographs
        { 0x20000, 0x2A6DF }, -- CJK Unified Ideographs Extension B
        { 0x2A700, 0x2EE5F }, -- CJK Unified Ideographs Extensions C-I
        { 0x2F800, 0x2FA1F }, -- CJK Compatibility Ideographs Supplement
        { 0x30000, 0x323AF }, -- CJK Unified Ideographs Extensions G-H
    }
    local pieces = { '[' }
    for _, entry in ipairs(code_point_ranges) do
        if type(entry) == 'table' then
            pieces[#pieces+1] = to_char(entry[1])..'-'..to_char(entry[2])
        else
            pieces[#pieces+1] = to_char(entry)
        end
    end
    pieces[#pieces+1] = ']'
    kanji_like_pattern = concat(pieces)
end
-- Pattern matching one template call of the form {{name|base|annotation}}.
-- Captures:
--   1. the template name, without the whitespace around it;
--   2. the first argument: a run of one or more kanji-like characters;
--   3. the second argument: one or more characters other than '{', '}', '|',
--      '=' and newline.
local template_pattern = '{{%s*(%S[^{}|\n]-)%s*|('..kanji_like_pattern..'+)|([^{}|=\n]+)}}'
---Pick the ruby template name that occurs most often in `code`.
---
---Occurrences are counted for each entry of `ruby_templates`; template names
---found in `code` are compared after uppercasing their first letter.
---  * If none of the ruby templates occurs, 'Ruby' is returned.
---  * If several templates share the highest count, the one listed first in
---    `ruby_templates` wins.
---@param code string
---@return string
local function get_most_frequent_ruby_template_name(code)
    local count = {}
    for _, name in ipairs(ruby_templates) do
        count[name] = 0
    end
    -- Only the first capture (the template name) is needed here.
    for template_name in mw.ustring.gmatch(code, template_pattern) do
        local capitalized = ucfirst(template_name)
        if count[capitalized] then
            count[capitalized] = count[capitalized] + 1
        end
    end
    -- Start from the documented fallback so that 'Ruby' is returned when no
    -- ruby template occurs. Walking `ruby_templates` in order and requiring a
    -- strictly greater count makes the earliest entry win any tie.
    local best_name, best_count = 'Ruby', 0
    for _, name in ipairs(ruby_templates) do
        if count[name] > best_count then
            best_name, best_count = name, count[name]
        end
    end
    return best_name
end
---Rewrite ruby template calls in `code` into the inline `base(annotation)`
---notation and wrap the result in a `{{振假名 ... }}` template call.
---
---Only calls whose name (first letter uppercased) equals the selected template
---name are rewritten; every other part of `code` is kept as is.
---@param code string
---@param template_name? string template to convert; when omitted, the most frequent ruby template found in `code` is used
---@return string
function p.transform_code(code, template_name)
    local selected_name = template_name or get_most_frequent_ruby_template_name(code)
    local selected_ucfirst = ucfirst(selected_name)

    -- `|template=` is emitted when the caller named a template explicitly, or
    -- when the auto-detected template is anything other than 'Ruby'.
    local template_arg = ''
    if template_name then
        template_arg = '|template='..template_name
    elseif selected_ucfirst ~= 'Ruby' then
        template_arg = '|template='..selected_name
    end

    -- Replacement callback: `leading_kanji` is the optional kanji-like
    -- character directly in front of the template call.
    local function rewrite_call(leading_kanji, name, base, annotation)
        if ucfirst(name) ~= selected_ucfirst then
            -- Not the selected template: returning nothing keeps the match.
            return
        end
        local annotated = base..'('..annotation..')'
        if leading_kanji ~= '' then
            -- Presumably the '|' keeps the preceding kanji from being read as
            -- part of the annotated base text.
            return leading_kanji..'|'..annotated
        end
        return annotated
    end

    local rewritten = mw.ustring.gsub(
        code,
        '('..kanji_like_pattern..'?)'..template_pattern,
        rewrite_call
    )
    -- Drop at most one leading and one trailing newline.
    local body = rewritten:match('^\n?(.-)\n?$')

    return concat({ '{{振假名', template_arg, '\n|', body, '\n}}' })
end
---Entry point taking a frame object: converts the wikitext passed as the first
---argument.
---
---Raises an error (the message says the module must be subst'ed) unless
---`mw.isSubsting()` is true.
---@param frame table frame whose `args[1]` is the code to convert and whose optional `args.template` names the template to convert
---@return string
function p.transform(frame)
    assert(mw.isSubsting(), '必须subst此模块')
    local args = frame.args
    local code = mw.text.trim(args[1])
    -- An empty `template` argument is treated the same as an omitted one.
    local template_name = args.template
    if not template_name or template_name == '' then
        template_name = nil
    end
    return p.transform_code(code, template_name)
end
-- Export the module table.
return p