打开/关闭菜单
打开/关闭外观设置菜单
打开/关闭个人菜单
未登录
登录后可编辑和发表评论。

Module:Furigana/utils

来自Vocawiki

此模块的文档可以在Module:Furigana/utils/doc创建

local p = {}

-- Cache frequently used globals as upvalues for faster access
local type, ipairs = type, ipairs
local concat = table.concat
local u_find = mw.ustring.find

-- Names of the ruby templates this module recognises
local ruby_templates = { 'Photrans', 'Photrans2', 'Photransa', 'Ruby' }
-- Reverse lookup: template name -> its position in `ruby_templates`
local ruby_template_indexes = {}
for index = 1, #ruby_templates do
	ruby_template_indexes[ruby_templates[index]] = index
end

---Upper-case the first byte of `s` and leave the remainder untouched.
---@param s string
---@return string
local function ucfirst(s)
	local head = s:sub(1, 1)
	local tail = s:sub(2)
	return head:upper()..tail
end

-- Bracketed character class (for mw.ustring patterns) that matches one
-- kanji-like code point.
local kanji_like_pattern
do
	-- Each entry is either an inclusive { first, last } code point range
	-- or a single code point given as a plain number.
	local code_point_ranges = {
		{ 0x2E80,  0x2EFF },  -- CJK Radicals Supplement
		{ 0x3005,  0x3007 },  -- 々 〆 〇
		{ 0x31C0,  0x31EF },  -- CJK Strokes
		{ 0x3400,  0x4DBF },  -- CJK Unified Ideographs Extension A
		{ 0x4E00,  0x9FFF },  -- CJK Unified Ideographs
		{ 0xF900,  0xFAFF },  -- CJK Compatibility Ideographs
		{ 0x20000, 0x2A6DF },  -- CJK Unified Ideographs Extension B
		{ 0x2A700, 0x2EE5F },  -- CJK Unified Ideographs Extensions C-I
		{ 0x2F800, 0x2FA1F },  -- CJK Compatibility Ideographs Supplement
		{ 0x30000, 0x323AF },  -- CJK Unified Ideographs Extensions G-H
	}
	local to_char = mw.ustring.char
	local pieces = {}
	for index = 1, #code_point_ranges do
		local entry = code_point_ranges[index]
		if type(entry) == 'table' then
			pieces[index] = to_char(entry[1])..'-'..to_char(entry[2])
		else
			pieces[index] = to_char(entry)
		end
	end
	kanji_like_pattern = '['..concat(pieces)..']'
end

-- Pattern for a two-argument template call of the form `{{name|base|reading}}`.
-- Captures, in order:
--   1. the template name with surrounding whitespace excluded (it starts with
--      a non-space character and contains no braces, pipes or newlines);
--   2. the first argument, which must consist solely of one or more
--      kanji-like characters;
--   3. the second argument: one or more characters other than braces, pipes,
--      `=` and newlines.
local template_pattern = '{{%s*(%S[^{}|\n]-)%s*|('..kanji_like_pattern..'+)|([^{}|=\n]+)}}'

---Return the name of the ruby template that occurs most often in `code`.
---
---Only template calls matching `template_pattern` whose name (after
---upper-casing its first byte) is listed in `ruby_templates` are counted.
---When several names share the highest count, the one listed first in
---`ruby_templates` wins. When none of them occurs at all, 'Ruby' is returned.
---@param code string
---@return string
local function get_most_frequent_ruby_template_name(code)
	local count = {}
	for _, name in ipairs(ruby_templates) do
		count[name] = 0
	end
	-- Only the first capture (the template name) is needed here
	for template_name in mw.ustring.gmatch(code, template_pattern) do
		local capitalized = ucfirst(template_name)
		if count[capitalized] then
			count[capitalized] = count[capitalized] + 1
		end
	end
	-- Start from the documented fallback: with a best count of 0, a name can
	-- only replace 'Ruby' if it actually occurs in `code`. The strict `>` keeps
	-- the earliest entry of `ruby_templates` on ties.
	local best_name, best_count = 'Ruby', 0
	for _, name in ipairs(ruby_templates) do
		if count[name] > best_count then
			best_name, best_count = name, count[name]
		end
	end
	return best_name
end


---Rewrite ruby template calls in `code` as `base(reading)` and wrap the
---result in a `{{振假名 ...}}` template call.
---
---Only calls to one template are rewritten: `template_name` when given,
---otherwise the most frequent ruby template found in `code`. Calls to other
---templates are left as they are. A `|template=` parameter is emitted when
---`template_name` was given explicitly, or when the detected name is not
---'Ruby'.
---@param code string
---@param template_name? string
---@return string
function p.transform_code(code, template_name)
	local target_name = template_name or get_most_frequent_ruby_template_name(code)
	local wanted = ucfirst(target_name)

	-- Name to write into the output, if any
	local written_name = template_name
	if not written_name and wanted ~= 'Ruby' then
		written_name = target_name
	end

	-- Replacement callback: returning nothing keeps the matched text unchanged
	local function rewrite(preceding_kanji, name, base, reading)
		if ucfirst(name) ~= wanted then
			return
		end
		local annotated = base..'('..reading..')'
		if preceding_kanji ~= '' then
			-- A kanji directly before the template is kept apart from the
			-- annotated base by a `|`
			annotated = preceding_kanji..'|'..annotated
		end
		return annotated
	end

	local search_pattern = '('..kanji_like_pattern..'?)'..template_pattern
	local converted = mw.ustring.gsub(code, search_pattern, rewrite)
	-- Drop at most one leading and one trailing newline
	local body = converted:match('^\n?(.-)\n?$')

	local header = '{{振假名'
	if written_name then
		header = header..'|template='..written_name
	end
	return header..'\n|'..body..'\n}}'
end

---Entry point for `{{subst:#invoke:...}}`: converts the first positional
---argument with `p.transform_code`.
---
---Raises an error unless the module is being substituted. An empty
---`template` argument is treated the same as an absent one.
---@param frame table
---@return string
function p.transform(frame)
	assert(mw.isSubsting(), '必须subst此模块')

	local args = frame.args
	local code = mw.text.trim(args[1])
	local template_name = args.template
	if template_name == '' then
		template_name = nil
	end
	return p.transform_code(code, template_name)
end

return p