Module:DecodeEncode

From Timeline of History
Revision as of 16:38, 9 November 2023 by Karen (talk | contribs) (1 revision imported)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Documentation for this module may be created at Module:DecodeEncode/doc

-- Load Scribunto's `strict` library (presumably turns use of undeclared
-- globals into an error -- see the Scribunto manual to confirm).
require('strict')
-- Module table: the functions attached to it below are returned at the end of the file.
local p = {}

local function _getBoolean( boolean_str )
	-- Adapted from Module:String.
	-- Coerces an argument to a strict Lua boolean. Only an explicit
	-- affirmative produces true; anything unrecognised produces false.
	local kind = type( boolean_str )

	-- Genuine booleans are passed through untouched.
	if kind == 'boolean' then
		return boolean_str
	end

	-- nil, numbers, tables, ... are never considered true.
	if kind ~= 'string' then
		return false
	end

	-- Case-insensitive comparison against the accepted spellings.
	-- No trimming is done, so ' true' is not accepted.
	local lowered = boolean_str:lower()
	return lowered == 'true' or lowered == 'yes' or lowered == '1'
end

function p.decode( frame )
	-- Frame-based entry point: reads the arguments from frame.args and
	-- delegates to p._decode.
	--   s           : text to decode; a missing argument becomes ''
	--   subset_only : converted with _getBoolean, so only an explicit
	--                 true / yes / 1 (or boolean true) switches it on
	local args = frame.args
	local text = args['s'] or ''
	-- _getBoolean already maps a missing (nil) argument to false.
	local subset_flag = _getBoolean( args['subset_only'] )

	return p._decode( text, subset_flag )
end

function p._decode( s, subset_only )
	-- Decodes HTML entities in `s` using mw.text.decode.
	--   s           : string to decode
	--   subset_only : when truthy, mw.text.decode is called with its second
	--                 argument (decodeNamedEntities) set to false; otherwise
	--                 it is set to true
	-- Returns the decoded string.
	--
	-- Before decoding, two named entities are rewritten to their numeric
	-- form. The search and replacement strings must stay as literal entity
	-- text; if they are stored as the decoded characters, each gsub merely
	-- replaces a character with itself and the workaround is lost.
	-- The rewrite is applied regardless of subset_only.

	-- U+2009 THIN SPACE: workaround for bug: named entity "&thinsp;" is
	-- decoded incorrectly; numeric entity "&#8201;" is decoded properly.
	s = mw.ustring.gsub( s, '&thinsp;', '&#8201;' )
	-- U+03B5 GREEK SMALL LETTER EPSILON: workaround for bug (phab:T328840):
	-- named entity "&epsilon;" is decoded incorrectly for gsub(); numeric
	-- entity "&#949;" is decoded properly.
	s = mw.ustring.gsub( s, '&epsilon;', '&#949;' )

	local ret = mw.text.decode( s, not subset_only )

	return ret
end

function p.encode( frame )
	-- Frame-based entry point: forwards the `s` argument (defaulting to '')
	-- and the optional `charset` argument to p._encode.
	local args = frame.args
	local text = args['s'] or ''

	return p._encode( text, args['charset'] )
end

function p._encode( s, charset )
	-- Encodes characters of `s` as HTML entities using mw.text.encode.
	--   s       : string to encode
	--   charset : optional set of characters to encode
	--             example: charset = '_&©−°\\\"\'\=' -- do escape with backslash, not %
	-- Returns the encoded string.

	-- No usable charset (nil, false or ''): let mw.text.encode apply its
	-- default set, i.e. < > & " ' and the non-breaking space (NBSP).
	if not charset or charset == '' then
		return ( mw.text.encode( s ) )
	end

	-- Caller-supplied character set.
	return ( mw.text.encode( s, charset ) )
end

-- Export the module table with decode/_decode/encode/_encode.
return p