Module:BibTeX

local p = {}

local u = mw.ustring

-- Nil-tolerant wrapper around mw.text.trim: nil stays nil, anything else is
-- handed to mw.text.trim.
local function trim(s)
	if s ~= nil then
		return mw.text.trim(s)
	end
	return nil
end

-- Trim the value, then peel off exactly one enclosing { ... } pair, but only
-- when that pair is balanced and spans the whole trimmed string.
-- A nil input is treated as "".
local function strip_outer_braces(s)
	local text = trim(s or "")
	local fully_wrapped = text:match("^%b{}$") ~= nil
	if not fully_wrapped then
		return text
	end
	return text:sub(2, -2)
end

-- Trim the value, then drop one pair of double quotes when the trimmed text
-- both starts and ends with '"'. A nil input is treated as "".
local function strip_outer_quotes(s)
	local text = trim(s or "")
	local quoted = text:match('^".*"$') ~= nil
	if not quoted then
		return text
	end
	return text:sub(2, -2)
end

-- Clean up one raw BibTeX field value:
--   * remove one layer of outer braces, then one layer of outer quotes;
--   * unescape "\&";
--   * turn the TeX tie "~" into a plain space;
--   * collapse whitespace runs to a single space and trim.
-- Returns the cleaned string (possibly "").
local function normalize_value(s)
	s = strip_outer_quotes(strip_outer_braces(s))
	s = s:gsub("\\&", "&")
	-- A bare "~" is a non-breaking space, but "\~" is the tilde *accent*
	-- command (e.g. \~n); it must survive so the accent can still be decoded
	-- by the TeX-to-Unicode step that runs after this function.
	s = s:gsub("(\\?)~", function(backslash)
		if backslash == "\\" then
			return nil -- nil keeps the original "\~" untouched
		end
		return " "
	end)
	s = s:gsub("%s+", " ")
	return trim(s)
end

-- Recognise a generational name suffix.
-- Returns "Jr." / "Sr." for any capitalisation of jr/sr (with or without a
-- trailing dot), the numeral itself for an upper-case Roman numeral
-- (II, III, IV, ...), and nil for anything else, including nil/empty input.
local function normalize_suffix(s)
	s = trim(s or "")
	if s == "" then return nil end
	-- drop a trailing dot for matching, remembering whether there was one
	local bare, dots = s:gsub("%.$", "")
	if bare == "" then return nil end -- in case it was just a dot
	local low = mw.ustring.lower(bare)
	if low == "jr" then return "Jr." end
	if low == "sr" then return "Sr." end
	-- Roman numerals (II, III, IV, ...), assuming caps.
	if bare:match("^[IVXLCDM]+$") then
		-- A single capital followed by a dot ("D.", "C.", "M.") is a name
		-- initial, not a numeral; treating it as a suffix misfiles authors
		-- written as "Knuth, D.".
		if dots > 0 and #bare == 1 then
			return nil
		end
		return bare
	end
	return nil
end

-- Convert TeX dash conventions in a page range to Unicode dashes:
-- "---" becomes an em dash and "--" an en dash (longest first, so that a
-- triple hyphen is not consumed as en dash + hyphen).
-- Returns nil when the trimmed input is empty or nil.
local function normalize_pages(pages)
	local text = trim(pages or "")
	if text == "" then
		return nil
	end
	local with_em = text:gsub("%-%-%-", "—")
	local with_en = with_em:gsub("%-%-", "–")
	return with_en
end

-- Very small TeX->Unicode helper for common BibTeX name/title encodings.
--
-- Handles single-letter accent commands (\'e, \"{o}, \v{s}, \c c, ...), the
-- ligature/special-letter commands \ae \AE \oe \OE \aa \AA \o \O \ss, and
-- finally removes every "{" and "}" from the string.
--
-- @param s  string or nil
-- @return   the converted string; nil/"" are returned unchanged
local function tex_to_unicode(s)
	if not s or s == "" then return s end

	local accents = {
		["'"] = {a="á",e="é",i="í",o="ó",u="ú",y="ý",A="Á",E="É",I="Í",O="Ó",U="Ú",Y="Ý",c="ć",C="Ć",n="ń",N="Ń",s="ś",S="Ś",z="ź",Z="Ź"},
		["`"] = {a="à",e="è",i="ì",o="ò",u="ù",A="À",E="È",I="Ì",O="Ò",U="Ù"},
		['"'] = {a="ä",e="ë",i="ï",o="ö",u="ü",y="ÿ",A="Ä",E="Ë",I="Ï",O="Ö",U="Ü"},
		["^"] = {a="â",e="ê",i="î",o="ô",u="û",A="Â",E="Ê",I="Î",O="Ô",U="Û"},
		["~"] = {a="ã",n="ñ",o="õ",A="Ã",N="Ñ",O="Õ"},
		["c"] = {c="ç",C="Ç"},
		["v"] = {s="š",S="Š",z="ž",Z="Ž",c="č",C="Č",r="ř",R="Ř",n="ň",N="Ň"},
		["u"] = {a="ă",A="Ă",g="ğ",G="Ğ"},
		["H"] = {o="ő",O="Ő",u="ű",U="Ű"},
		["."] = {z="ż",Z="Ż"},
		["="] = {a="ā",A="Ā",e="ē",E="Ē",i="ī",I="Ī",o="ō",O="Ō",u="ū",U="Ū"},
	}

	-- Unknown accent/letter combinations fall back to the bare letter.
	local function accented(acc, letter)
		local m = accents[acc]
		return (m and m[letter]) or letter
	end

	-- 1. Fully braced argument, any accent: \'{a}, \v{s}, \c { c }.
	--    The closing brace is only consumed here, where it belongs to the
	--    accent's own argument.
	s = s:gsub("\\([\"'`%^~%.=vucH])%s*%{%s*([A-Za-z])%s*%}", accented)
	-- 2. Symbol accents with a bare (or half-braced) letter: \'a, \' a.
	--    Nothing after the letter is consumed, so "Jos\'e Garcia" keeps the
	--    space between the two words.
	s = s:gsub("\\([\"'`%^~%.=])%s*%{?%s*([A-Za-z])", accented)
	-- 3./4. Letter-named accents need a brace or whitespace before their
	--    argument; otherwise "\c", "\v", "\u", "\H" would also match the
	--    start of longer commands such as \copyright or \url.
	s = s:gsub("\\([vucH])%s*%{%s*([A-Za-z])", accented)
	s = s:gsub("\\([vucH])%s+([A-Za-z])", accented)

	-- common ligatures/specials
	s = s:gsub("\\ae", "æ"):gsub("\\AE", "Æ")
	s = s:gsub("\\oe", "œ"):gsub("\\OE", "Œ")
	s = s:gsub("\\aa", "å"):gsub("\\AA", "Å")
	-- %f[%A] (frontier): \o and \O only count when no further letter
	-- follows, so \omega / \Omega are left alone.
	s = s:gsub("\\o%f[%A]", "ø"):gsub("\\O%f[%A]", "Ø")
	s = s:gsub("\\ss", "ß")

	-- remove brace-protection used for capitalization
	s = s:gsub("[{}]", "")

	return s
end

-- Read a brace-delimited group starting at position i (src[i] must be "{").
-- Braces directly preceded by a backslash are ignored for nesting purposes.
-- Returns the text between the outermost braces and the index just past the
-- closing brace, or nil and the unchanged i when the braces never balance.
local function read_balanced_braces(src, i)
	local BACKSLASH = "\\"
	local level = 0
	local inner_from = nil

	for pos = i, #src do
		local escaped = pos > 1 and src:sub(pos - 1, pos - 1) == BACKSLASH
		if not escaped then
			local c = src:sub(pos, pos)
			if c == "{" then
				level = level + 1
				if level == 1 then
					inner_from = pos + 1
				end
			elseif c == "}" then
				level = level - 1
				if level == 0 then
					return src:sub(inner_from or (i + 1), pos - 1), pos + 1
				end
			end
		end
	end

	return nil, i -- unbalanced
end

-- Read a double-quoted value starting at position i (src[i] must be '"').
-- The value ends at the first '"' that is not directly preceded by a
-- backslash. Returns the text between the quotes and the index just past the
-- closing quote. When no closing quote exists, returns the remainder of the
-- string together with the unchanged i.
local function read_quoted(src, i)
	local first = i + 1
	for pos = first, #src do
		if src:sub(pos, pos) == '"' then
			local escaped = pos > 1 and src:sub(pos - 1, pos - 1) == "\\"
			if not escaped then
				return src:sub(first, pos - 1), pos + 1
			end
		end
	end
	return src:sub(first), i
end

-- Advance i past any run of spaces, newlines, carriage returns, tabs and
-- commas. Returns the index of the first character that is none of these
-- (or #src + 1 when the run reaches the end of the string).
local function skip_ws_and_commas(src, i)
	local skippable = {
		[" "] = true, ["\n"] = true, ["\r"] = true, ["\t"] = true, [","] = true,
	}
	local last = #src
	while i <= last and skippable[src:sub(i, i)] do
		i = i + 1
	end
	return i
end

-- Read a run of "word" characters (alphanumerics plus _ - : .) starting at
-- position i. Returns the word (possibly "") and the index of the first
-- character after it. Callers pass i >= 1.
local function read_word(src, i)
	-- "^" anchors the match at the init position i
	local word = src:match("^[%w_%-%:%.]*", i) or ""
	return word, i + #word
end

-- Read one field value starting at or after position i.
-- Leading whitespace/commas are skipped first; then the value is read as a
-- braced group, a quoted string, or a bareword that runs up to the next
-- comma or closing_char. Returns the raw value and the index to resume from.
local function read_value(src, i, closing_char)
	local start = skip_ws_and_commas(src, i)
	local lead = src:sub(start, start)

	if lead == "{" or lead == '"' then
		local reader = (lead == "{") and read_balanced_braces or read_quoted
		local inner, after = reader(src, start)
		return inner or "", after
	end

	-- bareword: scan to the delimiter without consuming it
	local stop = start
	local last = #src
	while stop <= last do
		local c = src:sub(stop, stop)
		if c == "," or c == closing_char then
			break
		end
		stop = stop + 1
	end
	return src:sub(start, stop - 1), stop
end

-- Parse the first BibTeX entry found in src.
--
-- On success returns a table
--   { entryType = <lower-cased entry type>, citekey = <string>,
--     fields = { [lower-cased field name] = <normalised value> } }
-- On failure returns nil plus a short error message.
--
-- Field values go through normalize_value and tex_to_unicode; fields whose
-- value ends up empty are dropped. Everything before the first "@" is ignored.
-- NOTE(review): both normalisation steps remove braces, so a brace-protected
-- author such as {{Barnes and Noble}} appears to reach later stages without
-- its protection - confirm whether that is intended.
local function parse_bibtex(src)
	src = trim(src or "")
	if src == "" then
		return nil, "empty input"
	end

	local at = src:find("@", 1, true)
	if not at then
		return nil, "no @ found"
	end
	src = src:sub(at)

	-- entry type
	local entry_type = src:match("^@%s*([%a]+)")
	if not entry_type then
		return nil, "cannot read entry type"
	end
	entry_type = u.lower(entry_type)

	-- Opening delimiter: whichever of "{" or "(" comes FIRST. Looking for "{"
	-- before "(" would pick a field's brace in "@article(key, title={...})".
	local open_pos = src:find("[{(]")
	if not open_pos then
		return nil, "no opening { or ("
	end
	local open_ch = src:sub(open_pos, open_pos)
	local close_ch = (open_ch == "{") and "}" or ")"

	local i = open_pos + 1
	i = skip_ws_and_commas(src, i)

	-- citekey until first comma
	local comma = src:find(",", i, true)
	if not comma then
		return nil, "no comma after citekey"
	end
	local citekey = trim(src:sub(i, comma - 1))
	i = comma + 1

	local fields = {}
	while i <= #src do
		i = skip_ws_and_commas(src, i)
		local ch = src:sub(i, i)
		if ch == "" then break end
		if ch == close_ch then
			break
		end

		local name, ni = read_word(src, i)
		name = u.lower(trim(name or ""))
		i = ni

		i = skip_ws_and_commas(src, i)
		-- expect '=' (allow whitespace)
		if src:sub(i, i) == "=" then
			i = i + 1
		else
			-- skip forward to '=' if present; every pass through the loop
			-- therefore either stops or moves past an '='
			local eq = src:find("=", i, true)
			if not eq then break end
			i = eq + 1
		end

		local rawv, nvi = read_value(src, i, close_ch)
		i = nvi

		local v = normalize_value(rawv)
		v = tex_to_unicode(v)

		-- Skip values with no readable field name (malformed input) instead
		-- of filing them under the empty key.
		if name ~= "" and v ~= nil and v ~= "" then
			fields[name] = v
		end
	end

	return {
		entryType = entry_type,
		citekey = citekey,
		fields = fields
	}
end

-- Turn a BibTeX month value into an English month name.
--
-- Accepts month numbers ("3", "03"), three-letter abbreviations and "sept"
-- (any case, optional trailing dot: "jan", "Jan.", "SEPT") and full month
-- names in any case ("january"). Anything unrecognised is returned trimmed
-- but otherwise unchanged. Returns nil for nil or blank input.
local function month_to_name(m)
	if not m or m == "" then return nil end
	m = trim(m)
	if m == "" then return nil end

	local names = {
		"January", "February", "March", "April", "May", "June",
		"July", "August", "September", "October", "November", "December",
	}

	-- numeric months, including strings with leading zeros
	local n = tonumber(m)
	if n and n >= 1 and n <= 12 then
		return names[n]
	end

	-- textual months: compare case-insensitively and ignore one trailing dot,
	-- so "Jan." and "january" become "January" instead of leaking into the
	-- date as typed
	local key = u.lower(m):gsub("%.$", "")
	if key == "sept" then
		return "September"
	end
	for _, name in ipairs(names) do
		local full = name:lower()
		if key == full or key == full:sub(1, 3) then
			return name
		end
	end
	return m
end

-- Split a BibTeX author field into a list of author records.
--
-- Names are separated by " and " (any case) outside braces. Each record is
-- either { literal = <string> } for a single-token or fully brace-wrapped
-- name, or { last = <string>, first = <string> }, where a recognised suffix
-- (Jr., Sr., Roman numeral) is appended to the first name.
--
-- Supported shapes: "Last, First", "Last, Suffix, First",
-- "Last, First, Suffix", "First Last, Suffix" and "First von Last [Suffix]".
-- Returns {} for nil or empty input.
local function split_authors(author_field)
	if not author_field or author_field == "" then return {} end
	local s = author_field

	-- split on " and " at brace depth 0
	local parts = {}
	local buf = {}
	local depth = 0
	local i = 1
	while i <= #s do
		local ch = s:sub(i,i)
		if ch == "{" then depth = depth + 1 end
		if ch == "}" and depth > 0 then depth = depth - 1 end

		if depth == 0 and string.lower(s:sub(i, i+4)) == " and " then
			table.insert(parts, trim(table.concat(buf)))
			buf = {}
			i = i + 5
		else
			table.insert(buf, ch)
			i = i + 1
		end
	end
	local last = trim(table.concat(buf))
	if last ~= "" then table.insert(parts, last) end

	local authors = {}
	for _, a in ipairs(parts) do
		a = trim(a)
		if a and a ~= "" then
			-- literal org author if whole thing brace-wrapped
			if a:match("^%b{}$") and not a:find(",") then
				table.insert(authors, { literal = strip_outer_braces(a) })
			elseif a:find(",") then
				local name_parts = mw.text.split(a, "%s*,%s*")
				if #name_parts == 3 then
					-- "Last, Suffix, First" or "Last, First, Suffix"
					local last_name  = trim(name_parts[1] or "")
					local suffix     = normalize_suffix(name_parts[2])
					local first_name
					if suffix then
						first_name = trim(name_parts[3] or "")
					else
						first_name = trim(name_parts[2] or "")
						suffix     = normalize_suffix(name_parts[3])
					end
					if suffix then
						first_name = first_name .. ' ' .. suffix
					end
					table.insert(authors, { last = last_name, first = first_name })
				else
					-- Split at the first comma. "(.*)" also accepts an empty
					-- tail, so "Smith," keeps "Smith" as the last name.
					local first_chunk, last_chunk = a:match("^(.-)%s*,%s*(.*)$")
					local suffix = normalize_suffix(last_chunk)
					local first_name, last_name
					if suffix then
						-- "First Last, Suffix": first_chunk is in first-last order
						first_name, last_name = first_chunk:match("^(.-)%s+(.+)$")
					end
					if suffix and first_name then
						first_name = first_name .. ' ' .. suffix
					else
						-- Plain "Last, First". Also reached when the part
						-- after the comma looks like a suffix but the part
						-- before it is a single word (e.g. "Knuth, D"): it is
						-- then taken as the first name rather than
						-- concatenating onto nil.
						first_name = last_chunk
						last_name  = first_chunk
					end
					table.insert(authors, { last = trim(last_name or ""), first = trim(first_name or "") })
				end
			else
				local tokens = mw.text.split(a, "%s+")
				-- Only look for a suffix when something would remain after
				-- removing it; a lone "III" stays a literal name instead of
				-- leaving an empty token list.
				local suffix = nil
				if #tokens > 1 then
					suffix = normalize_suffix(tokens[#tokens])
				end
				if suffix then table.remove(tokens) end
				-- "First von Last" heuristic: fold trailing lowercase particles into last name
				if #tokens == 1 then
					table.insert(authors, { literal = tokens[1] })
				else
					local j = #tokens - 1
					while j >= 1 and tokens[j]:match("^[%l][%l%-%']*$") do
						j = j - 1
					end
					local first_name = table.concat(tokens, " ", 1, j)
					if suffix then
						first_name = first_name .. ' ' .. suffix
					end
					local last_name = table.concat(tokens, " ", j + 1, #tokens)
					table.insert(authors, { last = trim(last_name), first = trim(first_name) })
				end
			end
		end
	end
	return authors
end

-- Set of DOI registrant codes (the digits between "10." and the first "/")
-- whose DOIs get doi-access=free. Keys are strings; the value 1 only marks
-- membership.
-- Numbers from Module:Citation/CS1/Configuration - may need updating sometimes
local FREE_DOI_REGISTRANTS = {
	['1045'] =1, ['1074'] =1, ['1096'] =1, ['1100'] =1, ['1155'] =1, ['1186'] =1, ['1194'] =1, ['1371'] =1, ['1613'] =1, ['1629'] =1, ['1989'] =1, ['1999'] =1, ['2147'] =1,
	['2196'] =1, ['3285'] =1, ['3389'] =1, ['3390'] =1, ['3906'] =1, ['3748'] =1, ['3814'] =1, ['3847'] =1, ['3897'] =1, ['4061'] =1, ['4089'] =1, ['4103'] =1, ['4172'] =1,
	['4175'] =1, ['4230'] =1, ['4236'] =1, ['4239'] =1, ['4240'] =1, ['4249'] =1, ['4251'] =1, ['4252'] =1, ['4253'] =1, ['4254'] =1, ['4291'] =1, ['4292'] =1, ['4329'] =1,
	['4330'] =1, ['4331'] =1, ['5086'] =1, ['5194'] =1, ['5210'] =1, ['5306'] =1, ['5312'] =1, ['5313'] =1, ['5314'] =1, ['5315'] =1, ['5316'] =1, ['5317'] =1, ['5318'] =1,
	['5319'] =1, ['5320'] =1, ['5321'] =1, ['5334'] =1, ['5402'] =1, ['5409'] =1, ['5410'] =1, ['5411'] =1, ['5412'] =1, ['5492'] =1, ['5493'] =1, ['5494'] =1, ['5495'] =1,
	['5496'] =1, ['5497'] =1, ['5498'] =1, ['5499'] =1, ['5500'] =1, ['5501'] =1, ['5527'] =1, ['5528'] =1, ['5662'] =1, ['6064'] =1, ['6219'] =1, ['7167'] =1, ['7217'] =1,
	['7287'] =1, ['7482'] =1, ['7490'] =1, ['7554'] =1, ['7717'] =1, ['7759'] =1, ['7766'] =1, ['9778'] =1, ['11131']=1, ['11569']=1, ['11647']=1, ['11648']=1, ['12688']=1,
	['12703']=1, ['12715']=1, ['12942']=1, ['12998']=1, ['13105']=1, ['14256']=1, ['14293']=1, ['14303']=1, ['15215']=1, ['15347']=1, ['15412']=1, ['15560']=1, ['16995']=1,
    ['17645']=1, ['18637']=1, ['19080']=1, ['19173']=1, ['20944']=1, ['21037']=1, ['21105']=1, ['21468']=1, ['21767']=1, ['21425']=1, ['22261']=1, ['22323']=1, ['22459']=1,
    ['24105']=1, ['24196']=1, ['24966']=1, ['26775']=1, ['30845']=1, ['32545']=1, ['35711']=1, ['35712']=1, ['35713']=1, ['35995']=1, ['36648']=1, ['37126']=1, ['37532']=1,
    ['37871']=1, ['47128']=1, ['47622']=1, ['47959']=1, ['52437']=1, ['52975']=1, ['53288']=1, ['53347']=1, ['54081']=1, ['54947']=1, ['55667']=1, ['55914']=1, ['57009']=1,
    ['58647']=1, ['59081']=1,
}

-- Extract the registrant code from a canonical DOI of the form
-- "10.<registrant>/<suffix>". Returns the digit string, or nil when the DOI
-- does not have that shape. A nil/false argument is returned unchanged.
local function doi_registrant(doi)
	if not doi then
		return doi
	end
	local registrant = doi:match("^10%.(%d+)/")
	return registrant
end

-- Report whether the DOI's registrant is listed in FREE_DOI_REGISTRANTS.
-- Returns the table entry (truthy) for a listed registrant, otherwise false.
local function is_free_doi(doi)
	local registrant = doi_registrant(doi)
	if not registrant then
		return false
	end
	return FREE_DOI_REGISTRANTS[registrant] or false
end

-- Reduce a DOI field to the bare identifier.
-- Removes a leading "http(s)://doi.org/" or "http(s)://dx.doi.org/" and a
-- leading "doi:" label (both matched case-insensitively), after trimming.
-- Returns nil for nil/empty input; the result may be "" if nothing is left.
local function strip_doi(doi)
	if not doi or doi == "" then return nil end
	doi = trim(doi)

	-- Remove `pattern` from the start of `text`, matching case-insensitively.
	local function drop_prefix(text, pattern)
		local _, stop = text:lower():find(pattern)
		if stop then
			return text:sub(stop + 1)
		end
		return text
	end

	-- Lua patterns cannot make a group optional: in "(dx%.)?" the "?" is a
	-- literal character, so that single pattern never matched a real URL.
	-- The two host spellings are therefore handled separately.
	doi = drop_prefix(doi, "^https?://dx%.doi%.org/")
	doi = drop_prefix(doi, "^https?://doi%.org/")
	doi = drop_prefix(doi, "^doi:%s*")
	return doi
end

-- Make a value safe to place inside a template call by replacing every "|"
-- with its HTML entity. nil and "" are returned unchanged.
local function wikitext_escape(s)
	if s and s ~= "" then
		-- parentheses drop gsub's second return value (the match count)
		return (s:gsub("|", "&#124;"))
	end
	return s
end

-- Lookup from lower-cased BibTeX entry type to the citation template name.
-- Built from a template -> entry types listing so that each template name is
-- written once.
local cite_template_for_type = {}
for template_name, entry_types in pairs({
	["Cite journal"]    = { "article" },
	["Cite book"]       = { "book", "inbook", "incollection" },
	["Cite conference"] = { "inproceedings", "conference" },
	["Cite report"]     = { "techreport", "report" },
	["Cite thesis"]     = { "phdthesis", "mastersthesis", "thesis" },
	["Cite web"]        = { "misc" },
}) do
	for _, entry_type in ipairs(entry_types) do
		cite_template_for_type[entry_type] = template_name
	end
end

-- Build the wikitext of a citation template call from a parsed entry.
--
-- @param data         table returned by parse_bibtex (uses .fields and .entryType)
-- @param forced_type  optional entry type that overrides data.entryType
-- @return             string of the form "{{<template>|name=value|...}}"
--
-- The template is chosen through cite_template_for_type; unknown entry types
-- fall back to "Cite journal". Parameters are emitted in a fixed order and
-- every value is passed through wikitext_escape.
local function build_cite(data, forced_type)
	local f = data.fields
	local et = forced_type or data.entryType
	local tpl = cite_template_for_type[et] or "Cite journal"

	local params = {}

	-- authors
	local authors = split_authors(f.author)
	for i, a in ipairs(authors) do
		if a.literal then
			params["author" .. i] = a.literal
		else
			if a.last and a.last ~= "" then params["last" .. i] = a.last end
			if a.first and a.first ~= "" then params["first" .. i] = a.first end
		end
	end

	-- date/year/month: "Month Year" wins, then an explicit date field, then
	-- the bare year
	local year = f.year
	local month = month_to_name(f.month)
	if year and month then
		params.date = month .. " " .. year
	elseif f.date then
		params.date = f.date
	elseif year then
		params.year = year
	end

	-- title-ish fields
	if f.title then params.title = f.title end
	if f.subtitle and not params.title then params.title = f.subtitle end

	-- container fields by type
	if tpl == "Cite journal" then
		if f.journal then params.journal = f.journal end
		if f.volume then params.volume = f.volume end
		if f.number then params.issue = f.number end
	elseif tpl == "Cite book" then
		if f.publisher then params.publisher = f.publisher end
		if f.address then params.location = f.address end
		if f.location then params.location = f.location end
		if f.edition then params.edition = f.edition end
		if f.series then params.series = f.series end
		if f.booktitle then
			-- The entry's own title names the part inside the book; keep it
			-- as the chapter instead of overwriting it with the book title.
			if params.title then params.chapter = params.title end
			params.title = f.booktitle
		end
	elseif tpl == "Cite conference" then
		-- NOTE(review): current CS1 documentation appears to spell this
		-- parameter |book-title=; confirm whether |booktitle= is still accepted.
		if f.booktitle then params.booktitle = f.booktitle end
		if f.organization then params.publisher = f.organization end
	elseif tpl == "Cite thesis" then
		-- BibTeX stores the degree-granting body in "school"
		if f.school then params.publisher = f.school end
	elseif tpl == "Cite report" then
		-- BibTeX stores the issuing body in "institution"
		if f.institution then params.publisher = f.institution end
	end

	-- common bibliographic bits
	if f.pages and f.pages ~= "{}" then
		local pg = normalize_pages(f.pages)
		if pg then params.pages = pg end
	end
	if f.url then params.url = f.url end
	if f.doi then
		local doi = strip_doi(f.doi)
		if doi and doi ~= "" then
			params.doi = doi
			if is_free_doi(doi) then
				params["doi-access"] = "free"
			end
		end
	end
	if f.isbn then params.isbn = f.isbn end
	if f.issn then params.issn = f.issn end
	if f.publisher and not params.publisher then params.publisher = f.publisher end

	-- stable-ish ordering for output
	local ordered = {}

	-- append parameter k to the output list if it was set to a non-empty value
	local function add(k)
		if params[k] and params[k] ~= "" then
			table.insert(ordered, {k = k, v = params[k]})
		end
	end

	-- author params (last/first or authorN)
	for i = 1, #authors do
		add("last" .. i); add("first" .. i); add("suffix" .. i); add("author" .. i)
	end

	add("date"); add("year")
	add("chapter"); add("title")
	add("journal"); add("booktitle")
	-- edition and series are collected above and must be listed here,
	-- otherwise they never reach the output
	add("edition"); add("series")
	add("publisher"); add("location")
	add("volume"); add("issue")
	add("pages")
	add("doi"); add("doi-access")
	add("isbn"); add("issn")
	add("url")

	local out = { "{{", tpl }
	for _, kv in ipairs(ordered) do
		table.insert(out, "|" .. kv.k .. "=" .. wikitext_escape(kv.v))
	end
	table.insert(out, "}}")
	return table.concat(out)
end

-- Trim and lower-case an argument value; nil, empty and whitespace-only
-- values all become nil.
local function lc_or_nil(s)
	local text = trim(s)
	if text and text ~= "" then
		return mw.ustring.lower(text)
	end
	return nil
end

-- Module entry point.
--
-- The BibTeX source is taken from the first positional argument of the
-- invocation, then of the parent frame, then from a BibTeX/BibTex/bibtex
-- named argument of either. Optional arguments (invocation first, then
-- parent):
--   type    - overrides the entry type used to pick the citation template
--   debug   - when non-empty, return the parsed entry as JSON text
--   display - when non-blank, return the template call wrapped in nowiki
--             tags instead of expanding it
-- Returns an error <span> when the entry cannot be parsed.
function p.main(frame)
	local args = frame.args
	local parent_frame = frame:getParent()
	local parent = parent_frame and parent_frame.args or {}

	local function blank(v)
		return not v or v == ""
	end

	local bib = args[1]
	if blank(bib) and not blank(parent[1]) then
		bib = parent[1]
	end
	if blank(bib) then
		bib = args.BibTeX or args.BibTex or args.bibtex or parent.BibTeX or parent.BibTex or parent.bibtex
	end

	local forced = lc_or_nil(args.type) or lc_or_nil(parent.type)
	local debug_flag = args.debug or parent.debug

	local data, err = parse_bibtex(bib or "")
	if not data then
		return '<span class="error">BibTeX parse error: ' .. wikitext_escape(err or "unknown") .. "</span>"
	end

	if debug_flag and tostring(debug_flag) ~= "" then
		return mw.text.nowiki(mw.text.jsonEncode(data))
	end

	local cite_wikitext = build_cite(data, forced)
	local display_only = lc_or_nil(args.display) or lc_or_nil(parent.display)
	if display_only then
		return "<nowiki>" .. cite_wikitext .. "</nowiki>"
	end

	return frame:preprocess(cite_wikitext)
end

return p

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for republication. We strive to provide accurate and relevant information; however, please note the points below.
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.