Module:Naval Vessel Register URL/data extraction tool

require ('strict');

--[[--------------------------< A F D B _ T >------------------------------------------------------------------

Special case augmentation for the various floating drydock sections that are not listed individually in the NVR.
indexes into this table are from the NVR data-shipid attribute.

]]

local AFDB_t = {
	-- AFDB-1: NVR ship ids and the section letters each one covers
	['239'] = {'A', 'G', 'H', 'I', 'J'},
	['2099'] = {'E'},
	['170'] = {'F'},

	-- AFDB-2
	['2103'] = {'A', 'G'},
	['1794'] = {'B'},
	['1795'] = {'C'},
	['2100'] = {'E', 'I'},
	['1793'] = {'J'},

	-- AFDB-7
	['5372'] = {'A', 'B', 'C', 'D', 'E', 'G'},
	['1549'] = {'F'},
	}


--[[-------------------------< N A M E _ C A S E >--------------------------------------------------------------

Make mixed case names from uppercase names: HARRY L GLUCKSMAN to Harry L Glucksman 

]]

local function name_case (name)
	-- Returns <name> trimmed, with every space-separated word lowercased and then
	-- given an uppercase first character.  The character following each apostrophe
	-- is also uppercased, and every apostrophe in the result is preceded by a backslash.
	-- A nil <name> is treated as the empty string.
	local lang = mw.getContentLanguage();										-- content language object supplies ucfirst()
	local trimmed = mw.text.trim (name or '');									-- no leading or trailing white space

	local parts = mw.text.split (trimmed, ' ');									-- split the name at the spaces if there are any
	for idx = 1, #parts do														-- for each 'word' of the name
		parts[idx] = lang:ucfirst (string.lower (parts[idx]));					-- lowercase it, then uppercase the first character
	end

	local pieces = mw.text.split (table.concat (parts, ' '), '\'');			-- rejoin, then split at apostrophes if any (O'Brien, etc)
	for idx = 1, #pieces do
		pieces[idx] = lang:ucfirst (pieces[idx]);								-- uppercase the character that followed the apostrophe
	end

	return table.concat (pieces, '\\\'');										-- rejoin with backslash-escaped apostrophes and done
end


--[[--------------------------< A D D _ D A T A >--------------------------------------------------------------

adds formatted individual ship data to the hull classification (group) table in <holding_t>

]]

local function add_data (holding_t, group, hull_num, ship_id, name)
	-- Appends one formatted line of Lua source, ['<hull_num>'] = {'<ship_id>', '<name>'},
	-- to the sequence stored at holding_t[group], creating that sequence when absent.
	local bucket = holding_t[group];
	if not bucket then															-- first ship seen for this <group>
		bucket = {};
		holding_t[group] = bucket;
	end

	local display_name = name_case (name):gsub ('\"woody\"', '\"Woody\"');		-- mixed case; special case to fix Hershel "woody" Williams (ESB-4)

	bucket[#bucket + 1] = '\t\t[\'' .. hull_num									-- open the index; index by hull number
		.. '\'] = {\'' .. ship_id												-- close index, open sequence; NVR ship identifier (becomes part of url)
		.. '\', \'' .. display_name												-- separator; ship name
		.. '\'}';																-- and close the sequence
end


--[[--------------------------< A F D B _ A D D >--------------------------------------------------------------

special case for AFDB sections.  uses <AFDB_t> to create entries for the various floating drydock sections that
are not listed individually in the NVR.

]]

local function afdb_add (holding_t, group, hull_num, ship_id, name)
	-- Calls add_data() once per section letter listed for <ship_id> in <AFDB_t>,
	-- or once with the unmodified hull number when <ship_id> has no entry there.
	local sections_t = AFDB_t[ship_id];

	if not sections_t then														-- no sections; create an entry for the drydock as a whole
		add_data (holding_t, group, hull_num, ship_id, name);
		return;
	end

	for idx = 1, #sections_t do													-- spin through the section sequence
		local section_hull = hull_num .. sections_t[idx];						-- hull number with the section identifier appended
		add_data (holding_t, group, section_hull, ship_id, name);				-- create an individual entry for this section
	end
end


--[[--------------------------< M A I N >----------------------------------------------------------------------

{{#invoke:Sandbox/Naval Vessel Register URL/data extraction tool|main}}

<tr><td>AK 3000</td><td><span title="View AK 3003" class="hullLink" data-shipid="926">AK 3003</span></td><td class="sorting_1">1ST LT ALEX BONNYMAN</td></tr>

Constitution, Maine, Texas html data look like this:
<tr><td></td><td><span title="View * 0" class="hullLink" data-shipid="1315">* 0</span></td><td class="sorting_1">CONSTITUTION</td></tr>

TODO: make this more robust?  sorting the table at NVR rearranges the html tags so the gmatch() pattern doesn't match.

]]

local function main (frame)
	-- Reads the unparsed content of the current page, extracts every
	-- (ship id, hull number, name) triple that matches the NVR html row pattern,
	-- groups the ships by hull classification, and returns (via frame:preprocess())
	-- a <syntaxhighlight>-wrapped Lua table constructor listing them.
	-- Raises an error when the current page has no content.
	local page = mw.title.getCurrentTitle();									-- get a page object for this page
	local content = page:getContent();											-- get unparsed content
	if not content then															-- nothing to read; fail with a meaningful message
		error ('Naval Vessel Register data extraction: current page has no content to read', 0);
	end
	content = content:gsub ('""', '"');											-- special case for HERSHEL ""WOODY"" WILLIAMS (ESB-4)

	local holding_t = {};														-- holds tables of sequences indexed by <group>
	local special_groups_t = {													-- constitution, maine, and texas have no alpha hull classification;
		['0'] = '0_CONSTITUTION',												-- this naming forces these entries to top of list
		['1'] = '1_MAINE',
		['2'] = '2_TEXAS',
		};

	for ship_id, hull_num, name in string.gmatch (content, 'shipid="(%d+)">([^<]+)</span></td><td class="sorting_1">([^<]+)') do
		local group = hull_num:match ('^[%a%-]+');								-- attempt to extract a group name from the hull number (the alpha and hyphen characters)
		local hull = hull_num:gsub (' +', '-');									-- ensure that the hull number has wikipedia format (<alpha + hyphens><space><digits> -> <alpha + hyphens><hyphen><digits>)

		if group then															-- the normal case
			if 'AFDB' == group then												-- special case for floating dry dock sections
				afdb_add (holding_t, group, hull, ship_id, name);
			else
				add_data (holding_t, group, hull, ship_id, name);
			end
		else																	-- special case for constitution, maine, and texas
			group = special_groups_t[hull:match ('%d')];						-- first digit of the hull number selects the entry; nil when not one of the three
			if group then
				add_data (holding_t, group, group, ship_id, name);				-- the group name doubles as the hull number
			end
		end
	end

	local function hull_sort_fn (a, b)											-- orders formatted entries by the numeric portion of the hull number
		local a_num = tonumber (a:match ('%[\'[A-Z%-]+(%d+)'));					-- nil when the entry has no <alpha + hyphens><digits> hull number
		local b_num = tonumber (b:match ('%[\'[A-Z%-]+(%d+)'));

		if a_num ~= b_num then
			if a_num and b_num then
				return a_num < b_num;											-- both numbered: ascending numeric order
			end
			return a_num ~= nil;												-- numbered entries sort ahead of unnumbered entries
		end

		return a < b;															-- same number (or both unnumbered): fall back to text order; this orders AFDB section letters
	end

	local out_t = {};

	for group_key, values_t in pairs (holding_t) do								-- for each group
		table.sort (values_t, hull_sort_fn);									-- ascending sort ships in group by hull number; also correct for multi-digit AFDB numbers

		table.insert (out_t, table.concat ({									-- add this group to the output table
			'\t[\'',															-- open the group key
			group_key,															-- add the key
			'\'] = {\n',														-- and close
			table.concat (values_t, ',\n'),										-- make and add a big string from ship sequences in group
			'\n\t\t},'															-- and close group table
			}));
	end

	table.sort (out_t);															-- ascending sort the output

	local text = table.concat ({
		'<syntaxhighlight lang="lua">return {',									-- use syntaxhighlight for styling; open the lua table
		string.rep ('\t', 18),													-- string of tab chars to properly place timestamp comment
		os.date ('!-- created %Y-%m-%dT%H:%M:%S UTC\n'),						-- add the timestamp (ISO 8601); %Y-%m-%d instead of C99-only %F
		table.concat (out_t, '\n\n'),											-- all of the groups
		'\n\t}</syntaxhighlight>',												-- and the table's closing '}'
		});

	text = text:gsub ('(%[\')%d_', '%1');										-- strip the sort prefixes from the constitution, maine, and texas keys only; assignment drops gsub's count

	return frame:preprocess (text);												-- format for output
end


--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]

return {
	main = main,																-- entry point; called as {{#invoke:...|main}}
	}

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for the purpose of restating it. We strive to provide accurate and relevant information, however:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.