Module:Naval Vessel Register URL/data extraction tool
This tool reads a local copy of data from the NVR web page to extract information required by Module:Naval Vessel Register URL.
To use this tool:
- open a blank sandbox page for editing – can be any page, there will be no need to save it unless you want to
- copy and paste this line into the sandbox:
{{#invoke:Naval Vessel Register URL/data extraction tool|main}}
- in another browser window, open the NVR hull classification page for Ships Listed by Name or Service Craft Listed by Name
- from the horizontal alphabetical menu choose 'ALL'
- choose 'All entries per page'
- DO NOT sort the table!
- hover the mouse pointer over the first item in the data table
- right click and choose 'Inspect' from the context menu
- hover the mouse pointer over the html table's <tbody> tag
- right click and choose 'Copy > Copy element' from the context menu
- paste the element into the sandbox below the line added at step 2
- click Show preview to run the tool
- copy the rendered result and paste over the same Lua table in Module:Naval Vessel Register URL/data
- repeat from step 3 for the other list
require ('strict');
--[[--------------------------< A F D B _ T >------------------------------------------------------------------

Supplementary data for floating drydock (AFDB) sections, which the NVR does not list one by one.

Each key is an NVR data-shipid attribute value (kept as a string); each value is the sequence of section
letters recorded under that ship id.  Rows are grouped under the drydock they belong to.

]]

local AFDB_t = {
	-- AFDB-1
	['239'] = {'A', 'G', 'H', 'I', 'J'},
	['2099'] = {'E'},
	['170'] = {'F'},

	-- AFDB-2
	['2103'] = {'A', 'G'},
	['1794'] = {'B'},
	['1795'] = {'C'},
	['2100'] = {'E', 'I'},
	['1793'] = {'J'},

	-- AFDB-7
	['5372'] = {'A', 'B', 'C', 'D', 'E', 'G'},
	['1549'] = {'F'},
	}
--[[-------------------------< N A M E _ C A S E >--------------------------------------------------------------

Convert an all-uppercase ship name to mixed case, e.g. HARRY L GLUCKSMAN -> Harry L Glucksman.

<name> is trimmed, split at spaces, and every word is lowercased then given an uppercase first character.
The result is split again at apostrophes so that the piece after each apostrophe is also capitalised
(O'BRIEN -> O'Brien).  The pieces are rejoined with a backslash-escaped apostrophe, so the returned text can
sit inside a single-quoted Lua string.  A nil <name> is treated as an empty string.

]]

local function name_case (name)
	local lang = mw.getContentLanguage();							-- one language object serves both passes
	local trimmed = mw.text.trim (name or '');						-- strip leading and trailing white space

	local parts = mw.text.split (trimmed, ' ');						-- first pass: space-separated words
	for idx = 1, #parts do
		parts[idx] = lang:ucfirst (parts[idx]:lower());				-- lowercase the word, then capitalise its first character
	end

	local pieces = mw.text.split (table.concat (parts, ' '), '\'');	-- second pass: apostrophe-separated pieces
	for idx = 1, #pieces do
		pieces[idx] = lang:ucfirst (pieces[idx]);					-- capitalise whatever follows an apostrophe
	end

	return table.concat (pieces, '\\\'');							-- rejoin with an escaped apostrophe
end
--[[--------------------------< A D D _ D A T A >--------------------------------------------------------------

Append one formatted ship entry to the <group> sequence held in <holding_t>, creating that sequence when it
does not exist yet.

The entry is a line of Lua source text of the form:
	['<hull_num>'] = {'<ship_id>', '<name>'}
where <name> has first been passed through name_case().

]]

local function add_data (holding_t, group, hull_num, ship_id, name)
	local entries_t = holding_t[group];
	if not entries_t then											-- first ship seen for this group
		entries_t = {};
		holding_t[group] = entries_t;
	end

	-- mixed-case the name; the parentheses drop gsub's second return value
	-- special case: name_case() leaves the quoted nickname of Hershel "woody" Williams (ESB-4) in lowercase
	local display_name = (name_case (name):gsub ('"woody"', '"Woody"'));

	-- hull number is the index; the sequence holds the NVR ship identifier (becomes part of the url) and the ship name
	entries_t[#entries_t + 1] = string.format ('\t\t[\'%s\'] = {\'%s\', \'%s\'}', hull_num, ship_id, display_name);
end
--[[--------------------------< A F D B _ A D D >--------------------------------------------------------------

Special handling for AFDB floating drydocks.  When <AFDB_t> holds a section sequence for <ship_id>, one entry
is created per section with the section letter appended to <hull_num>; otherwise a single entry is created
for the drydock as a whole.

]]

local function afdb_add (holding_t, group, hull_num, ship_id, name)
	local sections_t = AFDB_t[ship_id];

	if not sections_t then											-- no sections known for this ship id
		add_data (holding_t, group, hull_num, ship_id, name);		-- one entry for the whole drydock
		return;
	end

	for idx = 1, #sections_t do										-- one entry per section
		add_data (holding_t, group, hull_num .. sections_t[idx], ship_id, name);	-- hull number gains the section letter
	end
end
--[[--------------------------< M A I N >----------------------------------------------------------------------

{{#invoke:Sandbox/Naval Vessel Register URL/data extraction tool|main}}

Entry point.  Reads the unparsed content of the current page, finds every NVR table row in it, and returns
the ship data rendered as Lua table source inside <syntaxhighlight> tags.

The html for a normal row looks like this:
	<tr><td>AK 3000</td><td><span title="View AK 3003" class="hullLink" data-shipid="926">AK 3003</span></td><td class="sorting_1">1ST LT ALEX BONNYMAN</td></tr>

Constitution, Maine, Texas html data look like this:
	<tr><td></td><td><span title="View * 0" class="hullLink" data-shipid="1315">* 0</span></td><td class="sorting_1">CONSTITUTION</td></tr>

<frame> is the frame object supplied by #invoke; only frame:preprocess() is used.

Returns the preprocessed output text, or an error <span> when the current page has no content to read.

TODO: make this more robust? sorting the table at NVR rearranges the html tags so the gmatch() pattern no
longer matches.

]]

local function main (frame)
	local content = mw.title.getCurrentTitle():getContent();		-- get unparsed content of this page
	if not content then											-- getContent() returns nil when there is no page
		return '<span class="error">Naval Vessel Register URL/data extraction tool: this page has no content to read</span>';
	end

	content = content:gsub ('""', '"');							-- special case for HERSHEL ""WOODY"" WILLIAMS (ESB-4)

	-- ships whose hull 'number' has no alpha prefix; this naming forces these entries to top of list
	local special_groups_t = {['0'] = '0_CONSTITUTION', ['1'] = '1_MAINE', ['2'] = '2_TEXAS'};
	local holding_t = {};											-- holds tables of sequences indexed by group

	for ship_id, raw_hull_num, name in string.gmatch (content, 'shipid="(%d+)">([^<]+)</span></td><td class="sorting_1">([^<]+)') do
		local group = raw_hull_num:match ('^[%a%-]+');				-- attempt to extract a group name from the hull number (the alpha and hyphen characters)

		if group then
			local hull_num = raw_hull_num:gsub (' +', '-');		-- wikipedia format: <alpha + hyphens><space><digits> -> <alpha + hyphens><hyphen><digits>
			if 'AFDB' == group then								-- special case for floating dry dock sections
				afdb_add (holding_t, group, hull_num, ship_id, name);
			else
				add_data (holding_t, group, hull_num, ship_id, name);
			end
		else														-- special case for constitution, maine, and texas
			group = special_groups_t[raw_hull_num:match ('%d')];	-- first digit selects the special group; nil when there is none
			if group then
				add_data (holding_t, group, group, ship_id, name);	-- the group name doubles as the hull number
			end
		end
	end

	local function hull_number_of (entry)							-- numeric portion of the hull number in a formatted entry; nil when there is none
		return tonumber (entry:match ('%[\'[A-Z%-]+(%d+)'));
	end

	local function numeric_sort_fn (a, b)							-- local function to do numeric sorting
		local a_num, b_num = hull_number_of (a), hull_number_of (b);
		if a_num and b_num and a_num ~= b_num then
			return a_num < b_num;									-- the usual case: order by hull number
		end
		if (nil == a_num) ~= (nil == b_num) then
			return nil == a_num;									-- entries without a number sort ahead of numbered entries instead of raising an error
		end
		return a < b;												-- same number, or no numbers at all: fall back to text order so the result is deterministic
	end

	local out_t = {};
	for group_key, values_t in pairs (holding_t) do				-- for each group
		if 'AFDB' == group_key then								-- special case for AFDB-1, AFDB-2, and AFDB-7 sections
			table.sort (values_t);									-- ascending alpha sort works until AFDB-10 comes into being
		else														-- all other ships
			table.sort (values_t, numeric_sort_fn);				-- ascending sort numerically ships in group by hull number
		end

		table.insert (out_t, table.concat ({						-- add this group to the output table
			'\t[\'',												-- open the group key
			group_key,												-- add the key
			'\'] = {\n',											-- and close
			table.concat (values_t, ',\n'),						-- make and add a big string from ship sequences in group
			'\n\t\t},'												-- and close group table
			}));
	end

	table.sort (out_t);											-- ascending sort the output

	out_t = {table.concat (out_t, '\n\n')};						-- replace <out_t> content with one big string as sequence [1]

	table.insert (out_t, 1, table.concat ({						-- add lua table name and associated markup
		'<syntaxhighlight lang="lua">return {',					-- use syntaxhighlight for styling
		string.rep ('\t', 18),										-- string of tab chars to properly place timestamp comment
		os.date ('!-- created %FT%H:%M:%S UTC\n'),					-- add the timestamp (ISO 8601 YYYY-MM-DD format)
		}));

	table.insert (out_t, '\n\t}</syntaxhighlight>');				-- and the table's closing '}'

	-- gsub strips the digit-underscore prefix that forced constitution, maine, and texas to the top of the list;
	-- assigning to a local drops gsub's replacement count so that only the text reaches preprocess()
	local rendered = table.concat (out_t):gsub ('%d_', '');
	return frame:preprocess (rendered);
end
--[[--------------------------< E X P O R T S >----------------------------------------------------------------

Functions made available to {{#invoke:}}.  main is the only exported entry point.

]]
return {
main = main,
}
Content Disclaimer
Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.
- The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for the purpose of restating it. We strive to provide accurate and relevant information, however:
- There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
- It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
- Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
- Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.