-- Module:Get taxolist
-- From BugSigDB
-- Documentation for this module may be created at Module:Get taxolist/doc
-- Module table; the public entry points are attached to it further down.
local p = {}
-- Common contaminants
-- Wiki page whose text is read as a list of taxon IDs, one per line.
local contaminationListPage = 'MediaWiki:ContaminationList'
-- Name of the page variable used to cache that page's text.
local contaminantionListVar = 'get_taxolist_contamination_list_content'
-- Page variable set to 'no' in getTaxolist and to 'yes' in getTaxolistImpl on a match.
local contaminationExposedVariable = 'IsContaminant'
local contaminationList = {} -- loads at getTaxolist
-- Not host contaminants
-- Wiki page whose text is read as a list of taxon IDs, one per line.
local contaminationNotHostListPage = 'MediaWiki:ContaminationNotHostList'
-- Name of the page variable used to cache that page's text.
local contaminantionNotHostListVar = 'get_taxolist_contamination_not_host_list_content'
-- Page variable set to 'no' in getTaxolist and to 'yes' in getTaxolistImpl on a match.
local contaminationNotHostExposedVariable = 'IsContaminantNotHost'
local contaminationNotHostList = {} -- loads at getTaxolist
-- Bodysite prevalence
-- Page variable set to 'no' in getTaxolist and to 'yes' in getTaxolistImpl on a match.
local contaminationPrevalentExposedVariable = 'IsPrevalentContaminant'
-- Page variable that receives the matched body sites as a text list.
local contaminationPrevalentBodysiteExposedVariable = 'IsPrevalentContaminantBodysite'
local contaminationListPrevalent = {} -- filled in getTaxolist from loadPrevalentTaxons()
-- Page variable caching the prevalent taxon IDs as comma-separated text.
local contaminationListPrevalentVar = 'get_taxolist_contamination_prevalent_list_content'
-- Page variable caching mapTaxonBodysitePrevalent as JSON.
local contaminationListPrevalentMappingVar = 'get_taxolist_contamination_prevalent_mapping_content'
local prevalentSourceStudy = 'Study 562' -- Study to load contaminants from (Signatures)
-- Maps a numeric taxon ID to the 'Body site' value of the query row it came from.
-- Written by getPrevalentTaxons, or restored from the JSON cache in loadPrevalentTaxons.
local mapTaxonBodysitePrevalent = {}
-- Looks up one attribute of a taxon through the `#taxonomy` parser function.
-- @param id   taxon identifier, forwarded unchanged
-- @param arg  attribute name; it is appended to "Taxon/" to build the lookup path
-- @return whatever `#taxonomy` yields for that taxon and attribute
local function getTax( id, arg )
    local currentFrame = mw.getCurrentFrame()
    local attributePath = "Taxon/" .. arg
    return currentFrame:callParserFunction( '#taxonomy', id, attributePath )
end
-- Walks up the taxonomy from `id` and records every taxon met on the way, keyed by rank.
-- For each visited taxon the result receives two entries:
--   ret[rank]          = scientific name of the taxon
--   ret[rank .. "_id"] = ID of the taxon
-- The walk stops when the current ID is nil, "" or "1", or when an ID comes up a
-- second time. The last case guards against a parent chain that loops back on
-- itself, which would otherwise spin until the script times out.
-- Because the walk goes from the given taxon towards the root, two visited taxa that
-- report the same rank string leave only the one closer to the root in the result.
-- @param id  taxon ID to start from
-- @return table of rank -> name and rank .. "_id" -> ID entries (empty if nothing was visited)
function p.getClassificationVars(id)
    local ret = {}
    local visited = {}
    local curId = id
    while curId ~= nil and curId ~= "" and curId ~= "1" and not visited[curId] do
        visited[curId] = true
        local rank = getTax(curId, "Rank")
        ret[rank] = getTax(curId, "ScientificName")
        ret[rank .. "_id"] = curId
        curId = getTax(curId, "ParentTaxId")
    end
    return ret
end
-- Template entry point: renders the lineage list for the taxon given by the calling template.
-- Arguments are read from the parent frame:
--   [1] taxon ID
--   [2] optional comma-separated list of ranks to show (a built-in list is used when empty)
--   [3] the literal "link" to append a {{taxolink}} to each entry
-- Before rendering, the contamination lists are loaded into the module-level tables and
-- the exposed page variables are reset ('no' / empty). They are reset even when the
-- taxon turns out to be unknown.
-- @param frame  frame object of the #invoke call
-- @return "" when the taxon has no scientific name, otherwise the result of getTaxolistImpl
function p.getTaxolist( frame )
    local parent = frame:getParent()
    local id = parent.args[1]

    -- Fill the lookup tables, either from the source pages or from the cached variables.
    contaminationList = p.loadContaminationList()
    contaminationNotHostList = p.loadContaminationNotHostList()
    if p.isEligibleForPrevalent( frame ) then
        contaminationListPrevalent = p.loadPrevalentTaxons()
    end

    -- Reset the exposed flags; getTaxolistImpl switches them on when a match is found.
    local vars = mw.ext.VariablesLua
    vars.vardefine( contaminationExposedVariable, 'no' )
    vars.vardefine( contaminationNotHostExposedVariable, 'no' )
    vars.vardefine( contaminationPrevalentExposedVariable, 'no' )
    vars.vardefine( contaminationPrevalentBodysiteExposedVariable, '' )

    -- Unknown taxon: nothing to render.
    if getTax( id, "ScientificName" ) == "" then
        return ""
    end

    local useLink = ( parent.args[3] == "link" )

    local requestedRanks = parent.args[2]
    local lineage
    if requestedRanks == nil or requestedRanks == '' then
        lineage = {
            "superkingdom", "kingdom", "phylum", "class", "order",
            "family", "subfamily", "genus", "species", "subspecies"
        }
    else
        lineage = mw.text.split( requestedRanks, ", *" )
    end

    return p.getTaxolistImpl( id, lineage, useLink )
end
-- Convenience wrapper: runs the `#subobject` parser function on the current frame.
-- @param args  table handed to `#subobject` as its argument list
-- The result of the parser function is discarded; nothing is returned.
local function subobject( args )
    local call = { name = '#subobject', args = args }
    mw.getCurrentFrame():callParserFunction( call )
end
-- Linear search: reports whether any value stored in `tbl` equals `x`.
-- Every key is visited (array part and hash part), and the scan stops at the first hit.
-- A set-like table keyed by value would answer this without a scan, but the
-- two approaches have never been benchmarked against each other here.
-- @param tbl  table to scan
-- @param x    value to look for (compared with ==)
-- @return true when found, false otherwise
local function table_contains( tbl, x )
    local found = false
    for _, candidate in pairs( tbl ) do
        if candidate == x then
            found = true
            break
        end
    end
    return found
end
-- Adds a body-site value to `list` unless it is nil or already present.
-- A table value is flattened element by element, so that the final
-- mw.text.listToText call only ever sees plain values.
-- NOTE(review): the 'Body site' printout is presumably a plain string in most rows;
-- the table case is handled because the same query already returns tables for
-- 'Related directly' -- confirm against real data.
local function addBodySite( list, site )
    if type( site ) == 'table' then
        for _, item in pairs( site ) do
            addBodySite( list, item )
        end
    elseif site ~= nil and not table_contains( list, site ) then
        list[#list + 1] = site
    end
end

-- Renders the lineage of taxon `id` as a <dl> list and publishes the related data.
-- For every rank in `lineage` that the taxon's classification contains, this
--   * appends "<dt>rank</dt><dd>name (id)" (plus {{taxolink}} when `useLink`) to the output,
--   * stores rank = name in a `#subobject` identified by `id`,
--   * checks the rank's taxon ID against the three contamination lists.
-- Afterwards it defines the page variables NCBI_val ("k__Name|p__Name|...") and
-- NCBI_ids ("id|id|..."), stores the summary subobject (rank, name, parent ID, NCBI,
-- Lineage and all ranks) and sets the contamination page variables to 'yes' on a match.
-- @param id       taxon ID
-- @param lineage  sequence of rank names to show, in output order
-- @param useLink  when true, a {{taxolink}} is appended to each entry
-- @return the HTML/wikitext string of the list
function p.getTaxolistImpl( id, lineage, useLink )
    local frame = mw.getCurrentFrame()
    local info = p.getClassificationVars( id )
    local out = "<dl>"
    local ncbiVals = {}      -- pieces of NCBI_val, used as a #var by {{Signature}}
    local ncbiIds = {}       -- pieces of NCBI_ids, used as a #var by {{Signature}}
    local lineageParts = {}  -- pieces of the "Lineage" subobject property
    local isContaminantCommon = false
    local isContaminantNotHost = false
    local isContaminantPrevalent = false
    local contaminantPrevalentBodySite = {}
    local subobjectArgs = {
        id,
        ['Taxonomic rank'] = getTax( id, "Rank" ),
        ['Taxon name'] = getTax( id, "ScientificName" ),
        ['Tax parent id'] = getTax( id, "ParentTaxId" ),
        NCBI = id
    }

    for _, rank in ipairs( lineage ) do
        local name = info[rank]
        if name then
            local rankId = info[rank .. "_id"]
            -- The contamination lists hold numbers, so compare with the numeric ID.
            local rankIdNum = tonumber( rankId )

            -- Append to the exported #var's. The prefix is the rank's first letter,
            -- except that superkingdom is written as "k".
            local initialLetterRank = string.sub( rank, 1, 1 )
            if rank == 'superkingdom' then
                initialLetterRank = 'k'
            end
            ncbiVals[#ncbiVals + 1] = initialLetterRank .. "__" .. name
            ncbiIds[#ncbiIds + 1] = rankId

            -- Actually generate the output
            out = out .. "<dt>" .. rank .. "</dt>"
            out = out .. "<dd>" .. name .. ' (' .. rankId .. ')'
            if useLink then
                out = out .. frame:expandTemplate{ title = "taxolink", args = { rankId } }
            end

            lineageParts[#lineageParts + 1] = rank .. ": " .. name

            -- TODO: Understand how subobjects work better
            -- One #subobject call per present rank. An earlier revision issued a second,
            -- unconditional call per rank that either repeated this one or carried no
            -- property at all; it is dropped as redundant.
            -- NOTE(review): confirm that repeated #subobject calls with the same
            -- identifier merge, so the dropped call added no data.
            subobjectArgs[rank] = name
            subobject{ id, [rank] = name }

            if table_contains( contaminationList, rankIdNum ) then
                isContaminantCommon = true
            end
            if table_contains( contaminationNotHostList, rankIdNum ) then
                isContaminantNotHost = true
            end
            if table_contains( contaminationListPrevalent, rankIdNum ) then
                isContaminantPrevalent = true
                addBodySite( contaminantPrevalentBodySite, mapTaxonBodysitePrevalent[ rankIdNum ] )
            end
        end
    end

    -- Define the exported variables once, with their final values, instead of
    -- redefining them after every rank.
    frame:callParserFunction{ name = "#vardefine", args = { "NCBI_val", table.concat( ncbiVals, '|' ) } }
    frame:callParserFunction{ name = "#vardefine", args = { "NCBI_ids", table.concat( ncbiIds, '|' ) } }

    -- We follow the wikitext version of this template where setting a custom
    -- lineage also affects this value.
    subobjectArgs["Lineage"] = table.concat( lineageParts, ", " )

    -- Make sure we have at least one valid entry before setting
    if subobjectArgs['Taxonomic rank'] ~= '' then
        subobject( subobjectArgs )
    end

    if isContaminantCommon then
        mw.ext.VariablesLua.vardefine( contaminationExposedVariable, 'yes' )
    end
    if isContaminantNotHost then
        mw.ext.VariablesLua.vardefine( contaminationNotHostExposedVariable, 'yes' )
    end
    if isContaminantPrevalent then
        mw.ext.VariablesLua.vardefine( contaminationPrevalentExposedVariable, 'yes' )
        mw.ext.VariablesLua.vardefine(
            contaminationPrevalentBodysiteExposedVariable,
            mw.text.listToText( contaminantPrevalentBodySite, ', ', ' and ' )
        )
    end

    out = out .. "</dl>"
    return out
end
-- Reads a list of numeric taxon IDs (one per line) from a wiki page.
-- The page text is memoised in the page variable `cacheVar`. The variable is
-- consulted first, so the title lookup (whose `exists` field may count as an
-- expensive call) only happens on a cache miss.
-- Lines that are not numbers are skipped.
-- @param pageName  full title of the page holding the list
-- @param cacheVar  name of the page variable used as a cache for the page text
-- @return sequence of numbers; empty when the page is missing, unreadable or empty
local function loadIdListFromPage( pageName, cacheVar )
    local ids = {}

    -- Try to load contents from variable
    local sourceText = mw.ext.VariablesLua.var( cacheVar )

    -- Load contents from the title otherwise
    if sourceText == nil or sourceText == '' then
        local sourceTitle = mw.title.new( pageName )
        if not sourceTitle or not sourceTitle.exists then
            return ids
        end
        sourceText = sourceTitle:getContent()
        if not sourceText then
            return ids
        end
        mw.ext.VariablesLua.vardefine( cacheVar, sourceText )
    end

    -- Split by newline into a table
    for line in sourceText:gmatch( "[^\r\n]+" ) do
        local taxonId = tonumber( line )
        if taxonId then
            ids[#ids + 1] = taxonId
        end
    end

    return ids
end

-- Loads the "not host" contaminant taxon IDs from contaminationNotHostListPage.
-- @return sequence of numeric taxon IDs (possibly empty)
function p.loadContaminationNotHostList()
    return loadIdListFromPage( contaminationNotHostListPage, contaminantionNotHostListVar )
end

-- Loads the common contaminant taxon IDs from contaminationListPage.
-- @return sequence of numeric taxon IDs (possibly empty)
function p.loadContaminationList()
    return loadIdListFromPage( contaminationListPage, contaminantionListVar )
end
-- Loads the prevalent taxon IDs, either from the page-variable cache or, on a cache
-- miss, from the source study via p.getPrevalentTaxons().
-- On a miss the ID list and the taxon -> body-site mapping are both written to their
-- cache variables; on a hit the mapping is restored from its JSON cache.
-- @return sequence of numeric taxon IDs (possibly empty)
function p.loadPrevalentTaxons()
    local ids = {}

    -- Try to load contents from variable
    local sourceText = mw.ext.VariablesLua.var( contaminationListPrevalentVar )

    if sourceText == nil or sourceText == '' then
        -- No cache yet: query the data.
        -- The list is joined with table.concat on purpose. mw.text.listToText with only
        -- a separator still puts the default conjunction before the last item
        -- ("1,2 and 3"), and the comma split below would then lose the last two IDs.
        sourceText = table.concat( p.getPrevalentTaxons(), ',' )
        mw.ext.VariablesLua.vardefine( contaminationListPrevalentVar, sourceText )
        -- the mapTaxonBodysitePrevalent contains values by this stage
        mw.ext.VariablesLua.vardefine(
            contaminationListPrevalentMappingVar,
            mw.text.jsonEncode( mapTaxonBodysitePrevalent )
        )
    else
        -- There is a cache: unpack the mapping as well. An empty mapping variable is
        -- left alone, because it is not valid JSON input.
        local encodedMapping = mw.ext.VariablesLua.var( contaminationListPrevalentMappingVar )
        if encodedMapping ~= nil and encodedMapping ~= '' then
            mapTaxonBodysitePrevalent = mw.text.jsonDecode( encodedMapping )
        end
    end

    -- Split by comma into a table, skipping anything that is not a number
    for piece in sourceText:gmatch( "([^,]+)" ) do
        local taxonId = tonumber( piece )
        if taxonId then
            ids[#ids + 1] = taxonId
        end
    end

    return ids
end
-- Fetches the taxa recorded on the signatures of the source study (prevalentSourceStudy).
-- Runs a semantic query for pages in Category:Signatures related to that study and
-- reads the 'Related directly' and 'Body site' printouts of every row.
-- Side effect: mapTaxonBodysitePrevalent[taxonId] is set to the row's 'Body site'
-- value for every taxon found. When a taxon occurs in several rows, the row
-- processed last wins, and pairs() gives no order guarantee.
-- Values that are not numbers (including a missing 'Related directly') are skipped;
-- without that check a nil ID would be used as a table key and raise an error.
-- @return sequence of distinct numeric taxon IDs, empty when the query returns no table
function p.getPrevalentTaxons()
    -- query for 'Related directly' property of Signatures, the property stores taxons input
    local queryResult = mw.smw.ask{
        '[[Category:Signatures]]',
        '[[Related study::' .. prevalentSourceStudy .. ']]',
        '?Related directly',
        '?Body site'
    }
    local taxonsTable = {}
    if type( queryResult ) ~= "table" then
        return taxonsTable
    end

    local seen = {} -- taxon IDs already placed in taxonsTable

    -- Records one taxon value together with the body site of its row.
    local function record( rawId, bodySite )
        local taxonId = tonumber( rawId )
        if taxonId == nil then
            return
        end
        if not seen[taxonId] then
            seen[taxonId] = true
            taxonsTable[#taxonsTable + 1] = taxonId
        end
        mapTaxonBodysitePrevalent[taxonId] = bodySite
    end

    for _, row in pairs( queryResult ) do
        local taxonValue = row['Related directly']
        local bodySite = row['Body site']
        if type( taxonValue ) == "table" then
            -- several taxa on one signature
            for _, value in pairs( taxonValue ) do
                record( value, bodySite )
            end
        else
            record( taxonValue, bodySite )
        end
    end

    return taxonsTable
end
-- Decides whether the prevalent-contaminant check applies to the current page.
-- The page is not eligible when its 'Related study' property (read with #show,
-- link=none) equals prevalentSourceStudy, i.e. it is an experiment or signature of
-- the source study, or when the page itself is titled prevalentSourceStudy.
-- @param frame  frame object used to call the #show parser function
-- @return true when eligible, false otherwise
function p.isEligibleForPrevalent( frame )
    local currentTitle = mw.title.getCurrentTitle()
    local showArgs = {
        currentTitle.fullText,
        '?Related study',
        'link=none'
    }
    local study = frame:callParserFunction( '#show', showArgs )

    -- linked to the source study
    if study == prevalentSourceStudy then
        return false
    end
    -- the source study page itself
    return currentTitle.text ~= prevalentSourceStudy
end
-- TODO: remove
-- Debug helper: loads the prevalent taxa twice in a row and dumps the second result.
-- On the first call the cache variable may still be empty; the second call is
-- expected to take the cached path.
-- @return mw.dumpObject() text of the ID list returned by the second call
function p.debugLoadTest()
    -- Locals only: the earlier version leaked `r` and an unused `rr` into the global table.
    p.loadPrevalentTaxons()
    local ids = p.loadPrevalentTaxons()
    return mw.dumpObject( ids )
end
-- Hand the module table, with all public functions attached, back to whoever loaded this module.
return p