Module:Get taxolist

From BugSigDB

Documentation for this module may be created at Module:Get taxolist/doc

local p = {}
-- Common contaminants
-- Wiki page whose text is split into lines and converted to numbers by p.loadContaminationList
local contaminationListPage = 'MediaWiki:ContaminationList'
-- Name of the page variable that caches the raw text of the page above
local contaminantionListVar = 'get_taxolist_contamination_list_content'
-- Page variable set to 'no' by p.getTaxolist and to 'yes' by p.getTaxolistImpl on a match
local contaminationExposedVariable = 'IsContaminant'
local contaminationList = {} -- filled in p.getTaxolist
-- Not host contaminants
-- Wiki page whose text is split into lines and converted to numbers by p.loadContaminationNotHostList
local contaminationNotHostListPage = 'MediaWiki:ContaminationNotHostList'
-- Name of the page variable that caches the raw text of the page above
local contaminantionNotHostListVar = 'get_taxolist_contamination_not_host_list_content'
-- Page variable set to 'no' by p.getTaxolist and to 'yes' by p.getTaxolistImpl on a match
local contaminationNotHostExposedVariable = 'IsContaminantNotHost'
local contaminationNotHostList = {} -- filled in p.getTaxolist
-- Bodysite prevalence
-- Page variable set to 'no' by p.getTaxolist and to 'yes' by p.getTaxolistImpl on a match
local contaminationPrevalentExposedVariable = 'IsPrevalentContaminant'
-- Page variable receiving the joined body sites of the matched taxa (empty by default)
local contaminationPrevalentBodysiteExposedVariable = 'IsPrevalentContaminantBodysite'
local contaminationListPrevalent = {} -- filled in p.getTaxolist from p.loadPrevalentTaxons
-- Name of the page variable that caches the comma-separated prevalent taxon IDs
local contaminationListPrevalentVar = 'get_taxolist_contamination_prevalent_list_content'
-- Name of the page variable that caches the JSON-encoded taxon -> body site mapping
local contaminationListPrevalentMappingVar = 'get_taxolist_contamination_prevalent_mapping_content'
local prevalentSourceStudy = 'Study 562' -- Study to load contaminants from (Signatures)
-- Maps a numeric taxon ID to the 'Body site' value of the query row it came from;
-- written by p.getPrevalentTaxons and replaced from the cache by p.loadPrevalentTaxons
local mapTaxonBodysitePrevalent = {}

-- Looks up a single attribute of a taxon through the #taxonomy parser function.
--   id  - taxon identifier, passed to #taxonomy as its first argument
--   arg - attribute name; it is requested as "Taxon/<arg>"
-- Returns whatever the parser function call yields.
local function getTax( id, arg )
	local currentFrame = mw.getCurrentFrame()
	local attributePath = "Taxon/" .. arg
	return currentFrame:callParserFunction( '#taxonomy', id, attributePath )
end

-- Walks from taxon `id` up through its parents and records every rank met.
--
-- For each visited taxon the result receives two entries:
--   ret[<rank>]          = scientific name of the taxon
--   ret[<rank> .. "_id"] = ID of the taxon
-- A rank that occurs more than once on the way up keeps the entry of the
-- taxon visited last (the one furthest from `id`).
--
-- The walk stops when the parent ID is "" or "1", or when an ID shows up a
-- second time, so that a parent chain looping back on itself cannot keep the
-- loop running forever.
function p.getClassificationVars(id)
	local ret = {}
	local visited = {}
	local curId = id
	while curId ~= "" and curId ~= "1" and not visited[curId] do
		-- nil cannot be used as a table key, so it is simply not tracked
		if curId ~= nil then
			visited[curId] = true
		end
		local rank = getTax(curId, "Rank")
		ret[rank] = getTax(curId, "ScientificName")
		ret[rank .. "_id" ] = curId
		curId = getTax(curId, "ParentTaxId" )
	end
	return ret
end

-- Entry point: renders the taxonomy list for the taxon named by the calling
-- template.
--
-- Arguments are read from the parent frame of `frame`:
--   1 - taxon ID
--   2 - optional list of ranks separated by commas (default: the standard
--       superkingdom .. subspecies lineage below)
--   3 - "link" to have the "taxolink" template expanded next to every rank
--
-- Before anything is rendered the contamination lists are (re)loaded and the
-- exposed page variables are reset to their defaults.
-- Returns "" when #taxonomy has no scientific name for the ID, otherwise the
-- markup produced by p.getTaxolistImpl.
function p.getTaxolist( frame )
	local templateArgs = frame:getParent().args
	local id = templateArgs[1]

	-- Both lists come from the page contents or from a variable cache
	contaminationList = p.loadContaminationList()
	contaminationNotHostList = p.loadContaminationNotHostList()

	-- The prevalent list is only loaded for pages that pass the eligibility check
	if p.isEligibleForPrevalent( frame ) then
		contaminationListPrevalent = p.loadPrevalentTaxons()
	end

	-- Reset the exposed variables; p.getTaxolistImpl overrides them on a match
	local variables = mw.ext.VariablesLua
	variables.vardefine( contaminationExposedVariable , 'no' )
	variables.vardefine( contaminationNotHostExposedVariable , 'no' )
	variables.vardefine( contaminationPrevalentExposedVariable , 'no' )
	variables.vardefine( contaminationPrevalentBodysiteExposedVariable , '' )

	-- Nothing to render without a scientific name
	if getTax( id, "ScientificName") == "" then
		return ""
	end

	local useLink = templateArgs[3] == "link"

	local customLineage = templateArgs[2]
	local lineage
	if customLineage == nil or customLineage == '' then
		lineage = {
			"superkingdom",
			"kingdom",
			"phylum",
			"class",
			"order",
			"family",
			"subfamily",
			"genus",
			"species",
			"subspecies"
		}
	else
		lineage = mw.text.split( customLineage, ", *")
	end

	return p.getTaxolistImpl( id, lineage, useLink )
end

-- Convenience wrapper: calls the #subobject parser function on the current
-- frame with `args` as its argument table. Returns nothing.
local function subobject( args )
	local currentFrame = mw.getCurrentFrame()
	currentFrame:callParserFunction{ name = '#subobject', args = args }
end

-- Iterates through the table looking for a value equal to `x`.
-- This is a plain linear scan. Wrapping the table with add/contains methods
-- would make the lookup faster, but the difference was never measured.
-- Returns true when some value of `tbl` equals `x`, false otherwise.
local function table_contains( tbl, x )
	local found = false
	for _, value in pairs( tbl ) do
		if value == x then
			found = true
			break
		end
	end
	return found
end

-- Renders the lineage of taxon `id` as an HTML definition list and publishes
-- the accompanying semantic data and page variables.
--
-- Parameters:
--   id      - taxon ID understood by the #taxonomy parser function
--   lineage - sequence of rank names to output, in display order; ranks the
--             taxon does not have are skipped in the output
--   useLink - when true, the "taxolink" template is expanded after each rank
--
-- Returns the "<dl>...</dl>" markup as a string.
--
-- Side effects:
--   * #vardefine of NCBI_val and NCBI_ids (pipe-separated names and IDs)
--   * #subobject calls keyed by `id`: one per entry of `lineage` and a final
--     one carrying rank, name, parent ID, NCBI ID and the lineage string
--   * the contamination variables are set to 'yes' when the ID of any output
--     rank is found in the corresponding module-level list
function p.getTaxolistImpl( id, lineage, useLink )
	local info = p.getClassificationVars( id )
	local frame = mw.getCurrentFrame()
	local outParts = { "<dl>" }
	local lineageStr = ''
	local NCBI_val = "" -- used as a #var by {{Signature}}
	local NCBI_ids = "" -- used as a #var by {{Signature}}
	local isContaminantCommon = false
	local isContaminantNotHost = false
	local isContaminantPrevalent = false
	local contaminantPrevalentBodySite = {}
	local subobjectArgs = {
		id,
		['Taxonomic rank'] = getTax( id, "Rank" ),
		['Taxon name'] = getTax( id, "ScientificName" ),
		['Tax parent id'] = getTax( id, "ParentTaxId" ),
		NCBI = id
	}
	for _, rank in ipairs( lineage ) do
		local name = info[rank]
		if name then
			local rankId = info[rank .. "_id"]
			-- The contamination lists hold numbers, so convert once per rank
			local rankIdNumber = tonumber( rankId )

			-- Append to exported #var's
			local initialLetterRank = string.sub( rank, 1, 1 )
			if rank == 'superkingdom' then
				initialLetterRank = 'k'
			end
			if NCBI_val ~= '' then
				NCBI_val = NCBI_val .. '|'
			end
			NCBI_val = NCBI_val .. initialLetterRank .. "__" .. name
			if NCBI_ids ~= '' then
				NCBI_ids = NCBI_ids .. "|"
			end
			NCBI_ids = NCBI_ids .. rankId

			-- Actually generate the output
			outParts[#outParts + 1] = "<dt>" .. rank .. "</dt>"
			outParts[#outParts + 1] = "<dd>" .. name .. ' (' .. rankId .. ')'
			if useLink then
				outParts[#outParts + 1] = frame:expandTemplate{ title = "taxolink", args = { rankId } }
			end
			-- Close the element explicitly instead of leaving it open
			outParts[#outParts + 1] = "</dd>"

			if lineageStr ~= '' then
				lineageStr = lineageStr .. ", "
			end
			lineageStr = lineageStr .. rank .. ": " .. name
			-- TODO: Understand how subobjects work better
			subobjectArgs[rank] = name

			if table_contains( contaminationList, rankIdNumber ) then
				isContaminantCommon = true
			end

			if table_contains( contaminationNotHostList, rankIdNumber ) then
				isContaminantNotHost = true
			end

			if table_contains( contaminationListPrevalent, rankIdNumber ) then
				isContaminantPrevalent = true
				-- Collect each body site once; taxa without a mapped body site add nothing
				local bodySite = mapTaxonBodysitePrevalent[ rankIdNumber ]
				if bodySite ~= nil and not table_contains( contaminantPrevalentBodySite, bodySite ) then
					table.insert( contaminantPrevalentBodySite, bodySite )
				end
			end
		end
		-- One #subobject call per lineage entry. For a rank the taxon lacks,
		-- info[rank] is nil and only the ID is passed. A second, identical call
		-- that used to be issued for present ranks has been dropped.
		-- TODO the subobject Taxonomic rank. Why is this inside the loop??
		subobject{ id, [rank] = info[rank] }
		-- Kept inside the loop: the variables are refreshed after every rank.
		-- NOTE(review): confirm nothing expanded in later iterations (e.g. the
		-- taxolink template) reads them before hoisting this out of the loop.
		frame:callParserFunction{ name = "#vardefine", args = { "NCBI_val", NCBI_val } }
		frame:callParserFunction{ name = "#vardefine", args = { "NCBI_ids", NCBI_ids } }
	end
	-- We follow the wikitext version of this template where setting a custom
	-- lineage also affects this value.
	subobjectArgs["Lineage"] = lineageStr
	-- Note, in original template, this part is inside the loop body for unclear reasons.
	-- Make sure we have at least one valid entry before setting
	if subobjectArgs['Taxonomic rank'] ~= '' then
		subobject( subobjectArgs )
	end

	if isContaminantCommon then
		mw.ext.VariablesLua.vardefine( contaminationExposedVariable , 'yes' )
	end

	if isContaminantNotHost then
		mw.ext.VariablesLua.vardefine( contaminationNotHostExposedVariable , 'yes' )
	end

	if isContaminantPrevalent then
		mw.ext.VariablesLua.vardefine( contaminationPrevalentExposedVariable , 'yes' )
		mw.ext.VariablesLua.vardefine(
			contaminationPrevalentBodysiteExposedVariable,
			mw.text.listToText( contaminantPrevalentBodySite, ', ', ' and ' )
		)
	end

	-- TODO ncbi_ids and ncbi_val variables the parent template uses.
	outParts[#outParts + 1] = "</dl>"
	return table.concat( outParts )
end

-- Returns the numeric taxon IDs of the "not host" contamination list.
--
-- The raw text is taken from the caching page variable when that is set,
-- otherwise from the list page, in which case the variable is filled for
-- later calls. Lines that do not convert to a number are ignored.
-- Returns an empty table when the list page does not exist.
function p.loadContaminationNotHostList()
	local ids = {}

	local sourceTitle = mw.title.new( contaminationNotHostListPage )
	if not sourceTitle.exists then
		return ids
	end

	-- Prefer the cached text; fall back to the page and remember it
	local variables = mw.ext.VariablesLua
	local sourceText = variables.var( contaminantionNotHostListVar )
	if sourceText == '' then
		sourceText = sourceTitle:getContent()
		variables.vardefine( contaminantionNotHostListVar , sourceText )
	end

	-- One candidate ID per non-empty line
	for line in sourceText:gmatch("[^\r\n]+") do
		local taxonId = tonumber( line )
		if taxonId ~= nil then
			ids[#ids + 1] = taxonId
		end
	end

	return ids
end

-- Returns the numeric taxon IDs of the common contamination list.
--
-- The raw text is taken from the caching page variable when that is set,
-- otherwise from the list page, in which case the variable is filled for
-- later calls. Lines that do not convert to a number are ignored.
-- Returns an empty table when the list page does not exist.
function p.loadContaminationList()
	local ids = {}

	local sourceTitle = mw.title.new( contaminationListPage )
	if not sourceTitle.exists then
		return ids
	end

	-- Prefer the cached text; fall back to the page and remember it
	local variables = mw.ext.VariablesLua
	local sourceText = variables.var( contaminantionListVar )
	if sourceText == '' then
		sourceText = sourceTitle:getContent()
		variables.vardefine( contaminantionListVar , sourceText )
	end

	-- One candidate ID per non-empty line
	for line in sourceText:gmatch("[^\r\n]+") do
		local taxonId = tonumber( line )
		if taxonId ~= nil then
			ids[#ids + 1] = taxonId
		end
	end

	return ids
end

-- Loads the list of prevalent taxon IDs, either from the caching page
-- variable or, when that is empty, from p.getPrevalentTaxons.
--
-- On a cache miss the IDs are stored comma-separated in the list variable and
-- the taxon -> body site mapping (filled by p.getPrevalentTaxons) is stored
-- JSON-encoded in the mapping variable. On a cache hit the mapping is restored
-- from that JSON instead.
--
-- Returns a sequence of numeric taxon IDs.
function p.loadPrevalentTaxons()

	local ids = {}

	-- Try to load contents from variable
	local sourceText = mw.ext.VariablesLua.var( contaminationListPrevalentVar )
	if sourceText == '' then
		-- No cache yet: run the query.
		-- table.concat is used on purpose: mw.text.listToText puts a
		-- conjunction before the last item ("1,2 and 3"), which the comma split
		-- below cannot turn back into numbers, so the last IDs would be lost.
		sourceText = table.concat( p.getPrevalentTaxons(), ',' )
		mw.ext.VariablesLua.vardefine( contaminationListPrevalentVar , sourceText )
		-- the mapTaxonBodysitePrevalent contains values by this stage
		mw.ext.VariablesLua.vardefine(
			contaminationListPrevalentMappingVar,
			mw.text.jsonEncode( mapTaxonBodysitePrevalent )
		)
	else
		-- there is a cache
		-- unpack mapping
		local decoded = mw.text.jsonDecode(
			mw.ext.VariablesLua.var(
				contaminationListPrevalentMappingVar
			)
		)
		-- Lookups use numeric taxon IDs. Keys are converted defensively in case
		-- the decoder hands them back as strings; numeric keys pass unchanged.
		local restored = {}
		for key, bodySite in pairs( decoded ) do
			restored[tonumber( key ) or key] = bodySite
		end
		mapTaxonBodysitePrevalent = restored
	end

	-- Split by comma into a table
	for s in sourceText:gmatch("([^,]+)") do
		local taxonId = tonumber( s )
		if taxonId ~= nil then
			ids[#ids + 1] = taxonId
		end
	end

	-- Return the table of IDs
	return ids

end

-- Fetches the taxa recorded on the signatures of the prevalent source study.
--
-- Runs a semantic query for pages in Category:Signatures whose 'Related study'
-- is `prevalentSourceStudy`, reading the 'Related directly' property (the
-- taxa) and the 'Body site' property of each result row.
--
-- Every value of 'Related directly' that converts to a number is appended to
-- the returned sequence (duplicates are kept) and recorded in
-- mapTaxonBodysitePrevalent with the row's 'Body site' value. Rows without a
-- usable taxon value are skipped; previously they raised "table index is nil".
--
-- Returns a sequence of numeric taxon IDs, empty when the query yields no table.
function p.getPrevalentTaxons()
	-- query for 'Related directly' property of Signatures, the property stores taxons input
	local queryResult = mw.smw.ask{
		'[[Category:Signatures]]',
		'[[Related study::' .. prevalentSourceStudy .. ']]',
		'?Related directly',
		'?Body site'
	}
	local taxonsTable = {}
	if type( queryResult ) ~= "table" then
		return taxonsTable
	end

	for _, row in pairs( queryResult ) do
		local bodySite = row['Body site']

		-- The property holds either a single value or a table of values;
		-- treat both as a list (a missing value becomes an empty list)
		local taxonValues = row['Related directly']
		if type( taxonValues ) ~= "table" then
			taxonValues = { taxonValues }
		end

		for _, value in pairs( taxonValues ) do
			local taxonId = tonumber( value )
			if taxonId ~= nil then
				taxonsTable[#taxonsTable + 1] = taxonId
				mapTaxonBodysitePrevalent[taxonId] = bodySite
			end
		end
	end

	return taxonsTable
end

-- Decides whether the prevalent contaminant list applies to the current page.
--
-- Returns false when the page's 'Related study' property (queried with #show)
-- equals `prevalentSourceStudy`, i.e. the page is an experiment or signature
-- of that study, or when the page is the study page itself; true otherwise.
function p.isEligibleForPrevalent( frame )
	local currentTitle = mw.title.getCurrentTitle()
	local study = frame:callParserFunction(
		'#show',
		{
			currentTitle.fullText,
			'?Related study',
			'link=none'
		} )

	-- an experiment or signature linked to the source study
	if study == prevalentSourceStudy then
		return false
	end
	-- the study page itself
	return currentTitle.text ~= prevalentSourceStudy
end

-- TODO: remove
-- Debug helper: loads the prevalent taxa twice and returns a dump of the
-- second result. The second call can read the page variables that the first
-- call may have filled, so both code paths of p.loadPrevalentTaxons can run.
function p.debugLoadTest()
	-- Locals on purpose: the results must not leak into the global environment
	p.loadPrevalentTaxons()
	local taxonIds = p.loadPrevalentTaxons()

	--local s = mw.text.jsonEncode( mapTaxonBodysitePrevalent )
	--local e = mw.text.jsonDecode( s )

	return mw.dumpObject( taxonIds )

	--local rr = ""
	--for k, v in pairs( taxonIds ) do
	--	rr = rr .. "*" .. v .. ' (' .. mapTaxonBodysitePrevalent[v] .. ' )' .. "\n"
	--end
	--return rr
end


return p