local gui = require 'gui'
local script = require 'gui.script'
-- Arguments the script was invoked with, collected into a table.
-- NOTE(review): not referenced in the visible part of this script.
local args = {...}
-- Viewscreen that is current when the script starts.
-- NOTE(review): not referenced in the visible part of this script.
local vs = dfhack.gui.getCurViewscreen()
-- Restore the working directory via dfhack.filesystem.restore_cwd() and hand
-- whatever that call returns back to the caller (move_to_save_folder() treats
-- the result as a success flag).
function move_back_to_main_folder()
return dfhack.filesystem.restore_cwd()
end
-- Name of the directory that receives the exported data. It is created up
-- front (as a relative path) so that move_to_save_folder() can change into it.
local folder_name = "job_data"
dfhack.filesystem.mkdir(folder_name)
-- Change into the output folder (`folder_name`), starting from the restored
-- main folder. Returns false when the main folder could not be restored,
-- otherwise the result of dfhack.filesystem.chdir().
function move_to_save_folder()
    if not move_back_to_main_folder() then
        return false
    end
    return dfhack.filesystem.chdir(folder_name)
end
-- Drop-in replacement for ipairs() that reports progress on long vectors.
--   vector:   sequence to iterate over
--   desc:     label used in the progress line (defaults to 'item')
--   interval: print every `interval` entries (defaults to 10000); vectors
--             shorter than this never print
-- Returns an iterator triple for use in a generic `for`; the initial control
-- value is nil, exactly as the underlying ipairs iterator is invoked here.
function progress_ipairs(vector, desc, interval)
    local label = desc or 'item'
    local step = interval or 10000
    -- keep only the iterator function returned by ipairs()
    local next_entry = ipairs(vector)

    local function iterate(vec, idx)
        if idx then
            local total = #vec
            -- report on every multiple of `step` and on index `total - 1`
            -- (presumably the last index of a 0-based DFHack vector)
            if total >= step and (idx % step == 0 or idx == total - 1) then
                print((' %s %i/%i (%0.f%%)'):format(label, idx, total, idx * 100 / total))
            end
        end
        return next_entry(vec, idx)
    end

    return iterate, vector, nil
end
-- Escape the XML special characters &, < and > in `str`.
-- Returns exactly one value, the escaped string. string.gsub also returns a
-- match count, which is dropped here so it cannot leak into vararg calls such
-- as file:write(escape_xml(s)).
function escape_xml(str)
    -- '&' has to be handled first; otherwise the ampersands introduced by
    -- the other two entities would be escaped a second time.
    local escaped = str:gsub('&', '&amp;'):gsub('<', '&lt;'):gsub('>', '&gt;')
    return escaped
end
-- Read-only accessor for DF enum types: df_enums.<enum>[value] yields the
-- entry df.<enum>[value], or the string 'unknown <value>' when the enum has
-- no such entry. Indexing with a name that is not an enum type raises an
-- error, as does any attempt to assign to df_enums.
local df_enums = {} --as:df
setmetatable(df_enums, {
    __index = function(_, enum_name)
        local enum_type = df[enum_name]
        if not (enum_type and enum_type._kind == 'enum-type') then
            error('invalid enum: ' .. enum_name)
        end
        -- a fresh proxy table is built on every access; nothing is cached
        return setmetatable({}, {
            __index = function(_, key)
                return df[enum_name][key] or 'unknown ' .. key
            end
        })
    end,
    __newindex = function()
        error('read-only')
    end
})
-- Enter the output folder, open the XML file there, and switch back to the
-- main folder before checking whether the open succeeded.
if not move_to_save_folder() then
    qerror('Could not move into the save folder.')
end

local filename = "job_data.xml"
local file = io.open(filename, 'w')
-- The working directory is restored whether or not the open succeeded.
move_back_to_main_folder()
if not file then
    qerror("could not open file: " .. filename)
end

-- XML declaration followed by the opening tag of the root element
file:write("<?xml version=\"1.0\" encoding='UTF-8'?>\n", "<jobs>\n")
-- Write one <historical_figure> element per historical figure, containing its
-- race (when one is set and can be resolved) and its lower-cased profession.
for _, hf in progress_ipairs(df.global.world.history.figures, 'historical figure') do
    file:write("<historical_figure>\n")
    if hf.race >= 0 then
        -- Guard against a race id without a matching creature raw, so that a
        -- failed lookup skips the <race> tag instead of aborting the export
        -- with an "attempt to index a nil value" error.
        local creature = df.creature_raw.find(hf.race)
        if creature then
            file:write("\t<race>"..escape_xml(dfhack.df2utf(creature.name[0])).."</race>\n")
        end
    end
    file:write("\t<profession>"..df_enums.profession[hf.profession]:lower().."</profession>\n")
    file:write("</historical_figure>\n")
end
-- Write a <job_change_event> element for every history event that is a
-- df.history_event_change_hf_jobst, emitting its new_job and old_job fields
-- as lower-cased profession names. Afterwards the root element is closed and
-- the file handle released.
for _, event in progress_ipairs(df.global.world.history.events, 'event') do
    if df.history_event_change_hf_jobst:is_instance(event) then
        file:write("<job_change_event>\n")
        for field, value in pairs(event) do
            -- the event type was already established by the enclosing check
            if field == "new_job" or field == "old_job" then
                local job = df_enums.profession[value]:lower()
                file:write(("\t\t<%s>%s</%s>\n"):format(field, job, field))
            end
        end
        file:write("</job_change_event>\n")
    end
end
file:write("</jobs>")
file:close()
This is based on the exportlegends script; I basically just ripped out the parts that I needed. Call it from legends mode and it creates job_data.xml inside a job_data folder under the main DF folder.

import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
# Parse the XML incrementally. Both "start" and "end" events are requested:
# the very first "start" event hands us the root element, while element text
# is only read on "end" events, because ElementTree does not guarantee that
# `text` is populated yet when an element's "start" event fires.
context = ET.iterparse("job_data.xml", events=("start", "end"))
context = iter(context)
ev, root = next(context)

# Collect histfig professions and the new_job values of job-change events
professions = []
newjobs = []
for ev, el in context:
    if ev != "end":
        continue
    if el.tag == "profession":
        if el.text is not None:
            professions.append(el.text.lower())
    elif el.tag == "new_job":
        if el.text is not None:
            # lower-cased like the professions, so both lists are filtered
            # against the same lower-case names
            newjobs.append(el.text.lower())
    elif el.tag in ("historical_figure", "job_change_event"):
        # A complete record has been read; detach it from the root so the
        # tree does not grow with the size of the export.
        root.clear()
# Remove all the fluff: job names that are excluded from both statistics.
# Stored as a set so each membership test is constant-time and the entries
# that were listed twice ("pikeman", "master_pikeman") appear only once.
bollocks = {
    "recruit", "none", "drunk", "standard", "baby", "child",
    "trained_war", "trained_hunter",
    "swordsman", "axeman", "maceman", "hammerman", "spearman", "lasher",
    "pikeman", "crossbowman", "bowman", "blowgunman", "wrestler",
    "master_swordsman", "master_axeman", "master_maceman",
    "master_hammerman", "master_spearman", "master_lasher",
    "master_pikeman", "master_crossbowman", "master_bowman",
    "master_blowgunman", "master_wrestler",
}
professions_filtered = [x for x in professions if x not in bollocks]
newjobs_filtered = [x for x in newjobs if x not in bollocks]
# Get jobs and job count for professions.
# One pass with a dict replaces calling list.count() once per distinct job
# (which rescanned the whole list each time). keys() and values() of the same
# dict always correspond, so jobs_prof[i] still belongs to count_prof[i].
tally_prof = {}
for job in professions_filtered:
    tally_prof[job] = tally_prof.get(job, 0) + 1
jobs_prof = list(tally_prof.keys())
count_prof = list(tally_prof.values())

# Get jobs and job count for new_jobs (same approach)
tally_new = {}
for job in newjobs_filtered:
    tally_new[job] = tally_new.get(job, 0) + 1
jobs_new = list(tally_new.keys())
count_new = list(tally_new.values())
# Sort by job count (ascending; equal counts are ordered by job name).
# Each (count, job) pairing is sorted once and then split into two lists.
ranked_prof = sorted(zip(count_prof, jobs_prof))
count_prof_sorted = [count for count, _ in ranked_prof]
jobs_prof_sorted = [job for _, job in ranked_prof]

ranked_new = sorted(zip(count_new, jobs_new))
count_new_sorted = [count for count, _ in ranked_new]
jobs_new_sorted = [job for _, job in ranked_new]

# Normalise count values against the largest count. The lists are sorted in
# ascending order, so the largest count is the last entry. An empty list
# (nothing left after filtering) yields an empty result instead of the
# ValueError that max() raises on an empty sequence.
peak_prof = count_prof_sorted[-1] if count_prof_sorted else 0
count_prof_norm = [float(i) / peak_prof for i in count_prof_sorted]

peak_new = count_new_sorted[-1] if count_new_sorted else 0
count_new_norm = [float(i) / peak_new for i in count_new_sorted]
# Categorise professions: each list names the jobs that share one bar colour
# in the plots.
miners = ["miner"]
woodworkers = [
    "woodworker", "bowyer", "carpenter", "woodcutter",
]
stoneworkers = [
    "stoneworker", "engraver", "mason",
]
rangers = [
    "hunter", "animal_caretaker", "animal_dissector", "animal_trainer",
    "trapper", "ranger",
]
doctors = ["doctor"]
farmers = [
    "planter", "beekeeper", "brewer", "butcher", "cheese_maker", "cook",
    "dyer", "gelder", "farmer", "herbalist", "lye_maker", "milker", "miller",
    "potash_maker", "presser", "shearer", "soap_maker", "spinner", "tanner",
    "thresher", "wood_burner",
]
fishers = [
    "fisherman", "fishery_worker", "fish_dissector", "fish_cleaner",
]
metalsmiths = [
    "armorer", "furnace_operator", "metalcrafter", "weaponsmith",
    "blacksmith", "metalsmith",
]
jewelers = [
    "jeweler", "gem_cutter", "gem_setter",
]
crafters = [
    "craftsman", "woodcrafter", "stonecrafter", "leatherworker",
    "bone_carver", "weaver", "clothier", "glassmaker", "strand_extractor",
    "papermaker", "wax_worker", "potter", "bookbinder",
]
engineers = [
    "engineer", "mechanic", "siege_engineer", "siege_operator",
    "pump_operator",
]
# Assign colours based on profession (default DF colour scheme used).
# The category -> colour mapping is written down once and shared by both
# plots, instead of repeating the same conditional chain per job list.
# Categories are checked in the order listed; the first match wins.
_CATEGORY_COLOURS = [
    (miners, "#C0C0C0"),
    (woodworkers, "#FFFF00"),
    (stoneworkers, "#000000"),
    (rangers, "#008000"),
    (doctors, "#800080"),
    (farmers, "#808000"),
    (fishers, "#000080"),
    (metalsmiths, "#808080"),
    (jewelers, "#00FF00"),
    (crafters, "#0000FF"),
    (engineers, "#FF0000"),
]
# Colour for jobs that belong to none of the categories.
# NOTE(review): identical to the doctors colour, so uncategorised jobs cannot
# be told apart from doctors in the plots - confirm this is intended.
_DEFAULT_COLOUR = "#800080"


def _colour_for(job):
    """Return the hex colour of the first category containing `job`."""
    for members, colour in _CATEGORY_COLOURS:
        if job in members:
            return colour
    return _DEFAULT_COLOUR


colours_prof = [_colour_for(y) for y in jobs_prof_sorted]
colours_new = [_colour_for(y) for y in jobs_new_sorted]
# Plot data: one horizontal bar chart per data set, each saved as a PNG in
# the current working directory.
def _save_barh(labels, values, colours, title, path):
    """Draw a horizontal bar chart, save it to `path`, return (figure, axes)."""
    figure, axes = plt.subplots(figsize=(18,12))
    axes.barh(labels, values, color=colours)
    axes.set_title(title)
    axes.grid(which="both", axis="x")
    figure.savefig(path)
    return figure, axes


fig, ax = _save_barh(
    jobs_prof_sorted, count_prof_norm, colours_prof,
    "Histfig professions", "jobdistribution.png",
)
fig2, ax2 = _save_barh(
    jobs_new_sorted, count_new_norm, colours_new,
    "New_job values of change_hf_job events", "newjobs.png",
)
Run this in the folder where job_data.xml is located. Requires matplotlib. It creates two plots: jobdistribution.png for the distribution of histfig professions and newjobs.png for the frequency of new_job values.