#!/usr/bin/env python

# This software is coypright 2012 by Thomas Dickerson
# and StickFigure Graphic Productions
# under the terms of the GNU General Public License v3
# a copy of which may be obtained here: http://www.gnu.org/licenses/gpl-3.0.txt

# This module provides an easy and instructive template
# for creating genetic algorithms in Python

# Setup the Python environment
import sys
from random import sample, random, randint, triangular, choice, uniform, seed
from csv import reader, writer
from time import time
seed(time())

# Constraints - $80,000 max
# At least $24,000 to each county (Addison, Washington, and Franklin)
# OEO can fully subsidize weatherization for 12 clients
# Can't install somewhere that needs weatherization or is unsafe
# Can't install a low-btu stove in a high-btu house
# Can't install a non-self-starting stove for people with disability

COUNTIES = ["ADDI","WASH","FRNK"]
COUNTY_INDICES = dict((c,i) for i,c in enumerate(sorted(COUNTIES)))

NUM_C = len(COUNTIES)

MAX_B = 80000
MAX_W = 12
MIN_B = 24000

# These are the standard parameters
# for an evolutionary algorithm
MUTATION_RATE = 0.01
MUTATION_THRESHOLD = 0.125
CROSSOVER_RATE = 0.7

# Cost effectiveness per BTU of pellets compared against other heat sources
# Based on data current for 2011/2012 heating season.
COST_RATIO = {'propane':1.8,'kerosene':1.8,'oil':1.6,'electric':1.6,'cord wood':0.7}

# Heating requirements to require a high-BTU stove.
HIGH_BTU_THRESHOLD = 2500


# Factors
#-----------------------
# Current Fuel
# Current Bill
# County
# Requires self-start
# Needs housework/weatherization for safe stove installation


# Costs - $200 installation (fixed)
# 		- $100 safety maintenance
#		- $1200 for stove without auto-ignition
#		- $2000 for standard stove
#		- $4000 for high-output stove



# Action	| Stove	| Maintenance
#----------------------------------
# None		| 0		| 0
#----------------------------------
# Weather	| 0		| 1
#----------------------------------
# No-Auto	| 1		| 0-3
#----------------------------------
# Regular	| 2		| 0-3
#----------------------------------
# HighBTU	| 3		| 0-3

# This class represents client data to configure the problem constraints
class HouseholdD(object):
	def __init__(self, county, current_fuel, current_cost, space_heating, disability, weatherization_status, unsafe_for_stove):
		self.county = str(county)
		self.current_fuel = str(current_fuel)
		self.current_cost = int(current_cost)
		self.space_heating = float(space_heating)
		self.disability = int(disability)
		self.weatherization_status = int(weatherization_status)
		self.unsafe_for_stove = int(unsafe_for_stove)
		
# Helper function to generate a random population
# Using a couple of different probability distributions to achieve the desired effect.
def random_client_data(client_count):
	data = []
	for i in xrange(client_count):
		data.append([choice(COUNTIES), choice(COST_RATIO.keys()), int(triangular(1500,5000,1500)), uniform(0.6, 0.75), int(not randint(0,5)), int(triangular(-1.9,1.9,0.5)), int(not randint(0,9))])
	data.sort()
	return data
	
# Saves the input population to an Excel-friendly csv file
# to compare results of repeated runs.
def write_client_data(out_filename,data_rows):
	with open(out_filename,'wb') as outfile:
		client_writer = writer(outfile,dialect='excel')
		client_writer.writerows(data_rows)
		
# Parses a data file to an input population to configure the problem constraints
def parse_data_file(in_filename):
	data = [[],[],[]]
	with open(in_filename,'rU') as infile:
		client_reader = reader(infile,dialect='excel')
		for county,fuel,cost,space,disability,weather,safety_flag in client_reader:
			data[COUNTY_INDICES[county]].append(HouseholdD(county=county,current_fuel=fuel,current_cost=cost,
														   space_heating=space,disability=disability,
														   weatherization_status=weather,unsafe_for_stove=safety_flag))
	return data

# This class represents the actions to take with a client household
# as part of a potential solution. Includes constants representing various costs and configurations
class HouseholdS(object):
	NO_STOVE = 0
	NO_AUTO_STOVE = 1
	NORM_STOVE = 2
	HIGH_BTU_STOVE = 3
	
	NO_MAINTENANCE = 0
	WEATHERIZE = 1
	SAFETY_MAINTENANCE = 2
	
	FIXED_INSTALL = 200
	SAFETY_INSTALL = 100
	
	STOVE_COSTS = {NO_AUTO_STOVE : 1200, NORM_STOVE : 2000, HIGH_BTU_STOVE : 4000}
	
	MIN_COST = STOVE_COSTS[NO_AUTO_STOVE] + FIXED_INSTALL
	MAX_COST = STOVE_COSTS[HIGH_BTU_STOVE] + FIXED_INSTALL + SAFETY_INSTALL
	
	def __init__(self, stove_code = 0, maintenance_code = 0, old_house = None):
		if not old_house:
			self.stove_code = stove_code
			self.maintenance_code = maintenance_code
		else:
			self.stove_code = old_house.stove_code
			self.maintenance_code = old_house.maintenance_code
		
	def calc_cost(self):
		return (HouseholdS.FIXED_INSTALL + HouseholdS.SAFETY_INSTALL * self.safety_work() + HouseholdS.STOVE_COSTS[self.stove_code]) if self.stove_code else 0
		
	def weatherizing(self):
		return bool(self.maintenance_code & HouseholdS.WEATHERIZE)
		
	def safety_work(self):
		return bool(self.maintenance_code & HouseholdS.SAFETY_MAINTENANCE)
	
# Initialize the global constraints
# based on budget and available weatherizations
def init_constraints():
	return ([MAX_B / NUM_C, MAX_B / NUM_C, MAX_B / NUM_C],[MIN_B,MIN_B,MIN_B],MAX_W)
	

# Create a potential set of actions for a household based on current budget constraints
def constrained_client(household,wmax,rmax):
	min_stove_code = HouseholdS.HIGH_BTU_STOVE if (household.space_heating * household.current_cost / COST_RATIO[household.current_fuel] > HIGH_BTU_THRESHOLD) \
						else ( HouseholdS.NORM_STOVE if household.disability else HouseholdS.NO_AUTO_STOVE)
	if household.weatherization_status < 0 and wmax <= 0:
		ret = HouseholdS()	# has to be weatherized, but we can't weatherize anymore
	elif rmax < HouseholdS(stove_code=min_stove_code,maintenance_code=2*household.unsafe_for_stove).calc_cost():
		ret = HouseholdS() # this house is too expensive with our remaining budget
	else:
		ret = HouseholdS(stove_code = randint(min_stove_code,HouseholdS.HIGH_BTU_STOVE), maintenance_code = (int(triangular(0,0.01 + 1.9 * (household.weatherization_status != 1),0))) | (household.weatherization_status < 0) | (2 * household.unsafe_for_stove))
	return ret

# Create an initial population of potential solution genomes.
# Each genome contains a set of actions for each household in the input data
def generate_population(data, population):
	solutions = [[[] for j in xrange(NUM_C)] for i in xrange(population)]
	
	for genome in solutions:
		rmaxs,rmins,wmax = init_constraints()
		for c_num in sample(xrange(NUM_C),NUM_C):
			num_households = len(data[c_num])
			genome[c_num] = [HouseholdS() for i in range(num_households)]
			for house_num in sample(xrange(num_households), num_households):
				client_S = constrained_client(data[c_num][house_num],wmax,rmaxs[c_num])
				genome[c_num][house_num] = client_S
				client_cost = client_S.calc_cost()
				
				wmax -= client_S.weatherizing()
				rmins[c_num] -= client_cost
				rmaxs[c_num] -= client_cost
				if rmins[c_num] < 0 and ((random() > 0.5) or (rmaxs[c_num] < HouseholdS.MAX_COST)):
					break
			else:
				if rmins[c_num] > 0 or rmaxs[c_num] < 0:
					print "Warning, we're starting with a genome that may not satisfy the constraints"
	return solutions

# Return the cost-benefit scaling factor of weatherizing the house (if it's being weatherized at all)
def weatherization_factor(house_data,house_sol):
	return (1 + 0.25 * (1 - house_data.weatherization_status)) if (house_sol.weatherizing()) else 1

# Benefit
# --------
# weatherization_factor * (current_cost - current_cost * (1 + dual_fuel / cost_ratio - dual_fuel))
# --------
# How much money do we save on this house using the benefit function above?

def savings(house_data,house_sol):
	return 0 if not (house_sol.maintenance_code or house_sol.stove_code) \
			else (weatherization_factor(house_data,house_sol) * house_data.current_cost \
					* house_data.space_heating * (1 - 1 / COST_RATIO[house_data.current_fuel]))

# Calculate fitness as the sum on all savings across all 3 counties
def fitness(data,genome):
	return sum(
				sum( savings(house_data, house_sol) \
					for house_sol,house_data in zip(county,data[i]))
				for i,county in enumerate(genome))

# Compare the fitnesses (f1, f2) of two genomes (d1, d2),
# in a way that is compatible with Python's builtin sorting methods
def fitness_cmp((f1, d1), (f2, d2)):
	return -1 if f2 > f1 else (1 if f1 > f2 else 0)

# Build the standard genetic-algorithm roulette wheel.
# This should be entirely reusable.
def build_roulette(data, potential_solutions):
	fitnesses = [fitness(data, genome) for genome in potential_solutions]
	total_fitness = float(sum(fitnesses))
	roulette_order = zip([s_fitness / total_fitness for s_fitness in fitnesses], potential_solutions)
	roulette_order.sort(fitness_cmp)
	return roulette_order

# Pick a genome on the wheel based on a random spin
# This should be entirely reusable.
def match_region(wheel,rnum):
	pos = 0
	for prob, genome in wheel:
		pos += prob
		if rnum < pos:
			ret = genome
			break
	else: # we had some weird round-off error combined with an unlucky number. e.g, rnum = 0.99999, final pos = 0.999
		ret = genome
	return ret

# Standard crossover operation
# Should be mostly reusable. See note about counties/chromosomes below.
def cross(mom,dad,code):
	kids = [mom,dad]
	for i in range(NUM_C):
		if code & (1<<i):
			kids[0][i],kids[1][i] = kids[1][i],kids[0][i]
	return kids

# Helper function to identify compliance with constraints
def calc_county_costs(genome):
	return [sum(house.calc_cost() for house in county) for county in genome]
	
# Calculate current constraints based on genome and initial global constraints.
def calc_constraints(genome):
	county_costs = calc_county_costs(genome)
	total_costs = sum(county_costs)
	rmax = MAX_B - total_costs
	rmins = [MIN_B - county_cost for county_cost in county_costs]
	wmax = MAX_W - sum(sum((house.weatherizing()) for house in county) for county in genome)
	return (rmax,rmins,wmax)

# Validates whether a pair of genomes are properly constrained
def are_valid(*twins):
	for child in twins:
		county_costs = calc_county_costs(child)
		if (sum(county_costs) > MAX_B) or (True in [cost < MIN_B for cost in county_costs]):
			ret = False
			break
	else:
		ret = True
	return ret
			
# Attempts to apply the crossover operator between two parent genomes
def safe_sex(mom, dad):
	for crossover_code in sample(xrange(1,1 << NUM_C), (1 << NUM_C) - 1):
		twins = cross(mom,dad,crossover_code)
		if are_valid(*twins):
			break
	else:
		print "warning, parents unable to reproduce within constraints - just cloning instead"
		twins = [mom, dad]
	return twins

# Attempts to apply a mutation within constraints
# Fails to mutate if restraints can't be met
def mutate(twin,data):
	rmax,rmins,wmax = calc_constraints(twin)
	genome = [[] for j in xrange(NUM_C)]
	mutated = False
	for c_num in sample(xrange(NUM_C),NUM_C):
		num_households = len(data[c_num])
		genome[c_num] = [HouseholdS(old_house = twin[c_num][i]) for i in range(num_households)]
		if mutated:
			continue
		for house_num in sample(xrange(num_households), num_households):
			if not mutated:
				old_client_S = genome[c_num][house_num]
				twm = wmax + old_client_S.weatherizing()
				trm = rmax + old_client_S.calc_cost()
				if random() < MUTATION_THRESHOLD: # Remove a stove
					client_S = HouseholdS()
				else:			# try a new stove
					client_S = constrained_client(data[c_num][house_num],twm,trm)
				genome[c_num][house_num] = client_S
				if are_valid(genome):
					mutated = True
					break
				else:
					genome[c_num][house_num] = old_client_S
	return twin

# Determine whether a mutation is necessary on either of a pair of offspring genomes
def try_mutate(twins,data):
	new_twins = []
	for twin in twins:
		if random() < MUTATION_RATE:
			new_twins.append(mutate(twin,data))
		else:
			new_twins.append(twin)
	return new_twins

# Helper function to print information about the current generation
def dump_fitness(n,population,data):
	print "Generation",n,":",len(population)
	fitnesses = [fitness(data,genome) for genome in population]
	print "\tMaximum Fitness",max(fitnesses)
	print "\tAverage Fitness",sum(fitnesses)/len(fitnesses)
	print
	
# This is the standard main loop of any genetic algorithm
# The only variation from "textbook" procedure is that the genome
# used is divided into "chromosomes" to maintain separation between
# clients in each county. The NUM_C constant controls this.
def optimize(data_filename, population, generations):
	# round population, or breeding won't work right
	# extra int() is for API soundness. If called from main,
	# it's unnecessary
	data = parse_data_file(data_filename)
	population = int(population / 2) * 2
	print "Actual Population:", population
	potential_solutions = generate_population(data, population)
	
	dump_fitness(-1,potential_solutions,data)
	for generation in xrange(generations):
		next_solutions = [[[] for j in xrange(NUM_C)] for i in xrange(population)]
		wheel = build_roulette(data, potential_solutions)
		for i in range(0,population,2):
			next_pair = [match_region(wheel,random())[:] for j in range(2)]
			if random() < CROSSOVER_RATE:
				next_pair = safe_sex(*next_pair)
			next_pair = try_mutate(next_pair,data)
			next_solutions[i:i+2] = next_pair
		potential_solutions = next_solutions
		dump_fitness(generation+1,potential_solutions,data)
	return zip(data, build_roulette(data,potential_solutions)[-1][1])

if __name__ == '__main__':
	try:
		in_filename = sys.argv[1]
		best_found = optimize(in_filename, *[int(i) for i in sys.argv[2:]])
		out_filename = "%s_answer.csv" % (".".join(in_filename.split(".")[:-1]) if "." in sys.argv[1] else in_filename)
		with open(out_filename,'wb') as outfile:
			client_writer = writer(outfile,dialect='excel')
			client_writer.writerow(["County","Current Fuel","Current Cost", "% Space Heating","Disability?","Weatherization Code","Currently Unsafe for Stove?","","Stove Code","Maintenance Code","Fuel Savings (Year 1)"])
			for county in best_found:
				clients = zip(*county)
				for dc,sc in clients:
					client_writer.writerow([dc.county,dc.current_fuel,dc.current_cost,dc.space_heating,dc.disability,dc.weatherization_status,dc.unsafe_for_stove,"",sc.stove_code,sc.maintenance_code,savings(dc, sc)])
		
	except (ValueError,IndexError):
		print("Usage:	evolve_stoves.py datafile population generations")