Coverage for /builds/ase/ase/ase/ga/population.py: 26.38% (527 statements)
# fmt: off

"""Implementation of a Population class for maintaining a GA population
and proposing structures to pair."""
from math import exp, sqrt, tanh
from operator import itemgetter

import numpy as np

from ase.db.core import now
from ase.ga import get_raw_score


def count_looks_like(a, all_cand, comp):
    """Count how many candidates in all_cand the comparator comp
    considers to look like candidate a (a itself excluded)."""
    n = 0
    for b in all_cand:
        if a.info['confid'] == b.info['confid']:
            continue
        if comp.looks_like(a, b):
            n += 1
    return n
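

# --- Illustrative sketch (not part of the module) --------------------------
# A minimal, hypothetical example of how count_looks_like() can be used:
# the comparator only needs a looks_like(a, b) method. The DummyComparator
# class and the toy Atoms objects below are assumptions made purely for
# illustration.
def _demo_count_looks_like():
    from ase import Atoms

    class DummyComparator:
        def looks_like(self, a, b):
            # Consider two candidates equal if they share the same
            # chemical formula (purely for demonstration).
            return a.get_chemical_formula() == b.get_chemical_formula()

    cands = []
    for confid, symbols in enumerate(['H2', 'H2', 'OH']):
        a = Atoms(symbols)
        a.info['confid'] = confid
        cands.append(a)

    # The first H2 looks like the second H2 but not like OH -> returns 1.
    return count_looks_like(cands[0], cands, DummyComparator())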


class Population:
    """Population class which maintains the current population
    and proposes which candidates to pair together.

    Parameters:

    data_connection: DataConnection object
        Interface to the database holding the relaxed candidates.

    population_size: int
        The number of candidates in the population.

    comparator: Comparator object
        Used to decide whether two configurations are equal.
        By default Atoms objects are compared directly.

    logfile: str
        Text file that contains information about the population.
        The format is::

            timestamp: generation(if available): id1,id2,id3...

        Using this file greatly speeds up convergence checks.
        Default is None, meaning that no file is written.

    use_extinct: boolean
        Set this to True if mass extinction and the extinct key
        are going to be used. Default is False.

    rng: Random number generator
        By default numpy.random.
    """

    def __init__(self, data_connection, population_size,
                 comparator=None, logfile=None, use_extinct=False,
                 rng=np.random):
        self.dc = data_connection
        self.pop_size = population_size
        if comparator is None:
            from ase.ga.standard_comparators import AtomsComparator
            comparator = AtomsComparator()
        self.comparator = comparator
        self.logfile = logfile
        self.use_extinct = use_extinct
        self.rng = rng
        self.pop = []
        self.pairs = None
        self.all_cand = None
        self.__initialize_pop__()

    def __initialize_pop__(self):
        """ Private method that initializes the population when
        the population is created. """

        # Get all relaxed candidates from the database
        ue = self.use_extinct
        all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
        all_cand.sort(key=lambda x: x.info['key_value_pairs']['raw_score'],
                      reverse=True)
        # all_cand.sort(key=lambda x: x.get_potential_energy())

        # Fill up the population with the self.pop_size most stable
        # unique candidates.
        i = 0
        while i < len(all_cand) and len(self.pop) < self.pop_size:
            c = all_cand[i]
            i += 1
            eq = False
            for a in self.pop:
                if self.comparator.looks_like(a, c):
                    eq = True
                    break
            if not eq:
                self.pop.append(c)

        for a in self.pop:
            a.info['looks_like'] = count_looks_like(a, all_cand,
                                                    self.comparator)

        self.all_cand = all_cand
        self.__calc_participation__()

    def __calc_participation__(self):
        """ Determines, from the database, how many times each
        candidate has been used to generate new candidates. """
        (participation, pairs) = self.dc.get_participation_in_pairing()
        for a in self.pop:
            if a.info['confid'] in participation.keys():
                a.info['n_paired'] = participation[a.info['confid']]
            else:
                a.info['n_paired'] = 0
        self.pairs = pairs

    def update(self, new_cand=None):
        """ New candidates can be added to the database
        after the population object has been created.
        This method extracts these new candidates from the
        database and includes them in the population. """

        if len(self.pop) == 0:
            self.__initialize_pop__()

        if new_cand is None:
            ue = self.use_extinct
            new_cand = self.dc.get_all_relaxed_candidates(only_new=True,
                                                          use_extinct=ue)

        for a in new_cand:
            self.__add_candidate__(a)
            self.all_cand.append(a)
        self.__calc_participation__()
        self._write_log()

    def get_current_population(self):
        """ Returns a copy of the current population. """
        self.update()
        return [a.copy() for a in self.pop]

    def get_population_after_generation(self, gen):
        """ Returns a copy of the population as it was
        after generation gen. """
        if self.logfile is not None:
            with open(self.logfile) as fd:
                gens = {}
                for line in fd:
                    _, no, popul = line.split(':')
                    gens[int(no)] = [int(i) for i in popul.split(',')]
            return [c.copy() for c in self.all_cand[::-1]
                    if c.info['relax_id'] in gens[gen]]

        all_candidates = [c for c in self.all_cand
                          if c.info['key_value_pairs']['generation'] <= gen]
        cands = [all_candidates[0]]
        for b in all_candidates:
            if b not in cands:
                for a in cands:
                    if self.comparator.looks_like(a, b):
                        break
                else:
                    cands.append(b)
        pop = cands[:self.pop_size]
        return [a.copy() for a in pop]

    def __add_candidate__(self, a):
        """ Adds a single candidate to the population. """

        # check if the structure is too low in raw score
        raw_score_a = get_raw_score(a)
        raw_score_worst = get_raw_score(self.pop[-1])
        if raw_score_a < raw_score_worst \
                and len(self.pop) == self.pop_size:
            return

        # check if the new candidate should
        # replace a similar structure in the population
        for (i, b) in enumerate(self.pop):
            if self.comparator.looks_like(a, b):
                if get_raw_score(b) < raw_score_a:
                    del self.pop[i]
                    a.info['looks_like'] = count_looks_like(a,
                                                            self.all_cand,
                                                            self.comparator)
                    self.pop.append(a)
                    self.pop.sort(key=get_raw_score,
                                  reverse=True)
                return

        # the new candidate needs to be added, so remove the worst
        # (lowest raw_score) one
        if len(self.pop) == self.pop_size:
            del self.pop[-1]

        # add the new candidate
        a.info['looks_like'] = count_looks_like(a,
                                                self.all_cand,
                                                self.comparator)
        self.pop.append(a)
        self.pop.sort(key=get_raw_score, reverse=True)

    def __get_fitness__(self, indices, with_history=True):
        """Calculates the fitness using the formula from
        L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

        Sign change on the fitness compared to the formulation in the
        above-mentioned paper due to maximizing raw_score instead of
        minimizing energy. (Set raw_score=-energy to optimize the energy.)
        """

        scores = [get_raw_score(x) for x in self.pop]
        min_s = min(scores)
        max_s = max(scores)
        T = min_s - max_s
        if isinstance(indices, int):
            indices = [indices]

        f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.))
             for i in indices]
        if with_history:
            M = [float(self.pop[i].info['n_paired']) for i in indices]
            L = [float(self.pop[i].info['looks_like']) for i in indices]
            f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i])
                 for i in range(len(f))]
        return f

    def get_two_candidates(self, with_history=True):
        """ Returns two candidates for pairing employing the
        fitness criteria from
        L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816
        and the roulette wheel selection scheme described in
        R.L. Johnston, Dalton Transactions,
        Vol. 22, No. 22 (2003), pp. 4193-4207
        """

        if len(self.pop) < 2:
            self.update()

        if len(self.pop) < 2:
            return None

        fit = self.__get_fitness__(range(len(self.pop)), with_history)
        fmax = max(fit)
        c1 = self.pop[0]
        c2 = self.pop[0]
        used_before = False
        while c1.info['confid'] == c2.info['confid'] and not used_before:
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c1 = self.pop[t]
                    nnf = False
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c2 = self.pop[t]
                    nnf = False

            c1id = c1.info['confid']
            c2id = c2.info['confid']
            used_before = (min([c1id, c2id]), max([c1id, c2id])) in self.pairs
        return (c1.copy(), c2.copy())

    def get_one_candidate(self, with_history=True):
        """Returns one candidate for mutation employing the
        fitness criteria from
        L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816
        and the roulette wheel selection scheme described in
        R.L. Johnston, Dalton Transactions,
        Vol. 22, No. 22 (2003), pp. 4193-4207
        """
        if len(self.pop) < 1:
            self.update()

        if len(self.pop) < 1:
            return None

        fit = self.__get_fitness__(range(len(self.pop)), with_history)
        fmax = max(fit)
        nnf = True
        while nnf:
            t = self.rng.randint(len(self.pop))
            if fit[t] > self.rng.random() * fmax:
                c1 = self.pop[t]
                nnf = False

        return c1.copy()

    def _write_log(self):
        """Writes the population to a logfile.

        The format is::

            timestamp: generation(if available): id1,id2,id3..."""
        if self.logfile is not None:
            ids = [str(a.info['relax_id']) for a in self.pop]
            if ids != []:
                try:
                    gen_nums = [c.info['key_value_pairs']['generation']
                                for c in self.all_cand]
                    max_gen = max(gen_nums)
                except KeyError:
                    max_gen = ' '
                with open(self.logfile, 'a') as fd:
                    fd.write('{time}: {gen}: {pop}\n'.format(time=now(),
                                                             pop=','.join(ids),
                                                             gen=max_gen))

    def is_uniform(self, func, min_std, pop=None):
        """Tests whether the current population is uniform or diverse.
        Returns True if uniform, False otherwise.

        Parameters:

        func: function
            Takes an Atoms object as its single argument and returns a
            value that is tested against the rest of the population.

        min_std: int or float
            The minimum standard deviation; if the population has a lower
            standard deviation it is considered uniform.

        pop: list, optional
            Use this list of Atoms objects instead of the current population.
        """
        if pop is None:
            pop = self.pop
        vals = [func(a) for a in pop]
        stddev = np.std(vals)
        if stddev < min_std:
            return True
        return False

    def mass_extinction(self, ids):
        """Kills every candidate in the database with gaid in the
        supplied list of ids. Typically used on the main part of the current
        population if the diversity is too small.

        Parameters:

        ids: list
            list of ids of candidates to be killed.

        """
        for confid in ids:
            self.dc.kill_candidate(confid)
        self.pop = []
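

# --- Illustrative sketch (not part of the module) --------------------------
# The fitness used by Population.__get_fitness__ combines the scaled
# objective F(s) = 0.5 * (1 - tanh(2 * (s - s_max) / (s_min - s_max) - 1))
# from Vilhelmsen et al. with the history penalty 1/sqrt(1 + M)/sqrt(1 + L),
# where M is the number of times a candidate has been paired and L the
# number of similar candidates. The standalone helper below is a minimal
# re-implementation for experimentation; the numbers in the example call
# are made up.
def _demo_scaled_fitness(raw_scores, n_paired, looks_like):
    s = np.asarray(raw_scores, dtype=float)
    T = s.min() - s.max()
    f = 0.5 * (1. - np.tanh(2. * (s - s.max()) / T - 1.))
    # History penalty: heavily used or frequently duplicated candidates
    # become less likely to be selected again.
    f *= 1. / np.sqrt(1. + np.asarray(n_paired, dtype=float))
    f *= 1. / np.sqrt(1. + np.asarray(looks_like, dtype=float))
    return f


# Example (made-up numbers): the best candidate gets a fitness close to 1,
# the worst close to 0, and pairing history reduces both.
# _demo_scaled_fitness([-1.0, -2.5, -4.0], [2, 0, 1], [1, 0, 0])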


class RandomPopulation(Population):
    def __init__(self, data_connection, population_size,
                 comparator=None, logfile=None, exclude_used_pairs=False,
                 bad_candidates=0, use_extinct=False):
        self.exclude_used_pairs = exclude_used_pairs
        self.bad_candidates = bad_candidates
        Population.__init__(self, data_connection, population_size,
                            comparator, logfile, use_extinct)

    def __initialize_pop__(self):
        """ Private method that initializes the population when
        the population is created. """

        # Get all relaxed candidates from the database
        ue = self.use_extinct
        all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
        all_cand.sort(key=get_raw_score, reverse=True)
        # all_cand.sort(key=lambda x: x.get_potential_energy())

        if len(all_cand) > 0:
            # Fill up the population with the self.pop_size most stable
            # unique candidates.
            ratings = []
            best_raw = get_raw_score(all_cand[0])
            i = 0
            while i < len(all_cand):
                c = all_cand[i]
                i += 1
                eq = False
                for a in self.pop:
                    if self.comparator.looks_like(a, c):
                        eq = True
                        break
                if not eq:
                    if len(self.pop) < self.pop_size - self.bad_candidates:
                        self.pop.append(c)
                    else:
                        exp_fact = exp(get_raw_score(c) / best_raw)
                        ratings.append([c, (exp_fact - 1) * self.rng.random()])
            ratings.sort(key=itemgetter(1), reverse=True)

            for i in range(self.bad_candidates):
                self.pop.append(ratings[i][0])

        for a in self.pop:
            a.info['looks_like'] = count_looks_like(a, all_cand,
                                                    self.comparator)

        self.all_cand = all_cand
        self.__calc_participation__()

    def update(self):
        """ The update method in Population adds to the end of
        the population. That cannot be used here, since bad candidates
        may need to stay in the population; therefore the population
        is simply recalculated every time. """

        self.pop = []
        self.__initialize_pop__()

        self._write_log()

    def get_one_candidate(self):
        """Returns one candidate at random."""
        if len(self.pop) < 1:
            self.update()

        if len(self.pop) < 1:
            return None

        t = self.rng.randint(len(self.pop))
        c = self.pop[t]

        return c.copy()

    def get_two_candidates(self):
        """Returns two candidates at random."""
        if len(self.pop) < 2:
            self.update()

        if len(self.pop) < 2:
            return None

        c1 = self.pop[0]
        c2 = self.pop[0]
        used_before = False
        while c1.info['confid'] == c2.info['confid'] and not used_before:
            t = self.rng.randint(len(self.pop))
            c1 = self.pop[t]
            t = self.rng.randint(len(self.pop))
            c2 = self.pop[t]

            c1id = c1.info['confid']
            c2id = c2.info['confid']
            used_before = (tuple(sorted([c1id, c2id])) in self.pairs and
                           self.exclude_used_pairs)
        return (c1.copy(), c2.copy())
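

# --- Illustrative sketch (not part of the module) --------------------------
# RandomPopulation.__initialize_pop__ reserves `bad_candidates` slots for
# structures outside the top of the ranking. Each leftover candidate gets a
# stochastic rating (exp(raw_score / best_raw) - 1) * random(), and the
# highest-rated ones fill the reserved slots. The helper below is a minimal
# re-implementation of that weighting under the assumption that the raw
# scores and the random generator are supplied by the caller.
def _demo_pick_bad_candidates(raw_scores, n_bad, rng=np.random):
    best_raw = max(raw_scores)
    ratings = []
    for idx, score in enumerate(raw_scores):
        # The exponential factor follows the original weighting; the random
        # draw keeps the selection stochastic.
        ratings.append((idx, (exp(score / best_raw) - 1) * rng.random()))
    ratings.sort(key=itemgetter(1), reverse=True)
    return [idx for idx, _ in ratings[:n_bad]]


# Example (made-up scores): select 2 of the leftover candidates.
# _demo_pick_bad_candidates([-1.2, -1.5, -2.0, -3.1], n_bad=2)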


class FitnessSharingPopulation(Population):
    """ Fitness sharing population that penalizes structures if they are
    too similar. This is determined by a distance measure.

    Parameters:

    comp_key: string
        Key where the distance measure can be found in the
        atoms.info['key_value_pairs'] dictionary.

    threshold: float or int
        Value above which no penalization of the fitness takes place.

    alpha_sh: float or int
        Determines the shape of the sharing function.
        Default is 1, which gives a linear sharing function.

    """

    def __init__(self, data_connection, population_size,
                 comp_key, threshold, alpha_sh=1.,
                 comparator=None, logfile=None, use_extinct=False):
        self.comp_key = comp_key
        self.dt = threshold  # dissimilarity threshold
        self.alpha_sh = alpha_sh
        self.fit_scaling = 1.

        self.sh_cache = {}

        Population.__init__(self, data_connection, population_size,
                            comparator, logfile, use_extinct)

    def __get_fitness__(self, candidates):
        """Input should be sorted according to raw_score."""
        max_s = get_raw_score(candidates[0])
        min_s = get_raw_score(candidates[-1])
        T = min_s - max_s

        shared_fit = []
        for c in candidates:
            sc = get_raw_score(c)
            obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.))
            m = 1.
            ck = c.info['key_value_pairs'][self.comp_key]
            for other in candidates:
                if other != c:
                    name = tuple(sorted([c.info['confid'],
                                         other.info['confid']]))
                    if name not in self.sh_cache:
                        ok = other.info['key_value_pairs'][self.comp_key]
                        d = abs(ck - ok)
                        if d < self.dt:
                            v = 1 - (d / self.dt)**self.alpha_sh
                            self.sh_cache[name] = v
                        else:
                            self.sh_cache[name] = 0
                    m += self.sh_cache[name]

            shf = (obj_fit ** self.fit_scaling) / m
            shared_fit.append(shf)
        return shared_fit

    def update(self):
        """ The update method in Population adds to the end of
        the population. That cannot be used here, since the shared fitness
        of all candidates changes when new ones are added; therefore the
        population is simply recalculated every time. """

        self.pop = []
        self.__initialize_pop__()

        self._write_log()

    def __initialize_pop__(self):
        # Get all relaxed candidates from the database
        ue = self.use_extinct
        all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
        all_cand.sort(key=get_raw_score, reverse=True)

        if len(all_cand) > 0:
            shared_fit = self.__get_fitness__(all_cand)
            all_sorted = list(zip(*sorted(zip(shared_fit, all_cand),
                                          reverse=True)))[1]

            # Fill up the population with the self.pop_size most stable
            # unique candidates.
            i = 0
            while i < len(all_sorted) and len(self.pop) < self.pop_size:
                c = all_sorted[i]
                i += 1
                eq = False
                for a in self.pop:
                    if self.comparator.looks_like(a, c):
                        eq = True
                        break
                if not eq:
                    self.pop.append(c)

            for a in self.pop:
                a.info['looks_like'] = count_looks_like(a, all_cand,
                                                        self.comparator)
        self.all_cand = all_cand

    def get_two_candidates(self):
        """ Returns two candidates for pairing employing the
        fitness criteria from
        L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816
        and the roulette wheel selection scheme described in
        R.L. Johnston, Dalton Transactions,
        Vol. 22, No. 22 (2003), pp. 4193-4207
        """

        if len(self.pop) < 2:
            self.update()

        if len(self.pop) < 2:
            return None

        fit = self.__get_fitness__(self.pop)
        fmax = max(fit)
        c1 = self.pop[0]
        c2 = self.pop[0]
        while c1.info['confid'] == c2.info['confid']:
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c1 = self.pop[t]
                    nnf = False
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c2 = self.pop[t]
                    nnf = False

        return (c1.copy(), c2.copy())
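

# --- Illustrative sketch (not part of the module) --------------------------
# FitnessSharingPopulation divides the objective fitness of a candidate by
# m = 1 + sum_j sh(d_ij), where sh(d) = 1 - (d / threshold)**alpha_sh for
# d < threshold and 0 otherwise, and d_ij is the distance between the
# comp_key values of candidates i and j. The helper below reproduces this
# penalty for a plain list of distance-measure values, an input format
# assumed purely for illustration.
def _demo_shared_fitness(obj_fit, comp_values, threshold, alpha_sh=1.):
    shared = []
    for i, ci in enumerate(comp_values):
        m = 1.
        for j, cj in enumerate(comp_values):
            if i == j:
                continue
            d = abs(ci - cj)
            if d < threshold:
                # Similar candidates contribute to the niche count m.
                m += 1 - (d / threshold) ** alpha_sh
        shared.append(obj_fit[i] / m)
    return shared


# Example (made-up values): the two nearly identical candidates share their
# fitness, while the isolated third one keeps almost all of it.
# _demo_shared_fitness([1.0, 0.9, 0.5], [0.10, 0.11, 0.60], threshold=0.2)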


class RankFitnessPopulation(Population):
    """ Ranks the fitness relative to a chosen variable in order to flatten
    the fitness surface in a certain direction, such that mating across the
    variable is equally likely irrespective of raw_score.

    Parameters:

    variable_function: function
        A function that takes as input an Atoms object and returns
        the variable that differentiates the ranks.

    exp_function: boolean
        If True use an exponential function for ranking the fitness.
        If False use the same as in Population. Default True.

    exp_prefactor: float
        The prefactor used in the exponential fitness scaling function.
        Default 0.5
    """

    def __init__(self, data_connection, population_size, variable_function,
                 comparator=None, logfile=None, use_extinct=False,
                 exp_function=True, exp_prefactor=0.5):
        self.exp_function = exp_function
        self.exp_prefactor = exp_prefactor
        self.vf = variable_function
        # The current fitness is set at each update of the population
        self.current_fitness = None

        Population.__init__(self, data_connection, population_size,
                            comparator, logfile, use_extinct)

    def get_rank(self, rcand, key=None):
        # Set the initial order of the candidates, will need to
        # be returned in this order at the end of ranking.
        ordered = list(zip(range(len(rcand)), rcand))

        # Niche and rank candidates.
        rec_nic = []
        rank_fit = []
        for o, c in ordered:
            if o not in rec_nic:
                ntr = []
                ce1 = self.vf(c)
                rec_nic.append(o)
                ntr.append([o, c])
                for oother, cother in ordered:
                    if oother not in rec_nic:
                        ce2 = self.vf(cother)
                        if ce1 == ce2:
                            # put the now processed oother
                            # in rec_nic as well
                            rec_nic.append(oother)
                            ntr.append([oother, cother])
                # Each niche is sorted according to raw_score and
                # assigned a fitness according to the ranking of
                # the candidates
                ntr.sort(key=lambda x: x[1].info['key_value_pairs'][key],
                         reverse=True)
                start_rank = -1
                for cor, (on, cn) in enumerate(ntr):
                    rank = start_rank - cor
                    rank_fit.append([on, cn, rank])
        # The original order is reformed
        rank_fit.sort(key=itemgetter(0), reverse=False)
        return np.array(list(zip(*rank_fit))[2])

    def __get_fitness__(self, candidates):
        expf = self.exp_function
        rfit = self.get_rank(candidates, key='raw_score')

        if not expf:
            rmax = max(rfit)
            rmin = min(rfit)
            T = rmin - rmax
            # If using obj_rank probability, must have non-zero T val.
            # pop_size must be greater than number of permutations.
            # We test for this here
            msg = "Equal fitness for best and worst candidate in the "
            msg += "population! Fitness scaling is impossible! "
            msg += "Try with a larger population."
            assert T != 0., msg
            return 0.5 * (1. - np.tanh(2. * (rfit - rmax) / T - 1.))
        else:
            return self.exp_prefactor ** (-rfit - 1)

    def update(self):
        """ The update method in Population adds to the end of
        the population. That cannot be used here, since the fitness of
        all candidates may change when new ones are added; therefore the
        population is simply recalculated every time. """

        self.pop = []
        self.__initialize_pop__()
        self.current_fitness = self.__get_fitness__(self.pop)

        self._write_log()

    def __initialize_pop__(self):
        # Get all relaxed candidates from the database
        ue = self.use_extinct
        all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
        all_cand.sort(key=get_raw_score, reverse=True)

        if len(all_cand) > 0:
            fitf = self.__get_fitness__(all_cand)
            all_sorted = list(zip(fitf, all_cand))
            all_sorted.sort(key=itemgetter(0), reverse=True)
            sort_cand = []
            for _, t2 in all_sorted:
                sort_cand.append(t2)
            all_sorted = sort_cand

            # Fill up the population with the self.pop_size most stable
            # unique candidates.
            i = 0
            while i < len(all_sorted) and len(self.pop) < self.pop_size:
                c = all_sorted[i]
                c_vf = self.vf(c)
                i += 1
                eq = False
                for a in self.pop:
                    a_vf = self.vf(a)
                    # Only run comparator if the variable_function (self.vf)
                    # returns the same. If it returns something different the
                    # candidates are inherently different.
                    # This is done to speed up.
                    if a_vf == c_vf:
                        if self.comparator.looks_like(a, c):
                            eq = True
                            break
                if not eq:
                    self.pop.append(c)
        self.all_cand = all_cand

    def get_two_candidates(self):
        """ Returns two candidates for pairing employing the
        roulette wheel selection scheme described in
        R.L. Johnston, Dalton Transactions,
        Vol. 22, No. 22 (2003), pp. 4193-4207
        """

        if len(self.pop) < 2:
            self.update()

        if len(self.pop) < 2:
            return None

        # Use saved fitness
        fit = self.current_fitness
        fmax = max(fit)
        c1 = self.pop[0]
        c2 = self.pop[0]
        while c1.info['confid'] == c2.info['confid']:
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c1 = self.pop[t]
                    nnf = False
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c2 = self.pop[t]
                    nnf = False

        return (c1.copy(), c2.copy())
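

# --- Illustrative sketch (not part of the module) --------------------------
# RankFitnessPopulation groups candidates into niches that share the same
# value of variable_function, ranks each niche by raw_score (-1 for the best,
# -2 for the next, ...) and, with exp_function=True, converts the rank r to
# the fitness exp_prefactor ** (-r - 1). The helper below shows that
# conversion for a flat list of (niche, raw_score) tuples; this input format
# is an assumption made for illustration.
def _demo_rank_fitness(niche_and_score, exp_prefactor=0.5):
    ranks = np.empty(len(niche_and_score))
    niches = {niche for niche, _ in niche_and_score}
    for niche in niches:
        # Indices of the members of this niche, best raw_score first.
        members = [i for i, (n, _) in enumerate(niche_and_score) if n == niche]
        members.sort(key=lambda i: niche_and_score[i][1], reverse=True)
        for position, i in enumerate(members):
            ranks[i] = -1 - position
    # Rank -1 maps to prefactor**0 = 1, rank -2 to prefactor**1, etc.
    return exp_prefactor ** (-ranks - 1)


# Example (made-up data): two niches 'A' and 'B'; the best candidate of each
# niche gets fitness 1 regardless of how the niches compare in raw_score.
# _demo_rank_fitness([('A', -1.0), ('A', -2.0), ('B', -5.0), ('B', -6.0)])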


class MultiObjectivePopulation(RankFitnessPopulation):
    """ Allows for assignment of fitness based on a set of two variables
    such that fitness is ranked according to a Pareto-front of
    non-dominated candidates.

    Parameters
    ----------
    abs_data: list
        Set of key_value_pairs in atoms object for which fitness should
        be assigned based on absolute value.

    rank_data: list
        Set of key_value_pairs in atoms object for which data should
        be ranked in order to ascribe fitness.

    variable_function: function
        A function that takes as input an Atoms object and returns
        the variable that differentiates the ranks. Only use if
        data is ranked.

    exp_function: boolean
        If True use an exponential function for ranking the fitness.
        If False use the same as in Population. Default True.

    exp_prefactor: float
        The prefactor used in the exponential fitness scaling function.
        Default 0.5

    """

    def __init__(self, data_connection, population_size,
                 variable_function=None, comparator=None, logfile=None,
                 use_extinct=False, abs_data=None, rank_data=None,
                 exp_function=True, exp_prefactor=0.5):
        # The current fitness is set at each update of the population
        self.current_fitness = None

        if rank_data is None:
            rank_data = []
        self.rank_data = rank_data

        if abs_data is None:
            abs_data = []
        self.abs_data = abs_data

        RankFitnessPopulation.__init__(self, data_connection, population_size,
                                       variable_function, comparator, logfile,
                                       use_extinct, exp_function,
                                       exp_prefactor)

    def get_nonrank(self, nrcand, key=None):
        """Returns a list of fitness values."""
        nrc_list = []
        for nrc in nrcand:
            nrc_list.append(nrc.info['key_value_pairs'][key])
        return nrc_list

    def __get_fitness__(self, candidates):
        # There are no defaults set for the datasets to be
        # used in this function, as such we test that the
        # user has specified at least two here.
        msg = "This is a multi-objective fitness function"
        msg += " so there must be at least two datasets"
        msg += " stated in the rank_data and abs_data variables"
        assert len(self.rank_data) + len(self.abs_data) >= 2, msg

        expf = self.exp_function

        all_fitnesses = []
        used = set()
        for rd in self.rank_data:
            used.add(rd)
            # Build ranked fitness based on rd
            all_fitnesses.append(self.get_rank(candidates, key=rd))

        for d in self.abs_data:
            if d not in used:
                used.add(d)
                # Build fitness based on d
                all_fitnesses.append(self.get_nonrank(candidates, key=d))

        # Set the initial order of the ranks, will need to
        # be returned in this order at the end.
        fordered = list(zip(range(len(all_fitnesses[0])), *all_fitnesses))
        mvf_rank = -1  # Start multi variable rank at -1.
        rec_vrc = []  # A record of already ranked candidates.
        mvf_list = []  # A list for all candidate ranks.
        # Sort by raw_score_1 in case this is different from
        # the stored raw_score() variable that all_cands are
        # sorted by.
        fordered.sort(key=itemgetter(1), reverse=True)
        # Niche candidates with equal or better raw_score to
        # current candidate.
        for a in fordered:
            order, rest = a[0], a[1:]
            if order not in rec_vrc:
                pff = []
                pff2 = []
                rec_vrc.append(order)
                pff.append((order, rest))
                for b in fordered:
                    border, brest = b[0], b[1:]
                    if border not in rec_vrc:
                        if np.any(np.array(brest) >= np.array(rest)):
                            pff.append((border, brest))
                # Remove any candidate from pff list that is dominated
                # by another in the list.
                for na in pff:
                    norder, nrest = na[0], na[1:]
                    dom = False
                    for nb in pff:
                        nborder, nbrest = nb[0], nb[1:]
                        if norder != nborder:
                            if np.all(np.array(nbrest) > np.array(nrest)):
                                dom = True
                    if not dom:
                        pff2.append((norder, nrest))
                # Assign pareto rank from -1 to -N niches.
                for ffa in pff2:
                    fforder, ffrest = ffa[0], ffa[1:]
                    rec_vrc.append(fforder)
                    mvf_list.append((fforder, mvf_rank, ffrest))
                mvf_rank = mvf_rank - 1
        # The original order is reformed
        mvf_list.sort(key=itemgetter(0), reverse=False)
        rfro = np.array(list(zip(*mvf_list))[1])

        if not expf:
            rmax = max(rfro)
            rmin = min(rfro)
            T = rmin - rmax
            # If using obj_rank probability, must have non-zero T val.
            # pop_size must be greater than number of permutations.
            # We test for this here
            msg = "Equal fitness for best and worst candidate in the "
            msg += "population! Fitness scaling is impossible! "
            msg += "Try with a larger population."
            assert T != 0., msg
            return 0.5 * (1. - np.tanh(2. * (rfro - rmax) / T - 1.))
        else:
            return self.exp_prefactor ** (-rfro - 1)

    def __initialize_pop__(self):
        # Get all relaxed candidates from the database
        ue = self.use_extinct
        all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
        all_cand.sort(key=get_raw_score, reverse=True)

        if len(all_cand) > 0:
            fitf = self.__get_fitness__(all_cand)
            all_sorted = list(zip(fitf, all_cand))
            all_sorted.sort(key=itemgetter(0), reverse=True)
            sort_cand = []
            for _, t2 in all_sorted:
                sort_cand.append(t2)
            all_sorted = sort_cand

            # Fill up the population with the self.pop_size most stable
            # unique candidates.
            i = 0
            while i < len(all_sorted) and len(self.pop) < self.pop_size:
                c = all_sorted[i]
                # Use variable_function to decide whether to run comparator
                # if the function has been defined by the user. This does not
                # need to be dependent on using the rank_data function.
                if self.vf is not None:
                    c_vf = self.vf(c)
                i += 1
                eq = False
                for a in self.pop:
                    if self.vf is not None:
                        a_vf = self.vf(a)
                        # Only run comparator if the variable_function
                        # (self.vf) returns the same. If it returns something
                        # different the candidates are inherently different.
                        # This is done to speed up.
                        if a_vf == c_vf:
                            if self.comparator.looks_like(a, c):
                                eq = True
                                break
                    else:
                        if self.comparator.looks_like(a, c):
                            eq = True
                            break
                if not eq:
                    self.pop.append(c)
        self.all_cand = all_cand
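

# --- Illustrative sketch (not part of the module) --------------------------
# MultiObjectivePopulation assigns rank -1 to the first group of mutually
# non-dominated candidates, -2 to the next group once the first is removed,
# and so on. The helper below performs a standard non-dominated sorting for
# a list of fitness tuples (larger is better in every objective); it is a
# simplified stand-in for the niching loop in __get_fitness__ above.
def _demo_pareto_ranks(points):
    points = [np.asarray(p, dtype=float) for p in points]
    ranks = [None] * len(points)
    current_rank = -1
    remaining = set(range(len(points)))
    while remaining:
        front = []
        for i in remaining:
            # i is dominated if some other remaining point is strictly
            # better in every objective.
            dominated = any(np.all(points[j] > points[i])
                            for j in remaining if j != i)
            if not dominated:
                front.append(i)
        for i in front:
            ranks[i] = current_rank
            remaining.remove(i)
        current_rank -= 1
    return ranks


# Example (made-up 2D objectives): (1, 4) and (3, 1) are mutually
# non-dominated and share rank -1, while (0, 0) is dominated -> rank -2.
# _demo_pareto_ranks([(1, 4), (3, 1), (0, 0)])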