Coverage for /builds/ase/ase/ase/ga/population.py: 26.38%

527 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-08-02 00:12 +0000

1# fmt: off 

2 

3""" Implementation of a population for maintaining a GA population and 

4proposing structures to pair. """ 

5from math import exp, sqrt, tanh 

6from operator import itemgetter 

7 

8import numpy as np 

9 

10from ase.db.core import now 

11from ase.ga import get_raw_score 

12 

13 

14def count_looks_like(a, all_cand, comp): 

15 """Utility method for counting occurrences.""" 

16 n = 0 

17 for b in all_cand: 

18 if a.info['confid'] == b.info['confid']: 

19 continue 

20 if comp.looks_like(a, b): 

21 n += 1 

22 return n 

23 

24 

25class Population: 

26 """Population class which maintains the current population 

27 and proposes which candidates to pair together. 

28 

29 Parameters: 

30 

31 data_connection: DataConnection object 

32 Bla bla bla. 

33 

34 population_size: int 

35 The number of candidates in the population. 

36 

37 comparator: Comparator object 

38 this will tell if two configurations are equal. 

39 Default compare atoms objects directly. 

40 

41 logfile: str 

42 Text file that contains information about the population 

43 The format is:: 

44 

45 timestamp: generation(if available): id1,id2,id3... 

46 

47 Using this file greatly speeds up convergence checks. 

48 Default None meaning that no file is written. 

49 

50 use_extinct: boolean 

51 Set this to True if mass extinction and the extinct key 

52 are going to be used. Default is False. 

53 

54 rng: Random number generator 

55 By default numpy.random. 

56 """ 

57 

58 def __init__(self, data_connection, population_size, 

59 comparator=None, logfile=None, use_extinct=False, 

60 rng=np.random): 

61 self.dc = data_connection 

62 self.pop_size = population_size 

63 if comparator is None: 

64 from ase.ga.standard_comparators import AtomsComparator 

65 comparator = AtomsComparator() 

66 self.comparator = comparator 

67 self.logfile = logfile 

68 self.use_extinct = use_extinct 

69 self.rng = rng 

70 self.pop = [] 

71 self.pairs = None 

72 self.all_cand = None 

73 self.__initialize_pop__() 

74 

75 def __initialize_pop__(self): 

76 """ Private method that initializes the population when 

77 the population is created. """ 

78 

79 # Get all relaxed candidates from the database 

80 ue = self.use_extinct 

81 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

82 all_cand.sort(key=lambda x: x.info['key_value_pairs']['raw_score'], 

83 reverse=True) 

84 # all_cand.sort(key=lambda x: x.get_potential_energy()) 

85 

86 # Fill up the population with the self.pop_size most stable 

87 # unique candidates. 

88 i = 0 

89 while i < len(all_cand) and len(self.pop) < self.pop_size: 

90 c = all_cand[i] 

91 i += 1 

92 eq = False 

93 for a in self.pop: 

94 if self.comparator.looks_like(a, c): 

95 eq = True 

96 break 

97 if not eq: 

98 self.pop.append(c) 

99 

100 for a in self.pop: 

101 a.info['looks_like'] = count_looks_like(a, all_cand, 

102 self.comparator) 

103 

104 self.all_cand = all_cand 

105 self.__calc_participation__() 

106 

107 def __calc_participation__(self): 

108 """ Determines, from the database, how many times each 

109 candidate has been used to generate new candidates. """ 

110 (participation, pairs) = self.dc.get_participation_in_pairing() 

111 for a in self.pop: 

112 if a.info['confid'] in participation.keys(): 

113 a.info['n_paired'] = participation[a.info['confid']] 

114 else: 

115 a.info['n_paired'] = 0 

116 self.pairs = pairs 

117 

118 def update(self, new_cand=None): 

119 """ New candidates can be added to the database 

120 after the population object has been created. 

121 This method extracts these new candidates from the 

122 database and includes them in the population. """ 

123 

124 if len(self.pop) == 0: 

125 self.__initialize_pop__() 

126 

127 if new_cand is None: 

128 ue = self.use_extinct 

129 new_cand = self.dc.get_all_relaxed_candidates(only_new=True, 

130 use_extinct=ue) 

131 

132 for a in new_cand: 

133 self.__add_candidate__(a) 

134 self.all_cand.append(a) 

135 self.__calc_participation__() 

136 self._write_log() 

137 

138 def get_current_population(self): 

139 """ Returns a copy of the current population. """ 

140 self.update() 

141 return [a.copy() for a in self.pop] 

142 

143 def get_population_after_generation(self, gen): 

144 """ Returns a copy of the population as it where 

145 after generation gen""" 

146 if self.logfile is not None: 

147 with open(self.logfile) as fd: 

148 gens = {} 

149 for line in fd: 

150 _, no, popul = line.split(':') 

151 gens[int(no)] = [int(i) for i in popul.split(',')] 

152 return [c.copy() for c in self.all_cand[::-1] 

153 if c.info['relax_id'] in gens[gen]] 

154 

155 all_candidates = [c for c in self.all_cand 

156 if c.info['key_value_pairs']['generation'] <= gen] 

157 cands = [all_candidates[0]] 

158 for b in all_candidates: 

159 if b not in cands: 

160 for a in cands: 

161 if self.comparator.looks_like(a, b): 

162 break 

163 else: 

164 cands.append(b) 

165 pop = cands[:self.pop_size] 

166 return [a.copy() for a in pop] 

167 

168 def __add_candidate__(self, a): 

169 """ Adds a single candidate to the population. """ 

170 

171 # check if the structure is too low in raw score 

172 raw_score_a = get_raw_score(a) 

173 raw_score_worst = get_raw_score(self.pop[-1]) 

174 if raw_score_a < raw_score_worst \ 

175 and len(self.pop) == self.pop_size: 

176 return 

177 

178 # check if the new candidate should 

179 # replace a similar structure in the population 

180 for (i, b) in enumerate(self.pop): 

181 if self.comparator.looks_like(a, b): 

182 if get_raw_score(b) < raw_score_a: 

183 del self.pop[i] 

184 a.info['looks_like'] = count_looks_like(a, 

185 self.all_cand, 

186 self.comparator) 

187 self.pop.append(a) 

188 self.pop.sort(key=get_raw_score, 

189 reverse=True) 

190 return 

191 

192 # the new candidate needs to be added, so remove the highest 

193 # energy one 

194 if len(self.pop) == self.pop_size: 

195 del self.pop[-1] 

196 

197 # add the new candidate 

198 a.info['looks_like'] = count_looks_like(a, 

199 self.all_cand, 

200 self.comparator) 

201 self.pop.append(a) 

202 self.pop.sort(key=get_raw_score, reverse=True) 

203 

204 def __get_fitness__(self, indecies, with_history=True): 

205 """Calculates the fitness using the formula from 

206 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

207 

208 Sign change on the fitness compared to the formulation in the 

209 abovementioned paper due to maximizing raw_score instead of 

210 minimizing energy. (Set raw_score=-energy to optimize the energy) 

211 """ 

212 

213 scores = [get_raw_score(x) for x in self.pop] 

214 min_s = min(scores) 

215 max_s = max(scores) 

216 T = min_s - max_s 

217 if isinstance(indecies, int): 

218 indecies = [indecies] 

219 

220 f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.)) 

221 for i in indecies] 

222 if with_history: 

223 M = [float(self.pop[i].info['n_paired']) for i in indecies] 

224 L = [float(self.pop[i].info['looks_like']) for i in indecies] 

225 f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i]) 

226 for i in range(len(f))] 

227 return f 

228 

229 def get_two_candidates(self, with_history=True): 

230 """ Returns two candidates for pairing employing the 

231 fitness criteria from 

232 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

233 and the roulete wheel selection scheme described in 

234 R.L. Johnston Dalton Transactions, 

235 Vol. 22, No. 22. (2003), pp. 4193-4207 

236 """ 

237 

238 if len(self.pop) < 2: 

239 self.update() 

240 

241 if len(self.pop) < 2: 

242 return None 

243 

244 fit = self.__get_fitness__(range(len(self.pop)), with_history) 

245 fmax = max(fit) 

246 c1 = self.pop[0] 

247 c2 = self.pop[0] 

248 used_before = False 

249 while c1.info['confid'] == c2.info['confid'] and not used_before: 

250 nnf = True 

251 while nnf: 

252 t = self.rng.randint(len(self.pop)) 

253 if fit[t] > self.rng.random() * fmax: 

254 c1 = self.pop[t] 

255 nnf = False 

256 nnf = True 

257 while nnf: 

258 t = self.rng.randint(len(self.pop)) 

259 if fit[t] > self.rng.random() * fmax: 

260 c2 = self.pop[t] 

261 nnf = False 

262 

263 c1id = c1.info['confid'] 

264 c2id = c2.info['confid'] 

265 used_before = (min([c1id, c2id]), max([c1id, c2id])) in self.pairs 

266 return (c1.copy(), c2.copy()) 

267 

268 def get_one_candidate(self, with_history=True): 

269 """Returns one candidate for mutation employing the 

270 fitness criteria from 

271 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

272 and the roulete wheel selection scheme described in 

273 R.L. Johnston Dalton Transactions, 

274 Vol. 22, No. 22. (2003), pp. 4193-4207 

275 """ 

276 if len(self.pop) < 1: 

277 self.update() 

278 

279 if len(self.pop) < 1: 

280 return None 

281 

282 fit = self.__get_fitness__(range(len(self.pop)), with_history) 

283 fmax = max(fit) 

284 nnf = True 

285 while nnf: 

286 t = self.rng.randint(len(self.pop)) 

287 if fit[t] > self.rng.random() * fmax: 

288 c1 = self.pop[t] 

289 nnf = False 

290 

291 return c1.copy() 

292 

293 def _write_log(self): 

294 """Writes the population to a logfile. 

295 

296 The format is:: 

297 

298 timestamp: generation(if available): id1,id2,id3...""" 

299 if self.logfile is not None: 

300 ids = [str(a.info['relax_id']) for a in self.pop] 

301 if ids != []: 

302 try: 

303 gen_nums = [c.info['key_value_pairs']['generation'] 

304 for c in self.all_cand] 

305 max_gen = max(gen_nums) 

306 except KeyError: 

307 max_gen = ' ' 

308 with open(self.logfile, 'a') as fd: 

309 fd.write('{time}: {gen}: {pop}\n'.format(time=now(), 

310 pop=','.join(ids), 

311 gen=max_gen)) 

312 

313 def is_uniform(self, func, min_std, pop=None): 

314 """Tests whether the current population is uniform or diverse. 

315 Returns True if uniform, False otherwise. 

316 

317 Parameters: 

318 

319 func: function 

320 that takes one argument an atoms object and returns a value that 

321 will be used for testing against the rest of the population. 

322 

323 min_std: int or float 

324 The minimum standard deviation, if the population has a lower 

325 std dev it is uniform. 

326 

327 pop: list, optional 

328 use this list of Atoms objects instead of the current population. 

329 """ 

330 if pop is None: 

331 pop = self.pop 

332 vals = [func(a) for a in pop] 

333 stddev = np.std(vals) 

334 if stddev < min_std: 

335 return True 

336 return False 

337 

338 def mass_extinction(self, ids): 

339 """Kills every candidate in the database with gaid in the 

340 supplied list of ids. Typically used on the main part of the current 

341 population if the diversity is to small. 

342 

343 Parameters: 

344 

345 ids: list 

346 list of ids of candidates to be killed. 

347 

348 """ 

349 for confid in ids: 

350 self.dc.kill_candidate(confid) 

351 self.pop = [] 

352 

353 

354class RandomPopulation(Population): 

355 def __init__(self, data_connection, population_size, 

356 comparator=None, logfile=None, exclude_used_pairs=False, 

357 bad_candidates=0, use_extinct=False): 

358 self.exclude_used_pairs = exclude_used_pairs 

359 self.bad_candidates = bad_candidates 

360 Population.__init__(self, data_connection, population_size, 

361 comparator, logfile, use_extinct) 

362 

363 def __initialize_pop__(self): 

364 """ Private method that initializes the population when 

365 the population is created. """ 

366 

367 # Get all relaxed candidates from the database 

368 ue = self.use_extinct 

369 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

370 all_cand.sort(key=get_raw_score, reverse=True) 

371 # all_cand.sort(key=lambda x: x.get_potential_energy()) 

372 

373 if len(all_cand) > 0: 

374 # Fill up the population with the self.pop_size most stable 

375 # unique candidates. 

376 ratings = [] 

377 best_raw = get_raw_score(all_cand[0]) 

378 i = 0 

379 while i < len(all_cand): 

380 c = all_cand[i] 

381 i += 1 

382 eq = False 

383 for a in self.pop: 

384 if self.comparator.looks_like(a, c): 

385 eq = True 

386 break 

387 if not eq: 

388 if len(self.pop) < self.pop_size - self.bad_candidates: 

389 self.pop.append(c) 

390 else: 

391 exp_fact = exp(get_raw_score(c) / best_raw) 

392 ratings.append([c, (exp_fact - 1) * self.rng.random()]) 

393 ratings.sort(key=itemgetter(1), reverse=True) 

394 

395 for i in range(self.bad_candidates): 

396 self.pop.append(ratings[i][0]) 

397 

398 for a in self.pop: 

399 a.info['looks_like'] = count_looks_like(a, all_cand, 

400 self.comparator) 

401 

402 self.all_cand = all_cand 

403 self.__calc_participation__() 

404 

405 def update(self): 

406 """ The update method in Population will add to the end of 

407 the population, that can't be used here since we might have 

408 bad candidates that need to stay in the population, therefore 

409 just recalc the population every time. """ 

410 

411 self.pop = [] 

412 self.__initialize_pop__() 

413 

414 self._write_log() 

415 

416 def get_one_candidate(self): 

417 """Returns one candidates at random.""" 

418 if len(self.pop) < 1: 

419 self.update() 

420 

421 if len(self.pop) < 1: 

422 return None 

423 

424 t = self.rng.randint(len(self.pop)) 

425 c = self.pop[t] 

426 

427 return c.copy() 

428 

429 def get_two_candidates(self): 

430 """Returns two candidates at random.""" 

431 if len(self.pop) < 2: 

432 self.update() 

433 

434 if len(self.pop) < 2: 

435 return None 

436 

437 c1 = self.pop[0] 

438 c2 = self.pop[0] 

439 used_before = False 

440 while c1.info['confid'] == c2.info['confid'] and not used_before: 

441 t = self.rng.randint(len(self.pop)) 

442 c1 = self.pop[t] 

443 t = self.rng.randint(len(self.pop)) 

444 c2 = self.pop[t] 

445 

446 c1id = c1.info['confid'] 

447 c2id = c2.info['confid'] 

448 used_before = (tuple(sorted([c1id, c2id])) in self.pairs and 

449 self.exclude_used_pairs) 

450 return (c1.copy(), c2.copy()) 

451 

452 

453class FitnessSharingPopulation(Population): 

454 """ Fitness sharing population that penalizes structures if they are 

455 too similar. This is determined by a distance measure 

456 

457 Parameters: 

458 

459 comp_key: string 

460 Key where the distance measure can be found in the 

461 atoms.info['key_value_pairs'] dictionary. 

462 

463 threshold: float or int 

464 Value above which no penalization of the fitness takes place 

465 

466 alpha_sh: float or int 

467 Determines the shape of the sharing function. 

468 Default is 1, which gives a linear sharing function. 

469 

470 """ 

471 

472 def __init__(self, data_connection, population_size, 

473 comp_key, threshold, alpha_sh=1., 

474 comparator=None, logfile=None, use_extinct=False): 

475 self.comp_key = comp_key 

476 self.dt = threshold # dissimilarity threshold 

477 self.alpha_sh = alpha_sh 

478 self.fit_scaling = 1. 

479 

480 self.sh_cache = {} 

481 

482 Population.__init__(self, data_connection, population_size, 

483 comparator, logfile, use_extinct) 

484 

485 def __get_fitness__(self, candidates): 

486 """Input should be sorted according to raw_score.""" 

487 max_s = get_raw_score(candidates[0]) 

488 min_s = get_raw_score(candidates[-1]) 

489 T = min_s - max_s 

490 

491 shared_fit = [] 

492 for c in candidates: 

493 sc = get_raw_score(c) 

494 obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.)) 

495 m = 1. 

496 ck = c.info['key_value_pairs'][self.comp_key] 

497 for other in candidates: 

498 if other != c: 

499 name = tuple(sorted([c.info['confid'], 

500 other.info['confid']])) 

501 if name not in self.sh_cache: 

502 ok = other.info['key_value_pairs'][self.comp_key] 

503 d = abs(ck - ok) 

504 if d < self.dt: 

505 v = 1 - (d / self.dt)**self.alpha_sh 

506 self.sh_cache[name] = v 

507 else: 

508 self.sh_cache[name] = 0 

509 m += self.sh_cache[name] 

510 

511 shf = (obj_fit ** self.fit_scaling) / m 

512 shared_fit.append(shf) 

513 return shared_fit 

514 

515 def update(self): 

516 """ The update method in Population will add to the end of 

517 the population, that can't be used here since the shared fitness 

518 will change for all candidates when new are added, therefore 

519 just recalc the population every time. """ 

520 

521 self.pop = [] 

522 self.__initialize_pop__() 

523 

524 self._write_log() 

525 

526 def __initialize_pop__(self): 

527 # Get all relaxed candidates from the database 

528 ue = self.use_extinct 

529 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

530 all_cand.sort(key=get_raw_score, reverse=True) 

531 

532 if len(all_cand) > 0: 

533 shared_fit = self.__get_fitness__(all_cand) 

534 all_sorted = list(zip(*sorted(zip(shared_fit, all_cand), 

535 reverse=True)))[1] 

536 

537 # Fill up the population with the self.pop_size most stable 

538 # unique candidates. 

539 i = 0 

540 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

541 c = all_sorted[i] 

542 i += 1 

543 eq = False 

544 for a in self.pop: 

545 if self.comparator.looks_like(a, c): 

546 eq = True 

547 break 

548 if not eq: 

549 self.pop.append(c) 

550 

551 for a in self.pop: 

552 a.info['looks_like'] = count_looks_like(a, all_cand, 

553 self.comparator) 

554 self.all_cand = all_cand 

555 

556 def get_two_candidates(self): 

557 """ Returns two candidates for pairing employing the 

558 fitness criteria from 

559 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

560 and the roulete wheel selection scheme described in 

561 R.L. Johnston Dalton Transactions, 

562 Vol. 22, No. 22. (2003), pp. 4193-4207 

563 """ 

564 

565 if len(self.pop) < 2: 

566 self.update() 

567 

568 if len(self.pop) < 2: 

569 return None 

570 

571 fit = self.__get_fitness__(self.pop) 

572 fmax = max(fit) 

573 c1 = self.pop[0] 

574 c2 = self.pop[0] 

575 while c1.info['confid'] == c2.info['confid']: 

576 nnf = True 

577 while nnf: 

578 t = self.rng.randint(len(self.pop)) 

579 if fit[t] > self.rng.random() * fmax: 

580 c1 = self.pop[t] 

581 nnf = False 

582 nnf = True 

583 while nnf: 

584 t = self.rng.randint(len(self.pop)) 

585 if fit[t] > self.rng.random() * fmax: 

586 c2 = self.pop[t] 

587 nnf = False 

588 

589 return (c1.copy(), c2.copy()) 

590 

591 

592class RankFitnessPopulation(Population): 

593 """ Ranks the fitness relative to set variable to flatten the surface 

594 in a certain direction such that mating across variable is equally 

595 likely irrespective of raw_score. 

596 

597 Parameters: 

598 

599 variable_function: function 

600 A function that takes as input an Atoms object and returns 

601 the variable that differentiates the ranks. 

602 

603 exp_function: boolean 

604 If True use an exponential function for ranking the fitness. 

605 If False use the same as in Population. Default True. 

606 

607 exp_prefactor: float 

608 The prefactor used in the exponential fitness scaling function. 

609 Default 0.5 

610 """ 

611 

612 def __init__(self, data_connection, population_size, variable_function, 

613 comparator=None, logfile=None, use_extinct=False, 

614 exp_function=True, exp_prefactor=0.5): 

615 self.exp_function = exp_function 

616 self.exp_prefactor = exp_prefactor 

617 self.vf = variable_function 

618 # The current fitness is set at each update of the population 

619 self.current_fitness = None 

620 

621 Population.__init__(self, data_connection, population_size, 

622 comparator, logfile, use_extinct) 

623 

624 def get_rank(self, rcand, key=None): 

625 # Set the initial order of the candidates, will need to 

626 # be returned in this order at the end of ranking. 

627 ordered = list(zip(range(len(rcand)), rcand)) 

628 

629 # Niche and rank candidates. 

630 rec_nic = [] 

631 rank_fit = [] 

632 for o, c in ordered: 

633 if o not in rec_nic: 

634 ntr = [] 

635 ce1 = self.vf(c) 

636 rec_nic.append(o) 

637 ntr.append([o, c]) 

638 for oother, cother in ordered: 

639 if oother not in rec_nic: 

640 ce2 = self.vf(cother) 

641 if ce1 == ce2: 

642 # put the now processed in oother 

643 # in rec_nic as well 

644 rec_nic.append(oother) 

645 ntr.append([oother, cother]) 

646 # Each niche is sorted according to raw_score and 

647 # assigned a fitness according to the ranking of 

648 # the candidates 

649 ntr.sort(key=lambda x: x[1].info['key_value_pairs'][key], 

650 reverse=True) 

651 start_rank = -1 

652 for cor, (on, cn) in enumerate(ntr): 

653 rank = start_rank - cor 

654 rank_fit.append([on, cn, rank]) 

655 # The original order is reformed 

656 rank_fit.sort(key=itemgetter(0), reverse=False) 

657 return np.array(list(zip(*rank_fit))[2]) 

658 

659 def __get_fitness__(self, candidates): 

660 expf = self.exp_function 

661 rfit = self.get_rank(candidates, key='raw_score') 

662 

663 if not expf: 

664 rmax = max(rfit) 

665 rmin = min(rfit) 

666 T = rmin - rmax 

667 # If using obj_rank probability, must have non-zero T val. 

668 # pop_size must be greater than number of permutations. 

669 # We test for this here 

670 msg = "Equal fitness for best and worst candidate in the " 

671 msg += "population! Fitness scaling is impossible! " 

672 msg += "Try with a larger population." 

673 assert T != 0., msg 

674 return 0.5 * (1. - np.tanh(2. * (rfit - rmax) / T - 1.)) 

675 else: 

676 return self.exp_prefactor ** (-rfit - 1) 

677 

678 def update(self): 

679 """ The update method in Population will add to the end of 

680 the population, that can't be used here since the fitness 

681 will potentially change for all candidates when new are added, 

682 therefore just recalc the population every time. """ 

683 

684 self.pop = [] 

685 self.__initialize_pop__() 

686 self.current_fitness = self.__get_fitness__(self.pop) 

687 

688 self._write_log() 

689 

690 def __initialize_pop__(self): 

691 # Get all relaxed candidates from the database 

692 ue = self.use_extinct 

693 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

694 all_cand.sort(key=get_raw_score, reverse=True) 

695 

696 if len(all_cand) > 0: 

697 fitf = self.__get_fitness__(all_cand) 

698 all_sorted = list(zip(fitf, all_cand)) 

699 all_sorted.sort(key=itemgetter(0), reverse=True) 

700 sort_cand = [] 

701 for _, t2 in all_sorted: 

702 sort_cand.append(t2) 

703 all_sorted = sort_cand 

704 

705 # Fill up the population with the self.pop_size most stable 

706 # unique candidates. 

707 i = 0 

708 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

709 c = all_sorted[i] 

710 c_vf = self.vf(c) 

711 i += 1 

712 eq = False 

713 for a in self.pop: 

714 a_vf = self.vf(a) 

715 # Only run comparator if the variable_function (self.vf) 

716 # returns the same. If it returns something different the 

717 # candidates are inherently different. 

718 # This is done to speed up. 

719 if a_vf == c_vf: 

720 if self.comparator.looks_like(a, c): 

721 eq = True 

722 break 

723 if not eq: 

724 self.pop.append(c) 

725 self.all_cand = all_cand 

726 

727 def get_two_candidates(self): 

728 """ Returns two candidates for pairing employing the 

729 roulete wheel selection scheme described in 

730 R.L. Johnston Dalton Transactions, 

731 Vol. 22, No. 22. (2003), pp. 4193-4207 

732 """ 

733 

734 if len(self.pop) < 2: 

735 self.update() 

736 

737 if len(self.pop) < 2: 

738 return None 

739 

740 # Use saved fitness 

741 fit = self.current_fitness 

742 fmax = max(fit) 

743 c1 = self.pop[0] 

744 c2 = self.pop[0] 

745 while c1.info['confid'] == c2.info['confid']: 

746 nnf = True 

747 while nnf: 

748 t = self.rng.randint(len(self.pop)) 

749 if fit[t] > self.rng.random() * fmax: 

750 c1 = self.pop[t] 

751 nnf = False 

752 nnf = True 

753 while nnf: 

754 t = self.rng.randint(len(self.pop)) 

755 if fit[t] > self.rng.random() * fmax: 

756 c2 = self.pop[t] 

757 nnf = False 

758 

759 return (c1.copy(), c2.copy()) 

760 

761 

762class MultiObjectivePopulation(RankFitnessPopulation): 

763 """ Allows for assignment of fitness based on a set of two variables 

764 such that fitness is ranked according to a Pareto-front of 

765 non-dominated candidates. 

766 

767 Parameters 

768 ---------- 

769 abs_data: list 

770 Set of key_value_pairs in atoms object for which fitness should 

771 be assigned based on absolute value. 

772 

773 rank_data: list 

774 Set of key_value_pairs in atoms object for which data should 

775 be ranked in order to ascribe fitness. 

776 

777 variable_function: function 

778 A function that takes as input an Atoms object and returns 

779 the variable that differentiates the ranks. Only use if 

780 data is ranked. 

781 

782 exp_function: boolean 

783 If True use an exponential function for ranking the fitness. 

784 If False use the same as in Population. Default True. 

785 

786 exp_prefactor: float 

787 The prefactor used in the exponential fitness scaling function. 

788 Default 0.5 

789 

790 """ 

791 

792 def __init__(self, data_connection, population_size, 

793 variable_function=None, comparator=None, logfile=None, 

794 use_extinct=False, abs_data=None, rank_data=None, 

795 exp_function=True, exp_prefactor=0.5): 

796 # The current fitness is set at each update of the population 

797 self.current_fitness = None 

798 

799 if rank_data is None: 

800 rank_data = [] 

801 self.rank_data = rank_data 

802 

803 if abs_data is None: 

804 abs_data = [] 

805 self.abs_data = abs_data 

806 

807 RankFitnessPopulation.__init__(self, data_connection, population_size, 

808 variable_function, comparator, logfile, 

809 use_extinct, exp_function, 

810 exp_prefactor) 

811 

812 def get_nonrank(self, nrcand, key=None): 

813 """"Returns a list of fitness values.""" 

814 nrc_list = [] 

815 for nrc in nrcand: 

816 nrc_list.append(nrc.info['key_value_pairs'][key]) 

817 return nrc_list 

818 

819 def __get_fitness__(self, candidates): 

820 # There are no defaults set for the datasets to be 

821 # used in this function, as such we test that the 

822 # user has specified at least two here. 

823 msg = "This is a multi-objective fitness function" 

824 msg += " so there must be at least two datasets" 

825 msg += " stated in the rank_data and abs_data variables" 

826 assert len(self.rank_data) + len(self.abs_data) >= 2, msg 

827 

828 expf = self.exp_function 

829 

830 all_fitnesses = [] 

831 used = set() 

832 for rd in self.rank_data: 

833 used.add(rd) 

834 # Build ranked fitness based on rd 

835 all_fitnesses.append(self.get_rank(candidates, key=rd)) 

836 

837 for d in self.abs_data: 

838 if d not in used: 

839 used.add(d) 

840 # Build fitness based on d 

841 all_fitnesses.append(self.get_nonrank(candidates, key=d)) 

842 

843 # Set the initial order of the ranks, will need to 

844 # be returned in this order at the end. 

845 fordered = list(zip(range(len(all_fitnesses[0])), *all_fitnesses)) 

846 mvf_rank = -1 # Start multi variable rank at -1. 

847 rec_vrc = [] # A record of already ranked candidates. 

848 mvf_list = [] # A list for all candidate ranks. 

849 # Sort by raw_score_1 in case this is different from 

850 # the stored raw_score() variable that all_cands are 

851 # sorted by. 

852 fordered.sort(key=itemgetter(1), reverse=True) 

853 # Niche candidates with equal or better raw_score to 

854 # current candidate. 

855 for a in fordered: 

856 order, rest = a[0], a[1:] 

857 if order not in rec_vrc: 

858 pff = [] 

859 pff2 = [] 

860 rec_vrc.append(order) 

861 pff.append((order, rest)) 

862 for b in fordered: 

863 border, brest = b[0], b[1:] 

864 if border not in rec_vrc: 

865 if np.any(np.array(brest) >= np.array(rest)): 

866 pff.append((border, brest)) 

867 # Remove any candidate from pff list that is dominated 

868 # by another in the list. 

869 for na in pff: 

870 norder, nrest = na[0], na[1:] 

871 dom = False 

872 for nb in pff: 

873 nborder, nbrest = nb[0], nb[1:] 

874 if norder != nborder: 

875 if np.all(np.array(nbrest) > np.array(nrest)): 

876 dom = True 

877 if not dom: 

878 pff2.append((norder, nrest)) 

879 # Assign pareto rank from -1 to -N niches. 

880 for ffa in pff2: 

881 fforder, ffrest = ffa[0], ffa[1:] 

882 rec_vrc.append(fforder) 

883 mvf_list.append((fforder, mvf_rank, ffrest)) 

884 mvf_rank = mvf_rank - 1 

885 # The original order is reformed 

886 mvf_list.sort(key=itemgetter(0), reverse=False) 

887 rfro = np.array(list(zip(*mvf_list))[1]) 

888 

889 if not expf: 

890 rmax = max(rfro) 

891 rmin = min(rfro) 

892 T = rmin - rmax 

893 # If using obj_rank probability, must have non-zero T val. 

894 # pop_size must be greater than number of permutations. 

895 # We test for this here 

896 msg = "Equal fitness for best and worst candidate in the " 

897 msg += "population! Fitness scaling is impossible! " 

898 msg += "Try with a larger population." 

899 assert T != 0., msg 

900 return 0.5 * (1. - np.tanh(2. * (rfro - rmax) / T - 1.)) 

901 else: 

902 return self.exp_prefactor ** (-rfro - 1) 

903 

904 def __initialize_pop__(self): 

905 # Get all relaxed candidates from the database 

906 ue = self.use_extinct 

907 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

908 all_cand.sort(key=get_raw_score, reverse=True) 

909 

910 if len(all_cand) > 0: 

911 fitf = self.__get_fitness__(all_cand) 

912 all_sorted = list(zip(fitf, all_cand)) 

913 all_sorted.sort(key=itemgetter(0), reverse=True) 

914 sort_cand = [] 

915 for _, t2 in all_sorted: 

916 sort_cand.append(t2) 

917 all_sorted = sort_cand 

918 

919 # Fill up the population with the self.pop_size most stable 

920 # unique candidates. 

921 i = 0 

922 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

923 c = all_sorted[i] 

924 # Use variable_function to decide whether to run comparator 

925 # if the function has been defined by the user. This does not 

926 # need to be dependent on using the rank_data function. 

927 if self.vf is not None: 

928 c_vf = self.vf(c) 

929 i += 1 

930 eq = False 

931 for a in self.pop: 

932 if self.vf is not None: 

933 a_vf = self.vf(a) 

934 # Only run comparator if the variable_function 

935 # (self.vf) returns the same. If it returns something 

936 # different the candidates are inherently different. 

937 # This is done to speed up. 

938 if a_vf == c_vf: 

939 if self.comparator.looks_like(a, c): 

940 eq = True 

941 break 

942 else: 

943 if self.comparator.looks_like(a, c): 

944 eq = True 

945 break 

946 if not eq: 

947 self.pop.append(c) 

948 self.all_cand = all_cand