Coverage for /builds/ase/ase/ase/ga/population.py: 26.38%

1# fmt: off

3""" Implementation of a population for maintaining a GA population and

4proposing structures to pair. """

5from math import exp, sqrt, tanh

6from operator import itemgetter

8import numpy as np

10from ase.db.core import now

11from ase.ga import get_raw_score

14def count_looks_like(a, all_cand, comp):

15 """Utility method for counting occurrences."""

16 n = 0

17 for b in all_cand:

18 if a.info['confid'] == b.info['confid']:

19 continue

20 if comp.looks_like(a, b):

21 n += 1

22 return n

25class Population:

26 """Population class which maintains the current population

27 and proposes which candidates to pair together.

29 Parameters:

31 data_connection: DataConnection object

32 Bla bla bla.

34 population_size: int

35 The number of candidates in the population.

37 comparator: Comparator object

38 this will tell if two configurations are equal.

39 Default compare atoms objects directly.

41 logfile: str

42 Text file that contains information about the population

43 The format is::

45 timestamp: generation(if available): id1,id2,id3...

47 Using this file greatly speeds up convergence checks.

48 Default None meaning that no file is written.

50 use_extinct: boolean

51 Set this to True if mass extinction and the extinct key

52 are going to be used. Default is False.

54 rng: Random number generator

55 By default numpy.random.

56 """

58 def __init__(self, data_connection, population_size,

59 comparator=None, logfile=None, use_extinct=False,

60 rng=np.random):

61 self.dc = data_connection

62 self.pop_size = population_size

63 if comparator is None:

64 from ase.ga.standard_comparators import AtomsComparator

65 comparator = AtomsComparator()

66 self.comparator = comparator

67 self.logfile = logfile

68 self.use_extinct = use_extinct

69 self.rng = rng

70 self.pop = []

71 self.pairs = None

72 self.all_cand = None

73 self.__initialize_pop__()

75 def __initialize_pop__(self):

76 """ Private method that initializes the population when

77 the population is created. """

79 # Get all relaxed candidates from the database

80 ue = self.use_extinct

81 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

82 all_cand.sort(key=lambda x: x.info['key_value_pairs']['raw_score'],

83 reverse=True)

84 # all_cand.sort(key=lambda x: x.get_potential_energy())

86 # Fill up the population with the self.pop_size most stable

87 # unique candidates.

88 i = 0

89 while i < len(all_cand) and len(self.pop) < self.pop_size:

90 c = all_cand[i]

91 i += 1

92 eq = False

93 for a in self.pop:

94 if self.comparator.looks_like(a, c):

95 eq = True

96 break

97 if not eq:

98 self.pop.append(c)

100 for a in self.pop:

101 a.info['looks_like'] = count_looks_like(a, all_cand,

102 self.comparator)

103

104 self.all_cand = all_cand

105 self.__calc_participation__()

106

107 def __calc_participation__(self):

108 """ Determines, from the database, how many times each

109 candidate has been used to generate new candidates. """

110 (participation, pairs) = self.dc.get_participation_in_pairing()

111 for a in self.pop:

112 if a.info['confid'] in participation.keys():

113 a.info['n_paired'] = participation[a.info['confid']]

114 else:

115 a.info['n_paired'] = 0

116 self.pairs = pairs

117

118 def update(self, new_cand=None):

119 """ New candidates can be added to the database

120 after the population object has been created.

121 This method extracts these new candidates from the

122 database and includes them in the population. """

123

124 if len(self.pop) == 0:

125 self.__initialize_pop__()

126

127 if new_cand is None:

128 ue = self.use_extinct

129 new_cand = self.dc.get_all_relaxed_candidates(only_new=True,

130 use_extinct=ue)

131

132 for a in new_cand:

133 self.__add_candidate__(a)

134 self.all_cand.append(a)

135 self.__calc_participation__()

136 self._write_log()

137

138 def get_current_population(self):

139 """ Returns a copy of the current population. """

140 self.update()

141 return [a.copy() for a in self.pop]

142

143 def get_population_after_generation(self, gen):

144 """ Returns a copy of the population as it where

145 after generation gen"""

146 if self.logfile is not None:

147 with open(self.logfile) as fd:

148 gens = {}

149 for line in fd:

150 _, no, popul = line.split(':')

151 gens[int(no)] = [int(i) for i in popul.split(',')]

152 return [c.copy() for c in self.all_cand[::-1]

153 if c.info['relax_id'] in gens[gen]]

154

155 all_candidates = [c for c in self.all_cand

156 if c.info['key_value_pairs']['generation'] <= gen]

157 cands = [all_candidates[0]]

158 for b in all_candidates:

159 if b not in cands:

160 for a in cands:

161 if self.comparator.looks_like(a, b):

162 break

163 else:

164 cands.append(b)

165 pop = cands[:self.pop_size]

166 return [a.copy() for a in pop]

167

168 def __add_candidate__(self, a):

169 """ Adds a single candidate to the population. """

170

171 # check if the structure is too low in raw score

172 raw_score_a = get_raw_score(a)

173 raw_score_worst = get_raw_score(self.pop[-1])

174 if raw_score_a < raw_score_worst \

175 and len(self.pop) == self.pop_size:

176 return

177

178 # check if the new candidate should

179 # replace a similar structure in the population

180 for (i, b) in enumerate(self.pop):

181 if self.comparator.looks_like(a, b):

182 if get_raw_score(b) < raw_score_a:

183 del self.pop[i]

184 a.info['looks_like'] = count_looks_like(a,

185 self.all_cand,

186 self.comparator)

187 self.pop.append(a)

188 self.pop.sort(key=get_raw_score,

189 reverse=True)

190 return

191

192 # the new candidate needs to be added, so remove the highest

193 # energy one

194 if len(self.pop) == self.pop_size:

195 del self.pop[-1]

196

197 # add the new candidate

198 a.info['looks_like'] = count_looks_like(a,

199 self.all_cand,

200 self.comparator)

201 self.pop.append(a)

202 self.pop.sort(key=get_raw_score, reverse=True)

203

204 def __get_fitness__(self, indecies, with_history=True):

205 """Calculates the fitness using the formula from

206 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

207

208 Sign change on the fitness compared to the formulation in the

209 abovementioned paper due to maximizing raw_score instead of

210 minimizing energy. (Set raw_score=-energy to optimize the energy)

211 """

212

213 scores = [get_raw_score(x) for x in self.pop]

214 min_s = min(scores)

215 max_s = max(scores)

216 T = min_s - max_s

217 if isinstance(indecies, int):

218 indecies = [indecies]

219

220 f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.))

221 for i in indecies]

222 if with_history:

223 M = [float(self.pop[i].info['n_paired']) for i in indecies]

224 L = [float(self.pop[i].info['looks_like']) for i in indecies]

225 f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i])

226 for i in range(len(f))]

227 return f

228

229 def get_two_candidates(self, with_history=True):

230 """ Returns two candidates for pairing employing the

231 fitness criteria from

232 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

233 and the roulete wheel selection scheme described in

234 R.L. Johnston Dalton Transactions,

235 Vol. 22, No. 22. (2003), pp. 4193-4207

236 """

237

238 if len(self.pop) < 2:

239 self.update()

240

241 if len(self.pop) < 2:

242 return None

243

244 fit = self.__get_fitness__(range(len(self.pop)), with_history)

245 fmax = max(fit)

246 c1 = self.pop[0]

247 c2 = self.pop[0]

248 used_before = False

249 while c1.info['confid'] == c2.info['confid'] and not used_before:

250 nnf = True

251 while nnf:

252 t = self.rng.randint(len(self.pop))

253 if fit[t] > self.rng.random() * fmax:

254 c1 = self.pop[t]

255 nnf = False

256 nnf = True

257 while nnf:

258 t = self.rng.randint(len(self.pop))

259 if fit[t] > self.rng.random() * fmax:

260 c2 = self.pop[t]

261 nnf = False

262

263 c1id = c1.info['confid']

264 c2id = c2.info['confid']

265 used_before = (min([c1id, c2id]), max([c1id, c2id])) in self.pairs

266 return (c1.copy(), c2.copy())

267

268 def get_one_candidate(self, with_history=True):

269 """Returns one candidate for mutation employing the

270 fitness criteria from

271 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

272 and the roulete wheel selection scheme described in

273 R.L. Johnston Dalton Transactions,

274 Vol. 22, No. 22. (2003), pp. 4193-4207

275 """

276 if len(self.pop) < 1:

277 self.update()

278

279 if len(self.pop) < 1:

280 return None

281

282 fit = self.__get_fitness__(range(len(self.pop)), with_history)

283 fmax = max(fit)

284 nnf = True

285 while nnf:

286 t = self.rng.randint(len(self.pop))

287 if fit[t] > self.rng.random() * fmax:

288 c1 = self.pop[t]

289 nnf = False

290

291 return c1.copy()

292

293 def _write_log(self):

294 """Writes the population to a logfile.

295

296 The format is::

297

298 timestamp: generation(if available): id1,id2,id3..."""

299 if self.logfile is not None:

300 ids = [str(a.info['relax_id']) for a in self.pop]

301 if ids != []:

302 try:

303 gen_nums = [c.info['key_value_pairs']['generation']

304 for c in self.all_cand]

305 max_gen = max(gen_nums)

306 except KeyError:

307 max_gen = ' '

308 with open(self.logfile, 'a') as fd:

309 fd.write('{time}: {gen}: {pop}\n'.format(time=now(),

310 pop=','.join(ids),

311 gen=max_gen))

312

313 def is_uniform(self, func, min_std, pop=None):

314 """Tests whether the current population is uniform or diverse.

315 Returns True if uniform, False otherwise.

316

317 Parameters:

318

319 func: function

320 that takes one argument an atoms object and returns a value that

321 will be used for testing against the rest of the population.

322

323 min_std: int or float

324 The minimum standard deviation, if the population has a lower

325 std dev it is uniform.

326

327 pop: list, optional

328 use this list of Atoms objects instead of the current population.

329 """

330 if pop is None:

331 pop = self.pop

332 vals = [func(a) for a in pop]

333 stddev = np.std(vals)

334 if stddev < min_std:

335 return True

336 return False

337

338 def mass_extinction(self, ids):

339 """Kills every candidate in the database with gaid in the

340 supplied list of ids. Typically used on the main part of the current

341 population if the diversity is to small.

342

343 Parameters:

344

345 ids: list

346 list of ids of candidates to be killed.

347

348 """

349 for confid in ids:

350 self.dc.kill_candidate(confid)

351 self.pop = []

352

353

354class RandomPopulation(Population):

355 def __init__(self, data_connection, population_size,

356 comparator=None, logfile=None, exclude_used_pairs=False,

357 bad_candidates=0, use_extinct=False):

358 self.exclude_used_pairs = exclude_used_pairs

359 self.bad_candidates = bad_candidates

360 Population.__init__(self, data_connection, population_size,

361 comparator, logfile, use_extinct)

362

363 def __initialize_pop__(self):

364 """ Private method that initializes the population when

365 the population is created. """

366

367 # Get all relaxed candidates from the database

368 ue = self.use_extinct

369 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

370 all_cand.sort(key=get_raw_score, reverse=True)

371 # all_cand.sort(key=lambda x: x.get_potential_energy())

372

373 if len(all_cand) > 0:

374 # Fill up the population with the self.pop_size most stable

375 # unique candidates.

376 ratings = []

377 best_raw = get_raw_score(all_cand[0])

378 i = 0

379 while i < len(all_cand):

380 c = all_cand[i]

381 i += 1

382 eq = False

383 for a in self.pop:

384 if self.comparator.looks_like(a, c):

385 eq = True

386 break

387 if not eq:

388 if len(self.pop) < self.pop_size - self.bad_candidates:

389 self.pop.append(c)

390 else:

391 exp_fact = exp(get_raw_score(c) / best_raw)

392 ratings.append([c, (exp_fact - 1) * self.rng.random()])

393 ratings.sort(key=itemgetter(1), reverse=True)

394

395 for i in range(self.bad_candidates):

396 self.pop.append(ratings[i][0])

397

398 for a in self.pop:

399 a.info['looks_like'] = count_looks_like(a, all_cand,

400 self.comparator)

401

402 self.all_cand = all_cand

403 self.__calc_participation__()

404

405 def update(self):

406 """ The update method in Population will add to the end of

407 the population, that can't be used here since we might have

408 bad candidates that need to stay in the population, therefore

409 just recalc the population every time. """

410

411 self.pop = []

412 self.__initialize_pop__()

413

414 self._write_log()

415

416 def get_one_candidate(self):

417 """Returns one candidates at random."""

418 if len(self.pop) < 1:

419 self.update()

420

421 if len(self.pop) < 1:

422 return None

423

424 t = self.rng.randint(len(self.pop))

425 c = self.pop[t]

426

427 return c.copy()

428

429 def get_two_candidates(self):

430 """Returns two candidates at random."""

431 if len(self.pop) < 2:

432 self.update()

433

434 if len(self.pop) < 2:

435 return None

436

437 c1 = self.pop[0]

438 c2 = self.pop[0]

439 used_before = False

440 while c1.info['confid'] == c2.info['confid'] and not used_before:

441 t = self.rng.randint(len(self.pop))

442 c1 = self.pop[t]

443 t = self.rng.randint(len(self.pop))

444 c2 = self.pop[t]

445

446 c1id = c1.info['confid']

447 c2id = c2.info['confid']

448 used_before = (tuple(sorted([c1id, c2id])) in self.pairs and

449 self.exclude_used_pairs)

450 return (c1.copy(), c2.copy())

451

452

453class FitnessSharingPopulation(Population):

454 """ Fitness sharing population that penalizes structures if they are

455 too similar. This is determined by a distance measure

456

457 Parameters:

458

459 comp_key: string

460 Key where the distance measure can be found in the

461 atoms.info['key_value_pairs'] dictionary.

462

463 threshold: float or int

464 Value above which no penalization of the fitness takes place

465

466 alpha_sh: float or int

467 Determines the shape of the sharing function.

468 Default is 1, which gives a linear sharing function.

469

470 """

471

472 def __init__(self, data_connection, population_size,

473 comp_key, threshold, alpha_sh=1.,

474 comparator=None, logfile=None, use_extinct=False):

475 self.comp_key = comp_key

476 self.dt = threshold # dissimilarity threshold

477 self.alpha_sh = alpha_sh

478 self.fit_scaling = 1.

479

480 self.sh_cache = {}

481

482 Population.__init__(self, data_connection, population_size,

483 comparator, logfile, use_extinct)

484

485 def __get_fitness__(self, candidates):

486 """Input should be sorted according to raw_score."""

487 max_s = get_raw_score(candidates[0])

488 min_s = get_raw_score(candidates[-1])

489 T = min_s - max_s

490

491 shared_fit = []

492 for c in candidates:

493 sc = get_raw_score(c)

494 obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.))

495 m = 1.

496 ck = c.info['key_value_pairs'][self.comp_key]

497 for other in candidates:

498 if other != c:

499 name = tuple(sorted([c.info['confid'],

500 other.info['confid']]))

501 if name not in self.sh_cache:

502 ok = other.info['key_value_pairs'][self.comp_key]

503 d = abs(ck - ok)

504 if d < self.dt:

505 v = 1 - (d / self.dt)**self.alpha_sh

506 self.sh_cache[name] = v

507 else:

508 self.sh_cache[name] = 0

509 m += self.sh_cache[name]

510

511 shf = (obj_fit ** self.fit_scaling) / m

512 shared_fit.append(shf)

513 return shared_fit

514

515 def update(self):

516 """ The update method in Population will add to the end of

517 the population, that can't be used here since the shared fitness

518 will change for all candidates when new are added, therefore

519 just recalc the population every time. """

520

521 self.pop = []

522 self.__initialize_pop__()

523

524 self._write_log()

525

526 def __initialize_pop__(self):

527 # Get all relaxed candidates from the database

528 ue = self.use_extinct

529 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

530 all_cand.sort(key=get_raw_score, reverse=True)

531

532 if len(all_cand) > 0:

533 shared_fit = self.__get_fitness__(all_cand)

534 all_sorted = list(zip(*sorted(zip(shared_fit, all_cand),

535 reverse=True)))[1]

536

537 # Fill up the population with the self.pop_size most stable

538 # unique candidates.

539 i = 0

540 while i < len(all_sorted) and len(self.pop) < self.pop_size:

541 c = all_sorted[i]

542 i += 1

543 eq = False

544 for a in self.pop:

545 if self.comparator.looks_like(a, c):

546 eq = True

547 break

548 if not eq:

549 self.pop.append(c)

550

551 for a in self.pop:

552 a.info['looks_like'] = count_looks_like(a, all_cand,

553 self.comparator)

554 self.all_cand = all_cand

555

556 def get_two_candidates(self):

557 """ Returns two candidates for pairing employing the

558 fitness criteria from

559 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

560 and the roulete wheel selection scheme described in

561 R.L. Johnston Dalton Transactions,

562 Vol. 22, No. 22. (2003), pp. 4193-4207

563 """

564

565 if len(self.pop) < 2:

566 self.update()

567

568 if len(self.pop) < 2:

569 return None

570

571 fit = self.__get_fitness__(self.pop)

572 fmax = max(fit)

573 c1 = self.pop[0]

574 c2 = self.pop[0]

575 while c1.info['confid'] == c2.info['confid']:

576 nnf = True

577 while nnf:

578 t = self.rng.randint(len(self.pop))

579 if fit[t] > self.rng.random() * fmax:

580 c1 = self.pop[t]

581 nnf = False

582 nnf = True

583 while nnf:

584 t = self.rng.randint(len(self.pop))

585 if fit[t] > self.rng.random() * fmax:

586 c2 = self.pop[t]

587 nnf = False

588

589 return (c1.copy(), c2.copy())

590

591

592class RankFitnessPopulation(Population):

593 """ Ranks the fitness relative to set variable to flatten the surface

594 in a certain direction such that mating across variable is equally

595 likely irrespective of raw_score.

596

597 Parameters:

598

599 variable_function: function

600 A function that takes as input an Atoms object and returns

601 the variable that differentiates the ranks.

602

603 exp_function: boolean

604 If True use an exponential function for ranking the fitness.

605 If False use the same as in Population. Default True.

606

607 exp_prefactor: float

608 The prefactor used in the exponential fitness scaling function.

609 Default 0.5

610 """

611

612 def __init__(self, data_connection, population_size, variable_function,

613 comparator=None, logfile=None, use_extinct=False,

614 exp_function=True, exp_prefactor=0.5):

615 self.exp_function = exp_function

616 self.exp_prefactor = exp_prefactor

617 self.vf = variable_function

618 # The current fitness is set at each update of the population

619 self.current_fitness = None

620

621 Population.__init__(self, data_connection, population_size,

622 comparator, logfile, use_extinct)

623

624 def get_rank(self, rcand, key=None):

625 # Set the initial order of the candidates, will need to

626 # be returned in this order at the end of ranking.

627 ordered = list(zip(range(len(rcand)), rcand))

628

629 # Niche and rank candidates.

630 rec_nic = []

631 rank_fit = []

632 for o, c in ordered:

633 if o not in rec_nic:

634 ntr = []

635 ce1 = self.vf(c)

636 rec_nic.append(o)

637 ntr.append([o, c])

638 for oother, cother in ordered:

639 if oother not in rec_nic:

640 ce2 = self.vf(cother)

641 if ce1 == ce2:

642 # put the now processed in oother

643 # in rec_nic as well

644 rec_nic.append(oother)

645 ntr.append([oother, cother])

646 # Each niche is sorted according to raw_score and

647 # assigned a fitness according to the ranking of

648 # the candidates

649 ntr.sort(key=lambda x: x[1].info['key_value_pairs'][key],

650 reverse=True)

651 start_rank = -1

652 for cor, (on, cn) in enumerate(ntr):

653 rank = start_rank - cor

654 rank_fit.append([on, cn, rank])

655 # The original order is reformed

656 rank_fit.sort(key=itemgetter(0), reverse=False)

657 return np.array(list(zip(*rank_fit))[2])

658

659 def __get_fitness__(self, candidates):

660 expf = self.exp_function

661 rfit = self.get_rank(candidates, key='raw_score')

662

663 if not expf:

664 rmax = max(rfit)

665 rmin = min(rfit)

666 T = rmin - rmax

667 # If using obj_rank probability, must have non-zero T val.

668 # pop_size must be greater than number of permutations.

669 # We test for this here

670 msg = "Equal fitness for best and worst candidate in the "

671 msg += "population! Fitness scaling is impossible! "

672 msg += "Try with a larger population."

673 assert T != 0., msg

674 return 0.5 * (1. - np.tanh(2. * (rfit - rmax) / T - 1.))

675 else:

676 return self.exp_prefactor ** (-rfit - 1)

677

678 def update(self):

679 """ The update method in Population will add to the end of

680 the population, that can't be used here since the fitness

681 will potentially change for all candidates when new are added,

682 therefore just recalc the population every time. """

683

684 self.pop = []

685 self.__initialize_pop__()

686 self.current_fitness = self.__get_fitness__(self.pop)

687

688 self._write_log()

689

690 def __initialize_pop__(self):

691 # Get all relaxed candidates from the database

692 ue = self.use_extinct

693 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

694 all_cand.sort(key=get_raw_score, reverse=True)

695

696 if len(all_cand) > 0:

697 fitf = self.__get_fitness__(all_cand)

698 all_sorted = list(zip(fitf, all_cand))

699 all_sorted.sort(key=itemgetter(0), reverse=True)

700 sort_cand = []

701 for _, t2 in all_sorted:

702 sort_cand.append(t2)

703 all_sorted = sort_cand

704

705 # Fill up the population with the self.pop_size most stable

706 # unique candidates.

707 i = 0

708 while i < len(all_sorted) and len(self.pop) < self.pop_size:

709 c = all_sorted[i]

710 c_vf = self.vf(c)

711 i += 1

712 eq = False

713 for a in self.pop:

714 a_vf = self.vf(a)

715 # Only run comparator if the variable_function (self.vf)

716 # returns the same. If it returns something different the

717 # candidates are inherently different.

718 # This is done to speed up.

719 if a_vf == c_vf:

720 if self.comparator.looks_like(a, c):

721 eq = True

722 break

723 if not eq:

724 self.pop.append(c)

725 self.all_cand = all_cand

726

727 def get_two_candidates(self):

728 """ Returns two candidates for pairing employing the

729 roulete wheel selection scheme described in

730 R.L. Johnston Dalton Transactions,

731 Vol. 22, No. 22. (2003), pp. 4193-4207

732 """

733

734 if len(self.pop) < 2:

735 self.update()

736

737 if len(self.pop) < 2:

738 return None

739

740 # Use saved fitness

741 fit = self.current_fitness

742 fmax = max(fit)

743 c1 = self.pop[0]

744 c2 = self.pop[0]

745 while c1.info['confid'] == c2.info['confid']:

746 nnf = True

747 while nnf:

748 t = self.rng.randint(len(self.pop))

749 if fit[t] > self.rng.random() * fmax:

750 c1 = self.pop[t]

751 nnf = False

752 nnf = True

753 while nnf:

754 t = self.rng.randint(len(self.pop))

755 if fit[t] > self.rng.random() * fmax:

756 c2 = self.pop[t]

757 nnf = False

758

759 return (c1.copy(), c2.copy())

760

761

762class MultiObjectivePopulation(RankFitnessPopulation):

763 """ Allows for assignment of fitness based on a set of two variables

764 such that fitness is ranked according to a Pareto-front of

765 non-dominated candidates.

766

767 Parameters

768 ----------

769 abs_data: list

770 Set of key_value_pairs in atoms object for which fitness should

771 be assigned based on absolute value.

772

773 rank_data: list

774 Set of key_value_pairs in atoms object for which data should

775 be ranked in order to ascribe fitness.

776

777 variable_function: function

778 A function that takes as input an Atoms object and returns

779 the variable that differentiates the ranks. Only use if

780 data is ranked.

781

782 exp_function: boolean

783 If True use an exponential function for ranking the fitness.

784 If False use the same as in Population. Default True.

785

786 exp_prefactor: float

787 The prefactor used in the exponential fitness scaling function.

788 Default 0.5

789

790 """

791

792 def __init__(self, data_connection, population_size,

793 variable_function=None, comparator=None, logfile=None,

794 use_extinct=False, abs_data=None, rank_data=None,

795 exp_function=True, exp_prefactor=0.5):

796 # The current fitness is set at each update of the population

797 self.current_fitness = None

798

799 if rank_data is None:

800 rank_data = []

801 self.rank_data = rank_data

802

803 if abs_data is None:

804 abs_data = []

805 self.abs_data = abs_data

806

807 RankFitnessPopulation.__init__(self, data_connection, population_size,

808 variable_function, comparator, logfile,

809 use_extinct, exp_function,

810 exp_prefactor)

811

812 def get_nonrank(self, nrcand, key=None):

813 """"Returns a list of fitness values."""

814 nrc_list = []

815 for nrc in nrcand:

816 nrc_list.append(nrc.info['key_value_pairs'][key])

817 return nrc_list

818

819 def __get_fitness__(self, candidates):

820 # There are no defaults set for the datasets to be

821 # used in this function, as such we test that the

822 # user has specified at least two here.

823 msg = "This is a multi-objective fitness function"

824 msg += " so there must be at least two datasets"

825 msg += " stated in the rank_data and abs_data variables"

826 assert len(self.rank_data) + len(self.abs_data) >= 2, msg

827

828 expf = self.exp_function

829

830 all_fitnesses = []

831 used = set()

832 for rd in self.rank_data:

833 used.add(rd)

834 # Build ranked fitness based on rd

835 all_fitnesses.append(self.get_rank(candidates, key=rd))

836

837 for d in self.abs_data:

838 if d not in used:

839 used.add(d)

840 # Build fitness based on d

841 all_fitnesses.append(self.get_nonrank(candidates, key=d))

842

843 # Set the initial order of the ranks, will need to

844 # be returned in this order at the end.

845 fordered = list(zip(range(len(all_fitnesses[0])), *all_fitnesses))

846 mvf_rank = -1 # Start multi variable rank at -1.

847 rec_vrc = [] # A record of already ranked candidates.

848 mvf_list = [] # A list for all candidate ranks.

849 # Sort by raw_score_1 in case this is different from

850 # the stored raw_score() variable that all_cands are

851 # sorted by.

852 fordered.sort(key=itemgetter(1), reverse=True)

853 # Niche candidates with equal or better raw_score to

854 # current candidate.

855 for a in fordered:

856 order, rest = a[0], a[1:]

857 if order not in rec_vrc:

858 pff = []

859 pff2 = []

860 rec_vrc.append(order)

861 pff.append((order, rest))

862 for b in fordered:

863 border, brest = b[0], b[1:]

864 if border not in rec_vrc:

865 if np.any(np.array(brest) >= np.array(rest)):

866 pff.append((border, brest))

867 # Remove any candidate from pff list that is dominated

868 # by another in the list.

869 for na in pff:

870 norder, nrest = na[0], na[1:]

871 dom = False

872 for nb in pff:

873 nborder, nbrest = nb[0], nb[1:]

874 if norder != nborder:

875 if np.all(np.array(nbrest) > np.array(nrest)):

876 dom = True

877 if not dom:

878 pff2.append((norder, nrest))

879 # Assign pareto rank from -1 to -N niches.

880 for ffa in pff2:

881 fforder, ffrest = ffa[0], ffa[1:]

882 rec_vrc.append(fforder)

883 mvf_list.append((fforder, mvf_rank, ffrest))

884 mvf_rank = mvf_rank - 1

885 # The original order is reformed

886 mvf_list.sort(key=itemgetter(0), reverse=False)

887 rfro = np.array(list(zip(*mvf_list))[1])

888

889 if not expf:

890 rmax = max(rfro)

891 rmin = min(rfro)

892 T = rmin - rmax

893 # If using obj_rank probability, must have non-zero T val.

894 # pop_size must be greater than number of permutations.

895 # We test for this here

896 msg = "Equal fitness for best and worst candidate in the "

897 msg += "population! Fitness scaling is impossible! "

898 msg += "Try with a larger population."

899 assert T != 0., msg

900 return 0.5 * (1. - np.tanh(2. * (rfro - rmax) / T - 1.))

901 else:

902 return self.exp_prefactor ** (-rfro - 1)

903

904 def __initialize_pop__(self):

905 # Get all relaxed candidates from the database

906 ue = self.use_extinct

907 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

908 all_cand.sort(key=get_raw_score, reverse=True)

909

910 if len(all_cand) > 0:

911 fitf = self.__get_fitness__(all_cand)

912 all_sorted = list(zip(fitf, all_cand))

913 all_sorted.sort(key=itemgetter(0), reverse=True)

914 sort_cand = []

915 for _, t2 in all_sorted:

916 sort_cand.append(t2)

917 all_sorted = sort_cand

918

919 # Fill up the population with the self.pop_size most stable

920 # unique candidates.

921 i = 0

922 while i < len(all_sorted) and len(self.pop) < self.pop_size:

923 c = all_sorted[i]

924 # Use variable_function to decide whether to run comparator

925 # if the function has been defined by the user. This does not

926 # need to be dependent on using the rank_data function.

927 if self.vf is not None:

928 c_vf = self.vf(c)

929 i += 1

930 eq = False

931 for a in self.pop:

932 if self.vf is not None:

933 a_vf = self.vf(a)

934 # Only run comparator if the variable_function

935 # (self.vf) returns the same. If it returns something

936 # different the candidates are inherently different.

937 # This is done to speed up.

938 if a_vf == c_vf:

939 if self.comparator.looks_like(a, c):

940 eq = True

941 break

942 else:

943 if self.comparator.looks_like(a, c):

944 eq = True

945 break

946 if not eq:

947 self.pop.append(c)

948 self.all_cand = all_cand