Coverage for ase / utils / linesearcharmijo.py: 62.50%

152 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-30 08:22 +0000

1import logging 

2import math 

3 

4import numpy as np 

5import scipy 

6import scipy.linalg 

7 

8from ase.utils import longsum 

9 

10logger = logging.getLogger(__name__) 

11 

12 

13class LinearPath: 

14 """Describes a linear search path of the form t -> t g""" 

15 

16 def __init__(self, dirn): 

17 """Initialise LinearPath object 

18 

19 Args: 

20 dirn : search direction 

21 """ 

22 self.dirn = dirn 

23 

24 def step(self, alpha): 

25 return alpha * self.dirn 

26 

27 

28def nullspace(A, myeps=1e-10): 

29 """The RumPath class needs the ability to compute the null-space of 

30 a small matrix. This is provided here. But we now also need scipy! 

31 

32 This routine was copy-pasted from 

33 http://stackoverflow.com/questions/5889142/python-numpy-scipy-finding-the-null-space-of-a-matrix 

34 How the h*** does numpy/scipy not have a null-space implemented? 

35 """ 

36 u, s, vh = scipy.linalg.svd(A) 

37 padding = max(0, np.shape(A)[1] - np.shape(s)[0]) 

38 null_mask = np.concatenate( 

39 ((s <= myeps), np.ones((padding,), dtype=bool)), axis=0 

40 ) 

41 null_space = scipy.compress(null_mask, vh, axis=0) 

42 return scipy.transpose(null_space) 

43 

44 

45class RumPath: 

46 """Describes a curved search path, taking into account information 

47 about (near-) rigid unit motions (RUMs). 

48 

49 One can tag sub-molecules of the system, which are collections of 

50 particles that form a (near-)rigid unit. Let x1, ... xn be the positions 

51 of one such molecule, then we construct a path of the form 

52 xi(t) = xi(0) + (exp(K t) - I) yi + t wi + t c 

53 where yi = xi - <x>, c = <g> is a rigid translation, K is anti-symmetric 

54 so that exp(tK) yi denotes a rotation about the centre of mass, and wi 

55 is the remainind stretch of the molecule. 

56 

57 The following variables are stored: 

58 * rotation_factors : array of acceleration factors 

59 * rigid_units : array of molecule indices 

60 * stretch : w 

61 * K : list of K matrices 

62 * y : list of y-vectors 

63 """ 

64 

65 def __init__(self, x_start, dirn, rigid_units, rotation_factors): 

66 """Initialise a `RumPath` 

67 

68 Args: 

69 x_start : vector containing the positions in d x nAt shape 

70 dirn : search direction, same shape as x_start vector 

71 rigid_units : array of arrays of molecule indices 

72 rotation_factors : factor by which the rotation of each molecular 

73 is accelerated; array of scalars, same length as 

74 rigid_units 

75 """ 

76 

77 # keep some stuff stored 

78 self.rotation_factors = rotation_factors 

79 self.rigid_units = rigid_units 

80 # create storage for more stuff 

81 self.K = [] 

82 self.y = [] 

83 # We need to reshape x_start and dirn since we want to apply 

84 # rotations to individual position vectors! 

85 # we will eventually store the stretch in w, X is just a reference 

86 # to x_start with different shape 

87 w = dirn.copy().reshape([3, len(dirn) / 3]) 

88 X = x_start.reshape([3, len(dirn) / 3]) 

89 

90 for I in rigid_units: # I is a list of indices for one molecule 

91 # get the positions of the i-th molecule, subtract mean 

92 x = X[:, I] 

93 y = x - x.mean(0).T # PBC? 

94 # same for forces >>> translation component 

95 g = w[:, I] 

96 f = g - g.mean(0).T 

97 # compute the system to solve for K (see accompanying note!) 

98 # A = \sum_j Yj Yj' 

99 # b = \sum_j Yj' fj 

100 A = np.zeros((3, 3)) 

101 b = np.zeros(3) 

102 for j in range(len(I)): 

103 Yj = np.array( 

104 [ 

105 [y[1, j], 0.0, -y[2, j]], 

106 [-y[0, j], y[2, j], 0.0], 

107 [0.0, -y[1, j], y[0, j]], 

108 ] 

109 ) 

110 A += np.dot(Yj.T, Yj) 

111 b += np.dot(Yj.T, f[:, j]) 

112 # If the directions y[:,j] span all of R^3 (canonically this is true 

113 # when there are at least three atoms in the molecule) but if 

114 # not, then A is singular so we cannot solve A k = b. In this case 

115 # we solve Ak = b in the space orthogonal to the null-space of A. 

116 # TODO: 

117 # this can get unstable if A is "near-singular"! We may 

118 # need to revisit this idea at some point to get something 

119 # more robust 

120 N = nullspace(A) 

121 b -= np.dot(np.dot(N, N.T), b) 

122 A += np.dot(N, N.T) 

123 k = scipy.linalg.solve(A, b, sym_pos=True) 

124 K = np.array( 

125 [[0.0, k[0], -k[2]], [-k[0], 0.0, k[1]], [k[2], -k[1], 0.0]] 

126 ) 

127 # now remove the rotational component from the search direction 

128 # ( we actually keep the translational component as part of w, 

129 # but this could be changed as well! ) 

130 w[:, I] -= np.dot(K, y) 

131 # store K and y 

132 self.K.append(K) 

133 self.y.append(y) 

134 

135 # store the stretch (no need to copy here, since w is already a copy) 

136 self.stretch = w 

137 

138 def step(self, alpha): 

139 """perform a step in the line-search, given a step-length alpha 

140 

141 Args: 

142 alpha : step-length 

143 

144 Returns 

145 ------- 

146 s : update for positions 

147 """ 

148 # translation and stretch 

149 s = alpha * self.stretch 

150 # loop through rigid_units 

151 for I, K, y, rf in zip( 

152 self.rigid_units, self.K, self.y, self.rotation_factors 

153 ): 

154 # with matrix exponentials: 

155 # s[:, I] += expm(K * alpha * rf) * p.y - p.y 

156 # third-order taylor approximation: 

157 # I + t K + 1/2 t^2 K^2 + 1/6 t^3 K^3 - I 

158 # = t K (I + 1/2 t K (I + 1/3 t K)) 

159 aK = alpha * rf * K 

160 s[:, I] += np.dot( 

161 aK, y + 0.5 * np.dot(aK, y + 1 / 3.0 * np.dot(aK, y)) 

162 ) 

163 

164 return s.ravel() 

165 

166 

167class LineSearchArmijo: 

168 def __init__(self, func, c1=0.1, tol=1e-14): 

169 """Initialise the linesearch with set parameters and functions. 

170 

171 Args: 

172 func: the function we are trying to minimise (energy), which should 

173 take an array of positions for its argument 

174 c1: parameter for the sufficient decrease condition in (0.0 0.5) 

175 tol: tolerance for evaluating equality 

176 

177 """ 

178 

179 self.tol = tol 

180 self.func = func 

181 

182 if not (0 < c1 < 0.5): 

183 logger.error( 

184 'c1 outside of allowed interval (0, 0.5). Replacing with ' 

185 'default value.' 

186 ) 

187 print( 

188 'Warning: C1 outside of allowed interval. Replacing with ' 

189 'default value.' 

190 ) 

191 c1 = 0.1 

192 

193 self.c1 = c1 

194 

195 # CO : added rigid_units and rotation_factors 

196 

197 def run( 

198 self, 

199 x_start, 

200 dirn, 

201 a_max=None, 

202 a_min=None, 

203 a1=None, 

204 func_start=None, 

205 func_old=None, 

206 func_prime_start=None, 

207 rigid_units=None, 

208 rotation_factors=None, 

209 maxstep=None, 

210 ): 

211 """Perform a backtracking / quadratic-interpolation linesearch 

212 to find an appropriate step length with Armijo condition. 

213 NOTE THIS LINESEARCH DOES NOT IMPOSE WOLFE CONDITIONS! 

214 

215 The idea is to do backtracking via quadratic interpolation, stabilised 

216 by putting a lower bound on the decrease at each linesearch step. 

217 To ensure BFGS-behaviour, whenever "reasonable" we take 1.0 as the 

218 starting step. 

219 

220 Since Armijo does not guarantee convergence of BFGS, the outer 

221 BFGS algorithm must restart when the current search direction 

222 ceases to be a descent direction. 

223 

224 Args: 

225 x_start: vector containing the position to begin the linesearch 

226 from (ie the current location of the optimisation) 

227 dirn: vector pointing in the direction to search in (pk in [NW]). 

228 Note that this does not have to be a unit vector, but the 

229 function will return a value scaled with respect to dirn. 

230 a_max: an upper bound on the maximum step length allowed. 

231 Default is 2.0. 

232 a_min: a lower bound on the minimum step length allowed. 

233 Default is 1e-10. 

234 A RuntimeError is raised if this bound is violated 

235 during the line search. 

236 a1: the initial guess for an acceptable step length. If no value is 

237 given, this will be set automatically, using quadratic 

238 interpolation using func_old, or "rounded" to 1.0 if the 

239 initial guess lies near 1.0. (specifically for LBFGS) 

240 func_start: the value of func at the start of the linesearch, ie 

241 phi(0). Passing this information avoids potentially expensive 

242 re-calculations 

243 func_prime_start: the value of func_prime at the start of the 

244 linesearch (this will be dotted with dirn to find phi_prime(0)) 

245 func_old: the value of func_start at the previous step taken in 

246 the optimisation (this will be used to calculate the initial 

247 guess for the step length if it is not provided) 

248 rigid_units, rotationfactors : see documentation of RumPath,if it 

249 is unclear what these parameters are, then leave them at None 

250 maxstep: maximum allowed displacement in Angstrom. Default is 0.2. 

251 

252 Returns 

253 ------- 

254 A tuple: (step, func_val, no_update) 

255 

256 step: the final chosen step length, representing the number of 

257 multiples of the direction vector to move 

258 func_val: the value of func after taking this step, ie phi(step) 

259 no_update: true if the linesearch has not performed any updates of 

260 phi or alpha, due to errors or immediate convergence 

261 

262 Raises 

263 ------ 

264 ValueError for problems with arguments 

265 RuntimeError for problems encountered during iteration 

266 """ 

267 

268 a1 = self.handle_args( 

269 x_start, 

270 dirn, 

271 a_max, 

272 a_min, 

273 a1, 

274 func_start, 

275 func_old, 

276 func_prime_start, 

277 maxstep, 

278 ) 

279 

280 # DEBUG 

281 logger.debug('a1(auto) = %e', a1) 

282 

283 if abs(a1 - 1.0) <= 0.5: 

284 a1 = 1.0 

285 

286 logger.debug('-----------NEW LINESEARCH STARTED---------') 

287 

288 a_final = None 

289 phi_a_final = None 

290 num_iter = 0 

291 

292 # create a search-path 

293 if rigid_units is None: 

294 # standard linear search-path 

295 logger.debug('-----using LinearPath-----') 

296 path = LinearPath(dirn) 

297 else: 

298 logger.debug('-----using RumPath------') 

299 # if rigid_units != None, but rotation_factors == None, then 

300 # raise an error. 

301 if rotation_factors is None: 

302 raise RuntimeError( 

303 'RumPath cannot be created since rotation_factors == None' 

304 ) 

305 path = RumPath(x_start, dirn, rigid_units, rotation_factors) 

306 

307 while True: 

308 logger.debug('-----------NEW ITERATION OF LINESEARCH----------') 

309 logger.debug('Number of linesearch iterations: %d', num_iter) 

310 logger.debug('a1 = %e', a1) 

311 

312 # CO replaced: func_a1 = self.func(x_start + a1 * self.dirn) 

313 func_a1 = self.func(x_start + path.step(a1)) 

314 phi_a1 = func_a1 

315 # compute sufficient decrease (Armijo) condition 

316 suff_dec = ( 

317 phi_a1 <= self.func_start + self.c1 * a1 * self.phi_prime_start 

318 ) 

319 

320 # DEBUG 

321 # print("c1*a1*phi_prime_start = ", self.c1*a1*self.phi_prime_start, 

322 # " | phi_a1 - phi_0 = ", phi_a1 - self.func_start) 

323 logger.info('a1 = %.3f, suff_dec = %r', a1, suff_dec) 

324 if a1 < self.a_min: 

325 raise RuntimeError('a1 < a_min, giving up') 

326 if self.phi_prime_start > 0.0: 

327 raise RuntimeError('self.phi_prime_start > 0.0') 

328 

329 # check sufficient decrease (Armijo condition) 

330 if suff_dec: 

331 a_final = a1 

332 phi_a_final = phi_a1 

333 logger.debug( 

334 'Linesearch returned a = %e, phi_a = %e', 

335 a_final, 

336 phi_a_final, 

337 ) 

338 logger.debug('-----------LINESEARCH COMPLETE-----------') 

339 return a_final, phi_a_final, num_iter == 0 

340 

341 # we don't have sufficient decrease, so we need to compute a 

342 # new trial step-length 

343 at = -( 

344 (self.phi_prime_start * a1) 

345 / (2 * ((phi_a1 - self.func_start) / a1 - self.phi_prime_start)) 

346 ) 

347 logger.debug('quadratic_min: initial at = %e', at) 

348 

349 # because a1 does not satisfy Armijo it follows that at must 

350 # lie between 0 and a1. In fact, more strongly, 

351 # at \leq (2 (1-c1))^{-1} a1, which is a back-tracking condition 

352 # therefore, we should now only check that at has not become 

353 # too small, in which case it is likely that nonlinearity has 

354 # played a big role here, so we take an ultra-conservative 

355 # backtracking step 

356 a1 = max(at, a1 / 10.0) 

357 if a1 > at: 

358 logger.debug( 

359 'at (%e) < a1/10: revert to backtracking a1/10', at 

360 ) 

361 

362 # (end of while(True) line-search loop) 

363 

364 # (end of run()) 

365 

366 def handle_args( 

367 self, 

368 x_start, 

369 dirn, 

370 a_max, 

371 a_min, 

372 a1, 

373 func_start, 

374 func_old, 

375 func_prime_start, 

376 maxstep, 

377 ): 

378 """Verify passed parameters and set appropriate attributes accordingly. 

379 

380 A suitable value for the initial step-length guess will be either 

381 verified or calculated, stored in the attribute self.a_start, and 

382 returned. 

383 

384 Args: 

385 The args should be identical to those of self.run(). 

386 

387 Returns 

388 ------- 

389 The suitable initial step-length guess a_start 

390 

391 Raises 

392 ------ 

393 ValueError for problems with arguments 

394 

395 """ 

396 

397 self.a_max = a_max 

398 self.a_min = a_min 

399 self.x_start = x_start 

400 self.dirn = dirn 

401 self.func_old = func_old 

402 self.func_start = func_start 

403 self.func_prime_start = func_prime_start 

404 

405 if a_max is None: 

406 a_max = 2.0 

407 

408 if a_max < self.tol: 

409 logger.warning( 

410 'a_max too small relative to tol. Reverting to ' 

411 'default value a_max = 2.0 (twice the <ideal> step).' 

412 ) 

413 a_max = 2.0 # THIS ASSUMES NEWTON/BFGS TYPE BEHAVIOUR! 

414 

415 if self.a_min is None: 

416 self.a_min = 1e-10 

417 

418 if func_start is None: 

419 logger.debug('Setting func_start') 

420 self.func_start = self.func(x_start) 

421 

422 self.phi_prime_start = longsum(self.func_prime_start * self.dirn) 

423 if self.phi_prime_start >= 0: 

424 logger.error( 

425 'Passed direction which is not downhill. Aborting...: %e', 

426 self.phi_prime_start, 

427 ) 

428 raise ValueError('Direction is not downhill.') 

429 elif math.isinf(self.phi_prime_start): 

430 logger.error( 

431 'Passed func_prime_start and dirn which are too big. ' 

432 'Aborting...' 

433 ) 

434 raise ValueError('func_prime_start and dirn are too big.') 

435 

436 if a1 is None: 

437 if func_old is not None: 

438 # Interpolating a quadratic to func and func_old - see NW 

439 # equation 3.60 

440 a1 = ( 

441 2 * (self.func_start - self.func_old) / self.phi_prime_start 

442 ) 

443 logger.debug('Interpolated quadratic, obtained a1 = %e', a1) 

444 if a1 is None or a1 > a_max: 

445 logger.debug( 

446 'a1 greater than a_max. Reverting to default value a1 = 1.0' 

447 ) 

448 a1 = 1.0 

449 if a1 is None or a1 < self.tol: 

450 logger.debug( 

451 'a1 is None or a1 < self.tol. Reverting to default value ' 

452 'a1 = 1.0' 

453 ) 

454 a1 = 1.0 

455 if a1 is None or a1 < self.a_min: 

456 logger.debug( 

457 'a1 is None or a1 < a_min. Reverting to default value a1 = 1.0' 

458 ) 

459 a1 = 1.0 

460 

461 if maxstep is None: 

462 maxstep = 0.2 

463 logger.debug('maxstep = %e', maxstep) 

464 

465 r = np.reshape(dirn, (-1, 3)) 

466 steplengths = ((a1 * r) ** 2).sum(1) ** 0.5 

467 maxsteplength = np.max(steplengths) 

468 if maxsteplength >= maxstep: 

469 a1 *= maxstep / maxsteplength 

470 logger.debug('Rescaled a1 to fulfill maxstep criterion') 

471 

472 self.a_start = a1 

473 

474 logger.debug( 

475 'phi_start = %e, phi_prime_start = %e', 

476 self.func_start, 

477 self.phi_prime_start, 

478 ) 

479 logger.debug( 

480 'func_start = %s, self.func_old = %s', 

481 self.func_start, 

482 self.func_old, 

483 ) 

484 logger.debug('a1 = %e, a_max = %e, a_min = %e', a1, a_max, self.a_min) 

485 

486 return a1