class XSFParser:
    """Line-oriented parser for XSF structure files.

    The parser reads from an iterator of text lines, skipping blank lines
    and lines whose first non-whitespace character is ``#``. One line of
    lookahead is kept so that sections of unknown length (atom lists) can
    stop at the next keyword without consuming it.
    """

    def __init__(self, file: TextIOBase):
        """Wrap ``file``, an iterator yielding the lines of an XSF file."""
        self._file: TextIOBase = file
        # One-line lookahead buffer; None means "nothing buffered".
        self._peek_line: t.Optional[str] = None
        # Number of lines read from ``file`` so far (including skipped ones).
        self.lineno = 0

    def skip_line(self, line: t.Optional[str]) -> bool:
        """Return True if ``line`` is missing, blank, or a ``#`` comment."""
        return line is None or line.isspace() or line.lstrip().startswith('#')

    def peek_line(self) -> t.Optional[str]:
        """Return the next significant line without consuming it.

        Returns None once the underlying iterator is exhausted.
        """
        try:
            while self.skip_line(self._peek_line):
                self._peek_line = next(self._file)
                self.lineno += 1
        except StopIteration:
            return None
        return self._peek_line

    def next_line(self) -> t.Optional[str]:
        """Return the next significant line and consume it (None at EOF)."""
        line = self.peek_line()
        self._peek_line = None
        return line

    def parse_atoms(self, expected_length: t.Optional[int] = None) -> polars.DataFrame:
        """Parse a list of ``<atomic number> <x> <y> <z> [...]`` lines.

        Reading stops (without consuming the line) at EOF or at the first
        line whose first token starts with a letter, which is taken to be
        the next keyword.

        Args:
            expected_length: Declared number of atoms. When given, a mismatch
                only logs a warning. When None, an empty list is an error.

        Returns:
            A DataFrame with columns ``elem``, ``x``, ``y``, ``z``. Only the
            first three coordinates of each atom are kept.

        Raises:
            ValueError: On an invalid atomic number (not an integer in
                0..118), non-numeric coordinates, rows of differing length,
                fewer than three coordinates, or a missing atom list.
        """
        zs = []
        coords = []

        while (line := self.peek_line()):
            words = line.split()
            if not words:
                # peek_line() never returns blank lines, but consume the
                # line anyway so this branch can never spin forever.
                self.next_line()
                continue
            # Keywords may contain underscores and digits (for example
            # BEGIN_BLOCK_DATAGRID_3D), so test only the leading character
            # instead of requiring a purely alphabetic token.
            if words[0][0].isalpha():
                break

            self.next_line()
            try:
                elem = int(words[0])
                if elem < 0 or elem > 118:
                    raise ValueError()
            except (ValueError, TypeError):
                raise ValueError(f"Invalid atomic number '{words[0]}'") from None

            try:
                coords.append(numpy.array(list(map(float, words[1:]))))
                zs.append(elem)
            except (ValueError, TypeError):
                raise ValueError(f"Invalid atomic coordinates '{' '.join(words[1:])}'") from None

        if expected_length is not None:
            if not expected_length == len(zs):
                logging.warning(f"Warning: List length {len(zs)} doesn't match declared length {expected_length}")
        elif len(zs) == 0:
            raise ValueError(f"Expected atom list after keyword 'ATOMS'. Got '{line or 'EOF'}' instead.")

        if len(zs) == 0:
            # Only reachable when a length was declared but no atoms followed.
            return polars.DataFrame({}, schema=['elem', 'x', 'y', 'z'])  # type: ignore

        coord_lens = list(map(len, coords))
        if not all(coord_len == coord_lens[0] for coord_len in coord_lens[1:]):
            raise ValueError("Mismatched atom dimensions.")
        if coord_lens[0] < 3:
            raise ValueError("Expected at least 3 coordinates per atom.")

        # Keep only the first three columns; any extra columns are dropped.
        stacked = numpy.stack(coords, axis=0)[:, :3]
        x, y, z = stacked.T

        return polars.DataFrame({'elem': zs, 'x': x, 'y': y, 'z': z})

    def parse_coords(self) -> polars.DataFrame:
        """Parse a coordinate section: a two-integer header, then atoms.

        The first integer of the header is the declared number of atoms; the
        second is parsed but ignored.

        Raises:
            ValueError: On EOF before the header or a malformed header.
        """
        line = self.next_line()
        if line is None:
            raise ValueError("Unexpected EOF before atom list")
        words = line.split()
        try:
            if not len(words) == 2:
                raise ValueError()
            (n, _) = map(int, words)
        except (ValueError, TypeError):
            raise ValueError(f"Invalid atom list length: {line}") from None

        return self.parse_atoms(n)

    def parse_lattice(self) -> LinearTransform3D:
        """Parse three lines of three floats into a ``LinearTransform3D``.

        The three parsed vectors are stacked along the last axis, so each
        vector read from the file becomes a column of the matrix.

        Raises:
            ValueError: On EOF inside the section or a malformed vector line.
        """
        rows = []
        for _ in range(3):
            line = self.next_line()
            if line is None:
                raise ValueError("Unexpected EOF in vector section.")
            words = line.split()
            try:
                if not len(words) == 3:
                    raise ValueError()
                row = numpy.array(list(map(float, words)))
                rows.append(row)
            except (ValueError, TypeError):
                raise ValueError(f"Invalid lattice vector: {line}") from None

        matrix = numpy.stack(rows, axis=-1)
        return LinearTransform3D(matrix)

    def eat_sandwich(self, keyword: str):
        """Skip lines up to and including the ``end_<keyword>`` line.

        The opening ``begin_<keyword>`` line must already have been consumed.
        Nested sections with the same keyword are skipped recursively.

        Args:
            keyword: Lower-case section name without the ``begin_`` prefix.

        Raises:
            ValueError: If EOF is reached before the closing keyword.
        """
        begin_keyword = 'begin_' + keyword
        end_keyword = 'end_' + keyword
        lineno = self.lineno

        while (line := self.next_line()):
            # Use a separate name: the `keyword` parameter is still needed
            # for the recursive call and for the error message below.
            word = line.lstrip().split(maxsplit=1)[0].lower()
            if word == begin_keyword:
                # recurse to inner (identical) section
                self.eat_sandwich(keyword)
                continue
            if word == end_keyword:
                break
        else:
            raise ValueError(f"Unclosed section '{keyword}' opened at line {lineno}")

    def parse(self) -> XSF:
        """Parse the whole file and return the resulting ``XSF`` object.

        Recognised keywords (case-insensitive): ``ATOMS``, ``PRIMCOORD``,
        ``CONVCOORD``, ``PRIMVEC``, ``CONVVEC``, and the periodicity
        keywords ``CRYSTAL``, ``SLAB``, ``POLYMER``, ``MOLECULE``.
        ``BEGIN_*`` ... ``END_*`` sections are skipped.

        Raises:
            ValueError: For animated files (``ANIMSTEPS``), unknown keywords,
                a stray ``END_*`` keyword, or a file containing no data
                sections.
        """
        data: t.Dict[str, t.Any] = {}
        periodicity: Periodicity = 'molecule'

        while (line := self.next_line()):
            keyword = line.lstrip().split(maxsplit=1)[0].lower()
            logging.debug(f"Parsing keyword {keyword}")

            if keyword == 'animsteps':
                raise ValueError("Animated XSF files are not supported.")
            elif keyword == 'atoms':
                data['atoms'] = self.parse_atoms()
            elif keyword in ('primcoord', 'convcoord'):
                data[keyword] = self.parse_coords()
            elif keyword in ('primvec', 'convvec'):
                data[keyword] = self.parse_lattice()
            elif keyword in ('crystal', 'slab', 'polymer', 'molecule'):
                periodicity = keyword
            elif keyword.startswith('begin_'):
                self.eat_sandwich(keyword.removeprefix('begin_'))
            elif keyword.startswith('end_'):
                raise ValueError(f"Unopened section close keyword '{keyword}'")
            else:
                raise ValueError(f"Unexpected keyword '{keyword.upper()}'.")

        if len(data) == 0:
            raise ValueError("Unexpected EOF while parsing XSF file.")

        # most validation is performed in XSF
        return XSF(
            periodicity, atoms=data.get('atoms'),
            prim_coords=data.get('primcoord'),
            conv_coords=data.get('convcoord'),
            primitive_cell=data.get('primvec'),
            conventional_cell=data.get('convvec'),
        )