Skip to content

atomlib.io.cif

IO for the CIF 1.1 file format, as defined by the IUCr CIF 1.1 syntax specification.

Value module-attribute

# A single CIF value: a number, a string, or None.
# `CifReader.parse_value` returns None for the CIF placeholders `.` and `?`.
Value: TypeAlias = Union[int, float, str, None]

SYMMETRY_PARSER module-attribute

# Expression parser used by `parse_symmetry` on each comma-separated component
# of a symmetry operation string such as 'x,y,z'.
# NOTE(review): the trailing integers look like precedence levels (`*` and `/`
# binding tighter than `+` and `-`) and the `False` flag like an associativity
# switch -- confirm against the `Parser` / `BinaryOrUnaryOp` definitions.
# NOTE(review): `parse` is presumably `SymmetryVec.parse`, turning each token
# into a `SymmetryVec` -- confirm in the module source.
SYMMETRY_PARSER: Parser[SymmetryVec, SymmetryVec] = Parser(
    [
        BinaryOrUnaryOp(["-"], sub, False, 5),
        BinaryOrUnaryOp(["+"], add, False, 5),
        BinaryOp(["*"], mul, 6),
        BinaryOp(["/"], truediv, 6),
    ],
    parse,
)

CIF dataclass

Source code in atomlib/io/cif.py
@dataclass
class CIF:
    """An ordered collection of the data blocks making up one CIF file."""

    data_blocks: t.Tuple[CIFDataBlock, ...]

    def __post_init__(self):
        # Only the leading block may stay anonymous (name None); every later
        # block without a name is given the empty name instead.
        for later_block in self.data_blocks[1:]:
            if later_block.name is None:
                later_block.name = ""

    @staticmethod
    def from_file(file: FileOrPath) -> CIF:
        """Read every data block from `file` and collect them into a `CIF`."""
        parsed_blocks = tuple(CIFDataBlock.from_file(file))
        return CIF(parsed_blocks)

    def __len__(self) -> int:
        """Return the number of data blocks."""
        return len(self.data_blocks)

    def get_block(self, block: t.Union[int, str]) -> CIFDataBlock:
        """
        Look up a data block by position (`int`) or by name (`str`).

        For a name, the first block whose `name` equals `block` is returned.
        Raises `ValueError` if no block matches.
        """
        if isinstance(block, int):
            try:
                return self.data_blocks[block]
            except IndexError:
                pass
        else:
            for candidate in self.data_blocks:
                if candidate.name == block:
                    return candidate
        raise ValueError(f"Couldn't find block '{block}' in CIF file. File has {len(self)} blocks.")

    def write(self, file: FileOrPath):
        """Write a header comment followed by every data block to `file`."""
        with open_file(file, 'w') as out:
            print("# generated by atomlib", file=out)
            for data_block in self.data_blocks:
                # blank line before each block
                print(file=out)
                data_block._write(out)

data_blocks instance-attribute

data_blocks: Tuple[CIFDataBlock, ...]

from_file staticmethod

from_file(file: FileOrPath) -> CIF
Source code in atomlib/io/cif.py
@staticmethod
def from_file(file: FileOrPath) -> CIF:
    """Parse all data blocks found in `file` and collect them into a `CIF`."""
    parsed_blocks = tuple(CIFDataBlock.from_file(file))
    return CIF(parsed_blocks)

get_block

get_block(block: Union[int, str]) -> CIFDataBlock
Source code in atomlib/io/cif.py
def get_block(self, block: t.Union[int, str]) -> CIFDataBlock:
    """
    Look up a data block by position (`int`) or by name (`str`).

    For a name, the first block whose `name` equals `block` is returned.
    Raises `ValueError` if no block matches.
    """
    if isinstance(block, int):
        try:
            return self.data_blocks[block]
        except IndexError:
            pass
    else:
        for candidate in self.data_blocks:
            if candidate.name == block:
                return candidate
    raise ValueError(f"Couldn't find block '{block}' in CIF file. File has {len(self)} blocks.")

write

write(file: FileOrPath)
Source code in atomlib/io/cif.py
def write(self, file: FileOrPath):
    """Write a header comment followed by every data block to `file`."""
    with open_file(file, 'w') as out:
        print("# generated by atomlib", file=out)
        for data_block in self.data_blocks:
            # blank line before each block
            print(file=out)
            data_block._write(out)

CIFDataBlock dataclass

Source code in atomlib/io/cif.py
@dataclass
class CIFDataBlock:
    """
    One data block of a CIF file.

    Holds the block's items in file order (`data`) together with a flattened
    tag -> value lookup (`data_dict`) which is rebuilt from `data` on
    construction. Construction raises `ValueError` if a tag occurs twice.
    """

    name: t.Optional[str]  # None: no data_ block, empty string: unnamed "data_"

    # data (including loops) in file order
    data: t.Tuple[t.Union[t.Tuple[str, Value], CIFTable], ...]

    # data flattened into a single dictionary. Created automatically from `data`
    data_dict: t.Dict[str, t.Union[t.List[Value], Value]] = field(init=False)

    def __post_init__(self):
        # if we raise here, make sure the object state is fine
        self.data_dict = None  # type: ignore

        data_values = {}
        for item in self.data:
            # a loop contributes one entry per column, a plain pair just itself
            pairs = item.data.items() if isinstance(item, CIFTable) else (item,)
            for (k, v) in pairs:
                if k in data_values:
                    raise ValueError(f"Duplicate key {k}")
                data_values[k] = v

        self.data_dict = data_values

    @staticmethod
    def from_file(file: FileOrPath) -> t.Iterator[CIFDataBlock]:
        """Lazily yield every data block parsed from `file`."""
        with open_file(file) as f:
            yield from CifReader(f).parse()

    @staticmethod
    def _atom_site_table(atoms: HasAtoms, coord_tags: t.Sequence[str]) -> CIFTable:
        """
        Build the atom-site loop shared by `from_atoms` and `from_atomcell`.

        `coord_tags` names the three tags which receive components 0, 1 and 2
        of the 'coords' column. Optional columns are only written when
        `atoms.try_get_column` finds them.
        """
        keys: t.Sequence[t.Tuple[str, t.Union[str, polars.Expr], t.Union[str, bool]]] = (
            # col, expr, predicate (column or boolean)
            ('atom_site_type_symbol', 'symbol', True),
            ('atom_site_label', 'label', 'label'),
            ('atom_site_occupancy', 'frac_occupancy', 'frac_occupancy'),
            *((tag, polars.col('coords').arr.get(i), True) for (i, tag) in enumerate(coord_tags)),
            ('atom_site_U_iso_or_equiv', 'wobble', 'wobble'),
        )
        return CIFTable({
            key: atoms.select(expr).to_series().to_list() for (key, expr, pred) in keys
            if (atoms.try_get_column(pred) is not None if isinstance(pred, str) else pred)
        })

    @staticmethod
    def from_atoms(atoms: HasAtoms) -> CIFDataBlock:
        """
        Build an unnamed data block from `atoms`, writing the components of
        'coords' under the ``atom_site_Cartn_x/y/z`` tags.
        """
        data: t.List[t.Union[t.Tuple[str, Value], CIFTable]] = []

        data.append(('audit_creation_method', 'Generated by atomlib'))
        data.append(CIFDataBlock._atom_site_table(
            atoms, ('atom_site_Cartn_x', 'atom_site_Cartn_y', 'atom_site_Cartn_z')
        ))

        return CIFDataBlock("", tuple(data))

    @staticmethod
    def from_atomcell(atomcell: HasAtomCell) -> CIFDataBlock:
        """
        Build an unnamed data block from `atomcell`: an identity symmetry
        loop, the cell parameters, and the atoms in the 'cell_box' frame
        written under the ``atom_site_fract_x/y/z`` tags.
        """
        atoms = atomcell.get_atoms('cell_box')
        ortho = atomcell.get_transform('local', 'cell_box').to_linear()
        (cell_size, cell_angle) = ortho_to_cell(ortho)
        cell_angle *= 180./numpy.pi  # convert to degrees

        data: t.List[t.Union[t.Tuple[str, Value], CIFTable]] = []

        data.append(('audit_creation_method', 'Generated by atomlib'))

        # symmetry information
        data.append(CIFTable({
            'space_group_symop_id': [1],
            'space_group_symop_operation_xyz': ['x,y,z'],
        }))

        # cell information
        data.append(('cell_length_a', cell_size[0]))
        data.append(('cell_length_b', cell_size[1]))
        data.append(('cell_length_c', cell_size[2]))
        data.append(('cell_angle_alpha', cell_angle[0]))
        data.append(('cell_angle_beta', cell_angle[1]))
        data.append(('cell_angle_gamma', cell_angle[2]))
        data.append(('cell_volume', ortho.det()))

        data.append(CIFDataBlock._atom_site_table(
            atoms, ('atom_site_fract_x', 'atom_site_fract_y', 'atom_site_fract_z')
        ))

        return CIFDataBlock("", tuple(data))

    def write(self, file: FileOrPath):
        """Write this data block alone to `file`."""
        with open_file(file, 'w') as f:
            self._write(f)

    def _write(self, f: TextIOBase):
        """Write the block header (if named) and every item to the open stream `f`."""
        if self.name is not None:
            print(f"data_{self.name}\n", file=f)

        for data in self.data:
            if isinstance(data, CIFTable):
                data._write(f)
            else:
                (name, value) = data
                val = _format_val(value).rstrip()
                if val.startswith(';'):
                    # multiline string
                    print(f"_{name}\n{val}", file=f)
                else:
                    print(f"_{name: <28} {val}", file=f)

    def stack_tags(self, *tags: str, dtype: t.Union[str, numpy.dtype, t.Iterable[t.Union[str, numpy.dtype]], None] = None,
                   rename: t.Optional[t.Iterable[t.Optional[str]]] = None, required: t.Union[bool, t.Iterable[bool]] = True) -> polars.DataFrame:
        """
        Collect the values of `tags` into the columns of a DataFrame.

        `dtype`, `rename` and `required` may each be a single value applied to
        every tag (`rename` only as None) or an iterable with one entry per
        tag. Tags which are missing and not required are skipped.

        Raises `ValueError` if a required tag is missing, if a per-tag
        iterable supplies fewer entries than there are tags, or if the
        collected columns differ in length; raises `TypeError` if a tag's
        values can't be converted to the requested dtype.
        """
        n_tags = len(tags)

        dtypes: t.Sequence[t.Optional[numpy.dtype]]
        if dtype is None:
            dtypes = (None,) * n_tags
        elif isinstance(dtype, (numpy.dtype, str)):
            dtypes = (numpy.dtype(dtype),) * n_tags
        else:
            dtypes = tuple(map(numpy.dtype, dtype))
            if len(dtypes) != n_tags:
                raise ValueError("dtype list of invalid length")

        # Pair each per-tag option with its tag up front. A bare zip() over
        # all of them would silently drop trailing tags when an option list
        # is too short.
        required_flags: t.Sequence[bool]
        if isinstance(required, bool):
            required_flags = (required,) * n_tags
        else:
            required_flags = tuple(flag for (_, flag) in zip(tags, required))
            if len(required_flags) != n_tags:
                raise ValueError("required list of invalid length")

        names: t.Sequence[t.Optional[str]]
        if rename is None:
            names = (None,) * n_tags
        else:
            names = tuple(new_name for (_, new_name) in zip(tags, rename))
            if len(names) != n_tags:
                raise ValueError("rename list of invalid length")

        d = {}
        for (tag, ty, req, name) in zip(tags, dtypes, required_flags, names):
            if tag not in self.data_dict:
                if req:
                    raise ValueError(f"Tag '{tag}' missing from CIF file")
                continue
            try:
                arr = numpy.array(self.data_dict[tag], dtype=ty)
                d[name or tag] = arr
            except TypeError:
                raise TypeError(f"Tag '{tag}' of invalid or heterogeneous type.")

        if len(d) == 0:
            return polars.DataFrame({})

        tag_len = len(next(iter(d.values())))
        if any(len(arr) != tag_len for arr in d.values()):
            raise ValueError(f"Tags of mismatching lengths: {tuple(map(len, d.values()))}")

        return polars.DataFrame(d)

    def _float_tags(self, *tags: str) -> t.Optional[t.Tuple[float, ...]]:
        """Return the values of `tags` as floats, or None if any is missing or not convertible."""
        try:
            return tuple(float(self[tag]) for tag in tags)  # type: ignore
        except (ValueError, TypeError, KeyError):
            return None

    def cell_size(self) -> t.Optional[t.Tuple[float, float, float]]:
        """Return cell size (in angstroms)."""
        return self._float_tags('cell_length_a', 'cell_length_b', 'cell_length_c')  # type: ignore

    def cell_angle(self) -> t.Optional[t.Tuple[float, float, float]]:
        """Return cell angle (in degrees)."""
        return self._float_tags('cell_angle_alpha', 'cell_angle_beta', 'cell_angle_gamma')  # type: ignore

    def get_symmetry(self) -> t.Iterator[AffineTransform3D]:
        """
        Return an iterator over the block's symmetry operations, each parsed
        with `parse_symmetry`. The iterator is empty if neither symmetry tag
        is present.
        """
        syms = self.data_dict.get('space_group_symop_operation_xyz')
        if syms is None:
            # old name for symmetry
            syms = self.data_dict.get('symmetry_equiv_pos_as_xyz')
        if syms is None:
            syms = ()
        elif isinstance(syms, str) or not hasattr(syms, '__iter__'):
            # A single (non-loop) value. Strings are iterable, so they need an
            # explicit check or they would be split into single characters.
            syms = (syms,)
        return map(parse_symmetry, map(str, syms))  # type: ignore

    def __getitem__(self, key: str) -> t.Union[Value, t.List[Value]]:
        """Return the value (or loop column) stored under tag `key`."""
        return self.data_dict[key]

name instance-attribute

name: Optional[str]

data instance-attribute

data: Tuple[Union[Tuple[str, Value], CIFTable], ...]

data_dict class-attribute instance-attribute

data_dict: Dict[str, Union[List[Value], Value]] = field(
    init=False
)

from_file staticmethod

from_file(file: FileOrPath) -> Iterator[CIFDataBlock]
Source code in atomlib/io/cif.py
@staticmethod
def from_file(file: FileOrPath) -> t.Iterator[CIFDataBlock]:
    """Lazily yield every data block parsed from `file`; the file stays open while iterating."""
    with open_file(file) as stream:
        reader = CifReader(stream)
        yield from reader.parse()

from_atoms staticmethod

from_atoms(atoms: HasAtoms) -> CIFDataBlock
Source code in atomlib/io/cif.py
@staticmethod
def from_atoms(atoms: HasAtoms) -> CIFDataBlock:
    """
    Build an unnamed data block from `atoms`.

    The block holds a creation note and one loop of per-atom columns, with
    the components of 'coords' written under ``atom_site_Cartn_x/y/z``.
    Optional columns appear only when `atoms.try_get_column` finds them.
    """
    # (CIF tag, column name or expression to select, column that must exist or a fixed flag)
    columns: t.Sequence[t.Tuple[str, t.Union[str, polars.Expr], t.Union[str, bool]]] = (
        ('atom_site_type_symbol', 'symbol', True),
        ('atom_site_label', 'label', 'label'),
        ('atom_site_occupancy', 'frac_occupancy', 'frac_occupancy'),
        ('atom_site_Cartn_x', polars.col('coords').arr.get(0), True),
        ('atom_site_Cartn_y', polars.col('coords').arr.get(1), True),
        ('atom_site_Cartn_z', polars.col('coords').arr.get(2), True),
        ('atom_site_U_iso_or_equiv', 'wobble', 'wobble'),
    )

    site_columns: t.Dict[str, t.List[Value]] = {}
    for (tag, selector, needs) in columns:
        if isinstance(needs, str):
            wanted = atoms.try_get_column(needs) is not None
        else:
            wanted = needs
        if wanted:
            site_columns[tag] = atoms.select(selector).to_series().to_list()

    entries: t.List[t.Union[t.Tuple[str, Value], CIFTable]] = [
        ('audit_creation_method', 'Generated by atomlib'),
        CIFTable(site_columns),
    ]
    return CIFDataBlock("", tuple(entries))

from_atomcell staticmethod

from_atomcell(atomcell: HasAtomCell) -> CIFDataBlock
Source code in atomlib/io/cif.py
@staticmethod
def from_atomcell(atomcell: HasAtomCell) -> CIFDataBlock:
    """
    Build an unnamed data block from `atomcell`.

    The block holds a creation note, an identity symmetry loop, the cell
    lengths, angles (in degrees) and volume, and a loop of per-atom columns
    taken from the atoms in the 'cell_box' frame, with the components of
    'coords' written under ``atom_site_fract_x/y/z``.
    """
    atoms = atomcell.get_atoms('cell_box')
    ortho = atomcell.get_transform('local', 'cell_box').to_linear()
    (cell_size, cell_angle) = ortho_to_cell(ortho)
    cell_angle *= 180./numpy.pi  # convert to degrees

    entries: t.List[t.Union[t.Tuple[str, Value], CIFTable]] = [
        ('audit_creation_method', 'Generated by atomlib'),
        # symmetry information
        CIFTable({
            'space_group_symop_id': [1],
            'space_group_symop_operation_xyz': ['x,y,z'],
        }),
    ]

    # cell information
    for (i, tag) in enumerate(('cell_length_a', 'cell_length_b', 'cell_length_c')):
        entries.append((tag, cell_size[i]))
    for (i, tag) in enumerate(('cell_angle_alpha', 'cell_angle_beta', 'cell_angle_gamma')):
        entries.append((tag, cell_angle[i]))
    entries.append(('cell_volume', ortho.det()))

    # (CIF tag, column name or expression to select, column that must exist or a fixed flag)
    columns: t.Sequence[t.Tuple[str, t.Union[str, polars.Expr], t.Union[str, bool]]] = (
        ('atom_site_type_symbol', 'symbol', True),
        ('atom_site_label', 'label', 'label'),
        ('atom_site_occupancy', 'frac_occupancy', 'frac_occupancy'),
        ('atom_site_fract_x', polars.col('coords').arr.get(0), True),
        ('atom_site_fract_y', polars.col('coords').arr.get(1), True),
        ('atom_site_fract_z', polars.col('coords').arr.get(2), True),
        ('atom_site_U_iso_or_equiv', 'wobble', 'wobble'),
    )

    site_columns: t.Dict[str, t.List[Value]] = {}
    for (tag, selector, needs) in columns:
        if isinstance(needs, str):
            wanted = atoms.try_get_column(needs) is not None
        else:
            wanted = needs
        if wanted:
            site_columns[tag] = atoms.select(selector).to_series().to_list()
    entries.append(CIFTable(site_columns))

    return CIFDataBlock("", tuple(entries))

write

write(file: FileOrPath)
Source code in atomlib/io/cif.py
def write(self, file: FileOrPath):
    """Write this data block alone to `file`, opened for writing via `open_file`."""
    with open_file(file, 'w') as out:
        self._write(out)

stack_tags

stack_tags(
    *tags: str,
    dtype: Union[
        str, dtype, Iterable[Union[str, dtype]], None
    ] = None,
    rename: Optional[Iterable[Optional[str]]] = None,
    required: Union[bool, Iterable[bool]] = True
) -> DataFrame
Source code in atomlib/io/cif.py
def stack_tags(self, *tags: str, dtype: t.Union[str, numpy.dtype, t.Iterable[t.Union[str, numpy.dtype]], None] = None,
               rename: t.Optional[t.Iterable[t.Optional[str]]] = None, required: t.Union[bool, t.Iterable[bool]] = True) -> polars.DataFrame:
    """
    Collect the values of `tags` into the columns of a DataFrame.

    `dtype`, `rename` and `required` may each be a single value applied to
    every tag (`rename` only as None) or an iterable with one entry per tag.
    Tags which are missing and not required are skipped.

    Raises `ValueError` if a required tag is missing, if a per-tag iterable
    supplies fewer entries than there are tags, or if the collected columns
    differ in length; raises `TypeError` if a tag's values can't be converted
    to the requested dtype.
    """
    n_tags = len(tags)

    dtypes: t.Sequence[t.Optional[numpy.dtype]]
    if dtype is None:
        dtypes = (None,) * n_tags
    elif isinstance(dtype, (numpy.dtype, str)):
        dtypes = (numpy.dtype(dtype),) * n_tags
    else:
        dtypes = tuple(map(numpy.dtype, dtype))
        if len(dtypes) != n_tags:
            raise ValueError("dtype list of invalid length")

    # Pair each per-tag option with its tag up front. A bare zip() over all
    # of them would silently drop trailing tags when an option list is too
    # short.
    required_flags: t.Sequence[bool]
    if isinstance(required, bool):
        required_flags = (required,) * n_tags
    else:
        required_flags = tuple(flag for (_, flag) in zip(tags, required))
        if len(required_flags) != n_tags:
            raise ValueError("required list of invalid length")

    names: t.Sequence[t.Optional[str]]
    if rename is None:
        names = (None,) * n_tags
    else:
        names = tuple(new_name for (_, new_name) in zip(tags, rename))
        if len(names) != n_tags:
            raise ValueError("rename list of invalid length")

    d = {}
    for (tag, ty, req, name) in zip(tags, dtypes, required_flags, names):
        if tag not in self.data_dict:
            if req:
                raise ValueError(f"Tag '{tag}' missing from CIF file")
            continue
        try:
            arr = numpy.array(self.data_dict[tag], dtype=ty)
            d[name or tag] = arr
        except TypeError:
            raise TypeError(f"Tag '{tag}' of invalid or heterogeneous type.")

    if len(d) == 0:
        return polars.DataFrame({})

    tag_len = len(next(iter(d.values())))
    if any(len(arr) != tag_len for arr in d.values()):
        raise ValueError(f"Tags of mismatching lengths: {tuple(map(len, d.values()))}")

    return polars.DataFrame(d)

cell_size

cell_size() -> Optional[Tuple[float, float, float]]

Return cell size (in angstroms).

Source code in atomlib/io/cif.py
def cell_size(self) -> t.Optional[t.Tuple[float, float, float]]:
    """Return cell size (in angstroms)."""
    # None is returned if any of the three tags is missing or not convertible to float.
    tags = ('cell_length_a', 'cell_length_b', 'cell_length_c')
    try:
        (a, b, c) = (float(self[tag]) for tag in tags)  # type: ignore
    except (ValueError, TypeError, KeyError):
        return None
    return (a, b, c)

cell_angle

cell_angle() -> Optional[Tuple[float, float, float]]

Return cell angle (in degrees).

Source code in atomlib/io/cif.py
def cell_angle(self) -> t.Optional[t.Tuple[float, float, float]]:
    """Return cell angle (in degrees)."""
    # None is returned if any of the three tags is missing or not convertible to float.
    tags = ('cell_angle_alpha', 'cell_angle_beta', 'cell_angle_gamma')
    try:
        (alpha, beta, gamma) = (float(self[tag]) for tag in tags)  # type: ignore
    except (ValueError, TypeError, KeyError):
        return None
    return (alpha, beta, gamma)

get_symmetry

get_symmetry() -> Iterator[AffineTransform3D]
Source code in atomlib/io/cif.py
def get_symmetry(self) -> t.Iterator[AffineTransform3D]:
    """
    Return an iterator over the block's symmetry operations, each parsed with
    `parse_symmetry`. The iterator is empty if neither symmetry tag is present.
    """
    syms = self.data_dict.get('space_group_symop_operation_xyz')
    if syms is None:
        # old name for symmetry
        syms = self.data_dict.get('symmetry_equiv_pos_as_xyz')
    if syms is None:
        syms = ()
    elif isinstance(syms, str) or not hasattr(syms, '__iter__'):
        # A single (non-loop) value. Strings are iterable, so they need an
        # explicit check or they would be split into single characters.
        syms = (syms,)
    return map(parse_symmetry, map(str, syms))  # type: ignore

CIFTable dataclass

Source code in atomlib/io/cif.py
@dataclass
class CIFTable:
    """A CIF ``loop_``: a mapping from tag to that tag's column of values."""

    data: t.Dict[str, t.List[Value]]

    def _write(self, f: TextIOBase):
        """Write the loop header, one line per tag, then one line per row to `f`."""
        print("\nloop_", file=f)
        for tag in self.data:
            print(f" _{tag}", file=f)

        columns = self.data.values()
        # zip() walks the columns in lockstep, giving one tuple per row
        for row in zip(*columns):
            cells = "  ".join(_format_val(cell) for cell in row)
            print(" " + cells, file=f)

        print(file=f)

data instance-attribute

data: Dict[str, List[Value]]

SymmetryVec

Source code in atomlib/io/cif.py
class SymmetryVec:
    """
    Value type used while evaluating one component of a symmetry expression.

    `inner` is either a plain scalar or a length-4 array. Axis tokens 'x',
    'y' and 'z' set elements 0, 1 and 2; scalars are placed in element 3 when
    promoted with `to_vec`.
    """

    @classmethod
    def parse(cls, s: str) -> SymmetryVec:
        """Parse a token: an axis name ('x', 'y', 'z') or a number accepted by `float`."""
        axis_names = ('x', 'y', 'z')
        # s[:1] rather than s[0], so an empty token fails in float() with a
        # ValueError instead of an IndexError
        if s[:1] in axis_names:
            a = numpy.zeros((4,))
            a[axis_names.index(s[0])] += 1.
            return cls(a)
        return cls(float(s))

    def __init__(self, val: t.Union[float, NDArray[numpy.floating]]):
        self.inner: t.Union[float, NDArray[numpy.floating]] = val

    def is_scalar(self) -> bool:
        """Return whether `inner` is a plain number (same test as `to_vec`)."""
        return isinstance(self.inner, (int, float))

    def to_vec(self) -> NDArray[numpy.floating]:
        """Return `inner` as a length-4 array, placing a scalar in the last element."""
        if isinstance(self.inner, (int, float)):
            vec = numpy.zeros((4,))
            vec[3] = self.inner
            return vec
        return self.inner

    def __add__(self, rhs: SymmetryVec) -> SymmetryVec:
        if self.is_scalar() and rhs.is_scalar():
            return SymmetryVec(self.inner + rhs.inner)
        return SymmetryVec(self.to_vec() + rhs.to_vec())

    def __neg__(self) -> SymmetryVec:
        return SymmetryVec(-self.inner)

    def __pos__(self) -> SymmetryVec:
        return self

    def __sub__(self, rhs: SymmetryVec) -> SymmetryVec:
        if self.is_scalar() and rhs.is_scalar():
            return SymmetryVec(self.inner - rhs.inner)
        # `self` is the left operand, exactly as in the scalar branch
        return SymmetryVec(self.to_vec() - rhs.to_vec())

    def __mul__(self, rhs: SymmetryVec) -> SymmetryVec:
        if not self.is_scalar() and not rhs.is_scalar():
            raise ValueError("Can't multiply two symmetry directions")
        return SymmetryVec(self.inner * rhs.inner)

    def __truediv__(self, rhs: SymmetryVec) -> SymmetryVec:
        if not rhs.is_scalar():
            if not self.is_scalar():
                raise ValueError("Can't divide two symmetry directions")
            # scalar / direction would divide by the zero entries of the vector
            raise ValueError("Can't divide by a symmetry direction")
        # `self` is the dividend: '1/2' must evaluate to 0.5
        return SymmetryVec(self.inner / rhs.inner)

inner instance-attribute

inner: Union[float, NDArray[floating]] = val

parse classmethod

parse(s: str) -> SymmetryVec
Source code in atomlib/io/cif.py
@classmethod
def parse(cls, s: str) -> SymmetryVec:
    """
    Parse a single token. A token starting with 'x', 'y' or 'z' becomes a
    length-4 array with a one at index 0, 1 or 2; anything else is converted
    with `float`.
    """
    axis_names = ('x', 'y', 'z')
    lead = s[0]
    if lead not in axis_names:
        return cls(float(s))
    unit = numpy.zeros((4,))
    unit[axis_names.index(lead)] += 1.
    return cls(unit)

is_scalar

is_scalar() -> bool
Source code in atomlib/io/cif.py
def is_scalar(self) -> bool:
    """Return whether `inner` is a plain number rather than an array."""
    # ints count as scalars too, matching the check `to_vec` performs
    return isinstance(self.inner, (int, float))

to_vec

to_vec() -> NDArray[floating]
Source code in atomlib/io/cif.py
def to_vec(self) -> NDArray[numpy.floating]:
    """
    Return `inner` as a length-4 array. A plain number is placed in the last
    element of a zero array; anything else is returned unchanged.
    """
    value = self.inner
    if not isinstance(value, (int, float)):
        return value
    promoted = numpy.zeros((4,))
    promoted[3] = value
    return promoted

CifReader

Source code in atomlib/io/cif.py
class CifReader:
    """
    Line-buffered tokenizer and parser for CIF text.

    `_buf` holds the unconsumed remainder of the current line (None when a
    fresh line must be read), `line` counts the lines read so far, and
    `_after_eol` records whether the next token is the first one on its line,
    which is what distinguishes a ``;`` text field from an ordinary value.
    """

    def __init__(self, file: TextIOBase):
        self.line = 0
        self._file: TextIOBase = file
        self._buf: t.Optional[str] = None
        self._after_eol = True
        self._eof = False

    @staticmethod
    def _is_reserved(word: str) -> bool:
        """Return whether `word` is a CIF reserved word (``data_*``, ``save_*``, ``loop_``, ``stop_``, ``global_``)."""
        lowered = word.lower()
        return lowered in ('loop_', 'stop_', 'global_') or lowered.startswith(('data_', 'save_'))

    def parse(self) -> t.Iterator[CIFDataBlock]:
        """
        Yield each data block in the file. Items appearing before any
        ``data_`` header are yielded as a block named None.

        Raises `ValueError` on a token which can't start a data block.
        """
        while True:
            word = self.peek_word()
            if word is None:
                return
            if word.lower().startswith('data_'):
                self.next_word()
                name = word[len('data_'):]
            elif word.startswith('_'):
                name = None
            else:
                # self.line now refers to the line holding the offending token
                raise ValueError(f"While parsing line {self.line}: Unexpected token {word}")

            yield self.parse_datablock(name)

    def after_eol(self) -> bool:
        """
        Returns whether the current token (the one that will be returned
        by the next peek() or next()) is after a newline.
        """
        return self._after_eol

    def peek_line(self) -> t.Optional[str]:
        """Return the rest of the current line without consuming it, or None at EOF."""
        return self._try_fill_buf()

    def next_line(self) -> t.Optional[str]:
        """Consume and return the rest of the current line, or None at EOF."""
        line = self.peek_line()
        self._buf = None
        self._after_eol = True
        return line

    def next_until(self, marker: str) -> t.Optional[str]:
        """
        Collect words until `marker`. Because of the weirdness of CIF,
        `marker` must occur immediately before a whitespace boundary.

        Returns the consumed text up to and including `marker`, reading
        further lines as needed, or None if EOF is reached first.
        """
        buf = self._try_fill_buf()
        if buf is None:
            return None

        pattern = re.compile(re.escape(marker) + r'(?=\s|$)')
        parts = []
        while not (match := pattern.search(buf)):
            parts.append(buf)
            buf = self._try_fill_buf(True)
            if buf is None:
                return None
        parts.append(buf[:match.end()])

        rest = buf[match.end():]
        if len(rest) == 0 or rest.isspace():
            # line fully consumed, the next token starts a new line
            self._buf = None
            self._after_eol = True
        else:
            self._buf = rest
            self._after_eol = False
        return "".join(parts)

    def peek_word(self) -> t.Optional[str]:
        """
        Return the next whitespace-delimited word without consuming it,
        skipping blank lines and comments. Returns None at EOF.
        """
        while True:
            buf = self._try_fill_buf()
            if buf is None:
                return None
            buf = buf.lstrip()
            if len(buf) == 0 or buf.startswith('#'):
                # eat comment or blank line
                self._buf = None
                continue
            break

        return buf.split(maxsplit=1)[0]

    def next_word(self) -> t.Optional[str]:
        """Consume and return the next word, or None at EOF."""
        w = self.peek_word()
        if w is None:
            return None
        assert self._buf is not None
        # drop the word and the single separator character following it
        self._buf = self._buf.lstrip()[len(w)+1:].lstrip()
        if len(self._buf) == 0 or self._buf.isspace():
            # eat whitespace at end of line
            self._buf = None
            self._after_eol = True
        else:
            self._after_eol = False
        return w

    def _try_fill_buf(self, force: bool = False) -> t.Optional[str]:
        """
        Ensure `_buf` holds text, reading the next line if it is empty (or
        if `force` discards the current contents). Returns `_buf`, which is
        None at EOF.
        """
        if force:
            self._buf = None
        if self._buf is None and not self._eof:
            try:
                self._buf = next(self._file)
            except StopIteration:
                self._eof = True
            else:
                self.line += 1
                # whatever is read next is the first token on a fresh line
                self._after_eol = True
        return self._buf

    def parse_bare(self) -> t.Union[int, float, str]:
        """Consume an unquoted word, converting it to int or float when it looks like a number."""
        w = self.next_word()
        if w is None:
            raise ValueError("Unexpected EOF while parsing value.")
        if _INT_RE.fullmatch(w):
            return int(w)  # may raise
        if (m := _FLOAT_RE.fullmatch(w)):
            if m[1] != '.':
                return float(m[1])  # may raise
        return w

    def parse_datablock(self, name: t.Optional[str] = None) -> CIFDataBlock:
        """Parse items and loops until a word which is neither, and return them as a block called `name`."""
        logging.debug(f"parse datablock '{name}'")

        data: t.List[t.Union[CIFTable, t.Tuple[str, Value]]] = []

        while True:
            word = self.peek_word()
            if word is None:
                break
            if word.lower() == 'loop_':
                self.next_word()
                data.append(self.parse_loop())
            elif word.startswith('_'):
                self.next_word()
                (k, v) = (word[1:], self.parse_value())
                logging.debug(f"{k} = {v}")
                data.append((k, v))
            else:
                break

        return CIFDataBlock(name, tuple(data))

    def eat_saveframe(self):
        """Consume words up to and including the next ``save_``. Raises `ValueError` at EOF."""
        line = self.line
        while True:
            w = self.next_word()
            if w is None:
                raise ValueError(f"EOF before end of save frame starting at line {line}")
            if w.lower() == 'save_':
                break

    def parse_loop(self) -> CIFTable:
        """
        Parse the tags and values of a loop (the ``loop_`` keyword itself has
        already been consumed).

        Raises `ValueError` on EOF before any value, on a value without any
        tag, or if the number of values is not a multiple of the number of tags.
        """
        line = self.line
        tags = []
        while True:
            w = self.peek_word()
            if w is None:
                raise ValueError(f"EOF before loop values at line {line}")
            if not w.startswith('_'):
                break
            self.next_word()
            tags.append(w[1:])

        vals: t.Tuple[t.List[Value], ...] = tuple([] for _ in tags)
        i = 0

        while True:
            w = self.peek_word()
            # The loop ends at the next tag or reserved word. A ``data_name``
            # header does not end in '_', so it is matched by its prefix.
            if w is None or w.startswith('_') or self._is_reserved(w):
                break
            if not tags:
                raise ValueError(f"While parsing loop at line {line}: Got value {w} before any tags")
            vals[i].append(self.parse_value())
            i = (i + 1) % len(tags)

        if i != 0:
            n_vals = sum(map(len, vals))
            raise ValueError(f"While parsing loop at line {line}: "
                            f"Got {n_vals} vals, expected a multiple of {len(tags)}")

        return CIFTable(dict(zip(tags, vals)))

    def parse_value(self) -> Value:
        """
        Parse one value: None for ``.`` and ``?``, a text field, a quoted
        string, or a bare word. Raises `ValueError` at EOF.
        """
        logging.debug("parse_value")
        w = self.peek_word()
        if w is None:
            raise ValueError("Unexpected EOF while parsing value.")
        if w in ('.', '?'):
            self.next_word()
            return None

        # A text field opens with ';' as the first token of a line. The text
        # may start directly after the semicolon (';text'), so a bare ';'
        # word is not the only form.
        assert self._buf is not None
        if self.after_eol() and (w == ';' or self._buf.startswith(';')):
            return self.parse_text_field()

        if w[0] in ('"', "'"):
            return self.parse_quoted()

        return self.parse_bare()

    def parse_text_field(self) -> str:
        """
        Parse a ``;``-delimited text field starting at the current line and
        return its text with trailing whitespace removed. Raises `ValueError`
        at EOF.
        """
        start_line = self.line
        line = self.next_line()
        if line is None:
            raise ValueError(f"While parsing text field at line {start_line}: Unexpected EOF")
        parts = [line.lstrip().removeprefix(';').lstrip()]
        while True:
            line = self.next_line()
            if line is None:
                raise ValueError(f"While parsing text field at line {start_line}: Unexpected EOF")
            if line.strip() == ';':
                break
            if line.startswith(';') and line[1:2].isspace():
                # closing ';' followed by more tokens: hand those back
                self._buf = line[1:]
                self._after_eol = False
                break
            parts.append(line)
        return "".join(parts).rstrip()

    def parse_quoted(self) -> str:
        """
        Parse a quoted string and return its contents without the quotes.
        The closing quote is the first matching quote character followed by
        whitespace or end of line. Raises `ValueError` on a bad quote
        character or at EOF.
        """
        line = self.line
        w = self.peek_word()
        if w is None:
            raise ValueError(f"While parsing string at line {line}: Unexpected EOF")
        quote = w[0]
        if quote not in ('"', "'"):
            raise ValueError(f"While parsing string at line {line}: Invalid quote char {quote}")

        # Drop the opening quote before searching. Otherwise a string starting
        # with whitespace (' foo') would have its opening quote taken as the
        # closing one.
        assert self._buf is not None
        self._buf = self._buf.lstrip()[1:]

        s = self.next_until(quote)
        if s is None:
            raise ValueError(f"While parsing string {w}... at line {line}: Unexpected EOF")
        return s[:-1]

line instance-attribute

line = 0

parse

parse() -> Iterator[CIFDataBlock]
Source code in atomlib/io/cif.py
def parse(self) -> t.Iterator[CIFDataBlock]:
    """
    Yield each data block in the file. A ``data_`` header starts a named
    block; a tag appearing without a header starts a block named None.
    Raises `ValueError` on any other token.
    """
    header = 'data_'
    while True:
        start = self.line
        token = self.peek_word()
        if token is None:
            break

        has_header = token.lower().startswith(header)
        if not has_header and not token.startswith('_'):
            raise ValueError(f"While parsing line {start}: Unexpected token {token}")

        if has_header:
            # consume the header; a bare tag is left for parse_datablock
            self.next_word()
            block_name = token[len(header):]
        else:
            block_name = None

        yield self.parse_datablock(block_name)

after_eol

after_eol() -> bool

Returns whether the current token (the one that will be returned by the next peek() or next()) is after a newline.

Source code in atomlib/io/cif.py
def after_eol(self) -> bool:
    """
    Tell whether the upcoming token (the one the next peek() or next()
    hands back) follows a newline.
    """
    at_line_start = self._after_eol
    return at_line_start

peek_line

peek_line() -> Optional[str]
Source code in atomlib/io/cif.py
def peek_line(self) -> t.Optional[str]:
    """Return the buffered text without consuming it, refilling the buffer first if needed."""
    return self._try_fill_buf()

next_line

next_line() -> Optional[str]
Source code in atomlib/io/cif.py
def next_line(self) -> t.Optional[str]:
    """Return the buffered text (as given by `peek_line`) and clear the buffer."""
    remainder = self.peek_line()
    # mark the buffer as consumed so the next read pulls a fresh line
    self._buf = None
    return remainder

next_until

next_until(marker: str) -> Optional[str]

Collect words until marker. Because of the weirdness of CIF, marker must occur immediately before a whitespace boundary.

Source code in atomlib/io/cif.py
def next_until(self, marker: str) -> t.Optional[str]:
    """
    Collect words until `marker`. Because of the weirdness of CIF,
    `marker` must occur immediately before a whitespace boundary.

    Returns the consumed text up to and including `marker`, reading further
    lines as needed, or None if EOF is reached first.
    """
    buf = self._try_fill_buf()
    if buf is None:
        return None

    pattern = re.compile(re.escape(marker) + r'(?=\s|$)')
    parts = []
    while not (match := pattern.search(buf)):
        parts.append(buf)
        buf = self._try_fill_buf(True)
        if buf is None:
            return None
    parts.append(buf[:match.end()])

    rest = buf[match.end():]
    if len(rest) == 0 or rest.isspace():
        # Line fully consumed, so the next token starts a new line. Record
        # that, or a following ';' text field would not be recognised.
        self._buf = None
        self._after_eol = True
    else:
        self._buf = rest
        self._after_eol = False
    return "".join(parts)

peek_word

peek_word() -> Optional[str]
Source code in atomlib/io/cif.py
def peek_word(self) -> t.Optional[str]:
    """
    Return the next whitespace-delimited word without consuming it, skipping
    blank lines and comments. Returns None at EOF.
    """
    while True:
        buf = self._try_fill_buf()
        if buf is None:
            return None
        buf = buf.lstrip()
        if len(buf) == 0 or buf.startswith('#'):
            # eat comment or blank line
            self._buf = None
            # Whatever comes next is the first token on a new line. Without
            # this, a ';' text field following a trailing comment would not
            # be recognised.
            self._after_eol = True
            continue
        break

    return buf.split(maxsplit=1)[0]

next_word

next_word() -> Optional[str]
Source code in atomlib/io/cif.py
def next_word(self) -> t.Optional[str]:
    """
    Consume and return the next word, or None at EOF. Afterwards
    `_after_eol` records whether the word was the last thing on its line.
    """
    word = self.peek_word()
    if word is None:
        return None
    assert self._buf is not None

    # drop the word plus the single separator character following it
    remainder = self._buf.lstrip()[len(word)+1:].lstrip()
    line_done = len(remainder) == 0 or remainder.isspace()
    # eat whitespace at end of line
    self._buf = None if line_done else remainder
    self._after_eol = line_done
    return word

parse_bare

parse_bare() -> Union[int, float, str]
Source code in atomlib/io/cif.py
def parse_bare(self) -> t.Union[int, float, str]:
    """
    Consume an unquoted word. Words fully matching `_INT_RE` become ints,
    words fully matching `_FLOAT_RE` become floats built from the first
    match group (unless that group is a lone '.'), and everything else is
    returned as the string itself. Raises `ValueError` at EOF.
    """
    word = self.next_word()
    if word is None:
        raise ValueError("Unexpected EOF while parsing value.")

    if _INT_RE.fullmatch(word):
        return int(word)  # may raise

    float_match = _FLOAT_RE.fullmatch(word)
    if float_match and float_match[1] != '.':
        return float(float_match[1])  # may raise

    return word

parse_datablock

parse_datablock(name: Optional[str] = None) -> CIFDataBlock
Source code in atomlib/io/cif.py
def parse_datablock(self, name: t.Optional[str] = None) -> CIFDataBlock:
    """
    Parse items until EOF or a word which is neither ``loop_`` nor a tag,
    and return them, in file order, as a data block called `name`.
    """
    logging.debug(f"parse datablock '{name}'")

    items: t.List[t.Union[CIFTable, t.Tuple[str, Value]]] = []

    while (token := self.peek_word()) is not None:
        if token.lower() == 'loop_':
            self.next_word()
            items.append(self.parse_loop())
            continue
        if not token.startswith('_'):
            # start of the next block (or something unexpected): leave it
            break
        self.next_word()
        k = token[1:]
        v = self.parse_value()
        logging.debug(f"{k} = {v}")
        items.append((k, v))

    return CIFDataBlock(name, tuple(items))

eat_saveframe

eat_saveframe()
Source code in atomlib/io/cif.py
def eat_saveframe(self):
    """
    Consume words up to and including the next ``save_`` (compared
    case-insensitively). Raises `ValueError` if EOF comes first.
    """
    start_line = self.line
    while (word := self.next_word()) is not None:
        if word.lower() == 'save_':
            return
    raise ValueError(f"EOF before end of save frame starting at line {start_line}")

parse_loop

parse_loop() -> CIFTable
Source code in atomlib/io/cif.py
def parse_loop(self) -> CIFTable:
    """
    Parse the tags and values of a loop (the ``loop_`` keyword itself has
    already been consumed by the caller).

    Raises `ValueError` on EOF before any value, on a value without any tag,
    or if the number of values is not a multiple of the number of tags.
    """
    line = self.line
    tags = []
    while True:
        w = self.peek_word()
        if w is None:
            raise ValueError(f"EOF before loop values at line {line}")
        if not w.startswith('_'):
            break
        self.next_word()
        tags.append(w[1:])

    vals: t.Tuple[t.List[Value], ...] = tuple([] for _ in tags)
    i = 0

    while True:
        w = self.peek_word()
        if w is None or w.startswith('_'):
            break
        # The loop also ends at a reserved word. A ``data_name`` header does
        # not end in '_', so it has to be matched by its prefix.
        lowered = w.lower()
        if lowered in ('loop_', 'stop_', 'global_') or lowered.startswith(('data_', 'save_')):
            break
        if not tags:
            raise ValueError(f"While parsing loop at line {line}: Got value {w} before any tags")
        vals[i].append(self.parse_value())
        i = (i + 1) % len(tags)

    if i != 0:
        n_vals = sum(map(len, vals))
        raise ValueError(f"While parsing loop at line {line}: "
                        f"Got {n_vals} vals, expected a multiple of {len(tags)}")

    return CIFTable(dict(zip(tags, vals)))

parse_value

parse_value() -> Value
Source code in atomlib/io/cif.py
def parse_value(self) -> Value:
    """
    Parse one value: None for ``.`` and ``?``, a text field, a quoted string,
    or a bare word. Raises `ValueError` at EOF.
    """
    logging.debug("parse_value")
    w = self.peek_word()
    if w is None:
        raise ValueError("Unexpected EOF while parsing value.")
    if w in ('.', '?'):
        self.next_word()
        return None

    # A text field opens with ';' as the first token of a line. The text may
    # start directly after the semicolon (';text'), so a bare ';' word is not
    # the only form. `_buf` holds the token's line after a successful peek.
    if self.after_eol() and (w == ';' or self._buf.startswith(';')):
        return self.parse_text_field()

    if w[0] in ('"', "'"):
        return self.parse_quoted()

    return self.parse_bare()

parse_text_field

parse_text_field() -> str
Source code in atomlib/io/cif.py
def parse_text_field(self) -> str:
    """
    Parse a ``;``-delimited text field starting at the current line and
    return its text with trailing whitespace removed.

    The field ends at a line consisting only of ``;``, or at a line starting
    with ``;`` followed by whitespace, in which case the rest of that line is
    handed back for further parsing. Raises `ValueError` at EOF.
    """
    start_line = self.line
    line = self.next_line()
    if line is None:
        raise ValueError(f"While parsing text field at line {start_line}: Unexpected EOF")
    parts = [line.lstrip().removeprefix(';').lstrip()]
    while True:
        line = self.next_line()
        if line is None:
            raise ValueError(f"While parsing text field at line {start_line}: Unexpected EOF")
        if line.strip() == ';':
            break
        if line.startswith(';') and line[1:2].isspace():
            # closing ';' followed by more tokens: hand those back
            self._buf = line[1:]
            self._after_eol = False
            break
        parts.append(line)
    return "".join(parts).rstrip()

parse_quoted

parse_quoted() -> str
Source code in atomlib/io/cif.py
def parse_quoted(self) -> str:
    """
    Parse a quoted string and return its contents without the quotes. The
    closing quote is the first matching quote character which `next_until`
    finds directly before whitespace or end of line.

    Raises `ValueError` on a bad quote character or at EOF.
    """
    line = self.line
    w = self.peek_word()
    if w is None:
        raise ValueError(f"While parsing string at line {line}: Unexpected EOF")
    quote = w[0]
    if quote not in ('"', "'"):
        raise ValueError(f"While parsing string at line {line}: Invalid quote char {quote}")

    # Drop the opening quote before searching. Otherwise a string starting
    # with whitespace (' foo') would have its opening quote taken as the
    # closing one. `_buf` holds the token's line after a successful peek.
    self._buf = self._buf.lstrip()[1:]

    s = self.next_until(quote)
    if s is None:
        raise ValueError(f"While parsing string {w}... at line {line}: Unexpected EOF")
    return s[:-1]

parse_symmetry

parse_symmetry(s: str) -> AffineTransform3D
Source code in atomlib/io/cif.py
def parse_symmetry(s: str) -> AffineTransform3D:
    """
    Parse a symmetry operation string of three comma-separated expressions
    (e.g. 'x,y,z') into an `AffineTransform3D`.

    Each expression is evaluated with `SYMMETRY_PARSER` into a length-4 row.
    The three rows are stacked above a final ``[0, 0, 0, 1]`` row. Raises
    `ValueError` unless exactly three expressions are given.
    """
    components = s.split(',')
    n_components = len(components)
    if n_components != 3:
        raise ValueError(f"Error parsing symmetry expression '{s}': Expected 3 values, got {n_components}")

    rows = []
    for component in components:
        expr = SYMMETRY_PARSER.parse(StringIO(component))
        rows.append(expr.eval(lambda v: v).to_vec())
    # homogeneous bottom row
    rows.append(numpy.array([0., 0., 0., 1.]))
    return AffineTransform3D(numpy.stack(rows, axis=0))