Examples
How do I compute disulfide bonds for a protein?
from pathlib import Path

import numpy as np

from lahuta import LahutaSystem, metrics

# Upper bound for an S-S covalent bond length (2.3 Å), stored squared so it can
# be compared against squared distances directly.
SS_MAX_DISTANCE_SQ = 2.3 * 2.3


def detect_disulfide_bonds(structure: Path) -> list[tuple[int, int]]:
    system = LahutaSystem(str(structure))
    if not system.build_topology():
        raise RuntimeError(f"Failed to build topology for: {structure}")

    # Select the sulfur (SG) atoms of all cysteine residues.
    cys_sg_mask = (system.props.resnames == "CYS") & (system.props.names == "SG")
    cys_sg_positions = system.props.positions[cys_sg_mask]
    cys_sg_indices = system.props.indices[cys_sg_mask]
    if cys_sg_positions.shape[0] < 2:
        return []

    # Geometric screen: keep SG-SG pairs within the disulfide distance cutoff.
    dist_sq_matrix = metrics.pairwise_distances(cys_sg_positions, squared=True)
    i_idx, j_idx = np.triu_indices(cys_sg_positions.shape[0], k=1)
    pair_dist_sq = dist_sq_matrix[i_idx, j_idx]
    geometric_mask = pair_dist_sq <= SS_MAX_DISTANCE_SQ

    # Confirm each candidate pair with an actual covalent bond in the topology.
    mol = system.get_topology().molecule()
    bonds: list[tuple[int, int]] = []
    for ii, jj in zip(i_idx[geometric_mask], j_idx[geometric_mask]):
        atom_i = int(cys_sg_indices[ii])
        atom_j = int(cys_sg_indices[jj])
        if mol.getBondBetweenAtoms(atom_i, atom_j) is None:
            continue
        bonds.append((atom_i, atom_j))
    return bonds
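
A minimal way to call this helper, appended to the module above; the structure path is a placeholder and should point to any PDB/mmCIF file that Lahuta can read:

ss_bonds = detect_disulfide_bonds(Path("structures/1abc.pdb"))  # hypothetical path
for atom_i, atom_j in ss_bonds:
    print(f"Disulfide bond between atom indices {atom_i} and {atom_j}")
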
How do I find first-shell and second-shell residues around a residue?
from dataclasses import dataclass
from pathlib import Path

from lahuta import LahutaSystem, Residue

TARGET_CHAIN = "A"
TARGET_RESID = 10
CUTOFF = 4.5  # neighbor-search cutoff (Å)
RESIDUE_DIFFERENCE = 0


@dataclass(frozen=True)
class ShellResult:
    structure: str
    target: Residue
    first_shell: list[Residue]
    second_shell: list[Residue]


def find_shells_for_target(file_path: str | Path) -> ShellResult:
    system = LahutaSystem(str(file_path))
    residues = system.residues
    target_residues = residues.filter(
        lambda x: x.number == TARGET_RESID and x.chain_id == TARGET_CHAIN
    )
    if len(target_residues) == 0:
        raise ValueError(f"Target residue not found: {TARGET_CHAIN}:{TARGET_RESID}")
    target_residue = target_residues[0]
    target_atoms = {a.getIdx() for a in target_residue.atoms}

    ns = system.find_neighbors(cutoff=CUTOFF, residue_difference=RESIDUE_DIFFERENCE)

    # First shell: residues with at least one atom within CUTOFF of a target atom.
    first_shell: list[Residue] = []
    first_shell_seen: set[Residue] = set()
    for atom_i, atom_j in ns.filter(list(target_atoms)).pairs:
        if atom_i in target_atoms and atom_j not in target_atoms:
            candidate = residues.residue_of_atom(atom_j)
            if candidate not in first_shell_seen:
                first_shell_seen.add(candidate)
                first_shell.append(candidate)
        elif atom_j in target_atoms and atom_i not in target_atoms:
            candidate = residues.residue_of_atom(atom_i)
            if candidate not in first_shell_seen:
                first_shell_seen.add(candidate)
                first_shell.append(candidate)

    # Second shell: residues in contact with a first-shell residue, excluding the
    # target itself and anything already in the first shell.
    second_shell: list[Residue] = []
    second_shell_seen: set[Residue] = set()
    for atom_i, atom_j in ns.pairs:
        res_i = residues.residue_of_atom(atom_i)
        res_j = residues.residue_of_atom(atom_j)
        if res_i == res_j:
            continue
        if res_i in first_shell_seen and res_j not in first_shell_seen and res_j != target_residue:
            if res_j not in second_shell_seen:
                second_shell_seen.add(res_j)
                second_shell.append(res_j)
        if res_j in first_shell_seen and res_i not in first_shell_seen and res_i != target_residue:
            if res_i not in second_shell_seen:
                second_shell_seen.add(res_i)
                second_shell.append(res_i)

    return ShellResult(
        structure=str(file_path),
        target=target_residue,
        first_shell=first_shell,
        second_shell=second_shell,
    )
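
A short usage sketch, continuing the module above; the structure path is a placeholder, and the target chain and residue number come from the constants at the top:

result = find_shells_for_target("structures/1abc.pdb")  # hypothetical path
print(f"Target residue: {result.target}")
print(f"First shell ({len(result.first_shell)} residues): {result.first_shell}")
print(f"Second shell ({len(result.second_shell)} residues): {result.second_shell}")
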
How do I compare MolStar, Arpeggio, and GetContacts results and keep only consensus interactions?
from pathlib import Path
from typing import TypeAlias

from lahuta import AtomTypingMethod, ContactSet, LahutaSystem, Topology
from lahuta.entities import compute_contacts

METHODS_TYPE = [AtomTypingMethod.MolStar, AtomTypingMethod.Arpeggio, AtomTypingMethod.GetContacts]
METHODS_STR = ["molstar", "arpeggio", "getcontacts"]

PairKey: TypeAlias = tuple[str, str]
PairData: TypeAlias = dict[str, float | str]
ProviderPairs: TypeAlias = dict[str, dict[PairKey, PairData]]


def contacts_by_pair(top: Topology, contacts: ContactSet) -> dict[PairKey, PairData]:
    """Index contacts by an order-independent (lhs, rhs) key, keeping the first entry per pair."""
    pairs: dict[PairKey, PairData] = {}
    for c in contacts:
        rec = c.to_dict(top)
        lhs = str(rec["lhs"])
        rhs = str(rec["rhs"])
        key = (lhs, rhs) if lhs <= rhs else (rhs, lhs)
        if key not in pairs:
            pairs[key] = {"type": str(rec["type"]), "distance_sq": float(rec["distance_sq"])}
    return pairs


def consensus_interactions(structure: str | Path) -> list[dict[str, str | dict[str, float | str]]]:
    system = LahutaSystem(str(structure))
    if not system.build_topology():
        raise RuntimeError(f"Failed to build topology for: {structure}")
    top = system.get_topology()

    # Compute contacts once per provider, re-typing the topology each time.
    per_provider: ProviderPairs = {}
    for typing_method, provider in zip(METHODS_TYPE, METHODS_STR):
        top.assign_typing(typing_method)
        contacts = compute_contacts(top, provider=provider)
        per_provider[provider] = contacts_by_pair(top, contacts)

    # Keep only the pairs reported by all three providers.
    shared_pairs = set.intersection(*(set(pairs.keys()) for pairs in per_provider.values()))

    consensus: list[dict[str, str | dict[str, float | str]]] = []
    for lhs, rhs in sorted(shared_pairs):
        consensus.append(
            {
                "lhs": lhs,
                "rhs": rhs,
                "types": {provider: per_provider[provider][(lhs, rhs)]["type"] for provider in METHODS_STR},
                "distance_sq": {
                    provider: per_provider[provider][(lhs, rhs)]["distance_sq"] for provider in METHODS_STR
                },
            }
        )
    return consensus
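
A brief usage sketch, again with a placeholder file path; it reports how many interactions survive the three-way intersection and prints the first few consensus records:

shared = consensus_interactions("structures/complex.cif")  # hypothetical path
print(f"{len(shared)} interactions reported by all three providers")
for entry in shared[:5]:
    print(entry["lhs"], entry["rhs"], entry["types"])
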
How do I build a chain-interface hotspot map by counting cross-chain contacts per residue and interaction type?
from collections import Counter, defaultdict
from dataclasses import dataclass
from pathlib import Path

import numpy as np

from lahuta import AtomRec, EntityResolver, GroupRec, LahutaSystem, Residue, RingRec, Topology
from lahuta.entities import compute_contacts
from lahuta.entities.api import ContactProviderType


@dataclass(frozen=True)
class HotspotRecord:
    residue: Residue
    total_contacts: int
    zscore: float
    by_interaction: dict[str, int]


@dataclass(frozen=True)
class HotspotMapResult:
    hotspots: list[HotspotRecord]
    interaction_types: list[str]
    chain_pair_counts: dict[tuple[str, str], int]


def _residue_key(residue: Residue) -> tuple[str, int, str, str]:
    """Stable sort key so residue pairs are ordered deterministically."""
    return (residue.chain_id, residue.number, residue.name, residue.alt_loc)


def _residues_from_record(record: AtomRec | RingRec | GroupRec, topology: Topology) -> set[Residue]:
    """Map a resolved contact endpoint (atom, ring, or group) to its parent residue(s)."""
    residues: set[Residue] = set()
    if isinstance(record, AtomRec):
        residues.add(topology.residue_of_atom(int(record.idx)))
        return residues
    if isinstance(record, (RingRec, GroupRec)):
        for atom in record.atoms:
            residues.add(topology.residue_of_atom(int(atom.getIdx())))
        return residues
    return residues


def build_chain_interface_hotspot_map(
    structure: str | Path,
    *,
    provider: ContactProviderType = "molstar",
) -> HotspotMapResult:
    system = LahutaSystem(str(structure))
    if not system.build_topology():
        raise RuntimeError(f"Failed to build topology for: {structure}")
    topology: Topology = system.get_topology()
    resolver = EntityResolver(topology)
    contacts = compute_contacts(topology, provider=provider)

    # Residue is mutable in the Python bindings; avoid mutating residue fields
    # after residues have been used as dict/set keys.
    per_residue_type: dict[Residue, Counter[str]] = defaultdict(Counter)
    chain_pair_counts: Counter[tuple[str, str]] = Counter()

    for contact in contacts:
        lhs_rec, rhs_rec = resolver.resolve_contact(contact)
        lhs_residues = _residues_from_record(lhs_rec, topology)
        rhs_residues = _residues_from_record(rhs_rec, topology)
        if not lhs_residues or not rhs_residues:
            continue
        interaction_type = str(contact.type)

        # Count each cross-chain residue pair at most once per contact.
        seen_pairs: set[tuple[Residue, Residue]] = set()
        for lhs in lhs_residues:
            for rhs in rhs_residues:
                if lhs.chain_id == rhs.chain_id:
                    continue
                pair = (lhs, rhs) if _residue_key(lhs) <= _residue_key(rhs) else (rhs, lhs)
                if pair in seen_pairs:
                    continue
                seen_pairs.add(pair)
                per_residue_type[pair[0]][interaction_type] += 1
                per_residue_type[pair[1]][interaction_type] += 1
                c1, c2 = pair[0].chain_id, pair[1].chain_id
                chain_pair = (c1, c2) if c1 <= c2 else (c2, c1)
                chain_pair_counts[chain_pair] += 1

    interaction_types = sorted({itype for counts in per_residue_type.values() for itype in counts})

    residues = list(per_residue_type.keys())
    totals = np.array([sum(per_residue_type[r].values()) for r in residues], dtype=np.int32)
    if totals.size == 0:
        return HotspotMapResult(
            hotspots=[],
            interaction_types=interaction_types,
            chain_pair_counts=dict(chain_pair_counts),
        )

    # Z-score the per-residue totals so hotspots can be compared across structures.
    mean = float(totals.mean())
    std = float(totals.std())
    zscores = np.zeros_like(totals, dtype=np.float64) if std == 0.0 else (totals - mean) / std

    # Rank residues by total cross-chain contacts, highest first.
    hotspots: list[HotspotRecord] = []
    order = np.argsort(-totals, kind="stable")
    for i in order:
        residue = residues[int(i)]
        counts = per_residue_type[residue]
        hotspots.append(
            HotspotRecord(
                residue=residue,
                total_contacts=int(totals[int(i)]),
                zscore=float(zscores[int(i)]),
                by_interaction={itype: int(counts.get(itype, 0)) for itype in interaction_types},
            )
        )

    return HotspotMapResult(
        hotspots=hotspots,
        interaction_types=interaction_types,
        chain_pair_counts=dict(chain_pair_counts),
    )
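
A minimal sketch of running the hotspot map on a multi-chain structure and printing the top-ranked interface residues; the file path and the number of rows shown are placeholders:

hotspot_map = build_chain_interface_hotspot_map("structures/complex.cif")  # hypothetical path
print("Cross-chain contact counts per chain pair:", hotspot_map.chain_pair_counts)
for record in hotspot_map.hotspots[:10]:
    print(record.residue, record.total_contacts, f"z={record.zscore:+.2f}", record.by_interaction)
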