Examples
How do I compute disulfide bonds for a protein?
from pathlib import Path

import numpy as np

from lahuta import LahutaSystem, metrics

# Upper bound for an S-S covalent bond length (2.3 Å), stored squared so it can
# be compared against squared distances directly.
SS_MAX_DISTANCE_SQ = 2.3 * 2.3


def detect_disulfide_bonds(structure: Path) -> list[tuple[int, int]]:
    system = LahutaSystem(str(structure))
    if not system.build_topology():
        raise RuntimeError(f"Failed to build topology for: {structure}")

    # Select the sulfur (SG) atoms of all cysteine residues.
    cys_sg_mask = (system.props.resnames == "CYS") & (system.props.names == "SG")
    cys_sg_positions = system.props.positions[cys_sg_mask]
    cys_sg_indices = system.props.indices[cys_sg_mask]
    if cys_sg_positions.shape[0] < 2:
        return []

    # Geometric screen: keep SG-SG pairs within the disulfide distance cutoff.
    dist_sq_matrix = metrics.pairwise_distances(cys_sg_positions, squared=True)
    i_idx, j_idx = np.triu_indices(cys_sg_positions.shape[0], k=1)
    pair_dist_sq = dist_sq_matrix[i_idx, j_idx]
    geometric_mask = pair_dist_sq <= SS_MAX_DISTANCE_SQ

    # Confirm each candidate pair with an actual covalent bond in the topology.
    mol = system.get_topology().molecule()
    bonds: list[tuple[int, int]] = []
    for ii, jj in zip(i_idx[geometric_mask], j_idx[geometric_mask]):
        atom_i = int(cys_sg_indices[ii])
        atom_j = int(cys_sg_indices[jj])
        if mol.getBondBetweenAtoms(atom_i, atom_j) is None:
            continue
        bonds.append((atom_i, atom_j))
    return bonds
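
A minimal way to call this helper, appended to the module above; the structure path is a placeholder and should point to any PDB/mmCIF file that Lahuta can read:

ss_bonds = detect_disulfide_bonds(Path("structures/1abc.pdb"))  # hypothetical path
for atom_i, atom_j in ss_bonds:
    print(f"Disulfide bond between atom indices {atom_i} and {atom_j}")
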
How do I find first-shell and second-shell residues around a residue?
from dataclasses import dataclass
from pathlib import Path

from lahuta import LahutaSystem, Residue

TARGET_CHAIN = "A"
TARGET_RESID = 10
CUTOFF = 4.5  # neighbor-search cutoff (Å)
RESIDUE_DIFFERENCE = 0


@dataclass(frozen=True)
class ShellResult:
    structure: str
    target: Residue
    first_shell: list[Residue]
    second_shell: list[Residue]


def find_shells_for_target(file_path: str | Path) -> ShellResult:
    system = LahutaSystem(str(file_path))
    residues = system.residues
    target_residues = residues.filter(
        lambda x: x.number == TARGET_RESID and x.chain_id == TARGET_CHAIN
    )
    if len(target_residues) == 0:
        raise ValueError(f"Target residue not found: {TARGET_CHAIN}:{TARGET_RESID}")
    target_residue = target_residues[0]
    target_atoms = {a.getIdx() for a in target_residue.atoms}

    ns = system.find_neighbors(cutoff=CUTOFF, residue_difference=RESIDUE_DIFFERENCE)

    # First shell: residues with at least one atom within CUTOFF of a target atom.
    first_shell: list[Residue] = []
    first_shell_seen: set[Residue] = set()
    for atom_i, atom_j in ns.filter(list(target_atoms)).pairs:
        if atom_i in target_atoms and atom_j not in target_atoms:
            candidate = residues.residue_of_atom(atom_j)
            if candidate not in first_shell_seen:
                first_shell_seen.add(candidate)
                first_shell.append(candidate)
        elif atom_j in target_atoms and atom_i not in target_atoms:
            candidate = residues.residue_of_atom(atom_i)
            if candidate not in first_shell_seen:
                first_shell_seen.add(candidate)
                first_shell.append(candidate)

    # Second shell: residues in contact with a first-shell residue, excluding the
    # target itself and anything already in the first shell.
    second_shell: list[Residue] = []
    second_shell_seen: set[Residue] = set()
    for atom_i, atom_j in ns.pairs:
        res_i = residues.residue_of_atom(atom_i)
        res_j = residues.residue_of_atom(atom_j)
        if res_i == res_j:
            continue
        if res_i in first_shell_seen and res_j not in first_shell_seen and res_j != target_residue:
            if res_j not in second_shell_seen:
                second_shell_seen.add(res_j)
                second_shell.append(res_j)
        if res_j in first_shell_seen and res_i not in first_shell_seen and res_i != target_residue:
            if res_i not in second_shell_seen:
                second_shell_seen.add(res_i)
                second_shell.append(res_i)

    return ShellResult(
        structure=str(file_path),
        target=target_residue,
        first_shell=first_shell,
        second_shell=second_shell,
    )
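
A short usage sketch, continuing the module above; the structure path is a placeholder, and the target chain and residue number come from the constants at the top:

result = find_shells_for_target("structures/1abc.pdb")  # hypothetical path
print(f"Target residue: {result.target}")
print(f"First shell ({len(result.first_shell)} residues): {result.first_shell}")
print(f"Second shell ({len(result.second_shell)} residues): {result.second_shell}")
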
How do I compare MolStar, Arpeggio, and GetContacts results and keep only consensus interactions?
from pathlib import Path
from typing import TypeAlias

from lahuta import AtomTypingMethod, ContactSet, LahutaSystem, Topology
from lahuta.entities import compute_contacts

METHODS_TYPE = [AtomTypingMethod.MolStar, AtomTypingMethod.Arpeggio, AtomTypingMethod.GetContacts]
METHODS_STR = ["molstar", "arpeggio", "getcontacts"]

PairKey: TypeAlias = tuple[str, str]
PairData: TypeAlias = dict[str, float | str]
ProviderPairs: TypeAlias = dict[str, dict[PairKey, PairData]]


def contacts_by_pair(top: Topology, contacts: ContactSet) -> dict[PairKey, PairData]:
    """Index contacts by an order-independent (lhs, rhs) key, keeping the first entry per pair."""
    pairs: dict[PairKey, PairData] = {}
    for c in contacts:
        rec = c.to_dict(top)
        lhs = str(rec["lhs"])
        rhs = str(rec["rhs"])
        key = (lhs, rhs) if lhs <= rhs else (rhs, lhs)
        if key not in pairs:
            pairs[key] = {"type": str(rec["type"]), "distance_sq": float(rec["distance_sq"])}
    return pairs


def consensus_interactions(structure: str | Path) -> list[dict[str, str | dict[str, float | str]]]:
    system = LahutaSystem(str(structure))
    if not system.build_topology():
        raise RuntimeError(f"Failed to build topology for: {structure}")
    top = system.get_topology()

    # Compute contacts once per provider, re-typing the topology each time.
    per_provider: ProviderPairs = {}
    for typing_method, provider in zip(METHODS_TYPE, METHODS_STR):
        top.assign_typing(typing_method)
        contacts = compute_contacts(top, provider=provider)
        per_provider[provider] = contacts_by_pair(top, contacts)

    # Keep only the pairs reported by all three providers.
    shared_pairs = set.intersection(*(set(pairs.keys()) for pairs in per_provider.values()))

    consensus: list[dict[str, str | dict[str, float | str]]] = []
    for lhs, rhs in sorted(shared_pairs):
        consensus.append(
            {
                "lhs": lhs,
                "rhs": rhs,
                "types": {provider: per_provider[provider][(lhs, rhs)]["type"] for provider in METHODS_STR},
                "distance_sq": {
                    provider: per_provider[provider][(lhs, rhs)]["distance_sq"] for provider in METHODS_STR
                },
            }
        )
    return consensus
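
A brief usage sketch, again with a placeholder file path; it reports how many interactions survive the three-way intersection and prints the first few consensus records:

shared = consensus_interactions("structures/complex.cif")  # hypothetical path
print(f"{len(shared)} interactions reported by all three providers")
for entry in shared[:5]:
    print(entry["lhs"], entry["rhs"], entry["types"])
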
How do I build a chain-interface hotspot map by counting cross-chain contacts per residue and interaction type?
from collections import Counter, defaultdict
from dataclasses import dataclass
from pathlib import Path

import numpy as np

from lahuta import AtomRec, EntityResolver, GroupRec, LahutaSystem, Residue, RingRec, Topology
from lahuta.entities import compute_contacts
from lahuta.entities.api import ContactProviderType


@dataclass(frozen=True)
class HotspotRecord:
    residue: Residue
    total_contacts: int
    zscore: float
    by_interaction: dict[str, int]


@dataclass(frozen=True)
class HotspotMapResult:
    hotspots: list[HotspotRecord]
    interaction_types: list[str]
    chain_pair_counts: dict[tuple[str, str], int]


def _residue_key(residue: Residue) -> tuple[str, int, str, str]:
    """Stable sort key so residue pairs are ordered deterministically."""
    return (residue.chain_id, residue.number, residue.name, residue.alt_loc)


def _residues_from_record(record: AtomRec | RingRec | GroupRec, topology: Topology) -> set[Residue]:
    """Map a resolved contact endpoint (atom, ring, or group) to its parent residue(s)."""
    residues: set[Residue] = set()
    if isinstance(record, AtomRec):
        residues.add(topology.residue_of_atom(int(record.idx)))
        return residues
    if isinstance(record, (RingRec, GroupRec)):
        for atom in record.atoms:
            residues.add(topology.residue_of_atom(int(atom.getIdx())))
        return residues
    return residues


def build_chain_interface_hotspot_map(
    structure: str | Path,
    *,
    provider: ContactProviderType = "molstar",
) -> HotspotMapResult:
    system = LahutaSystem(str(structure))
    if not system.build_topology():
        raise RuntimeError(f"Failed to build topology for: {structure}")
    topology: Topology = system.get_topology()
    resolver = EntityResolver(topology)
    contacts = compute_contacts(topology, provider=provider)

    # Residue is mutable in the Python bindings; avoid mutating residue fields
    # after residues have been used as dict/set keys.
    per_residue_type: dict[Residue, Counter[str]] = defaultdict(Counter)
    chain_pair_counts: Counter[tuple[str, str]] = Counter()

    for contact in contacts:
        lhs_rec, rhs_rec = resolver.resolve_contact(contact)
        lhs_residues = _residues_from_record(lhs_rec, topology)
        rhs_residues = _residues_from_record(rhs_rec, topology)
        if not lhs_residues or not rhs_residues:
            continue
        interaction_type = str(contact.type)

        # Count each cross-chain residue pair at most once per contact.
        seen_pairs: set[tuple[Residue, Residue]] = set()
        for lhs in lhs_residues:
            for rhs in rhs_residues:
                if lhs.chain_id == rhs.chain_id:
                    continue
                pair = (lhs, rhs) if _residue_key(lhs) <= _residue_key(rhs) else (rhs, lhs)
                if pair in seen_pairs:
                    continue
                seen_pairs.add(pair)
                per_residue_type[pair[0]][interaction_type] += 1
                per_residue_type[pair[1]][interaction_type] += 1
                c1, c2 = pair[0].chain_id, pair[1].chain_id
                chain_pair = (c1, c2) if c1 <= c2 else (c2, c1)
                chain_pair_counts[chain_pair] += 1

    interaction_types = sorted({itype for counts in per_residue_type.values() for itype in counts})

    residues = list(per_residue_type.keys())
    totals = np.array([sum(per_residue_type[r].values()) for r in residues], dtype=np.int32)
    if totals.size == 0:
        return HotspotMapResult(
            hotspots=[],
            interaction_types=interaction_types,
            chain_pair_counts=dict(chain_pair_counts),
        )

    # Z-score the per-residue totals so hotspots can be compared across structures.
    mean = float(totals.mean())
    std = float(totals.std())
    zscores = np.zeros_like(totals, dtype=np.float64) if std == 0.0 else (totals - mean) / std

    # Rank residues by total cross-chain contacts, highest first.
    hotspots: list[HotspotRecord] = []
    order = np.argsort(-totals, kind="stable")
    for i in order:
        residue = residues[int(i)]
        counts = per_residue_type[residue]
        hotspots.append(
            HotspotRecord(
                residue=residue,
                total_contacts=int(totals[int(i)]),
                zscore=float(zscores[int(i)]),
                by_interaction={itype: int(counts.get(itype, 0)) for itype in interaction_types},
            )
        )

    return HotspotMapResult(
        hotspots=hotspots,
        interaction_types=interaction_types,
        chain_pair_counts=dict(chain_pair_counts),
    )
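
A minimal sketch of running the hotspot map on a multi-chain structure and printing the top-ranked interface residues; the file path and the number of rows shown are placeholders:

hotspot_map = build_chain_interface_hotspot_map("structures/complex.cif")  # hypothetical path
print("Cross-chain contact counts per chain pair:", hotspot_map.chain_pair_counts)
for record in hotspot_map.hotspots[:10]:
    print(record.residue, record.total_contacts, f"z={record.zscore:+.2f}", record.by_interaction)
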