################################################################################ # solvate.tcl # # Generate structure files (PSF) for MD simulations from PDB structures; # Generate multimers for a PDB entry; # Solvate PDB structures in water box/sphere and generate the result PSF # files. # # Usage: # # Input file named "sysprep.dat" like the following: # # molID 1cwp-NT0 # Molecule ID (PDB file name prefix) # GenPSF 1 # Generate PSF file for input PDB structure? (0) No, (1) Yes # shape 0 # Shape of result system wanted for solvation (0) no solvation, (1) box, (2) sphere, (3) truncated octahedron # I 0.2 # Ionic strength wanted for system solvation # # Running with VMD: # vmd -dispdev text -e solvate.tcl # # Output: # * PSF and PDB files for the result system depending on the option chosen for "shape" # * a file "ama_para.dat" including the system information needed for MD/OPX simulation # # Yinglong Miao # Center for Cell and Virus Theory # Department of Chemistry # Indiana University, Bloomington, USA # yimiao@indiana.edu # # 3/28/2008 # ################################################################################ set remark "solvate)"; \ puts "$remark -----------------------------------------------" puts "$remark VMD script for solvating molecular structures and outputing their PSF and PDB files for simulations" set topo "/home/long/namd/common/top_all27_prot_lipid.inp"; puts "$remark topology file used: $topo" # default control parameters puts "$remark Control parameter initialization:" set psf 1; puts "$remark Generate PSF file? $psf" set multimers 0; puts "$remark Generate multimers? $multimers" set wb 0; puts "$remark Generate water box? $wb" set ws 0; puts "$remark Generate water sphere? $ws" set toct 0; puts "$remark Generate water octahedron? $toct" set ionwb 0; puts "$remark Ionize water box? $ionwb" set ionws 0; puts "$remark Ionize water sphere? $ionws" set iontoct 0; puts "$remark Ionize water box? $iontoct" # read system preparation parameters set parafile sysprep.dat; set fpara [open $parafile r]; \ puts "$remark Open file ${parafile} for reading simulation parameters ..." set params {} set values {} while { [gets $fpara line] >= 0 } { lappend params [string tolower [lindex $line 0]] lappend values [lindex $line 1] } close $fpara # set system preparation parameters for { set i 0} { $i < [llength $params] } { incr i } { if { [lindex $params $i] == "molid" } { set molname [lindex $values $i]; puts "$remark Molecule ID: $molname" }; # if if { [lindex $params $i] == "genpsf" } { set psf [lindex $values $i]; puts "$remark Generate PSF file for input PDB structure? $psf" }; # if if { [lindex $params $i] == "shape" } { set shape [lindex $values $i]; puts "$remark Shape of result system wanted (0) no solvation (1) box, (2) sphere, (3) truncated octahedron: $shape" }; # if if { [lindex $params $i] == "i" } { set I [lindex $values $i]; puts "$remark Ionic strength wanted for system solvation: $I M" }; # if }; # for switch $shape { 1 { set wb 1; puts "$remark Generate water box? $wb" set ionwb 1; puts "$remark Ionize water box? $ionwb" } 2 { set ws 1; puts "$remark Generate water sphere? $ws" set ionws 1; puts "$remark Ionize water sphere? $ionws" } 3 { set toct 1 puts "$remark Generate water octahedron? $toct" set iontoct 1; puts "$remark Ionize water box? $iontoct" } }; # switch shape set molID $molname; puts "$remark Updated molecule ID: $molID" if {$molname == "1r1v"} { set nmers 2; puts "$remark Number of multimers = $nmers" set Matnmers {{{-1.000000 0.000000 0.000000 54.44400} {0.000000 -1.000000 0.000000 150.74000} {0.000000 0.000000 1.000000 0.000000} {0.00000 0.00000 0.00000 1.000000}}}; puts "$remark " } # NAMD simulation parameters set cutoff 12.0; puts "$remark cutoff distance = $cutoff A" set expand 7.0; puts "$remark expected expanding of simulated structure dimension = $expand A" # I = 1/2 * conc [(#Na + #Cl)/V] # set I 0.05; puts "$remark ionic strength of solvated system = $I M" # minimum distance from ions to molecule surface # I = 0.2M => Debye length ~ 6.80A # I = 1M => Debye length ~ 3.04A if {$I >= 1.0} { set distsurf 3; puts "$remark minimum distance of ions to structure surface = $distsurf A" } else { set distsurf 5; puts "$remark minimum distance of ions to structure surface = $distsurf A" } # space between ions set space 3; puts "$remark minimum distance between ions = $space A" set b 1.8; puts "remark minimum distance between water and solute for solvation = $b A" package require psfgen topology $topo pdbalias residue HIS HSE pdbalias residue ZN ZN2 ### :::::::::::::: ### psf.tcl ### :::::::::::::: if {$psf} { puts "$remark -----------------------------------------------" puts "$remark Solvate: Generate PSF file" # split the PDB structure using Fortran 90 code pdb_split.exe if {0} { if {[file exists ${molname}.pdb_1]} { set fpdbs [glob ${molname}.pdb_*]; foreach fpdb $fpdbs { exec rm $fpdb; puts "$remark rm $fpdb" } } puts "$remark split PDB structure: [exec pdb_split.exe -pdb ${molname}.pdb | tee -a vmd.log]" } # split the PDB structure using VMD scripts mol load pdb ${molname}.pdb set chain 0 set chains {} # extract protein chains set sel [atomselect top protein] set frags [lsort -unique [$sel get pfrag]] foreach frag $frags { set sel [atomselect top "pfrag $frag"] incr chain lappend chains "P" $sel writepdb ${molname}-${chain}.pdb } # extract hucleic acid chains set sel [atomselect top nucleic] set frags [lsort -unique [$sel get nfrag]] foreach frag $frags { set sel [atomselect top "nfrag $frag"] incr chain lappend chains "N" $sel writepdb ${molname}-${chain}.pdb } # extract other atoms except protein, hucleic acid and water set sel [atomselect top "not protein and not nucleic and not water"] if { [$sel num] } { incr chain lappend chains "X" $sel writepdb ${molname}-${chain}.pdb } set nchains $chain; puts "$remark $nchains chains extracted from the PDB structure: {$chains} " ### patch each protein subunit separately for {set i 1} {$i <= $nchains} {incr i} { segment $i { pdb ${molname}-${i}.pdb if { [lindex $chains [expr $i-1]] == "P"} { puts "$remark patch protein termini of chain $i" first NTER last CTER } } coordpdb ${molname}-${i}.pdb $i exec rm -v ${molname}-${i}.pdb } guesscoord writepdb ${molname}-vmd-H.pdb writepsf ${molname}-vmd-H.psf mol delete top set molID ${molname}-vmd-H; puts "$remark Updated molecule ID: $molID" } ### :::::::::::::: ### Generate multimers with transformation matrices ### :::::::::::::: if {$multimers} { puts "$remark -----------------------------------------------" puts "$remark Solvate: Generate multimers" # build atomic configurations for other units resetpsf readpsf ${molname}-vmd-H.psf coordpdb ${molname}-vmd-H.pdb # writepdb ${molname}-vmd-H-test.pdb mol load psf ${molname}-vmd-H.psf pdb ${molname}-vmd-H.pdb set sel [atomselect top all] set xyz0 [$sel get {x y z}]; puts "$remark save original coordinates" set segs [$sel get segid]; puts "$remark get original segment sequence" puts "$remark obtain residue sequence for constructing the structure " set reslist {} set resnamelist {} for {set j 1} {$j <= $nchains} {incr j} { set chain [lindex $chains [expr $j-1]] if {$chain == "P"} { set selection "segid $j and name CA" } if {$chain != "P" && $chain != "N"} { set selection "segid $j" } set selseg [atomselect top $selection]; # puts "$remark $selection selected." lappend reslist [$selseg get resid] lappend resnamelist [$selseg get resname] } for {set i 1} {$i < $nmers} {incr i} { if {$i > 1} { $sel set {x y z} $xyz0 } $sel move [lindex $Matnmers [expr $i-1]] set newsegs {} foreach seg $segs { lappend newsegs [expr $seg+$nchains*$i] } $sel set segid $newsegs $sel writepdb ${molname}-vmd-H-${i}.pdb } # build structure and atomic coordinates for other units for {set i 1} {$i < $nmers} {incr i} { resetpsf for {set j 1} {$j <= $nchains} {incr j} { set chain [expr $j-1] # puts "residue sequence for segment $j: [lindex $reslist $seg]" # puts "residue sequence for segment $j: [lindex $resnamelist $seg]" segment [expr $j+$nchains*$i] { # pdb ${molname}-vmd-H-${i}.pdb foreach resid [lindex $reslist $chain] resname [lindex $resnamelist $chain] { residue $resid $resname } if { [lindex $chains $chain] == "P"} { puts "$remark patch protein termini of chain $i" first NTER last CTER } } } writepsf ${molname}-vmd-H-${i}.psf } resetpsf for {set i 0} {$i < $nmers} {incr i} { if {$i == 0} { readpsf ${molname}-vmd-H.psf coordpdb ${molname}-vmd-H.pdb } else { readpsf ${molname}-vmd-H-${i}.psf coordpdb ${molname}-vmd-H-${i}.pdb } } set molname ${molname}-${nmers}mers; puts "$remark change $molname to ${molname}-${nmers}mers" writepdb ${molname}-vmd-H.pdb writepsf ${molname}-vmd-H.psf mol delete top set molID ${molname}-vmd-H; puts "$remark Updated molecule ID: $molID" } ### :::::::::::::: ### toctsolvate.tcl ### :::::::::::::: if {$toct} { puts "$remark -----------------------------------------------" puts "$remark Solvate the structure ${molname}-vmd-H.pdb in a water truncated octahedron" mol new ${molname}-vmd-H.psf mol addfile ${molname}-vmd-H.pdb ### Determine the center of mass of the molecule and store the coordinates set sel [atomselect top "protein backbone"] set cen [measure center $sel]; puts "$remark Protein backbone center = $cen" if {0} { set minmax [measure minmax $sel] foreach {min max} $minmax { break } set box [vecsub $max $min] set lpmin 1000000 set lpmax 0 for {set i 0} {$i < 3 } { incr i } { set lp [lindex $box $i] if {$lpmin > $lp} { set lpmin $lp } if {$lpmax < $lp} { set lpmax $lp } } set dhalf [expr ($lpmax+$cutoff)/2.0+$expand] } else { # determine protein radius set pr 0 foreach pos [$sel get {x y z}] { set dist [veclength [vecsub $pos $cen]] if {$pr < $dist} { set pr $dist } } puts "$remark Protein backbone radius = $pr" set dhalf [expr 2.0/sqrt(3.0)*($pr+$expand+$cutoff/2.0)] # set dhalf [expr $pr+$expand+$cutoff/2.0]; puts "$remark Half of the distance between two images = $dhalf" # set dhalf [expr sqrt(3.0)*($pr+$expand+$cutoff/2.0)]; puts "$remark Half of the distance between two images = $dhalf" } set ldhalf [list $dhalf $dhalf $dhalf] set minmax [list [vecsub $cen $ldhalf] [vecadd $cen $ldhalf]]; puts "$remark Minmax for the cube containing the result truncated octahedron = $minmax" mol delete top package require vexpr package require toctsolvate # set d [format "%d" [expr $dhalf*2] ]; puts "$remark Distance between two images = $d" toctsolvate ${molname}-vmd-H.psf ${molname}-vmd-H.pdb -o ${molname}-vmd-H-toct -minmax $minmax -b $b # toctsolvate ${molname}-vmd-H.psf ${molname}-vmd-H.pdb -o ${molname}-vmd-H-toct -minmax [list [list $dmin $dmin $dmin] [list $dmax $dmax $dmax]] -b 1.80 # toctsolvate ${molname}-vmd-H.psf ${molname}-vmd-H.pdb -o ${molname}-vmd-H-toct -minmax {{$dmin $dmin $dmin} {$dmax $dmax $dmax}} -b 1.80 # toctsolvate ${molname}-vmd-H.psf ${molname}-vmd-H.pdb -o ${molname}-vmd-H-toct -minmax {{-30 -30 -30} {30 30 30}} -b 1.80 set molID ${molname}-vmd-H-toct; puts "$remark Updated molecule ID: $molID" mol new ${molname}-vmd-H-toct.psf mol addfile ${molname}-vmd-H-toct.pdb ### compute the protein size set protein [atomselect top "protein backbone"] set minmax [measure minmax $protein] foreach {min max} $minmax { break } set box [vecsub $max $min] puts "$remark MAX OF PROTEIN: $max" puts "$remark MIN OF PROTEIN: $min" puts "$remark SIZE OF PROTEIN: $box" puts "$remark CENTER OF MASS OF PROTEIN: [measure center $protein]" ### compute the system size set sel [atomselect top all] set minmax [measure minmax $sel] foreach {min max} $minmax { break } set box [vecsub $max $min] puts "$remark MAX OF Toct: $max" puts "$remark MIN OF Toct: $min" puts "$remark SIZE OF Toct: $box" puts "$remark CENTER OF MASS OF Toct: [measure center $sel]" ### use center for cell origin ### use $max-$min for cell basis vectors mol delete top } ### :::::::::::::: ### wat_box.tcl ### :::::::::::::: if {$wb} { puts "$remark -----------------------------------------------" puts "$remark Solvate the structure ${molname}-vmd-H.pdb in a water box" # determine $wlb: the water layer needed for the water box if {$ws} { mol new ${molname}-vmd-H.psf mol addfile ${molname}-vmd-H.pdb set sel [atomselect top "protein backbone"] set minmax [measure minmax $sel] foreach {min max} $minmax { break } set box [vecsub $max $min] set lpmin 1000000 set lpmax 0 for {set i 0} {$i < 3 } { incr i } { set lp [lindex $box $i] if {$lpmin > $lp} { set lpmin $lp } if {$lpmax < $lp} { set lpmax $lp } } set wlb [expr $lpmax-$lpmin+$cutoff+$expand] mol delete top } else { set wlb [expr $cutoff/2.0+$expand] } puts "$remark calculated water layer for box = $wlb" package require solvate solvate ${molname}-vmd-H.psf ${molname}-vmd-H.pdb -t $wlb -o ${molname}-vmd-H-wb -b $b set molID ${molname}-vmd-H-wb; puts "$remark Updated molecule ID: $molID" mol new ${molname}-vmd-H-wb.psf mol addfile ${molname}-vmd-H-wb.pdb ### compute the protein size set protein [atomselect top "protein backbone"] set minmax [measure minmax $protein] foreach {min max} $minmax { break } set box [vecsub $max $min] puts "$remark MAX OF PROTEIN: $max" puts "$remark MIN OF PROTEIN: $min" puts "$remark SIZE OF PROTEIN: $box" puts "$remark CENTER OF MASS OF PROTEIN: [measure center $protein]" ### compute the system size set sel [atomselect top all] set minmax [measure minmax $sel] foreach {min max} $minmax { break } set box [vecsub $max $min] puts "$remark MAX OF BOX: $max" puts "$remark MIN OF BOX: $min" puts "$remark SIZE OF BOX: $box" puts "$remark CENTER OF MASS OF BOX: [measure center $sel]" ### use center for cell origin ### use $max-$min for cell basis vectors mol delete top } ### :::::::::::::: ### wat_sphere.tcl ### :::::::::::::: if {$ws} { puts "$remark -----------------------------------------------" puts "$remark Extract ${molname}-vmd-H-wb.pdb to get ${molname}-vmd-H.pdb in a water sphere" ### Solvate the molecule in a water box with enough padding (15 A). ### One could alternatively align the molecule such that the vector ### from the center of mass to the farthest atom is aligned with an axis, ### and then use no padding # package require solvate # solvate ${molname}-vmd-H.psf ${molname}-vmd-H.pdb -t 15 -o ${molname}-vmd-H-wb mol new ${molname}-vmd-H-wb.psf mol addfile ${molname}-vmd-H-wb.pdb readpsf ${molname}-vmd-H-wb.psf coordpdb ${molname}-vmd-H-wb.pdb ### Determine the center of mass of the molecule and store the coordinates set sel [atomselect top "protein backbone"] set cen [measure center $sel] set x1 [lindex $cen 0] set y1 [lindex $cen 1] set z1 [lindex $cen 2] set num [$sel num]; puts "$remark Number of protein backbone atoms: $num" puts "$remark Protein backbone center = $cen" # determine protein radius set pr 0 foreach pos [$sel get {x y z}] { set dist [veclength [vecsub $pos $cen]] if {$pr < $dist} { set pr $dist } } puts "$remark Protein backbone radius = $pr" ### Determine the largest radius allowed to cut a complete sphere out of the box set sel [atomselect top all] set minmax [measure minmax $sel] foreach {min max} $minmax { break } set vecmax [vecsub $max $cen] set vecmin [vecsub $cen $min] set rmax 1000000 for {set i 0} {$i < 3 } { incr i } { if {$rmax > [lindex $vecmax $i]} { set rmax [lindex $vecmax $i] } if {$rmax > [lindex $vecmin $i]} { set rmax [lindex $vecmin $i] } } puts "$remark calculated sphere radius = $rmax" if {$rmax < $pr} { puts "$remark A complete water sphere surrounding the structure can not be generated: Rsphere < Protein Radius" puts "$remark Increase the water box first to make largest shpere allowed contains the protein." exit } ### Determine which water molecules need to be deleted and use a for loop ### to delete them # set wat [atomselect top "same residue as {water and ((x-$x1)*(x-$x1) + (y-$y1)*(y-$y1) + (z-$z1)*(z-$z1))<($max*$max)}"] set del [atomselect top "water and not same residue as {water and ((x-$x1)*(x-$x1) + (y-$y1)*(y-$y1) + (z-$z1)*(z-$z1))<($rmax*$rmax)}"] set seg [$del get segid] set res [$del get resid] set name [$del get name] for {set i 0} {$i < [llength $seg]} {incr i} { delatom [lindex $seg $i] [lindex $res $i] [lindex $name $i] } writepsf ${molname}-vmd-H-ws.psf writepdb ${molname}-vmd-H-ws.pdb mol delete top set molID ${molname}-vmd-H-ws; puts "$remark Updated molecule ID: $molID" mol new ${molname}-vmd-H-ws.psf mol addfile ${molname}-vmd-H-ws.pdb set sel [atomselect top all] set cen [measure center $sel] # determine rmax set minmax [measure minmax $sel] foreach {min max} $minmax { break } set vecmax [vecsub $max $cen] set vecmin [vecsub $cen $min] set rmax 1000000 for {set i 0} {$i < 3 } { incr i } { if {$rmax > [lindex $vecmax $i]} { set rmax [lindex $vecmax $i] } if {$rmax > [lindex $vecmin $i]} { set rmax [lindex $vecmin $i] } } puts "$remark result sphere center = $cen" puts "$remark result sphere radius = $rmax" mol delete top } ### :::::::::::::: ### ionize.tcl ### :::::::::::::: puts "$remark -----------------------------------------------" package require autoionize if {$iontoct} { puts "$remark Ionize ${molname}-vmd-H-toct.pdb" autoionize_core -psf ${molname}-vmd-H-toct.psf -pdb ${molname}-vmd-H-toct.pdb -o ${molname}-vmd-H-toct-${I}M -seg ION -is [expr $I*2.0] -from $distsurf -between $space set molID ${molname}-vmd-H-toct-${I}M; puts "$remark Updated molecule ID: $molID" } if {$ionwb} { puts "$remark Ionize ${molname}-vmd-H-wb.pdb" autoionize_core -psf ${molname}-vmd-H-wb.psf -pdb ${molname}-vmd-H-wb.pdb -o ${molname}-vmd-H-wb-${I}M -seg ION -is [expr $I*2.0] -from $distsurf -between $space set molID ${molname}-vmd-H-wb-${I}M; puts "$remark Updated molecule ID: $molID" } if {$ionws} { puts "$remark Ionize ${molname}-vmd-H-ws.pdb" autoionize_core -psf ${molname}-vmd-H-ws.psf -pdb ${molname}-vmd-H-ws.pdb -o ${molname}-vmd-H-ws-${I}M -seg ION -is [expr $I*2.0] -from $distsurf -between $space set molID ${molname}-vmd-H-ws-${I}M; puts "$remark Updated molecule ID: $molID" } ### :::::::::::::: ### write result system parameters ### :::::::::::::: set writepara 1; puts "$remark write result system parameters for simulation" if {$writepara} { mol new ${molID}.psf mol addfile ${molID}.pdb set sel [atomselect top all] set Nt [$sel num]; puts "$remark Number of all atoms: $Nt" set sel [atomselect top "not water and not ion"] set Nstr [$sel num]; puts "$remark Number of atoms in molecular structre only: $Nstr" mol delete top set parafile "ama_para.dat" set fpara [open $parafile w]; \ puts "AMA) Open file ${parafile} for writing result system." puts $fpara "molID \t\t$molID \t# Molecule ID" puts $fpara "TotNatoms \t$Nt \t# Total number of atoms in the system" puts $fpara "StrNatoms \t$Nstr \t# Number of atoms in molecular structure (i.e., system excluding water and ions)" } exit