---
title: "chinf"
author: "Lubomír Prokeš"
date: "11/12/2020"
output:
  pdf_document: default
  word_document: default
  html_document: default
---

### R ###

### Organic plant protection products in the river Jagst (Germany) in 2013

```{r, echo = FALSE}
library(webchem)
# jagst (the dataset used in the next chunk; presumably provided by webchem)
```

```{r}
unique(jagst[,2])
```

SMILES

```{r}
library(webchem)
smile <- as.character(cir_query('Triclosan', 'smiles'))
is.smiles(smile, verbose = TRUE)
smile
```

SDF

```{r}
library(rJava)
library(rcdk)
library(webchem)
mol <- parse.smiles('c1ccccc1C(Cl)(Br)c1ccccc1')[[1]]
# NOTE(review): the molecule is written to mymols2.sdf but mymols.sdf is
# loaded below -- confirm that reading a different file is intended.
write.molecules(mol, filename='c:\\Users\\lubop\\Documents\\R a Python chemie\\R\\mymols2.sdf')
mols <- load.molecules("c:\\Users\\lubop\\Documents\\R a Python chemie\\R\\mymols.sdf")
## view molecule depictions
view.molecule.2d(mols) # does not work
## evaluate fingerprints
fps <- get.fingerprints(mols, type="maccs")
## generate descriptors
dnames <- get.desc.names("topological")
descs <- eval.desc(mols, dnames)
```

```{r}
library(rJava)
library(rcdk)
# `smile` comes from the SMILES chunk above.
m <- parse.smiles(smile)[[1]]
formula <- get.mol2formula(m,charge=0)
formula@mass
formula@charge
formula@isotopes
formula@string
get.exact.mass(m)
get.natural.mass(m)
is.neutral(m)
get.atom.count(m)
get.bonds(m)
mol <- parse.smiles('CC(=O)CC(=O)NCN')[[1]]
convert.implicit.to.explicit(mol)
get.tpsa(mol)
get.xlogp(mol)
get.alogp(mol)
mol <- parse.smiles('c1ccccc1C(Cl)(Br)c1ccccc1')[[1]]
atoms <- get.atoms(mol)
bonds <- get.bonds(mol)
cat('No. of atoms =', length(atoms), '\n')
cat('No. of bonds =', length(bonds), '\n')
```

[Chemical Identifier Resolver (CIR)](https://cactus.nci.nih.gov/)

```{r}
library(webchem)
# see the Reference Manual
cir_query("piperonyl butoxide", representation = "smiles", resolver = NULL,
          first = FALSE, verbose = TRUE)
cir_query('DEET', 'cas', first = TRUE)
cir_query('Triclosan', 'cas')
cir_query('Triclosan', 'smiles')
cir_query('Triclosan', 'smiles')
cir_query('Aspirin', 'smiles')
# CAS
cir_query('Imidacloprid', representation = 'cas')
# SMILES
cir_query('Imidacloprid', representation = 'smiles')
# InChIKey
cir_query('Imidacloprid', representation = 'stdinchikey')
# Molecular weight
cir_query('Imidacloprid', representation = 'mw')
# number of rings
cir_query('Imidacloprid', representation = 'ring_count')
cir_query("51-03-6", representation = "smiles", resolver = NULL,
          first = FALSE, verbose = TRUE)
cir_query("3380-34-5", 'cas', first = TRUE)
cir_query("3380-34-5", 'cas', resolver = 'cas_number')
cir_query("3380-34-5", 'smiles')
cir_query("3380-34-5", 'formula')
cir_query("3380-34-5", 'names')
cir_query("3380-34-5", 'pubchem_sid')
cir_query("3380-34-5", 'chemnavigator_sid')
name <- 'Triclosan'
cir_query(name, 'mw')
cir_query(name, 'formula')
cir_query(name, 'monoisotopic_mass')
cir_query(name, 'heteroatom_count')
cir_query(name, 'hydrogen_atom_count')
comp <- c('Triclosan', 'Aspirin')
cir_query(comp, 'cas')
cir_query(comp, 'cas', first = TRUE)
cir_query(comp, 'smiles')
cir_query(comp, 'mw')
```

```{r, echo = FALSE}
library(rJava)
library(rcdk)
library(webchem)

#' Draw a molecule to an image file with the CDK depiction generator.
#'
#' @param mol Molecule to draw; in this document an element of the list
#'   returned by `parse.smiles()`.
#' @param pathsd Path of the image file to write.
#' @return Invisibly, the value of the `writeTo()` call, or `NULL` when the
#'   depiction failed; in that case a warning describing the failure is raised.
todepict <- function(mol, pathsd) {
  result <- tryCatch({
    # DepictionGenerator is immutable: every with*() call returns a new
    # generator, so the returned object has to be kept. Calling withSize()
    # without using its result leaves the size unchanged.
    factory <- .jnew("org.openscience.cdk.depict.DepictionGenerator")$withAtomColors()
    factory <- factory$withSize(1000, 1000) #$getStyle("cow")
    factory$depict(mol)$writeTo(pathsd)
  }, warning = function(w) {
    # Still best-effort (NULL result), but report what went wrong.
    warning("todepict: ", conditionMessage(w), call. = FALSE)
    NULL
  }, error = function(e) {
    warning("todepict: could not write '", pathsd, "': ",
            conditionMessage(e), call. = FALSE)
    NULL
  })
  invisible(result)
}
```

```{r, echo = FALSE}
name <- "permethrin"
smile <- as.character(cir_query(name, 'smiles'))
molv <- parse.smiles(smile, kekulise=TRUE)[[1]]
# Build the output path once so writing and reading cannot drift apart.
png_path <- paste0("c:\\Users\\lubop\\OneDrive\\Dokumenty\\", name, ".png")
todepict(molv, png_path)
library(imager)
im <- load.image(png_path)
```

Název sloučeniny: `r name`

```{r, echo = FALSE}
plot(im, axes = FALSE)
```

Název sloučeniny: `r name`

![](c:/Users/lubop/OneDrive/Dokumenty/permethrin.png)

```{r}
library(rcellminer)
tmp <- parse.smiles("C1=CN(C(=O)N=C1N)C2C(C(C(O2)CO)O)(F)F")
#rcdkplot(tmp[[1]], width=300, height=300, main="Gemcitabine")
```

```{r}
library(rJava)
library(rcdk)
library(webchem)
name <- "permethrin"

#' Plot a molecule as a raster image on the current graphics device.
#'
#' @param molecule Molecule passed on to `view.image.2d()`.
#' @return Called for its side effect (the plot); the graphics margins are
#'   restored to their previous values when the function exits.
rcdkplot2 <- function(molecule) {
  # set margins to zero since this isn't a real plot, and put them back on exit
  old_par <- par(mar = c(0, 0, 0, 0))
  on.exit(par(old_par), add = TRUE)
  # get Java representation into an image matrix
  img <- view.image.2d(molecule, depictor = NULL)
  # create an empty plot
  plot(NA, NA, xlim = c(1, 14), ylim = c(1, 10),
       xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')
  # boundaries of raster: xmin, ymin, xmax, ymax
  rasterImage(img, 2, 1, 12, 10)
}

smile <- as.character(cir_query(name, 'smiles'))
# is.smiles(smile, verbose = TRUE)
# smile
m <- parse.smiles(smile)[[1]]
rcdkplot2(m)

# Reuse the SMILES fetched above instead of querying the resolver again.
mol <- parse.smiles(smile)[[1]]
sdf_path <- paste0("c:\\Users\\lubop\\OneDrive\\Dokumenty\\", name, ".sdf")
write.molecules(mol, filename = sdf_path)
mols <- load.molecules(sdf_path)
```

[Alan Wood's Compendium of Pesticide Common Names](http://www.alanwood.net/pesticides)

```{r}
library(webchem)
# name
aw_query(name, from = 'name', verbose = TRUE)
#aw_query("fluazinam", from = 'name', verbose = TRUE)$fluazinam$formula[1]
# cas
aw_query("51-03-6", from = 'cas', verbose = TRUE)
```

### PYTHON ###

```{r setup, include=FALSE}
library(knitr)
library(reticulate)
knitr::knit_engines$set(python = reticulate::eng_python)
Sys.setenv(RETICULATE_PYTHON = "c:/Users/lubop/AppData/Local/r-miniconda")
```

```{python}
import pubchempy as pcp
c = pcp.Compound.from_cid(5090)
print(c.molecular_formula)
print(c.molecular_weight)
print(c.isomeric_smiles)
print(c.xlogp)
print(c.iupac_name)
print(c.synonyms)
```

```{python}
import pubchempy as pcp
from pubchempy import Compound, get_compounds
c = Compound.from_cid(1423)
cs = get_compounds('Aspirin', 'name')
results = pcp.get_compounds('Glucose', 'name')
print(results)
```

```{python}
import cirpy
cirpy.resolve('Aspirin', 'smiles')
cirpy.resolve('Aspirin', 'formula')
cirpy.resolve('Aspirin', 'iupac_name')
cirpy.resolve('Aspirin', 'cas')
cirpy.resolve('Aspirin', 'inchi')
cirpy.resolve('Aspirin', 'stdinchi')
cirpy.resolve('Aspirin', 'stdinchikey')
cirpy.resolve('coumarin 343', 'mw')
cirpy.resolve('coumarin 343', 'ring_count')
cirpy.resolve('coumarin 343', 'ringsys_count')
cirpy.resolve('coumarin 343', 'h_bond_donor_count')
cirpy.resolve('coumarin 343', 'h_bond_acceptor_count')
cirpy.resolve('coumarin 343', 'h_bond_center_count')
cirpy.resolve('coumarin 343', 'rule_of_5_violation_count') cirpy.resolve('coumarin 343', 'rotor_count') cirpy.resolve('coumarin 343', 'effective_rotor_count') ``` ```{python} import openbabel mol = openbabel.OBMol() print('Should print 0 (atoms)') print(mol.NumAtoms()) a = mol.NewAtom() a.SetAtomicNum(6) # carbon atom a.SetVector(0.0, 1.0, 2.0) # coordinates b = mol.NewAtom() mol.AddBond(1, 2, 1) # atoms indexed from 1 print('Should print 2 (atoms)') print(mol.NumAtoms()) print('Should print 1 (bond)') print(mol.NumBonds()) mol.Clear() ##### obConversion = openbabel.OBConversion() obConversion.SetInAndOutFormats("smi", "mdl") mol = openbabel.OBMol() obConversion.ReadString(mol, "C1=CC=CS1") print('Should print 5 (atoms)') print(mol.NumAtoms()) mol.AddHydrogens() print('Should print 9 (atoms) after adding hydrogens') print(mol.NumAtoms()) outMDL = obConversion.WriteString(mol) ##### obConversion = openbabel.OBConversion() obConversion.SetInAndOutFormats("pdb", "mol2") mol = openbabel.OBMol() # obConversion.ReadFile(mol, "1ABC.pdb.gz") # Open Babel will uncompress automatically mol.AddHydrogens() print(mol.NumAtoms()) print(mol.NumBonds()) print(mol.NumResidues()) obConversion.WriteFile(mol, '1abc.mol2') ###### for obatom in openbabel.OBMolAtomIter(mol): print(obatom.GetAtomicMass()) ``` ```{python} from matplotlib import style style.use('ggplot') from rdkit import Chem from rdkit.Chem import Draw size = (120, 120) # Smaller figures than the default. 
m = Chem.MolFromSmiles('[Na+].[Cl-]') fig = Draw.MolToMPL(m, size=size) m = Chem.MolFromSmiles('c1ccccc1') fig = Draw.MolToMPL(m, size=size) m = Chem.MolFromSmiles('C1=C2C(=CC(=C1Cl)Cl)OC3=CC(=C(C=C3O2)Cl)Cl') fig = Draw.MolToMPL(m, size=size) m = Chem.MolFromSmiles('Cn1cnc2c1c(=O)n(c(=O)n2C)C') fig = Draw.MolToMPL(m, size=size) m = Chem.MolFromSmiles('CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)Cc3ccccc3)C(=O)O)C') fig = Draw.MolToMPL(m, size=size) # fig = Draw.MolToMPL(m, size=(200, 200)) ``` ```{python} from rdkit import Chem from rdkit.Chem import Draw m = Chem.MolFromSmiles('[Na+].[Cl-]') Draw.MolToFile(m,'molx.png') m0 = Chem.MolFromSmiles('c1ccccc1') Draw.MolToFile(m0,'molx0.png') m1 = Chem.MolFromSmiles('C1=C2C(=CC(=C1Cl)Cl)OC3=CC(=C(C=C3O2)Cl)Cl') Draw.MolToFile(m1,'molx1.png') m2 = Chem.MolFromSmiles('Cn1cnc2c1c(=O)n(c(=O)n2C)C') Draw.MolToFile(m2,'molx2.png') m3 = Chem.MolFromSmiles('CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)Cc3ccccc3)C(=O)O)C') Draw.MolToFile(m3,'molx3.png') ``` ```{r, echo=FALSE} knitr::include_graphics("molx.png") knitr::include_graphics("molx0.png") knitr::include_graphics("molx1.png") knitr::include_graphics("molx2.png") knitr::include_graphics("molx3.png") ```