<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>Biorelated &#187; tutorials</title>
	<atom:link href="http://biorelated.com/category/tutorials/feed/" rel="self" type="application/rss+xml" />
	<link>http://biorelated.com</link>
	<description></description>
	<lastBuildDate>Sat, 28 Jan 2012 06:29:42 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='biorelated.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://0.gravatar.com/blavatar/41054b22bbe7debbf1d63972772e21fa?s=96&#038;d=http%3A%2F%2Fs2.wp.com%2Fi%2Fbuttonw-com.png</url>
		<title>Biorelated &#187; tutorials</title>
		<link>http://biorelated.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://biorelated.com/osd.xml" title="Biorelated" />
	<atom:link rel='hub' href='http://biorelated.com/?pushpress=hub'/>
		<item>
		<title>Keep track of Bioruby plugins</title>
		<link>http://biorelated.com/2011/08/26/keep-track-of-bioruby-plugins/</link>
		<comments>http://biorelated.com/2011/08/26/keep-track-of-bioruby-plugins/#comments</comments>
		<pubDate>Fri, 26 Aug 2011 19:39:27 +0000</pubDate>
		<dc:creator>George</dc:creator>
				<category><![CDATA[tutorials]]></category>

		<guid isPermaLink="false">http://biorelated.com/?p=413</guid>
		<description><![CDATA[Biogems.info is a new site for keeping track of new and existing Bioruby plugins. Plugins are separate code libraries that split functionality out of the Bioruby main tree. The idea is to have a core Bioruby release and to allow Ruby developers to contribute to Bioruby through plugins. According to Bonnal, the maintainer of biogem (the [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=413&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><a title="Biogems" href="http://www.biogems.info/">Biogems.info </a>is a new site for keeping track of new and existing Bioruby plugins. Plugins are separate code libraries that split functionality out of the Bioruby main tree. The idea is to have a core Bioruby release and to allow Ruby developers to contribute to Bioruby through plugins. According to Bonnal, the maintainer of biogem (the bio-plugin crafting tool),  plugins are separately maintained and may represent experimental or work in progress.</p>
<p>To read more about the Bioruby plugin system, please refer to <a title="bioruby plugins" href="http://bioruby.open-bio.org/wiki/Plugins" target="_blank">the wiki page on plugins.</a></p>
<p>Happy biology!</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/biorelated.wordpress.com/413/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/biorelated.wordpress.com/413/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/biorelated.wordpress.com/413/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/biorelated.wordpress.com/413/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/biorelated.wordpress.com/413/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/biorelated.wordpress.com/413/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/biorelated.wordpress.com/413/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/biorelated.wordpress.com/413/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/biorelated.wordpress.com/413/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/biorelated.wordpress.com/413/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/biorelated.wordpress.com/413/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/biorelated.wordpress.com/413/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/biorelated.wordpress.com/413/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/biorelated.wordpress.com/413/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=413&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://biorelated.com/2011/08/26/keep-track-of-bioruby-plugins/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d9e14f1be0972ff1f393cc87dbd072e1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">george_g</media:title>
		</media:content>
	</item>
		<item>
		<title>Bioruby 1.4.2 released!</title>
		<link>http://biorelated.com/2011/08/26/bioruby-1-4-2-released/</link>
		<comments>http://biorelated.com/2011/08/26/bioruby-1-4-2-released/#comments</comments>
		<pubDate>Fri, 26 Aug 2011 19:27:36 +0000</pubDate>
		<dc:creator>George</dc:creator>
				<category><![CDATA[tutorials]]></category>

		<guid isPermaLink="false">http://biorelated.com/?p=409</guid>
		<description><![CDATA[The Bioruby development team has continued to work tirelessly to bring us the latest release of the Ruby bioinformatics library commonly referred to as bioruby. A list of all the new changes is available  here . One of the most pleasant news for beginners is that the Bioruby tutorial has been updated thanks to Michael [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=409&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>The Bioruby development team has continued to work tirelessly to bring us the latest release of the Ruby bioinformatics library commonly referred to as bioruby. A list of all the new changes is available  <a title="Changelog" href="https://github.com/bioruby/bioruby/blob/1.4.2/ChangeLog">here</a> . One of the most pleasant news for beginners is that the Bioruby tutorial has been updated thanks to Michael O&#8217;Keefe and Pjotr Prins. The Release is largely a bug fix release with updates on web services from SOAP to REST interfaces. Upgrading to the latest release is easy&#8230;<br />
<code>gem update bio</code><br />
or<br />
<code>gem install bio</code></p>
<p>Happy biology!</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/biorelated.wordpress.com/409/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/biorelated.wordpress.com/409/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/biorelated.wordpress.com/409/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/biorelated.wordpress.com/409/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/biorelated.wordpress.com/409/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/biorelated.wordpress.com/409/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/biorelated.wordpress.com/409/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/biorelated.wordpress.com/409/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/biorelated.wordpress.com/409/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/biorelated.wordpress.com/409/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/biorelated.wordpress.com/409/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/biorelated.wordpress.com/409/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/biorelated.wordpress.com/409/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/biorelated.wordpress.com/409/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=409&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://biorelated.com/2011/08/26/bioruby-1-4-2-released/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d9e14f1be0972ff1f393cc87dbd072e1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">george_g</media:title>
		</media:content>
	</item>
		<item>
		<title>Processing netMHCII-pan prediction output</title>
		<link>http://biorelated.com/2011/08/25/processing-netmhcii-pan-prediction-output/</link>
		<comments>http://biorelated.com/2011/08/25/processing-netmhcii-pan-prediction-output/#comments</comments>
		<pubDate>Thu, 25 Aug 2011 07:34:28 +0000</pubDate>
		<dc:creator>George</dc:creator>
				<category><![CDATA[tutorials]]></category>

		<guid isPermaLink="false">http://biorelated.com/?p=391</guid>
		<description><![CDATA[Like most informatics throughput methods, epitope prediction generates a lot of output and in a not so friendly format suitable for subsequent analysis. I considered writing a parser for the output using Ruby, but would that not take long? A simple vim function that I added to my .vimrc file to format the output and [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=391&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Like most informatics throughput methods, epitope prediction generates a lot of output and in a not so friendly format suitable for subsequent analysis. I considered writing a parser for the output using Ruby, but would that not take long? A simple vim function that I added to my .vimrc file to format the output and use a single keystroke worked the magic and saved time. </p>
<p><pre class="brush: plain;">
&quot; formatting output from the netMHCII-pan program
function! FormatNetmhcOutput()
   g/^\#/norm dd 
   g/^--/norm dd
   g/^Protein/norm dd
   %le
   g/^pos/norm dd
   %s/&lt;=\sWB//g
   %s/&lt;=\sSB//g
   %s/\s\+$//
   %s/\s\+/,/g
   g/^$/d
endfunction
nmap   ;h  :call FormatNetmhcOutput()
</pre><br />
This function can be called by pressing the ; and h key when in normal mode. It removes comments and provides a csv output that can be read with a simple R directive. </p>
<p><pre class="brush: r;">data &lt;- read.csv(&quot;file.csv&quot;) </pre></p>
<p>sample output<br />
<a href="http://biorelated.files.wordpress.com/2011/08/screen-shot-2011-08-25-at-09-15-56.png"><img src="http://biorelated.files.wordpress.com/2011/08/screen-shot-2011-08-25-at-09-15-56.png?w=300&#038;h=69" alt="" title="Screen shot 2011-08-25 at 09.15.56" width="300" height="69" class="aligncenter size-medium wp-image-398" /></a></p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/biorelated.wordpress.com/391/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/biorelated.wordpress.com/391/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/biorelated.wordpress.com/391/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/biorelated.wordpress.com/391/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/biorelated.wordpress.com/391/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/biorelated.wordpress.com/391/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/biorelated.wordpress.com/391/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/biorelated.wordpress.com/391/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/biorelated.wordpress.com/391/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/biorelated.wordpress.com/391/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/biorelated.wordpress.com/391/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/biorelated.wordpress.com/391/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/biorelated.wordpress.com/391/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/biorelated.wordpress.com/391/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=391&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://biorelated.com/2011/08/25/processing-netmhcii-pan-prediction-output/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d9e14f1be0972ff1f393cc87dbd072e1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">george_g</media:title>
		</media:content>

		<media:content url="http://biorelated.files.wordpress.com/2011/08/screen-shot-2011-08-25-at-09-15-56.png?w=300" medium="image">
			<media:title type="html">Screen shot 2011-08-25 at 09.15.56</media:title>
		</media:content>
	</item>
		<item>
		<title>Convert a fastA file to a hash</title>
		<link>http://biorelated.com/2011/06/25/convert-a-fasta-file-to-a-hash/</link>
		<comments>http://biorelated.com/2011/06/25/convert-a-fasta-file-to-a-hash/#comments</comments>
		<pubDate>Sat, 25 Jun 2011 18:41:51 +0000</pubDate>
		<dc:creator>George</dc:creator>
				<category><![CDATA[tutorials]]></category>

		<guid isPermaLink="false">http://biorelated.com/?p=358</guid>
		<description><![CDATA[Sometimes you might want to convert a file of fastA sequences to a hash. Here is a one-line method that might come in handy for that. The result is an array of hashes. Each hash key corresponds to the sequence name<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=358&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Sometimes you might want to convert a file of fastA sequences to a hash. Here is a one-line method that might come in handy for that.<br />
<pre class="brush: ruby;">
require 'bio'
file_path = &quot;example.fasta&quot;

def fasta_to_hash
  Bio::FlatFile.auto(file_path){ |f| f.map {|entry| Hash.[](entry.definition.to_sym,[entry.seq.to_s])}}
end

 #=&gt;[{:&quot;seq1&quot;=&gt;[&quot;gatataggagatatcgttagag&quot;]}]
</pre></p>
<p>The result is an array of hashes. Each hash key corresponds to the sequence name</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/biorelated.wordpress.com/358/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/biorelated.wordpress.com/358/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/biorelated.wordpress.com/358/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/biorelated.wordpress.com/358/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/biorelated.wordpress.com/358/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/biorelated.wordpress.com/358/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/biorelated.wordpress.com/358/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/biorelated.wordpress.com/358/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/biorelated.wordpress.com/358/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/biorelated.wordpress.com/358/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/biorelated.wordpress.com/358/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/biorelated.wordpress.com/358/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/biorelated.wordpress.com/358/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/biorelated.wordpress.com/358/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=358&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://biorelated.com/2011/06/25/convert-a-fasta-file-to-a-hash/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d9e14f1be0972ff1f393cc87dbd072e1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">george_g</media:title>
		</media:content>
	</item>
		<item>
		<title>Translating a nucleotide sequence in six frames with bioruby</title>
		<link>http://biorelated.com/2011/02/02/translating-a-nucleotide-sequence-in-six-frames-with-bioruby/</link>
		<comments>http://biorelated.com/2011/02/02/translating-a-nucleotide-sequence-in-six-frames-with-bioruby/#comments</comments>
		<pubDate>Wed, 02 Feb 2011 12:18:53 +0000</pubDate>
		<dc:creator>George</dc:creator>
				<category><![CDATA[bioinformatics]]></category>
		<category><![CDATA[bioruby]]></category>
		<category><![CDATA[tutorials]]></category>

		<guid isPermaLink="false">http://biorelated.com/?p=297</guid>
		<description><![CDATA[Bioruby offers a very easy and simple way to translate nucleotide sequences. seq= Bio::Sequence::NA.new("acctatagctctagcta") seq.translate We know that there are six possible reading frames for any given nucleotide sequence. Generally the longest open reading frame is taken to be the correct frame when we do not have information about the possible protein that is encoded [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=297&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Bioruby offers a very easy and simple way to translate nucleotide sequences.</p>
<pre>seq= Bio::Sequence::NA.new("acctatagctctagcta")</pre>
<pre>seq.translate</pre>
<p>We know that there are six possible reading frames for any given nucleotide sequence. Generally the longest open reading frame is taken to be the correct frame when we do not have information about the possible protein that is encoded by a given gene. By default the translate method performs translation in the first frame, but it can take an argument that defines the translation frame:</p>
<pre>seq.translate(2) #translate using the second reading frame.</pre>
<p>Given a long list of sequences, how do we quickly determine the correct reading frame? We would want to have a method to translate a given sequence in all frames and pick the longest reading frame. Assuming that the correct reading frame has no stop codons, we can write a quick method to perform the six-frame translation.</p>
<pre> def longest_reading_frame(sequence)
  orfs = [] #a container for orfs(open reading frames)
  #translate a sequence in all 6 frames
   6.times do |frame|
   translated = Bio::Sequence::NA.new(sequence).translate(frame + 1)
   stop_codons = translated.scan(/\*/).size
    orfs &lt;&lt; translated if stop_codons == 0
   end
  orfs[0]
end</pre>
<p>This method uses an array to collect all translated sequences that contain no stop codons and returns the first sequence in the array. This might not scale very well for very long sequences but that will be a post for another day!</p>
<p>Happy Biology!</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/biorelated.wordpress.com/297/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/biorelated.wordpress.com/297/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/biorelated.wordpress.com/297/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/biorelated.wordpress.com/297/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/biorelated.wordpress.com/297/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/biorelated.wordpress.com/297/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/biorelated.wordpress.com/297/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/biorelated.wordpress.com/297/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/biorelated.wordpress.com/297/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/biorelated.wordpress.com/297/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/biorelated.wordpress.com/297/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/biorelated.wordpress.com/297/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/biorelated.wordpress.com/297/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/biorelated.wordpress.com/297/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=297&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://biorelated.com/2011/02/02/translating-a-nucleotide-sequence-in-six-frames-with-bioruby/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d9e14f1be0972ff1f393cc87dbd072e1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">george_g</media:title>
		</media:content>
	</item>
		<item>
		<title>Converting sequence data from csv to fasta format</title>
		<link>http://biorelated.com/2011/01/26/converting-sequence-data-from-csv-to-fasta-format/</link>
		<comments>http://biorelated.com/2011/01/26/converting-sequence-data-from-csv-to-fasta-format/#comments</comments>
		<pubDate>Wed, 26 Jan 2011 08:24:07 +0000</pubDate>
		<dc:creator>George</dc:creator>
				<category><![CDATA[bioruby]]></category>
		<category><![CDATA[Tools]]></category>
		<category><![CDATA[tutorials]]></category>

		<guid isPermaLink="false">http://biorelated.com/?p=280</guid>
		<description><![CDATA[Many times I find someone storing sequence data in Excel workbooks. (insert scream here) This is usually followed by a request which goes like this, Someone: &#8220;I will send you some sequences and then we can perform xyz analysis please?&#8221; Me: &#8220;Are they in fasta format?&#8221; Someone: &#8220;No, they are in Excel&#8221; Me: (suppressing [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=280&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Many times I find someone storing sequence data in Excel workbooks. (insert scream here) This is usually followed by a request which goes like this:</p>
<p><strong>Someone: </strong>&#8220;I will send you some sequences and then we can perform xyz analysis please?&#8221;</p>
<p><strong>Me: </strong>&#8220;Are they in fasta format?&#8221;</p>
<p><strong>Someone: </strong>&#8220;No, they are in Excel&#8221;</p>
<p><strong>Me: </strong>(<em>suppressing a laugh</em>) &#8220;Ok, would you mind converting them to Fasta so that we can then do xyz?&#8221;</p>
<p><strong>Someone:</strong> (<em>with a wiggle on the face</em>) &#8220;How do I do that? Is there a Windows program to do that?&#8221;</p>
<p><strong>Me:</strong> (<em>feeling superman-nish</em>) &#8220;eeh we can create a quick script in perl or Ruby, I prefer Ruby &#8230; but you should learn some basic perl or Ruby&#8230;. and run away from windows. :)&#8221;</p>
<p><strong>Me:</strong> &#8220;Save your data as CSV(File -&gt;Save As-&gt; csv),  then send me that file&#8221;</p>
<p>So here is a very simple script that reads a csv file and creates a fasta file using Ruby.</p>
<p>You need to specify the path to the input csv file and the output fasta file, the column number that contains the name of the sequence and the column number that contains the sequence data in the csv file.</p>
<pre>require 'csv'
# read a csv file and create a fasta file
def csv_to_fasta(csv_file,output_file,name_col,seq_col)
  File.open(output_file,'w') do |file|
  count = 0
  CSV.foreach(csv_file) do |row|
   sequence_id = row[name_col]
   seq = row[seq_col]

  count = count+1
  puts sequence_id
  file.puts "&gt;#{sequence_id} \n#{seq}"
 end
 puts "#{count} sequences processed"
end</pre>
<pre>csv_file    = "#{ENV['HOME']}/path_to_csv_file.csv"
fasta_file  = "#{ENV['HOME']}/path_to_fasta_file.fasta"

seq_name_col = 0 #assumes the first column contains the names
seq_data_col = 1 #second column contains the seq data

csv_to_fasta(csv_file,fasta_file,seq_name_col,seq_data_col)</pre>
<p>Happy biology!</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/biorelated.wordpress.com/280/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/biorelated.wordpress.com/280/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/biorelated.wordpress.com/280/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/biorelated.wordpress.com/280/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/biorelated.wordpress.com/280/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/biorelated.wordpress.com/280/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/biorelated.wordpress.com/280/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/biorelated.wordpress.com/280/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/biorelated.wordpress.com/280/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/biorelated.wordpress.com/280/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/biorelated.wordpress.com/280/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/biorelated.wordpress.com/280/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/biorelated.wordpress.com/280/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/biorelated.wordpress.com/280/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=280&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://biorelated.com/2011/01/26/converting-sequence-data-from-csv-to-fasta-format/feed/</wfw:commentRss>
		<slash:comments>4</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d9e14f1be0972ff1f393cc87dbd072e1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">george_g</media:title>
		</media:content>
	</item>
		<item>
		<title>My first Bioruby plugin calculates the isoelectric point of a protein</title>
		<link>http://biorelated.com/2011/01/06/my-first-bioruby-plugin-calculates-the-isoelectric-point-of-a-protein/</link>
		<comments>http://biorelated.com/2011/01/06/my-first-bioruby-plugin-calculates-the-isoelectric-point-of-a-protein/#comments</comments>
		<pubDate>Thu, 06 Jan 2011 18:15:07 +0000</pubDate>
		<dc:creator>George</dc:creator>
				<category><![CDATA[bioinformatics]]></category>
		<category><![CDATA[bioruby]]></category>
		<category><![CDATA[technology]]></category>
		<category><![CDATA[tutorials]]></category>

		<guid isPermaLink="false">http://biorelated.com/?p=255</guid>
		<description><![CDATA[Late last year,  there was a lot of talk about creating a plugin system for Bioruby. The idea is that more people can start to develop bioinformatics libraries using the Ruby language and the libraries can leverage on the bioruby framework. Bioruby maintainers can then concentrate on yet to be defined &#8220;core&#8221; parts of the [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=255&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Late last year, there was a lot of talk about creating a <a title="bioruby plugins" href="http://bioruby.open-bio.org/wiki/Plugins" target="_blank">plugin system for Bioruby</a>. The idea is that more people can start to develop bioinformatics libraries using the Ruby language and the libraries can leverage on the bioruby framework. Bioruby maintainers can then concentrate on yet to be defined &#8220;core&#8221; parts of the library to ensure compatibility and support for the plugins. Together with Pascal Bentz, we have created a library to calculate the Isoelectric point of a protein given a Pka set and an amino acid sequence of a peptide/protein. The project lay dormant for a while at github until now! I am happy to release my first bioruby plugin, bio-isoelectric point! <a title="Bio-isoelectric_point" href="https://rubygems.org/gems/bio-isoelectric_point" target="_blank">Download it at rubygems.org</a> <a title="bioruby-isoelectric-point" href="https://github.com/georgeG/bioruby-isoelectric_point" target="_blank">Fork it and check the usage at github </a></p>
<p>Examples<br />
<code><br />
require 'bio'<br />
require 'bio-isoelectric_point'<br />
protein_seq = Bio::Sequence::AA.new("KKGFTCGELA")</code></p>
<p>#what is the protein charge at ph 14?<br />
charge = protein_seq.charge_at(14) #=&gt;-2.999795857467562</p>
<p>#calculate the ph using dtaselect pka set and round off to 3 decimal places<br />
isoelectric_point = protein_seq.isoelectric_point('dtaselect', 3) #=&gt;8.219</p>
<p># calculate the isoelectric point pH with a custom set<br />
custom_pka_set = { "N_TERMINUS" =&gt; 8.1,<br />
"K" =&gt; 10.1,<br />
"R" =&gt; 12.1,<br />
"H" =&gt; 6.4,<br />
"C_TERMINUS" =&gt; 3.15,<br />
"D" =&gt; 4.34,<br />
"E" =&gt; 4.33,<br />
"C" =&gt; 8.33,<br />
"Y" =&gt; 9.5<br />
}<br />
iep_ph = protein_seq.isoelectric_point(custom_pka_set, 3) #=&gt; 8.193</p>
<p>This gem supports the following Pka sets, as well as allowing a user to provide a custom Pka set.</p>
<pre>    * dta_select
    * emboss
    * rodwell
    * wikipedia
    * sillero</pre>
<p>Happy biology!</p>
<p><a title="Bio-isoelectric_point" href="https://rubygems.org/gems/bio-isoelectric_point" target="_blank"><br />
</a></p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/biorelated.wordpress.com/255/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/biorelated.wordpress.com/255/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/biorelated.wordpress.com/255/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/biorelated.wordpress.com/255/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/biorelated.wordpress.com/255/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/biorelated.wordpress.com/255/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/biorelated.wordpress.com/255/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/biorelated.wordpress.com/255/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/biorelated.wordpress.com/255/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/biorelated.wordpress.com/255/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/biorelated.wordpress.com/255/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/biorelated.wordpress.com/255/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/biorelated.wordpress.com/255/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/biorelated.wordpress.com/255/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=255&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://biorelated.com/2011/01/06/my-first-bioruby-plugin-calculates-the-isoelectric-point-of-a-protein/feed/</wfw:commentRss>
		<slash:comments>9</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d9e14f1be0972ff1f393cc87dbd072e1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">george_g</media:title>
		</media:content>
	</item>
		<item>
		<title>What&#8217;s new in bioruby</title>
		<link>http://biorelated.com/2009/12/17/whats-new-in-bioruby/</link>
		<comments>http://biorelated.com/2009/12/17/whats-new-in-bioruby/#comments</comments>
		<pubDate>Thu, 17 Dec 2009 08:17:52 +0000</pubDate>
		<dc:creator>George</dc:creator>
				<category><![CDATA[tutorials]]></category>

		<guid isPermaLink="false">http://biorelated.wordpress.com/?p=234</guid>
		<description><![CDATA[&#160;   I scouted the bioruby git repository the other day to see what might be new in the current snapshot.   These are some of the notable changes: Bug fixes; Lots of bug fixes. For example the Bio::Fasta.remote bug has been fixed, workaround for Zlib error, fixed method names Increased ruby 1.9 support Renaming [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=234&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>&nbsp;</p>
<div style="background-image:initial;background-attachment:initial;background-color:#ffffff;font:normal normal normal 13px/19px Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif;background-position:initial initial;background-repeat:initial initial;margin:0;padding:.6em;">
<p><a href="http://biorelated.files.wordpress.com/2009/12/bioruby-link.gif"><img class="alignleft size-full wp-image-246" title="bioruby-link" src="http://biorelated.files.wordpress.com/2009/12/bioruby-link.gif?w=640" alt=""   /></a>  I scouted the bioruby git repository the other day to see what might be new in the current snapshot.   These are some of the notable changes:</p>
<p><strong>Bug fixes;</strong></p>
<p>Lots of bug fixes. For example the Bio::Fasta.remote bug has been fixed, workaround for Zlib error, fixed method names</p>
<p><strong>Increased ruby 1.9 support</strong></p>
<p><strong>Renaming of files and modules</strong></p>
<p>Some files and modules have been renamed for example Bio::Fastq:QualityScore has been renamed to Bio::Sequence::QualityScore</p>
<p><strong>Better documentation</strong></p>
<p>There is a samples folder that includes sample usage of some classes and methods.</p>
<p><strong>PhyloXML support</strong></p>
<p>Support for the phyloxml parser and writer has been included. A new version(1.10) of the PhyloXML schema has been added.</p>
<p>This contribution was provided by the awesome <a href="http://latvianlinuxgirl.blogspot.com/2009/05/hello-world.html" target="_blank">Latvian girl</a> through a Google Summer of Code project and <span style="font-family:Georgia, serif;line-height:20px;color:#333333;">working for NESCent organization.</span></p>
<p><strong>Meme and Mast support</strong></p>
<p>Contributed by Adam Kraut. Minimal and basic support for the motif finding application Meme and Mast has been added.</p>
<p><strong>Efficiency</strong></p>
<p>Speed up of Bio::Tree.children</p>
<div id="_mcePaste">&#8220;For speed up of Bio::Tree#children and parent, internal cache of</div>
<div id="_mcePaste">the parent for each node is added. The cache is automatically</div>
<div id="_mcePaste">cleared when the tree is modified. Note that the cache can only</div>
<div id="_mcePaste">be accessed from inside Bio::Tree.</div>
<pre>* Bio::Tree#parent is changed to directly raise IndexError when</pre>
<div id="_mcePaste">both of the root specified in the argument and preset in the</div>
<div id="_mcePaste">tree are nil (previously, the same error is raised in the path</div>
<div id="_mcePaste">method which is internally called from the parent method).</div>
<pre>* Bio::Tree#path is changed not to call bfs_shortest_path if the</pre>
<div id="_mcePaste">node1 and node2 are adjacent.&#8221;</div>
</div>
<div>To build a gem based on the current snapshot,  make sure the following lines have been included in the bioruby.gemspecs file. The current(today&#8217;s) snapshot may have already fixed this by now. :)</div>
<pre>    "lib/bio/db/fasta/fasta_to_biosequence.rb",
    "lib/bio/db/fastq/fastq_to_biosequence.rb",
    "lib/bio/db/fastq/format_fastq.rb",
    "lib/bio/db/fastq.rb",
    "lib/bio/db/sanger_chromatogram/abif.rb",
    "lib/bio/db/sanger_chromatogram/chromatogram.rb",
    "lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb",
    "lib/bio/db/sanger_chromatogram/scf.rb",
    "lib/bio/db/phyloxml/phyloxml.xsd",
    "lib/bio/db/phyloxml/phyloxml_elements.rb",
    "lib/bio/db/phyloxml/phyloxml_parser.rb",
    "lib/bio/db/phyloxml/phyloxml_writer.rb",
    "lib/bio/sequence/quality_score.rb"</pre>
<div>Note that this is the bleeding edge version and things are bound to break.</div>
<div>Please forward <a title="Mailing list" href="mailto:bioruby@lists.open-bio.org" target="_blank">feedback to the developers.</a></div>
<pre style="font:normal normal normal 12px/18px Consolas, Monaco, 'Courier New', Courier, monospace;"><span style="font-family:arial, sans-serif;line-height:normal;border-collapse:collapse;color:#222222;">Thank you for the awesome work! </span></pre>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/biorelated.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/biorelated.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/biorelated.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/biorelated.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/biorelated.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/biorelated.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/biorelated.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/biorelated.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/biorelated.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/biorelated.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/biorelated.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/biorelated.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/biorelated.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/biorelated.wordpress.com/234/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=234&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://biorelated.com/2009/12/17/whats-new-in-bioruby/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d9e14f1be0972ff1f393cc87dbd072e1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">george_g</media:title>
		</media:content>

		<media:content url="http://biorelated.files.wordpress.com/2009/12/bioruby-link.gif" medium="image">
			<media:title type="html">bioruby-link</media:title>
		</media:content>
	</item>
		<item>
		<title>Standalone BLAST with Ruby revisited</title>
		<link>http://biorelated.com/2009/12/15/standalone-blast-with-ruby-revisited/</link>
		<comments>http://biorelated.com/2009/12/15/standalone-blast-with-ruby-revisited/#comments</comments>
		<pubDate>Tue, 15 Dec 2009 09:32:19 +0000</pubDate>
		<dc:creator>George</dc:creator>
				<category><![CDATA[bioinformatics]]></category>
		<category><![CDATA[blast]]></category>
		<category><![CDATA[databases]]></category>
		<category><![CDATA[ruby]]></category>
		<category><![CDATA[tutorials]]></category>

		<guid isPermaLink="false">http://biorelated.wordpress.com/?p=211</guid>
		<description><![CDATA[Earlier  I showed a very simple way to perform a BLAST  using Ruby. Today I would like to revisit that topic for two reasons. The &#8220;using ruby with blast&#8221; search term seems to be very common and actually one of the ways that people reach my blog. The original post was not very thorough. BLAST [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=211&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Earlier  I showed a very simple way to<a href="http://biorelated.wordpress.com/2007/10/03/standalone-blast-with-ruby-part-1/" target="_blank"> perform a BLAST  using Ruby</a>. Today I would like to revisit that topic for two reasons.</p>
<ol>
<li>The &#8220;using ruby with blast&#8221; search term seems to be very common and actually one of the ways that people reach my blog.</li>
<li>The original post was not very thorough.</li>
</ol>
<p>BLAST aka Basic Local Alignment Search Tool is used to search a sequence (either DNA or protein) against a database of other sequences (either all nucleotide or all protein) in order to identify similar sequences. BLAST has many different flavors and can  search DNA against DNA or protein against protein and also can translate a nucleotide query and search it against a protein database  and vice versa. It can also compute a “profile” for the query sequence and use that for further searches as well as search the query against a database of profiles.</p>
<p>The BLAST tool is fundamental to molecular biologists and bioinformaticians. There are excellent books and tutorials on how to and when to use BLAST, so I will assume all you need is to automate your work and parse the results. The actual algorithm is implemented in C and freely  available from the NCBI website. The first thing  to do is to download the appropriate binaries for your platform. <a title="installing blast" href="http://bioinfolab.unl.edu/emlab/documents/blast_readme/README.bls.html" target="_blank">Instructions for setting up and installing BLAST</a></p>
<p>Once installed on your system  the primary method of interaction is using the command line. Use formatdb to create blast databases and blastall to search for sequence homology for a given sequence against a given blast database.</p>
<p>In Ruby, there are two ways you can call the BLAST program. First using the <a href="http://bioruby.org/" target="_blank">Bioruby library</a> and second by writing your own ruby wrapper for the BLAST command line parameters and execution. Most often, one executes BLAST from the command line and then process the results file which is in either one of the many BLAST output formats. Bioruby is excellent  at parsing the results file. Using Bioruby with BLAST is  very straightforward:</p>
<p>#blasting the bioruby way   #query_file: a list of query sequences in fasta format   #database_path: a path to the actual BLAST formatted database   #program: The BLAST program to call, either of blastp,blastn,tblastn e.t.c.<br />
def bio_blast(program, database_path,query_file)</p>
<p><code><br />
factory = Bio::Blast.local(program,database_path)<br />
ff = Bio::FlatFile.open(Bio::FastaFormat, query_file)<br />
ff.each do |entry|<br />
report = factory.query(entry) # report will be a Blast::Report object<br />
# iterate through the hits<br />
report.each do|hit|<br />
puts hit.bit_score        # bit score (*)<br />
puts hit.query_seq        # query sequence<br />
puts hit.midline          # middle line string of alignment of homologous region (*)<br />
puts hit.target_seq       # hit sequence<br />
puts hit.evalue           # E-value<br />
puts hit.identity         # % identity<br />
puts hit.overlap          # length of overlapping region<br />
puts hit.query_id         # identifier of query sequence<br />
puts hit.query_def        # definition(comment line) of query sequence<br />
puts hit.query_len        # length of query sequence<br />
puts hit.target_id        # identifier of hit sequence<br />
puts hit.target_def       # definition(comment line) of hit sequence<br />
puts hit.target_len       # length of hit sequence<br />
puts hit.query_start      # start position of homologous region in query sequence<br />
puts hit.query_end        # end position of homologous region in query sequence<br />
puts hit.target_start     # start position of homologous region in hit(target) sequence<br />
puts hit.target_end       # end position of homologous region in hit(target) sequence<br />
puts hit.lap_at           # array of above four numbers<br />
hit.each do |hsp|<br />
   puts hsp.query_from<br />
   end<br />
  end<br />
 end<br />
end<br />
</code></p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/biorelated.wordpress.com/211/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/biorelated.wordpress.com/211/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/biorelated.wordpress.com/211/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/biorelated.wordpress.com/211/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/biorelated.wordpress.com/211/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/biorelated.wordpress.com/211/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/biorelated.wordpress.com/211/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/biorelated.wordpress.com/211/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/biorelated.wordpress.com/211/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/biorelated.wordpress.com/211/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/biorelated.wordpress.com/211/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/biorelated.wordpress.com/211/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/biorelated.wordpress.com/211/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/biorelated.wordpress.com/211/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=211&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://biorelated.com/2009/12/15/standalone-blast-with-ruby-revisited/feed/</wfw:commentRss>
		<slash:comments>4</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d9e14f1be0972ff1f393cc87dbd072e1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">george_g</media:title>
		</media:content>
	</item>
		<item>
		<title>A ruby class for screen-scraping plasmodb database</title>
		<link>http://biorelated.com/2009/12/09/a-ruby-class-for-screen-scraping-plasmodb-database/</link>
		<comments>http://biorelated.com/2009/12/09/a-ruby-class-for-screen-scraping-plasmodb-database/#comments</comments>
		<pubDate>Wed, 09 Dec 2009 15:25:50 +0000</pubDate>
		<dc:creator>George</dc:creator>
				<category><![CDATA[bioinformatics]]></category>
		<category><![CDATA[bioruby]]></category>
		<category><![CDATA[databases]]></category>
		<category><![CDATA[malaria]]></category>
		<category><![CDATA[ruby]]></category>
		<category><![CDATA[tutorials]]></category>

		<guid isPermaLink="false">http://biorelated.wordpress.com/?p=182</guid>
		<description><![CDATA[Plasmodb is the primary resource for retrieving Plasmodium falciparum genomic data and information. Unfortunately this database has no API or XML service to request or query its  information from a programmer&#8217;s point of view or for easy automation of sequence information retrieval.  Recently I needed to download a long list of Plasmodium falciparum genomic, Protein [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=182&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><a title="Plasmodb" href="http://www.plasmodb.org" target="_blank">Plasmodb</a> is the primary resource for retrieving <em>Plasmodium falciparum </em>genomic data and information. Unfortunately this database has no API or XML service to request or query its  information from a programmer&#8217;s point of view or for easy automation of sequence information retrieval.  Recently I needed to download a long list of <em>Plasmodium falciparum </em>genomic, Protein and other information for a set of genes. Being too lazy to click and open the webpage for each gene in my list, I wrote this in Ruby.</p>
<p>It would be great if Plasmodb  would provide an easy way  of automated sequence retrieval. A webservice or an XML output format would do. Screen scraping is not a very efficient approach.  Here we use<a title="ScrAPI" href="http://blog.labnotes.org/tag/scrapi/" target="_self"> Scrapi </a>which  is an HTML scraping toolkit for Ruby. It uses CSS selectors to write easy, maintainable scraping rules to select, extract and store data from HTML content.</p>
<p><img src="http://static.rubyforge.vm.bytemark.co.uk/themes/rubyforge/images/clear.png" alt="" width="10" height="1" /></p>
<pre><span style="color:#969696;">#A class to fetch information from plasmodb using the scrapi API
</span><span style="color:#969696;">#</span><span style="color:#969696;">#TODO handle  Scraper::Reader::HTTPUnspecifiedError
</span><span style="color:#0000e6;">class</span> <span style="color:#000000;">Plasmodb</span>
   <span style="color:#969696;">#retrieves information using the gene_id
</span>   <span style="color:#969696;">#returns a structure obj
</span>  <span style="color:#0000e6;">def</span> fetch_by_gene_id(var_name)
    <span style="color:#0000e6;">begin</span>
      scraper = <span style="color:#000000;">Scraper</span>.define <span style="color:#0000e6;">do</span>
        process <span style="color:#ce7b00;">"</span><span style="color:#ce7b00;">div#genomicSequence pre</span><span style="color:#ce7b00;">"</span>,    <span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">genomic_sequence</span>  =&gt; <span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">text</span>
        process <span style="color:#ce7b00;">"</span><span style="color:#ce7b00;">div#transcriptSequence pre</span><span style="color:#ce7b00;">"</span>, <span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">mrna_sequence</span> =&gt;<span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">text</span>
        process <span style="color:#ce7b00;">"</span><span style="color:#ce7b00;">div#proteinSequence pre</span><span style="color:#ce7b00;">"</span>,    <span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">protein_sequence</span>  =&gt;<span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">text</span><span style="color:#969696;">
</span>        process <span style="color:#ce7b00;">"</span><span style="color:#ce7b00;">div#Aliases td&gt;table</span><span style="color:#ce7b00;">"</span>,       <span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">aliases</span> =&gt;<span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">text</span>
        result <span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">protein_sequence</span>,<span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">aliases</span>,<span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">mrna_sequence</span>,<span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">genomic_sequence</span>
        <span style="color:#0000e6;">end
</span>
     search_link="http://plasmodb.org/plasmo/showRecord.do?
               name=GeneRecordClasses.GeneRecordClass&amp;source_id="+var_name+"&amp;project_id=PlasmoDB"
     uri = <span style="color:#000000;">URI</span>.parse(search_link)
     <span style="color:#009900;">@query</span> = scraper.scrape(uri)

    <span style="color:#0000e6;">rescue</span> <span style="color:#000000;">Scraper</span>::<span style="color:#000000;">Reader</span>::<span style="color:#000000;">HTTPUnspecifiedError</span>
      <span style="color:#ce7b00;">"</span><span style="color:#ce7b00;">None</span><span style="color:#ce7b00;">"</span>
    <span style="color:#0000e6;">end</span>
  <span style="color:#0000e6;">end</span>
  <span style="color:#969696;">#returns the predicted protein sequence
</span>  <span style="color:#0000e6;">def</span> protein_sequence
    <span style="color:#009900;">@query</span>.protein_sequence.chomp
  <span style="color:#0000e6;">end</span>
<span style="color:#969696;">#  Returns the genomic sequence
</span>  <span style="color:#0000e6;">def</span> genomic_sequence
    <span style="color:#009900;">@query</span>.genomic_sequence.chomp
  <span style="color:#0000e6;">end</span>
  <span style="color:#969696;">#returns Aliases
</span>  <span style="color:#0000e6;">def</span> aliases
    <span style="color:#009900;">@query</span>.aliases
  <span style="color:#0000e6;">end</span>
  <span style="color:#969696;">#returns the mrna sequence
</span>  <span style="color:#0000e6;">def</span> mrna_sequence
    <span style="color:#009900;">@query</span>.mrna_sequence.chomp
  <span style="color:#0000e6;">end</span>
<span style="color:#0000e6;">end</span>

<span style="color:#969696;">#Use the class to fetch information.</span><span style="color:#969696;">
</span>require <span style="color:#ce7b00;">'</span><span style="color:#ce7b00;">rubygems</span><span style="color:#ce7b00;">'</span>
require <span style="color:#ce7b00;">'</span><span style="color:#ce7b00;">bio</span><span style="color:#ce7b00;">'</span>
require <span style="color:#ce7b00;">'</span><span style="color:#ce7b00;">scrapi</span><span style="color:#ce7b00;">'</span>

file = <span style="color:#ce7b00;">"</span><span style="color:#ce7b00;">/home/george/genes_list.txt</span><span style="color:#ce7b00;">"</span> <span style="color:#969696;">#a file containing a list of accession numbers.
#one accession number per line
</span>
plasmo = <span style="color:#000000;">Plasmodb</span>.new <span style="color:#969696;">#initialize a plasmodb class instance
</span>
<span style="color:#969696;">#Read the file and process each accession number.
</span><span style="color:#000000;">File</span>.readlines(file).each <span style="color:#0000e6;">do</span> |line|
  line.chomp!
  plasmo.fetch_by_gene_id(line)  <span style="color:#969696;">#fetches the information from Plasmodb.
</span>  <span style="color:#969696;">#print a fasta entry for the protein sequence
</span>  puts <span style="color:#000000;">Bio</span>::<span style="color:#000000;">Sequence</span>.new(plasmo.protein_sequence).output(<span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">fasta</span>,<span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">header</span>=&gt;line)
  puts <span style="color:#000000;">Bio</span>::<span style="color:#000000;">Sequence</span>.new(plasmo.genomic_sequence).output(<span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">fasta</span>,<span style="color:#2e92c7;">:</span><span style="color:#2e92c7;">header</span>=&gt;line)
<span style="color:#0000e6;">end</span>

<span style="color:#0000e6;">#another example</span><span style="color:#0000e6;">
<div id="_mcePaste">#p = Plasmodb.new</div>
<div id="_mcePaste">#p.fetch_by_gene_id("PFD0020c")</div>
<div id="_mcePaste">#puts p.genomic_sequence</div>

</span></pre>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/biorelated.wordpress.com/182/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/biorelated.wordpress.com/182/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/biorelated.wordpress.com/182/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/biorelated.wordpress.com/182/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/biorelated.wordpress.com/182/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/biorelated.wordpress.com/182/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/biorelated.wordpress.com/182/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/biorelated.wordpress.com/182/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/biorelated.wordpress.com/182/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/biorelated.wordpress.com/182/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/biorelated.wordpress.com/182/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/biorelated.wordpress.com/182/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/biorelated.wordpress.com/182/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/biorelated.wordpress.com/182/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=biorelated.com&amp;blog=1167040&amp;post=182&amp;subd=biorelated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://biorelated.com/2009/12/09/a-ruby-class-for-screen-scraping-plasmodb-database/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d9e14f1be0972ff1f393cc87dbd072e1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">george_g</media:title>
		</media:content>

		<media:content url="http://static.rubyforge.vm.bytemark.co.uk/themes/rubyforge/images/clear.png" medium="image" />
	</item>
	</channel>
</rss>
