001 /*
002 * NeoBio.java
003 *
004 * Copyright 2003 Sergio Anibal de Carvalho Junior
005 *
006 * This file is part of NeoBio.
007 *
008 * NeoBio is free software; you can redistribute it and/or modify it under the terms of
009 * the GNU General Public License as published by the Free Software Foundation; either
010 * version 2 of the License, or (at your option) any later version.
011 *
012 * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
013 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
014 * PURPOSE. See the GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License along with NeoBio;
017 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
018 * Boston, MA 02111-1307, USA.
019 *
020 * Proper attribution of the author as the source of the software would be appreciated.
021 *
022 * Sergio Anibal de Carvalho Junior mailto:sergioanibaljr@users.sourceforge.net
023 * Department of Computer Science http://www.dcs.kcl.ac.uk
024 * King's College London, UK http://www.kcl.ac.uk
025 *
026 * Please visit http://neobio.sourceforge.net
027 *
028 * This project was supervised by Professor Maxime Crochemore.
029 *
030 */
031
032 package neobio.textui;
033
034 import neobio.alignment.*;
035 import java.io.FileReader;
036 import java.io.IOException;
037
038 /**
039 * This class is a simple command line based utility for computing pairwise sequence
040 * alignments using one of the the algorithms provided in the {@link neobio.alignment}
041 * package.
042 *
043 * <P>The main method takes the follwing parameters from the command line:
044 *
045 * <CODE><BLOCKQUOTE>
046 * NeoBio <alg> <S1> <S2> [M <matrix> | S <match>
047 * <mismatch> <gap>]
048 * </BLOCKQUOTE></CODE>
049 *
050 * <UL>
051 * <LI><B><CODE><alg></CODE></B> is either <B><CODE>NW</CODE></B> for {@linkplain
052 * neobio.alignment.NeedlemanWunsch Needleman & Wunsch} (global alignment),
053 * <B><CODE>SW</CODE></B> for {@linkplain neobio.alignment.SmithWaterman Smith & Waterman}
054 * (local alignment), <B><CODE>CLZG</CODE></B> for {@linkplain
055 * neobio.alignment.CrochemoreLandauZivUkelsonGlobalAlignment Crochemore, Landau &
056 * Ziv-Ukelson global alignment} or <B><CODE>CLZL</CODE></B> for {@linkplain
057 * neobio.alignment.CrochemoreLandauZivUkelsonLocalAlignment Crochemore, Landau &
058 * Ziv-Ukelson local alignment};
059 *
060 * <LI><B><CODE><S1></CODE></B> is the first sequence file;
061 *
062 * <LI><B><CODE><S2></CODE></B> is the second sequence file;
063 *
064 * <LI><B><CODE>M <matrix></CODE></B> is for using a scoring matrix file;
065 *
066 * <LI><B><CODE>S <match> <mismatch> <gap></CODE></B> is for using a
067 * simple scoring scheme, where <B><CODE><match></CODE></B> is the match reward
068 * value, <B><CODE><mismatch></CODE></B> is the mismatch penalty value and
069 * <B><CODE><gap></CODE></B> is the cost of a gap (linear gap cost function).
070 * </UL>
071 *
072 * @author Sergio A. de Carvalho Jr.
073 */
074 public class NeoBio
075 {
076 /**
077 * The main method takes parameters from the command line to compute a pairwise
078 * sequence alignment. See the class description for details.
079 *
080 * @param args command line arguments
081 */
082 public static void main (String args[])
083 {
084 PairwiseAlignmentAlgorithm algorithm;
085 FileReader seq1, seq2;
086 ScoringScheme scoring;
087 PairwiseAlignment alignment;
088 String algo, file1, file2, scoring_type;
089 long start, elapsed;
090 int match, mismatch, gap;
091
092 try
093 {
094 // create an instance of the
095 // requested algorithm
096 algo = args[0];
097
098 if (algo.equalsIgnoreCase("nw"))
099 algorithm = new NeedlemanWunsch();
100 else if (algo.equalsIgnoreCase("sw"))
101 algorithm = new SmithWaterman();
102 else if (algo.equalsIgnoreCase("clzg"))
103 algorithm = new CrochemoreLandauZivUkelsonGlobalAlignment();
104 else if (algo.equalsIgnoreCase("clzl"))
105 algorithm = new CrochemoreLandauZivUkelsonLocalAlignment();
106 else
107 {
108 usage();
109 System.exit(1);
110 return;
111 }
112
113 // sequences file names
114 file1 = args[1];
115 file2 = args[2];
116 }
117 catch (ArrayIndexOutOfBoundsException e)
118 {
119 usage();
120 System.exit(1);
121 return;
122 }
123
124 try
125 {
126 // scoring scheme type
127 scoring_type = args[3];
128
129 try
130 {
131 if (scoring_type.equalsIgnoreCase("M"))
132 {
133 // use scoring matrix
134 scoring = new ScoringMatrix (new FileReader(args[4]));
135 }
136 else if (scoring_type.equalsIgnoreCase("S"))
137 {
138 // use basic scoring scheme
139 match = Integer.parseInt(args[4]);
140 mismatch = Integer.parseInt(args[5]);
141 gap = Integer.parseInt(args[6]);
142
143 scoring = new BasicScoringScheme (match, mismatch, gap);
144 }
145 else
146 {
147 usage();
148 System.exit(1);
149 return;
150 }
151 }
152 catch (NumberFormatException e)
153 {
154 usage();
155 System.exit(1);
156 return;
157 }
158 catch (ArrayIndexOutOfBoundsException e)
159 {
160 usage();
161 System.exit(1);
162 return;
163 }
164 catch (InvalidScoringMatrixException e)
165 {
166 System.err.println(e.getMessage());
167 System.exit(2);
168 return;
169 }
170 catch (IOException e)
171 {
172 System.err.println(e.getMessage());
173 System.exit(2);
174 return;
175 }
176 }
177 catch (ArrayIndexOutOfBoundsException e)
178 {
179 // not specified: use default scoring scheme
180 scoring = new BasicScoringScheme (1, -1, -1);
181 }
182
183 // set scoring scheme
184 algorithm.setScoringScheme(scoring);
185
186 try
187 {
188 // load sequences
189 System.err.println("\nLoading sequences...");
190
191 seq1 = new FileReader(file1);
192 seq2 = new FileReader(file2);
193
194 start = System.currentTimeMillis();
195 algorithm.loadSequences(seq1, seq2);
196 elapsed = System.currentTimeMillis() - start;
197
198 // close files
199 seq1.close();
200 seq2.close();
201
202 System.err.println("[ Elapsed time: " + elapsed + " milliseconds ]\n");
203
204 /*
205 // compute score only
206 System.err.println("\nComputing score...");
207
208 start = System.currentTimeMillis();
209 score = algorithm.getScore();
210 elapsed = System.currentTimeMillis() - start;
211
212 System.out.println("Score: " + score);
213 System.err.println("[ Elapsed time: " + elapsed + " milliseconds ]");
214 //*/
215
216 // compute alignment
217 System.err.println("Computing alignment...");
218
219 start = System.currentTimeMillis();
220 alignment = algorithm.getPairwiseAlignment();
221 elapsed = System.currentTimeMillis() - start;
222
223 System.err.println("[ Elapsed time: " + elapsed + " milliseconds ]\n");
224
225 System.out.println("Alignment:\n" + alignment);
226 }
227 catch (InvalidSequenceException e)
228 {
229 System.err.println("Invalid sequence file.");
230 System.exit(2);
231 return;
232 }
233 catch (IncompatibleScoringSchemeException e)
234 {
235 System.err.println("Incompatible scoring scheme.");
236 System.exit(2);
237 return;
238 }
239 catch (IOException e)
240 {
241 System.err.println(e.getMessage());
242 System.exit(2);
243 return;
244 }
245
246 // print scoring scheme
247 //System.out.println(scoring);
248
249 System.exit(0);
250 }
251
252 /**
253 * Prints command line usage.
254 */
255 public static void usage ()
256 {
257 System.err.println(
258 "\nUsage: NeoBio <alg> <S1> <S2> [M <matrix> | S <match> <mismatch> <gap>]\n\n" +
259 "where:\n\n" +
260 " <alg> = NW for Needleman & Wunsch (global alignment)\n" +
261 " or SW for Smith & Waterman (local alignment)\n" +
262 " or CLZG for Crochemore, Landau & Ziv-Ukelson global alignment\n" +
263 " or CLZL for Crochemore, Landau & Ziv-Ukelson local alignment\n\n" +
264 " <S1> = first sequence file\n\n" +
265 " <S2> = second sequence file\n\n" +
266 " M <matrix> for using a scoring matrix file\n\n" +
267 "or\n\n" +
268 " S <match> <mismatch> <gap> for using a simple scoring scheme\n" +
269 " where <match> = match reward value\n" +
270 " <mismatch> = mismatch penalty value\n" +
271 " <gap> = cost of a gap (linear gap cost function)"
272 );
273 }
274 }