001 /*
002 * PairwiseAlignment.java
003 *
004 * Copyright 2003 Sergio Anibal de Carvalho Junior
005 *
006 * This file is part of NeoBio.
007 *
008 * NeoBio is free software; you can redistribute it and/or modify it under the terms of
009 * the GNU General Public License as published by the Free Software Foundation; either
010 * version 2 of the License, or (at your option) any later version.
011 *
012 * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
013 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
014 * PURPOSE. See the GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License along with NeoBio;
017 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
018 * Boston, MA 02111-1307, USA.
019 *
020 * Proper attribution of the author as the source of the software would be appreciated.
021 *
022 * Sergio Anibal de Carvalho Junior mailto:sergioanibaljr@users.sourceforge.net
023 * Department of Computer Science http://www.dcs.kcl.ac.uk
024 * King's College London, UK http://www.kcl.ac.uk
025 *
026 * Please visit http://neobio.sourceforge.net
027 *
028 * This project was supervised by Professor Maxime Crochemore.
029 *
030 */
031
032 package neobio.alignment;
033
034 import java.io.Serializable;
035
036 /**
037 * This class is the product of a pairwise alignment, generated by one subclasses of
038 * {@linkplain PairwiseAlignmentAlgorithm}. It contains the two sequences strings with
039 * gaps, a score tag line, and a score value. It is typically displayed in three rows as
040 * in the following example of an alignment between parts of two protein sequences:
041 *
042 * <CODE><BLOCKQUOTE><PRE>
043 * MDEIHQLEDMFTVDSETLRKVVKHFILPHD-----MRTTKHQEELWSFIAELDSLKDFMVEQE // sequence 1
044 * M +I E +FTV +ETL+ V KHFILP D MRTT++ +ELW FIA DSLK F+ EQ // score tag line
045 * MQQIENFEKIFTVPTETLQAVTKHFILP-DATETLMRTTQNPDELWEFIA--DSLKAFIDEQF // sequence 2
046 * </PRE></BLOCKQUOTE></CODE>
047 *
048 * <P>Each column has one character of each sequence and a score tag. The same character
049 * is displayed in all three rows when a column has an exact match (character of sequences
050 * 1 and 2 are equal). When a mismatch occurs (substitution of different characters), the
051 * score tag is left blank. A '+' in the score line signals a partial match (a
052 * substitution of similar characters). The difference between a partial match and a
053 * mismatch is that the score of a partial match is positive whereas the score of a
054 * mismatch is zero or negative (each case is determined by the scoring scheme).</P>
055 *
056 * <P>Gaps are usually represented by dashes ('-') and have a blank score tag. Insertions
057 * have dashes in sequence 1 and the inserted character in sequence 2. Deletions, by
058 * contrast, have the deleted character in sequence 1 and dashes in sequence 2.</P>
059 *
060 * <P>Each column carries a score value for the corresponding operation (as defined by the
061 * scoring scheme). The overall score of a pairwise alignment is the sum of all columns
062 * scores values.</P>
063 *
064 * <P>When the scoring schemes does not support partial matches, a match is usually
065 * signaled by a '|' character.<P>
066 *
067 * <P>Note that these special characters are defined by the
068 * <CODE>PairwiseAlignmentAlgorithm</CODE> class. Consult that class specification for the
069 * actual configuration. For instance, an alignment between two DNA fragmens may look like
070 * this:</P>
071 *
072 * <CODE><BLOCKQUOTE><PRE>
073 * A--C--TAAAAAGCA--TT-AATAATAAA-A
074 * | | |||| ||| || ||||| ||| |
075 * AAGCCCTAAACCGCAAGTTTAATAA-AAATA
076 * </PRE></BLOCKQUOTE></CODE>
077 *
078 * <P>This class is serializable, so it can be saved to a file (or any other output). It
079 * overrides the default <CODE>equals</CODE> method of the <CODE>Object</CODE> class to
080 * allow a proper comparsion of alignments produced by different algorithms or even
081 * different runs of the same algorithm. However, it does not override the
082 * <CODE>hashCode</CODE> method as it is generally the case to maintain the contract for
083 * the <CODE>hashCode</CODE> method (which states that equal objects must have equal hash
084 * codes). Hence, as it is, its use in a hash table is not supported.</P>
085 *
086 * @author Sergio A. de Carvalho Jr.
087 * @see PairwiseAlignmentAlgorithm
088 * @see PairwiseAlignmentAlgorithm#MATCH_TAG
089 * @see PairwiseAlignmentAlgorithm#APPROXIMATE_MATCH_TAG
090 * @see PairwiseAlignmentAlgorithm#MISMATCH_TAG
091 * @see PairwiseAlignmentAlgorithm#GAP_TAG
092 * @see PairwiseAlignmentAlgorithm#GAP_CHARACTER
093 * @see ScoringScheme
094 * @see ScoringScheme#isPartialMatchSupported
095 */
096 public class PairwiseAlignment implements Serializable
097 {
098 /**
099 * First gapped sequence.
100 *
101 * @serial
102 */
103 protected String gapped_seq1;
104
105 /**
106 * The score tag line.
107 *
108 * @serial
109 */
110 protected String score_tag_line;
111
112 /**
113 * Second gapped sequence.
114 *
115 * @serial
116 */
117 protected String gapped_seq2;
118
119 /**
120 * The overall score value for this alignment.
121 *
122 * @serial
123 */
124 protected int score;
125
126 /**
127 * Creates a <CODE>PairwiseAlignment</CODE> instance with the specified gapped
128 * sequences, score tag line and score value.
129 *
130 * @param gapped_seq1 the first gapped sequence
131 * @param score_tag_line the score tag line
132 * @param gapped_seq2 the second gapped sequence
133 * @param score the overall score value for this alignment
134 */
135 public PairwiseAlignment (String gapped_seq1, String score_tag_line,
136 String gapped_seq2, int score)
137 {
138 this.gapped_seq1 = gapped_seq1;
139 this.score_tag_line = score_tag_line;
140 this.gapped_seq2 = gapped_seq2;
141 this.score = score;
142 }
143
144 /**
145 * Returns the first gapped sequence.
146 *
147 * @return first gapped sequence
148 */
149 public String getGappedSequence1 ()
150 {
151 return gapped_seq1;
152 }
153
154 /**
155 * Returns the score tag line.
156 *
157 * @return score tag line
158 */
159 public String getScoreTagLine ()
160 {
161 return score_tag_line;
162 }
163
164 /**
165 * Returns the second gapped sequence.
166 *
167 * @return second gapped sequence
168 */
169 public String getGappedSequence2 ()
170 {
171 return gapped_seq2;
172 }
173
174 /**
175 * Returns the score for this alignment.
176 *
177 * @return overall score for this alignment
178 */
179 public int getScore ()
180 {
181 return score;
182 }
183
184 /**
185 * Returns a four-line String representation of this alignment in the following
186 * order: first gapped sequence, score tag line, second gapped sequence and the
187 * score value.
188 *
189 * @return a String representation of this scoring matrix
190 */
191 public String toString ()
192 {
193 return gapped_seq1 + "\n" + score_tag_line + "\n"
194 + gapped_seq2 + "\nScore: " + score;
195 }
196
197 /**
198 * Compares this object to the specified object. The result is <CODE>true</CODE> if
199 * and only if the argument is not <CODE>null</CODE> and is an
200 * <CODE>PairwiseAlignment</CODE> object that contains the same values as this object,
201 * i.e. the same gapped sequences, the same score tag line and the same score.
202 *
203 * @param obj the object to compare with
204 * @return <CODE>true</CODE> if objects are the same, <CODE>false</CODE> otherwise
205 */
206 public boolean equals (Object obj)
207 {
208 if (!(obj instanceof PairwiseAlignment))
209 return false;
210
211 PairwiseAlignment another_pa = (PairwiseAlignment) obj;
212
213 if (this.score != another_pa.score)
214 return false;
215
216 if (!this.gapped_seq1.equals(another_pa.gapped_seq1))
217 return false;
218
219 if (!this.score_tag_line.equals(another_pa.score_tag_line))
220 return false;
221
222 if (!this.gapped_seq2.equals(another_pa.gapped_seq2))
223 return false;
224
225 return true;
226 }
227 }