View Javadoc
1   /*
2    * $Source$
3    * $Revision$
4    *
5    * Copyright (C) 2003 Jim Wright
6    *
7    * Part of Melati (http://melati.org), a framework for the rapid
8    * development of clean, maintainable web applications.
9    *
10   * Melati is free software; Permission is granted to copy, distribute
11   * and/or modify this software under the terms either:
12   *
13   * a) the GNU General Public License as published by the Free Software
14   *    Foundation; either version 2 of the License, or (at your option)
15   *    any later version,
16   *
17   *    or
18   *
19   * b) any version of the Melati Software License, as published
20   *    at http://melati.org
21   *
22   * You should have received a copy of the GNU General Public License and
23   * the Melati Software License along with this program;
24   * if not, write to the Free Software Foundation, Inc.,
25   * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA to obtain the
26   * GNU General Public License and visit http://melati.org to obtain the
27   * Melati Software License.
28   *
29   * Feel free to contact the Developers of Melati (http://melati.org),
30   * if you would like to work out a different arrangement than the options
31   * outlined here.  It is our intention to allow Melati to be used by as
32   * wide an audience as possible.
33   *
34   * This program is distributed in the hope that it will be useful,
35   * but WITHOUT ANY WARRANTY; without even the implied warranty of
36   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
37   * GNU General Public License for more details.
38   *
39   * Contact details for copyright holder:
40   *
41   *     Jim Wright <jimw At paneris.org>
42   *     Bohemian Enterprise
43   *     Predmerice nad Jizerou 77
44   *     294 74
45   *     Mlada Boleslav
46   *     Czech Republic
47   */
48  
49  package org.melati.util;
50  
51  import java.io.StreamTokenizer;
52  import java.io.StringReader;
53  import java.io.IOException;
54  import java.util.Iterator;
55  import java.util.Enumeration;
56  
57  /**
58   * Representation of occurrences of an HTTP header field.
59   * <p>
60   * These are defined in RFC 2616 and have the same general form as in
61   * RFC 822 section 3.1.
62   * <P>
63   * We generally assume that all continuation lines and occurrences in
64   * a message are concatenated with comma separators.
65   *
66   * @author  Jim Wright
67   */
68  public class HttpHeader {
69  
70    /**
71     * Instance of inner {@link Tokenizer}.
72     */
73    protected Tokenizer tokenizer;
74  
75    /**
76     * Create an instance representing the given comma separated fields.
77     */
78    public HttpHeader(String values) {
79      if(values == null)
80        values= "";
81      tokenizer = new Tokenizer(values);
82    }
83  
84    /**
85     * Abstract enumeration of fields.
86     * <p>
87     * Subtypes decide what type of token to return and how
88     * to represent it.
89     * <p>
90     * This class serves to remove doubts about whether we should and can
91     * implement <code>Iterator</code> or <code>Enumeration</code> and
92     * proves itself unnecessary ;-). But we can factor stuff out and
93     * re-use it later.
94     * <p>
95     * Actually, it also removes the need to think about exceptions in
96     * subtypes.
97     */
98    @SuppressWarnings("rawtypes")
99    public abstract class FieldIterator implements Iterator<Object>, Enumeration{
100 
101     /**
102      * {@inheritDoc}
103      * @see java.util.Enumeration#hasMoreElements()
104      */
105     public final boolean hasMoreElements() {
106       return hasNext();
107     }
108 
109     /**
110      * {@inheritDoc}
111      * @see java.util.Enumeration#nextElement()
112      */
113     public final Object nextElement() {
114       return next();
115     }
116 
117     /**
118      * {@inheritDoc}
119      * @see java.util.Iterator#hasNext()
120      * @see #next()
121      */
122     public final boolean hasNext() {
123       return tokenizer.ttype != StreamTokenizer.TT_EOF;
124     }
125 
126     /**
127      * {@inheritDoc}
128      * @see java.util.Iterator#remove()
129      */
130     public void remove() throws UnsupportedOperationException {
131       throw new UnsupportedOperationException("Cannot remove tokens from the HTTP header");
132     }
133 
134     /**
135      * Return the next element or an exception.
136      *
137      * @return An exception if an object of the anticipated type cannot be returned
138      */
139     public Object next() {
140       try {
141         return nextToken();
142       }
143       catch (HttpHeaderException e) {
144         return e;
145       }
146     }
147 
148     /**
149      * @return the next token or throws an exception
150      */
151     public abstract Object nextToken() throws HttpHeaderException;
152 
153   }
154 
155   /**
156    * Iteration over {@link HttpHeader.TokenAndQValue}s.
157    */
158   public class WordIterator extends FieldIterator {
159 
160     /**
161      * @return the next word
162      */
163     public String nextWord() throws HttpHeaderException {
164       String result = tokenizer.readWord();
165       tokenizer.skipAnyCommaSeparator();
166       return result;
167     }
168 
169     /**
170      * {@inheritDoc}
171      * @see org.melati.util.HttpHeader.FieldIterator#nextToken()
172      */
173     public Object nextToken() throws HttpHeaderException {
174       return nextWord();
175     }
176 
177   }
178 
179   /**
180    * Factory method to create and return an iterator of words.
181    * 
182    * @return a new WordIterator
183    */
184   public final WordIterator wordIterator() {
185     return new WordIterator();
186   }
187 
188   /**
189    * Iteration over {@link HttpHeader.TokenAndQValue}s.
190    */
191   public class TokenAndQValueIterator extends FieldIterator {
192 
193     /**
194      * @return the next TokenAndQValue
195      * @throws HttpHeaderException
196      */
197     public TokenAndQValue nextTokenAndQValue() throws HttpHeaderException {
198       return HttpHeader.this.nextTokenAndQValue();
199     }
200 
201     /**
202      * {@inheritDoc}
203      * @see org.melati.util.HttpHeader.FieldIterator#nextToken()
204      */
205     public Object nextToken() throws HttpHeaderException {
206       return nextTokenAndQValue();
207     }
208 
209   }
210 
211   /**
212    * Factory method to create and return the next
213    * {@link HttpHeader.TokenAndQValue}.
214    * @return a new TokenAndQValue
215    */
216   public TokenAndQValue nextTokenAndQValue() throws HttpHeaderException {
217     return new TokenAndQValue(tokenizer);
218   }
219 
220   /**
221    * Factory method to create and return an iterator of {@link TokenAndQValue}'s.
222    * @return a new TokenAndQValueIterator
223    */
224   public TokenAndQValueIterator tokenAndQValueIterator() {
225     return new TokenAndQValueIterator();
226   }
227 
228   /**
229    * A token and associated qvalue.
230    */
231   public static class TokenAndQValue {
232 
233     /**
234      * Token followed by a semicolon separator.
235      */
236     public String token;
237 
238     /**
239      * Value between zero and one with at most 3 decimal places.
240      * <p>
241      * q stands for "quality" but the RFC 2616 says this is not
242      * completely accurate.
243      * Values closer to 1.0 are better.
244      * Zero means completely unfit.
245      * <p>
246      * The default is 1.0 if not explicitly initialised and this
247      * appears to be correct for most possible uses if not all.
248      */
249     public float q = 1.0f;
250 
251     /**
252      * Create an uninitialised instance.
253      */
254     public TokenAndQValue() {
255     }
256 
257     /**
258      * Create an instance and initialise it by reading the given
259      * tokenizer.
260      */
261     public TokenAndQValue(Tokenizer t) throws HttpHeaderException {
262       this();
263       t.readTokenAndQValue(this);
264       t.skipAnyCommaSeparator();
265     }
266 
267   }  
268   
269   /**
270    * Tokenizer for parsing occurrences of a field.
271    * <p>
272    * Header fields have format defined in RFC 2616 and have the same
273    * general form as in RFC 822 section 3.1.
274    * <p>
275    * This is for fields consisting of tokens, quoted strings and
276    * separators and not those consisting of an arbitrary sequence of
277    * octets.
278    * Tokens are US ASCII characters other than:
279    * <ul>
280    * <li> control characters 0000 to 001F and 007F;
281    * <li> separators defined in RFC 2616;
282    * </ul>
283    * <p>
284    * The convenience methods defined here provide some guidance on how
285    * to interact with the super-type but you can also use inherited
286    * methods.
287    * <p>
288    * We assume that the next token is always already read when a method
289    * starts to interpret a sequence of tokens.
290    * In other words the first token is read by the constructor(s) and then
291    * each such
292    * method returns as a result of reading a token or EOF that it cannot
293    * process but without pushing it back.
294    * The next token to be interpreted is hence the current token
295    * described by the inherited instance variables.
296    * <p>
297    * Note that whitespace is automatically skipped by the supertype.
298    *
299    * @author  Jim Wright
300    */
301   public static class Tokenizer extends StreamTokenizer {
302 
303     /**
304      * Create an instance from a string formed by concatenation of
305      * continuation lines and all occurences of a field, with comma
306      * separators.
307      * <p>
308      * In theory a separator can consist of one or more commas and
309      * spaces and tab.
310      * Fields are never empty.
311      * We cope with this but I doubt typical callers ever encounter
312      * such strings.
313      * <p>
314      * The field list should not be empty but null is
315      * allowed to explicitly indicate that there are no such fields,
316      * if an instance if required nevertheless to provide other
317      * functionality.
318      * 
319      * @param fields A non-null, non-empty String
320      * @throws HttpHeaderException Error detected in the argument.
321      */
322     Tokenizer(String fields) {
323       super(new StringReader(fields));
324 
325 
326       resetSyntax();
327       // Initially make all non-control characters token
328       // characters
329       wordChars('\u0020', '\u007E');
330       // Now change separators back. Tab is not
331       // necessary and there are some ranges but let's
332       // not try and be clever.
333       String separator = "()<>@,;:\\\"/[]?={} \t";
334       for (int i = 0; i < separator.length(); i++) {
335         ordinaryChar(separator.charAt(i));
336         // System.err.println("Tested 34");
337       }
338 
339       // Resetting effectively did this to whitespace chars
340       // ordinaryChars('\u0000', '\u0020');
341       // Set space and table characters as whitespace
342       whitespaceChars(' ', ' ');
343       whitespaceChars('\t', '\t');
344 
345       quoteChar('"');
346 
347       parseNumbers();
348 
349       // Here are some things we have effectively done by resetting
350       // ordinaryChar('/');
351       // ordinaryChar('\'');
352 
353       // Do not do any other special processing
354       eolIsSignificant(false);
355       lowerCaseMode(false);
356       slashSlashComments(false);
357       slashStarComments(false);
358 
359       // Read the first token
360       nextLToken();
361       if (ttype == ',') {
362         // System.err.println("Tested 36");
363         throw new HttpHeaderException("HTTP header fields starts with comma separator");
364       }
365     }
366 
367     /**
368      * Same as <code>nextToken()</code> but does not throw an <code>IOException</code>
369      * and handles erroneous line breaks.
370      *
371      * @return int value of next LToken
372      * @throws HttpHeaderException Error detected in the fields.
373      */
374     public int nextLToken() throws HttpHeaderException {
375       int result;
376       try {
377         result = nextToken();
378         if (ttype == TT_EOL) {
379           System.err.println("Not tested 38");
380           throw new HttpHeaderException("HTTP header fields span unquoted line breaks");
381         }
382         // System.err.println("Tested 39");
383         return result;
384       }
385       catch (IOException e) {
386         //assert false : "We are reading from a string";
387         return 0;
388       }
389     }
390 
391     /**
392      * Read up to and including the next token after comma
393      * separator(s) and whitespace assuming the current token is a comma.
394      *
395      * @return Resulting ttype.
396      */
397     public final int skipCommaSeparator() throws HttpHeaderException {
398       if (ttype != ',') {
399         throw new IllegalStateException("Not at a comma");
400       }
401       while (nextLToken() == ',')
402         ;
403       return ttype;
404     }
405 
406     /**
407      * Read up to and including the next token after any comma
408      * separator(s) and whitespace.
409      * <p>
410      * This is the same as {@link #skipCommaSeparator()} but it does
411      * nothing if we are and EOF.
412      *
413      * @return Resulting ttype.
414      */
415     public final int skipAnyCommaSeparator() throws HttpHeaderException {
416       if (ttype != TT_EOF) {
417         skipCommaSeparator();
418       }
419       return ttype;
420     }
421 
422     /**
423      * Convenience method to test for token or quoted string.
424      * <p>
425      * If this returns true then the token value is in <code>sval</code>
426      * with any quotes removed.
427      * @return whether token is an SVal
428      */
429     public final boolean isSVal() {
430       return ttype == TT_WORD || ttype == '"';
431     }
432 
433     /**
434      * Read the word token or quoted string that comes next.
435      *
436      * @return the SVal 
437      * @throws HttpHeaderException Error detected in the fields.
438      */
439     public final String readSVal() throws HttpHeaderException {
440       if (! isSVal()) {
441         throw new HttpHeaderException("Next token is not a (possibly quoted) word: " +
442             toString());
443       }      
444       String result = sval;
445       nextLToken();
446       return result;
447     }
448 
449     /**
450      * Read the word token that comes next.
451      * 
452      * @return the word as a String
453      * @throws HttpHeaderException Error detected in the fields.
454      */
455     public final String readWord() throws HttpHeaderException {
456       if (ttype != TT_WORD) {
457         throw new HttpHeaderException("Next token is not a word token: " +
458                                       toString());
459       }      
460       String result = sval;
461       nextLToken();
462       // System.err.println("Tested 47");
463       return result;
464     }
465 
466     /**
467      * Read the given word token that comes next.
468      *
469      * @throws HttpHeaderException Error detected in the fields.
470      */
471     public final void readWord(String word) throws HttpHeaderException {
472       String read = readWord();
473       if (! read.equals(word)) {
474         // System.err.println("Tested 48 by temporary hack");
475         throw new HttpHeaderException("Expecting '" + word +
476                                       "' but encountered: " + toString());
477       }
478     }
479 
480     /**
481      * Read the given character that comes next.
482      *
483      * @throws HttpHeaderException Error detected in the fields.
484      */
485     public final void readChar(char c) throws HttpHeaderException {
486       if (ttype != c) {
487         // System.err.println("Tested 49");
488         throw new HttpHeaderException("Expecting '" + c +
489                                       "' but encountered: " +
490                                       toString());
491       }
492       nextLToken();
493     }
494 
495     /**
496      * Read the number token that comes next.
497      * @return the number's value as a double
498      * @throws HttpHeaderException Error detected in the fields.
499      */
500     public final double readNVal() throws HttpHeaderException {
501       if (ttype != TT_NUMBER) {
502         throw new HttpHeaderException("Next token is not a number: " +
503             toString());
504       }      
505       double result = nval;
506       nextLToken();
507       return result;
508     }
509 
510     /**
511      * Read a token sequence of the form "; q = 0.42" and return the number.
512      * @return the number's value as a float
513      *
514      * @throws IllegalStateException Current token not semicolon.
515      * @throws HttpHeaderException Error detected in the fields.
516      */
517     public final float readQValue() 
518         throws IllegalStateException, HttpHeaderException {
519       if (ttype != ';') {
520         throw new IllegalStateException("Not at a semicolon");
521       }
522       readChar(';');
523       readWord("q");
524       readChar('=');
525       return (float)readNVal();
526     }
527 
528     /**
529      * Read a word or quoted string token optionally followed by a string
530      * of the form "; q = 0.42" and initialises the given object.
531      * @return current TokenAndQValue
532      */
533     protected TokenAndQValue readTokenAndQValue(TokenAndQValue result)
534           throws HttpHeaderException {
535       result.token = readSVal();
536       switch (ttype) {
537       case TT_EOF :
538       case ',' :
539         break;
540       case ';' :
541         result.q = readQValue();
542         break;
543       default:
544         throw new HttpHeaderException("Word token: \'" + result.token +
545             "\' is followed by something unexpected: " + toString());
546       }
547       return result;
548     }
549 
550   }
551 
552 
553   public static class HttpHeaderException extends MelatiRuntimeException {
554 
555     private static final long serialVersionUID = -8870151118057435290L;
556 
557     /**
558      * Create an instance with message.
559      */
560     public HttpHeaderException(String message) {
561       super(message);
562     }
563 
564     /**
565      * Create an instance with message and cause.
566      */
567     public HttpHeaderException(String message, Exception e) {
568       super(message, e);
569     }
570 
571   }
572 
573 }