001    /* CollationElementIterator.java -- Walks through collation elements
002       Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004  Free Software Foundation
003    
004    This file is part of GNU Classpath.
005    
006    GNU Classpath is free software; you can redistribute it and/or modify
007    it under the terms of the GNU General Public License as published by
008    the Free Software Foundation; either version 2, or (at your option)
009    any later version.
010     
011    GNU Classpath is distributed in the hope that it will be useful, but
012    WITHOUT ANY WARRANTY; without even the implied warranty of
013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014    General Public License for more details.
015    
016    You should have received a copy of the GNU General Public License
017    along with GNU Classpath; see the file COPYING.  If not, write to the
018    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019    02110-1301 USA.
020    
021    Linking this library statically or dynamically with other modules is
022    making a combined work based on this library.  Thus, the terms and
023    conditions of the GNU General Public License cover the whole
024    combination.
025    
026    As a special exception, the copyright holders of this library give you
027    permission to link this library with independent modules to produce an
028    executable, regardless of the license terms of these independent
029    modules, and to copy and distribute the resulting executable under
030    terms of your choice, provided that you also meet, for each linked
031    independent module, the terms and conditions of the license of that
032    module.  An independent module is a module which is not derived from
033    or based on this library.  If you modify this library, you may extend
034    this exception to your version of the library, but you are not
035    obligated to do so.  If you do not wish to do so, delete this
036    exception statement from your version. */
037    
038    
039    package java.text;
040    
041    import gnu.java.lang.CPStringBuilder;
042    
043    import java.util.ArrayList;
044    
045    /* Written using "Java Class Libraries", 2nd edition, plus online
046     * API docs for JDK 1.2 from http://www.javasoft.com.
047     * Status: Believed complete and correct to JDK 1.1.
048     */
049    
050    /**
051     * This class walks through the character collation elements of a 
052     * <code>String</code> as defined by the collation rules in an instance of 
053     * <code>RuleBasedCollator</code>.  There is no public constructor for
054     * this class.  An instance is created by calling the
055     * <code>getCollationElementIterator</code> method on 
056     * <code>RuleBasedCollator</code>.
057     *
058     * @author Aaron M. Renn (arenn@urbanophile.com)
059     * @author Tom Tromey (tromey@cygnus.com)
060     * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
061     */
062    public final class CollationElementIterator
063    {
064      /**
065       * This is a constant value that is returned to indicate that the end of 
066       * the string was encountered.
067       */
068      public static final int NULLORDER = -1;
069    
070      /**
071       * This is the RuleBasedCollator this object was created from.
072       */
073      RuleBasedCollator collator;
074    
075      /**
076       * This is the String that is being iterated over.
077       */
078      CharacterIterator text;
079    
080      /**
081       * This is the index into the collation decomposition where we are currently scanning.
082       */
083      int index;
084    
085      /**
086       * This is the index into the String where we are currently scanning.
087       */
088      int textIndex;
089    
090      /**
091       * Array containing the collation decomposition of the
092       * text given to the constructor.
093       */
094      private RuleBasedCollator.CollationElement[] text_decomposition;
095    
096      /**
097       * Array containing the index of the specified block.
098       */
099      private int[] text_indexes;
100    
101      /**
102       * This method initializes a new instance of <code>CollationElementIterator</code>
103       * to iterate over the specified <code>String</code> using the rules in the
104       * specified <code>RuleBasedCollator</code>.
105       *
106       * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
107       * @param text The <code>String</code> to iterate over.
108       */
109      CollationElementIterator(RuleBasedCollator collator, String text)
110      {
111        this.collator = collator;
112        
113        setText (text);    
114      }
115    
116      /**
117       * This method initializes a new instance of <code>CollationElementIterator</code>
118       * to iterate over the specified <code>String</code> using the rules in the
119       * specified <code>RuleBasedCollator</code>.
120       *
121       * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
122       * @param text The character iterator to iterate over.
123       */
124      CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
125      {
126        this.collator = collator;
127        
128        setText (text);    
129      }
130    
131      RuleBasedCollator.CollationElement nextBlock()
132      {
133        if (index >= text_decomposition.length)
134          return null;
135        
136        RuleBasedCollator.CollationElement e = text_decomposition[index];
137        
138        textIndex = text_indexes[index+1];
139    
140        index++;
141    
142        return e;
143      }
144    
145      RuleBasedCollator.CollationElement previousBlock()
146      {
147        if (index == 0)
148          return null;
149        
150        index--;
151        RuleBasedCollator.CollationElement e = text_decomposition[index];
152    
153        textIndex = text_indexes[index+1];
154        
155        return e;
156      }
157    
158      /**
159       * This method returns the collation ordering value of the next character sequence
160       * in the string (it may be an extended character following collation rules).
161       * This method will return <code>NULLORDER</code> if the
162       * end of the string was reached.
163       *
164       * @return The collation ordering value.
165       */
166      public int next()
167      {
168        RuleBasedCollator.CollationElement e = nextBlock();
169    
170        if (e == null)
171          return NULLORDER;
172        
173        return e.getValue();
174      }
175    
176      /**
177       * This method returns the collation ordering value of the previous character
178       * in the string.  This method will return <code>NULLORDER</code> if the
179       * beginning of the string was reached.
180       *
181       * @return The collation ordering value.
182       */
183      public int previous()
184      {
185        RuleBasedCollator.CollationElement e = previousBlock();
186    
187        if (e == null)
188          return NULLORDER;
189        
190        return e.getValue();
191      }
192    
193      /**
194       * This method returns the primary order value for the given collation
195       * value.
196       *
197       * @param order The collation value returned from <code>next()</code> or 
198       *              <code>previous()</code>.
199       *
200       * @return The primary order value of the specified collation value.  This is
201       *         the high 16 bits.
202       */
203      public static int primaryOrder(int order)
204      {
205        // From the JDK 1.2 spec.
206        return order >>> 16;
207      }
208    
209      /**
210       * This method resets the internal position pointer to read from the
211       * beginning of the <code>String</code> again.
212       */
213      public void reset()
214      {
215        index = 0;
216        textIndex = 0;
217      }
218    
219      /**
220       * This method returns the secondary order value for the given collation
221       * value.
222       *
223       * @param order The collation value returned from <code>next()</code> or 
224       *              <code>previous()</code>.
225       *
226       * @return The secondary order value of the specified collation value.  This 
227       *         is the bits 8-15.
228       */
229      public static short secondaryOrder(int order)
230      {
231        // From the JDK 1.2 spec.
232        return (short) ((order >>> 8) & 255);
233      }
234    
235      /**
236       * This method returns the tertiary order value for the given collation
237       * value.
238       *
239       * @param order The collation value returned from <code>next()</code> or 
240       *              <code>previous()</code>.
241       *
242       * @return The tertiary order value of the specified collation value.  This 
243       *         is the low eight bits.
244       */
245      public static short tertiaryOrder(int order)
246      {
247        // From the JDK 1.2 spec.
248        return (short) (order & 255);
249      }
250    
251      /**
252       * This method sets the <code>String</code> that it is iterating over
253       * to the specified <code>String</code>.
254       *
255       * @param text The new <code>String</code> to iterate over.
256       *
257       * @since 1.2
258       */
259      public void setText(String text)
260      {
261        int idx = 0;
262        int idx_idx = 0;
263        int alreadyExpanded = 0;
264        int idxToMove = 0;
265    
266        this.text = new StringCharacterIterator(text);
267        this.index = 0;
268    
269        String work_text = text.intern();
270    
271        ArrayList a_element = new ArrayList();
272        ArrayList a_idx = new ArrayList();
273    
274        // Build element collection ordered as they come in "text".
275        while (idx < work_text.length())
276          {
277            String key, key_old;
278    
279            Object object = null;
280            int p = 1;
281            
282            // IMPROVE: use a TreeMap with a prefix-ordering rule.
283            key_old = key = null;
284            do
285              {
286                if (object != null)
287                  key_old = key;
288                key = work_text.substring (idx, idx+p);
289                object = collator.prefix_tree.get (key);
290                if (object != null && idx < alreadyExpanded)
291                  {
292                    RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
293                    if (prefix.expansion != null && 
294                        prefix.expansion.startsWith(work_text.substring(0, idx)))
295                    {
296                      object = null;
297                      key = key_old;
298                    }
299                  }
300                p++;
301              }
302            while (idx+p <= work_text.length());
303            
304            if (object == null)
305              key = key_old;
306            
307            RuleBasedCollator.CollationElement prefix =
308              (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
309    
310            /*
311             * First case: There is no such sequence in the database.
312             * We will have to build one from the context.
313             */
314            if (prefix == null)
315              {
316                /*
317                 * We are dealing with sequences in an expansion. They
318                 * are treated as accented characters (tertiary order).
319                 */
320                if (alreadyExpanded > 0)
321                  {
322                    RuleBasedCollator.CollationElement e =
323                      collator.getDefaultAccentedElement (work_text.charAt (idx));
324                    
325                    a_element.add (e);
326                    a_idx.add (new Integer(idx_idx));
327                    idx++;
328                    alreadyExpanded--;
329                    if (alreadyExpanded == 0)
330                      {
331                        /* There is not any characters left in the expansion set.
332                         * We can increase the pointer in the source string.
333                         */
334                        idx_idx += idxToMove;
335                        idxToMove = 0; 
336                      }
337                    else
338                      idx_idx++;
339                  }
340                else
341                  {
342                    /* This is a normal character. */
343                    RuleBasedCollator.CollationElement e =
344                      collator.getDefaultElement (work_text.charAt (idx));
345                    Integer i_ref = new Integer(idx_idx);
346    
347                    /* Don't forget to mark it as a special sequence so the
348                     * string can be ordered.
349                     */
350                    a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
351                    a_idx.add (i_ref);
352                    a_element.add (e);
353                    a_idx.add (i_ref);
354                    idx_idx++;
355                    idx++;
356                  }
357                continue;
358              }
359     
360            /*
361             * Second case: Here we have found a matching sequence.
362             * Here we have an expansion string prepend it to the "work text" and
363             * add the corresponding sorting element. We must also mark 
364             */
365            if (prefix.expansion != null)
366              {
367                work_text = prefix.expansion
368                  + work_text.substring (idx+prefix.key.length());
369                idx = 0;
370                a_element.add (prefix);
371                a_idx.add (new Integer(idx_idx));
372                if (alreadyExpanded == 0)
373                  idxToMove = prefix.key.length();
374                alreadyExpanded += prefix.expansion.length()-prefix.key.length();
375              }
376            else
377              {
378                /* Third case: the simplest. We have got the prefix and it
379                 * has not to be expanded.
380                 */
381                a_element.add (prefix);
382                a_idx.add (new Integer(idx_idx));
383                idx += prefix.key.length();
384                /* If the sequence is in an expansion, we must decrease the
385                 * counter.
386                 */
387                if (alreadyExpanded > 0)
388                  {
389                    alreadyExpanded -= prefix.key.length();
390                    if (alreadyExpanded == 0)
391                      {
392                        idx_idx += idxToMove;
393                        idxToMove = 0;
394                      }
395                  }
396                else
397                  idx_idx += prefix.key.length();
398              }
399          }
400        
401        text_decomposition = (RuleBasedCollator.CollationElement[])
402               a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
403        text_indexes = new int[a_idx.size()+1];
404        for (int i = 0; i < a_idx.size(); i++) 
405          {
406            text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
407          }
408        text_indexes[a_idx.size()] = text.length();
409      }
410    
411      /**
412       * This method sets the <code>String</code> that it is iterating over
413       * to the <code>String</code> represented by the specified
414       * <code>CharacterIterator</code>.
415       *
416       * @param source The <code>CharacterIterator</code> containing the new
417       * <code>String</code> to iterate over.
418       */
419      public void setText(CharacterIterator source)
420      {
421        CPStringBuilder expand = new CPStringBuilder();
422    
423        // For now assume we read from the beginning of the string.
424        for (char c = source.first();
425             c != CharacterIterator.DONE;
426             c = source.next())
427          expand.append(c);
428    
429        setText(expand.toString());
430      }
431    
432      /**
433       * This method returns the current offset into the <code>String</code>
434       * that is being iterated over.
435       *
436       * @return The iteration index position.
437       *
438       * @since 1.2
439       */
440      public int getOffset()
441      {
442        return textIndex;
443      }
444    
445      /**
446       * This method sets the iteration index position into the current
447       * <code>String</code> to the specified value.  This value must not
448       * be negative and must not be greater than the last index position
449       * in the <code>String</code>.
450       *
451       * @param offset The new iteration index position.
452       *
453       * @exception IllegalArgumentException If the new offset is not valid.
454       */
455      public void setOffset(int offset)
456      {
457        if (offset < 0)
458          throw new IllegalArgumentException("Negative offset: " + offset);
459    
460        if (offset > (text.getEndIndex() - 1))
461          throw new IllegalArgumentException("Offset too large: " + offset);
462        
463        for (index = 0; index < text_decomposition.length; index++)
464          { 
465            if (offset <= text_indexes[index])
466              break;
467          }
468        /*
469         * As text_indexes[0] == 0, we should not have to take care whether index is
470         * greater than 0. It is always.
471         */
472        if (text_indexes[index] == offset)
473          textIndex = offset;
474        else
475          textIndex = text_indexes[index-1];
476      }
477    
478      /**
479       * This method returns the maximum length of any expansion sequence that
480       * ends with the specified collation order value.  (Whatever that means).
481       *
482       * @param value The collation order value
483       *
484       * @return The maximum length of an expansion sequence.
485       */
486      public int getMaxExpansion(int value)
487      {
488        return 1;
489      }
490    }