001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 019 package org.apache.commons.net.nntp; 020 021 /** 022 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski. 023 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details. 024 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a> 025 * 026 * @author rwinston <rwinston@checkfree.com> 027 * 028 */ 029 030 import java.util.Arrays; 031 import java.util.HashMap; 032 import java.util.Iterator; 033 import java.util.List; 034 035 public class Threader { 036 private ThreadContainer root; 037 private HashMap<String,ThreadContainer> idTable; 038 private int bogusIdCount = 0; 039 040 /** 041 * The main threader entry point - The client passes in an array of Threadable objects, and 042 * the Threader constructs a connected 'graph' of messages 043 * @param messages array of messages to thread 044 * @return null if messages == null or root.child == null 045 * @deprecated (2.2) prefer {@link #thread(List)} 046 */ 047 @Deprecated 048 public Threadable thread(Threadable[] messages) { 049 return thread(Arrays.asList(messages)); 050 } 051 052 /** 053 * The main threader entry point - The client passes in a list of Threadable objects, and 054 * the Threader constructs a connected 'graph' of messages 055 * @param messages list of messages to thread 056 * @return null if messages == null or root.child == null 057 * @since 2.2 058 */ 059 public Threadable thread(List<? extends Threadable> messages) { 060 if (messages == null) 061 return null; 062 063 idTable = new HashMap<String,ThreadContainer>(); 064 065 // walk through each Threadable element 066 for (Threadable t : messages) { 067 if (!t.isDummy()) 068 buildContainer(t); 069 } 070 071 root = findRootSet(); 072 idTable.clear(); 073 idTable = null; 074 075 pruneEmptyContainers(root); 076 077 root.reverseChildren(); 078 gatherSubjects(); 079 080 if (root.next != null) 081 throw new RuntimeException("root node has a next:" + root); 082 083 for (ThreadContainer r = root.child; r != null; r = r.next) { 084 if (r.threadable == null) 085 r.threadable = r.child.threadable.makeDummy(); 086 } 087 088 Threadable result = (root.child == null ? null : root.child.threadable); 089 root.flush(); 090 root = null; 091 092 return result; 093 } 094 095 /** 096 * 097 * @param threadable 098 */ 099 private void buildContainer(Threadable threadable) { 100 String id = threadable.messageThreadId(); 101 ThreadContainer container = idTable.get(id); 102 103 // A ThreadContainer exists for this id already. This should be a forward reference, but may 104 // be a duplicate id, in which case we will need to generate a bogus placeholder id 105 if (container != null) { 106 if (container.threadable != null) { // oops! duplicate ids... 107 id = "<Bogus-id:" + (bogusIdCount++) + ">"; 108 container = null; 109 } else { 110 // The container just contained a forward reference to this message, so let's 111 // fill in the threadable field of the container with this message 112 container.threadable = threadable; 113 } 114 } 115 116 // No container exists for that message Id. Create one and insert it into the hash table. 117 if (container == null) { 118 container = new ThreadContainer(); 119 container.threadable = threadable; 120 idTable.put(id, container); 121 } 122 123 // Iterate through all of the references and create ThreadContainers for any references that 124 // don't have them. 125 ThreadContainer parentRef = null; 126 { 127 String[] references = threadable.messageThreadReferences(); 128 for (int i = 0; i < references.length; ++i) { 129 String refString = references[i]; 130 ThreadContainer ref = idTable.get(refString); 131 132 // if this id doesnt have a container, create one 133 if (ref == null) { 134 ref = new ThreadContainer(); 135 idTable.put(refString, ref); 136 } 137 138 // Link references together in the order they appear in the References: header, 139 // IF they dont have a have a parent already && 140 // IF it will not cause a circular reference 141 if ((parentRef != null) 142 && (ref.parent == null) 143 && (parentRef != ref) 144 && !(parentRef.findChild(ref))) { 145 // Link ref into the parent's child list 146 ref.parent = parentRef; 147 ref.next = parentRef.child; 148 parentRef.child = ref; 149 } 150 parentRef = ref; 151 } 152 } 153 154 // parentRef is now set to the container of the last element in the references field. make that 155 // be the parent of this container, unless doing so causes a circular reference 156 if (parentRef != null 157 && (parentRef == container || container.findChild(parentRef))) 158 parentRef = null; 159 160 // if it has a parent already, its because we saw this message in a References: field, and presumed 161 // a parent based on the other entries in that field. Now that we have the actual message, we can 162 // throw away the old parent and use this new one 163 if (container.parent != null) { 164 ThreadContainer rest, prev; 165 166 for (prev = null, rest = container.parent.child; 167 rest != null; 168 prev = rest, rest = rest.next) { 169 if (rest == container) 170 break; 171 } 172 173 if (rest == null) { 174 throw new RuntimeException( 175 "Didnt find " 176 + container 177 + " in parent" 178 + container.parent); 179 } 180 181 // Unlink this container from the parent's child list 182 if (prev == null) 183 container.parent.child = container.next; 184 else 185 prev.next = container.next; 186 187 container.next = null; 188 container.parent = null; 189 } 190 191 // If we have a parent, link container into the parents child list 192 if (parentRef != null) { 193 container.parent = parentRef; 194 container.next = parentRef.child; 195 parentRef.child = container; 196 } 197 } 198 199 /** 200 * Find the root set of all existing ThreadContainers 201 * @return root the ThreadContainer representing the root node 202 */ 203 private ThreadContainer findRootSet() { 204 ThreadContainer root = new ThreadContainer(); 205 Iterator<String> iter = idTable.keySet().iterator(); 206 207 while (iter.hasNext()) { 208 Object key = iter.next(); 209 ThreadContainer c = idTable.get(key); 210 if (c.parent == null) { 211 if (c.next != null) 212 throw new RuntimeException( 213 "c.next is " + c.next.toString()); 214 c.next = root.child; 215 root.child = c; 216 } 217 } 218 return root; 219 } 220 221 /** 222 * Delete any empty or dummy ThreadContainers 223 * @param parent 224 */ 225 private void pruneEmptyContainers(ThreadContainer parent) { 226 ThreadContainer container, prev, next; 227 for (prev = null, container = parent.child, next = container.next; 228 container != null; 229 prev = container, 230 container = next, 231 next = (container == null ? null : container.next)) { 232 233 // Is it empty and without any children? If so,delete it 234 if (container.threadable == null && container.child == null) { 235 if (prev == null) 236 parent.child = container.next; 237 else 238 prev.next = container.next; 239 240 // Set container to prev so that prev keeps its same value the next time through the loop 241 container = prev; 242 } 243 244 // Else if empty, with kids, and (not at root or only one kid) 245 else if ( 246 container.threadable == null 247 && container.child != null 248 && (container.parent != null 249 || container.child.next == null)) { 250 // We have an invalid/expired message with kids. Promote the kids to this level. 251 ThreadContainer tail; 252 ThreadContainer kids = container.child; 253 254 // Remove this container and replace with 'kids'. 255 if (prev == null) 256 parent.child = kids; 257 else 258 prev.next = kids; 259 260 // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next 261 // i.e. splice kids into the list in place of container 262 for (tail = kids; tail.next != null; tail = tail.next) 263 tail.parent = container.parent; 264 265 tail.parent = container.parent; 266 tail.next = container.next; 267 268 // next currently points to the item after the inserted items in the chain - reset that so we process the newly 269 // promoted items next time round 270 next = kids; 271 272 // Set container to prev so that prev keeps its same value the next time through the loop 273 container = prev; 274 } else if (container.child != null) { 275 // A real message , with kids 276 // Iterate over the children 277 pruneEmptyContainers(container); 278 } 279 } 280 } 281 282 /** 283 * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 284 */ 285 private void gatherSubjects() { 286 287 int count = 0; 288 289 for (ThreadContainer c = root.child; c != null; c = c.next) 290 count++; 291 292 // TODO verify this will avoid rehashing 293 HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9); 294 count = 0; 295 296 for (ThreadContainer c = root.child; c != null; c = c.next) { 297 Threadable threadable = c.threadable; 298 299 // No threadable? If so, it is a dummy node in the root set. 300 // Only root set members may be dummies, and they alway have at least 2 kids 301 // Take the first kid as representative of the subject 302 if (threadable == null) 303 threadable = c.child.threadable; 304 305 String subj = threadable.simplifiedSubject(); 306 307 if (subj == null || subj == "") 308 continue; 309 310 ThreadContainer old = subjectTable.get(subj); 311 312 // Add this container to the table iff: 313 // - There exists no container with this subject 314 // - or this is a dummy container and the old one is not - the dummy one is 315 // more interesting as a root, so put it in the table instead 316 // - The container in the table has a "Re:" version of this subject, and 317 // this container has a non-"Re:" version of this subject. The non-"Re:" version 318 // is the more interesting of the two. 319 if (old == null 320 || (c.threadable == null && old.threadable != null) 321 || (old.threadable != null 322 && old.threadable.subjectIsReply() 323 && c.threadable != null 324 && !c.threadable.subjectIsReply())) { 325 subjectTable.put(subj, c); 326 count++; 327 } 328 } 329 330 // If the table is empty, we're done 331 if (count == 0) 332 return; 333 334 // subjectTable is now populated with one entry for each subject which occurs in the 335 // root set. Iterate over the root set, and gather together the difference. 336 ThreadContainer prev, c, rest; 337 for (prev = null, c = root.child, rest = c.next; 338 c != null; 339 prev = c, c = rest, rest = (rest == null ? null : rest.next)) { 340 Threadable threadable = c.threadable; 341 342 // is it a dummy node? 343 if (threadable == null) 344 threadable = c.child.threadable; 345 346 String subj = threadable.simplifiedSubject(); 347 348 // Dont thread together all subjectless messages 349 if (subj == null || subj == "") 350 continue; 351 352 ThreadContainer old = subjectTable.get(subj); 353 354 if (old == c) // That's us 355 continue; 356 357 // We have now found another container in the root set with the same subject 358 // Remove the "second" message from the root set 359 if (prev == null) 360 root.child = c.next; 361 else 362 prev.next = c.next; 363 c.next = null; 364 365 if (old.threadable == null && c.threadable == null) { 366 // both dummies - merge them 367 ThreadContainer tail; 368 for (tail = old.child; 369 tail != null && tail.next != null; 370 tail = tail.next); 371 372 tail.next = c.child; 373 374 for (tail = c.child; tail != null; tail = tail.next) 375 tail.parent = old; 376 377 c.child = null; 378 } else if ( 379 old.threadable == null 380 || (c.threadable != null 381 && c.threadable.subjectIsReply() 382 && !old.threadable.subjectIsReply())) { 383 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old 384 c.parent = old; 385 c.next = old.child; 386 old.child = c; 387 } else { 388 // else make the old and new messages be children of a new dummy container. 389 // We create a new container object for old.msg and empty the old container 390 ThreadContainer newc = new ThreadContainer(); 391 newc.threadable = old.threadable; 392 newc.child = old.child; 393 394 for (ThreadContainer tail = newc.child; 395 tail != null; 396 tail = tail.next) 397 tail.parent = newc; 398 399 old.threadable = null; 400 old.child = null; 401 402 c.parent = old; 403 newc.parent = old; 404 405 // Old is now a dummy- give it 2 kids , c and newc 406 old.child = c; 407 c.next = newc; 408 } 409 // We've done a merge, so keep the same prev 410 c = prev; 411 } 412 413 subjectTable.clear(); 414 subjectTable = null; 415 416 } 417 }