001    // EntityResolver2.java - Extended SAX entity resolver.
002    // http://www.saxproject.org
003    // No warranty; no copyright -- use this as you will.
004    // $Id: EntityResolver2.java,v 1.2 2006/12/10 20:25:41 gnu_andrew Exp $
005    
006    package org.xml.sax.ext;
007    
008    import java.io.IOException;
009    
010    import org.xml.sax.EntityResolver;
011    import org.xml.sax.InputSource;
012    import org.xml.sax.XMLReader;
013    import org.xml.sax.SAXException;
014    
015    
/**
 * Extended interface for mapping external entity references to input
 * sources, or providing a missing external subset.  The
 * {@link XMLReader#setEntityResolver XMLReader.setEntityResolver()} method
 * is used to provide implementations of this interface to parsers.
 * When a parser uses the methods in this interface, the
 * {@link EntityResolver2#resolveEntity EntityResolver2.resolveEntity()}
 * method (in this interface) is used <em>instead of</em> the older (SAX 1.0)
 * {@link EntityResolver#resolveEntity EntityResolver.resolveEntity()} method.
 *
 * <blockquote>
 * <em>This module, both source code and documentation, is in the
 * Public Domain, and comes with <strong>NO WARRANTY</strong>.</em>
 * </blockquote>
 *
 * <p>If a SAX application requires the customized handling which this
 * interface defines for external entities, it must ensure that it uses
 * an XMLReader with the
 * <em>http://xml.org/sax/features/use-entity-resolver2</em> feature flag
 * set to <em>true</em> (which is its default value when the feature is
 * recognized).  If that flag is unrecognized, or its value is false,
 * or the resolver does not implement this interface, then only the
 * {@link EntityResolver} method will be used.</p>
 *
 * <p>That supports three categories of application that modify entity
 * resolution.</p>
 *
 * <ul>
 * <li><em>Old Style</em> applications won't know about this interface;
 * they will provide an EntityResolver.</li>
 * <li><em>Transitional Mode</em> applications provide an EntityResolver2
 * and automatically get the benefit of its methods in any systems
 * (parsers or other tools) supporting it, due to polymorphism.</li>
 * <li><em>New Style</em> applications will fail to run except on SAX2
 * parsers that support this particular feature.  They will insist that
 * the feature flag have a value of "true", and the EntityResolver2
 * implementation they provide might throw an exception if the original
 * SAX 1.0 style entity resolution method is invoked.</li>
 * </ul>
 *
 * <p>Both <em>Old Style</em> and <em>Transitional Mode</em> applications
 * will work with any SAX2 parser.</p>
 *
 * @see org.xml.sax.XMLReader#setEntityResolver
 *
 * @since SAX 2.0 (extensions 1.1 alpha)
 * @author David Brownell
 * @version TBD
 */
public interface EntityResolver2 extends EntityResolver
{
    /**
     * Allows applications to provide an external subset for documents
     * that don't explicitly define one.  Documents with DOCTYPE declarations
     * that omit an external subset can thus augment the declarations
     * available for validation, entity processing, and attribute processing
     * (normalization, defaulting, and reporting types including ID).
     * This augmentation is reported
     * through the {@link LexicalHandler#startDTD startDTD()} method as if
     * the document text had originally included the external subset;
     * this callback is made before any internal subset data or errors
     * are reported.
     *
     * <p>This method can also be used with documents that have no DOCTYPE
     * declaration.  When the root element is encountered,
     * but no DOCTYPE declaration has been seen, this method is
     * invoked.  If it returns a value for the external subset, that root
     * element is declared to be the root element, giving the effect of
     * splicing a DOCTYPE declaration at the end of the prolog of a document
     * that could not otherwise be valid.  The sequence of parser callbacks
     * in that case logically resembles this:</p>
     *
     * <pre>
     * ... comments and PIs from the prolog (as usual)
     * startDTD ("rootName", source.getPublicId (), source.getSystemId ());
     * startEntity ("[dtd]");
     * ... declarations, comments, and PIs from the external subset
     * endEntity ("[dtd]");
     * endDTD ();
     * ... then the rest of the document (as usual)
     * startElement (..., "rootName", ...);
     * </pre>
     *
     * <p>Note that the InputSource gets no further resolution.
     * Implementations of this method may wish to invoke
     * {@link #resolveEntity resolveEntity()} to gain benefits such as use
     * of local caches of DTD entities.  Also, this method will never be
     * used by a (non-validating) processor that is not including external
     * parameter entities.</p>
     *
     * <p>Uses for this method include facilitating data validation when
     * interoperating with XML processors that would always require
     * undesirable network accesses for external entities, or which for
     * other reasons adopt a "no DTDs" policy.
     * Non-validation motives include forcing documents to include DTDs so
     * that attributes are handled consistently.
     * For example, an XPath processor needs to know which attributes have
     * type "ID" before it can process a widely used type of reference.</p>
     *
     * <p><strong>Warning:</strong> Returning an external subset modifies
     * the input document.  By providing definitions for general entities,
     * it can make a malformed document appear to be well formed.</p>
     *
     * @param name Identifies the document root element.  This name comes
     *  from a DOCTYPE declaration (where available) or from the actual
     *  root element.
     * @param baseURI The document's base URI, serving as an additional
     *  hint for selecting the external subset.  This is always an absolute
     *  URI, unless it is null because the XMLReader was given an InputSource
     *  without one.
     *
     * @return An InputSource object describing the new external subset
     *  to be used by the parser, or null to indicate that no external
     *  subset is provided.
     *
     * @exception SAXException Any SAX exception, possibly wrapping
     *  another exception.
     * @exception IOException Probably indicating a failure to create
     *  a new InputStream or Reader, or an illegal URL.
     */
    InputSource getExternalSubset (String name, String baseURI)
    throws SAXException, IOException;

    /**
     * Allows applications to map references to external entities into input
     * sources, or tell the parser it should use conventional URI resolution.
     * This method is only called for external entities which have been
     * properly declared.
     * This method provides more flexibility than the {@link EntityResolver}
     * interface, supporting implementations of more complex catalogue
     * schemes such as the one defined by the
     * <a href="http://www.oasis-open.org/committees/entity/spec-2001-08-06.html">OASIS
     * XML Catalogs</a> specification.
     *
     * <p>Parsers configured to use this resolver method will call it
     * to determine the input source to use for any external entity
     * being included because of a reference in the XML text.
     * That excludes the document entity, and any external entity returned
     * by {@link #getExternalSubset getExternalSubset()}.
     * When a (non-validating) processor is configured not to include
     * a class of entities (parameter or general) through use of feature
     * flags, this method is not invoked for such entities.</p>
     *
     * <p>Note that the entity naming scheme used here is the same one
     * used in the {@link LexicalHandler}, or in the
     * {@link org.xml.sax.ContentHandler#skippedEntity ContentHandler.skippedEntity()}
     * method.</p>
     *
     * @param name Identifies the external entity being resolved.
     *  Either "[dtd]" for the external subset, or a name starting
     *  with "%" to indicate a parameter entity, or else the name of
     *  a general entity.  This is never null when invoked by a SAX2
     *  parser.
     * @param publicId The public identifier of the external entity being
     *  referenced (normalized as required by the XML specification), or
     *  null if none was supplied.
     * @param baseURI The URI with respect to which relative systemIDs
     *  are interpreted.  This is always an absolute URI, unless it is
     *  null (likely because the XMLReader was given an InputSource without
     *  one).  This URI is defined by the XML specification to be the one
     *  associated with the "&lt;" starting the relevant declaration.
     * @param systemId The system identifier of the external entity
     *  being referenced; either a relative or absolute URI.
     *  This is never null when invoked by a SAX2 parser; only declared
     *  entities, and any external subset, are resolved by such parsers.
     *
     * @return An InputSource object describing the new input source to
     *  be used by the parser.  Returning null directs the parser to
     *  resolve the system ID against the base URI and open a connection
     *  to the resulting URI.
     *
     * @exception SAXException Any SAX exception, possibly wrapping
     *  another exception.
     * @exception IOException Probably indicating a failure to create
     *  a new InputStream or Reader, or an illegal URL.
     */
    InputSource resolveEntity (
            String name,
            String publicId,
            String baseURI,
            String systemId
    ) throws SAXException, IOException;
}