001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.data.cache;
003
004import java.io.FileNotFoundException;
005import java.io.IOException;
006import java.net.HttpURLConnection;
007import java.net.URL;
008import java.security.SecureRandom;
009import java.util.HashSet;
010import java.util.List;
011import java.util.Map;
012import java.util.Set;
013import java.util.concurrent.ConcurrentHashMap;
014import java.util.concurrent.ConcurrentMap;
015import java.util.concurrent.LinkedBlockingDeque;
016import java.util.concurrent.ThreadPoolExecutor;
017import java.util.concurrent.TimeUnit;
018import java.util.regex.Matcher;
019
020import org.apache.commons.jcs.access.behavior.ICacheAccess;
021import org.apache.commons.jcs.engine.behavior.ICacheElement;
022import org.openstreetmap.josm.data.cache.ICachedLoaderListener.LoadResult;
023import org.openstreetmap.josm.data.imagery.TileJobOptions;
024import org.openstreetmap.josm.data.preferences.IntegerProperty;
025import org.openstreetmap.josm.tools.CheckParameterUtil;
026import org.openstreetmap.josm.tools.HttpClient;
027import org.openstreetmap.josm.tools.Logging;
028import org.openstreetmap.josm.tools.Utils;
029
030/**
031 * Generic loader for HTTP based tiles. Uses custom attribute, to check, if entry has expired
032 * according to HTTP headers sent with tile. If so, it tries to verify using Etags
033 * or If-Modified-Since / Last-Modified.
034 *
035 * If the tile is not valid, it will try to download it from remote service and put it
036 * to cache. If remote server will fail it will try to use stale entry.
037 *
038 * This class will keep only one Job running for specified tile. All others will just finish, but
039 * listeners will be gathered and notified, once download job will be finished
040 *
041 * @author Wiktor Niesiobędzki
042 * @param <K> cache entry key type
043 * @param <V> cache value type
044 * @since 8168
045 */
046public abstract class JCSCachedTileLoaderJob<K, V extends CacheEntry> implements ICachedLoaderJob<K> {
047    protected static final long DEFAULT_EXPIRE_TIME = TimeUnit.DAYS.toMillis(7);
048    // Limit for the max-age value send by the server.
049    protected static final long EXPIRE_TIME_SERVER_LIMIT = TimeUnit.DAYS.toMillis(28);
050    // Absolute expire time limit. Cached tiles that are older will not be used,
051    // even if the refresh from the server fails.
052    protected static final long ABSOLUTE_EXPIRE_TIME_LIMIT = TimeUnit.DAYS.toMillis(365);
053
054    /**
055     * maximum download threads that will be started
056     */
057    public static final IntegerProperty THREAD_LIMIT = new IntegerProperty("cache.jcs.max_threads", 10);
058
059    /*
060     * ThreadPoolExecutor starts new threads, until THREAD_LIMIT is reached. Then it puts tasks into LinkedBlockingDeque.
061     *
062     * The queue works FIFO, so one needs to take care about ordering of the entries submitted
063     *
064     * There is no point in canceling tasks, that are already taken by worker threads (if we made so much effort, we can at least cache
065     * the response, so later it could be used). We could actually cancel what is in LIFOQueue, but this is a tradeoff between simplicity
066     * and performance (we do want to have something to offer to worker threads before tasks will be resubmitted by class consumer)
067     */
068
069    private static final ThreadPoolExecutor DEFAULT_DOWNLOAD_JOB_DISPATCHER = new ThreadPoolExecutor(
070            1, // we have a small queue, so threads will be quickly started (threads are started only, when queue is full)
071            THREAD_LIMIT.get(), // do not this number of threads
072            30, // keepalive for thread
073            TimeUnit.SECONDS,
074            // make queue of LIFO type - so recently requested tiles will be loaded first (assuming that these are which user is waiting to see)
075            new LinkedBlockingDeque<Runnable>(),
076            Utils.newThreadFactory("JCS-downloader-%d", Thread.NORM_PRIORITY)
077            );
078
079    private static final ConcurrentMap<String, Set<ICachedLoaderListener>> inProgress = new ConcurrentHashMap<>();
080    private static final ConcurrentMap<String, Boolean> useHead = new ConcurrentHashMap<>();
081
082    protected final long now; // when the job started
083
084    private final ICacheAccess<K, V> cache;
085    private ICacheElement<K, V> cacheElement;
086    protected V cacheData;
087    protected CacheEntryAttributes attributes;
088
089    // HTTP connection parameters
090    private final int connectTimeout;
091    private final int readTimeout;
092    private final Map<String, String> headers;
093    private final ThreadPoolExecutor downloadJobExecutor;
094    private Runnable finishTask;
095    private boolean force;
096    private long minimumExpiryTime;
097
098    /**
099     * @param cache cache instance that we will work on
100     * @param options options of the request
101     * @param downloadJobExecutor that will be executing the jobs
102     */
103    public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
104            TileJobOptions options,
105            ThreadPoolExecutor downloadJobExecutor) {
106        CheckParameterUtil.ensureParameterNotNull(cache, "cache");
107        this.cache = cache;
108        this.now = System.currentTimeMillis();
109        this.connectTimeout = options.getConnectionTimeout();
110        this.readTimeout = options.getReadTimeout();
111        this.headers = options.getHeaders();
112        this.downloadJobExecutor = downloadJobExecutor;
113        this.minimumExpiryTime = TimeUnit.SECONDS.toMillis(options.getMinimumExpiryTime());
114    }
115
116    /**
117     * @param cache cache instance that we will work on
118     * @param options of the request
119     */
120    public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
121            TileJobOptions options) {
122        this(cache, options, DEFAULT_DOWNLOAD_JOB_DISPATCHER);
123    }
124
125    private void ensureCacheElement() {
126        if (cacheElement == null && getCacheKey() != null) {
127            cacheElement = cache.getCacheElement(getCacheKey());
128            if (cacheElement != null) {
129                attributes = (CacheEntryAttributes) cacheElement.getElementAttributes();
130                cacheData = cacheElement.getVal();
131            }
132        }
133    }
134
135    @Override
136    public V get() {
137        ensureCacheElement();
138        return cacheData;
139    }
140
141    @Override
142    public void submit(ICachedLoaderListener listener, boolean force) throws IOException {
143        this.force = force;
144        boolean first = false;
145        URL url = getUrl();
146        String deduplicationKey = null;
147        if (url != null) {
148            // url might be null, for example when Bing Attribution is not loaded yet
149            deduplicationKey = url.toString();
150        }
151        if (deduplicationKey == null) {
152            Logging.warn("No url returned for: {0}, skipping", getCacheKey());
153            throw new IllegalArgumentException("No url returned");
154        }
155        synchronized (inProgress) {
156            Set<ICachedLoaderListener> newListeners = inProgress.get(deduplicationKey);
157            if (newListeners == null) {
158                newListeners = new HashSet<>();
159                inProgress.put(deduplicationKey, newListeners);
160                first = true;
161            }
162            newListeners.add(listener);
163        }
164
165        if (first || force) {
166            // submit all jobs to separate thread, so calling thread is not blocked with IO when loading from disk
167            Logging.debug("JCS - Submitting job for execution for url: {0}", getUrlNoException());
168            downloadJobExecutor.execute(this);
169        }
170    }
171
172    /**
173     * This method is run when job has finished
174     */
175    protected void executionFinished() {
176        if (finishTask != null) {
177            finishTask.run();
178        }
179    }
180
181    /**
182     *
183     * @return checks if object from cache has sufficient data to be returned
184     */
185    protected boolean isObjectLoadable() {
186        if (cacheData == null) {
187            return false;
188        }
189        return cacheData.getContent().length > 0;
190    }
191
192    /**
193     * Simple implementation. All errors should be cached as empty. Though some JDK (JDK8 on Windows for example)
194     * doesn't return 4xx error codes, instead they do throw an FileNotFoundException or IOException
195     *
196     * @return true if we should put empty object into cache, regardless of what remote resource has returned
197     */
198    protected boolean cacheAsEmpty() {
199        return attributes.getResponseCode() < 500;
200    }
201
202    /**
203     * @return key under which discovered server settings will be kept
204     */
205    protected String getServerKey() {
206        try {
207            return getUrl().getHost();
208        } catch (IOException e) {
209            Logging.trace(e);
210            return null;
211        }
212    }
213
214    @Override
215    public void run() {
216        final Thread currentThread = Thread.currentThread();
217        final String oldName = currentThread.getName();
218        currentThread.setName("JCS Downloading: " + getUrlNoException());
219        Logging.debug("JCS - starting fetch of url: {0} ", getUrlNoException());
220        ensureCacheElement();
221        try {
222            // try to fetch from cache
223            if (!force && cacheElement != null && isCacheElementValid() && isObjectLoadable()) {
224                // we got something in cache, and it's valid, so lets return it
225                Logging.debug("JCS - Returning object from cache: {0}", getCacheKey());
226                finishLoading(LoadResult.SUCCESS);
227                return;
228            }
229
230            // try to load object from remote resource
231            if (loadObject()) {
232                finishLoading(LoadResult.SUCCESS);
233            } else {
234                // if loading failed - check if we can return stale entry
235                if (isObjectLoadable()) {
236                    // try to get stale entry in cache
237                    finishLoading(LoadResult.SUCCESS);
238                    Logging.debug("JCS - found stale object in cache: {0}", getUrlNoException());
239                } else {
240                    // failed completely
241                    finishLoading(LoadResult.FAILURE);
242                }
243            }
244        } finally {
245            executionFinished();
246            currentThread.setName(oldName);
247        }
248    }
249
250    private void finishLoading(LoadResult result) {
251        Set<ICachedLoaderListener> listeners;
252        synchronized (inProgress) {
253            try {
254                listeners = inProgress.remove(getUrl().toString());
255            } catch (IOException e) {
256                listeners = null;
257                Logging.trace(e);
258            }
259        }
260        if (listeners == null) {
261            Logging.warn("Listener not found for URL: {0}. Listener not notified!", getUrlNoException());
262            return;
263        }
264        for (ICachedLoaderListener l: listeners) {
265            l.loadingFinished(cacheData, attributes, result);
266        }
267    }
268
269    protected boolean isCacheElementValid() {
270        long expires = attributes.getExpirationTime();
271
272        // check by expire date set by server
273        if (expires != 0L) {
274            // put a limit to the expire time (some servers send a value
275            // that is too large)
276            expires = Math.min(expires, attributes.getCreateTime() + Math.max(EXPIRE_TIME_SERVER_LIMIT, minimumExpiryTime));
277            if (now > expires) {
278                Logging.debug("JCS - Object {0} has expired -> valid to {1}, now is: {2}",
279                        getUrlNoException(), Long.toString(expires), Long.toString(now));
280                return false;
281            }
282        } else if (attributes.getLastModification() > 0 &&
283                now - attributes.getLastModification() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) {
284            // check by file modification date
285            Logging.debug("JCS - Object has expired, maximum file age reached {0}", getUrlNoException());
286            return false;
287        } else if (now - attributes.getCreateTime() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) {
288            Logging.debug("JCS - Object has expired, maximum time since object creation reached {0}", getUrlNoException());
289            return false;
290        }
291        return true;
292    }
293
294    /**
295     * @return true if object was successfully downloaded, false, if there was a loading failure
296     */
297    private boolean loadObject() {
298        if (attributes == null) {
299            attributes = new CacheEntryAttributes();
300        }
301        try {
302            // if we have object in cache, and host doesn't support If-Modified-Since nor If-None-Match
303            // then just use HEAD request and check returned values
304            if (isObjectLoadable() &&
305                    Boolean.TRUE.equals(useHead.get(getServerKey())) &&
306                    isCacheValidUsingHead()) {
307                Logging.debug("JCS - cache entry verified using HEAD request: {0}", getUrl());
308                return true;
309            }
310
311            Logging.debug("JCS - starting HttpClient GET request for URL: {0}", getUrl());
312            final HttpClient request = getRequest("GET", true);
313
314            if (isObjectLoadable() &&
315                    (now - attributes.getLastModification()) <= ABSOLUTE_EXPIRE_TIME_LIMIT) {
316                request.setIfModifiedSince(attributes.getLastModification());
317            }
318            if (isObjectLoadable() && attributes.getEtag() != null) {
319                request.setHeader("If-None-Match", attributes.getEtag());
320            }
321
322            final HttpClient.Response urlConn = request.connect();
323
324            if (urlConn.getResponseCode() == 304) {
325                // If isModifiedSince or If-None-Match has been set
326                // and the server answers with a HTTP 304 = "Not Modified"
327                Logging.debug("JCS - If-Modified-Since/ETag test: local version is up to date: {0}", getUrl());
328                // update cache attributes
329                attributes = parseHeaders(urlConn);
330                cache.put(getCacheKey(), cacheData, attributes);
331                return true;
332            } else if (isObjectLoadable() // we have an object in cache, but we haven't received 304 response code
333                    && (
334                            (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
335                            attributes.getLastModification() == urlConn.getLastModified())
336                    ) {
337                // we sent ETag or If-Modified-Since, but didn't get 304 response code
338                // for further requests - use HEAD
339                String serverKey = getServerKey();
340                Logging.info("JCS - Host: {0} found not to return 304 codes for If-Modified-Since or If-None-Match headers",
341                        serverKey);
342                useHead.put(serverKey, Boolean.TRUE);
343            }
344
345            attributes = parseHeaders(urlConn);
346
347            for (int i = 0; i < 5; ++i) {
348                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
349                    Thread.sleep(5000L+new SecureRandom().nextInt(5000));
350                    continue;
351                }
352
353                attributes.setResponseCode(urlConn.getResponseCode());
354                byte[] raw;
355                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_OK) {
356                    raw = Utils.readBytesFromStream(urlConn.getContent());
357                } else {
358                    raw = new byte[]{};
359                    try {
360                        String data = urlConn.fetchContent();
361                        if (!data.isEmpty()) {
362                            String detectErrorMessage = detectErrorMessage(data);
363                            if (detectErrorMessage != null) {
364                                attributes.setErrorMessage(detectErrorMessage);
365                            }
366                        }
367                    } catch (IOException e) {
368                        Logging.warn(e);
369                    }
370                }
371
372                if (isResponseLoadable(urlConn.getHeaderFields(), urlConn.getResponseCode(), raw)) {
373                    // we need to check cacheEmpty, so for cases, when data is returned, but we want to store
374                    // as empty (eg. empty tile images) to save some space
375                    cacheData = createCacheEntry(raw);
376                    cache.put(getCacheKey(), cacheData, attributes);
377                    Logging.debug("JCS - downloaded key: {0}, length: {1}, url: {2}",
378                            getCacheKey(), raw.length, getUrl());
379                    return true;
380                } else if (cacheAsEmpty()) {
381                    cacheData = createCacheEntry(new byte[]{});
382                    cache.put(getCacheKey(), cacheData, attributes);
383                    Logging.debug("JCS - Caching empty object {0}", getUrl());
384                    return true;
385                } else {
386                    Logging.debug("JCS - failure during load - response is not loadable nor cached as empty");
387                    return false;
388                }
389            }
390        } catch (FileNotFoundException e) {
391            Logging.debug("JCS - Caching empty object as server returned 404 for: {0}", getUrlNoException());
392            attributes.setResponseCode(404);
393            attributes.setError(e);
394            attributes.setException(e);
395            boolean doCache = isResponseLoadable(null, 404, null) || cacheAsEmpty();
396            if (doCache) {
397                cacheData = createCacheEntry(new byte[]{});
398                cache.put(getCacheKey(), cacheData, attributes);
399            }
400            return doCache;
401        } catch (IOException e) {
402            Logging.debug("JCS - IOException during communication with server for: {0}", getUrlNoException());
403            if (isObjectLoadable()) {
404                return true;
405            } else {
406                attributes.setError(e);
407                attributes.setException(e);
408                attributes.setResponseCode(599); // set dummy error code, greater than 500 so it will be not cached
409                return false;
410            }
411
412        } catch (InterruptedException e) {
413            attributes.setError(e);
414            attributes.setException(e);
415            Logging.logWithStackTrace(Logging.LEVEL_WARN, e, "JCS - Exception during download {0}", getUrlNoException());
416            Thread.currentThread().interrupt();
417        }
418        Logging.warn("JCS - Silent failure during download: {0}", getUrlNoException());
419        return false;
420    }
421
422    /**
423     * Tries do detect an error message from given string.
424     * @param data string to analyze
425     * @return error message if detected, or null
426     * @since 14535
427     */
428    public String detectErrorMessage(String data) {
429        Matcher m = HttpClient.getTomcatErrorMatcher(data);
430        return m.matches() ? m.group(1).replace("'", "''") : null;
431    }
432
433    /**
434     * Check if the object is loadable. This means, if the data will be parsed, and if this response
435     * will finish as successful retrieve.
436     *
437     * This simple implementation doesn't load empty response, nor client (4xx) and server (5xx) errors
438     *
439     * @param headerFields headers sent by server
440     * @param responseCode http status code
441     * @param raw data read from server
442     * @return true if object should be cached and returned to listener
443     */
444    protected boolean isResponseLoadable(Map<String, List<String>> headerFields, int responseCode, byte[] raw) {
445        return raw != null && raw.length != 0 && responseCode < 400;
446    }
447
448    protected abstract V createCacheEntry(byte[] content);
449
450    protected CacheEntryAttributes parseHeaders(HttpClient.Response urlConn) {
451        CacheEntryAttributes ret = new CacheEntryAttributes();
452
453        /*
454         * according to https://www.ietf.org/rfc/rfc2616.txt Cache-Control takes precedence over max-age
455         * max-age is for private caches, s-max-age is for shared caches. We take any value that is larger
456         */
457        Long expiration = 0L;
458        String cacheControl = urlConn.getHeaderField("Cache-Control");
459        if (cacheControl != null) {
460            for (String token: cacheControl.split(",")) {
461                try {
462                    if (token.startsWith("max-age=")) {
463                        expiration = Math.max(expiration,
464                                TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring("max-age=".length())))
465                                + System.currentTimeMillis()
466                                );
467                    }
468                    if (token.startsWith("s-max-age=")) {
469                        expiration = Math.max(expiration,
470                                TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring("s-max-age=".length())))
471                                + System.currentTimeMillis()
472                                );
473                    }
474                } catch (NumberFormatException e) {
475                    // ignore malformed Cache-Control headers
476                    Logging.trace(e);
477                }
478            }
479        }
480
481        if (expiration.equals(0L)) {
482            expiration = urlConn.getExpiration();
483        }
484
485        // if nothing is found - set default
486        if (expiration.equals(0L)) {
487            expiration = System.currentTimeMillis() + DEFAULT_EXPIRE_TIME;
488        }
489
490        ret.setExpirationTime(Math.max(minimumExpiryTime + System.currentTimeMillis(), expiration));
491        ret.setLastModification(now);
492        ret.setEtag(urlConn.getHeaderField("ETag"));
493
494        return ret;
495    }
496
497    private HttpClient getRequest(String requestMethod, boolean noCache) throws IOException {
498        final HttpClient urlConn = HttpClient.create(getUrl(), requestMethod);
499        urlConn.setAccept("text/html, image/png, image/jpeg, image/gif, */*");
500        urlConn.setReadTimeout(readTimeout); // 30 seconds read timeout
501        urlConn.setConnectTimeout(connectTimeout);
502        if (headers != null) {
503            urlConn.setHeaders(headers);
504        }
505
506        if (force || noCache) {
507            urlConn.useCache(false);
508        }
509        return urlConn;
510    }
511
512    private boolean isCacheValidUsingHead() throws IOException {
513        final HttpClient.Response urlConn = getRequest("HEAD", false).connect();
514        long lastModified = urlConn.getLastModified();
515        boolean ret = (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
516                (lastModified != 0 && lastModified <= attributes.getLastModification());
517        if (ret) {
518            // update attributes
519            attributes = parseHeaders(urlConn);
520            cache.put(getCacheKey(), cacheData, attributes);
521        }
522        return ret;
523    }
524
525    /**
526     * TODO: move to JobFactory
527     * cancels all outstanding tasks in the queue.
528     */
529    public void cancelOutstandingTasks() {
530        for (Runnable r: downloadJobExecutor.getQueue()) {
531            if (downloadJobExecutor.remove(r) && r instanceof JCSCachedTileLoaderJob) {
532                ((JCSCachedTileLoaderJob<?, ?>) r).handleJobCancellation();
533            }
534        }
535    }
536
537    /**
538     * Sets a job, that will be run, when job will finish execution
539     * @param runnable that will be executed
540     */
541    public void setFinishedTask(Runnable runnable) {
542        this.finishTask = runnable;
543
544    }
545
546    /**
547     * Marks this job as canceled
548     */
549    public void handleJobCancellation() {
550        finishLoading(LoadResult.CANCELED);
551    }
552
553    private URL getUrlNoException() {
554        try {
555            return getUrl();
556        } catch (IOException e) {
557            return null;
558        }
559    }
560}