001/**
002 * Copyright 2015 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.fcrepo.http.api;
017
018
019import static com.google.common.collect.Iterators.concat;
020import static com.google.common.collect.Iterators.filter;
021import static com.google.common.collect.Iterators.transform;
022import static com.hp.hpl.jena.rdf.model.ModelFactory.createDefaultModel;
023import static com.hp.hpl.jena.vocabulary.RDF.type;
024import static javax.ws.rs.core.HttpHeaders.CACHE_CONTROL;
025import static javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE;
026import static javax.ws.rs.core.Response.ok;
027import static javax.ws.rs.core.Response.status;
028import static javax.ws.rs.core.Response.temporaryRedirect;
029import static javax.ws.rs.core.Response.Status.PARTIAL_CONTENT;
030import static javax.ws.rs.core.Response.Status.REQUESTED_RANGE_NOT_SATISFIABLE;
031import static org.apache.commons.lang.StringUtils.isBlank;
032import static org.apache.jena.riot.RDFLanguages.contentTypeToLang;
033import static org.fcrepo.kernel.api.FedoraJcrTypes.LDP_BASIC_CONTAINER;
034import static org.fcrepo.kernel.api.FedoraJcrTypes.LDP_DIRECT_CONTAINER;
035import static org.fcrepo.kernel.api.FedoraJcrTypes.LDP_INDIRECT_CONTAINER;
036import static org.fcrepo.kernel.api.RdfLexicon.BASIC_CONTAINER;
037import static org.fcrepo.kernel.api.RdfLexicon.CONTAINER;
038import static org.fcrepo.kernel.api.RdfLexicon.DIRECT_CONTAINER;
039import static org.fcrepo.kernel.api.RdfLexicon.INDIRECT_CONTAINER;
040import static org.fcrepo.kernel.api.RdfLexicon.LDP_NAMESPACE;
041import static org.fcrepo.kernel.api.RdfLexicon.isManagedNamespace;
042import static org.fcrepo.kernel.modeshape.rdf.ManagedRdf.isManagedTriple;
043
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.function.Predicate;
051
052import javax.inject.Inject;
053import javax.jcr.AccessDeniedException;
054import javax.jcr.Binary;
055import javax.jcr.RepositoryException;
056import javax.jcr.Session;
057import javax.servlet.http.HttpServletResponse;
058import javax.ws.rs.BadRequestException;
059import javax.ws.rs.BeanParam;
060import javax.ws.rs.WebApplicationException;
061import javax.ws.rs.core.CacheControl;
062import javax.ws.rs.core.Context;
063import javax.ws.rs.core.EntityTag;
064import javax.ws.rs.core.MediaType;
065import javax.ws.rs.core.Request;
066import javax.ws.rs.core.Response;
067
068import org.fcrepo.http.commons.api.rdf.HttpTripleUtil;
069import org.fcrepo.http.commons.domain.MultiPrefer;
070import org.fcrepo.http.commons.domain.PreferTag;
071import org.fcrepo.http.commons.domain.Range;
072import org.fcrepo.http.commons.domain.ldp.LdpPreferTag;
073import org.fcrepo.http.commons.responses.RangeRequestInputStream;
074import org.fcrepo.kernel.api.exception.InvalidChecksumException;
075import org.fcrepo.kernel.api.exception.MalformedRdfException;
076import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
077import org.fcrepo.kernel.api.models.Container;
078import org.fcrepo.kernel.api.models.FedoraBinary;
079import org.fcrepo.kernel.api.models.FedoraResource;
080import org.fcrepo.kernel.api.models.NonRdfSource;
081import org.fcrepo.kernel.api.models.NonRdfSourceDescription;
082import org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint;
083import org.fcrepo.kernel.api.utils.iterators.RdfStream;
084import org.fcrepo.kernel.modeshape.rdf.impl.AclRdfContext;
085import org.fcrepo.kernel.modeshape.rdf.impl.BlankNodeRdfContext;
086import org.fcrepo.kernel.modeshape.rdf.impl.ChildrenRdfContext;
087import org.fcrepo.kernel.modeshape.rdf.impl.ContentRdfContext;
088import org.fcrepo.kernel.modeshape.rdf.impl.HashRdfContext;
089import org.fcrepo.kernel.modeshape.rdf.impl.LdpContainerRdfContext;
090import org.fcrepo.kernel.modeshape.rdf.impl.LdpIsMemberOfRdfContext;
091import org.fcrepo.kernel.modeshape.rdf.impl.LdpRdfContext;
092import org.fcrepo.kernel.modeshape.rdf.impl.ParentRdfContext;
093import org.fcrepo.kernel.modeshape.rdf.impl.PropertiesRdfContext;
094import org.fcrepo.kernel.modeshape.rdf.impl.ReferencesRdfContext;
095import org.fcrepo.kernel.modeshape.rdf.impl.RootRdfContext;
096import org.fcrepo.kernel.modeshape.rdf.impl.TypeRdfContext;
097import org.fcrepo.kernel.modeshape.services.TransactionServiceImpl;
098
099import org.apache.jena.riot.Lang;
100import org.glassfish.jersey.media.multipart.ContentDisposition;
101import org.jvnet.hk2.annotations.Optional;
102
103import com.google.common.collect.ImmutableList;
104import com.google.common.collect.Iterators;
105import com.hp.hpl.jena.graph.Triple;
106import com.hp.hpl.jena.rdf.model.Model;
107import com.hp.hpl.jena.rdf.model.Statement;
108
109/**
110 * An abstract class that sits between AbstractResource and any resource that
111 * wishes to share the routines for building responses containing binary
112 * content.
113 *
114 * @author Mike Durbin
115 * @author ajs6f
116 */
117public abstract class ContentExposingResource extends FedoraBaseResource {
118
    /**
     * Media type used by {@link #getContent} to recognise binaries whose mime type is
     * "message/external-body"; such binaries may be answered with a redirect instead of content.
     */
    public static final MediaType MESSAGE_EXTERNAL_BODY = MediaType.valueOf("message/external-body");

    // Request and servlet response injected via @Context
    @Context protected Request request;
    @Context protected HttpServletResponse servletResponse;

    // Optional collaborator; getResourceTriples() null-checks it before use
    @Inject
    @Optional
    private HttpTripleUtil httpTripleUtil;

    // Client preferences bound as a bean parameter; presumably parsed from the Prefer header -- TODO confirm
    @BeanParam
    protected MultiPrefer prefer;

    // Optional collaborator handed to FedoraBinary#setContent when binary content is replaced
    @Inject
    @Optional
    StoragePolicyDecisionPoint storagePolicyDecisionPoint;

    // Populated on first use by resource()
    protected FedoraResource resource;

    // Byte ranges smaller than this many bytes are read fully into a byte array by
    // getBinaryContent(); larger ranges are served from a stream instead
    private static final long MAX_BUFFER_SIZE = 10240000;

    // Matches triples whose predicate is rdf:type and whose object's namespace satisfies isManagedNamespace
    private static final Predicate<Triple> IS_MANAGED_TYPE = t -> t.getPredicate().equals(type.asNode()) &&
            isManagedNamespace.apply(t.getObject().getNameSpace());

    /**
     * The path handed to getResourceFromPath when {@link #resource()} first resolves the resource.
     *
     * @return the external path of the resource this endpoint exposes
     */
    protected abstract String externalPath();
143
144    protected Response getContent(final String rangeValue,
145                                  final RdfStream rdfStream) throws IOException {
146        if (resource() instanceof FedoraBinary) {
147
148            final String contentTypeString = ((FedoraBinary) resource()).getMimeType();
149
150            final Lang lang = contentTypeToLang(contentTypeString);
151
152            if (!contentTypeString.equals("text/plain") && lang != null) {
153
154                final String format = lang.getName().toUpperCase();
155
156                final InputStream content = ((FedoraBinary) resource()).getContent();
157
158                final Model inputModel = createDefaultModel()
159                        .read(content,  (resource()).toString(), format);
160
161                rdfStream.concat(Iterators.transform(inputModel.listStatements(), Statement::asTriple));
162            } else {
163
164                final MediaType mediaType = MediaType.valueOf(contentTypeString);
165                if (MESSAGE_EXTERNAL_BODY.isCompatible(mediaType)
166                        && mediaType.getParameters().containsKey("access-type")
167                        && mediaType.getParameters().get("access-type").equals("URL")
168                        && mediaType.getParameters().containsKey("URL") ) {
169                    try {
170                        return temporaryRedirect(new URI(mediaType.getParameters().get("URL"))).build();
171                    } catch (final URISyntaxException e) {
172                        throw new RepositoryRuntimeException(e);
173                    }
174                }
175                return getBinaryContent(rangeValue);
176            }
177
178        } else {
179            rdfStream.concat(getResourceTriples());
180
181            if (prefer != null) {
182                prefer.getReturn().addResponseHeaders(servletResponse);
183            }
184
185        }
186        servletResponse.addHeader("Vary", "Accept, Range, Accept-Encoding, Accept-Language");
187
188        return Response.ok(rdfStream).build();
189    }
190
191    protected RdfStream getResourceTriples() {
192
193        final PreferTag returnPreference;
194
195        if (prefer != null && prefer.hasReturn()) {
196            returnPreference = prefer.getReturn();
197        } else if (prefer != null && prefer.hasHandling()) {
198            returnPreference = prefer.getHandling();
199        } else {
200            returnPreference = PreferTag.emptyTag();
201        }
202
203        final LdpPreferTag ldpPreferences = new LdpPreferTag(returnPreference);
204
205        final RdfStream rdfStream = new RdfStream();
206
207        final Predicate<Triple> tripleFilter;
208        if (ldpPreferences.prefersServerManaged()) {
209            tripleFilter = x -> true;
210        } else {
211            tripleFilter = IS_MANAGED_TYPE.or(isManagedTriple::apply).negate();
212        }
213
214        if (ldpPreferences.prefersServerManaged()) {
215            rdfStream.concat(getTriples(LdpRdfContext.class));
216        }
217
218        rdfStream.concat(filter(getTriples(TypeRdfContext.class), tripleFilter::test));
219
220        rdfStream.concat(filter(getTriples(PropertiesRdfContext.class), tripleFilter::test));
221
222        if (!returnPreference.getValue().equals("minimal")) {
223
224            // Additional server-managed triples about this resource
225            if (ldpPreferences.prefersServerManaged()) {
226                rdfStream.concat(getTriples(AclRdfContext.class));
227                rdfStream.concat(getTriples(RootRdfContext.class));
228                rdfStream.concat(getTriples(ContentRdfContext.class));
229                rdfStream.concat(getTriples(ParentRdfContext.class));
230            }
231
232            // containment triples about this resource
233            if (ldpPreferences.prefersContainment()) {
234                rdfStream.concat(getTriples(ChildrenRdfContext.class));
235            }
236
237            // LDP container membership triples for this resource
238            if (ldpPreferences.prefersMembership()) {
239                rdfStream.concat(getTriples(LdpContainerRdfContext.class));
240                rdfStream.concat(getTriples(LdpIsMemberOfRdfContext.class));
241            }
242
243            // Include binary properties if this is a binary description
244            if (resource() instanceof NonRdfSourceDescription) {
245                final FedoraResource described = ((NonRdfSourceDescription) resource()).getDescribedResource();
246                rdfStream.concat(filter(described.getTriples(translator(), ImmutableList.of(TypeRdfContext.class,
247                        PropertiesRdfContext.class,
248                        ContentRdfContext.class)), tripleFilter::test));
249                if (ldpPreferences.prefersServerManaged()) {
250                    rdfStream.concat(getTriples(described,LdpRdfContext.class));
251                }
252            }
253
254            // Embed all hash and blank nodes
255            rdfStream.concat(filter(getTriples(HashRdfContext.class), tripleFilter::test));
256            rdfStream.concat(filter(getTriples(BlankNodeRdfContext.class), tripleFilter::test));
257
258            // Include inbound references to this object
259            if (ldpPreferences.prefersReferences()) {
260                rdfStream.concat(getTriples(ReferencesRdfContext.class));
261            }
262
263            // Embed the children of this object
264            if (ldpPreferences.prefersEmbed()) {
265
266                final Iterator<FedoraResource> children = resource().getChildren();
267
268                rdfStream.concat(filter(concat(transform(children, child ->
269                child.getTriples(translator(),
270                        ImmutableList.of(
271                                TypeRdfContext.class, PropertiesRdfContext.class, BlankNodeRdfContext.class)))),
272                        tripleFilter::test));
273
274            }
275        }
276
277        if (httpTripleUtil != null && ldpPreferences.prefersServerManaged()) {
278            httpTripleUtil.addHttpComponentModelsForResourceToStream(rdfStream, resource(), uriInfo, translator());
279        }
280
281
282        return rdfStream;
283    }
284
285    /**
286     * Get the binary content of a datastream
287     *
288     * @param rangeValue the range value
289     * @return Binary blob
290     * @throws IOException if io exception occurred
291     */
292    protected Response getBinaryContent(final String rangeValue)
293            throws IOException {
294            final FedoraBinary binary = (FedoraBinary)resource();
295
296            // we include an explicit etag, because the default behavior is to use the JCR node's etag, not
297            // the jcr:content node digest. The etag is only included if we are not within a transaction.
298            final String txId = TransactionServiceImpl.getCurrentTransactionId(session());
299            if (txId == null) {
300                checkCacheControlHeaders(request, servletResponse, binary, session());
301            }
302            final CacheControl cc = new CacheControl();
303            cc.setMaxAge(0);
304            cc.setMustRevalidate(true);
305            Response.ResponseBuilder builder;
306
307            if (rangeValue != null && rangeValue.startsWith("bytes")) {
308
309                final Range range = Range.convert(rangeValue);
310
311                final long contentSize = binary.getContentSize();
312
313                final String endAsString;
314
315                if (range.end() == -1) {
316                    endAsString = Long.toString(contentSize - 1);
317                } else {
318                    endAsString = Long.toString(range.end());
319                }
320
321                final String contentRangeValue =
322                        String.format("bytes %s-%s/%s", range.start(),
323                                endAsString, contentSize);
324
325                if (range.end() > contentSize ||
326                        (range.end() == -1 && range.start() > contentSize)) {
327
328                    builder = status(REQUESTED_RANGE_NOT_SATISFIABLE)
329                            .header("Content-Range", contentRangeValue);
330                } else {
331                    final long rangeStart = range.start();
332                    final long rangeSize = range.size() == -1 ? contentSize - rangeStart : range.size();
333                    final long remainingBytes = contentSize - rangeStart;
334                    final long bufSize = rangeSize < remainingBytes ? rangeSize : remainingBytes;
335
336                    if (bufSize < MAX_BUFFER_SIZE) {
337                        // Small size range content retrieval use javax.jcr.Binary to improve performance
338                        final byte[] buf = new byte[(int) bufSize];
339
340                        final Binary binaryContent = binary.getBinaryContent();
341                        try {
342                            binaryContent.read(buf, rangeStart);
343                        } catch (final RepositoryException e1) {
344                            throw new RepositoryRuntimeException(e1);
345                        }
346                        binaryContent.dispose();
347
348                        builder = status(PARTIAL_CONTENT).entity(buf)
349                                .header("Content-Range", contentRangeValue);
350                    } else {
351                        // For large range content retrieval, go with the InputStream class to balance
352                        // the memory usage, though this is a rare case in range content retrieval.
353                        final InputStream content = binary.getContent();
354                        final RangeRequestInputStream rangeInputStream =
355                                new RangeRequestInputStream(content, range.start(), range.size());
356
357                        builder = status(PARTIAL_CONTENT).entity(rangeInputStream)
358                                .header("Content-Range", contentRangeValue);
359                    }
360                }
361
362            } else {
363                final InputStream content = binary.getContent();
364                builder = ok(content);
365            }
366
367
368            // we set the content-type explicitly to avoid content-negotiation from getting in the way
369            return builder.type(binary.getMimeType())
370                    .cacheControl(cc)
371                    .build();
372
373        }
374
    /**
     * Get the triples of the current resource (see {@link #resource()}) for the given context.
     *
     * @param x the RdfStream subclass selecting which triples to produce
     * @return the triples produced for the current resource
     */
    protected RdfStream getTriples(final Class<? extends RdfStream> x) {
        return getTriples(resource(), x);
    }

    /**
     * Get the triples of the given resource for the given context, using this endpoint's translator.
     *
     * @param resource the resource whose triples are requested
     * @param x the RdfStream subclass selecting which triples to produce
     * @return the triples produced for the resource
     */
    protected RdfStream getTriples(final FedoraResource resource, final Class<? extends RdfStream> x) {
        return resource.getTriples(translator(), x);
    }
382
383    protected URI getUri(final FedoraResource resource) {
384        try {
385            final String uri = translator().reverse().convert(resource).getURI();
386            return new URI(uri);
387        } catch (final URISyntaxException e) {
388            throw new BadRequestException(e);
389        }
390    }
391
392    protected FedoraResource resource() {
393        if (resource == null) {
394            resource = getResourceFromPath(externalPath());
395        }
396
397        return resource;
398    }
399
400
401    /**
402     * Add any resource-specific headers to the response
403     * @param resource the resource
404     */
405    protected void addResourceHttpHeaders(final FedoraResource resource) {
406        if (resource instanceof FedoraBinary) {
407
408            final FedoraBinary binary = (FedoraBinary)resource;
409            final ContentDisposition contentDisposition = ContentDisposition.type("attachment")
410                    .fileName(binary.getFilename())
411                    .creationDate(binary.getCreatedDate())
412                    .modificationDate(binary.getLastModifiedDate())
413                    .size(binary.getContentSize())
414                    .build();
415
416            servletResponse.addHeader("Content-Type", binary.getMimeType());
417            servletResponse.addHeader("Content-Length", String.valueOf(binary.getContentSize()));
418            servletResponse.addHeader("Accept-Ranges", "bytes");
419            servletResponse.addHeader("Content-Disposition", contentDisposition.toString());
420        }
421
422        servletResponse.addHeader("Link", "<" + LDP_NAMESPACE + "Resource>;rel=\"type\"");
423
424        if (resource instanceof NonRdfSource) {
425            servletResponse.addHeader("Link", "<" + LDP_NAMESPACE + "NonRDFSource>;rel=\"type\"");
426        } else if (resource instanceof Container) {
427            servletResponse.addHeader("Link", "<" + CONTAINER.getURI() + ">;rel=\"type\"");
428            if (resource.hasType(LDP_BASIC_CONTAINER)) {
429                servletResponse.addHeader("Link", "<" + BASIC_CONTAINER.getURI() + ">;rel=\"type\"");
430            } else if (resource.hasType(LDP_DIRECT_CONTAINER)) {
431                servletResponse.addHeader("Link", "<" + DIRECT_CONTAINER.getURI() + ">;rel=\"type\"");
432            } else if (resource.hasType(LDP_INDIRECT_CONTAINER)) {
433                servletResponse.addHeader("Link", "<" + INDIRECT_CONTAINER.getURI() + ">;rel=\"type\"");
434            } else {
435                servletResponse.addHeader("Link", "<" + BASIC_CONTAINER.getURI() + ">;rel=\"type\"");
436            }
437        } else {
438            servletResponse.addHeader("Link", "<" + LDP_NAMESPACE + "RDFSource>;rel=\"type\"");
439        }
440
441    }
442
    /**
     * Evaluate the cache control headers for the request to see if it can be served from
     * the cache, then add the ETag and Last-Modified headers to the response.
     * <p>
     * The precondition evaluation runs first and throws a WebApplicationException when it
     * produces a response; in that case the caching headers are not added here.
     * </p>
     *
     * @param request the request
     * @param servletResponse the servlet response
     * @param resource the fedora resource
     * @param session the session
     */
    protected static void checkCacheControlHeaders(final Request request,
                                                   final HttpServletResponse servletResponse,
                                                   final FedoraResource resource,
                                                   final Session session) {
        // 'true' asks for cache-control, ETag and Last-Modified on any precondition response
        evaluateRequestPreconditions(request, servletResponse, resource, session, true);
        addCacheControlHeaders(servletResponse, resource, session);
    }
459
460    /**
461     * Add ETag and Last-Modified cache control headers to the response
462     * @param servletResponse the servlet response
463     * @param resource the fedora resource
464     * @param session the session
465     */
466    protected static void addCacheControlHeaders(final HttpServletResponse servletResponse,
467                                                 final FedoraResource resource,
468                                                 final Session session) {
469
470        final String txId = TransactionServiceImpl.getCurrentTransactionId(session);
471        if (txId != null) {
472            // Do not add caching headers if in a transaction
473            return;
474        }
475
476        final EntityTag etag = new EntityTag(resource.getEtagValue());
477        final Date date = resource.getLastModifiedDate();
478
479        if (!etag.getValue().isEmpty()) {
480            servletResponse.addHeader("ETag", etag.toString());
481        }
482
483        if (date != null) {
484            servletResponse.addDateHeader("Last-Modified", date.getTime());
485        }
486    }
487
    /**
     * Evaluate request preconditions to ensure the resource is the expected state.
     * Delegates to the five-argument overload with cache-control decoration disabled.
     *
     * @param request the request
     * @param servletResponse the servlet response
     * @param resource the resource
     * @param session the session
     */
    protected static void evaluateRequestPreconditions(final Request request,
                                                       final HttpServletResponse servletResponse,
                                                       final FedoraResource resource,
                                                       final Session session) {
        evaluateRequestPreconditions(request, servletResponse, resource, session, false);
    }
501
502    private static void evaluateRequestPreconditions(final Request request,
503                                                     final HttpServletResponse servletResponse,
504                                                     final FedoraResource resource,
505                                                     final Session session,
506                                                     final boolean cacheControl) {
507
508        final String txId = TransactionServiceImpl.getCurrentTransactionId(session);
509        if (txId != null) {
510            // Force cache revalidation if in a transaction
511            servletResponse.addHeader(CACHE_CONTROL, "must-revalidate");
512            servletResponse.addHeader(CACHE_CONTROL, "max-age=0");
513            return;
514        }
515
516        final EntityTag etag = new EntityTag(resource.getEtagValue());
517        final Date date = resource.getLastModifiedDate();
518        final Date roundedDate = new Date();
519
520        if (date != null) {
521            roundedDate.setTime(date.getTime() - date.getTime() % 1000);
522        }
523
524        Response.ResponseBuilder builder = request.evaluatePreconditions(etag);
525        if ( builder != null ) {
526            builder = builder.entity("ETag mismatch");
527        } else {
528            builder = request.evaluatePreconditions(roundedDate);
529            if ( builder != null ) {
530                builder = builder.entity("Date mismatch");
531            }
532        }
533
534        if (builder != null && cacheControl ) {
535            final CacheControl cc = new CacheControl();
536            cc.setMaxAge(0);
537            cc.setMustRevalidate(true);
538            // here we are implicitly emitting a 304
539            // the exception is not an error, it's genuinely
540            // an exceptional condition
541            builder = builder.cacheControl(cc).lastModified(date).tag(etag);
542        }
543        if (builder != null) {
544            throw new WebApplicationException(builder.build());
545        }
546    }
547
548    protected static MediaType getSimpleContentType(final MediaType requestContentType) {
549        return requestContentType != null ? new MediaType(requestContentType.getType(), requestContentType.getSubtype())
550                : APPLICATION_OCTET_STREAM_TYPE;
551    }
552
553    protected static boolean isRdfContentType(final String contentTypeString) {
554        return contentTypeToLang(contentTypeString) != null;
555    }
556
557    protected void replaceResourceBinaryWithStream(final FedoraBinary result,
558                                                   final InputStream requestBodyStream,
559                                                   final ContentDisposition contentDisposition,
560                                                   final MediaType contentType,
561                                                   final String checksum) throws InvalidChecksumException {
562        final URI checksumURI = checksumURI(checksum);
563        final String originalFileName = contentDisposition != null ? contentDisposition.getFileName() : "";
564        final String originalContentType = contentType != null ? contentType.toString() : "";
565
566        result.setContent(requestBodyStream,
567                originalContentType,
568                checksumURI,
569                originalFileName,
570                storagePolicyDecisionPoint);
571    }
572
573    protected void replaceResourceWithStream(final FedoraResource resource,
574                                             final InputStream requestBodyStream,
575                                             final MediaType contentType,
576                                             final RdfStream resourceTriples) throws MalformedRdfException {
577        final Lang format = contentTypeToLang(contentType.toString());
578
579        final Model inputModel = createDefaultModel()
580                .read(requestBodyStream, getUri(resource).toString(), format.getName().toUpperCase());
581
582        resource.replaceProperties(translator(), inputModel, resourceTriples);
583    }
584
585    protected void patchResourcewithSparql(final FedoraResource resource,
586            final String requestBody,
587            final RdfStream resourceTriples)
588                    throws MalformedRdfException, AccessDeniedException {
589        if (resource instanceof NonRdfSourceDescription) {
590            // update the described resource instead
591            ((NonRdfSourceDescription) resource).getDescribedResource()
592                    .updateProperties(translator(), requestBody, resourceTriples);
593        } else {
594            resource.updateProperties(translator(), requestBody, resourceTriples);
595        }
596    }
597
598    /**
599     * Create a checksum URI object.
600     **/
601    private static URI checksumURI( final String checksum ) {
602        if (!isBlank(checksum)) {
603            return URI.create(checksum);
604        }
605        return null;
606    }
607}