/*
 * Licensed to DuraSpace under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.
 *
 * DuraSpace licenses this file to you under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except in
 * compliance with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fcrepo.kernel.modeshape;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Timer;
import org.apache.jena.rdf.model.Resource;
import org.fcrepo.kernel.api.exception.InvalidChecksumException;
import org.fcrepo.kernel.api.exception.PathNotFoundRuntimeException;
import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
import org.fcrepo.kernel.api.identifiers.IdentifierConverter;
import org.fcrepo.kernel.api.models.NonRdfSourceDescription;
import org.fcrepo.kernel.api.models.FedoraBinary;
import org.fcrepo.kernel.api.models.FedoraResource;
import org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint;
import org.fcrepo.kernel.api.RdfStream;
import org.fcrepo.kernel.api.utils.CacheEntry;
import org.fcrepo.kernel.api.utils.ContentDigest;
import org.fcrepo.kernel.api.utils.FixityResult;
import org.fcrepo.kernel.modeshape.rdf.impl.FixityRdfContext;
import org.fcrepo.kernel.modeshape.utils.impl.CacheEntryFactory;
import org.fcrepo.metrics.RegistryService;
import org.modeshape.jcr.api.Binary;
import org.modeshape.jcr.api.ValueFactory;
import org.slf4j.Logger;

import javax.jcr.Node;
import javax.jcr.PathNotFoundException;
import javax.jcr.Property;
import javax.jcr.RepositoryException;
import javax.jcr.Value;
import javax.jcr.version.Version;
import javax.jcr.version.VersionHistory;
import java.io.InputStream;
import java.net.URI;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import static com.codahale.metrics.MetricRegistry.name;
import static org.apache.jena.datatypes.xsd.XSDDatatype.XSDstring;
import static org.fcrepo.kernel.api.utils.ContentDigest.DIGEST_ALGORITHM.SHA1;
import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.FIELD_DELIMITER;
import static org.fcrepo.kernel.modeshape.services.functions.JcrPropertyFunctions.property2values;
import static org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils.isFedoraBinary;
import static org.modeshape.jcr.api.JcrConstants.JCR_CONTENT;
import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
import static org.slf4j.LoggerFactory.getLogger;

/**
 * A {@link FedoraBinary} backed by a JCR {@link Node}. The binary content is held in the
 * node's {@code JCR_DATA} property; the content digests and content size are recorded
 * alongside it as node properties.
 *
 * @author cabeer
 * @since 9/19/14
 */
public class FedoraBinaryImpl extends FedoraResourceImpl implements FedoraBinary {

    private static final Logger LOGGER = getLogger(FedoraBinaryImpl.class);


    static final RegistryService registryService = RegistryService.getInstance();

    /** Incremented once per fixity check requested through {@code getFixity}. */
    static final Counter fixityCheckCounter
            = registryService.getMetrics().counter(name(FedoraBinary.class, "fixity-check-counter"));

    /** Times each fixity check performed through {@code getFixity}. */
    static final Timer timer = registryService.getMetrics().timer(
            name(NonRdfSourceDescription.class, "fixity-check-time"));

    /** Updated with the length of the data property every time a content node is decorated. */
    static final Histogram contentSizeHistogram =
            registryService.getMetrics().histogram(name(FedoraBinary.class, "content-size"));

    /**
     * Wrap an existing Node as a Fedora Binary. If the node is new, it is decorated with the
     * FedoraBinary properties right away.
     *
     * @param node the node
     */
    public FedoraBinaryImpl(final Node node) {
        super(node);

        if (node.isNew()) {
            initializeNewBinaryProperties();
        }
    }

    /**
     * Decorate the wrapped node with the FedoraBinary mixin and digest/size properties.
     * Failures are logged rather than propagated, so construction is best-effort.
     */
    private void initializeNewBinaryProperties() {
        try {
            decorateContentNode(node, new HashSet<>());
        } catch (final RepositoryException e) {
            // A trailing Throwable is logged as the exception by SLF4J, not as a format argument,
            // so only the node gets a placeholder.
            LOGGER.warn("Could not decorate {} with FedoraBinary properties", node, e);
        }
    }

    /**
     * Get the description of this binary, which wraps the parent of this binary's node.
     *
     * @return the description resource
     * @throws RepositoryRuntimeException if the parent node cannot be retrieved
     */
    @Override
    public FedoraResource getDescription() {
        try {
            return new NonRdfSourceDescriptionImpl(getNode().getParent());
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContent()
     */
    @Override
    public InputStream getContent() {
        try {
            return getBinaryContent().getStream();
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * Retrieve the JCR Binary object
     *
     * @return a JCR-wrapped Binary object
     * @throws PathNotFoundRuntimeException if a {@link PathNotFoundException} is raised while reading the data
     * @throws RepositoryRuntimeException on any other repository failure
     */
    private javax.jcr.Binary getBinaryContent() {
        try {
            return getProperty(JCR_DATA).getBinary();
        } catch (final PathNotFoundException e) {
            throw new PathNotFoundRuntimeException(e);
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#setContent(java.io.InputStream,
     *      java.lang.String, java.util.Collection, java.lang.String,
     *      org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint)
     */
    @Override
    public void setContent(final InputStream content, final String contentType,
                           final Collection<URI> checksums, final String originalFileName,
                           final StoragePolicyDecisionPoint storagePolicyDecisionPoint)
            throws InvalidChecksumException {

        try {
            final Node contentNode = getNode();

            if (contentNode.canAddMixin(FEDORA_BINARY)) {
                contentNode.addMixin(FEDORA_BINARY);
            }

            if (contentType != null) {
                contentNode.setProperty(HAS_MIME_TYPE, contentType);
            }

            if (originalFileName != null) {
                contentNode.setProperty(FILENAME, originalFileName);
            }

            LOGGER.debug("Created content node at path: {}", contentNode.getPath());

            String hint = null;

            if (storagePolicyDecisionPoint != null) {
                hint = storagePolicyDecisionPoint.evaluatePolicies(this);
            }
            final ValueFactory modevf =
                    (ValueFactory) node.getSession().getValueFactory();
            final Binary binary = modevf.createBinary(content, hint);

            /*
             * This next line of code deserves explanation. If we chose the
             * simpler line: Property dataProperty =
             * contentNode.setProperty(JCR_DATA, requestBodyStream); then the JCR
             * would not block on the stream's completion, and we would return to
             * the requester before the mutation to the repo had actually completed.
             * So instead we use createBinary(requestBodyStream), because its
             * contract specifies: "The passed InputStream is closed before this
             * method returns either normally or because of an exception." which
             * lets us block and not return until the job is done! The simpler code
             * may still be useful to us for an asynchronous method that we develop
             * later.
             */
            final Property dataProperty = contentNode.setProperty(JCR_DATA, binary);

            // Ensure provided checksums are valid
            final Collection<URI> nonNullChecksums = (null == checksums) ? new HashSet<>() : checksums;
            verifyChecksums(nonNullChecksums, dataProperty);

            // decorateContentNode only reads the collection, so the caller's checksums are left untouched
            decorateContentNode(contentNode, nonNullChecksums);
            touch();
            ((FedoraResourceImpl) getDescription()).touch();

            LOGGER.debug("Created data property at path: {}", dataProperty.getPath());

        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * This method ensures that the arg checksums are valid against the binary associated with the arg dataProperty.
     * If one or more of the checksums are invalid, an InvalidChecksumException is thrown.
     *
     * @param checksums that the user provided
     * @param dataProperty containing the binary against which the checksums will be verified
     * @throws InvalidChecksumException if at least one provided checksum does not match the computed value;
     *         the message lists every mismatching pair
     * @throws RepositoryException if the binary cannot be read or its fixity cannot be computed
     */
    private void verifyChecksums(final Collection<URI> checksums, final Property dataProperty)
            throws InvalidChecksumException, RepositoryException {

        final Map<URI, URI> checksumErrors = new HashMap<>();

        // Loop through provided checksums validating against computed values
        for (final URI checksum : checksums) {
            final String algorithm = ContentDigest.getAlgorithm(checksum);

            if (algorithm.equals(SHA1.algorithm)) {
                // The case internally supported by ModeShape
                final String dsSHA1 = ((Binary) dataProperty.getBinary()).getHexHash();
                final URI dsSHA1Uri = ContentDigest.asURI(SHA1.algorithm, dsSHA1);

                if (!dsSHA1Uri.equals(checksum)) {
                    LOGGER.debug("Failed checksum test");
                    checksumErrors.put(checksum, dsSHA1Uri);
                }
            } else {
                // The case that requires re-computing the checksum
                final CacheEntry cacheEntry = CacheEntryFactory.forProperty(dataProperty);
                cacheEntry.checkFixity(algorithm).stream().findFirst().ifPresent(
                    fixityResult -> {
                        if (!fixityResult.matches(checksum)) {
                            LOGGER.debug("Failed checksum test");
                            checksumErrors.put(checksum, fixityResult.getComputedChecksum());
                        }
                    }
                );
            }
        }

        // Throw an exception if any checksum errors occurred
        if (!checksumErrors.isEmpty()) {
            final String template = "Checksum Mismatch of %1$s and %2$s\n";
            final StringBuilder error = new StringBuilder();
            checksumErrors.forEach((key, value) -> error.append(String.format(template, key, value)));
            throw new InvalidChecksumException(error.toString());
        }

    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentSize()
     *
     * Returns -1 when the size property is absent or cannot be read.
     */
    @Override
    public long getContentSize() {
        try {
            if (hasProperty(CONTENT_SIZE)) {
                return getProperty(CONTENT_SIZE).getLong();
            }
        } catch (final RepositoryException e) {
            LOGGER.info("Could not get contentSize(): {}", e.getMessage());
        }

        return -1L;
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentDigest()
     *
     * Returns ContentDigest.missingChecksum() when no stored digest matches the selected
     * algorithm or when the repository cannot be read.
     */
    @Override
    public URI getContentDigest() {
        try {
            // Determine which digest algorithm to use; an algorithm property without any
            // value falls back to the default instead of failing on an empty Optional
            String algorithm = ContentDigest.DEFAULT_ALGORITHM;
            if (hasProperty(DEFAULT_DIGEST_ALGORITHM)) {
                final Optional<Value> storedAlgorithm =
                        property2values.apply(getProperty(DEFAULT_DIGEST_ALGORITHM)).findFirst();
                if (storedAlgorithm.isPresent()) {
                    algorithm = storedAlgorithm.get().getString();
                }
            }
            final String algorithmWithoutStringType = algorithm.replace(FIELD_DELIMITER + XSDstring.getURI(), "");

            if (hasProperty(CONTENT_DIGEST)) {
                // Select the stored digest that matches the digest algorithm
                final Optional<Value> digestValue =
                        property2values.apply(getProperty(CONTENT_DIGEST)).filter(digest -> {
                            try {
                                final URI digestUri = URI.create(digest.getString());
                                return algorithmWithoutStringType.equalsIgnoreCase(
                                        ContentDigest.getAlgorithm(digestUri));

                            } catch (final RepositoryException e) {
                                LOGGER.warn("Exception thrown when getting digest property {}, {}",
                                        digest, e.getMessage());
                                return false;
                            }
                        }).findFirst();

                // Success, return the digest value
                if (digestValue.isPresent()) {
                    return URI.create(digestValue.get().getString());
                }
            }
            LOGGER.warn("No digest value was found to match the algorithm: {}", algorithmWithoutStringType);
        } catch (final RepositoryException e) {
            LOGGER.warn("Could not get content digest: {}", e.getMessage());
        }

        return ContentDigest.missingChecksum();
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getMimeType()
     *
     * Falls back to "application/octet-stream" when no mime type property is stored.
     */
    @Override
    public String getMimeType() {
        try {
            if (hasProperty(HAS_MIME_TYPE)) {
                // Strip the string-datatype suffix that may be stored with the value
                return getProperty(HAS_MIME_TYPE).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), "");
            }
            return "application/octet-stream";
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getFilename()
     *
     * Falls back to the name of the parent node when no filename property is stored.
     */
    @Override
    public String getFilename() {
        try {
            if (hasProperty(FILENAME)) {
                return getProperty(FILENAME).getString();
            }
            return node.getParent().getName();
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * Check fixity using the digest and size currently recorded for this binary.
     *
     * @param idTranslator the identifier translator handed to the fixity RDF context
     * @return the fixity results as an RDF stream
     */
    @Override
    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator) {
        return getFixity(idTranslator, getContentDigest(), getContentSize());
    }

    /**
     * Check fixity against the given digest and size. Each call increments the fixity-check
     * counter and is timed.
     *
     * @param idTranslator the identifier translator handed to the fixity RDF context
     * @param digestUri the expected digest; its algorithm selects the fixity computation
     * @param size the expected size; a negative value means "use the size of the stored binary"
     * @return the fixity results as an RDF stream
     * @throws RepositoryRuntimeException if the repository cannot be read
     */
    @Override
    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator,
                               final URI digestUri,
                               final long size) {

        fixityCheckCounter.inc();

        try (final Timer.Context context = timer.time()) {

            LOGGER.debug("Checking resource: {}", getPath());

            final String algorithm = ContentDigest.getAlgorithm(digestUri);

            final long contentSize = size < 0 ? getBinaryContent().getSize() : size;

            final Collection<FixityResult> fixityResults
                    = CacheEntryFactory.forProperty(getProperty(JCR_DATA)).checkFixity(algorithm);

            return new FixityRdfContext(this, idTranslator, fixityResults, digestUri, contentSize);
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * When deleting the binary, we also need to clean up the description document.
     */
    @Override
    public void delete() {
        // Resolve the description before this binary is deleted
        final FedoraResource description = getDescription();

        super.delete();

        description.delete();
    }

    /**
     * Get the base version, as reported by this binary's description.
     *
     * @return the base version of the description
     */
    @Override
    public Version getBaseVersion() {
        return getDescription().getBaseVersion();
    }

    /**
     * Add the FedoraBinary mixin to the content node (when permitted) and, if the node has a
     * data property, record its digests and size as node properties. The SHA-1 hash reported by
     * the binary is always stored in addition to the supplied checksums. The supplied collection
     * is only read, never modified.
     *
     * @param contentNode the node to decorate; a null node is logged and ignored
     * @param checksums additional checksums to store with the content
     * @throws RepositoryException if the node cannot be read or updated
     */
    private static void decorateContentNode(final Node contentNode, final Collection<URI> checksums)
            throws RepositoryException {
        if (contentNode == null) {
            LOGGER.warn("{} node appears to be null!", JCR_CONTENT);
            return;
        }
        if (contentNode.canAddMixin(FEDORA_BINARY)) {
            contentNode.addMixin(FEDORA_BINARY);
        }

        if (contentNode.hasProperty(JCR_DATA)) {
            final Property dataProperty = contentNode.getProperty(JCR_DATA);
            final Binary binary = (Binary) dataProperty.getBinary();
            final String dsChecksum = binary.getHexHash();
            final long contentLength = dataProperty.getLength();

            contentSizeHistogram.update(contentLength);

            // Work on a private, de-duplicated copy: the caller's collection may be immutable,
            // and duplicates must not leave empty slots in the stored array.
            final Collection<String> digests = checksums.stream()
                    .map(Object::toString)
                    .collect(Collectors.toCollection(HashSet::new));
            digests.add(ContentDigest.asURI(SHA1.algorithm, dsChecksum).toString());

            contentNode.setProperty(CONTENT_DIGEST, digests.toArray(new String[0]));
            contentNode.setProperty(CONTENT_SIZE, contentLength);

            LOGGER.debug("Decorated data property at path: {}", dataProperty.getPath());
        }
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraResource#getVersionHistory()
     *
     * The history is looked up at the path of this binary's description.
     */
    @Override
    public VersionHistory getVersionHistory() {
        try {
            return getVersionManager().getVersionHistory(getDescription().getPath());
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }


    /**
     * Report whether this binary is versioned, as reported by its description.
     *
     * @return whether the description is versioned
     */
    @Override
    public boolean isVersioned() {
        return getDescription().isVersioned();
    }

    /**
     * Enable versioning on this binary and on its description.
     */
    @Override
    public void enableVersioning() {
        super.enableVersioning();
        getDescription().enableVersioning();
    }

    /**
     * Disable versioning on this binary and on its description.
     */
    @Override
    public void disableVersioning() {
        super.disableVersioning();
        getDescription().disableVersioning();
    }

    /**
     * Check if the given node is a Fedora binary
     *
     * @param node the given node
     * @return whether the given node is a Fedora binary
     */
    public static boolean hasMixin(final Node node) {
        return isFedoraBinary.test(node);
    }
}