001/** 002 * Copyright 2015 DuraSpace, Inc. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.fcrepo.connector.file; 017 018import static java.lang.System.currentTimeMillis; 019import static org.fcrepo.kernel.FedoraJcrTypes.CONTENT_DIGEST; 020import static org.fcrepo.kernel.FedoraJcrTypes.CONTENT_SIZE; 021import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_BINARY; 022import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_CONTAINER; 023import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_NON_RDF_SOURCE_DESCRIPTION; 024import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_RESOURCE; 025import static org.fcrepo.kernel.FedoraJcrTypes.JCR_CREATED; 026import static org.fcrepo.kernel.FedoraJcrTypes.JCR_LASTMODIFIED; 027import static org.fcrepo.kernel.utils.ContentDigest.asURI; 028import static org.modeshape.jcr.api.JcrConstants.JCR_DATA; 029import static org.modeshape.jcr.api.JcrConstants.NT_FILE; 030import static org.modeshape.jcr.api.JcrConstants.NT_FOLDER; 031import static org.modeshape.jcr.api.JcrConstants.NT_RESOURCE; 032 033import java.io.File; 034import java.net.URI; 035import java.util.Date; 036import java.util.HashMap; 037import java.io.IOException; 038import java.util.Map; 039 040import com.google.common.annotations.VisibleForTesting; 041 042import org.infinispan.schematic.document.Document; 043import org.modeshape.connector.filesystem.ExternalJsonSidecarExtraPropertyStore; 044import org.modeshape.connector.filesystem.FileSystemConnector; 045import org.modeshape.jcr.api.value.DateTime; 046import org.modeshape.jcr.api.nodetype.NodeTypeManager; 047import org.modeshape.jcr.spi.federation.DocumentChanges; 048import org.modeshape.jcr.spi.federation.DocumentReader; 049import org.modeshape.jcr.spi.federation.DocumentWriter; 050import org.modeshape.jcr.value.BinaryValue; 051import org.modeshape.jcr.value.Name; 052import org.modeshape.jcr.value.Property; 053import org.modeshape.jcr.value.basic.BasicSingleValueProperty; 054import org.slf4j.Logger; 055import org.slf4j.LoggerFactory; 056 057import javax.jcr.NamespaceRegistry; 058import javax.jcr.RepositoryException; 059 060/** 061 * This class extends the {@link FileSystemConnector} to enable the autocreation of Fedora-specific datastream and 062 * content properties. 063 * 064 * @author Andrew Woods 065 * Date: 1/30/14 066 */ 067public class FedoraFileSystemConnector extends FileSystemConnector { 068 069 private static final Logger LOGGER = LoggerFactory.getLogger(FedoraFileSystemConnector.class); 070 071 private static final String DELIMITER = "/"; 072 private static final String JCR_CONTENT = "jcr:content"; 073 private static final String JCR_CONTENT_SUFFIX = DELIMITER + JCR_CONTENT; 074 075 /** 076 * The string path for a {@link File} object that represents the top-level directory in which properties are 077 * stored. This is optional for this connector, but if set allows properties to be cached (greatly 078 * improving performance) for even read-only connectors. When this property is specified the extraPropertiesStore 079 * should be null (not specified) as it would be overridden by this. 080 */ 081 private String propertiesDirectoryPath; 082 private File propertiesDirectory; 083 084 @Override 085 public void initialize(final NamespaceRegistry registry, 086 final NodeTypeManager nodeTypeManager) throws RepositoryException, IOException { 087 super.initialize(registry, nodeTypeManager); 088 089 if (propertiesDirectoryPath != null) { 090 propertiesDirectory = new File(propertiesDirectoryPath); 091 if (!propertiesDirectory.exists() || !propertiesDirectory.isDirectory()) { 092 throw new RepositoryException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath 093 + ", does not exist or is not a directory."); 094 } else if ( !propertiesDirectory.canRead() || !propertiesDirectory.canWrite() ) { 095 throw new RepositoryException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath 096 + ", should be readable and writable."); 097 } 098 if (extraPropertiesStore() != null) { 099 LOGGER.warn("Extra properties store was specified but won't be used!"); 100 } 101 setExtraPropertiesStore(new ExternalJsonSidecarExtraPropertyStore(this, translator(), propertiesDirectory)); 102 } 103 } 104 105 /** 106 * This method returns the object/document for the node with the federated arg 'id'. 107 * 108 * Additionally, this method adds Fedora datastream and content properties to the result of the parent class 109 * implementation. 110 */ 111 @Override 112 public Document getDocumentById(final String id) { 113 LOGGER.debug("Getting Federated document: {}", id); 114 if (null == id || id.isEmpty()) { 115 LOGGER.warn("Can not get document with null id"); 116 return null; 117 } 118 119 final Document doc = super.getDocumentById(id); 120 if ( doc == null ) { 121 LOGGER.debug("Non-existent node, document is null: {}", id); 122 return doc; 123 } 124 125 final DocumentReader docReader = readDocument(doc); 126 final DocumentWriter docWriter = writeDocument(doc); 127 final long lastmod = fileFor(id).lastModified(); 128 LOGGER.debug("Adding lastModified={}", lastmod); 129 docWriter.addProperty(JCR_LASTMODIFIED, lastmod); 130 131 final String primaryType = docReader.getPrimaryTypeName(); 132 133 if (!docReader.getMixinTypeNames().contains(FEDORA_RESOURCE)) { 134 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_RESOURCE, id); 135 docWriter.addMixinType(FEDORA_RESOURCE); 136 } 137 138 // Is Fedora Datastream? 139 if (primaryType.equals(NT_FILE)) { 140 decorateDatastreamNode(docReader, docWriter); 141 142 // Is Fedora Content? 143 } else if (primaryType.equals(NT_RESOURCE)) { 144 decorateContentNode(docReader, docWriter, fileFor(id)); 145 146 // Is Fedora Object? 147 } else if (primaryType.equals(NT_FOLDER)) { 148 decorateObjectNode(docReader, docWriter); 149 } 150 151 return docWriter.document(); 152 } 153 154 /** 155 * Checks whether internally managed properties can and should be stored to 156 * an ExtraPropertiesStore. 157 * @return whether internally managed properties can and should be stored to 158 */ 159 protected boolean shouldCacheProperties() { 160 return extraPropertiesStore() != null && (!isReadonly() || this.propertiesDirectory != null); 161 } 162 163 @Override 164 public String sha1(final File file) { 165 final String cachedSha1 = getCachedSha1(file); 166 if (cachedSha1 == null) { 167 return computeAndCacheSha1(file); 168 } 169 return cachedSha1; 170 } 171 172 173 private String getCachedSha1(final File file) { 174 final String id = idFor(file) + JCR_CONTENT_SUFFIX; 175 if (extraPropertiesStore() != null) { 176 final Map<Name, Property> extraProperties = extraPropertiesStore().getProperties(id); 177 final Name digestName = nameFrom(CONTENT_DIGEST); 178 if (extraProperties.containsKey(digestName)) { 179 if (!hasBeenModifiedSincePropertiesWereStored(file, extraProperties.get(nameFrom(JCR_CREATED)))) { 180 LOGGER.trace("Found sha1 for {} in extra properties store.", id); 181 final String uriStr = ((URI) extraProperties.get(digestName).getFirstValue()).toString(); 182 return uriStr.substring(uriStr.indexOf("sha1:") + 5); 183 } 184 } 185 } else { 186 LOGGER.trace("No cache configured to contain object hashes."); 187 } 188 return null; 189 } 190 191 private String computeAndCacheSha1(final File file) { 192 final String id = idFor(file) + JCR_CONTENT_SUFFIX; 193 LOGGER.trace("Computing sha1 for {}.", id); 194 final String sha1 = super.sha1(file); 195 if (shouldCacheProperties()) { 196 final Map<Name, Property> updateMap = new HashMap<>(); 197 final Property digestProperty = new BasicSingleValueProperty(nameFrom(CONTENT_DIGEST), 198 asURI("SHA-1", sha1)); 199 final Property digestDateProperty = new BasicSingleValueProperty(nameFrom(JCR_CREATED), 200 factories().getDateFactory().create(file.lastModified())); 201 updateMap.put(digestProperty.getName(), digestProperty); 202 updateMap.put(digestDateProperty.getName(), digestDateProperty); 203 extraPropertiesStore().updateProperties(id, updateMap); 204 } 205 return sha1; 206 } 207 208 209 210 private static void decorateObjectNode(final DocumentReader docReader, final DocumentWriter docWriter) { 211 if (!docReader.getMixinTypeNames().contains(FEDORA_CONTAINER)) { 212 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_CONTAINER, docReader.getDocumentId()); 213 docWriter.addMixinType(FEDORA_CONTAINER); 214 } 215 } 216 217 private static void decorateDatastreamNode(final DocumentReader docReader, final DocumentWriter docWriter) { 218 if (!docReader.getMixinTypeNames().contains(FEDORA_NON_RDF_SOURCE_DESCRIPTION)) { 219 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_NON_RDF_SOURCE_DESCRIPTION, docReader.getDocumentId()); 220 docWriter.addMixinType(FEDORA_NON_RDF_SOURCE_DESCRIPTION); 221 } 222 } 223 224 private static void decorateContentNode(final DocumentReader docReader, 225 final DocumentWriter docWriter, 226 final File file) { 227 if (!docReader.getMixinTypeNames().contains(FEDORA_BINARY)) { 228 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_BINARY, docReader.getDocumentId()); 229 docWriter.addMixinType(FEDORA_BINARY); 230 } 231 232 if (null == docReader.getProperty(CONTENT_DIGEST) 233 || hasBeenModifiedSincePropertiesWereStored(file, docReader.getProperty(JCR_CREATED))) { 234 final BinaryValue binaryValue = getBinaryValue(docReader); 235 final String dsChecksum = binaryValue.getHexHash(); 236 final String dsURI = asURI("SHA-1", dsChecksum).toString(); 237 238 LOGGER.trace("Adding {} property of {} to {}", CONTENT_DIGEST, dsURI, docReader.getDocumentId()); 239 docWriter.addProperty(CONTENT_DIGEST, dsURI); 240 } 241 242 if (null == docReader.getProperty(CONTENT_SIZE)) { 243 final long binarySize = file.length(); 244 LOGGER.trace("Adding {} property of {} to {}", CONTENT_SIZE, binarySize, docReader.getDocumentId()); 245 docWriter.addProperty(CONTENT_SIZE, binarySize); 246 } 247 248 LOGGER.debug("Decorated data property at path: {}", docReader.getDocumentId()); 249 } 250 251 private static boolean hasBeenModifiedSincePropertiesWereStored(final File file, final Property lastModified) { 252 if (lastModified == null) { 253 LOGGER.trace("Hash for {} has not been computed yet.", file.getName()); 254 return true; 255 } 256 final DateTime datetime = (DateTime) lastModified.getFirstValue(); 257 if (datetime.toDate().equals(new Date(file.lastModified()))) { 258 return false; 259 } 260 LOGGER.trace("{} has been modified ({}) since hash was last computed ({}).", file.getName(), 261 new Date(file.lastModified()), datetime.toDate()); 262 return true; 263 } 264 265 private static BinaryValue getBinaryValue(final DocumentReader docReader) { 266 final Property binaryProperty = docReader.getProperty(JCR_DATA); 267 return (BinaryValue) binaryProperty.getFirstValue(); 268 } 269 270 /* Override write operations to also update the parent file's timestamp, so 271 its Last-Modified header correctly reflects changes to children. */ 272 @Override 273 public boolean removeDocument( final String id ) { 274 if ( super.removeDocument(id) ) { 275 touchParent(id); 276 return true; 277 } 278 return false; 279 } 280 281 @Override 282 public void storeDocument( final Document document ) { 283 super.storeDocument( document ); 284 touchParent(readDocument(document).getDocumentId()); 285 } 286 287 @Override 288 public void updateDocument( final DocumentChanges changes ) { 289 super.updateDocument( changes ); 290 touchParent( changes.getDocumentId() ); 291 } 292 293 /** 294 * Find the parent file, and set its timestamp to the current time. This 295 * timestamp will be used for populating the Last-Modified header. 296 * @param id the id 297 **/ 298 protected void touchParent( final String id ) { 299 if (!isRoot(id)) { 300 final File file = fileFor(id); 301 final File parent = file.getParentFile(); 302 parent.setLastModified(currentTimeMillis()); 303 } 304 } 305 306 /* Overriding so unit test can mock. */ 307 @Override 308 @VisibleForTesting 309 protected File fileFor( final String id ) { 310 return super.fileFor(id); 311 } 312 @Override 313 @VisibleForTesting 314 protected DocumentReader readDocument( final Document document ) { 315 return super.readDocument(document); 316 } 317 318 /* Overriding to make the FedoraFileSystemConnector is always read-only. */ 319 @Override 320 public boolean isReadonly() { 321 return true; 322 } 323}