001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.kernel.api.identifiers;
019
020import static org.fcrepo.kernel.api.FedoraTypes.FCR_ACL;
021import static org.fcrepo.kernel.api.FedoraTypes.FCR_METADATA;
022import static org.fcrepo.kernel.api.FedoraTypes.FCR_TOMBSTONE;
023import static org.fcrepo.kernel.api.FedoraTypes.FCR_VERSIONS;
024import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_ID_PREFIX;
025import static org.fcrepo.kernel.api.services.VersionService.MEMENTO_LABEL_FORMATTER;
026
027import java.time.Instant;
028import java.time.format.DateTimeParseException;
029import java.util.Arrays;
030import java.util.Objects;
031import java.util.Set;
032import java.util.regex.Pattern;
033import java.util.stream.Collectors;
034
035import org.fcrepo.kernel.api.exception.InvalidMementoPathException;
036import org.fcrepo.kernel.api.exception.InvalidResourceIdentifierException;
037
038import org.apache.commons.lang3.StringUtils;
039
040import com.fasterxml.jackson.annotation.JsonCreator;
041import com.fasterxml.jackson.annotation.JsonValue;
042import com.google.common.escape.Escaper;
043import com.google.common.net.PercentEscaper;
044
045/**
046 * Class to store contextual information about a Fedora ID.
047 *
048 * Differentiates between the original ID of the request and the actual resource we are operating on.
049 *
050 * Resource Id : the shortened ID of the base resource, mostly needed to access the correct persistence object.
051 * fullId : the full ID from the request, used in most cases.
052 *
053 * So a fullId of info:fedora/object1/another/fcr:versions/20000101121212 has an id of info:fedora/object1/another
054 *
055 * @author whikloj
056 * @since 6.0.0
057 */
058public class FedoraId {
059
060    /**
061     * These are strings that can cause problems with our storage layout
062     */
063    private static final Set<String> FORBIDDEN_ID_PART_STRINGS = Set.of(
064            "fcr-root",
065            ".fcrepo",
066            "fcr-container.nt"
067    );
068    private static final Set<String> FORBIDDEN_ID_PART_SUFFIXES = Set.of(
069            "~fcr-desc",
070            "~fcr-acl",
071            "~fcr-desc.nt",
072            "~fcr-acl.nt"
073    );
074
075    /**
076     * The Fedora ID with prefix and extensions. eg info:fedora/object1/another/fcr:versions/20000101121212
077     */
078    private final String fullId;
079
080    /**
081     * The Fedora ID with prefix but without extensions. eg info:fedora/object1/another
082     */
083    private final String baseId;
084
085    /**
086     * The Fedora ID without prefix but with extensions. eg /object1/another/fcr:versions/20000101121212
087     */
088    private final String fullPath;
089
090    /**
091     * The Fedora ID prefix and extensions URL encoded.
092     */
093    private final String encodedFullId;
094
095    private String hashUri;
096    private boolean isRepositoryRoot = false;
097    private boolean isNonRdfSourceDescription = false;
098    private boolean isAcl = false;
099    private boolean isMemento = false;
100    private boolean isTimemap = false;
101    private boolean isTombstone = false;
102    private Instant mementoDatetime;
103    private String mementoDatetimeStr;
104
105    private final static Set<Pattern> extensions = Set.of(FCR_TOMBSTONE, FCR_METADATA, FCR_ACL, FCR_VERSIONS)
106            .stream().map(Pattern::compile).collect(Collectors.toSet());
107
108    private final static Escaper fedoraIdEscaper = new PercentEscaper("-._~!$'()*,;&=@:+/?#", false);
109
110    /**
111     * Basic constructor.
112     * @param fullId The full identifier or null if root.
113     * @throws IllegalArgumentException If ID does not start with expected prefix.
114     */
115    private FedoraId(final String fullId) {
116        this.fullId = ensurePrefix(fullId).replaceAll("/+$", "");
117        // Carry the path of the request for any exceptions.
118        this.fullPath = this.fullId.substring(FEDORA_ID_PREFIX.length());
119        checkForInvalidPath();
120        this.baseId = processIdentifier();
121        enforceStorageLayoutNamingConstraints();
122        this.encodedFullId = fedoraIdEscaper.escape(this.fullId);
123    }
124
125    /**
126     * Static create method
127     * @param additions One or more strings to build an ID.
128     * @return The FedoraId.
129     */
130    @JsonCreator
131    public static FedoraId create(final String... additions) {
132        return new FedoraId(idBuilder(additions));
133    }
134
135    /**
136     * Get a FedoraId for repository root.
137     * @return The FedoraId for repository root.
138     */
139    public static FedoraId getRepositoryRootId() {
140        return new FedoraId(null);
141    }
142
143    /**
144     * Is the identifier for the repository root.
145     * @return true of id is equal to info:fedora/
146     */
147    public boolean isRepositoryRoot() {
148        return isRepositoryRoot;
149    }
150
151    /**
152     * Is the identifier for a Memento?
153     * @return true if the id is for the fcr:versions endpoint and has a memento datetime string after it.
154     */
155    public boolean isMemento() {
156        return isMemento;
157    }
158
159    /**
160     * Is the identifier for an ACL?
161     * @return true if the id is for the fcr:acl endpoint.
162     */
163    public boolean isAcl() {
164        return isAcl;
165    }
166
167    /**
168     * Is the identifier for a timemap?
169     * @return true if id for the fcr:versions endpoint and NOT a memento.
170     */
171    public boolean isTimemap() {
172        return isTimemap;
173    }
174
175    /**
176     * Is the identifier for a nonRdfSourceDescription?
177     * @return true if id for the fcr:metadata endpoint
178     */
179    public boolean isDescription() {
180        return isNonRdfSourceDescription;
181    }
182
183    /**
184     * Is the identifier for a tombstone
185     * @return true if id for the fcr:tombstone endpoint
186     */
187    public boolean isTombstone() {
188        return isTombstone;
189    }
190
191    /**
192     * Is the identifier for a hash uri?
193     * @return true if full id referenced a hash uri.
194     */
195    public boolean isHashUri() {
196        return hashUri != null;
197    }
198
199    /**
200     * Get the hash uri.
201     * @return the hash uri from the id or null if none.
202     */
203    public String getHashUri() {
204        return hashUri;
205    }
206
207    /**
208     * Returns the ID string for the physical resource the Fedora ID describes. In most cases, this ID is the same as
209     * the full resource ID. However, if the resource is a memento, timemap, or tombstone, then the ID returned here
210     * will be for the resource that contains it. Here are some examples:
211     *
212     * <ul>
213     *     <li>"info:fedora/object1/another/fcr:versions/20000101121212" =&gt; "info:fedora/object1/another"</li>
214     *     <li>"info:fedora/object1/another/fcr:metadata" =&gt; "info:fedora/object1/another/fcr:metadata"</li>
215     *     <li>"info:fedora/object1/another" =&gt; "info:fedora/object1/another"</li>
216     * </ul>
217     *
218     * @return the ID of the associated physical resource
219     */
220    public String getResourceId() {
221        if (isNonRdfSourceDescription) {
222            return baseId + "/" + FCR_METADATA;
223        } else if (isAcl) {
224            return baseId + "/" + FCR_ACL;
225        }
226        return baseId;
227    }
228
229    /**
230     * Behaves the same as {@link #getResourceId()} except it returns a FedoraId rather than a String.
231     *
232     * @return the ID of the associated physical resource
233     */
234    public FedoraId asResourceId() {
235        return FedoraId.create(getResourceId());
236    }
237
238    /**
239     * Returns the ID string for the base ID the Fedora ID describes. This value is the equivalent of the full ID
240     * with all extensions removed.
241     *
242     * <ul>
243     *     <li>"info:fedora/object1/another/fcr:versions/20000101121212" =&gt; "info:fedora/object1/another"</li>
244     *     <li>"info:fedora/object1/another/fcr:metadata" =&gt; "info:fedora/object1/another"</li>
245     *     <li>"info:fedora/object1/another" =&gt; "info:fedora/object1/another"</li>
246     * </ul>
247     *
248     * @return the ID of the associated base resource
249     */
250    public String getBaseId() {
251        return baseId;
252    }
253
254    /**
255     * Behaves the same as {@link #getBaseId()} except it returns a FedoraId rather than a String.
256     *
257     * @return the ID of the associated base resource
258     */
259    public FedoraId asBaseId() {
260        return FedoraId.create(getBaseId());
261    }
262
263    /**
264     * Return the original full ID.
265     * @return the id.
266     */
267    public String getFullId() {
268        return fullId;
269    }
270
271    /**
272     * Return the original full ID without the info:fedora prefix.
273     * @return the full id path part
274     */
275    public String getFullIdPath() {
276        return fullPath;
277    }
278
279    /**
280     * @return The encoded full ID.
281     */
282    public String getEncodedFullId() {
283        return encodedFullId;
284    }
285
286    /**
287     * Return the Memento datetime as Instant.
288     * @return The datetime or null if not a memento.
289     */
290    public Instant getMementoInstant() {
291        return mementoDatetime;
292    }
293
294    /**
295     * Return the Memento datetime string.
296     * @return The yyyymmddhhiiss memento datetime or null if not a Memento.
297     */
298    public String getMementoString() {
299        return mementoDatetimeStr;
300    }
301
302    /**
303     * Creates a new Fedora ID by joining the base ID of this Fedora ID with the specified string part. Any extensions
304     * that this Fedora ID contains are discarded. For example:
305     * <p>
306     * Resolving "child" against "info:fedora/object1/another/fcr:versions/20000101121212" yields
307     * "info:fedora/object1/another/child".
308     *
309     * @param child the part to join
310     * @return new Fedora ID in the form baseId/child
311     */
312    public FedoraId resolve(final String child) {
313        if (StringUtils.isBlank(child)) {
314            throw new IllegalArgumentException("Child cannot be blank");
315        }
316        return FedoraId.create(baseId, child);
317    }
318
319    /**
320     * Creates a new Fedora ID based on this ID that points to an ACL resource. The base ID, full ID without extensions,
321     * is always used to construct an ACL ID. If this ID is already an ACL, then it returns itself.
322     *
323     * @return ACL resource ID
324     */
325    public FedoraId asAcl() {
326        if (isAcl()) {
327            return this;
328        }
329
330        return FedoraId.create(getBaseId(), FCR_ACL);
331    }
332
333    /**
334     * Creates a new Fedora ID based on this ID that points to a binary description resource. There is no guarantee that
335     * the binary description resource exists. If this ID is already a description, then it returns itself. Otherwise,
336     * it uses the base ID, without extensions, to construct the new ID. If this Fedora ID is a timemap or memento or
337     * a hash uri, then these extensions are applied to new description ID as well.
338     *
339     * @return description resource ID
340     */
341    public FedoraId asDescription() {
342        if (isDescription()) {
343            return this;
344        }
345
346        if (isTimemap()) {
347            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS);
348        }
349
350        if (isMemento()) {
351            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS, appendHashIfPresent(getMementoString()));
352        }
353
354        return FedoraId.create(getBaseId(), appendHashIfPresent(FCR_METADATA));
355    }
356
357    /**
358     * Creates a new Fedora ID based on this ID that points to a tombstone resource. If this ID is already a tombstone,
359     * then it returns itself. Otherwise, it uses the base ID, without extensions, to construct the new ID.
360     *
361     * @return tombstone resource ID
362     */
363    public FedoraId asTombstone() {
364        if (isTombstone()) {
365            return this;
366        }
367
368        return FedoraId.create(getBaseId(), FCR_TOMBSTONE);
369    }
370
371    /**
372     * Creates a new Fedora ID based on this ID that points to a timemap resource. If this ID is already a timemap,
373     * then it returns itself. Otherwise, it uses the base ID, without extensions, to construct the new ID. Unless
374     * this ID is a binary description, in which case the new ID is constructed using the full ID.
375     *
376     * @return timemap resource ID
377     */
378    public FedoraId asTimemap() {
379        if (isTimemap()) {
380            return this;
381        }
382
383        if (isDescription()) {
384            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS);
385        }
386
387        return FedoraId.create(getBaseId(), FCR_VERSIONS);
388    }
389
390    /**
391     * Creates a new Fedora ID based on this ID that points to a memento resource. If this ID is already a memento,
392     * then it returns itself. If this ID is an ACL, tombstone, or timemap, then the new ID is constructed using this
393     * ID's base ID. Otherwise, the full ID is used.
394     *
395     * @param mementoInstant memento representation
396     * @return memento resource ID
397     */
398    public FedoraId asMemento(final Instant mementoInstant) {
399        return asMemento(MEMENTO_LABEL_FORMATTER.format(mementoInstant));
400    }
401
402    /**
403     * Creates a new Fedora ID based on this ID that points to a memento resource. If this ID is already a memento,
404     * then it returns itself. If this ID is an ACL, tombstone, or timemap, then the new ID is constructed using this
405     * ID's base ID. If this ID is a description, then the new ID is appended to the description ID.
406     *
407     * @param mementoString string memento representation
408     * @return memento resource ID
409     */
410    public FedoraId asMemento(final String mementoString) {
411        if (isMemento()) {
412            return this;
413        }
414
415        if (isDescription()) {
416            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS, appendHashIfPresent(mementoString));
417        }
418
419        if (isAcl() || isTombstone() || isTimemap()) {
420            return FedoraId.create(getBaseId(), FCR_VERSIONS, mementoString);
421        }
422
423        return FedoraId.create(getBaseId(), FCR_VERSIONS, appendHashIfPresent(mementoString));
424    }
425
426    @Override
427    public boolean equals(final Object obj) {
428        if (obj == this) {
429            return true;
430        }
431
432        if (!(obj instanceof FedoraId)) {
433            return false;
434        }
435
436        final var testObj = (FedoraId) obj;
437        return Objects.equals(testObj.getFullId(), this.getFullId());
438    }
439
440    @Override
441    public int hashCode() {
442        return getFullId().hashCode();
443    }
444
445    @JsonValue
446    @Override
447    public String toString() {
448        return getFullId();
449    }
450
451    /**
452     * Concatenates all the parts with slashes
453     * @param parts array of strings
454     * @return the concatenated string.
455     */
456    private static String idBuilder(final String... parts) {
457        if (parts != null && parts.length > 0) {
458            return Arrays.stream(parts).filter(Objects::nonNull)
459                    .map(s -> s.startsWith("/") ? s.substring(1) : s)
460                    .map(s -> s.endsWith("/") ? s.substring(0, s.length() - 1 ) : s)
461                    .collect(Collectors.joining("/"));
462        }
463        return "";
464    }
465
466    /**
467     * Ensure the ID has the info:fedora/ prefix.
468     * @param id the identifier, if null assume repository root (info:fedora/)
469     * @return the identifier with the info:fedora/ prefix.
470     */
471    private static String ensurePrefix(final String id) {
472        if (id == null) {
473            return FEDORA_ID_PREFIX;
474        }
475        return id.startsWith(FEDORA_ID_PREFIX) ? id : FEDORA_ID_PREFIX + "/" + id;
476    }
477
478    /**
479     * Process the original ID into its parts without using a regular expression.
480     */
481    private String processIdentifier() {
482        // Regex pattern which decomposes a http resource uri into components
483        // The first group determines if it is an fcr:metadata non-rdf source.
484        // The second group determines if the path is for a memento or timemap.
485        // The third group allows for a memento identifier.
486        // The fourth group for allows ACL.
487        // The fifth group allows for any hashed suffixes.
488        // ".*?(/" + FCR_METADATA + ")?(/" + FCR_VERSIONS + "(/\\d{14})?)?(/" + FCR_ACL + ")?(\\#\\S+)?$");
489        if (this.fullId.contains("//")) {
490            throw new InvalidResourceIdentifierException(String.format("Path contains empty element! %s", fullPath));
491        }
492        String processID = this.fullId;
493        if (processID.equals(FEDORA_ID_PREFIX)) {
494            this.isRepositoryRoot = true;
495            return this.fullId;
496        }
497        if (processID.contains("#")) {
498            final String[] hashSplits = StringUtils.splitPreserveAllTokens(processID, "#");
499            if (hashSplits.length > 2) {
500                throw new InvalidResourceIdentifierException(String.format(
501                        "Path <%s> is invalid. It may not contain more than one #",
502                        fullPath));
503            }
504            this.hashUri = hashSplits[1];
505            processID = hashSplits[0];
506        }
507        if (processID.contains(FCR_TOMBSTONE)) {
508            processID = removePart(processID, FCR_TOMBSTONE);
509            this.isTombstone = true;
510        }
511        if (processID.contains(FCR_ACL)) {
512            processID = removePart(processID, FCR_ACL);
513            this.isAcl = true;
514        }
515        if (processID.contains(FCR_VERSIONS)) {
516            final String[] versionSplits = split(processID, FCR_VERSIONS);
517            if (versionSplits.length > 2) {
518                throw new InvalidResourceIdentifierException(String.format(
519                        "Path <%s> is invalid. May not contain multiple %s parts.",
520                        fullPath, FCR_VERSIONS));
521            } else if (versionSplits.length == 2 && versionSplits[1].isEmpty()) {
522                this.isTimemap = true;
523            } else {
524                final String afterVersion = versionSplits[1];
525                if (afterVersion.matches("/\\d{14}")) {
526                    this.isMemento = true;
527                    this.mementoDatetimeStr = afterVersion.substring(1);
528                    try {
529                        this.mementoDatetime = Instant.from(MEMENTO_LABEL_FORMATTER.parse(this.mementoDatetimeStr));
530                    } catch (final DateTimeParseException e) {
531                        throw new InvalidMementoPathException(String.format("Invalid request for memento at %s",
532                                fullPath));
533                    }
534                } else if (afterVersion.equals("/")) {
535                    // Possible trailing slash?
536                    this.isTimemap = true;
537                } else {
538                    throw new InvalidMementoPathException(String.format("Invalid request for memento at %s", fullPath));
539                }
540            }
541            processID = versionSplits[0];
542        }
543        if (processID.contains(FCR_METADATA)) {
544            processID = removePart(processID, FCR_METADATA);
545            this.isNonRdfSourceDescription = true;
546        }
547        if (processID.endsWith("/")) {
548            processID = processID.replaceAll("/+$", "");
549        }
550
551        return processID;
552    }
553
554    private String removePart(final String original, final String part) {
555        final String[] split = split(original, part);
556        if (split.length > 2 || (split.length == 2 && !split[1].isEmpty())) {
557            throw new InvalidResourceIdentifierException("Path is invalid:" + fullPath);
558        }
559        return split[0];
560    }
561
562    private String[] split(final String original, final String part) {
563        return StringUtils.splitByWholeSeparatorPreserveAllTokens(original, "/" + part);
564    }
565
566    /**
567     * Check for obvious path errors.
568     */
569    private void checkForInvalidPath() {
570        // Check for combinations of endpoints not allowed.
571        if (
572            // ID contains fcr:acl or fcr:tombstone AND fcr:metadata or fcr:versions
573            ((this.fullId.contains(FCR_ACL) || this.fullId.contains(FCR_TOMBSTONE)) &&
574                (this.fullId.contains(FCR_METADATA) || this.fullId.contains(FCR_VERSIONS))) ||
575            // or ID contains fcr:acl AND fcr:tombstone
576            (this.fullId.contains(FCR_TOMBSTONE) && this.fullId.contains(FCR_ACL))
577        ) {
578            throw new InvalidResourceIdentifierException(String.format("Path is invalid: %s", fullPath));
579        }
580        // Ensure we don't have 2 of any of the extensions, ie. info:fedora/object/fcr:acl/fcr:acl, etc.
581        for (final Pattern extension : extensions) {
582            if (extension.matcher(this.fullId).results().count() > 1) {
583                throw new InvalidResourceIdentifierException(String.format("Path is invalid: %s", fullPath));
584            }
585        }
586    }
587
588    /**
589     * Ensures that the Fedora ID does not violate any naming restrictions that are in place prevent collisions on disk.
590     * These restrictions are based on the following naming conventions:
591     *      https://wiki.lyrasis.org/display/FF/Design+-+Fedora+OCFL+Object+Structure
592     *
593     * All ids should be validated on resource creation
594     */
595    private void enforceStorageLayoutNamingConstraints() {
596        final var finalPart = StringUtils.substringAfterLast(baseId, "/");
597
598        if (FORBIDDEN_ID_PART_STRINGS.contains(finalPart)) {
599            throw new InvalidResourceIdentifierException(
600                    String.format("Invalid resource ID. IDs may not contain the string '%s'.", finalPart));
601        }
602
603        FORBIDDEN_ID_PART_SUFFIXES.forEach(suffix -> {
604            if (finalPart.endsWith(suffix) && !finalPart.equals(suffix)) {
605                throw new InvalidResourceIdentifierException(
606                        String.format("Invalid resource ID. IDs may not end with '%s'.", suffix));
607            }
608        });
609    }
610
611    private String appendHashIfPresent(final String original) {
612        if (isHashUri()) {
613            return original + "#" + getHashUri();
614        }
615        return original;
616    }
617
618}