package nl.basjes.parse.httpdlog.dissectors;

import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.EnumSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nl.basjes.parse.core.Casts;
import nl.basjes.parse.core.Dissector;
import nl.basjes.parse.core.Parsable;
import nl.basjes.parse.core.ParsedField;
import nl.basjes.parse.core.exceptions.DissectionFailure;
import nl.basjes.parse.httpdlog.shaded.org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.text.StringEscapeUtils;

/* loaded from: input_file:nl/basjes/parse/httpdlog/dissectors/HttpUriDissector.class */
/**
 * Dissector that takes a value of type {@code HTTP.URI} and splits it into
 * protocol, userinfo, host, port, path, query string and ref (fragment).
 *
 * <p>Before the value is handed to {@link URI#create(String)} it is cleaned up
 * (see {@link #cleanUri(String)}) so that a number of malformed inputs can still
 * be parsed instead of being rejected.
 *
 * <p>Only the outputs that were requested through
 * {@link #prepareForDissect(String, String)} are emitted by
 * {@link #dissect(Parsable, String)}.
 */
public class HttpUriDissector extends Dissector {
    private static final String INPUT_TYPE = "HTTP.URI";

    /** Prefix put in front of a URI that starts with '/' so {@link URI} can parse it. */
    private static final String DUMMY_PREFIX = "dummy-protocol://dummy.host.name";

    // Which outputs have been requested via prepareForDissect.
    private boolean wantProtocol = false;
    private boolean wantUserinfo = false;
    private boolean wantHost = false;
    private boolean wantPort = false;
    private boolean wantPath = false;
    private boolean wantQuery = false;
    private boolean wantRef = false;

    /**
     * The byte values that {@link URLCodec#encodeUrl(BitSet, byte[])} must leave untouched.
     * Despite the name, a SET bit means "pass through as-is"; the CLEARED bits are the
     * "bad" characters that get percent-encoded.
     */
    private static final BitSet BAD_URI_CHARS = new BitSet(256);

    static {
        // toIndex is exclusive, so bit 255 stays cleared. Byte 0xFF never occurs in
        // UTF-8 encoded text, so this has no practical effect.
        // NOTE(review): bytes 0x80-0xFE are passed through unencoded and are later decoded
        // as US-ASCII, which cannot represent them - confirm whether non-ASCII input is
        // expected to survive this step.
        BAD_URI_CHARS.set(0, 255);

        // Printable characters that java.net.URI refuses to accept unencoded.
        for (char bad : "{}|\\^[]` <>\"".toCharArray()) {
            BAD_URI_CHARS.clear(bad);
        }

        // All ASCII control characters: 0x00-0x1F and DEL (0x7F).
        // BitSet.clear(from, to) excludes 'to', so 32 is needed to include 0x1F.
        BAD_URI_CHARS.clear(0, 32);
        BAD_URI_CHARS.clear(127);
    }

    /** A '%' that is NOT followed by two hex digits (including a '%' at or near the end). */
    private static final Pattern BAD_ESCAPE_PATTERN =
        Pattern.compile("%([^0-9a-fA-F]|[0-9a-fA-F][^0-9a-fA-F]|.$|$)");

    /** A '#' directly after a '='. */
    private static final Pattern EQUALS_HASH_PATTERN = Pattern.compile("=#");

    /** A '#' directly before a '&'. */
    private static final Pattern HASH_AMP_PATTERN = Pattern.compile("#&");

    /** Two '#' characters with anything in between. */
    private static final Pattern DOUBLE_HASH_PATTERN = Pattern.compile("#(.*)#");

    /** A hexadecimal HTML character reference ("#xHH;") that is missing its leading '&'. */
    private static final Pattern ALMOST_HTML_ENCODED =
        Pattern.compile("([^&])(#x[0-9a-fA-F][0-9a-fA-F];)");

    /**
     * @return the type of the input this dissector handles: {@code HTTP.URI}.
     */
    public String getInputType() {
        return INPUT_TYPE;
    }

    /**
     * @return a new list with all {@code TYPE:name} outputs this dissector can produce.
     */
    public List<String> getPossibleOutput() {
        List<String> outputs = new ArrayList<>();
        outputs.add("HTTP.PROTOCOL:protocol");
        outputs.add("HTTP.USERINFO:userinfo");
        outputs.add("HTTP.HOST:host");
        outputs.add("HTTP.PORT:port");
        outputs.add("HTTP.PATH:path");
        outputs.add("HTTP.QUERYSTRING:query");
        outputs.add("HTTP.REF:ref");
        return outputs;
    }

    /**
     * Registers that the given output is wanted, so {@link #dissect(Parsable, String)}
     * will emit it.
     *
     * @param inputname  the name of the input field
     * @param outputname the name of the requested output field
     * @return the casts supported for the requested output, or {@code Casts.NO_CASTS}
     *         when the output is not one this dissector produces
     */
    public EnumSet<Casts> prepareForDissect(String inputname, String outputname) {
        String fieldName = extractFieldName(inputname, outputname);
        if (fieldName == null) {
            // A switch on a null String would throw; an unknown name simply has no casts.
            return Casts.NO_CASTS;
        }
        switch (fieldName) {
            case "protocol":
                this.wantProtocol = true;
                return Casts.STRING_ONLY;
            case "userinfo":
                this.wantUserinfo = true;
                return Casts.STRING_ONLY;
            case "host":
                this.wantHost = true;
                return Casts.STRING_ONLY;
            case "port":
                this.wantPort = true;
                return Casts.STRING_OR_LONG;
            case "path":
                this.wantPath = true;
                return Casts.STRING_ONLY;
            case "query":
                this.wantQuery = true;
                return Casts.STRING_ONLY;
            case "ref":
                this.wantRef = true;
                return Casts.STRING_ONLY;
            default:
                return Casts.NO_CASTS;
        }
    }

    /**
     * Parses the URI found in the input field and adds the requested parts to the parsable.
     *
     * <p>A {@code null} or empty input produces no output at all. Path, query and ref are
     * emitted for every URI; protocol, userinfo, host and port only when the URI does not
     * start with a '/'. The port is only emitted when the URI actually contains one.
     *
     * @param parsable  the record to read the input from and add the results to
     * @param inputname the name of the input field
     * @throws DissectionFailure when the (cleaned) value cannot be parsed as a URI
     */
    public void dissect(Parsable<?> parsable, String inputname) throws DissectionFailure {
        ParsedField field = parsable.getParsableField(INPUT_TYPE, inputname);
        String rawUri = field.getValue().getString();
        if (rawUri == null || rawUri.isEmpty()) {
            return;
        }

        String cleanedUri = cleanUri(rawUri);

        // A URI starting with '/' has no protocol/host part. It gets a dummy prefix so it
        // can be parsed, and the dummy protocol/userinfo/host/port are never emitted.
        boolean hasHostPart = !cleanedUri.startsWith("/");

        try {
            URI uri = hasHostPart
                ? URI.create(cleanedUri)
                : URI.create(DUMMY_PREFIX + cleanedUri);

            if (this.wantQuery) {
                String rawQuery = uri.getRawQuery();
                if (rawQuery == null) {
                    rawQuery = "";
                }
                parsable.addDissection(inputname, "HTTP.QUERYSTRING", "query", rawQuery);
            }
            if (this.wantPath) {
                parsable.addDissection(inputname, "HTTP.PATH", "path", uri.getPath());
            }
            if (this.wantRef) {
                parsable.addDissection(inputname, "HTTP.REF", "ref", uri.getFragment());
            }

            if (!hasHostPart) {
                return;
            }

            if (this.wantProtocol) {
                parsable.addDissection(inputname, "HTTP.PROTOCOL", "protocol", uri.getScheme());
            }
            if (this.wantUserinfo) {
                parsable.addDissection(inputname, "HTTP.USERINFO", "userinfo", uri.getUserInfo());
            }
            if (this.wantHost) {
                parsable.addDissection(inputname, "HTTP.HOST", "host", uri.getHost());
            }
            // URI.getPort() returns -1 when no port is present.
            if (this.wantPort && uri.getPort() != -1) {
                parsable.addDissection(inputname, "HTTP.PORT", "port", uri.getPort());
            }
        } catch (IllegalArgumentException e) {
            throw new DissectionFailure("Failed to parse URI >>" + rawUri + "<< because of : " + e.getMessage());
        }
    }

    /**
     * Rewrites a raw URI string into a form that {@link URI#create(String)} is able to parse.
     *
     * @param rawUri the non-empty URI as found in the input
     * @return the cleaned URI string
     */
    private static String cleanUri(String rawUri) {
        // Percent-encode every byte that is not in the pass-through set.
        String uri = new String(
            URLCodec.encodeUrl(BAD_URI_CHARS, rawUri.getBytes(StandardCharsets.UTF_8)),
            StandardCharsets.US_ASCII);

        // Normalize the query separators: every '?' becomes '&' and the first '&' becomes
        // "?&". So "?x=x&y=y?z=z" and "&x=x&y=y" both end up with a query of "&x=x&y=y...".
        if (uri.indexOf('&') != -1 || uri.indexOf('?') != -1) {
            uri = uri.replace('?', '&').replaceFirst("&", "?&");
        }

        // Escape each '%' that does not start a valid %HH sequence. This is done twice
        // because a match consumes the characters after the '%', so a bad '%' directly
        // following another bad one is only seen on the second pass.
        uri = BAD_ESCAPE_PATTERN.matcher(uri).replaceAll("%25$1");
        uri = BAD_ESCAPE_PATTERN.matcher(uri).replaceAll("%25$1");

        // Insert the missing '&' in front of "#xHH;" and then decode the HTML entities.
        uri = ALMOST_HTML_ENCODED.matcher(uri).replaceAll("$1&$2");
        uri = StringEscapeUtils.unescapeHtml4(uri);

        // Drop a '#' that directly follows a '=' or directly precedes a '&'.
        uri = EQUALS_HASH_PATTERN.matcher(uri).replaceAll("=");
        uri = HASH_AMP_PATTERN.matcher(uri).replaceAll("&");

        // As long as the pattern still finds two '#', turn the first one into a '~'.
        Matcher doubleHash = DOUBLE_HASH_PATTERN.matcher(uri);
        while (doubleHash.find()) {
            uri = doubleHash.replaceAll("~$1#");
            doubleHash = DOUBLE_HASH_PATTERN.matcher(uri);
        }
        return uri;
    }
}
