/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.table.sources.parquet.update;

import org.apache.flink.table.api.VirtualColumn;
import org.apache.flink.table.dataformat.BaseRow;
import org.apache.flink.table.sinks.parquet.DeleteRowParquetOutputFormat;
import org.apache.flink.table.sources.parquet.ParquetVectorizedColumnRowReader;
import org.apache.flink.table.types.DataTypes;
import org.apache.flink.table.types.InternalType;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.parquet.hadoop.ParquetInputSplit;

import java.io.IOException;
import java.util.BitSet;

/**
 * Read parquet files and deal with delete files.
 */
public class UpdateParquetVectorizedColumnRowReader extends ParquetVectorizedColumnRowReader {

	private BitSet deleteBitSet = null;
	private int rowIdIndex = -1;
	private boolean isDeleteSource;

	public UpdateParquetVectorizedColumnRowReader(
			InternalType[] fieldTypes,
			String[] fieldNames, long limit, boolean isDeleteSource) {
		super(fieldTypes, fieldNames, limit);
		for (int i = 0; i < fieldNames.length; i++) {
			if (fieldNames[i].equals(VirtualColumn.ROWID.getName())) {
				rowIdIndex = i;
			}
		}
		this.isDeleteSource = isDeleteSource;
		if (isDeleteSource) {
			this.columnarRow = new VirtualColumnarRow();
		} else {
			this.columnarRow = new ActualColumnarRow();
		}
	}

	public void initialize(
			InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
		// the inputSplit may be null during the split phase
		super.initialize(inputSplit, taskAttemptContext);
		ParquetInputSplit parquetInputSplit = (ParquetInputSplit) inputSplit;
		loadDeleteFiles(parquetInputSplit.getPath(),  taskAttemptContext);
		if (isDeleteSource) {
			((VirtualColumnarRow) columnarRow).setFileName(parquetInputSplit.getPath().toString());
		}
	}

	public boolean nextKeyValue() throws IOException, InterruptedException {
		if (currentConsumeRowCount >= limit) {
			return false;
		} else {
			currentConsumeRowCount++;
			do {
				rowIdx++;
				if (rowIdx >= batchSize) {
					if (!nextBatch()) {
						return false;
					} else {
						batchSize = columnarBatch.getNumRows();
						rowIdx = 0;

						columnarRow.setVectorizedColumnBatch(columnarBatch);
					}
				}
				columnarRow.setRowId(rowIdx);
				if (deleteBitSet == null || !deleteBitSet.get(columnarRow.getInt(rowIdIndex))) {
					return true;
				}
			} while (true);
		}
	}

	// load delete files and put deleted rowids to the bitSet.
	private void loadDeleteFiles(Path file, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
		deleteBitSet = new BitSet();
		Path deleteDirPath = file.suffix(DeleteRowParquetOutputFormat.DELETE_DIR_SUFFIX);
		FileSystem fs = file.getFileSystem(taskAttemptContext.getConfiguration());
		if (!fs.isDirectory(deleteDirPath)) {
			return;
		}
		// delete dirs made by different jobs.
		FileStatus[] deleteDirStatuses = fs.listStatus(deleteDirPath);
		if (deleteDirStatuses == null) {
			return;
		}
		deleteBitSet = new BitSet();
		for (FileStatus deleteDirStatus : deleteDirStatuses) {
			FileStatus[] deleteFileStatuses = fs.listStatus(deleteDirStatus.getPath());
			// delete files made by the same jobs.
			if (deleteFileStatuses == null) {
				return;
			}
			for (FileStatus deleteFileStatus : deleteFileStatuses) {
				Path deletePath = deleteFileStatus.getPath();
				ParquetVectorizedColumnRowReader reader = new ParquetVectorizedColumnRowReader(
						new InternalType[]{DataTypes.INT}, new String[]{VirtualColumn.ROWID.getName()}, Long.MAX_VALUE);
				ParquetInputSplit split = new ParquetInputSplit(
						new org.apache.hadoop.fs.Path(deletePath.toUri()),
						0,
						deleteFileStatus.getLen(),
						deleteFileStatus.getLen(),
						null,
						null);
				reader.initialize(split, taskAttemptContext);
				while (reader.nextKeyValue()) {
					int id = ((BaseRow) reader.getCurrentValue()).getInt(0);
					deleteBitSet.set(id);
				}
				reader.close();
			}
		}
	}
}
