Commit e08cc205 authored by zhoushiguang

flink-sql-connectors-kafka source code modifications

parent ba9ce0e2
Modified source class in flink-sql-connector-kafka_2.11-1.14.3:
org.apache.flink.streaming.connectors.kafka.table.KafkaDynamicSource
The GROUP_OFFSETS branch was changed to:
            case GROUP_OFFSETS:
                // kafkaSourceBuilder.setStartingOffsets(OffsetsInitializer.committedOffsets());
                String offsetResetConfig =
                        properties.getProperty(
                                ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
                                OffsetResetStrategy.NONE.name());
                OffsetResetStrategy offsetResetStrategy = getResetStrategy(offsetResetConfig);
                kafkaSourceBuilder.setStartingOffsets(
                        OffsetsInitializer.committedOffsets(offsetResetStrategy));
                break;
The following private helper was added to the same class to map the auto.offset.reset value to an OffsetResetStrategy:

    private OffsetResetStrategy getResetStrategy(String offsetResetConfig) {
        return Arrays.stream(OffsetResetStrategy.values())
                .filter(ors -> ors.name().equals(offsetResetConfig.toUpperCase(Locale.ROOT)))
                .findAny()
                .orElseThrow(
                        () ->
                                new IllegalArgumentException(
                                        String.format(
                                                "%s can not be set to %s. Valid values: [%s]",
                                                ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
                                                offsetResetConfig,
                                                Arrays.stream(OffsetResetStrategy.values())
                                                        .map(Enum::name)
                                                        .map(String::toLowerCase)
                                                        .collect(Collectors.joining(",")))));
    }
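For context, a minimal sketch of how the patched behavior would be exercised from Flink SQL through the Table API: with 'scan.startup.mode' = 'group-offsets', partitions that have no committed offset now fall back to the strategy configured via 'properties.auto.offset.reset' instead of failing with OffsetResetStrategy.NONE. Topic, broker address, group id, and schema below are placeholders, not part of the commit.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class GroupOffsetsStartupExample {
    public static void main(String[] args) {
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.newInstance().inStreamingMode().build());

        // With the patched KafkaDynamicSource, partitions without a committed offset fall back
        // to 'properties.auto.offset.reset' ('earliest' here) instead of OffsetResetStrategy.NONE.
        tEnv.executeSql(
                "CREATE TABLE demo_source (\n"
                        + "  id BIGINT,\n"
                        + "  msg STRING\n"
                        + ") WITH (\n"
                        + "  'connector' = 'kafka',\n"
                        + "  'topic' = 'demo-topic',\n"
                        + "  'properties.bootstrap.servers' = 'localhost:9092',\n"
                        + "  'properties.group.id' = 'demo-group',\n"
                        + "  'scan.startup.mode' = 'group-offsets',\n"
                        + "  'properties.auto.offset.reset' = 'earliest',\n"
                        + "  'format' = 'json'\n"
                        + ")");

        tEnv.executeSql("SELECT * FROM demo_source").print();
    }
}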
(The full file diff is collapsed here. The related connector source files follow.)
package org.apache.flink.connector.kafka.source.enumerator.initializer;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.flink.kafka.shaded.org.apache.kafka.common.TopicPartition;

import java.io.Serializable;
import java.util.Collection;
import java.util.Map;

@PublicEvolving
public interface OffsetsInitializer extends Serializable {

    Map<TopicPartition, Long> getPartitionOffsets(
            Collection<TopicPartition> partitions,
            PartitionOffsetsRetriever partitionOffsetsRetriever);

    OffsetResetStrategy getAutoOffsetResetStrategy();

    static OffsetsInitializer committedOffsets() {
        return committedOffsets(OffsetResetStrategy.NONE);
    }

    static OffsetsInitializer committedOffsets(OffsetResetStrategy offsetResetStrategy) {
        // -3L is the sentinel for "start from committed offsets"; the reader resolves it.
        return new ReaderHandledOffsetsInitializer(-3L, offsetResetStrategy);
    }

    static OffsetsInitializer timestamp(long timestamp) {
        return new TimestampOffsetsInitializer(timestamp);
    }

    static OffsetsInitializer earliest() {
        // -2L is the sentinel for "start from the earliest offset".
        return new ReaderHandledOffsetsInitializer(-2L, OffsetResetStrategy.EARLIEST);
    }

    static OffsetsInitializer latest() {
        // -1L is the sentinel for "start from the latest offset".
        return new ReaderHandledOffsetsInitializer(-1L, OffsetResetStrategy.LATEST);
    }

    static OffsetsInitializer offsets(Map<TopicPartition, Long> offsets) {
        return new SpecifiedOffsetsInitializer(offsets, OffsetResetStrategy.EARLIEST);
    }

    static OffsetsInitializer offsets(
            Map<TopicPartition, Long> offsets, OffsetResetStrategy offsetResetStrategy) {
        return new SpecifiedOffsetsInitializer(offsets, offsetResetStrategy);
    }

    interface PartitionOffsetsRetriever {

        Map<TopicPartition, Long> committedOffsets(Collection<TopicPartition> partitions);

        Map<TopicPartition, Long> endOffsets(Collection<TopicPartition> partitions);

        Map<TopicPartition, Long> beginningOffsets(Collection<TopicPartition> partitions);

        Map<TopicPartition, OffsetAndTimestamp> offsetsForTimes(
                Map<TopicPartition, Long> timestampsToSearch);
    }
}
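A minimal sketch of how these factory methods are typically used when building a KafkaSource with the DataStream API. Broker address, topic, and group id are placeholders, and the unshaded org.apache.kafka package is assumed (i.e. compiling against flink-connector-kafka rather than the shaded SQL fat jar shown above).

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;

public class OffsetsInitializerUsage {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        KafkaSource<String> source =
                KafkaSource.<String>builder()
                        .setBootstrapServers("localhost:9092")
                        .setTopics("demo-topic")
                        .setGroupId("demo-group")
                        // Start from committed offsets, falling back to EARLIEST where none exist;
                        // alternatives: OffsetsInitializer.earliest(), latest(), timestamp(...).
                        .setStartingOffsets(
                                OffsetsInitializer.committedOffsets(OffsetResetStrategy.EARLIEST))
                        .setValueOnlyDeserializer(new SimpleStringSchema())
                        .build();

        env.fromSource(source, WatermarkStrategy.noWatermarks(), "kafka-source").print();
        env.execute("offsets-initializer-usage");
    }
}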
package org.apache.flink.connector.kafka.source.enumerator.initializer;

import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.flink.kafka.shaded.org.apache.kafka.common.TopicPartition;
import org.apache.flink.util.Preconditions;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * An initializer that assigns the configured sentinel starting offset to every partition and
 * leaves the actual offset resolution to the source reader.
 */
class ReaderHandledOffsetsInitializer implements OffsetsInitializer, OffsetsInitializerValidator {
    private static final long serialVersionUID = 172938052008787981L;
    private final long startingOffset;
    private final OffsetResetStrategy offsetResetStrategy;

    ReaderHandledOffsetsInitializer(long startingOffset, OffsetResetStrategy offsetResetStrategy) {
        this.startingOffset = startingOffset;
        this.offsetResetStrategy = offsetResetStrategy;
    }

    @Override
    public Map<TopicPartition, Long> getPartitionOffsets(
            Collection<TopicPartition> partitions,
            PartitionOffsetsRetriever partitionOffsetsRetriever) {
        Map<TopicPartition, Long> initialOffsets = new HashMap<>();
        for (TopicPartition tp : partitions) {
            initialOffsets.put(tp, startingOffset);
        }
        return initialOffsets;
    }

    @Override
    public OffsetResetStrategy getAutoOffsetResetStrategy() {
        return offsetResetStrategy;
    }

    @Override
    public void validate(Properties kafkaSourceProperties) {
        // Starting from committed offsets (sentinel -3L) only makes sense with a consumer group.
        if (startingOffset == -3L) {
            Preconditions.checkState(
                    kafkaSourceProperties.containsKey("group.id"),
                    String.format(
                            "Property %s is required when using committed offset for offsets initializer",
                            "group.id"));
        }
    }
}
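As a small illustration of the sentinel behavior above (a sketch, not part of the commit): getPartitionOffsets tags every partition with the configured sentinel and never touches the retriever, so the actual lookup happens later in the reader. Topic and partitions are placeholders.

import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.kafka.common.TopicPartition;

import java.util.Arrays;
import java.util.Map;

public class ReaderHandledSentinelDemo {
    public static void main(String[] args) {
        Map<TopicPartition, Long> offsets =
                OffsetsInitializer.committedOffsets()
                        .getPartitionOffsets(
                                Arrays.asList(
                                        new TopicPartition("demo-topic", 0),
                                        new TopicPartition("demo-topic", 1)),
                                null); // the retriever is ignored by this initializer

        // Prints -3 for each partition: the "committed offsets" sentinel that the
        // source reader resolves against the consumer group at runtime.
        offsets.forEach((tp, offset) -> System.out.println(tp + " -> " + offset));
    }
}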
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.connector.kafka.source.enumerator.initializer;
import org.apache.flink.connector.kafka.source.split.KafkaPartitionSplit;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.flink.kafka.shaded.org.apache.kafka.common.TopicPartition;
import java.util.*;
import static org.apache.flink.util.Preconditions.checkState;
/**
* An implementation of {@link OffsetsInitializer} which initializes the offsets of the partition
* according to the user specified offsets.
*
* <p>Package private and should be instantiated via {@link OffsetsInitializer}.
*/
class SpecifiedOffsetsInitializer implements OffsetsInitializer, OffsetsInitializerValidator {
private static final long serialVersionUID = 1649702397250402877L;
private final Map<TopicPartition, Long> initialOffsets;
private final OffsetResetStrategy offsetResetStrategy;
SpecifiedOffsetsInitializer(
Map<TopicPartition, Long> initialOffsets, OffsetResetStrategy offsetResetStrategy) {
this.initialOffsets = Collections.unmodifiableMap(initialOffsets);
this.offsetResetStrategy = offsetResetStrategy;
}
@Override
public Map<TopicPartition, Long> getPartitionOffsets(
Collection<TopicPartition> partitions,
PartitionOffsetsRetriever partitionOffsetsRetriever) {
Map<TopicPartition, Long> offsets = new HashMap<>();
List<TopicPartition> toLookup = new ArrayList<>();
for (TopicPartition tp : partitions) {
Long offset = initialOffsets.get(tp);
if (offset == null) {
toLookup.add(tp);
} else {
offsets.put(tp, offset);
}
}
if (!toLookup.isEmpty()) {
// First check the committed offsets.
Map<TopicPartition, Long> committedOffsets =
partitionOffsetsRetriever.committedOffsets(toLookup);
offsets.putAll(committedOffsets);
toLookup.removeAll(committedOffsets.keySet());
switch (offsetResetStrategy) {
case EARLIEST:
offsets.putAll(partitionOffsetsRetriever.beginningOffsets(toLookup));
break;
case LATEST:
offsets.putAll(partitionOffsetsRetriever.endOffsets(toLookup));
break;
default:
throw new IllegalStateException(
"Cannot find initial offsets for partitions: " + toLookup);
}
}
return offsets;
}
@Override
public OffsetResetStrategy getAutoOffsetResetStrategy() {
return offsetResetStrategy;
}
@Override
public void validate(Properties kafkaSourceProperties) {
initialOffsets.forEach(
(tp, offset) -> {
if (offset == KafkaPartitionSplit.COMMITTED_OFFSET) {
checkState(
kafkaSourceProperties.containsKey(ConsumerConfig.GROUP_ID_CONFIG),
String.format(
"Property %s is required because partition %s is initialized with committed offset",
ConsumerConfig.GROUP_ID_CONFIG, tp));
}
});
}
}
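A brief sketch of how user-specified offsets reach this class through the OffsetsInitializer.offsets(...) factory; topic and offset values are placeholders. Partitions listed in the map start exactly at the given offsets, while any other partition is resolved from committed offsets first and then from the reset strategy, as implemented above.

import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.kafka.common.TopicPartition;

import java.util.HashMap;
import java.util.Map;

public class SpecifiedOffsetsUsage {
    public static void main(String[] args) {
        Map<TopicPartition, Long> specificOffsets = new HashMap<>();
        specificOffsets.put(new TopicPartition("demo-topic", 0), 42L);
        specificOffsets.put(new TopicPartition("demo-topic", 1), 7L);

        // Missing partitions fall back to committed offsets, then to LATEST.
        OffsetsInitializer initializer =
                OffsetsInitializer.offsets(specificOffsets, OffsetResetStrategy.LATEST);

        // Passed to KafkaSourceBuilder#setStartingOffsets(initializer) when building the source.
        System.out.println(initializer.getAutoOffsetResetStrategy());
    }
}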
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.connectors.kafka;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.flink.util.Collector;
import java.io.Serializable;
/**
* The deserialization schema describes how to turn the Kafka ConsumerRecords into data types
* (Java/Scala objects) that are processed by Flink.
*
* @param <T> The type created by the keyed deserialization schema.
*/
@PublicEvolving
public interface KafkaDeserializationSchema<T> extends Serializable, ResultTypeQueryable<T> {
/**
* Initialization method for the schema. It is called before the actual working methods {@link
* #deserialize} and thus suitable for one time setup work.
*
* <p>The provided {@link DeserializationSchema.InitializationContext} can be used to access
* additional features such as e.g. registering user metrics.
*
* @param context Contextual information that can be used during initialization.
*/
default void open(DeserializationSchema.InitializationContext context) throws Exception {}
/**
* Method to decide whether the element signals the end of the stream. If true is returned the
* element won't be emitted.
*
* @param nextElement The element to test for the end-of-stream signal.
* @return True, if the element signals end of stream, false otherwise.
*/
boolean isEndOfStream(T nextElement);
/**
* Deserializes the Kafka record.
*
* @param record Kafka record to be deserialized.
* @return The deserialized message as an object (null if the message cannot be deserialized).
*/
T deserialize(ConsumerRecord<byte[], byte[]> record) throws Exception;
/**
* Deserializes the Kafka record.
*
* <p>Can output multiple records through the {@link Collector}. Note that number and size of
* the produced records should be relatively small. Depending on the source implementation
* records can be buffered in memory or collecting records might delay emitting checkpoint
* barrier.
*
* @param message The Kafka record to deserialize.
* @param out The collector to put the resulting messages.
*/
default void deserialize(ConsumerRecord<byte[], byte[]> message, Collector<T> out)
throws Exception {
T deserialized = deserialize(message);
if (deserialized != null) {
out.collect(deserialized);
}
}
}
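A minimal sketch of an implementation (a hypothetical helper class, not part of the commit, assuming the unshaded org.apache.kafka classes on the classpath) that turns each record value into a String and exposes the produced type:

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema;
import org.apache.kafka.clients.consumer.ConsumerRecord;

import java.nio.charset.StandardCharsets;

public class ValueAsStringDeserializationSchema implements KafkaDeserializationSchema<String> {
    private static final long serialVersionUID = 1L;

    @Override
    public boolean isEndOfStream(String nextElement) {
        return false; // unbounded stream: never signal end-of-stream
    }

    @Override
    public String deserialize(ConsumerRecord<byte[], byte[]> record) {
        // Tombstones (null values) map to null and are skipped by the default
        // collector-based deserialize(...) method of the interface.
        return record.value() == null
                ? null
                : new String(record.value(), StandardCharsets.UTF_8);
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return Types.STRING;
    }
}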
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.connectors.kafka;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;
import javax.annotation.Nullable;
import java.io.Serializable;
/**
* A {@link KafkaSerializationSchema} defines how to serialize values of type {@code T} into {@link
* ProducerRecord ProducerRecords}.
*
* <p>Please also implement {@link KafkaContextAware} if your serialization schema needs information
* about the available partitions and the number of parallel subtasks along with the subtask ID on
* which the Kafka Producer is running.
*
* @param <T> the type of values being serialized
*/
@PublicEvolving
public interface KafkaSerializationSchema<T> extends Serializable {
/**
* Initialization method for the schema. It is called before the actual working methods {@link
* #serialize(Object, Long)} and thus suitable for one time setup work.
*
* <p>The provided {@link SerializationSchema.InitializationContext} can be used to access
* additional features such as e.g. registering user metrics.
*
* @param context Contextual information that can be used during initialization.
*/
default void open(SerializationSchema.InitializationContext context) throws Exception {}
/**
* Serializes given element and returns it as a {@link ProducerRecord}.
*
* @param element element to be serialized
* @param timestamp timestamp (can be null)
* @return Kafka {@link ProducerRecord}
*/
ProducerRecord<byte[], byte[]> serialize(T element, @Nullable Long timestamp);
}
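And a matching minimal sketch of a serialization schema (again a hypothetical helper class) that writes UTF-8, value-only records to a configurable topic; the topic name is supplied by the caller:

import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;

import javax.annotation.Nullable;
import java.nio.charset.StandardCharsets;

public class ValueAsStringSerializationSchema implements KafkaSerializationSchema<String> {
    private static final long serialVersionUID = 1L;

    private final String topic;

    public ValueAsStringSerializationSchema(String topic) {
        this.topic = topic;
    }

    @Override
    public ProducerRecord<byte[], byte[]> serialize(String element, @Nullable Long timestamp) {
        // Value-only record: the key stays null and the timestamp is forwarded when present.
        return new ProducerRecord<>(
                topic, null, timestamp, null, element.getBytes(StandardCharsets.UTF_8));
    }
}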
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.connectors.kafka.table;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.types.DeserializationException;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.Preconditions;
import javax.annotation.Nullable;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
/** A specific {@link KafkaDeserializationSchema} for {@link KafkaDynamicSource}. */
class DynamicKafkaDeserializationSchema implements KafkaDeserializationSchema<RowData> {
private static final long serialVersionUID = 1L;
private final @Nullable DeserializationSchema<RowData> keyDeserialization;
private final DeserializationSchema<RowData> valueDeserialization;
private final boolean hasMetadata;
private final BufferingCollector keyCollector;
private final OutputProjectionCollector outputCollector;
private final TypeInformation<RowData> producedTypeInfo;
private final boolean upsertMode;
DynamicKafkaDeserializationSchema(
int physicalArity,
@Nullable DeserializationSchema<RowData> keyDeserialization,
int[] keyProjection,
DeserializationSchema<RowData> valueDeserialization,
int[] valueProjection,
boolean hasMetadata,
MetadataConverter[] metadataConverters,
TypeInformation<RowData> producedTypeInfo,
boolean upsertMode) {
if (upsertMode) {
Preconditions.checkArgument(
keyDeserialization != null && keyProjection.length > 0,
"Key must be set in upsert mode for deserialization schema.");
}
this.keyDeserialization = keyDeserialization;
this.valueDeserialization = valueDeserialization;
this.hasMetadata = hasMetadata;
this.keyCollector = new BufferingCollector();
this.outputCollector =
new OutputProjectionCollector(
physicalArity,
keyProjection,
valueProjection,
metadataConverters,
upsertMode);
this.producedTypeInfo = producedTypeInfo;
this.upsertMode = upsertMode;
}
@Override
public void open(DeserializationSchema.InitializationContext context) throws Exception {
if (keyDeserialization != null) {
keyDeserialization.open(context);
}
valueDeserialization.open(context);
}
@Override
public boolean isEndOfStream(RowData nextElement) {
return false;
}
@Override
public RowData deserialize(ConsumerRecord<byte[], byte[]> record) throws Exception {
throw new IllegalStateException("A collector is required for deserializing.");
}
@Override
public void deserialize(ConsumerRecord<byte[], byte[]> record, Collector<RowData> collector)
throws Exception {
// shortcut in case no output projection is required,
// also not for a cartesian product with the keys
if (keyDeserialization == null && !hasMetadata) {
valueDeserialization.deserialize(record.value(), collector);
return;
}
// buffer key(s)
if (keyDeserialization != null) {
keyDeserialization.deserialize(record.key(), keyCollector);
}
// project output while emitting values
outputCollector.inputRecord = record;
outputCollector.physicalKeyRows = keyCollector.buffer;
outputCollector.outputCollector = collector;
if (record.value() == null && upsertMode) {
// collect tombstone messages in upsert mode by hand
outputCollector.collect(null);
} else {
valueDeserialization.deserialize(record.value(), outputCollector);
}
keyCollector.buffer.clear();
}
@Override
public TypeInformation<RowData> getProducedType() {
return producedTypeInfo;
}
// --------------------------------------------------------------------------------------------
interface MetadataConverter extends Serializable {
Object read(ConsumerRecord<?, ?> record);
}
// --------------------------------------------------------------------------------------------
private static final class BufferingCollector implements Collector<RowData>, Serializable {
private static final long serialVersionUID = 1L;
private final List<RowData> buffer = new ArrayList<>();
@Override
public void collect(RowData record) {
buffer.add(record);
}
@Override
public void close() {
// nothing to do
}
}
// --------------------------------------------------------------------------------------------
/**
* Emits a row with key, value, and metadata fields.
*
* <p>The collector is able to handle the following kinds of keys:
*
* <ul>
* <li>No key is used.
* <li>A key is used.
* <li>The deserialization schema emits multiple keys.
* <li>Keys and values have overlapping fields.
* <li>Keys are used and value is null.
* </ul>
*/
private static final class OutputProjectionCollector
implements Collector<RowData>, Serializable {
private static final long serialVersionUID = 1L;
private final int physicalArity;
private final int[] keyProjection;
private final int[] valueProjection;
private final MetadataConverter[] metadataConverters;
private final boolean upsertMode;
private transient ConsumerRecord<?, ?> inputRecord;
private transient List<RowData> physicalKeyRows;
private transient Collector<RowData> outputCollector;
OutputProjectionCollector(
int physicalArity,
int[] keyProjection,
int[] valueProjection,
MetadataConverter[] metadataConverters,
boolean upsertMode) {
this.physicalArity = physicalArity;
this.keyProjection = keyProjection;
this.valueProjection = valueProjection;
this.metadataConverters = metadataConverters;
this.upsertMode = upsertMode;
}
@Override
public void collect(RowData physicalValueRow) {
// no key defined
if (keyProjection.length == 0) {
emitRow(null, (GenericRowData) physicalValueRow);
return;
}
// otherwise emit a value for each key
for (RowData physicalKeyRow : physicalKeyRows) {
emitRow((GenericRowData) physicalKeyRow, (GenericRowData) physicalValueRow);
}
}
@Override
public void close() {
// nothing to do
}
private void emitRow(
@Nullable GenericRowData physicalKeyRow,
@Nullable GenericRowData physicalValueRow) {
final RowKind rowKind;
if (physicalValueRow == null) {
if (upsertMode) {
rowKind = RowKind.DELETE;
} else {
throw new DeserializationException(
"Invalid null value received in non-upsert mode. Could not to set row kind for output record.");
}
} else {
rowKind = physicalValueRow.getRowKind();
}
final int metadataArity = metadataConverters.length;
final GenericRowData producedRow =
new GenericRowData(rowKind, physicalArity + metadataArity);
for (int keyPos = 0; keyPos < keyProjection.length; keyPos++) {
assert physicalKeyRow != null;
producedRow.setField(keyProjection[keyPos], physicalKeyRow.getField(keyPos));
}
if (physicalValueRow != null) {
for (int valuePos = 0; valuePos < valueProjection.length; valuePos++) {
producedRow.setField(
valueProjection[valuePos], physicalValueRow.getField(valuePos));
}
}
for (int metadataPos = 0; metadataPos < metadataArity; metadataPos++) {
producedRow.setField(
physicalArity + metadataPos,
metadataConverters[metadataPos].read(inputRecord));
}
outputCollector.collect(producedRow);
}
}
}
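To make the projection in emitRow above easier to follow, a standalone sketch with hypothetical field values (not connector code) of how key, value, and metadata fields end up in one produced row:

import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.types.RowKind;

public class ProjectionSketch {
    public static void main(String[] args) {
        // Assume: physical arity 3, the key projected to position 0, value fields to 1 and 2,
        // and one metadata column appended at the end (e.g. the record timestamp).
        int physicalArity = 3;
        int metadataArity = 1;

        GenericRowData producedRow =
                new GenericRowData(RowKind.INSERT, physicalArity + metadataArity);

        producedRow.setField(0, StringData.fromString("key-1"));      // from the key row
        producedRow.setField(1, StringData.fromString("some value")); // from the value row
        producedRow.setField(2, 42L);                                 // from the value row
        producedRow.setField(physicalArity, 1700000000000L);          // appended metadata

        System.out.println(producedRow);
    }
}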