Skip to content
Snippets Groups Projects
Commit da9e7a2d authored by Václav Bartoš's avatar Václav Bartoš
Browse files

FreqProcessor: Added support for nested fields.

plus data type check added
parent 913e1634
No related branches found
No related tags found
No related merge requests found
......@@ -208,16 +208,34 @@ class FreqProcessor(AbstractProcessor):
Get domain from record, compute probability using the FreqCounter,
and store the resulting probabilities to the record.
"""
text = record.getValue(self.input_field_name)
if text is None:
# FlowFile attribute tells we should take the domain from the field
# names as $input_field_name, but such a field is not present in the
# record.
# Don't set the result fields
self.logger.warn("The 'Input field' attribute points to '{}' field which is not present in the record.".format(self.input_field_name))
return
# Get value of given field.
# The field name may look like 'xx/yy/zz'. There can be a field with
# exactly this key (incl. slashes), or there can be tree of nested
# keys/sub-objects.
# First, try to get the field by its full name.
value = record.getValue(self.input_field_name)
if value is None:
# The field named exactly as $input_field_name is not present in
# the record. Maybe it's a nested key ...
if "/" in self.input_field_name:
path_parts = self.input_field_name.split('/')
# Get first sub-key/sub-object (returns another MapRecord or None)
value = record.getValue(path_parts.pop(0))
# iterate any possible sub-objects to final sub-key
while value and path_parts:
value = value.getValue(path_parts.pop(0))
# If value is still None, the field is not there
# -> don't set the result fields
if value is None:
self.logger.warn("The 'Input field' attribute points to '{}' field which is not present in the record, record skipped.".format(self.input_field_name))
return
prob1, prob2 = self.fc.probability(text)
if not isinstance(value, (str, unicode)):
self.logger.warn("Value of the '{}' field is not a string, record skipped.".format(self.input_field_name))
return
prob1, prob2 = self.fc.probability(value)
record.setValue(self.result1_field_name, prob1)
record.setValue(self.result2_field_name, prob2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment