diff --git a/scripts/freq/freqProcessor.py b/scripts/freq/freqProcessor.py index caa45671e86b2cb32fa8c4771eea32504d5139f2..9d8fe0b23ac57733b20aa8c46e6298ab1e9ce158 100755 --- a/scripts/freq/freqProcessor.py +++ b/scripts/freq/freqProcessor.py @@ -208,16 +208,34 @@ class FreqProcessor(AbstractProcessor): Get domain form record, compute probability using the FreqCounter, and store the resulting probabilities to the record. """ - text = record.getValue(self.input_field_name) - if text is None: - # FlowFile attribute tells we should take the domain from the field - # names as $input_field_name, but such a field i not present in the - # record. - # Don't set the result fields - self.logger.warn("The 'Input field' attribute points to '{}' field which is not present in the record.".format(self.input_field_name)) - return + # Get value of given field. + # The field name may look like 'xx/yy/zz'. There can be a field with + # exactly this key (incl. slashes), or there can be tree of nested + # keys/sub-objects. + # First, try to get the field by its full name. + value = record.getValue(self.input_field_name) + if value is None: + # The field named exactly as $input_field_name is not present in + # the record. Maybe it's a nested key ... + if "/" in self.input_field_name: + path_parts = self.input_field_name.split('/') + # Get first sub-key/sub-object (returns another MapRecord or None) + value = record.getValue(path_parts.pop(0)) + # iterate any possible sub-objects to final sub-key + while value and path_parts: + value = value.getValue(path_parts.pop(0)) + + # If value is still None, the field is not there + # -> don't set the result fields + if value is None: + self.logger.warn("The 'Input field' attribute points to '{}' field which is not present in the record, record skipped.".format(self.input_field_name)) + return - prob1, prob2 = self.fc.probability(text) + if not isinstance(value, (str, unicode)): + self.logger.warn("Value of the '{}' field is not a string, record skipped.".format(self.input_field_name)) + return + + prob1, prob2 = self.fc.probability(value) record.setValue(self.result1_field_name, prob1) record.setValue(self.result2_field_name, prob2)