I believe I have solved the duplicate-tweet issue and revised the responsehandlers.py code.
The issue was that last_tweet_indexed_id was initialized to 0 on every run. To overcome this, I write last_tweet_indexed_id to a file; whenever the responsehandlers.py code is called, it initializes last_tweet_indexed_id from the last value stored in that file instead of 0. Here is my revised code, and it is working — all you need to do is change the path of the log file that holds the id to wherever you want it. Also, I had issues with the created_at field, so I created my own timestamp field to make it easier to extract:
class TwitterEventHandler:
    """Splunk REST TA response handler that streams Twitter search results.

    Persists the highest tweet id seen so far to a state file and feeds it
    back to Twitter as the ``since_id`` request parameter, so the next poll
    only returns tweets newer than what was already indexed (the original
    duplicate-tweet bug came from resetting the id to 0 on every run).
    """

    # Default file locations; both may be overridden via handler args
    # (state_file / debug_file) without breaking existing configurations.
    DEFAULT_STATE_FILE = '/splunk/etc/apps/rest_ta/bin/last_tweet_id.log'
    DEFAULT_DEBUG_FILE = '/splunk/etc/apps/rest_ta/bin/splunk.log'

    def __init__(self, **args):
        # state_file holds the single persisted high-water-mark tweet id;
        # debug_file accumulates one id per poll for troubleshooting.
        self.state_file = args.get('state_file', self.DEFAULT_STATE_FILE)
        self.debug_file = args.get('debug_file', self.DEFAULT_DEBUG_FILE)

    def _read_last_id(self):
        """Return the persisted last-indexed tweet id.

        Returns 0 when the state file is missing, unreadable, or does not
        contain an integer (e.g. on the very first run) — the original code
        crashed in those cases.
        """
        try:
            with open(self.state_file, 'r') as state:
                return int(state.readline())
        except (IOError, OSError, ValueError):
            return 0

    def __call__(self, response_object, raw_response_output, response_type,
                 req_args, endpoint):
        if response_type != "json":
            # Non-JSON payloads are streamed through untouched.
            print_xml_stream(raw_response_output)
            return

        output = json.loads(raw_response_output)
        last_tweet_indexed_id = self._read_last_id()

        for twitter_event in output["statuses"]:
            # Mirror created_at into a dedicated __time field so the
            # timestamp is easier to extract in props.conf.
            if 'created_at' in twitter_event:
                twitter_event['__time'] = twitter_event['created_at']
            print_xml_stream(json.dumps(twitter_event))
            if "id_str" in twitter_event:
                tweet_id = int(twitter_event["id_str"])
                if tweet_id > last_tweet_indexed_id:
                    last_tweet_indexed_id = tweet_id

        # Ask Twitter for only newer tweets on the next poll.
        if "params" not in req_args:
            req_args["params"] = {}
        req_args["params"]["since_id"] = last_tweet_indexed_id

        # Persist the high-water mark. 'with' guarantees the handles are
        # closed even if a write fails (the original leaked them on error).
        with open(self.state_file, 'w') as state:
            state.write(str(last_tweet_indexed_id))
        with open(self.debug_file, 'a') as debug:
            debug.write(str(last_tweet_indexed_id) + '\n')
... View more