News Named Entity Extraction (NER) and Sentiment Analysis

Summary

!pip install psycopg2-binary
!pip install feedparser

import json
import re

import feedparser
import numpy as np
import pandas as pd
import requests
import yaml
from textblob import TextBlob
Requirement already satisfied: psycopg2-binary in /usr/local/lib/python3.6/dist-packages (2.8.5)
Requirement already satisfied: feedparser in /usr/local/lib/python3.6/dist-packages (5.2.1)
# Catalogue of RSS feeds to fetch and combine: short feed id -> feed URL.
# More feeds are listed at GlobeNewswire / Europe - http://www.globenewswire.com/Rss/List
newsurls = {
    'globenewswire-us': (
        'http://www.globenewswire.com/RssFeed/country/United%20States'
        '/feedTitle/GlobeNewswire%20-%20News%20from%20United%20States'
    ),
}
# Fetch the RSS feed and return feedparser's parsed representation
def parse_rss(rss_url):
    """Download and parse the RSS feed at *rss_url* with feedparser."""
    return feedparser.parse(rss_url)

# Grab the RSS feed headlines (titles) and return them as a list
def get_headlines(rss_url):
    """Return the title of every item in the feed at *rss_url*."""
    parsed = parse_rss(rss_url)
    return [item['title'] for item in parsed['items']]
def get_summaries(rss_url):
    """Return the summary text of every item in the feed at *rss_url*."""
    parsed = parse_rss(rss_url)
    return [item['summary'] for item in parsed['items']]
def get_entries(rss_url):
    """Return, for each item in the feed at *rss_url*, its available keys."""
    parsed = parse_rss(rss_url)
    return [item.keys() for item in parsed['items']]
# Inspect which fields are available on the items of each RSS feed
entries = []
# Iterate over the feed urls
for key, url in newsurls.items():
    # Collect the key set of every item in this feed
    # (the original comment wrongly said getHeadlines() here)
    entries.extend(get_entries(url))
# The cell output was fused onto this line in the original; shown below as a comment.
print(entries[0])
# Example output:
# dict_keys(['id', 'guidislink', 'link', 'links', 'tags', 'title', 'title_detail',
#            'summary', 'summary_detail', 'published', 'published_parsed',
#            'dc_identifier', 'language', 'publisher', 'publisher_detail',
#            'contributors', 'dc_modified', 'dc_keyword'])
# Lists to hold all headlines and summaries across feeds
allheadlines = []
summaries = []

# Iterate over the feed urls
for key, url in newsurls.items():
    # BUG FIX: the original called getHeadlines()/getSummaries(), which do not
    # exist — the functions defined above are get_headlines()/get_summaries().
    allheadlines.extend(get_headlines(url))
    summaries.extend(get_summaries(url))

# Iterate over the allheadlines list and print each headline
for hl in allheadlines:
    print(hl)
Velocity (VEL) Alert: Johnson Fistel Investigates Velocity Financial, Inc.; Investors Suffering Losses Encouraged to Contact Firm
ALIGN DEADLINE ALERT: Faruqi & Faruqi, LLP Encourages Investors Who Suffered Losses Exceeding $100,000 In Align Technology, Inc. To Contact The Firm
ALLAKOS DEADLINE ALERT: Faruqi & Faruqi, LLP Encourages Investors Who Suffered Losses Exceeding $50,000 In Allakos Inc. To Contact The Firm
FUNKO LEAD PLAINTIFF DEADLINE ALERT: Faruqi & Faruqi, LLP Encourages Investors Who Suffered Losses Exceeding $50,000 In Funko, Inc. To Contact The Firm
WWE DEADLINE ALERT: Faruqi & Faruqi, LLP Encourages Investors Who Suffered Losses Exceeding $50,000 in World Wrestling Entertainment, Inc. to Contact the Firm
ROSEN, A GLOBALLY RECOGNIZED LAW FIRM, Reminds Golden Star Resources Ltd. Investors of Important Deadline in Securities Class Action – GSS
ROSEN, A GLOBALLY RECOGNIZED LAW FIRM, Reminds LogicBio Therapeutics, Inc. Investors of the Important Deadline in Securities Class Action First Filed by Firm – LOGC
...
# Pick a single headline as the sample document to send for entity tagging
contentText = allheadlines[1]
print(contentText)
Velocity (VEL) Alert: Johnson Fistel Investigates Velocity Financial, Inc.; Investors Suffering Losses Encouraged to Contact Firm
# Build and send a tagging request to the Refinitiv Intelligent Tagging
# (Open Calais) API, asking for a JSON response for raw-text input.
headType = "text/raw"
# NOTE(review): hardcoded access token checked into source — should be
# loaded from an environment variable or config instead.
token = 'oSyQfYcRShExGJmJPXRgr4kOFAsIHqoJ'
url = "https://api-eit.refinitiv.com/permid/calais"
payload = contentText.encode('utf8')
headers = {
    'Content-Type': headType,
    'X-AG-Access-Token': token,
    'outputformat': "application/json",
}
# The daily limit is 5,000 requests; the concurrent limit varies by API
# from 1-4 calls per second.
TRITResponse = requests.post(url, data=payload, headers=headers)
# Deserialise the tagging response body into a nested dict of tags
JSONResponse = json.loads(TRITResponse.text)
# Uncomment to pretty-print the full response:
# print(json.dumps(JSONResponse, indent=4, sort_keys=True))
# Print every recognised entity (type and name) from the tagging response
print('====Entities====')
print('Type, Name')
for item in JSONResponse.values():
    # Only tag records carry a '_typeGroup'; skip metadata values
    if '_typeGroup' in item and item['_typeGroup'] == 'entities':
        print(item['_type'] + ", " + item['name'])
====Entities====
Type, Name
Company, JOHNSON FISTEL
Company, velocity financial, inc.
# Get RIC code
# BUG FIX: in the original, print('====RIC====') was fused into the comment
# above ("#Get RIC codeprint(...)"), so the header line never executed.
print('====RIC====')
print('RIC')
# Walk the tag records looking for company resolutions carrying a primary RIC
for entity in JSONResponse:
    for info in JSONResponse[entity]:
        if info == 'resolutions':
            for companyinfo in JSONResponse[entity][info]:
                if 'primaryric' in companyinfo:
                    # Keep the last resolved RIC; later cells reuse `symbol`
                    symbol = companyinfo['primaryric']
                    print(symbol)
====RIC====
RIC
VEL.N
# Print header: the resolved RIC, then each topic classification with its score
print(symbol)
print('====Topics====')
print('Topics, Score')
for item in JSONResponse.values():
    # Topic records are the tag values whose '_typeGroup' is 'topics'
    if '_typeGroup' in item and item['_typeGroup'] == 'topics':
        print(item['name'] + ", " + str(item['score']))
VEL.N
====Topics====
Topics, Score
Business_Finance, 1
Health_Medical_Pharma, 0.935
Disaster_Accident, 0.817
# Define function to be used for text sentiment analysis
def get_sentiment(txt):
    '''
    Classify the sentiment of *txt* as 'positive', 'neutral' or 'negative'.

    The text is cleaned by removing @-mentions, links and special characters
    using simple regex statements, then scored with TextBlob's sentiment
    (polarity) method.

    BUG FIX: the original used `re` without importing it anywhere in the
    file, raising NameError on first call; `import re` is now at the top.
    '''
    # Clean text: strip @handles, non-alphanumerics and URLs, collapse spaces.
    # Raw string avoids invalid escape sequences in the pattern.
    clean_txt = ' '.join(
        re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", txt).split()
    )
    # Create TextBlob object of the cleaned text
    analysis = TextBlob(clean_txt)
    # Map polarity (-1.0..1.0) onto three discrete labels
    if analysis.sentiment.polarity > 0:
        return 'positive'
    elif analysis.sentiment.polarity == 0:
        return 'neutral'
    else:
        return 'negative'
# Show the sample headline and summary alongside their sentiment labels
for label, text in (('headline', allheadlines[1]), ('summary', summaries[1])):
    print(label + ': ', text)
    print(label + ' sentiment: ', get_sentiment(text))
headline:  Velocity (VEL) Alert: Johnson Fistel Investigates Velocity Financial, Inc.; Investors Suffering Losses Encouraged to Contact Firm
headline sentiment:  negative
summary:  <p>SAN DIEGO, April 26, 2020 (GLOBE NEWSWIRE) -- Shareholder rights law firm Johnson Fistel, LLP is investigating potential violations of the federal securities laws by Velocity Financial, Inc. ("Velocity" or "the Company") (NYSE: VEL).<br></p>
summary sentiment:  negative
# NOTE(review): hardcoded API credential checked into source — load from an
# environment variable or config instead.
eod_api_token = '5cc0ea63d1cda3.37070012'
# Map the Reuters RIC exchange suffix ('.N' = NYSE) to EOD's '.US' suffix.
# BUG FIX: the original replaced every 'N' (symbol.replace('N', 'US')),
# which corrupts tickers containing 'N' (e.g. 'NVDA.N' -> 'USVDA.US');
# only the '.N' suffix should be rewritten.
eod_symbol = symbol.replace('.N', '.US')
eod_price_url = ('https://eodhistoricaldata.com/api/eod/' + eod_symbol
                 + '?api_token=' + eod_api_token)
# The endpoint returns CSV, so pandas reads it straight from the URL
price_df = pd.read_csv(eod_price_url)
# Most recent trading day first
price_df.sort_values(by=['Date'], inplace=True, ascending=False)
price_df.head()
# Same download as above, shown with a placeholder token for readers to
# substitute their own EOD Historical Data API key.
eod_api_token = '<mytoken>'
# BUG FIX: replace only the '.N' exchange suffix with '.US' — the original
# symbol.replace('N', 'US') rewrote every 'N' in the ticker.
eod_symbol = symbol.replace('.N', '.US')
eod_price_url = ('https://eodhistoricaldata.com/api/eod/' + eod_symbol
                 + '?api_token=' + eod_api_token)
price_df = pd.read_csv(eod_price_url)
# Sort prices newest-first before previewing
price_df.sort_values(by=['Date'], inplace=True, ascending=False)
price_df.head()

CTO and Head of Data at Denario.io

Share your ideas with millions of readers.

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store