# Word Embeddings

In [1]:
import numpy as np
import gensim.downloader as api
import gensim
import pandas as pd
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Load every prepared split from CSV (index_col=False: no column is used as the index)
unlabeled = pd.read_csv("../../data/prepared/unlabeled.csv", index_col=False)
train = pd.read_csv("../../data/prepared/train.csv", index_col=False)
valid = pd.read_csv("../../data/prepared/valid.csv", index_col=False)
test = pd.read_csv("../../data/prepared/test.csv", index_col=False)

# Build the searchable corpus from every split except `test`:
# stack the frames, keep the ticket id and its abstract, drop exact duplicate rows.
df = (
    pd.concat([unlabeled, train, valid], ignore_index=True)
    .loc[:, ['ticket', 'problem_abstract']]
    .drop_duplicates()
)

## Word2Vec

In [3]:
# Function to convert text into normalized averaged word embeddings
def text_to_avg_vector(text, model):
    """Embed ``text`` as the L2-normalised mean of its in-vocabulary word vectors.

    Args:
        text: Whitespace-delimited text. Any value that is not a ``str``
            (``None``, ``NaN``, ``pd.NA``, ...) is treated as empty text
            instead of raising ``AttributeError`` on ``.split()``.
        model: Word-vector lookup exposing ``key_to_index``, ``vector_size``
            and ``model[word]`` (for example a gensim ``KeyedVectors``).

    Returns:
        A 1-D array of length ``model.vector_size``. It is all zeros when the
        text is missing or none of its tokens are in the vocabulary.
    """
    # Missing values coming from a DataFrame are not strings; map them to the
    # same zero vector an empty string produces.
    if not isinstance(text, str):
        return np.zeros(model.vector_size)

    # Tokens are matched against the vocabulary exactly as written (case-sensitive).
    words_in_vocab = [word for word in text.split() if word in model.key_to_index]
    if not words_in_vocab:
        return np.zeros(model.vector_size)

    avg_vector = np.mean([model[word] for word in words_in_vocab], axis=0)

    # Normalize the vector; a zero mean is returned unchanged to avoid dividing by zero
    norm = np.linalg.norm(avg_vector)
    if norm == 0:
        return avg_vector
    return avg_vector / norm

# Convert entire DataFrame to averaged word embeddings
def transform_with_embeddings(df, model):
    """Embed the ``problem_abstract`` column of ``df`` row by row.

    Args:
        df: DataFrame with a ``problem_abstract`` column.
        model: Word-vector model forwarded to ``text_to_avg_vector``.

    Returns:
        ``np.ndarray`` built from the per-row vectors, in row order.
    """
    def _embed(abstract):
        # Missing abstracts (None / NaN) are embedded as the empty string.
        cleaned = "" if abstract is None or pd.isna(abstract) else abstract
        return text_to_avg_vector(cleaned, model)

    row_vectors = df['problem_abstract'].apply(_embed)
    return np.array(row_vectors.tolist())

class EmbeddingRecommender:
    """Nearest-neighbour recommender over averaged word embeddings.

    Every row of ``data`` is embedded once at construction time. Queries are
    embedded the same way and ranked by cosine similarity against the corpus.
    """

    def __init__(self, data: pd.DataFrame, model):
        """Store the corpus and pre-compute one embedding per row.

        Args:
            data: Corpus frame; its ``problem_abstract`` column is embedded
                by ``transform_with_embeddings``.
            model: Word-vector model forwarded to the embedding helpers.
        """
        self._data = data
        self.model = model
        # Row i of document_vectors corresponds to data.iloc[i] (positional, not by label).
        self.document_vectors = transform_with_embeddings(data, model)

    def recommend(self, abstract: str, n: int = 10) -> pd.DataFrame:
        """Return the ``n`` corpus rows most similar to ``abstract``.

        Args:
            abstract: Query text. ``None`` / ``NaN`` is treated as empty text.
            n: Maximum number of rows to return. Values ``<= 0`` yield an
                empty frame.

        Returns:
            Rows of the corpus ordered from most to least similar. Fewer than
            ``n`` rows are returned when the corpus is smaller than ``n``.
        """
        # `[-n:]` with n == 0 is `[0:]` (the whole corpus) and a negative n
        # slices from the front, so non-positive n is handled explicitly.
        # An empty corpus has no vectors to compare against.
        if n <= 0 or len(self._data) == 0:
            return self._data.iloc[0:0]

        # Same missing-value convention as transform_with_embeddings.
        if abstract is None or pd.isna(abstract):
            abstract = ""

        query_vector = text_to_avg_vector(abstract, self.model).reshape(1, -1)
        cosine_sims = cosine_similarity(query_vector, self.document_vectors)
        # NOTE(review): when no query token is in the vocabulary the query is
        # the zero vector, so all similarities should tie and the rows
        # returned are an arbitrary tie-break rather than real matches.
        # argsort is ascending: take the last n positions, then reverse for best-first.
        related_docs_indices = cosine_sims.argsort()[0][-n:][::-1]
        return self._data.iloc[related_docs_indices]

In [4]:
# Load the Word2Vec vectors from the binary word2vec-format file
word2vec_model = KeyedVectors.load_word2vec_format('../../models/GoogleNews-vectors-negative300.bin', binary=True)
# Example Usage with Word2Vec: embed the corpus once, then query it
recommender_word2vec = EmbeddingRecommender(df, word2vec_model)
# Use the first test abstracts (at most 10) as queries. Selecting the column
# once avoids building a full row Series twice per query and does not raise
# IndexError when `test` has fewer than 10 rows.
recommended_word2vec = [
    (abstract, recommender_word2vec.recommend(abstract))
    for abstract in test["problem_abstract"].head(10)
]
recommended_word2vec

[('GSSI EMEA Terminal Server Container NOT Resolving Model Type',
            ticket                                   problem_abstract
  8363   296481325  Fingertip -  Ethernet Processes>ASR Status>Eth...
  9863   303916730  TWB Portal - Corrupt GPS Sync Report - GSSI EM...
  7377   310359990  Could you please change \nPurchase Unit\nQ89E ...
  8245   310640384  Access issues to  the Windows Jump servers  xx...
  6592   313517612  IBM AIA Devices Unable Authenticate with RSA S...
  3481   309870579  Request:\n1- Need User ID list for NS East US ...
  9232   304589474  Having Access Issues: AUS Syslog Server | Logi...
  6979   312255852  Problem Abstract: Request:1- Need User ID list...
  10419  311299222  Request Proxy Server IP to be added for Jump S...
  10643  312027243  PBI 4 Panel - ADI and VPN products need to sho...),
 ('Credentials for new customer Siemens Energy',
            ticket                                   problem_abstract
  8375   311658856                         

In [5]:
# (query abstract, Word2Vec recommendations) for test row 0
recommended_word2vec[0]

('GSSI EMEA Terminal Server Container NOT Resolving Model Type',
           ticket                                   problem_abstract
 8363   296481325  Fingertip -  Ethernet Processes>ASR Status>Eth...
 9863   303916730  TWB Portal - Corrupt GPS Sync Report - GSSI EM...
 7377   310359990  Could you please change \nPurchase Unit\nQ89E ...
 8245   310640384  Access issues to  the Windows Jump servers  xx...
 6592   313517612  IBM AIA Devices Unable Authenticate with RSA S...
 3481   309870579  Request:\n1- Need User ID list for NS East US ...
 9232   304589474  Having Access Issues: AUS Syslog Server | Logi...
 6979   312255852  Problem Abstract: Request:1- Need User ID list...
 10419  311299222  Request Proxy Server IP to be added for Jump S...
 10643  312027243  PBI 4 Panel - ADI and VPN products need to sho...)

In [6]:
# (query abstract, Word2Vec recommendations) for test row 1
recommended_word2vec[1]

('Credentials for new customer Siemens Energy',
           ticket                                   problem_abstract
 8375   311658856                                     Siemens Energy
 11440  314373286  Additional Credentials for Sysco Corp Customer...
 7352   310463338  ACTION Security Audit - Customer Score Card no...
 8660   310493396                        Credentials for Micro Focus
 11568  308589186  Need 2FA  for external customer User to access...
 10800  305946057  New MSS TACACS Domain Name for Customer Metso ...
 262    306670432  Requesting new user access for ka807b to mirro...
 969    307034869  BD Center receives company level error for Sie...
 1552   308050657  Need GPS eBond credentials for Micro Focus LLC...
 11329  308822185  GTAC  2FA authentication to be created  for Ex...)

In [7]:
# (query abstract, Word2Vec recommendations) for test row 2
recommended_word2vec[2]

('Request to provide Telco Inventory (BMP & WFA Inventory)',
           ticket                                   problem_abstract
 11082  310649862                  GTAC: Domain Group Access Request
 9172   307072440  Domain Group Access Request: Autozone:  Co-Man...
 8484   315253103                            Customer Access Request
 7985   308614232  Attn: Alex Pierce - SAFe NOW Data Power-BI Tab...
 7967   308655927  Attn: Alex Pierce - DOMs ATTBDAS NOW Data  - P...
 10903  307325021          Unable to see Domain Group Access Request
 6033   312688740  Three EPA users are not able access Client Req...
 10047  309059485  Please provide unredacted Domain Grant Report ...
 10505  300760130                         CA & SM Customer Inventory
 9765   305700324  New Customer and Domain Creation Request (Boeing))

In [8]:
# (query abstract, Word2Vec recommendations) for test row 3
recommended_word2vec[3]

('ACTION Web will not show me router information',
           ticket                                   problem_abstract
 9247   313541147  Hi team. I have issue on logging to some of AV...
 11476  309347855  Can log into poller but not a certain router n...
 7390   310294158  Opening the ticket as suggested by DH6094. AOT...
 7043   312433048  For some routers no option to view config. oth...
 11401  314221411  Getting error that my ATTUID is not in GPS Dat...
 6900   311892080  tkt number: 262901593  under External DB Info ...
 10334  305715700  I currently am unable to login to a Comerica r...
 6654   310852198  IPv6 access to routers via MCAP seems does not...
 8321   305268467             Action V3 can not pull out device info
 10851  305534616  Request Details: Hi Team, for the following Do...)

In [9]:
# (query abstract, Word2Vec recommendations) for test row 4
recommended_word2vec[4]

('Trying to change password get popup with fatal error',
           ticket                                   problem_abstract
 9085   305089627    Getting an error when trying to change password
 10984  304145595                        password change fatal error
 9062   309197971      Get error when trying to change GTAC password
 11475  306511298    Receiving error when attempting password change
 11635  306626018                  Trying to change password - Error
 9140   309341142  Encountering Fatal Error when trying to change...
 8063   305477651  I was not able to change password with below e...
 9286   306707973              I cant change password with php error
 9188   307716450  Unable to change password received error attac...
 9336   315068040  Client gets error messages when trying to logi...)

In [10]:
# (query abstract, Word2Vec recommendations) for test row 5
recommended_word2vec[5]

('Add the new Wireless Controller to the Cisco ISE servers and share the Key',
           ticket                                   problem_abstract
 9759   312447919  Add new Cisco Wireless controller to the Cisco...
 8834   314278357  Add the Wireless Controller below to the IBM R...
 9485   313507473        Add the eWC controller to Cisco ISE servers
 8388   307706782               Add the WLC to the Cisco ISE Servers
 9016   313020079             Add Wireless controller to ISE servers
 10190  309938787  Kyndryl Full Migration 4AO Markham - Create th...
 9702   307853907  ISE Kyndryl | KSEP - F1B- FLORENCEVILLE Add ne...
 10767  305814599  ISE Kyndryl | KSEP - IC2- Honolulu- Add new Ky...
 11015  308384677  ISE Kyndryl | KSEP - Gz4- Coulumbia - Add new ...
 9081   307189047  ISE IBM IGA | Add the controller to the Cisco ...)

In [11]:
# (query abstract, Word2Vec recommendations) for test row 6
recommended_word2vec[6]

('Kyndryl config backup is missing from the directories',
           ticket                                   problem_abstract
 7542   309686335         Missing backup config for multiple devices
 7902   308829455             don't see backup config in action-tool
 11021  315300651        Missing configuration backup file on poller
 7912   308818644  L4B-VO-MTP-1-KYN device config backup not succ...
 9272   305548788  Need config backup for network device for cust...
 7840   309049489  Device pulls config with no error but actual c...
 5736   312383233  SSH folder missing for MechID: m79942 and m995...
 6962   312225059  ASA backup issue : the config backup works but...
 7517   309799776  URGENT!!! - Action config backup not working p...
 7516   309799931  URGENT!!! - Action config backup not working p...)

In [12]:
# (query abstract, Word2Vec recommendations) for test row 7
recommended_word2vec[7]

('Customer is unable to login to firewall device . ID is configured in device with GTAC radius auth.',
           ticket                                   problem_abstract
 8840   302430152  from poller durnm811c . not able to login the ...
 11533  307292526                        Not able to login on device
 9884   307111289                        Not able to login to device
 8686   307402218  User ps200p unable to login /SSH to device SGI...
 9733   309016613      Network device login seems not prompt for RSA
 4960   311483729  A login does not exist for your ATTUID, for th...
 8860   308639699  Unable to lookup emergency password to check l...
 10876  306303912               Unable to login any device with GTAC
 8496   309850130  unable to login to device using GTAC/ RSA cred...
 3302   309717587  BD ID dhlexpbg  has a security violation on Cl...)

In [13]:
# (query abstract, Word2Vec recommendations) for test row 8
recommended_word2vec[8]

('.cfg files not available on the poller',
           ticket                                   problem_abstract
 8445   313159814  I'm not able to download the Action file from ...
 6618   310654476  Unable to download files from GPS documents ta...
 7636   309429378  In  gps not able to view or edit in order list...
 11125  306486807  I am unable to view device details in action V...
 10628  308049121  I have gtac profile cannot apply access reques...
 9660   309704182  On 4/28 we replaced a Steelhead ip address xxx...
 8856   310516455  Issue with vitalnet portal vnfr03.acss.att.com...
 10334  305715700  I currently am unable to login to a Comerica r...
 2492   308622478  Patches are needed on servers but the var file...
 6769   311304332  Lately on 2 new Site Acceptance Changes notice...)

In [14]:
# (query abstract, Word2Vec recommendations) for test row 9
recommended_word2vec[9]

('down',
          ticket                                   problem_abstract
 3710  310138967  BFEC.528194 down\nBFEC.528194 \nBFEC.528194 \n...
 3708  310138958                 BFEC.527085 down\nBFEC.527085 down
 2344  308813209                             T3.TRGE.623891.SC down
 6374  313098303    BFEC.569754 - 180 ETSEL TAMPFLCO TAMPFLCO  down
 2341  308814996                                   BFEC.504757 down
 5450  312141087                                    L6YX129861 down
 1426  307830493                                    BFEC544245 down
 92    306600717                                BFEC572657 ckt down
 93    306600718                                BFEC597027 ckt down
 3709  310138960                                  BFEC.517604  down)

## GloVe

In [15]:
# Load the GloVe vectors from the plain-text file; no_header=True because the
# file is read without a leading word2vec-style header line.
glove_model = KeyedVectors.load_word2vec_format('../../models/glove.840B.300d.txt', binary=False, no_header=True)

# Embed the corpus once with GloVe, then query it
recommender_glove = EmbeddingRecommender(df, glove_model)
# Use the first test abstracts (at most 10) as queries. Selecting the column
# once avoids building a full row Series twice per query and does not raise
# IndexError when `test` has fewer than 10 rows.
recommended_glove = [
    (abstract, recommender_glove.recommend(abstract))
    for abstract in test["problem_abstract"].head(10)
]
recommended_glove

[('GSSI EMEA Terminal Server Container NOT Resolving Model Type',
            ticket                                   problem_abstract
  11521  306608922                              Internal Server Error
  9792   308524725  Cisco ASA Devices Only Pulling Partial Configu...
  11102  308864468  New MSS TACACS Domain Name for Customer MICRO ...
  9863   303916730  TWB Portal - Corrupt GPS Sync Report - GSSI EM...
  2180   308459397                                Change Account Type
  11163  308825576  New MSS TACACS Domain Name for Customer  MUELL...
  2549   308636704  ISE WK | Move ISE Logging and Reporting to AT&...
  7972   308648813  Updating MSS Tools Page Query for Software Ass...
  8509   308350893    TWB Portal - Broken After NA Server Maintenance
  10720  313454480    Query on Tacacs Server Secret Key Configuration),
 ('Credentials for new customer Siemens Energy',
            ticket                                   problem_abstract
  969    307034869  BD Center receives comp

In [16]:
# (query abstract, GloVe recommendations) for test row 0
recommended_glove[0]

('GSSI EMEA Terminal Server Container NOT Resolving Model Type',
           ticket                                   problem_abstract
 11521  306608922                              Internal Server Error
 9792   308524725  Cisco ASA Devices Only Pulling Partial Configu...
 11102  308864468  New MSS TACACS Domain Name for Customer MICRO ...
 9863   303916730  TWB Portal - Corrupt GPS Sync Report - GSSI EM...
 2180   308459397                                Change Account Type
 11163  308825576  New MSS TACACS Domain Name for Customer  MUELL...
 2549   308636704  ISE WK | Move ISE Logging and Reporting to AT&...
 7972   308648813  Updating MSS Tools Page Query for Software Ass...
 8509   308350893    TWB Portal - Broken After NA Server Maintenance
 10720  313454480    Query on Tacacs Server Secret Key Configuration)

In [17]:
# (query abstract, GloVe recommendations) for test row 1
recommended_glove[1]

('Credentials for new customer Siemens Energy',
           ticket                                   problem_abstract
 969    307034869  BD Center receives company level error for Sie...
 8375   311658856                                     Siemens Energy
 10428  308829273  CA/CM  report for customer IBM Capita Life and...
 9316   274471676  Need to include customer to PCS Active data cu...
 262    306670432  Requesting new user access for ka807b to mirro...
 7096   312603294  Create addtac script for new customer "Kaiser ...
 8436   314213744  Information required for IBM Access request an...
 3536   309892774           Access required for business continuity.
 10842  307440806  ISE IBM IGA | Please update new hostname of WL...
 11568  308589186  Need 2FA  for external customer User to access...)

In [18]:
# (query abstract, GloVe recommendations) for test row 2
recommended_glove[2]

('Request to provide Telco Inventory (BMP & WFA Inventory)',
           ticket                                   problem_abstract
 5532   312207747  Need to reopen CR#5087456 SR#8099205 Manage LM...
 5105   311721052  Please reopen Service Request & SR-Line page f...
 10352  311509796                          Request for Customer Data
 6871   311850096     Access Request for ADI & IPFLEX Reporting Data
 11199  309187940  unable to submit Service Management Engagement...
 8484   315253103                            Customer Access Request
 7554   309610511  Unable to publish to repository - Classic Serv...
 4993   311543455  Please reopen Service Request &SR-Line page fo...
 1612   308093769  Request for Access to GPS Tools for All IBM Re...
 5853   312526331  Request for Help: Report or List of Available ...)

In [19]:
# (query abstract, GloVe recommendations) for test row 3
recommended_glove[3]

('ACTION Web will not show me router information',
           ticket                                   problem_abstract
 9614   311724127  I have access to Action V.3 and have gained al...
 9404   310942387  Customer has stated: Login prompts are working...
 7043   312433048  For some routers no option to view config. oth...
 10628  308049121  I have gtac profile cannot apply access reques...
 11125  306486807  I am unable to view device details in action V...
 10947  310684247  Hi Team,I am not able to access below devices ...
 8273   305658728  I can not edit ticket logs, it shows READ ONLY...
 11283  309458482  Not able to login to Vitalnet because of below...
 10334  305715700  I currently am unable to login to a Comerica r...
 7892   308867642  Customers create tickets for Bvoip issues in E...)

In [20]:
# (query abstract, GloVe recommendations) for test row 4
recommended_glove[4]

('Trying to change password get popup with fatal error',
           ticket                                   problem_abstract
 9085   305089627    Getting an error when trying to change password
 9062   309197971      Get error when trying to change GTAC password
 9286   306707973              I cant change password with php error
 9336   315068040  Client gets error messages when trying to logi...
 11475  306511298    Receiving error when attempting password change
 10984  304145595                        password change fatal error
 9188   307716450  Unable to change password received error attac...
 8063   305477651  I was not able to change password with below e...
 11080  306532398  Unable to change password and it gives me "Fat...
 9140   309341142  Encountering Fatal Error when trying to change...)

In [21]:
# (query abstract, GloVe recommendations) for test row 5
recommended_glove[5]

('Add the new Wireless Controller to the Cisco ISE servers and share the Key',
           ticket                                   problem_abstract
 8834   314278357  Add the Wireless Controller below to the IBM R...
 9759   312447919  Add new Cisco Wireless controller to the Cisco...
 8928   313076227  Please create and add the Cisco WLC to the ISE...
 10201  314806707  Please create and add the below Cisco WLC to t...
 9485   313507473        Add the eWC controller to Cisco ISE servers
 10433  310955245  Please create and add the below Cisco WLC to t...
 9548   314843303  Please create and add the below Cisco WLC to t...
 6576   313499746  Add new wireless controller hj9-wc-c2 to the B...
 10324  310955253  Please create and add the below Cisco WLC to t...
 5785   312447883  Please create acs key for the new controller s...)

In [22]:
# (query abstract, GloVe recommendations) for test row 6
recommended_glove[6]

('Kyndryl config backup is missing from the directories',
           ticket                                   problem_abstract
 10694  311327063  /etc/hosts file replication from rlpv12843.gcs...
 7840   309049489  Device pulls config with no error but actual c...
 6962   312225059  ASA backup issue : the config backup works but...
 8090   312788475                Config missing from action database
 5736   312383233  SSH folder missing for MechID: m79942 and m995...
 10593  314298897  unable to pull older backup configs from the p...
 7542   309686335         Missing backup config for multiple devices
 5102   311718285  BHXNM402 is missing action_nologin configurati...
 7912   308818644  L4B-VO-MTP-1-KYN device config backup not succ...
 11401  314221411  Getting error that my ATTUID is not in GPS Dat...)

In [23]:
# (query abstract, GloVe recommendations) for test row 7
recommended_glove[7]

('Customer is unable to login to firewall device . ID is configured in device with GTAC radius auth.',
           ticket                                   problem_abstract
 8840   302430152  from poller durnm811c . not able to login the ...
 11307  306104165                   None of the device able to login
 10107  305166991  cannot access using token for all device point...
 8686   307402218  User ps200p unable to login /SSH to device SGI...
 9884   307111289                        Not able to login to device
 8860   308639699  Unable to lookup emergency password to check l...
 3302   309717587  BD ID dhlexpbg  has a security violation on Cl...
 8255   309362376  configuration is not pulled completely for dev...
 11283  309458482  Not able to login to Vitalnet because of below...
 9289   306864971  We are not able to login the device via ACAT. ...)

In [24]:
# (query abstract, GloVe recommendations) for test row 8
recommended_glove[8]

('.cfg files not available on the poller',
           ticket                                   problem_abstract
 6984   312257393   /tools/gt3/bin scripts not working on any poller
 6796   311489738  CBACK and HOIP scripts are end up in Error on ...
 2492   308622478  Patches are needed on servers but the var file...
 8840   302430152  from poller durnm811c . not able to login the ...
 6535   313383875  SSH works fine using only DR poller rlpv12021,...
 6114   312791933  We are getting multiple false alerts for route...
 8445   313159814  I'm not able to download the Action file from ...
 11190  310877877  verify ACTION backend processing is turned on ...
 8097   313629991  add seed file group write permission on poller...
 7938   308745353  Received multiple alerts for Kyndrl devices do...)

In [25]:
# (query abstract, GloVe recommendations) for test row 9
recommended_glove[9]

('down',
           ticket                                   problem_abstract
 2344   308813209                             T3.TRGE.623891.SC down
 3710   310138967  BFEC.528194 down\nBFEC.528194 \nBFEC.528194 \n...
 5450   312141087                                    L6YX129861 down
 1426   307830493                                    BFEC544245 down
 487    306676750                          rlpv11007 jumpserver down
 3708   310138958                 BFEC.527085 down\nBFEC.527085 down
 3709   310138960                                  BFEC.517604  down
 2341   308814996                                   BFEC.504757 down
 2340   308814990                                   BFEC.521155 down
 10562  313311257                                   Vitalnet is down)